001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.fs; 020 021import java.io.IOException; 022import java.net.URI; 023import java.net.URISyntaxException; 024 025import java.util.regex.Matcher; 026import java.util.regex.Pattern; 027 028import org.apache.avro.reflect.Stringable; 029import org.apache.hadoop.HadoopIllegalArgumentException; 030import org.apache.hadoop.classification.InterfaceAudience; 031import org.apache.hadoop.classification.InterfaceStability; 032import org.apache.hadoop.conf.Configuration; 033 034/** Names a file or directory in a {@link FileSystem}. 035 * Path strings use slash as the directory separator. A path string is 036 * absolute if it begins with a slash. 037 */ 038@Stringable 039@InterfaceAudience.Public 040@InterfaceStability.Stable 041public class Path implements Comparable { 042 // Static pattern objects to improve performance of regex matching. 043 private static final Pattern fSlashPattern = Pattern.compile("//"); 044 private static final Pattern bSlashPattern = Pattern.compile("\\\\"); 045 046 /** The directory separator, a slash. */ 047 public static final String SEPARATOR = "/"; 048 public static final char SEPARATOR_CHAR = '/'; 049 050 public static final String CUR_DIR = "."; 051 052 public static final boolean WINDOWS 053 = System.getProperty("os.name").startsWith("Windows"); 054 055 /** 056 * Pre-compiled regular expressions to detect path formats. 057 */ 058 private static final Pattern hasUriScheme = 059 Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:"); 060 private static final Pattern hasDriveLetterSpecifier = 061 Pattern.compile("^/?[a-zA-Z]:"); 062 063 private URI uri; // a hierarchical uri 064 065 /** 066 * Pathnames with scheme and relative path are illegal. 067 */ 068 void checkNotSchemeWithRelative() { 069 if (toUri().isAbsolute() && !isUriPathAbsolute()) { 070 throw new HadoopIllegalArgumentException( 071 "Unsupported name: has scheme but relative path-part"); 072 } 073 } 074 075 void checkNotRelative() { 076 if (!isAbsolute() && toUri().getScheme() == null) { 077 throw new HadoopIllegalArgumentException("Path is relative"); 078 } 079 } 080 081 public static Path getPathWithoutSchemeAndAuthority(Path path) { 082 // This code depends on Path.toString() to remove the leading slash before 083 // the drive specification on Windows. 084 Path newPath = path.isUriPathAbsolute() ? 085 new Path(null, null, path.toUri().getPath()) : 086 path; 087 return newPath; 088 } 089 090 /** Resolve a child path against a parent path. */ 091 public Path(String parent, String child) { 092 this(new Path(parent), new Path(child)); 093 } 094 095 /** Resolve a child path against a parent path. */ 096 public Path(Path parent, String child) { 097 this(parent, new Path(child)); 098 } 099 100 /** Resolve a child path against a parent path. */ 101 public Path(String parent, Path child) { 102 this(new Path(parent), child); 103 } 104 105 /** Resolve a child path against a parent path. */ 106 public Path(Path parent, Path child) { 107 // Add a slash to parent's path so resolution is compatible with URI's 108 URI parentUri = parent.uri; 109 String parentPath = parentUri.getPath(); 110 if (!(parentPath.equals("/") || parentPath.isEmpty())) { 111 try { 112 parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(), 113 parentUri.getPath()+"/", null, parentUri.getFragment()); 114 } catch (URISyntaxException e) { 115 throw new IllegalArgumentException(e); 116 } 117 } 118 URI resolved = parentUri.resolve(child.uri); 119 initialize(resolved.getScheme(), resolved.getAuthority(), 120 resolved.getPath(), resolved.getFragment()); 121 } 122 123 private void checkPathArg( String path ) throws IllegalArgumentException { 124 // disallow construction of a Path from an empty string 125 if ( path == null ) { 126 throw new IllegalArgumentException( 127 "Can not create a Path from a null string"); 128 } 129 if( path.length() == 0 ) { 130 throw new IllegalArgumentException( 131 "Can not create a Path from an empty string"); 132 } 133 } 134 135 /** Construct a path from a String. Path strings are URIs, but with 136 * unescaped elements and some additional normalization. */ 137 public Path(String pathString) throws IllegalArgumentException { 138 checkPathArg( pathString ); 139 140 // We can't use 'new URI(String)' directly, since it assumes things are 141 // escaped, which we don't require of Paths. 142 143 // add a slash in front of paths with Windows drive letters 144 if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') { 145 pathString = "/" + pathString; 146 } 147 148 // parse uri components 149 String scheme = null; 150 String authority = null; 151 152 int start = 0; 153 154 // If there are more than one leading slashes, reduce them to just one 155 // slash. Otherwise, the URI won't be created correctly. 156 // For e.g., //abc will get converted into hdfs://abc while it should be 157 // hdfs:///abc. 158 if (pathString.charAt(0) == '/') { 159 pathString = pathString.replaceFirst("^/+", "/"); 160 } 161 162 // parse uri scheme, if any 163 int colon = pathString.indexOf(':'); 164 int slash = pathString.indexOf('/'); 165 if ((colon != -1) && 166 ((slash == -1) || (colon < slash))) { // has a scheme 167 scheme = pathString.substring(0, colon); 168 start = colon+1; 169 } 170 171 // parse uri authority, if any 172 if (pathString.startsWith("//", start) && 173 (pathString.length()-start > 2)) { // has authority 174 int nextSlash = pathString.indexOf('/', start+2); 175 int authEnd = nextSlash > 0 ? nextSlash : pathString.length(); 176 authority = pathString.substring(start+2, authEnd); 177 start = authEnd; 178 } 179 180 // uri path is the rest of the string -- query & fragment not supported 181 String path = pathString.substring(start, pathString.length()); 182 183 initialize(scheme, authority, path, null); 184 } 185 186 /** 187 * Construct a path from a URI 188 */ 189 public Path(URI aUri) { 190 uri = aUri.normalize(); 191 } 192 193 /** Construct a Path from components. */ 194 public Path(String scheme, String authority, String path) { 195 checkPathArg( path ); 196 197 // add a slash in front of paths with Windows drive letters 198 if (hasWindowsDrive(path) && path.charAt(0) != '/') { 199 path = "/" + path; 200 } 201 202 // add "./" in front of Linux relative paths so that a path containing 203 // a colon e.q. "a:b" will not be interpreted as scheme "a". 204 if (!WINDOWS && path.charAt(0) != '/') { 205 path = "./" + path; 206 } 207 208 initialize(scheme, authority, path, null); 209 } 210 211 private void initialize(String scheme, String authority, String path, 212 String fragment) { 213 try { 214 this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment) 215 .normalize(); 216 } catch (URISyntaxException e) { 217 throw new IllegalArgumentException(e); 218 } 219 } 220 221 /** 222 * Merge 2 paths such that the second path is appended relative to the first. 223 * The returned path has the scheme and authority of the first path. On 224 * Windows, the drive specification in the second path is discarded. 225 * 226 * @param path1 Path first path 227 * @param path2 Path second path, to be appended relative to path1 228 * @return Path merged path 229 */ 230 public static Path mergePaths(Path path1, Path path2) { 231 String path2Str = path2.toUri().getPath(); 232 path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str)); 233 // Add path components explicitly, because simply concatenating two path 234 // string is not safe, for example: 235 // "/" + "/foo" yields "//foo", which will be parsed as authority in Path 236 return new Path(path1.toUri().getScheme(), 237 path1.toUri().getAuthority(), 238 path1.toUri().getPath() + path2Str); 239 } 240 241 /** 242 * Normalize a path string to use non-duplicated forward slashes as 243 * the path separator and remove any trailing path separators. 244 * @param scheme Supplies the URI scheme. Used to deduce whether we 245 * should replace backslashes or not. 246 * @param path Supplies the scheme-specific part 247 * @return Normalized path string. 248 */ 249 private static String normalizePath(String scheme, String path) { 250 // Remove double forward slashes. 251 final Matcher fMatcher = fSlashPattern.matcher(path); 252 path = fMatcher.replaceAll("/"); 253 254 // Remove backslashes if this looks like a Windows path. Avoid 255 // the substitution if it looks like a non-local URI. 256 if (WINDOWS && 257 (hasWindowsDrive(path) || 258 (scheme == null) || 259 (scheme.isEmpty()) || 260 (scheme.equals("file")))) { 261 final Matcher bMatcher = bSlashPattern.matcher(path); 262 path = bMatcher.replaceAll("/"); 263 } 264 265 // trim trailing slash from non-root path (ignoring windows drive) 266 int minLength = startPositionWithoutWindowsDrive(path) + 1; 267 if (path.length() > minLength && path.endsWith(SEPARATOR)) { 268 path = path.substring(0, path.length()-1); 269 } 270 271 return path; 272 } 273 274 private static boolean hasWindowsDrive(String path) { 275 return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find()); 276 } 277 278 private static int startPositionWithoutWindowsDrive(String path) { 279 if (hasWindowsDrive(path)) { 280 return path.charAt(0) == SEPARATOR_CHAR ? 3 : 2; 281 } else { 282 return 0; 283 } 284 } 285 286 /** 287 * Determine whether a given path string represents an absolute path on 288 * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not. 289 * 290 * @param pathString Supplies the path string to evaluate. 291 * @param slashed true if the given path is prefixed with "/". 292 * @return true if the supplied path looks like an absolute path with a Windows 293 * drive-specifier. 294 */ 295 public static boolean isWindowsAbsolutePath(final String pathString, 296 final boolean slashed) { 297 int start = startPositionWithoutWindowsDrive(pathString); 298 return start > 0 299 && pathString.length() > start 300 && ((pathString.charAt(start) == SEPARATOR_CHAR) || 301 (pathString.charAt(start) == '\\')); 302 } 303 304 /** Convert this to a URI. */ 305 public URI toUri() { return uri; } 306 307 /** Return the FileSystem that owns this Path. */ 308 public FileSystem getFileSystem(Configuration conf) throws IOException { 309 return FileSystem.get(this.toUri(), conf); 310 } 311 312 /** 313 * Is an absolute path (ie a slash relative path part) 314 * AND a scheme is null AND authority is null. 315 */ 316 public boolean isAbsoluteAndSchemeAuthorityNull() { 317 return (isUriPathAbsolute() && 318 uri.getScheme() == null && uri.getAuthority() == null); 319 } 320 321 /** 322 * True if the path component (i.e. directory) of this URI is absolute. 323 */ 324 public boolean isUriPathAbsolute() { 325 int start = startPositionWithoutWindowsDrive(uri.getPath()); 326 return uri.getPath().startsWith(SEPARATOR, start); 327 } 328 329 /** True if the path component of this URI is absolute. */ 330 /** 331 * There is some ambiguity here. An absolute path is a slash 332 * relative name without a scheme or an authority. 333 * So either this method was incorrectly named or its 334 * implementation is incorrect. This method returns true 335 * even if there is a scheme and authority. 336 */ 337 public boolean isAbsolute() { 338 return isUriPathAbsolute(); 339 } 340 341 /** 342 * @return true if and only if this path represents the root of a file system 343 */ 344 public boolean isRoot() { 345 return getParent() == null; 346 } 347 348 /** Returns the final component of this path.*/ 349 public String getName() { 350 String path = uri.getPath(); 351 int slash = path.lastIndexOf(SEPARATOR); 352 return path.substring(slash+1); 353 } 354 355 /** Returns the parent of a path or null if at root. */ 356 public Path getParent() { 357 String path = uri.getPath(); 358 int lastSlash = path.lastIndexOf('/'); 359 int start = startPositionWithoutWindowsDrive(path); 360 if ((path.length() == start) || // empty path 361 (lastSlash == start && path.length() == start+1)) { // at root 362 return null; 363 } 364 String parent; 365 if (lastSlash==-1) { 366 parent = CUR_DIR; 367 } else { 368 parent = path.substring(0, lastSlash==start?start+1:lastSlash); 369 } 370 return new Path(uri.getScheme(), uri.getAuthority(), parent); 371 } 372 373 /** Adds a suffix to the final name in the path.*/ 374 public Path suffix(String suffix) { 375 return new Path(getParent(), getName()+suffix); 376 } 377 378 @Override 379 public String toString() { 380 // we can't use uri.toString(), which escapes everything, because we want 381 // illegal characters unescaped in the string, for glob processing, etc. 382 StringBuilder buffer = new StringBuilder(); 383 if (uri.getScheme() != null) { 384 buffer.append(uri.getScheme()); 385 buffer.append(":"); 386 } 387 if (uri.getAuthority() != null) { 388 buffer.append("//"); 389 buffer.append(uri.getAuthority()); 390 } else { 391 // Add the two forward slashes if the input path had it. 392 // This can be checked from the scheme specific part. 393 // E.g., hdfs://a/b will have authority = NULL. But we want to return 394 // the double slashes in the result. This is the behavior of URI.toString. 395 String ssPart = uri.getSchemeSpecificPart(); 396 if (ssPart != null 397 && ssPart.length() > 2 398 && ssPart.charAt(0) == '/' 399 && ssPart.charAt(1) == '/') { 400 401 buffer.append("//"); 402 } 403 } 404 if (uri.getPath() != null) { 405 String path = uri.getPath(); 406 if (path.indexOf('/')==0 && 407 hasWindowsDrive(path) && // has windows drive 408 uri.getScheme() == null && // but no scheme 409 uri.getAuthority() == null) // or authority 410 path = path.substring(1); // remove slash before drive 411 buffer.append(path); 412 } 413 if (uri.getFragment() != null) { 414 buffer.append("#"); 415 buffer.append(uri.getFragment()); 416 } 417 return buffer.toString(); 418 } 419 420 @Override 421 public boolean equals(Object o) { 422 if (!(o instanceof Path)) { 423 return false; 424 } 425 Path that = (Path)o; 426 return this.uri.equals(that.uri); 427 } 428 429 @Override 430 public int hashCode() { 431 return uri.hashCode(); 432 } 433 434 @Override 435 public int compareTo(Object o) { 436 Path that = (Path)o; 437 return this.uri.compareTo(that.uri); 438 } 439 440 /** Return the number of elements in this path. */ 441 public int depth() { 442 String path = uri.getPath(); 443 int depth = 0; 444 int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0; 445 while (slash != -1) { 446 depth++; 447 slash = path.indexOf(SEPARATOR, slash+1); 448 } 449 return depth; 450 } 451 452 /** 453 * Returns a qualified path object. 454 * 455 * Deprecated - use {@link #makeQualified(URI, Path)} 456 */ 457 @Deprecated 458 public Path makeQualified(FileSystem fs) { 459 return makeQualified(fs.getUri(), fs.getWorkingDirectory()); 460 } 461 462 /** Returns a qualified path object. */ 463 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 464 public Path makeQualified(URI defaultUri, Path workingDir ) { 465 Path path = this; 466 if (!isAbsolute()) { 467 path = new Path(workingDir, this); 468 } 469 470 URI pathUri = path.toUri(); 471 472 String scheme = pathUri.getScheme(); 473 String authority = pathUri.getAuthority(); 474 String fragment = pathUri.getFragment(); 475 476 if (scheme != null && 477 (authority != null || defaultUri.getAuthority() == null)) 478 return path; 479 480 if (scheme == null) { 481 scheme = defaultUri.getScheme(); 482 } 483 484 if (authority == null) { 485 authority = defaultUri.getAuthority(); 486 if (authority == null) { 487 authority = ""; 488 } 489 } 490 491 URI newUri = null; 492 try { 493 newUri = new URI(scheme, authority , 494 normalizePath(scheme, pathUri.getPath()), null, fragment); 495 } catch (URISyntaxException e) { 496 throw new IllegalArgumentException(e); 497 } 498 return new Path(newUri); 499 } 500}