001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.fs; 020 021import java.io.IOException; 022import java.net.URI; 023import java.net.URISyntaxException; 024 025import java.util.regex.Matcher; 026import java.util.regex.Pattern; 027 028import org.apache.avro.reflect.Stringable; 029import org.apache.hadoop.HadoopIllegalArgumentException; 030import org.apache.hadoop.classification.InterfaceAudience; 031import org.apache.hadoop.classification.InterfaceStability; 032import org.apache.hadoop.conf.Configuration; 033 034/** Names a file or directory in a {@link FileSystem}. 035 * Path strings use slash as the directory separator. A path string is 036 * absolute if it begins with a slash. 037 */ 038@Stringable 039@InterfaceAudience.Public 040@InterfaceStability.Stable 041public class Path implements Comparable { 042 // Static pattern objects to improve performance of regex matching. 043 private static final Pattern fSlashPattern = Pattern.compile("//"); 044 private static final Pattern bSlashPattern = Pattern.compile("\\\\"); 045 046 /** The directory separator, a slash. */ 047 public static final String SEPARATOR = "/"; 048 public static final char SEPARATOR_CHAR = '/'; 049 050 public static final String CUR_DIR = "."; 051 052 public static final boolean WINDOWS 053 = System.getProperty("os.name").startsWith("Windows"); 054 055 /** 056 * Pre-compiled regular expressions to detect path formats. 057 */ 058 private static final Pattern hasUriScheme = 059 Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:"); 060 private static final Pattern hasDriveLetterSpecifier = 061 Pattern.compile("^/?[a-zA-Z]:"); 062 063 private URI uri; // a hierarchical uri 064 065 /** 066 * Pathnames with scheme and relative path are illegal. 067 * @param path to be checked 068 */ 069 void checkNotSchemeWithRelative() { 070 if (toUri().isAbsolute() && !isUriPathAbsolute()) { 071 throw new HadoopIllegalArgumentException( 072 "Unsupported name: has scheme but relative path-part"); 073 } 074 } 075 076 void checkNotRelative() { 077 if (!isAbsolute() && toUri().getScheme() == null) { 078 throw new HadoopIllegalArgumentException("Path is relative"); 079 } 080 } 081 082 public static Path getPathWithoutSchemeAndAuthority(Path path) { 083 // This code depends on Path.toString() to remove the leading slash before 084 // the drive specification on Windows. 085 Path newPath = path.isUriPathAbsolute() ? 086 new Path(null, null, path.toUri().getPath()) : 087 path; 088 return newPath; 089 } 090 091 /** Resolve a child path against a parent path. */ 092 public Path(String parent, String child) { 093 this(new Path(parent), new Path(child)); 094 } 095 096 /** Resolve a child path against a parent path. */ 097 public Path(Path parent, String child) { 098 this(parent, new Path(child)); 099 } 100 101 /** Resolve a child path against a parent path. */ 102 public Path(String parent, Path child) { 103 this(new Path(parent), child); 104 } 105 106 /** Resolve a child path against a parent path. */ 107 public Path(Path parent, Path child) { 108 // Add a slash to parent's path so resolution is compatible with URI's 109 URI parentUri = parent.uri; 110 String parentPath = parentUri.getPath(); 111 if (!(parentPath.equals("/") || parentPath.isEmpty())) { 112 try { 113 parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(), 114 parentUri.getPath()+"/", null, parentUri.getFragment()); 115 } catch (URISyntaxException e) { 116 throw new IllegalArgumentException(e); 117 } 118 } 119 URI resolved = parentUri.resolve(child.uri); 120 initialize(resolved.getScheme(), resolved.getAuthority(), 121 resolved.getPath(), resolved.getFragment()); 122 } 123 124 private void checkPathArg( String path ) throws IllegalArgumentException { 125 // disallow construction of a Path from an empty string 126 if ( path == null ) { 127 throw new IllegalArgumentException( 128 "Can not create a Path from a null string"); 129 } 130 if( path.length() == 0 ) { 131 throw new IllegalArgumentException( 132 "Can not create a Path from an empty string"); 133 } 134 } 135 136 /** Construct a path from a String. Path strings are URIs, but with 137 * unescaped elements and some additional normalization. */ 138 public Path(String pathString) throws IllegalArgumentException { 139 checkPathArg( pathString ); 140 141 // We can't use 'new URI(String)' directly, since it assumes things are 142 // escaped, which we don't require of Paths. 143 144 // add a slash in front of paths with Windows drive letters 145 if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') { 146 pathString = "/" + pathString; 147 } 148 149 // parse uri components 150 String scheme = null; 151 String authority = null; 152 153 int start = 0; 154 155 // If there are more than one leading slashes, reduce them to just one 156 // slash. Otherwise, the URI won't be created correctly. 157 // For e.g., //abc will get converted into hdfs://abc while it should be 158 // hdfs:///abc. 159 if (pathString.charAt(0) == '/') { 160 pathString = pathString.replaceFirst("^/+", "/"); 161 } 162 163 // parse uri scheme, if any 164 int colon = pathString.indexOf(':'); 165 int slash = pathString.indexOf('/'); 166 if ((colon != -1) && 167 ((slash == -1) || (colon < slash))) { // has a scheme 168 scheme = pathString.substring(0, colon); 169 start = colon+1; 170 } 171 172 // parse uri authority, if any 173 if (pathString.startsWith("//", start) && 174 (pathString.length()-start > 2)) { // has authority 175 int nextSlash = pathString.indexOf('/', start+2); 176 int authEnd = nextSlash > 0 ? nextSlash : pathString.length(); 177 authority = pathString.substring(start+2, authEnd); 178 start = authEnd; 179 } 180 181 // uri path is the rest of the string -- query & fragment not supported 182 String path = pathString.substring(start, pathString.length()); 183 184 initialize(scheme, authority, path, null); 185 } 186 187 /** 188 * Construct a path from a URI 189 */ 190 public Path(URI aUri) { 191 uri = aUri.normalize(); 192 } 193 194 /** Construct a Path from components. */ 195 public Path(String scheme, String authority, String path) { 196 checkPathArg( path ); 197 198 // add a slash in front of paths with Windows drive letters 199 if (hasWindowsDrive(path) && path.charAt(0) != '/') { 200 path = "/" + path; 201 } 202 203 // add "./" in front of Linux relative paths so that a path containing 204 // a colon e.q. "a:b" will not be interpreted as scheme "a". 205 if (!WINDOWS && path.charAt(0) != '/') { 206 path = "./" + path; 207 } 208 209 initialize(scheme, authority, path, null); 210 } 211 212 private void initialize(String scheme, String authority, String path, 213 String fragment) { 214 try { 215 this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment) 216 .normalize(); 217 } catch (URISyntaxException e) { 218 throw new IllegalArgumentException(e); 219 } 220 } 221 222 /** 223 * Merge 2 paths such that the second path is appended relative to the first. 224 * The returned path has the scheme and authority of the first path. On 225 * Windows, the drive specification in the second path is discarded. 226 * 227 * @param path1 Path first path 228 * @param path2 Path second path, to be appended relative to path1 229 * @return Path merged path 230 */ 231 public static Path mergePaths(Path path1, Path path2) { 232 String path2Str = path2.toUri().getPath(); 233 path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str)); 234 // Add path components explicitly, because simply concatenating two path 235 // string is not safe, for example: 236 // "/" + "/foo" yields "//foo", which will be parsed as authority in Path 237 return new Path(path1.toUri().getScheme(), 238 path1.toUri().getAuthority(), 239 path1.toUri().getPath() + path2Str); 240 } 241 242 /** 243 * Normalize a path string to use non-duplicated forward slashes as 244 * the path separator and remove any trailing path separators. 245 * @param scheme Supplies the URI scheme. Used to deduce whether we 246 * should replace backslashes or not. 247 * @param path Supplies the scheme-specific part 248 * @return Normalized path string. 249 */ 250 private static String normalizePath(String scheme, String path) { 251 // Remove double forward slashes. 252 final Matcher fMatcher = fSlashPattern.matcher(path); 253 path = fMatcher.replaceAll("/"); 254 255 // Remove backslashes if this looks like a Windows path. Avoid 256 // the substitution if it looks like a non-local URI. 257 if (WINDOWS && 258 (hasWindowsDrive(path) || 259 (scheme == null) || 260 (scheme.isEmpty()) || 261 (scheme.equals("file")))) { 262 final Matcher bMatcher = bSlashPattern.matcher(path); 263 path = bMatcher.replaceAll("/"); 264 } 265 266 // trim trailing slash from non-root path (ignoring windows drive) 267 int minLength = startPositionWithoutWindowsDrive(path) + 1; 268 if (path.length() > minLength && path.endsWith(SEPARATOR)) { 269 path = path.substring(0, path.length()-1); 270 } 271 272 return path; 273 } 274 275 private static boolean hasWindowsDrive(String path) { 276 return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find()); 277 } 278 279 private static int startPositionWithoutWindowsDrive(String path) { 280 if (hasWindowsDrive(path)) { 281 return path.charAt(0) == SEPARATOR_CHAR ? 3 : 2; 282 } else { 283 return 0; 284 } 285 } 286 287 /** 288 * Determine whether a given path string represents an absolute path on 289 * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not. 290 * 291 * @param pathString Supplies the path string to evaluate. 292 * @param slashed true if the given path is prefixed with "/". 293 * @return true if the supplied path looks like an absolute path with a Windows 294 * drive-specifier. 295 */ 296 public static boolean isWindowsAbsolutePath(final String pathString, 297 final boolean slashed) { 298 int start = startPositionWithoutWindowsDrive(pathString); 299 return start > 0 300 && pathString.length() > start 301 && ((pathString.charAt(start) == SEPARATOR_CHAR) || 302 (pathString.charAt(start) == '\\')); 303 } 304 305 /** Convert this to a URI. */ 306 public URI toUri() { return uri; } 307 308 /** Return the FileSystem that owns this Path. */ 309 public FileSystem getFileSystem(Configuration conf) throws IOException { 310 return FileSystem.get(this.toUri(), conf); 311 } 312 313 /** 314 * Is an absolute path (ie a slash relative path part) 315 * AND a scheme is null AND authority is null. 316 */ 317 public boolean isAbsoluteAndSchemeAuthorityNull() { 318 return (isUriPathAbsolute() && 319 uri.getScheme() == null && uri.getAuthority() == null); 320 } 321 322 /** 323 * True if the path component (i.e. directory) of this URI is absolute. 324 */ 325 public boolean isUriPathAbsolute() { 326 int start = startPositionWithoutWindowsDrive(uri.getPath()); 327 return uri.getPath().startsWith(SEPARATOR, start); 328 } 329 330 /** True if the path component of this URI is absolute. */ 331 /** 332 * There is some ambiguity here. An absolute path is a slash 333 * relative name without a scheme or an authority. 334 * So either this method was incorrectly named or its 335 * implementation is incorrect. This method returns true 336 * even if there is a scheme and authority. 337 */ 338 public boolean isAbsolute() { 339 return isUriPathAbsolute(); 340 } 341 342 /** 343 * @return true if and only if this path represents the root of a file system 344 */ 345 public boolean isRoot() { 346 return getParent() == null; 347 } 348 349 /** Returns the final component of this path.*/ 350 public String getName() { 351 String path = uri.getPath(); 352 int slash = path.lastIndexOf(SEPARATOR); 353 return path.substring(slash+1); 354 } 355 356 /** Returns the parent of a path or null if at root. */ 357 public Path getParent() { 358 String path = uri.getPath(); 359 int lastSlash = path.lastIndexOf('/'); 360 int start = startPositionWithoutWindowsDrive(path); 361 if ((path.length() == start) || // empty path 362 (lastSlash == start && path.length() == start+1)) { // at root 363 return null; 364 } 365 String parent; 366 if (lastSlash==-1) { 367 parent = CUR_DIR; 368 } else { 369 parent = path.substring(0, lastSlash==start?start+1:lastSlash); 370 } 371 return new Path(uri.getScheme(), uri.getAuthority(), parent); 372 } 373 374 /** Adds a suffix to the final name in the path.*/ 375 public Path suffix(String suffix) { 376 return new Path(getParent(), getName()+suffix); 377 } 378 379 @Override 380 public String toString() { 381 // we can't use uri.toString(), which escapes everything, because we want 382 // illegal characters unescaped in the string, for glob processing, etc. 383 StringBuilder buffer = new StringBuilder(); 384 if (uri.getScheme() != null) { 385 buffer.append(uri.getScheme()); 386 buffer.append(":"); 387 } 388 if (uri.getAuthority() != null) { 389 buffer.append("//"); 390 buffer.append(uri.getAuthority()); 391 } 392 if (uri.getPath() != null) { 393 String path = uri.getPath(); 394 if (path.indexOf('/')==0 && 395 hasWindowsDrive(path) && // has windows drive 396 uri.getScheme() == null && // but no scheme 397 uri.getAuthority() == null) // or authority 398 path = path.substring(1); // remove slash before drive 399 buffer.append(path); 400 } 401 if (uri.getFragment() != null) { 402 buffer.append("#"); 403 buffer.append(uri.getFragment()); 404 } 405 return buffer.toString(); 406 } 407 408 @Override 409 public boolean equals(Object o) { 410 if (!(o instanceof Path)) { 411 return false; 412 } 413 Path that = (Path)o; 414 return this.uri.equals(that.uri); 415 } 416 417 @Override 418 public int hashCode() { 419 return uri.hashCode(); 420 } 421 422 @Override 423 public int compareTo(Object o) { 424 Path that = (Path)o; 425 return this.uri.compareTo(that.uri); 426 } 427 428 /** Return the number of elements in this path. */ 429 public int depth() { 430 String path = uri.getPath(); 431 int depth = 0; 432 int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0; 433 while (slash != -1) { 434 depth++; 435 slash = path.indexOf(SEPARATOR, slash+1); 436 } 437 return depth; 438 } 439 440 /** 441 * Returns a qualified path object. 442 * 443 * Deprecated - use {@link #makeQualified(URI, Path)} 444 */ 445 @Deprecated 446 public Path makeQualified(FileSystem fs) { 447 return makeQualified(fs.getUri(), fs.getWorkingDirectory()); 448 } 449 450 /** Returns a qualified path object. */ 451 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 452 public Path makeQualified(URI defaultUri, Path workingDir ) { 453 Path path = this; 454 if (!isAbsolute()) { 455 path = new Path(workingDir, this); 456 } 457 458 URI pathUri = path.toUri(); 459 460 String scheme = pathUri.getScheme(); 461 String authority = pathUri.getAuthority(); 462 String fragment = pathUri.getFragment(); 463 464 if (scheme != null && 465 (authority != null || defaultUri.getAuthority() == null)) 466 return path; 467 468 if (scheme == null) { 469 scheme = defaultUri.getScheme(); 470 } 471 472 if (authority == null) { 473 authority = defaultUri.getAuthority(); 474 if (authority == null) { 475 authority = ""; 476 } 477 } 478 479 URI newUri = null; 480 try { 481 newUri = new URI(scheme, authority , 482 normalizePath(scheme, pathUri.getPath()), null, fragment); 483 } catch (URISyntaxException e) { 484 throw new IllegalArgumentException(e); 485 } 486 return new Path(newUri); 487 } 488}