001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.fs;
020
021import java.io.IOException;
022import java.net.URI;
023import java.net.URISyntaxException;
024
025import java.util.regex.Matcher;
026import java.util.regex.Pattern;
027
028import org.apache.avro.reflect.Stringable;
029import org.apache.hadoop.HadoopIllegalArgumentException;
030import org.apache.hadoop.classification.InterfaceAudience;
031import org.apache.hadoop.classification.InterfaceStability;
032import org.apache.hadoop.conf.Configuration;
033
034/** Names a file or directory in a {@link FileSystem}.
035 * Path strings use slash as the directory separator.  A path string is
036 * absolute if it begins with a slash.
037 */
038@Stringable
039@InterfaceAudience.Public
040@InterfaceStability.Stable
041public class Path implements Comparable {
042  // Static pattern objects to improve performance of regex matching.
043  private static final Pattern fSlashPattern = Pattern.compile("//");
044  private static final Pattern bSlashPattern = Pattern.compile("\\\\");
045
046  /** The directory separator, a slash. */
047  public static final String SEPARATOR = "/";
048  public static final char SEPARATOR_CHAR = '/';
049  
050  public static final String CUR_DIR = ".";
051  
052  public static final boolean WINDOWS
053    = System.getProperty("os.name").startsWith("Windows");
054
055  /**
056   *  Pre-compiled regular expressions to detect path formats.
057   */
058  private static final Pattern hasUriScheme =
059      Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:");
060  private static final Pattern hasDriveLetterSpecifier =
061      Pattern.compile("^/?[a-zA-Z]:");
062
063  private URI uri;                                // a hierarchical uri
064
065  /**
066   * Pathnames with scheme and relative path are illegal.
067   */
068  void checkNotSchemeWithRelative() {
069    if (toUri().isAbsolute() && !isUriPathAbsolute()) {
070      throw new HadoopIllegalArgumentException(
071          "Unsupported name: has scheme but relative path-part");
072    }
073  }
074
075  void checkNotRelative() {
076    if (!isAbsolute() && toUri().getScheme() == null) {
077      throw new HadoopIllegalArgumentException("Path is relative");
078    }
079  }
080
081  public static Path getPathWithoutSchemeAndAuthority(Path path) {
082    // This code depends on Path.toString() to remove the leading slash before
083    // the drive specification on Windows.
084    Path newPath = path.isUriPathAbsolute() ?
085      new Path(null, null, path.toUri().getPath()) :
086      path;
087    return newPath;
088  }
089
090  /** Resolve a child path against a parent path. */
091  public Path(String parent, String child) {
092    this(new Path(parent), new Path(child));
093  }
094
095  /** Resolve a child path against a parent path. */
096  public Path(Path parent, String child) {
097    this(parent, new Path(child));
098  }
099
100  /** Resolve a child path against a parent path. */
101  public Path(String parent, Path child) {
102    this(new Path(parent), child);
103  }
104
105  /** Resolve a child path against a parent path. */
106  public Path(Path parent, Path child) {
107    // Add a slash to parent's path so resolution is compatible with URI's
108    URI parentUri = parent.uri;
109    String parentPath = parentUri.getPath();
110    if (!(parentPath.equals("/") || parentPath.isEmpty())) {
111      try {
112        parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
113                      parentUri.getPath()+"/", null, parentUri.getFragment());
114      } catch (URISyntaxException e) {
115        throw new IllegalArgumentException(e);
116      }
117    }
118    URI resolved = parentUri.resolve(child.uri);
119    initialize(resolved.getScheme(), resolved.getAuthority(),
120               resolved.getPath(), resolved.getFragment());
121  }
122
123  private void checkPathArg( String path ) throws IllegalArgumentException {
124    // disallow construction of a Path from an empty string
125    if ( path == null ) {
126      throw new IllegalArgumentException(
127          "Can not create a Path from a null string");
128    }
129    if( path.length() == 0 ) {
130       throw new IllegalArgumentException(
131           "Can not create a Path from an empty string");
132    }   
133  }
134  
135  /** Construct a path from a String.  Path strings are URIs, but with
136   * unescaped elements and some additional normalization. */
137  public Path(String pathString) throws IllegalArgumentException {
138    checkPathArg( pathString );
139    
140    // We can't use 'new URI(String)' directly, since it assumes things are
141    // escaped, which we don't require of Paths. 
142    
143    // add a slash in front of paths with Windows drive letters
144    if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
145      pathString = "/" + pathString;
146    }
147
148    // parse uri components
149    String scheme = null;
150    String authority = null;
151
152    int start = 0;
153
154    // If there are more than one leading slashes, reduce them to just one
155    // slash. Otherwise, the URI won't be created correctly.
156    // For e.g., //abc will get converted into hdfs://abc while it should be
157    // hdfs:///abc.
158    if (pathString.charAt(0) == '/') {
159      pathString = pathString.replaceFirst("^/+", "/");
160    }
161
162    // parse uri scheme, if any
163    int colon = pathString.indexOf(':');
164    int slash = pathString.indexOf('/');
165    if ((colon != -1) &&
166        ((slash == -1) || (colon < slash))) {     // has a scheme
167      scheme = pathString.substring(0, colon);
168      start = colon+1;
169    }
170
171    // parse uri authority, if any
172    if (pathString.startsWith("//", start) &&
173        (pathString.length()-start > 2)) {       // has authority
174      int nextSlash = pathString.indexOf('/', start+2);
175      int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
176      authority = pathString.substring(start+2, authEnd);
177      start = authEnd;
178    }
179
180    // uri path is the rest of the string -- query & fragment not supported
181    String path = pathString.substring(start, pathString.length());
182
183    initialize(scheme, authority, path, null);
184  }
185
186  /**
187   * Construct a path from a URI
188   */
189  public Path(URI aUri) {
190    uri = aUri.normalize();
191  }
192  
193  /** Construct a Path from components. */
194  public Path(String scheme, String authority, String path) {
195    checkPathArg( path );
196
197    // add a slash in front of paths with Windows drive letters
198    if (hasWindowsDrive(path) && path.charAt(0) != '/') {
199      path = "/" + path;
200    }
201
202    // add "./" in front of Linux relative paths so that a path containing
203    // a colon e.q. "a:b" will not be interpreted as scheme "a".
204    if (!WINDOWS && path.charAt(0) != '/') {
205      path = "./" + path;
206    }
207
208    initialize(scheme, authority, path, null);
209  }
210
211  private void initialize(String scheme, String authority, String path,
212      String fragment) {
213    try {
214      this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
215        .normalize();
216    } catch (URISyntaxException e) {
217      throw new IllegalArgumentException(e);
218    }
219  }
220
221  /**
222   * Merge 2 paths such that the second path is appended relative to the first.
223   * The returned path has the scheme and authority of the first path.  On
224   * Windows, the drive specification in the second path is discarded.
225   * 
226   * @param path1 Path first path
227   * @param path2 Path second path, to be appended relative to path1
228   * @return Path merged path
229   */
230  public static Path mergePaths(Path path1, Path path2) {
231    String path2Str = path2.toUri().getPath();
232    path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str));
233    // Add path components explicitly, because simply concatenating two path
234    // string is not safe, for example:
235    // "/" + "/foo" yields "//foo", which will be parsed as authority in Path
236    return new Path(path1.toUri().getScheme(), 
237        path1.toUri().getAuthority(), 
238        path1.toUri().getPath() + path2Str);
239  }
240
241  /**
242   * Normalize a path string to use non-duplicated forward slashes as
243   * the path separator and remove any trailing path separators.
244   * @param scheme Supplies the URI scheme. Used to deduce whether we
245   *               should replace backslashes or not.
246   * @param path Supplies the scheme-specific part
247   * @return Normalized path string.
248   */
249  private static String normalizePath(String scheme, String path) {
250    // Remove double forward slashes.
251    final Matcher fMatcher = fSlashPattern.matcher(path);
252    path = fMatcher.replaceAll("/");
253
254    // Remove backslashes if this looks like a Windows path. Avoid
255    // the substitution if it looks like a non-local URI.
256    if (WINDOWS &&
257        (hasWindowsDrive(path) ||
258         (scheme == null) ||
259         (scheme.isEmpty()) ||
260         (scheme.equals("file")))) {
261      final Matcher bMatcher = bSlashPattern.matcher(path);
262      path = bMatcher.replaceAll("/");
263    }
264    
265    // trim trailing slash from non-root path (ignoring windows drive)
266    int minLength = startPositionWithoutWindowsDrive(path) + 1;
267    if (path.length() > minLength && path.endsWith(SEPARATOR)) {
268      path = path.substring(0, path.length()-1);
269    }
270    
271    return path;
272  }
273
274  private static boolean hasWindowsDrive(String path) {
275    return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find());
276  }
277
278  private static int startPositionWithoutWindowsDrive(String path) {
279    if (hasWindowsDrive(path)) {
280      return path.charAt(0) ==  SEPARATOR_CHAR ? 3 : 2;
281    } else {
282      return 0;
283    }
284  }
285  
286  /**
287   * Determine whether a given path string represents an absolute path on
288   * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
289   *
290   * @param pathString Supplies the path string to evaluate.
291   * @param slashed true if the given path is prefixed with "/".
292   * @return true if the supplied path looks like an absolute path with a Windows
293   * drive-specifier.
294   */
295  public static boolean isWindowsAbsolutePath(final String pathString,
296                                              final boolean slashed) {
297    int start = startPositionWithoutWindowsDrive(pathString);
298    return start > 0
299        && pathString.length() > start
300        && ((pathString.charAt(start) == SEPARATOR_CHAR) ||
301            (pathString.charAt(start) == '\\'));
302  }
303
304  /** Convert this to a URI. */
305  public URI toUri() { return uri; }
306
307  /** Return the FileSystem that owns this Path. */
308  public FileSystem getFileSystem(Configuration conf) throws IOException {
309    return FileSystem.get(this.toUri(), conf);
310  }
311
312  /**
313   * Is an absolute path (ie a slash relative path part)
314   *  AND  a scheme is null AND  authority is null.
315   */
316  public boolean isAbsoluteAndSchemeAuthorityNull() {
317    return  (isUriPathAbsolute() && 
318        uri.getScheme() == null && uri.getAuthority() == null);
319  }
320  
321  /**
322   *  True if the path component (i.e. directory) of this URI is absolute.
323   */
324  public boolean isUriPathAbsolute() {
325    int start = startPositionWithoutWindowsDrive(uri.getPath());
326    return uri.getPath().startsWith(SEPARATOR, start);
327   }
328  
329  /** True if the path component of this URI is absolute. */
330  /**
331   * There is some ambiguity here. An absolute path is a slash
332   * relative name without a scheme or an authority.
333   * So either this method was incorrectly named or its
334   * implementation is incorrect. This method returns true
335   * even if there is a scheme and authority.
336   */
337  public boolean isAbsolute() {
338     return isUriPathAbsolute();
339  }
340
341  /**
342   * @return true if and only if this path represents the root of a file system
343   */
344  public boolean isRoot() {
345    return getParent() == null;
346  }
347
348  /** Returns the final component of this path.*/
349  public String getName() {
350    String path = uri.getPath();
351    int slash = path.lastIndexOf(SEPARATOR);
352    return path.substring(slash+1);
353  }
354
355  /** Returns the parent of a path or null if at root. */
356  public Path getParent() {
357    String path = uri.getPath();
358    int lastSlash = path.lastIndexOf('/');
359    int start = startPositionWithoutWindowsDrive(path);
360    if ((path.length() == start) ||               // empty path
361        (lastSlash == start && path.length() == start+1)) { // at root
362      return null;
363    }
364    String parent;
365    if (lastSlash==-1) {
366      parent = CUR_DIR;
367    } else {
368      parent = path.substring(0, lastSlash==start?start+1:lastSlash);
369    }
370    return new Path(uri.getScheme(), uri.getAuthority(), parent);
371  }
372
373  /** Adds a suffix to the final name in the path.*/
374  public Path suffix(String suffix) {
375    return new Path(getParent(), getName()+suffix);
376  }
377
378  @Override
379  public String toString() {
380    // we can't use uri.toString(), which escapes everything, because we want
381    // illegal characters unescaped in the string, for glob processing, etc.
382    StringBuilder buffer = new StringBuilder();
383    if (uri.getScheme() != null) {
384      buffer.append(uri.getScheme());
385      buffer.append(":");
386    }
387    if (uri.getAuthority() != null) {
388      buffer.append("//");
389      buffer.append(uri.getAuthority());
390    } else {
391      // Add the two forward slashes if the input path had it.
392      // This can be checked from the scheme specific part.
393      // E.g., hdfs://a/b will have authority = NULL. But we want to return
394      // the double slashes in the result. This is the behavior of URI.toString.
395      String ssPart = uri.getSchemeSpecificPart();
396      if (ssPart != null
397          && ssPart.length() > 2
398          && ssPart.charAt(0) == '/'
399          && ssPart.charAt(1) == '/')  {
400
401        buffer.append("//");
402      }
403    }
404    if (uri.getPath() != null) {
405      String path = uri.getPath();
406      if (path.indexOf('/')==0 &&
407          hasWindowsDrive(path) &&                // has windows drive
408          uri.getScheme() == null &&              // but no scheme
409          uri.getAuthority() == null)             // or authority
410        path = path.substring(1);                 // remove slash before drive
411      buffer.append(path);
412    }
413    if (uri.getFragment() != null) {
414      buffer.append("#");
415      buffer.append(uri.getFragment());
416    }
417    return buffer.toString();
418  }
419
420  @Override
421  public boolean equals(Object o) {
422    if (!(o instanceof Path)) {
423      return false;
424    }
425    Path that = (Path)o;
426    return this.uri.equals(that.uri);
427  }
428
429  @Override
430  public int hashCode() {
431    return uri.hashCode();
432  }
433
434  @Override
435  public int compareTo(Object o) {
436    Path that = (Path)o;
437    return this.uri.compareTo(that.uri);
438  }
439  
440  /** Return the number of elements in this path. */
441  public int depth() {
442    String path = uri.getPath();
443    int depth = 0;
444    int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
445    while (slash != -1) {
446      depth++;
447      slash = path.indexOf(SEPARATOR, slash+1);
448    }
449    return depth;
450  }
451
452  /**
453   *  Returns a qualified path object.
454   *  
455   *  Deprecated - use {@link #makeQualified(URI, Path)}
456   */
457  @Deprecated
458  public Path makeQualified(FileSystem fs) {
459    return makeQualified(fs.getUri(), fs.getWorkingDirectory());
460  }
461  
462  /** Returns a qualified path object. */
463  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
464  public Path makeQualified(URI defaultUri, Path workingDir ) {
465    Path path = this;
466    if (!isAbsolute()) {
467      path = new Path(workingDir, this);
468    }
469
470    URI pathUri = path.toUri();
471      
472    String scheme = pathUri.getScheme();
473    String authority = pathUri.getAuthority();
474    String fragment = pathUri.getFragment();
475
476    if (scheme != null &&
477        (authority != null || defaultUri.getAuthority() == null))
478      return path;
479
480    if (scheme == null) {
481      scheme = defaultUri.getScheme();
482    }
483
484    if (authority == null) {
485      authority = defaultUri.getAuthority();
486      if (authority == null) {
487        authority = "";
488      }
489    }
490    
491    URI newUri = null;
492    try {
493      newUri = new URI(scheme, authority , 
494        normalizePath(scheme, pathUri.getPath()), null, fragment);
495    } catch (URISyntaxException e) {
496      throw new IllegalArgumentException(e);
497    }
498    return new Path(newUri);
499  }
500}