001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.fs;
020
021import java.io.IOException;
022import java.net.URI;
023import java.net.URISyntaxException;
024
025import java.util.regex.Matcher;
026import java.util.regex.Pattern;
027
028import org.apache.avro.reflect.Stringable;
029import org.apache.hadoop.HadoopIllegalArgumentException;
030import org.apache.hadoop.classification.InterfaceAudience;
031import org.apache.hadoop.classification.InterfaceStability;
032import org.apache.hadoop.conf.Configuration;
033
034/** Names a file or directory in a {@link FileSystem}.
035 * Path strings use slash as the directory separator.  A path string is
036 * absolute if it begins with a slash.
037 */
038@Stringable
039@InterfaceAudience.Public
040@InterfaceStability.Stable
041public class Path implements Comparable {
042  // Static pattern objects to improve performance of regex matching.
043  private static final Pattern fSlashPattern = Pattern.compile("//");
044  private static final Pattern bSlashPattern = Pattern.compile("\\\\");
045
046  /** The directory separator, a slash. */
047  public static final String SEPARATOR = "/";
048  public static final char SEPARATOR_CHAR = '/';
049  
050  public static final String CUR_DIR = ".";
051  
052  public static final boolean WINDOWS
053    = System.getProperty("os.name").startsWith("Windows");
054
055  /**
056   *  Pre-compiled regular expressions to detect path formats.
057   */
058  private static final Pattern hasUriScheme =
059      Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:");
060  private static final Pattern hasDriveLetterSpecifier =
061      Pattern.compile("^/?[a-zA-Z]:");
062
063  private URI uri;                                // a hierarchical uri
064
065  /**
066   * Pathnames with scheme and relative path are illegal.
067   * @param path to be checked
068   */
069  void checkNotSchemeWithRelative() {
070    if (toUri().isAbsolute() && !isUriPathAbsolute()) {
071      throw new HadoopIllegalArgumentException(
072          "Unsupported name: has scheme but relative path-part");
073    }
074  }
075
076  void checkNotRelative() {
077    if (!isAbsolute() && toUri().getScheme() == null) {
078      throw new HadoopIllegalArgumentException("Path is relative");
079    }
080  }
081
082  public static Path getPathWithoutSchemeAndAuthority(Path path) {
083    // This code depends on Path.toString() to remove the leading slash before
084    // the drive specification on Windows.
085    Path newPath = path.isUriPathAbsolute() ?
086      new Path(null, null, path.toUri().getPath()) :
087      path;
088    return newPath;
089  }
090
091  /** Resolve a child path against a parent path. */
092  public Path(String parent, String child) {
093    this(new Path(parent), new Path(child));
094  }
095
096  /** Resolve a child path against a parent path. */
097  public Path(Path parent, String child) {
098    this(parent, new Path(child));
099  }
100
101  /** Resolve a child path against a parent path. */
102  public Path(String parent, Path child) {
103    this(new Path(parent), child);
104  }
105
106  /** Resolve a child path against a parent path. */
107  public Path(Path parent, Path child) {
108    // Add a slash to parent's path so resolution is compatible with URI's
109    URI parentUri = parent.uri;
110    String parentPath = parentUri.getPath();
111    if (!(parentPath.equals("/") || parentPath.isEmpty())) {
112      try {
113        parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
114                      parentUri.getPath()+"/", null, parentUri.getFragment());
115      } catch (URISyntaxException e) {
116        throw new IllegalArgumentException(e);
117      }
118    }
119    URI resolved = parentUri.resolve(child.uri);
120    initialize(resolved.getScheme(), resolved.getAuthority(),
121               resolved.getPath(), resolved.getFragment());
122  }
123
124  private void checkPathArg( String path ) throws IllegalArgumentException {
125    // disallow construction of a Path from an empty string
126    if ( path == null ) {
127      throw new IllegalArgumentException(
128          "Can not create a Path from a null string");
129    }
130    if( path.length() == 0 ) {
131       throw new IllegalArgumentException(
132           "Can not create a Path from an empty string");
133    }   
134  }
135  
136  /** Construct a path from a String.  Path strings are URIs, but with
137   * unescaped elements and some additional normalization. */
138  public Path(String pathString) throws IllegalArgumentException {
139    checkPathArg( pathString );
140    
141    // We can't use 'new URI(String)' directly, since it assumes things are
142    // escaped, which we don't require of Paths. 
143    
144    // add a slash in front of paths with Windows drive letters
145    if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
146      pathString = "/" + pathString;
147    }
148
149    // parse uri components
150    String scheme = null;
151    String authority = null;
152
153    int start = 0;
154
155    // If there are more than one leading slashes, reduce them to just one
156    // slash. Otherwise, the URI won't be created correctly.
157    // For e.g., //abc will get converted into hdfs://abc while it should be
158    // hdfs:///abc.
159    if (pathString.charAt(0) == '/') {
160      pathString = pathString.replaceFirst("^/+", "/");
161    }
162
163    // parse uri scheme, if any
164    int colon = pathString.indexOf(':');
165    int slash = pathString.indexOf('/');
166    if ((colon != -1) &&
167        ((slash == -1) || (colon < slash))) {     // has a scheme
168      scheme = pathString.substring(0, colon);
169      start = colon+1;
170    }
171
172    // parse uri authority, if any
173    if (pathString.startsWith("//", start) &&
174        (pathString.length()-start > 2)) {       // has authority
175      int nextSlash = pathString.indexOf('/', start+2);
176      int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
177      authority = pathString.substring(start+2, authEnd);
178      start = authEnd;
179    }
180
181    // uri path is the rest of the string -- query & fragment not supported
182    String path = pathString.substring(start, pathString.length());
183
184    initialize(scheme, authority, path, null);
185  }
186
187  /**
188   * Construct a path from a URI
189   */
190  public Path(URI aUri) {
191    uri = aUri.normalize();
192  }
193  
194  /** Construct a Path from components. */
195  public Path(String scheme, String authority, String path) {
196    checkPathArg( path );
197
198    // add a slash in front of paths with Windows drive letters
199    if (hasWindowsDrive(path) && path.charAt(0) != '/') {
200      path = "/" + path;
201    }
202
203    // add "./" in front of Linux relative paths so that a path containing
204    // a colon e.q. "a:b" will not be interpreted as scheme "a".
205    if (!WINDOWS && path.charAt(0) != '/') {
206      path = "./" + path;
207    }
208
209    initialize(scheme, authority, path, null);
210  }
211
212  private void initialize(String scheme, String authority, String path,
213      String fragment) {
214    try {
215      this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
216        .normalize();
217    } catch (URISyntaxException e) {
218      throw new IllegalArgumentException(e);
219    }
220  }
221
222  /**
223   * Merge 2 paths such that the second path is appended relative to the first.
224   * The returned path has the scheme and authority of the first path.  On
225   * Windows, the drive specification in the second path is discarded.
226   * 
227   * @param path1 Path first path
228   * @param path2 Path second path, to be appended relative to path1
229   * @return Path merged path
230   */
231  public static Path mergePaths(Path path1, Path path2) {
232    String path2Str = path2.toUri().getPath();
233    path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str));
234    // Add path components explicitly, because simply concatenating two path
235    // string is not safe, for example:
236    // "/" + "/foo" yields "//foo", which will be parsed as authority in Path
237    return new Path(path1.toUri().getScheme(), 
238        path1.toUri().getAuthority(), 
239        path1.toUri().getPath() + path2Str);
240  }
241
242  /**
243   * Normalize a path string to use non-duplicated forward slashes as
244   * the path separator and remove any trailing path separators.
245   * @param scheme Supplies the URI scheme. Used to deduce whether we
246   *               should replace backslashes or not.
247   * @param path Supplies the scheme-specific part
248   * @return Normalized path string.
249   */
250  private static String normalizePath(String scheme, String path) {
251    // Remove double forward slashes.
252    final Matcher fMatcher = fSlashPattern.matcher(path);
253    path = fMatcher.replaceAll("/");
254
255    // Remove backslashes if this looks like a Windows path. Avoid
256    // the substitution if it looks like a non-local URI.
257    if (WINDOWS &&
258        (hasWindowsDrive(path) ||
259         (scheme == null) ||
260         (scheme.isEmpty()) ||
261         (scheme.equals("file")))) {
262      final Matcher bMatcher = bSlashPattern.matcher(path);
263      path = bMatcher.replaceAll("/");
264    }
265    
266    // trim trailing slash from non-root path (ignoring windows drive)
267    int minLength = startPositionWithoutWindowsDrive(path) + 1;
268    if (path.length() > minLength && path.endsWith(SEPARATOR)) {
269      path = path.substring(0, path.length()-1);
270    }
271    
272    return path;
273  }
274
275  private static boolean hasWindowsDrive(String path) {
276    return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find());
277  }
278
279  private static int startPositionWithoutWindowsDrive(String path) {
280    if (hasWindowsDrive(path)) {
281      return path.charAt(0) ==  SEPARATOR_CHAR ? 3 : 2;
282    } else {
283      return 0;
284    }
285  }
286  
287  /**
288   * Determine whether a given path string represents an absolute path on
289   * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
290   *
291   * @param pathString Supplies the path string to evaluate.
292   * @param slashed true if the given path is prefixed with "/".
293   * @return true if the supplied path looks like an absolute path with a Windows
294   * drive-specifier.
295   */
296  public static boolean isWindowsAbsolutePath(final String pathString,
297                                              final boolean slashed) {
298    int start = startPositionWithoutWindowsDrive(pathString);
299    return start > 0
300        && pathString.length() > start
301        && ((pathString.charAt(start) == SEPARATOR_CHAR) ||
302            (pathString.charAt(start) == '\\'));
303  }
304
305  /** Convert this to a URI. */
306  public URI toUri() { return uri; }
307
308  /** Return the FileSystem that owns this Path. */
309  public FileSystem getFileSystem(Configuration conf) throws IOException {
310    return FileSystem.get(this.toUri(), conf);
311  }
312
313  /**
314   * Is an absolute path (ie a slash relative path part)
315   *  AND  a scheme is null AND  authority is null.
316   */
317  public boolean isAbsoluteAndSchemeAuthorityNull() {
318    return  (isUriPathAbsolute() && 
319        uri.getScheme() == null && uri.getAuthority() == null);
320  }
321  
322  /**
323   *  True if the path component (i.e. directory) of this URI is absolute.
324   */
325  public boolean isUriPathAbsolute() {
326    int start = startPositionWithoutWindowsDrive(uri.getPath());
327    return uri.getPath().startsWith(SEPARATOR, start);
328   }
329  
330  /** True if the path component of this URI is absolute. */
331  /**
332   * There is some ambiguity here. An absolute path is a slash
333   * relative name without a scheme or an authority.
334   * So either this method was incorrectly named or its
335   * implementation is incorrect. This method returns true
336   * even if there is a scheme and authority.
337   */
338  public boolean isAbsolute() {
339     return isUriPathAbsolute();
340  }
341
342  /**
343   * @return true if and only if this path represents the root of a file system
344   */
345  public boolean isRoot() {
346    return getParent() == null;
347  }
348
349  /** Returns the final component of this path.*/
350  public String getName() {
351    String path = uri.getPath();
352    int slash = path.lastIndexOf(SEPARATOR);
353    return path.substring(slash+1);
354  }
355
356  /** Returns the parent of a path or null if at root. */
357  public Path getParent() {
358    String path = uri.getPath();
359    int lastSlash = path.lastIndexOf('/');
360    int start = startPositionWithoutWindowsDrive(path);
361    if ((path.length() == start) ||               // empty path
362        (lastSlash == start && path.length() == start+1)) { // at root
363      return null;
364    }
365    String parent;
366    if (lastSlash==-1) {
367      parent = CUR_DIR;
368    } else {
369      parent = path.substring(0, lastSlash==start?start+1:lastSlash);
370    }
371    return new Path(uri.getScheme(), uri.getAuthority(), parent);
372  }
373
374  /** Adds a suffix to the final name in the path.*/
375  public Path suffix(String suffix) {
376    return new Path(getParent(), getName()+suffix);
377  }
378
379  @Override
380  public String toString() {
381    // we can't use uri.toString(), which escapes everything, because we want
382    // illegal characters unescaped in the string, for glob processing, etc.
383    StringBuilder buffer = new StringBuilder();
384    if (uri.getScheme() != null) {
385      buffer.append(uri.getScheme());
386      buffer.append(":");
387    }
388    if (uri.getAuthority() != null) {
389      buffer.append("//");
390      buffer.append(uri.getAuthority());
391    }
392    if (uri.getPath() != null) {
393      String path = uri.getPath();
394      if (path.indexOf('/')==0 &&
395          hasWindowsDrive(path) &&                // has windows drive
396          uri.getScheme() == null &&              // but no scheme
397          uri.getAuthority() == null)             // or authority
398        path = path.substring(1);                 // remove slash before drive
399      buffer.append(path);
400    }
401    if (uri.getFragment() != null) {
402      buffer.append("#");
403      buffer.append(uri.getFragment());
404    }
405    return buffer.toString();
406  }
407
408  @Override
409  public boolean equals(Object o) {
410    if (!(o instanceof Path)) {
411      return false;
412    }
413    Path that = (Path)o;
414    return this.uri.equals(that.uri);
415  }
416
417  @Override
418  public int hashCode() {
419    return uri.hashCode();
420  }
421
422  @Override
423  public int compareTo(Object o) {
424    Path that = (Path)o;
425    return this.uri.compareTo(that.uri);
426  }
427  
428  /** Return the number of elements in this path. */
429  public int depth() {
430    String path = uri.getPath();
431    int depth = 0;
432    int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
433    while (slash != -1) {
434      depth++;
435      slash = path.indexOf(SEPARATOR, slash+1);
436    }
437    return depth;
438  }
439
440  /**
441   *  Returns a qualified path object.
442   *  
443   *  Deprecated - use {@link #makeQualified(URI, Path)}
444   */
445  @Deprecated
446  public Path makeQualified(FileSystem fs) {
447    return makeQualified(fs.getUri(), fs.getWorkingDirectory());
448  }
449  
450  /** Returns a qualified path object. */
451  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
452  public Path makeQualified(URI defaultUri, Path workingDir ) {
453    Path path = this;
454    if (!isAbsolute()) {
455      path = new Path(workingDir, this);
456    }
457
458    URI pathUri = path.toUri();
459      
460    String scheme = pathUri.getScheme();
461    String authority = pathUri.getAuthority();
462    String fragment = pathUri.getFragment();
463
464    if (scheme != null &&
465        (authority != null || defaultUri.getAuthority() == null))
466      return path;
467
468    if (scheme == null) {
469      scheme = defaultUri.getScheme();
470    }
471
472    if (authority == null) {
473      authority = defaultUri.getAuthority();
474      if (authority == null) {
475        authority = "";
476      }
477    }
478    
479    URI newUri = null;
480    try {
481      newUri = new URI(scheme, authority , 
482        normalizePath(scheme, pathUri.getPath()), null, fragment);
483    } catch (URISyntaxException e) {
484      throw new IllegalArgumentException(e);
485    }
486    return new Path(newUri);
487  }
488}