001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.fs;
020    
021    import java.io.IOException;
022    import java.net.URI;
023    import java.net.URISyntaxException;
024    
025    import java.util.regex.Matcher;
026    import java.util.regex.Pattern;
027    
028    import org.apache.avro.reflect.Stringable;
029    import org.apache.hadoop.HadoopIllegalArgumentException;
030    import org.apache.hadoop.classification.InterfaceAudience;
031    import org.apache.hadoop.classification.InterfaceStability;
032    import org.apache.hadoop.conf.Configuration;
033    
034    /** Names a file or directory in a {@link FileSystem}.
035     * Path strings use slash as the directory separator.  A path string is
036     * absolute if it begins with a slash.
037     */
038    @Stringable
039    @InterfaceAudience.Public
040    @InterfaceStability.Stable
041    public class Path implements Comparable {
042      // Static pattern objects to improve performance of regex matching.
043      private static final Pattern fSlashPattern = Pattern.compile("//");
044      private static final Pattern bSlashPattern = Pattern.compile("\\\\");
045    
046      /** The directory separator, a slash. */
047      public static final String SEPARATOR = "/";
048      public static final char SEPARATOR_CHAR = '/';
049      
050      public static final String CUR_DIR = ".";
051      
052      public static final boolean WINDOWS
053        = System.getProperty("os.name").startsWith("Windows");
054    
055      /**
056       *  Pre-compiled regular expressions to detect path formats.
057       */
058      private static final Pattern hasUriScheme =
059          Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:");
060      private static final Pattern hasDriveLetterSpecifier =
061          Pattern.compile("^/?[a-zA-Z]:");
062    
063      private URI uri;                                // a hierarchical uri
064    
065      /**
066       * Pathnames with scheme and relative path are illegal.
067       * @param path to be checked
068       */
069      void checkNotSchemeWithRelative() {
070        if (toUri().isAbsolute() && !isUriPathAbsolute()) {
071          throw new HadoopIllegalArgumentException(
072              "Unsupported name: has scheme but relative path-part");
073        }
074      }
075    
076      void checkNotRelative() {
077        if (!isAbsolute() && toUri().getScheme() == null) {
078          throw new HadoopIllegalArgumentException("Path is relative");
079        }
080      }
081    
082      public static Path getPathWithoutSchemeAndAuthority(Path path) {
083        // This code depends on Path.toString() to remove the leading slash before
084        // the drive specification on Windows.
085        Path newPath = path.isUriPathAbsolute() ?
086          new Path(null, null, path.toUri().getPath()) :
087          path;
088        return newPath;
089      }
090    
091      /** Resolve a child path against a parent path. */
092      public Path(String parent, String child) {
093        this(new Path(parent), new Path(child));
094      }
095    
096      /** Resolve a child path against a parent path. */
097      public Path(Path parent, String child) {
098        this(parent, new Path(child));
099      }
100    
101      /** Resolve a child path against a parent path. */
102      public Path(String parent, Path child) {
103        this(new Path(parent), child);
104      }
105    
106      /** Resolve a child path against a parent path. */
107      public Path(Path parent, Path child) {
108        // Add a slash to parent's path so resolution is compatible with URI's
109        URI parentUri = parent.uri;
110        String parentPath = parentUri.getPath();
111        if (!(parentPath.equals("/") || parentPath.isEmpty())) {
112          try {
113            parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
114                          parentUri.getPath()+"/", null, parentUri.getFragment());
115          } catch (URISyntaxException e) {
116            throw new IllegalArgumentException(e);
117          }
118        }
119        URI resolved = parentUri.resolve(child.uri);
120        initialize(resolved.getScheme(), resolved.getAuthority(),
121                   resolved.getPath(), resolved.getFragment());
122      }
123    
124      private void checkPathArg( String path ) throws IllegalArgumentException {
125        // disallow construction of a Path from an empty string
126        if ( path == null ) {
127          throw new IllegalArgumentException(
128              "Can not create a Path from a null string");
129        }
130        if( path.length() == 0 ) {
131           throw new IllegalArgumentException(
132               "Can not create a Path from an empty string");
133        }   
134      }
135      
136      /** Construct a path from a String.  Path strings are URIs, but with
137       * unescaped elements and some additional normalization. */
138      public Path(String pathString) throws IllegalArgumentException {
139        checkPathArg( pathString );
140        
141        // We can't use 'new URI(String)' directly, since it assumes things are
142        // escaped, which we don't require of Paths. 
143        
144        // add a slash in front of paths with Windows drive letters
145        if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
146          pathString = "/" + pathString;
147        }
148    
149        // parse uri components
150        String scheme = null;
151        String authority = null;
152    
153        int start = 0;
154    
155        // If there are more than one leading slashes, reduce them to just one
156        // slash. Otherwise, the URI won't be created correctly.
157        // For e.g., //abc will get converted into hdfs://abc while it should be
158        // hdfs:///abc.
159        if (pathString.charAt(0) == '/') {
160          pathString = pathString.replaceFirst("^/+", "/");
161        }
162    
163        // parse uri scheme, if any
164        int colon = pathString.indexOf(':');
165        int slash = pathString.indexOf('/');
166        if ((colon != -1) &&
167            ((slash == -1) || (colon < slash))) {     // has a scheme
168          scheme = pathString.substring(0, colon);
169          start = colon+1;
170        }
171    
172        // parse uri authority, if any
173        if (pathString.startsWith("//", start) &&
174            (pathString.length()-start > 2)) {       // has authority
175          int nextSlash = pathString.indexOf('/', start+2);
176          int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
177          authority = pathString.substring(start+2, authEnd);
178          start = authEnd;
179        }
180    
181        // uri path is the rest of the string -- query & fragment not supported
182        String path = pathString.substring(start, pathString.length());
183    
184        initialize(scheme, authority, path, null);
185      }
186    
187      /**
188       * Construct a path from a URI
189       */
190      public Path(URI aUri) {
191        uri = aUri.normalize();
192      }
193      
194      /** Construct a Path from components. */
195      public Path(String scheme, String authority, String path) {
196        checkPathArg( path );
197    
198        // add a slash in front of paths with Windows drive letters
199        if (hasWindowsDrive(path) && path.charAt(0) != '/') {
200          path = "/" + path;
201        }
202    
203        // add "./" in front of Linux relative paths so that a path containing
204        // a colon e.q. "a:b" will not be interpreted as scheme "a".
205        if (!WINDOWS && path.charAt(0) != '/') {
206          path = "./" + path;
207        }
208    
209        initialize(scheme, authority, path, null);
210      }
211    
212      private void initialize(String scheme, String authority, String path,
213          String fragment) {
214        try {
215          this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
216            .normalize();
217        } catch (URISyntaxException e) {
218          throw new IllegalArgumentException(e);
219        }
220      }
221    
222      /**
223       * Merge 2 paths such that the second path is appended relative to the first.
224       * The returned path has the scheme and authority of the first path.  On
225       * Windows, the drive specification in the second path is discarded.
226       * 
227       * @param path1 Path first path
228       * @param path2 Path second path, to be appended relative to path1
229       * @return Path merged path
230       */
231      public static Path mergePaths(Path path1, Path path2) {
232        String path2Str = path2.toUri().getPath();
233        if(hasWindowsDrive(path2Str)) {
234          path2Str = path2Str.substring(path2Str.indexOf(':')+1);
235        }
236        return new Path(path1 + path2Str);
237      }
238    
239      /**
240       * Normalize a path string to use non-duplicated forward slashes as
241       * the path separator and remove any trailing path separators.
242       * @param scheme Supplies the URI scheme. Used to deduce whether we
243       *               should replace backslashes or not.
244       * @param path Supplies the scheme-specific part
245       * @return Normalized path string.
246       */
247      private static String normalizePath(String scheme, String path) {
248        // Remove double forward slashes.
249        final Matcher fMatcher = fSlashPattern.matcher(path);
250        path = fMatcher.replaceAll("/");
251    
252        // Remove backslashes if this looks like a Windows path. Avoid
253        // the substitution if it looks like a non-local URI.
254        if (WINDOWS &&
255            (hasWindowsDrive(path) ||
256             (scheme == null) ||
257             (scheme.isEmpty()) ||
258             (scheme.equals("file")))) {
259          final Matcher bMatcher = bSlashPattern.matcher(path);
260          path = bMatcher.replaceAll("/");
261        }
262        
263        // trim trailing slash from non-root path (ignoring windows drive)
264        int minLength = hasWindowsDrive(path) ? 4 : 1;
265        if (path.length() > minLength && path.endsWith("/")) {
266          path = path.substring(0, path.length()-1);
267        }
268        
269        return path;
270      }
271    
272      private static boolean hasWindowsDrive(String path) {
273        return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find());
274      }
275    
276      /**
277       * Determine whether a given path string represents an absolute path on
278       * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
279       *
280       * @param pathString Supplies the path string to evaluate.
281       * @param slashed true if the given path is prefixed with "/".
282       * @return true if the supplied path looks like an absolute path with a Windows
283       * drive-specifier.
284       */
285      public static boolean isWindowsAbsolutePath(final String pathString,
286                                                  final boolean slashed) {
287        int start = (slashed ? 1 : 0);
288    
289        return
290            hasWindowsDrive(pathString) &&
291            pathString.length() >= (start + 3) &&
292            ((pathString.charAt(start + 2) == SEPARATOR_CHAR) ||
293              (pathString.charAt(start + 2) == '\\'));
294      }
295    
296      /** Convert this to a URI. */
297      public URI toUri() { return uri; }
298    
299      /** Return the FileSystem that owns this Path. */
300      public FileSystem getFileSystem(Configuration conf) throws IOException {
301        return FileSystem.get(this.toUri(), conf);
302      }
303    
304      /**
305       * Is an absolute path (ie a slash relative path part)
306       *  AND  a scheme is null AND  authority is null.
307       */
308      public boolean isAbsoluteAndSchemeAuthorityNull() {
309        return  (isUriPathAbsolute() && 
310            uri.getScheme() == null && uri.getAuthority() == null);
311      }
312      
313      /**
314       *  True if the path component (i.e. directory) of this URI is absolute.
315       */
316      public boolean isUriPathAbsolute() {
317        int start = hasWindowsDrive(uri.getPath()) ? 3 : 0;
318        return uri.getPath().startsWith(SEPARATOR, start);
319       }
320      
321      /** True if the path component of this URI is absolute. */
322      /**
323       * There is some ambiguity here. An absolute path is a slash
324       * relative name without a scheme or an authority.
325       * So either this method was incorrectly named or its
326       * implementation is incorrect. This method returns true
327       * even if there is a scheme and authority.
328       */
329      public boolean isAbsolute() {
330         return isUriPathAbsolute();
331      }
332    
333      /**
334       * @return true if and only if this path represents the root of a file system
335       */
336      public boolean isRoot() {
337        return getParent() == null;
338      }
339    
340      /** Returns the final component of this path.*/
341      public String getName() {
342        String path = uri.getPath();
343        int slash = path.lastIndexOf(SEPARATOR);
344        return path.substring(slash+1);
345      }
346    
347      /** Returns the parent of a path or null if at root. */
348      public Path getParent() {
349        String path = uri.getPath();
350        int lastSlash = path.lastIndexOf('/');
351        int start = hasWindowsDrive(path) ? 3 : 0;
352        if ((path.length() == start) ||               // empty path
353            (lastSlash == start && path.length() == start+1)) { // at root
354          return null;
355        }
356        String parent;
357        if (lastSlash==-1) {
358          parent = CUR_DIR;
359        } else {
360          int end = hasWindowsDrive(path) ? 3 : 0;
361          parent = path.substring(0, lastSlash==end?end+1:lastSlash);
362        }
363        return new Path(uri.getScheme(), uri.getAuthority(), parent);
364      }
365    
366      /** Adds a suffix to the final name in the path.*/
367      public Path suffix(String suffix) {
368        return new Path(getParent(), getName()+suffix);
369      }
370    
371      @Override
372      public String toString() {
373        // we can't use uri.toString(), which escapes everything, because we want
374        // illegal characters unescaped in the string, for glob processing, etc.
375        StringBuilder buffer = new StringBuilder();
376        if (uri.getScheme() != null) {
377          buffer.append(uri.getScheme());
378          buffer.append(":");
379        }
380        if (uri.getAuthority() != null) {
381          buffer.append("//");
382          buffer.append(uri.getAuthority());
383        }
384        if (uri.getPath() != null) {
385          String path = uri.getPath();
386          if (path.indexOf('/')==0 &&
387              hasWindowsDrive(path) &&                // has windows drive
388              uri.getScheme() == null &&              // but no scheme
389              uri.getAuthority() == null)             // or authority
390            path = path.substring(1);                 // remove slash before drive
391          buffer.append(path);
392        }
393        if (uri.getFragment() != null) {
394          buffer.append("#");
395          buffer.append(uri.getFragment());
396        }
397        return buffer.toString();
398      }
399    
400      @Override
401      public boolean equals(Object o) {
402        if (!(o instanceof Path)) {
403          return false;
404        }
405        Path that = (Path)o;
406        return this.uri.equals(that.uri);
407      }
408    
409      @Override
410      public int hashCode() {
411        return uri.hashCode();
412      }
413    
414      @Override
415      public int compareTo(Object o) {
416        Path that = (Path)o;
417        return this.uri.compareTo(that.uri);
418      }
419      
420      /** Return the number of elements in this path. */
421      public int depth() {
422        String path = uri.getPath();
423        int depth = 0;
424        int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
425        while (slash != -1) {
426          depth++;
427          slash = path.indexOf(SEPARATOR, slash+1);
428        }
429        return depth;
430      }
431    
432      /**
433       *  Returns a qualified path object.
434       *  
435       *  Deprecated - use {@link #makeQualified(URI, Path)}
436       */
437      @Deprecated
438      public Path makeQualified(FileSystem fs) {
439        return makeQualified(fs.getUri(), fs.getWorkingDirectory());
440      }
441      
442      /** Returns a qualified path object. */
443      @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
444      public Path makeQualified(URI defaultUri, Path workingDir ) {
445        Path path = this;
446        if (!isAbsolute()) {
447          path = new Path(workingDir, this);
448        }
449    
450        URI pathUri = path.toUri();
451          
452        String scheme = pathUri.getScheme();
453        String authority = pathUri.getAuthority();
454        String fragment = pathUri.getFragment();
455    
456        if (scheme != null &&
457            (authority != null || defaultUri.getAuthority() == null))
458          return path;
459    
460        if (scheme == null) {
461          scheme = defaultUri.getScheme();
462        }
463    
464        if (authority == null) {
465          authority = defaultUri.getAuthority();
466          if (authority == null) {
467            authority = "";
468          }
469        }
470        
471        URI newUri = null;
472        try {
473          newUri = new URI(scheme, authority , 
474            normalizePath(scheme, pathUri.getPath()), null, fragment);
475        } catch (URISyntaxException e) {
476          throw new IllegalArgumentException(e);
477        }
478        return new Path(newUri);
479      }
480    }