001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.fs;
020
021 import java.io.IOException;
022 import java.net.URI;
023 import java.net.URISyntaxException;
024
025 import java.util.regex.Matcher;
026 import java.util.regex.Pattern;
027
028 import org.apache.avro.reflect.Stringable;
029 import org.apache.hadoop.HadoopIllegalArgumentException;
030 import org.apache.hadoop.classification.InterfaceAudience;
031 import org.apache.hadoop.classification.InterfaceStability;
032 import org.apache.hadoop.conf.Configuration;
033
034 /** Names a file or directory in a {@link FileSystem}.
035 * Path strings use slash as the directory separator. A path string is
036 * absolute if it begins with a slash.
037 */
038 @Stringable
039 @InterfaceAudience.Public
040 @InterfaceStability.Stable
041 public class Path implements Comparable {
042 // Static pattern objects to improve performance of regex matching.
043 private static final Pattern fSlashPattern = Pattern.compile("//");
044 private static final Pattern bSlashPattern = Pattern.compile("\\\\");
045
046 /** The directory separator, a slash. */
047 public static final String SEPARATOR = "/";
048 public static final char SEPARATOR_CHAR = '/';
049
050 public static final String CUR_DIR = ".";
051
052 public static final boolean WINDOWS
053 = System.getProperty("os.name").startsWith("Windows");
054
055 /**
056 * Pre-compiled regular expressions to detect path formats.
057 */
058 private static final Pattern hasUriScheme =
059 Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:");
060 private static final Pattern hasDriveLetterSpecifier =
061 Pattern.compile("^/?[a-zA-Z]:");
062
063 private URI uri; // a hierarchical uri
064
065 /**
066 * Pathnames with scheme and relative path are illegal.
067 * @param path to be checked
068 */
069 void checkNotSchemeWithRelative() {
070 if (toUri().isAbsolute() && !isUriPathAbsolute()) {
071 throw new HadoopIllegalArgumentException(
072 "Unsupported name: has scheme but relative path-part");
073 }
074 }
075
076 void checkNotRelative() {
077 if (!isAbsolute() && toUri().getScheme() == null) {
078 throw new HadoopIllegalArgumentException("Path is relative");
079 }
080 }
081
082 public static Path getPathWithoutSchemeAndAuthority(Path path) {
083 // This code depends on Path.toString() to remove the leading slash before
084 // the drive specification on Windows.
085 Path newPath = path.isUriPathAbsolute() ?
086 new Path(null, null, path.toUri().getPath()) :
087 path;
088 return newPath;
089 }
090
091 /** Resolve a child path against a parent path. */
092 public Path(String parent, String child) {
093 this(new Path(parent), new Path(child));
094 }
095
096 /** Resolve a child path against a parent path. */
097 public Path(Path parent, String child) {
098 this(parent, new Path(child));
099 }
100
101 /** Resolve a child path against a parent path. */
102 public Path(String parent, Path child) {
103 this(new Path(parent), child);
104 }
105
106 /** Resolve a child path against a parent path. */
107 public Path(Path parent, Path child) {
108 // Add a slash to parent's path so resolution is compatible with URI's
109 URI parentUri = parent.uri;
110 String parentPath = parentUri.getPath();
111 if (!(parentPath.equals("/") || parentPath.isEmpty())) {
112 try {
113 parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
114 parentUri.getPath()+"/", null, parentUri.getFragment());
115 } catch (URISyntaxException e) {
116 throw new IllegalArgumentException(e);
117 }
118 }
119 URI resolved = parentUri.resolve(child.uri);
120 initialize(resolved.getScheme(), resolved.getAuthority(),
121 resolved.getPath(), resolved.getFragment());
122 }
123
124 private void checkPathArg( String path ) throws IllegalArgumentException {
125 // disallow construction of a Path from an empty string
126 if ( path == null ) {
127 throw new IllegalArgumentException(
128 "Can not create a Path from a null string");
129 }
130 if( path.length() == 0 ) {
131 throw new IllegalArgumentException(
132 "Can not create a Path from an empty string");
133 }
134 }
135
136 /** Construct a path from a String. Path strings are URIs, but with
137 * unescaped elements and some additional normalization. */
138 public Path(String pathString) throws IllegalArgumentException {
139 checkPathArg( pathString );
140
141 // We can't use 'new URI(String)' directly, since it assumes things are
142 // escaped, which we don't require of Paths.
143
144 // add a slash in front of paths with Windows drive letters
145 if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
146 pathString = "/" + pathString;
147 }
148
149 // parse uri components
150 String scheme = null;
151 String authority = null;
152
153 int start = 0;
154
155 // If there are more than one leading slashes, reduce them to just one
156 // slash. Otherwise, the URI won't be created correctly.
157 // For e.g., //abc will get converted into hdfs://abc while it should be
158 // hdfs:///abc.
159 if (pathString.charAt(0) == '/') {
160 pathString = pathString.replaceFirst("^/+", "/");
161 }
162
163 // parse uri scheme, if any
164 int colon = pathString.indexOf(':');
165 int slash = pathString.indexOf('/');
166 if ((colon != -1) &&
167 ((slash == -1) || (colon < slash))) { // has a scheme
168 scheme = pathString.substring(0, colon);
169 start = colon+1;
170 }
171
172 // parse uri authority, if any
173 if (pathString.startsWith("//", start) &&
174 (pathString.length()-start > 2)) { // has authority
175 int nextSlash = pathString.indexOf('/', start+2);
176 int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
177 authority = pathString.substring(start+2, authEnd);
178 start = authEnd;
179 }
180
181 // uri path is the rest of the string -- query & fragment not supported
182 String path = pathString.substring(start, pathString.length());
183
184 initialize(scheme, authority, path, null);
185 }
186
187 /**
188 * Construct a path from a URI
189 */
190 public Path(URI aUri) {
191 uri = aUri.normalize();
192 }
193
194 /** Construct a Path from components. */
195 public Path(String scheme, String authority, String path) {
196 checkPathArg( path );
197
198 // add a slash in front of paths with Windows drive letters
199 if (hasWindowsDrive(path) && path.charAt(0) != '/') {
200 path = "/" + path;
201 }
202
203 // add "./" in front of Linux relative paths so that a path containing
204 // a colon e.q. "a:b" will not be interpreted as scheme "a".
205 if (!WINDOWS && path.charAt(0) != '/') {
206 path = "./" + path;
207 }
208
209 initialize(scheme, authority, path, null);
210 }
211
212 private void initialize(String scheme, String authority, String path,
213 String fragment) {
214 try {
215 this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
216 .normalize();
217 } catch (URISyntaxException e) {
218 throw new IllegalArgumentException(e);
219 }
220 }
221
222 /**
223 * Merge 2 paths such that the second path is appended relative to the first.
224 * The returned path has the scheme and authority of the first path. On
225 * Windows, the drive specification in the second path is discarded.
226 *
227 * @param path1 Path first path
228 * @param path2 Path second path, to be appended relative to path1
229 * @return Path merged path
230 */
231 public static Path mergePaths(Path path1, Path path2) {
232 String path2Str = path2.toUri().getPath();
233 if(hasWindowsDrive(path2Str)) {
234 path2Str = path2Str.substring(path2Str.indexOf(':')+1);
235 }
236 return new Path(path1 + path2Str);
237 }
238
239 /**
240 * Normalize a path string to use non-duplicated forward slashes as
241 * the path separator and remove any trailing path separators.
242 * @param scheme Supplies the URI scheme. Used to deduce whether we
243 * should replace backslashes or not.
244 * @param path Supplies the scheme-specific part
245 * @return Normalized path string.
246 */
247 private static String normalizePath(String scheme, String path) {
248 // Remove double forward slashes.
249 final Matcher fMatcher = fSlashPattern.matcher(path);
250 path = fMatcher.replaceAll("/");
251
252 // Remove backslashes if this looks like a Windows path. Avoid
253 // the substitution if it looks like a non-local URI.
254 if (WINDOWS &&
255 (hasWindowsDrive(path) ||
256 (scheme == null) ||
257 (scheme.isEmpty()) ||
258 (scheme.equals("file")))) {
259 final Matcher bMatcher = bSlashPattern.matcher(path);
260 path = bMatcher.replaceAll("/");
261 }
262
263 // trim trailing slash from non-root path (ignoring windows drive)
264 int minLength = hasWindowsDrive(path) ? 4 : 1;
265 if (path.length() > minLength && path.endsWith("/")) {
266 path = path.substring(0, path.length()-1);
267 }
268
269 return path;
270 }
271
272 private static boolean hasWindowsDrive(String path) {
273 return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find());
274 }
275
276 /**
277 * Determine whether a given path string represents an absolute path on
278 * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
279 *
280 * @param pathString Supplies the path string to evaluate.
281 * @param slashed true if the given path is prefixed with "/".
282 * @return true if the supplied path looks like an absolute path with a Windows
283 * drive-specifier.
284 */
285 public static boolean isWindowsAbsolutePath(final String pathString,
286 final boolean slashed) {
287 int start = (slashed ? 1 : 0);
288
289 return
290 hasWindowsDrive(pathString) &&
291 pathString.length() >= (start + 3) &&
292 ((pathString.charAt(start + 2) == SEPARATOR_CHAR) ||
293 (pathString.charAt(start + 2) == '\\'));
294 }
295
296 /** Convert this to a URI. */
297 public URI toUri() { return uri; }
298
299 /** Return the FileSystem that owns this Path. */
300 public FileSystem getFileSystem(Configuration conf) throws IOException {
301 return FileSystem.get(this.toUri(), conf);
302 }
303
304 /**
305 * Is an absolute path (ie a slash relative path part)
306 * AND a scheme is null AND authority is null.
307 */
308 public boolean isAbsoluteAndSchemeAuthorityNull() {
309 return (isUriPathAbsolute() &&
310 uri.getScheme() == null && uri.getAuthority() == null);
311 }
312
313 /**
314 * True if the path component (i.e. directory) of this URI is absolute.
315 */
316 public boolean isUriPathAbsolute() {
317 int start = hasWindowsDrive(uri.getPath()) ? 3 : 0;
318 return uri.getPath().startsWith(SEPARATOR, start);
319 }
320
321 /** True if the path component of this URI is absolute. */
322 /**
323 * There is some ambiguity here. An absolute path is a slash
324 * relative name without a scheme or an authority.
325 * So either this method was incorrectly named or its
326 * implementation is incorrect. This method returns true
327 * even if there is a scheme and authority.
328 */
329 public boolean isAbsolute() {
330 return isUriPathAbsolute();
331 }
332
333 /**
334 * @return true if and only if this path represents the root of a file system
335 */
336 public boolean isRoot() {
337 return getParent() == null;
338 }
339
340 /** Returns the final component of this path.*/
341 public String getName() {
342 String path = uri.getPath();
343 int slash = path.lastIndexOf(SEPARATOR);
344 return path.substring(slash+1);
345 }
346
347 /** Returns the parent of a path or null if at root. */
348 public Path getParent() {
349 String path = uri.getPath();
350 int lastSlash = path.lastIndexOf('/');
351 int start = hasWindowsDrive(path) ? 3 : 0;
352 if ((path.length() == start) || // empty path
353 (lastSlash == start && path.length() == start+1)) { // at root
354 return null;
355 }
356 String parent;
357 if (lastSlash==-1) {
358 parent = CUR_DIR;
359 } else {
360 int end = hasWindowsDrive(path) ? 3 : 0;
361 parent = path.substring(0, lastSlash==end?end+1:lastSlash);
362 }
363 return new Path(uri.getScheme(), uri.getAuthority(), parent);
364 }
365
366 /** Adds a suffix to the final name in the path.*/
367 public Path suffix(String suffix) {
368 return new Path(getParent(), getName()+suffix);
369 }
370
371 @Override
372 public String toString() {
373 // we can't use uri.toString(), which escapes everything, because we want
374 // illegal characters unescaped in the string, for glob processing, etc.
375 StringBuilder buffer = new StringBuilder();
376 if (uri.getScheme() != null) {
377 buffer.append(uri.getScheme());
378 buffer.append(":");
379 }
380 if (uri.getAuthority() != null) {
381 buffer.append("//");
382 buffer.append(uri.getAuthority());
383 }
384 if (uri.getPath() != null) {
385 String path = uri.getPath();
386 if (path.indexOf('/')==0 &&
387 hasWindowsDrive(path) && // has windows drive
388 uri.getScheme() == null && // but no scheme
389 uri.getAuthority() == null) // or authority
390 path = path.substring(1); // remove slash before drive
391 buffer.append(path);
392 }
393 if (uri.getFragment() != null) {
394 buffer.append("#");
395 buffer.append(uri.getFragment());
396 }
397 return buffer.toString();
398 }
399
400 @Override
401 public boolean equals(Object o) {
402 if (!(o instanceof Path)) {
403 return false;
404 }
405 Path that = (Path)o;
406 return this.uri.equals(that.uri);
407 }
408
409 @Override
410 public int hashCode() {
411 return uri.hashCode();
412 }
413
414 @Override
415 public int compareTo(Object o) {
416 Path that = (Path)o;
417 return this.uri.compareTo(that.uri);
418 }
419
420 /** Return the number of elements in this path. */
421 public int depth() {
422 String path = uri.getPath();
423 int depth = 0;
424 int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
425 while (slash != -1) {
426 depth++;
427 slash = path.indexOf(SEPARATOR, slash+1);
428 }
429 return depth;
430 }
431
432 /**
433 * Returns a qualified path object.
434 *
435 * Deprecated - use {@link #makeQualified(URI, Path)}
436 */
437 @Deprecated
438 public Path makeQualified(FileSystem fs) {
439 return makeQualified(fs.getUri(), fs.getWorkingDirectory());
440 }
441
442 /** Returns a qualified path object. */
443 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
444 public Path makeQualified(URI defaultUri, Path workingDir ) {
445 Path path = this;
446 if (!isAbsolute()) {
447 path = new Path(workingDir, this);
448 }
449
450 URI pathUri = path.toUri();
451
452 String scheme = pathUri.getScheme();
453 String authority = pathUri.getAuthority();
454 String fragment = pathUri.getFragment();
455
456 if (scheme != null &&
457 (authority != null || defaultUri.getAuthority() == null))
458 return path;
459
460 if (scheme == null) {
461 scheme = defaultUri.getScheme();
462 }
463
464 if (authority == null) {
465 authority = defaultUri.getAuthority();
466 if (authority == null) {
467 authority = "";
468 }
469 }
470
471 URI newUri = null;
472 try {
473 newUri = new URI(scheme, authority ,
474 normalizePath(scheme, pathUri.getPath()), null, fragment);
475 } catch (URISyntaxException e) {
476 throw new IllegalArgumentException(e);
477 }
478 return new Path(newUri);
479 }
480 }