001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.fs; 019 020import java.io.Closeable; 021import java.io.FileNotFoundException; 022import java.io.IOException; 023import java.lang.ref.WeakReference; 024 025import java.net.InetSocketAddress; 026import java.net.URI; 027import java.net.URISyntaxException; 028import java.security.PrivilegedExceptionAction; 029import java.util.ArrayList; 030import java.util.Arrays; 031import java.util.EnumSet; 032import java.util.HashMap; 033import java.util.HashSet; 034import java.util.IdentityHashMap; 035import java.util.Iterator; 036import java.util.LinkedList; 037import java.util.List; 038import java.util.Map; 039import java.util.NoSuchElementException; 040import java.util.ServiceLoader; 041import java.util.Set; 042import java.util.Stack; 043import java.util.TreeSet; 044import java.util.concurrent.atomic.AtomicLong; 045 046import org.apache.commons.logging.Log; 047import org.apache.commons.logging.LogFactory; 048import org.apache.hadoop.classification.InterfaceAudience; 049import org.apache.hadoop.classification.InterfaceStability; 050import org.apache.hadoop.conf.Configuration; 051import 
org.apache.hadoop.conf.Configured; 052import org.apache.hadoop.fs.Options.ChecksumOpt; 053import org.apache.hadoop.fs.Options.Rename; 054import org.apache.hadoop.fs.permission.AclEntry; 055import org.apache.hadoop.fs.permission.AclStatus; 056import org.apache.hadoop.fs.permission.FsAction; 057import org.apache.hadoop.fs.permission.FsPermission; 058import org.apache.hadoop.io.MultipleIOException; 059import org.apache.hadoop.io.Text; 060import org.apache.hadoop.net.NetUtils; 061import org.apache.hadoop.security.AccessControlException; 062import org.apache.hadoop.security.Credentials; 063import org.apache.hadoop.security.SecurityUtil; 064import org.apache.hadoop.security.UserGroupInformation; 065import org.apache.hadoop.security.token.Token; 066import org.apache.hadoop.util.DataChecksum; 067import org.apache.hadoop.util.Progressable; 068import org.apache.hadoop.util.ReflectionUtils; 069import org.apache.hadoop.util.ShutdownHookManager; 070import org.apache.hadoop.util.StringUtils; 071 072import com.google.common.annotations.VisibleForTesting; 073 074/**************************************************************** 075 * An abstract base class for a fairly generic filesystem. It 076 * may be implemented as a distributed filesystem, or as a "local" 077 * one that reflects the locally-connected disk. The local version 078 * exists for small Hadoop instances and for testing. 079 * 080 * <p> 081 * 082 * All user code that may potentially use the Hadoop Distributed 083 * File System should be written to use a FileSystem object. The 084 * Hadoop DFS is a multi-machine system that appears as a single 085 * disk. It's useful because of its fault tolerance and potentially 086 * very large capacity. 087 * 088 * <p> 089 * The local implementation is {@link LocalFileSystem} and distributed 090 * implementation is DistributedFileSystem. 
*****************************************************************/
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class FileSystem extends Configured implements Closeable {
  /** Configuration key under which the default filesystem URI is stored. */
  public static final String FS_DEFAULT_NAME_KEY =
                   CommonConfigurationKeys.FS_DEFAULT_NAME_KEY;
  /** Default filesystem URI used when {@link #FS_DEFAULT_NAME_KEY} is unset. */
  public static final String DEFAULT_FS =
                   CommonConfigurationKeys.FS_DEFAULT_NAME_DEFAULT;

  /** Logger shared by the FileSystem class. */
  public static final Log LOG = LogFactory.getLog(FileSystem.class);

  /**
   * Priority of the FileSystem shutdown hook.
   */
  public static final int SHUTDOWN_HOOK_PRIORITY = 10;

  /** FileSystem cache */
  static final Cache CACHE = new Cache();

  /** The key this instance is stored under in the cache. */
  private Cache.Key key;

  /** Recording statistics per a FileSystem class */
  private static final Map<Class<? extends FileSystem>, Statistics>
    statisticsTable =
      new IdentityHashMap<Class<? extends FileSystem>, Statistics>();

  /**
   * The statistics for this file system.
   */
  protected Statistics statistics;

  /**
   * A cache of files that should be deleted when the filesystem is closed
   * or the JVM is exited.
   */
  private Set<Path> deleteOnExit = new TreeSet<Path>();

  /**
   * Populated by {@link #initialize(URI, Configuration)} from the
   * client "resolve remote symlinks" configuration setting.
   */
  boolean resolveSymlinks;
  /**
   * This method adds a file system for testing so that we can find it later. It
   * is only for testing.
133 * @param uri the uri to store it under 134 * @param conf the configuration to store it under 135 * @param fs the file system to store 136 * @throws IOException 137 */ 138 static void addFileSystemForTesting(URI uri, Configuration conf, 139 FileSystem fs) throws IOException { 140 CACHE.map.put(new Cache.Key(uri, conf), fs); 141 } 142 143 /** 144 * Get a filesystem instance based on the uri, the passed 145 * configuration and the user 146 * @param uri of the filesystem 147 * @param conf the configuration to use 148 * @param user to perform the get as 149 * @return the filesystem instance 150 * @throws IOException 151 * @throws InterruptedException 152 */ 153 public static FileSystem get(final URI uri, final Configuration conf, 154 final String user) throws IOException, InterruptedException { 155 String ticketCachePath = 156 conf.get(CommonConfigurationKeys.KERBEROS_TICKET_CACHE_PATH); 157 UserGroupInformation ugi = 158 UserGroupInformation.getBestUGI(ticketCachePath, user); 159 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { 160 @Override 161 public FileSystem run() throws IOException { 162 return get(uri, conf); 163 } 164 }); 165 } 166 167 /** 168 * Returns the configured filesystem implementation. 169 * @param conf the configuration to use 170 */ 171 public static FileSystem get(Configuration conf) throws IOException { 172 return get(getDefaultUri(conf), conf); 173 } 174 175 /** Get the default filesystem URI from a configuration. 176 * @param conf the configuration to use 177 * @return the uri of the default filesystem 178 */ 179 public static URI getDefaultUri(Configuration conf) { 180 return URI.create(fixName(conf.get(FS_DEFAULT_NAME_KEY, DEFAULT_FS))); 181 } 182 183 /** 184 * create PathId: A Factory method to create PathIds. PathIds are used 185 * by MapRFS in the direct shuffle to get access to the file paths. 
186 */ 187 public PathId createPathId() { 188 //Base class throws Unsupported Exception 189 //TODO determine if this is the right approach 190 throw new UnsupportedOperationException(); 191 } 192 193 public FSDataInputStream openFid2(PathId pfid, String file, int readAheadBytesHint) 194 throws IOException { 195 throw new UnsupportedOperationException("See concrete FS for implementation"); 196 } 197 public FSDataOutputStream createFid(String pfid, String file) 198 throws IOException { 199 throw new UnsupportedOperationException("See concrete FS for implementation"); 200 } 201 public boolean deleteFid(String pfid, String dir) 202 throws IOException { 203 throw new UnsupportedOperationException("See concrete FS for implementation"); 204 } 205 public String mkdirsFid(Path p) throws IOException { 206 throw new UnsupportedOperationException("See concrete FS for implementation"); 207 } 208 public String mkdirsFid(String pfid, String dir) 209 throws IOException { 210 throw new UnsupportedOperationException("See concrete FS for implementation"); 211 } 212 public void setOwnerFid(String fid, String user, String group) throws IOException { 213 throw new UnsupportedOperationException("See concrete FS for implementation"); 214 } 215 216 217 /** Set the default filesystem URI in a configuration. 218 * @param conf the configuration to alter 219 * @param uri the new default filesystem uri 220 */ 221 public static void setDefaultUri(Configuration conf, URI uri) { 222 conf.set(FS_DEFAULT_NAME_KEY, uri.toString()); 223 } 224 225 /** Set the default filesystem URI in a configuration. 226 * @param conf the configuration to alter 227 * @param uri the new default filesystem uri 228 */ 229 public static void setDefaultUri(Configuration conf, String uri) { 230 setDefaultUri(conf, URI.create(fixName(uri))); 231 } 232 233 /** Called after a new FileSystem instance is constructed. 234 * @param name a uri whose authority section names the host, port, etc. 
235 * for this FileSystem 236 * @param conf the configuration 237 */ 238 public void initialize(URI name, Configuration conf) throws IOException { 239 statistics = getStatistics(name.getScheme(), getClass()); 240 resolveSymlinks = conf.getBoolean( 241 CommonConfigurationKeys.FS_CLIENT_RESOLVE_REMOTE_SYMLINKS_KEY, 242 CommonConfigurationKeys.FS_CLIENT_RESOLVE_REMOTE_SYMLINKS_DEFAULT); 243 } 244 245 /** 246 * Return the protocol scheme for the FileSystem. 247 * <p/> 248 * This implementation throws an <code>UnsupportedOperationException</code>. 249 * 250 * @return the protocol scheme for the FileSystem. 251 */ 252 public String getScheme() { 253 throw new UnsupportedOperationException("Not implemented by the " + getClass().getSimpleName() + " FileSystem implementation"); 254 } 255 256 /** Returns a URI whose scheme and authority identify this FileSystem.*/ 257 public abstract URI getUri(); 258 259 /** 260 * Return a canonicalized form of this FileSystem's URI. 261 * 262 * The default implementation simply calls {@link #canonicalizeUri(URI)} 263 * on the filesystem's own URI, so subclasses typically only need to 264 * implement that method. 265 * 266 * @see #canonicalizeUri(URI) 267 */ 268 protected URI getCanonicalUri() { 269 return canonicalizeUri(getUri()); 270 } 271 272 /** 273 * Canonicalize the given URI. 274 * 275 * This is filesystem-dependent, but may for example consist of 276 * canonicalizing the hostname using DNS and adding the default 277 * port if not specified. 278 * 279 * The default implementation simply fills in the default port if 280 * not specified and if the filesystem has a default port. 
281 * 282 * @return URI 283 * @see NetUtils#getCanonicalUri(URI, int) 284 */ 285 protected URI canonicalizeUri(URI uri) { 286 if (uri.getPort() == -1 && getDefaultPort() > 0) { 287 // reconstruct the uri with the default port set 288 try { 289 uri = new URI(uri.getScheme(), uri.getUserInfo(), 290 uri.getHost(), getDefaultPort(), 291 uri.getPath(), uri.getQuery(), uri.getFragment()); 292 } catch (URISyntaxException e) { 293 // Should never happen! 294 throw new AssertionError("Valid URI became unparseable: " + 295 uri); 296 } 297 } 298 299 return uri; 300 } 301 302 /** 303 * Get the default port for this file system. 304 * @return the default port or 0 if there isn't one 305 */ 306 protected int getDefaultPort() { 307 return 0; 308 } 309 310 protected static FileSystem getFSofPath(final Path absOrFqPath, 311 final Configuration conf) 312 throws UnsupportedFileSystemException, IOException { 313 absOrFqPath.checkNotSchemeWithRelative(); 314 absOrFqPath.checkNotRelative(); 315 316 // Uses the default file system if not fully qualified 317 return get(absOrFqPath.toUri(), conf); 318 } 319 320 /** 321 * Get a canonical service name for this file system. The token cache is 322 * the only user of the canonical service name, and uses it to lookup this 323 * filesystem's service tokens. 324 * If file system provides a token of its own then it must have a canonical 325 * name, otherwise canonical name can be null. 326 * 327 * Default Impl: If the file system has child file systems 328 * (such as an embedded file system) then it is assumed that the fs has no 329 * tokens of its own and hence returns a null name; otherwise a service 330 * name is built using Uri and port. 
331 * 332 * @return a service string that uniquely identifies this file system, null 333 * if the filesystem does not implement tokens 334 * @see SecurityUtil#buildDTServiceName(URI, int) 335 */ 336 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) 337 public String getCanonicalServiceName() { 338 return (getChildFileSystems() == null) 339 ? SecurityUtil.buildDTServiceName(getUri(), getDefaultPort()) 340 : null; 341 } 342 343 /** @deprecated call #getUri() instead.*/ 344 @Deprecated 345 public String getName() { return getUri().toString(); } 346 347 /** @deprecated call #get(URI,Configuration) instead. */ 348 @Deprecated 349 public static FileSystem getNamed(String name, Configuration conf) 350 throws IOException { 351 return get(URI.create(fixName(name)), conf); 352 } 353 354 /** Update old-format filesystem names, for back-compatibility. This should 355 * eventually be replaced with a checkName() method that throws an exception 356 * for old-format names. */ 357 private static String fixName(String name) { 358 // convert old-format name to new-format name 359 if (name.equals("local")) { // "local" is now "file:///". 360 LOG.warn("\"local\" is a deprecated filesystem name." 361 +" Use \"file:///\" instead."); 362 name = "file:///"; 363 } else if (name.indexOf('/')==-1) { // unqualified is "hdfs://" 364 LOG.warn("\""+name+"\" is a deprecated filesystem name." 365 +" Use \"hdfs://"+name+"/\" instead."); 366 name = "hdfs://"+name; 367 } 368 return name; 369 } 370 371 /** 372 * Get the local file system. 373 * @param conf the configuration to configure the file system with 374 * @return a LocalFileSystem 375 */ 376 public static LocalFileSystem getLocal(Configuration conf) 377 throws IOException { 378 return (LocalFileSystem)get(LocalFileSystem.NAME, conf); 379 } 380 381 /** Returns the FileSystem for this URI's scheme and authority. 
The scheme 382 * of the URI determines a configuration property name, 383 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class. 384 * The entire URI is passed to the FileSystem instance's initialize method. 385 */ 386 public static FileSystem get(URI uri, Configuration conf) throws IOException { 387 String scheme = uri.getScheme(); 388 String authority = uri.getAuthority(); 389 390 if (scheme == null && authority == null) { // use default FS 391 return get(conf); 392 } 393 394 if (scheme != null && authority == null) { // no authority 395 URI defaultUri = getDefaultUri(conf); 396 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default 397 && defaultUri.getAuthority() != null) { // & default has authority 398 return get(defaultUri, conf); // return default 399 } 400 } 401 402 String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme); 403 if (conf.getBoolean(disableCacheName, false)) { 404 return createFileSystem(uri, conf); 405 } 406 407 return CACHE.get(uri, conf); 408 } 409 410 /** 411 * Returns the FileSystem for this URI's scheme and authority and the 412 * passed user. 
Internally invokes {@link #newInstance(URI, Configuration)} 413 * @param uri of the filesystem 414 * @param conf the configuration to use 415 * @param user to perform the get as 416 * @return filesystem instance 417 * @throws IOException 418 * @throws InterruptedException 419 */ 420 public static FileSystem newInstance(final URI uri, final Configuration conf, 421 final String user) throws IOException, InterruptedException { 422 String ticketCachePath = 423 conf.get(CommonConfigurationKeys.KERBEROS_TICKET_CACHE_PATH); 424 UserGroupInformation ugi = 425 UserGroupInformation.getBestUGI(ticketCachePath, user); 426 return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { 427 @Override 428 public FileSystem run() throws IOException { 429 return newInstance(uri,conf); 430 } 431 }); 432 } 433 /** Returns the FileSystem for this URI's scheme and authority. The scheme 434 * of the URI determines a configuration property name, 435 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class. 436 * The entire URI is passed to the FileSystem instance's initialize method. 437 * This always returns a new FileSystem object. 438 */ 439 public static FileSystem newInstance(URI uri, Configuration conf) throws IOException { 440 String scheme = uri.getScheme(); 441 String authority = uri.getAuthority(); 442 443 if (scheme == null) { // no scheme: use default FS 444 return newInstance(conf); 445 } 446 447 if (authority == null) { // no authority 448 URI defaultUri = getDefaultUri(conf); 449 if (scheme.equals(defaultUri.getScheme()) // if scheme matches default 450 && defaultUri.getAuthority() != null) { // & default has authority 451 return newInstance(defaultUri, conf); // return default 452 } 453 } 454 return CACHE.getUnique(uri, conf); 455 } 456 457 /** Returns a unique configured filesystem implementation. 458 * This always returns a new FileSystem object. 
459 * @param conf the configuration to use 460 */ 461 public static FileSystem newInstance(Configuration conf) throws IOException { 462 return newInstance(getDefaultUri(conf), conf); 463 } 464 465 /** 466 * Get a unique local file system object 467 * @param conf the configuration to configure the file system with 468 * @return a LocalFileSystem 469 * This always returns a new FileSystem object. 470 */ 471 public static LocalFileSystem newInstanceLocal(Configuration conf) 472 throws IOException { 473 return (LocalFileSystem)newInstance(LocalFileSystem.NAME, conf); 474 } 475 476 /** 477 * Close all cached filesystems. Be sure those filesystems are not 478 * used anymore. 479 * 480 * @throws IOException 481 */ 482 public static void closeAll() throws IOException { 483 CACHE.closeAll(); 484 } 485 486 /** 487 * Close all cached filesystems for a given UGI. Be sure those filesystems 488 * are not used anymore. 489 * @param ugi user group info to close 490 * @throws IOException 491 */ 492 public static void closeAllForUGI(UserGroupInformation ugi) 493 throws IOException { 494 CACHE.closeAll(ugi); 495 } 496 497 /** 498 * Make sure that a path specifies a FileSystem. 499 * @param path to use 500 */ 501 public Path makeQualified(Path path) { 502 checkPath(path); 503 return path.makeQualified(this.getUri(), this.getWorkingDirectory()); 504 } 505 506 /** 507 * Get a new delegation token for this file system. 508 * This is an internal method that should have been declared protected 509 * but wasn't historically. 510 * Callers should use {@link #addDelegationTokens(String, Credentials)} 511 * 512 * @param renewer the account name that is allowed to renew the token. 
513 * @return a new delegation token 514 * @throws IOException 515 */ 516 @InterfaceAudience.Private() 517 public Token<?> getDelegationToken(String renewer) throws IOException { 518 return null; 519 } 520 521 /** 522 * Obtain all delegation tokens used by this FileSystem that are not 523 * already present in the given Credentials. Existing tokens will neither 524 * be verified as valid nor having the given renewer. Missing tokens will 525 * be acquired and added to the given Credentials. 526 * 527 * Default Impl: works for simple fs with its own token 528 * and also for an embedded fs whose tokens are those of its 529 * children file system (i.e. the embedded fs has not tokens of its 530 * own). 531 * 532 * @param renewer the user allowed to renew the delegation tokens 533 * @param credentials cache in which to add new delegation tokens 534 * @return list of new delegation tokens 535 * @throws IOException 536 */ 537 @InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) 538 public Token<?>[] addDelegationTokens( 539 final String renewer, Credentials credentials) throws IOException { 540 if (credentials == null) { 541 credentials = new Credentials(); 542 } 543 final List<Token<?>> tokens = new ArrayList<Token<?>>(); 544 collectDelegationTokens(renewer, credentials, tokens); 545 return tokens.toArray(new Token<?>[tokens.size()]); 546 } 547 548 /** 549 * Recursively obtain the tokens for this FileSystem and all descended 550 * FileSystems as determined by getChildFileSystems(). 
551 * @param renewer the user allowed to renew the delegation tokens 552 * @param credentials cache in which to add the new delegation tokens 553 * @param tokens list in which to add acquired tokens 554 * @throws IOException 555 */ 556 private void collectDelegationTokens(final String renewer, 557 final Credentials credentials, 558 final List<Token<?>> tokens) 559 throws IOException { 560 final String serviceName = getCanonicalServiceName(); 561 // Collect token of the this filesystem and then of its embedded children 562 if (serviceName != null) { // fs has token, grab it 563 final Text service = new Text(serviceName); 564 Token<?> token = credentials.getToken(service); 565 if (token == null) { 566 token = getDelegationToken(renewer); 567 if (token != null) { 568 tokens.add(token); 569 credentials.addToken(service, token); 570 } 571 } 572 } 573 // Now collect the tokens from the children 574 final FileSystem[] children = getChildFileSystems(); 575 if (children != null) { 576 for (final FileSystem fs : children) { 577 fs.collectDelegationTokens(renewer, credentials, tokens); 578 } 579 } 580 } 581 582 /** 583 * Get all the immediate child FileSystems embedded in this FileSystem. 584 * It does not recurse and get grand children. If a FileSystem 585 * has multiple child FileSystems, then it should return a unique list 586 * of those FileSystems. Default is to return null to signify no children. 587 * 588 * @return FileSystems used by this FileSystem 589 */ 590 @InterfaceAudience.LimitedPrivate({ "HDFS" }) 591 @VisibleForTesting 592 public FileSystem[] getChildFileSystems() { 593 return null; 594 } 595 596 /** create a file with the provided permission 597 * The permission of the file is set to be the provided permission as in 598 * setPermission, not permission&~umask 599 * 600 * It is implemented using two RPCs. It is understood that it is inefficient, 601 * but the implementation is thread-safe. 
The other option is to change the 602 * value of umask in configuration to be 0, but it is not thread-safe. 603 * 604 * @param fs file system handle 605 * @param file the name of the file to be created 606 * @param permission the permission of the file 607 * @return an output stream 608 * @throws IOException 609 */ 610 public static FSDataOutputStream create(FileSystem fs, 611 Path file, FsPermission permission) throws IOException { 612 // create the file with default permission 613 FSDataOutputStream out = fs.create(file); 614 // set its permission to the supplied one 615 fs.setPermission(file, permission); 616 return out; 617 } 618 619 /** create a directory with the provided permission 620 * The permission of the directory is set to be the provided permission as in 621 * setPermission, not permission&~umask 622 * 623 * @see #create(FileSystem, Path, FsPermission) 624 * 625 * @param fs file system handle 626 * @param dir the name of the directory to be created 627 * @param permission the permission of the directory 628 * @return true if the directory creation succeeds; false otherwise 629 * @throws IOException 630 */ 631 public static boolean mkdirs(FileSystem fs, Path dir, FsPermission permission) 632 throws IOException { 633 // create the directory using the default permission 634 boolean result = fs.mkdirs(dir); 635 // set its permission to be the supplied one 636 fs.setPermission(dir, permission); 637 return result; 638 } 639 640 /////////////////////////////////////////////////////////////// 641 // FileSystem 642 /////////////////////////////////////////////////////////////// 643 644 protected FileSystem() { 645 super(null); 646 } 647 648 /** 649 * Check that a Path belongs to this FileSystem. 
650 * @param path to check 651 */ 652 protected void checkPath(Path path) { 653 URI uri = path.toUri(); 654 String thatScheme = uri.getScheme(); 655 if (thatScheme == null) // fs is relative 656 return; 657 URI thisUri = getCanonicalUri(); 658 String thisScheme = thisUri.getScheme(); 659 //authority and scheme are not case sensitive 660 if (thisScheme.equalsIgnoreCase(thatScheme)) {// schemes match 661 String thisAuthority = thisUri.getAuthority(); 662 String thatAuthority = uri.getAuthority(); 663 if (thatAuthority == null && // path's authority is null 664 thisAuthority != null) { // fs has an authority 665 URI defaultUri = getDefaultUri(getConf()); 666 if (thisScheme.equalsIgnoreCase(defaultUri.getScheme())) { 667 uri = defaultUri; // schemes match, so use this uri instead 668 } else { 669 uri = null; // can't determine auth of the path 670 } 671 } 672 if (uri != null) { 673 // canonicalize uri before comparing with this fs 674 uri = canonicalizeUri(uri); 675 thatAuthority = uri.getAuthority(); 676 if (thisAuthority == thatAuthority || // authorities match 677 (thisAuthority != null && 678 thisAuthority.equalsIgnoreCase(thatAuthority))) 679 return; 680 } 681 } 682 throw new IllegalArgumentException("Wrong FS: "+path+ 683 ", expected: "+this.getUri()); 684 } 685 686 /** 687 * Return an array containing hostnames, offset and size of 688 * portions of the given file. For a nonexistent 689 * file or regions, null will be returned. 690 * 691 * This call is most helpful with DFS, where it returns 692 * hostnames of machines that contain the given file. 693 * 694 * The FileSystem will simply return an elt containing 'localhost'. 
695 * 696 * @param file FilesStatus to get data from 697 * @param start offset into the given file 698 * @param len length for which to get locations for 699 */ 700 public BlockLocation[] getFileBlockLocations(FileStatus file, 701 long start, long len) throws IOException { 702 if (file == null) { 703 return null; 704 } 705 706 if (start < 0 || len < 0) { 707 throw new IllegalArgumentException("Invalid start or len parameter"); 708 } 709 710 if (file.getLen() <= start) { 711 return new BlockLocation[0]; 712 713 } 714 String[] name = { "localhost:50010" }; 715 String[] host = { "localhost" }; 716 return new BlockLocation[] { 717 new BlockLocation(name, host, 0, file.getLen()) }; 718 } 719 720 721 /** 722 * Return an array containing hostnames, offset and size of 723 * portions of the given file. For a nonexistent 724 * file or regions, null will be returned. 725 * 726 * This call is most helpful with DFS, where it returns 727 * hostnames of machines that contain the given file. 728 * 729 * The FileSystem will simply return an elt containing 'localhost'. 
730 * 731 * @param p path is used to identify an FS since an FS could have 732 * another FS that it could be delegating the call to 733 * @param start offset into the given file 734 * @param len length for which to get locations for 735 */ 736 public BlockLocation[] getFileBlockLocations(Path p, 737 long start, long len) throws IOException { 738 if (p == null) { 739 throw new NullPointerException(); 740 } 741 FileStatus file = getFileStatus(p); 742 return getFileBlockLocations(file, start, len); 743 } 744 745 /** 746 * Return a set of server default configuration values 747 * @return server default configuration values 748 * @throws IOException 749 * @deprecated use {@link #getServerDefaults(Path)} instead 750 */ 751 @Deprecated 752 public FsServerDefaults getServerDefaults() throws IOException { 753 Configuration conf = getConf(); 754 // CRC32 is chosen as default as it is available in all 755 // releases that support checksum. 756 // The client trash configuration is ignored. 757 return new FsServerDefaults(getDefaultBlockSize(), 758 conf.getInt("io.bytes.per.checksum", 512), 759 64 * 1024, 760 getDefaultReplication(), 761 conf.getInt("io.file.buffer.size", 4096), 762 false, 763 CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT, 764 DataChecksum.Type.CRC32); 765 } 766 767 /** 768 * Return a set of server default configuration values 769 * @param p path is used to identify an FS since an FS could have 770 * another FS that it could be delegating the call to 771 * @return server default configuration values 772 * @throws IOException 773 */ 774 public FsServerDefaults getServerDefaults(Path p) throws IOException { 775 return getServerDefaults(); 776 } 777 778 /** 779 * Return the fully-qualified path of path f resolving the path 780 * through any symlinks or mount point 781 * @param p path to be resolved 782 * @return fully qualified path 783 * @throws FileNotFoundException 784 */ 785 public Path resolvePath(final Path p) throws IOException { 786 checkPath(p); 
787 return getFileStatus(p).getPath(); 788 } 789 790 /** 791 * Opens an FSDataInputStream at the indicated Path. 792 * @param f the file name to open 793 * @param bufferSize the size of the buffer to be used. 794 */ 795 public abstract FSDataInputStream open(Path f, int bufferSize) 796 throws IOException; 797 798 /** 799 * Opens an FSDataInputStream at the indicated Path. 800 * @param f the file to open 801 */ 802 public FSDataInputStream open(Path f) throws IOException { 803 return open(f, getConf().getInt("io.file.buffer.size", 4096)); 804 } 805 806 /** 807 * Create an FSDataOutputStream at the indicated Path. 808 * Files are overwritten by default. 809 * @param f the file to create 810 */ 811 public FSDataOutputStream create(Path f) throws IOException { 812 return create(f, true); 813 } 814 815 /** 816 * Create an FSDataOutputStream at the indicated Path. 817 * @param f the file to create 818 * @param overwrite if a file with this name already exists, then if true, 819 * the file will be overwritten, and if false an exception will be thrown. 820 */ 821 public FSDataOutputStream create(Path f, boolean overwrite) 822 throws IOException { 823 return create(f, overwrite, 824 getConf().getInt("io.file.buffer.size", 4096), 825 getDefaultReplication(f), 826 getDefaultBlockSize(f)); 827 } 828 829 /** 830 * Create an FSDataOutputStream at the indicated Path with write-progress 831 * reporting. 832 * Files are overwritten by default. 833 * @param f the file to create 834 * @param progress to report progress 835 */ 836 public FSDataOutputStream create(Path f, Progressable progress) 837 throws IOException { 838 return create(f, true, 839 getConf().getInt("io.file.buffer.size", 4096), 840 getDefaultReplication(f), 841 getDefaultBlockSize(f), progress); 842 } 843 844 /** 845 * Create an FSDataOutputStream at the indicated Path. 846 * Files are overwritten by default. 
847 * @param f the file to create 848 * @param replication the replication factor 849 */ 850 public FSDataOutputStream create(Path f, short replication) 851 throws IOException { 852 return create(f, true, 853 getConf().getInt("io.file.buffer.size", 4096), 854 replication, 855 getDefaultBlockSize(f)); 856 } 857 858 /** 859 * Create an FSDataOutputStream at the indicated Path with write-progress 860 * reporting. 861 * Files are overwritten by default. 862 * @param f the file to create 863 * @param replication the replication factor 864 * @param progress to report progress 865 */ 866 public FSDataOutputStream create(Path f, short replication, 867 Progressable progress) throws IOException { 868 return create(f, true, 869 getConf().getInt( 870 CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY, 871 CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT), 872 replication, 873 getDefaultBlockSize(f), progress); 874 } 875 876 877 /** 878 * Create an FSDataOutputStream at the indicated Path. 879 * @param f the file name to create 880 * @param overwrite if a file with this name already exists, then if true, 881 * the file will be overwritten, and if false an error will be thrown. 882 * @param bufferSize the size of the buffer to be used. 883 */ 884 public FSDataOutputStream create(Path f, 885 boolean overwrite, 886 int bufferSize 887 ) throws IOException { 888 return create(f, overwrite, bufferSize, 889 getDefaultReplication(f), 890 getDefaultBlockSize(f)); 891 } 892 893 /** 894 * Create an FSDataOutputStream at the indicated Path with write-progress 895 * reporting. 896 * @param f the path of the file to open 897 * @param overwrite if a file with this name already exists, then if true, 898 * the file will be overwritten, and if false an error will be thrown. 899 * @param bufferSize the size of the buffer to be used. 
900 */ 901 public FSDataOutputStream create(Path f, 902 boolean overwrite, 903 int bufferSize, 904 Progressable progress 905 ) throws IOException { 906 return create(f, overwrite, bufferSize, 907 getDefaultReplication(f), 908 getDefaultBlockSize(f), progress); 909 } 910 911 912 /** 913 * Create an FSDataOutputStream at the indicated Path. 914 * @param f the file name to open 915 * @param overwrite if a file with this name already exists, then if true, 916 * the file will be overwritten, and if false an error will be thrown. 917 * @param bufferSize the size of the buffer to be used. 918 * @param replication required block replication for the file. 919 */ 920 public FSDataOutputStream create(Path f, 921 boolean overwrite, 922 int bufferSize, 923 short replication, 924 long blockSize 925 ) throws IOException { 926 return create(f, overwrite, bufferSize, replication, blockSize, null); 927 } 928 929 /** 930 * Create an FSDataOutputStream at the indicated Path with write-progress 931 * reporting. 932 * @param f the file name to open 933 * @param overwrite if a file with this name already exists, then if true, 934 * the file will be overwritten, and if false an error will be thrown. 935 * @param bufferSize the size of the buffer to be used. 936 * @param replication required block replication for the file. 937 */ 938 public FSDataOutputStream create(Path f, 939 boolean overwrite, 940 int bufferSize, 941 short replication, 942 long blockSize, 943 Progressable progress 944 ) throws IOException { 945 return this.create(f, FsPermission.getFileDefault().applyUMask( 946 FsPermission.getUMask(getConf())), overwrite, bufferSize, 947 replication, blockSize, progress); 948 } 949 950 /** 951 * Create an FSDataOutputStream at the indicated Path with write-progress 952 * reporting. 
953 * @param f the file name to open 954 * @param permission 955 * @param overwrite if a file with this name already exists, then if true, 956 * the file will be overwritten, and if false an error will be thrown. 957 * @param bufferSize the size of the buffer to be used. 958 * @param replication required block replication for the file. 959 * @param blockSize 960 * @param progress 961 * @throws IOException 962 * @see #setPermission(Path, FsPermission) 963 */ 964 public abstract FSDataOutputStream create(Path f, 965 FsPermission permission, 966 boolean overwrite, 967 int bufferSize, 968 short replication, 969 long blockSize, 970 Progressable progress) throws IOException; 971 972 /** 973 * Create an FSDataOutputStream at the indicated Path with write-progress 974 * reporting. 975 * @param f the file name to open 976 * @param permission 977 * @param flags {@link CreateFlag}s to use for this stream. 978 * @param bufferSize the size of the buffer to be used. 979 * @param replication required block replication for the file. 980 * @param blockSize 981 * @param progress 982 * @throws IOException 983 * @see #setPermission(Path, FsPermission) 984 */ 985 public FSDataOutputStream create(Path f, 986 FsPermission permission, 987 EnumSet<CreateFlag> flags, 988 int bufferSize, 989 short replication, 990 long blockSize, 991 Progressable progress) throws IOException { 992 return create(f, permission, flags, bufferSize, replication, 993 blockSize, progress, null); 994 } 995 996 /** 997 * Create an FSDataOutputStream at the indicated Path with a custom 998 * checksum option 999 * @param f the file name to open 1000 * @param permission 1001 * @param flags {@link CreateFlag}s to use for this stream. 1002 * @param bufferSize the size of the buffer to be used. 1003 * @param replication required block replication for the file. 1004 * @param blockSize 1005 * @param progress 1006 * @param checksumOpt checksum parameter. If null, the values 1007 * found in conf will be used. 
1008 * @throws IOException 1009 * @see #setPermission(Path, FsPermission) 1010 */ 1011 public FSDataOutputStream create(Path f, 1012 FsPermission permission, 1013 EnumSet<CreateFlag> flags, 1014 int bufferSize, 1015 short replication, 1016 long blockSize, 1017 Progressable progress, 1018 ChecksumOpt checksumOpt) throws IOException { 1019 // Checksum options are ignored by default. The file systems that 1020 // implement checksum need to override this method. The full 1021 // support is currently only available in DFS. 1022 return create(f, permission, flags.contains(CreateFlag.OVERWRITE), 1023 bufferSize, replication, blockSize, progress); 1024 } 1025 1026 /*. 1027 * This create has been added to support the FileContext that processes 1028 * the permission 1029 * with umask before calling this method. 1030 * This a temporary method added to support the transition from FileSystem 1031 * to FileContext for user applications. 1032 */ 1033 @Deprecated 1034 protected FSDataOutputStream primitiveCreate(Path f, 1035 FsPermission absolutePermission, EnumSet<CreateFlag> flag, int bufferSize, 1036 short replication, long blockSize, Progressable progress, 1037 ChecksumOpt checksumOpt) throws IOException { 1038 1039 boolean pathExists = exists(f); 1040 CreateFlag.validate(f, pathExists, flag); 1041 1042 // Default impl assumes that permissions do not matter and 1043 // nor does the bytesPerChecksum hence 1044 // calling the regular create is good enough. 1045 // FSs that implement permissions should override this. 1046 1047 if (pathExists && flag.contains(CreateFlag.APPEND)) { 1048 return append(f, bufferSize, progress); 1049 } 1050 1051 return this.create(f, absolutePermission, 1052 flag.contains(CreateFlag.OVERWRITE), bufferSize, replication, 1053 blockSize, progress); 1054 } 1055 1056 /** 1057 * This version of the mkdirs method assumes that the permission is absolute. 
1058 * It has been added to support the FileContext that processes the permission 1059 * with umask before calling this method. 1060 * This a temporary method added to support the transition from FileSystem 1061 * to FileContext for user applications. 1062 */ 1063 @Deprecated 1064 protected boolean primitiveMkdir(Path f, FsPermission absolutePermission) 1065 throws IOException { 1066 // Default impl is to assume that permissions do not matter and hence 1067 // calling the regular mkdirs is good enough. 1068 // FSs that implement permissions should override this. 1069 return this.mkdirs(f, absolutePermission); 1070 } 1071 1072 1073 /** 1074 * This version of the mkdirs method assumes that the permission is absolute. 1075 * It has been added to support the FileContext that processes the permission 1076 * with umask before calling this method. 1077 * This a temporary method added to support the transition from FileSystem 1078 * to FileContext for user applications. 1079 */ 1080 @Deprecated 1081 protected void primitiveMkdir(Path f, FsPermission absolutePermission, 1082 boolean createParent) 1083 throws IOException { 1084 1085 if (!createParent) { // parent must exist. 1086 // since the this.mkdirs makes parent dirs automatically 1087 // we must throw exception if parent does not exist. 1088 final FileStatus stat = getFileStatus(f.getParent()); 1089 if (stat == null) { 1090 throw new FileNotFoundException("Missing parent:" + f); 1091 } 1092 if (!stat.isDirectory()) { 1093 throw new ParentNotDirectoryException("parent is not a dir"); 1094 } 1095 // parent does exist - go ahead with mkdir of leaf 1096 } 1097 // Default impl is to assume that permissions do not matter and hence 1098 // calling the regular mkdirs is good enough. 1099 // FSs that implement permissions should override this. 
1100 if (!this.mkdirs(f, absolutePermission)) { 1101 throw new IOException("mkdir of "+ f + " failed"); 1102 } 1103 } 1104 1105 /** 1106 * Opens an FSDataOutputStream at the indicated Path with write-progress 1107 * reporting. Same as create(), except fails if parent directory doesn't 1108 * already exist. 1109 * @param f the file name to open 1110 * @param overwrite if a file with this name already exists, then if true, 1111 * the file will be overwritten, and if false an error will be thrown. 1112 * @param bufferSize the size of the buffer to be used. 1113 * @param replication required block replication for the file. 1114 * @param blockSize 1115 * @param progress 1116 * @throws IOException 1117 * @see #setPermission(Path, FsPermission) 1118 * @deprecated API only for 0.20-append 1119 */ 1120 @Deprecated 1121 public FSDataOutputStream createNonRecursive(Path f, 1122 boolean overwrite, 1123 int bufferSize, short replication, long blockSize, 1124 Progressable progress) throws IOException { 1125 return this.createNonRecursive(f, FsPermission.getFileDefault(), 1126 overwrite, bufferSize, replication, blockSize, progress); 1127 } 1128 1129 /** 1130 * Opens an FSDataOutputStream at the indicated Path with write-progress 1131 * reporting. Same as create(), except fails if parent directory doesn't 1132 * already exist. 1133 * @param f the file name to open 1134 * @param permission 1135 * @param overwrite if a file with this name already exists, then if true, 1136 * the file will be overwritten, and if false an error will be thrown. 1137 * @param bufferSize the size of the buffer to be used. 1138 * @param replication required block replication for the file. 
1139 * @param blockSize 1140 * @param progress 1141 * @throws IOException 1142 * @see #setPermission(Path, FsPermission) 1143 * @deprecated API only for 0.20-append 1144 */ 1145 @Deprecated 1146 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, 1147 boolean overwrite, int bufferSize, short replication, long blockSize, 1148 Progressable progress) throws IOException { 1149 return createNonRecursive(f, permission, 1150 overwrite ? EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE) 1151 : EnumSet.of(CreateFlag.CREATE), bufferSize, 1152 replication, blockSize, progress); 1153 } 1154 1155 /** 1156 * Opens an FSDataOutputStream at the indicated Path with write-progress 1157 * reporting. Same as create(), except fails if parent directory doesn't 1158 * already exist. 1159 * @param f the file name to open 1160 * @param permission 1161 * @param flags {@link CreateFlag}s to use for this stream. 1162 * @param bufferSize the size of the buffer to be used. 1163 * @param replication required block replication for the file. 1164 * @param blockSize 1165 * @param progress 1166 * @throws IOException 1167 * @see #setPermission(Path, FsPermission) 1168 * @deprecated API only for 0.20-append 1169 */ 1170 @Deprecated 1171 public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, 1172 EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize, 1173 Progressable progress) throws IOException { 1174 throw new IOException("createNonRecursive unsupported for this filesystem " 1175 + this.getClass()); 1176 } 1177 1178 /** 1179 * Creates the given Path as a brand-new zero-length file. If 1180 * create fails, or if it already existed, return false. 
1181 * 1182 * @param f path to use for create 1183 */ 1184 public boolean createNewFile(Path f) throws IOException { 1185 if (exists(f)) { 1186 return false; 1187 } else { 1188 create(f, false, getConf().getInt("io.file.buffer.size", 4096)).close(); 1189 return true; 1190 } 1191 } 1192 1193 /** 1194 * Append to an existing file (optional operation). 1195 * Same as append(f, getConf().getInt("io.file.buffer.size", 4096), null) 1196 * @param f the existing file to be appended. 1197 * @throws IOException 1198 */ 1199 public FSDataOutputStream append(Path f) throws IOException { 1200 return append(f, getConf().getInt("io.file.buffer.size", 4096), null); 1201 } 1202 /** 1203 * Append to an existing file (optional operation). 1204 * Same as append(f, bufferSize, null). 1205 * @param f the existing file to be appended. 1206 * @param bufferSize the size of the buffer to be used. 1207 * @throws IOException 1208 */ 1209 public FSDataOutputStream append(Path f, int bufferSize) throws IOException { 1210 return append(f, bufferSize, null); 1211 } 1212 1213 /** 1214 * Append to an existing file (optional operation). 1215 * @param f the existing file to be appended. 1216 * @param bufferSize the size of the buffer to be used. 1217 * @param progress for reporting progress if it is not null. 1218 * @throws IOException 1219 */ 1220 public abstract FSDataOutputStream append(Path f, int bufferSize, 1221 Progressable progress) throws IOException; 1222 1223 /** 1224 * Concat existing files together. 1225 * @param trg the path to the target destination. 1226 * @param psrcs the paths to the sources to use for the concatenation. 1227 * @throws IOException 1228 */ 1229 public void concat(final Path trg, final Path [] psrcs) throws IOException { 1230 throw new UnsupportedOperationException("Not implemented by the " + 1231 getClass().getSimpleName() + " FileSystem implementation"); 1232 } 1233 1234 /** 1235 * Get replication. 
1236 * 1237 * @deprecated Use getFileStatus() instead 1238 * @param src file name 1239 * @return file replication 1240 * @throws IOException 1241 */ 1242 @Deprecated 1243 public short getReplication(Path src) throws IOException { 1244 return getFileStatus(src).getReplication(); 1245 } 1246 1247 /** 1248 * Set replication for an existing file. 1249 * 1250 * @param src file name 1251 * @param replication new replication 1252 * @throws IOException 1253 * @return true if successful; 1254 * false if file does not exist or is a directory 1255 */ 1256 public boolean setReplication(Path src, short replication) 1257 throws IOException { 1258 return true; 1259 } 1260 1261 /** 1262 * Renames Path src to Path dst. Can take place on local fs 1263 * or remote DFS. 1264 * @param src path to be renamed 1265 * @param dst new path after rename 1266 * @throws IOException on failure 1267 * @return true if rename is successful 1268 */ 1269 public abstract boolean rename(Path src, Path dst) throws IOException; 1270 1271 /** 1272 * Renames Path src to Path dst 1273 * <ul> 1274 * <li 1275 * <li>Fails if src is a file and dst is a directory. 1276 * <li>Fails if src is a directory and dst is a file. 1277 * <li>Fails if the parent of dst does not exist or is a file. 1278 * </ul> 1279 * <p> 1280 * If OVERWRITE option is not passed as an argument, rename fails 1281 * if the dst already exists. 1282 * <p> 1283 * If OVERWRITE option is passed as an argument, rename overwrites 1284 * the dst if it is a file or an empty directory. Rename fails if dst is 1285 * a non-empty directory. 1286 * <p> 1287 * Note that atomicity of rename is dependent on the file system 1288 * implementation. Please refer to the file system documentation for 1289 * details. This default implementation is non atomic. 1290 * <p> 1291 * This method is deprecated since it is a temporary method added to 1292 * support the transition from FileSystem to FileContext for user 1293 * applications. 
1294 * 1295 * @param src path to be renamed 1296 * @param dst new path after rename 1297 * @throws IOException on failure 1298 */ 1299 @Deprecated 1300 protected void rename(final Path src, final Path dst, 1301 final Rename... options) throws IOException { 1302 // Default implementation 1303 final FileStatus srcStatus = getFileLinkStatus(src); 1304 if (srcStatus == null) { 1305 throw new FileNotFoundException("rename source " + src + " not found."); 1306 } 1307 1308 boolean overwrite = false; 1309 if (null != options) { 1310 for (Rename option : options) { 1311 if (option == Rename.OVERWRITE) { 1312 overwrite = true; 1313 } 1314 } 1315 } 1316 1317 FileStatus dstStatus; 1318 try { 1319 dstStatus = getFileLinkStatus(dst); 1320 } catch (IOException e) { 1321 dstStatus = null; 1322 } 1323 if (dstStatus != null) { 1324 if (srcStatus.isDirectory() != dstStatus.isDirectory()) { 1325 throw new IOException("Source " + src + " Destination " + dst 1326 + " both should be either file or directory"); 1327 } 1328 if (!overwrite) { 1329 throw new FileAlreadyExistsException("rename destination " + dst 1330 + " already exists."); 1331 } 1332 // Delete the destination that is a file or an empty directory 1333 if (dstStatus.isDirectory()) { 1334 FileStatus[] list = listStatus(dst); 1335 if (list != null && list.length != 0) { 1336 throw new IOException( 1337 "rename cannot overwrite non empty destination directory " + dst); 1338 } 1339 } 1340 delete(dst, false); 1341 } else { 1342 final Path parent = dst.getParent(); 1343 final FileStatus parentStatus = getFileStatus(parent); 1344 if (parentStatus == null) { 1345 throw new FileNotFoundException("rename destination parent " + parent 1346 + " not found."); 1347 } 1348 if (!parentStatus.isDirectory()) { 1349 throw new ParentNotDirectoryException("rename destination parent " + parent 1350 + " is a file."); 1351 } 1352 } 1353 if (!rename(src, dst)) { 1354 throw new IOException("rename from " + src + " to " + dst + " failed."); 1355 } 1356 
} 1357 1358 /** 1359 * Truncate the file in the indicated path to the indicated size. 1360 * <ul> 1361 * <li>Fails if path is a directory. 1362 * <li>Fails if path does not exist. 1363 * <li>Fails if path is not closed. 1364 * <li>Fails if new size is greater than current size. 1365 * </ul> 1366 * @param f The path to the file to be truncated 1367 * @param newLength The size the file is to be truncated to 1368 * 1369 * @return <code>true</code> if the file has been truncated to the desired 1370 * <code>newLength</code> and is immediately available to be reused for 1371 * write operations such as <code>append</code>, or 1372 * <code>false</code> if a background process of adjusting the length of 1373 * the last block has been started, and clients should wait for it to 1374 * complete before proceeding with further file updates. 1375 */ 1376 public boolean truncate(Path f, long newLength) throws IOException { 1377 throw new UnsupportedOperationException("Not implemented by the " + 1378 getClass().getSimpleName() + " FileSystem implementation"); 1379 } 1380 1381 /** 1382 * Delete a file 1383 * @deprecated Use {@link #delete(Path, boolean)} instead. 1384 */ 1385 @Deprecated 1386 public boolean delete(Path f) throws IOException { 1387 return delete(f, true); 1388 } 1389 1390 /** Delete a file. 1391 * 1392 * @param f the path to delete. 1393 * @param recursive if path is a directory and set to 1394 * true, the directory is deleted else throws an exception. In 1395 * case of a file the recursive can be set to either true or false. 1396 * @return true if delete is successful else false. 1397 * @throws IOException 1398 */ 1399 public abstract boolean delete(Path f, boolean recursive) throws IOException; 1400 1401 /** 1402 * Mark a path to be deleted when FileSystem is closed. 1403 * When the JVM shuts down, 1404 * all FileSystem objects will be closed automatically. 1405 * Then, 1406 * the marked path will be deleted as a result of closing the FileSystem. 
1407 * 1408 * The path has to exist in the file system. 1409 * 1410 * @param f the path to delete. 1411 * @return true if deleteOnExit is successful, otherwise false. 1412 * @throws IOException 1413 */ 1414 public boolean deleteOnExit(Path f) throws IOException { 1415 if (!exists(f)) { 1416 return false; 1417 } 1418 synchronized (deleteOnExit) { 1419 deleteOnExit.add(f); 1420 } 1421 return true; 1422 } 1423 1424 /** 1425 * Cancel the deletion of the path when the FileSystem is closed 1426 * @param f the path to cancel deletion 1427 */ 1428 public boolean cancelDeleteOnExit(Path f) { 1429 synchronized (deleteOnExit) { 1430 return deleteOnExit.remove(f); 1431 } 1432 } 1433 1434 /** 1435 * Delete all files that were marked as delete-on-exit. This recursively 1436 * deletes all files in the specified paths. 1437 */ 1438 protected void processDeleteOnExit() { 1439 synchronized (deleteOnExit) { 1440 for (Iterator<Path> iter = deleteOnExit.iterator(); iter.hasNext();) { 1441 Path path = iter.next(); 1442 try { 1443 if (exists(path)) { 1444 delete(path, true); 1445 } 1446 } 1447 catch (IOException e) { 1448 LOG.info("Ignoring failure to deleteOnExit for path " + path); 1449 } 1450 iter.remove(); 1451 } 1452 } 1453 } 1454 1455 /** Check if exists. 1456 * @param f source file 1457 */ 1458 public boolean exists(Path f) throws IOException { 1459 try { 1460 return getFileStatus(f) != null; 1461 } catch (FileNotFoundException e) { 1462 return false; 1463 } 1464 } 1465 1466 /** True iff the named path is a directory. 1467 * Note: Avoid using this method. Instead reuse the FileStatus 1468 * returned by getFileStatus() or listStatus() methods. 1469 * @param f path to check 1470 */ 1471 public boolean isDirectory(Path f) throws IOException { 1472 try { 1473 return getFileStatus(f).isDirectory(); 1474 } catch (FileNotFoundException e) { 1475 return false; // f does not exist 1476 } 1477 } 1478 1479 /** True iff the named path is a regular file. 1480 * Note: Avoid using this method. 
Instead reuse the FileStatus 1481 * returned by getFileStatus() or listStatus() methods. 1482 * @param f path to check 1483 */ 1484 public boolean isFile(Path f) throws IOException { 1485 try { 1486 return getFileStatus(f).isFile(); 1487 } catch (FileNotFoundException e) { 1488 return false; // f does not exist 1489 } 1490 } 1491 1492 /** The number of bytes in a file. */ 1493 /** @deprecated Use getFileStatus() instead */ 1494 @Deprecated 1495 public long getLength(Path f) throws IOException { 1496 return getFileStatus(f).getLen(); 1497 } 1498 1499 /** Return the {@link ContentSummary} of a given {@link Path}. 1500 * @param f path to use 1501 */ 1502 public ContentSummary getContentSummary(Path f) throws IOException { 1503 FileStatus status = getFileStatus(f); 1504 if (status.isFile()) { 1505 // f is a file 1506 long length = status.getLen(); 1507 return new ContentSummary.Builder().length(length). 1508 fileCount(1).directoryCount(0).spaceConsumed(length).build(); 1509 } 1510 // f is a directory 1511 long[] summary = {0, 0, 1}; 1512 for(FileStatus s : listStatus(f)) { 1513 long length = s.getLen(); 1514 ContentSummary c = s.isDirectory() ? getContentSummary(s.getPath()) : 1515 new ContentSummary.Builder().length(length). 1516 fileCount(1).directoryCount(0).spaceConsumed(length).build(); 1517 summary[0] += c.getLength(); 1518 summary[1] += c.getFileCount(); 1519 summary[2] += c.getDirectoryCount(); 1520 } 1521 return new ContentSummary.Builder().length(summary[0]). 1522 fileCount(summary[1]).directoryCount(summary[2]). 1523 spaceConsumed(summary[0]).build(); 1524 } 1525 1526 final private static PathFilter DEFAULT_FILTER = new PathFilter() { 1527 @Override 1528 public boolean accept(Path file) { 1529 return true; 1530 } 1531 }; 1532 1533 /** 1534 * List the statuses of the files/directories in the given path if the path is 1535 * a directory. 
1536 * 1537 * @param f given path 1538 * @return the statuses of the files/directories in the given patch 1539 * @throws FileNotFoundException when the path does not exist; 1540 * IOException see specific implementation 1541 */ 1542 public abstract FileStatus[] listStatus(Path f) throws FileNotFoundException, 1543 IOException; 1544 1545 /* 1546 * Filter files/directories in the given path using the user-supplied path 1547 * filter. Results are added to the given array <code>results</code>. 1548 */ 1549 private void listStatus(ArrayList<FileStatus> results, Path f, 1550 PathFilter filter) throws FileNotFoundException, IOException { 1551 FileStatus listing[] = listStatus(f); 1552 if (listing == null) { 1553 throw new IOException("Error accessing " + f); 1554 } 1555 1556 for (int i = 0; i < listing.length; i++) { 1557 if (filter.accept(listing[i].getPath())) { 1558 results.add(listing[i]); 1559 } 1560 } 1561 } 1562 1563 /** 1564 * @return an iterator over the corrupt files under the given path 1565 * (may contain duplicates if a file has more than one corrupt block) 1566 * @throws IOException 1567 */ 1568 public RemoteIterator<Path> listCorruptFileBlocks(Path path) 1569 throws IOException { 1570 throw new UnsupportedOperationException(getClass().getCanonicalName() + 1571 " does not support" + 1572 " listCorruptFileBlocks"); 1573 } 1574 1575 /** 1576 * Filter files/directories in the given path using the user-supplied path 1577 * filter. 
1578 * 1579 * @param f 1580 * a path name 1581 * @param filter 1582 * the user-supplied path filter 1583 * @return an array of FileStatus objects for the files under the given path 1584 * after applying the filter 1585 * @throws FileNotFoundException when the path does not exist; 1586 * IOException see specific implementation 1587 */ 1588 public FileStatus[] listStatus(Path f, PathFilter filter) 1589 throws FileNotFoundException, IOException { 1590 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1591 listStatus(results, f, filter); 1592 return results.toArray(new FileStatus[results.size()]); 1593 } 1594 1595 /** 1596 * Filter files/directories in the given list of paths using default 1597 * path filter. 1598 * 1599 * @param files 1600 * a list of paths 1601 * @return a list of statuses for the files under the given paths after 1602 * applying the filter default Path filter 1603 * @throws FileNotFoundException when the path does not exist; 1604 * IOException see specific implementation 1605 */ 1606 public FileStatus[] listStatus(Path[] files) 1607 throws FileNotFoundException, IOException { 1608 return listStatus(files, DEFAULT_FILTER); 1609 } 1610 1611 /** 1612 * Filter files/directories in the given list of paths using user-supplied 1613 * path filter. 
1614 * 1615 * @param files 1616 * a list of paths 1617 * @param filter 1618 * the user-supplied path filter 1619 * @return a list of statuses for the files under the given paths after 1620 * applying the filter 1621 * @throws FileNotFoundException when the path does not exist; 1622 * IOException see specific implementation 1623 */ 1624 public FileStatus[] listStatus(Path[] files, PathFilter filter) 1625 throws FileNotFoundException, IOException { 1626 ArrayList<FileStatus> results = new ArrayList<FileStatus>(); 1627 for (int i = 0; i < files.length; i++) { 1628 listStatus(results, files[i], filter); 1629 } 1630 return results.toArray(new FileStatus[results.size()]); 1631 } 1632 1633 /** 1634 * <p>Return all the files that match filePattern and are not checksum 1635 * files. Results are sorted by their names. 1636 * 1637 * <p> 1638 * A filename pattern is composed of <i>regular</i> characters and 1639 * <i>special pattern matching</i> characters, which are: 1640 * 1641 * <dl> 1642 * <dd> 1643 * <dl> 1644 * <p> 1645 * <dt> <tt> ? </tt> 1646 * <dd> Matches any single character. 1647 * 1648 * <p> 1649 * <dt> <tt> * </tt> 1650 * <dd> Matches zero or more characters. 1651 * 1652 * <p> 1653 * <dt> <tt> [<i>abc</i>] </tt> 1654 * <dd> Matches a single character from character set 1655 * <tt>{<i>a,b,c</i>}</tt>. 1656 * 1657 * <p> 1658 * <dt> <tt> [<i>a</i>-<i>b</i>] </tt> 1659 * <dd> Matches a single character from the character range 1660 * <tt>{<i>a...b</i>}</tt>. Note that character <tt><i>a</i></tt> must be 1661 * lexicographically less than or equal to character <tt><i>b</i></tt>. 1662 * 1663 * <p> 1664 * <dt> <tt> [^<i>a</i>] </tt> 1665 * <dd> Matches a single character that is not from character set or range 1666 * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur 1667 * immediately to the right of the opening bracket. 1668 * 1669 * <p> 1670 * <dt> <tt> \<i>c</i> </tt> 1671 * <dd> Removes (escapes) any special meaning of character <i>c</i>. 
1672 * 1673 * <p> 1674 * <dt> <tt> {ab,cd} </tt> 1675 * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt> 1676 * 1677 * <p> 1678 * <dt> <tt> {ab,c{de,fh}} </tt> 1679 * <dd> Matches a string from the string set <tt>{<i>ab, cde, cfh</i>}</tt> 1680 * 1681 * </dl> 1682 * </dd> 1683 * </dl> 1684 * 1685 * @param pathPattern a regular expression specifying a pth pattern 1686 1687 * @return an array of paths that match the path pattern 1688 * @throws IOException 1689 */ 1690 public FileStatus[] globStatus(Path pathPattern) throws IOException { 1691 return new Globber(this, pathPattern, DEFAULT_FILTER).glob(); 1692 } 1693 1694 /** 1695 * Return an array of FileStatus objects whose path names match pathPattern 1696 * and is accepted by the user-supplied path filter. Results are sorted by 1697 * their path names. 1698 * Return null if pathPattern has no glob and the path does not exist. 1699 * Return an empty array if pathPattern has a glob and no path matches it. 1700 * 1701 * @param pathPattern 1702 * a regular expression specifying the path pattern 1703 * @param filter 1704 * a user-supplied path filter 1705 * @return an array of FileStatus objects 1706 * @throws IOException if any I/O error occurs when fetching file status 1707 */ 1708 public FileStatus[] globStatus(Path pathPattern, PathFilter filter) 1709 throws IOException { 1710 return new Globber(this, pathPattern, filter).glob(); 1711 } 1712 1713 /** 1714 * List the statuses of the files/directories in the given path if the path is 1715 * a directory. 1716 * Return the file's status and block locations If the path is a file. 1717 * 1718 * If a returned status is a file, it contains the file's block locations. 
1719 * 1720 * @param f is the path 1721 * 1722 * @return an iterator that traverses statuses of the files/directories 1723 * in the given path 1724 * 1725 * @throws FileNotFoundException If <code>f</code> does not exist 1726 * @throws IOException If an I/O error occurred 1727 */ 1728 public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f) 1729 throws FileNotFoundException, IOException { 1730 return listLocatedStatus(f, DEFAULT_FILTER); 1731 } 1732 1733 /** 1734 * Listing a directory 1735 * The returned results include its block location if it is a file 1736 * The results are filtered by the given path filter 1737 * @param f a path 1738 * @param filter a path filter 1739 * @return an iterator that traverses statuses of the files/directories 1740 * in the given path 1741 * @throws FileNotFoundException if <code>f</code> does not exist 1742 * @throws IOException if any I/O error occurred 1743 */ 1744 protected RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f, 1745 final PathFilter filter) 1746 throws FileNotFoundException, IOException { 1747 return new RemoteIterator<LocatedFileStatus>() { 1748 private final FileStatus[] stats = listStatus(f, filter); 1749 private int i = 0; 1750 1751 @Override 1752 public boolean hasNext() { 1753 return i<stats.length; 1754 } 1755 1756 @Override 1757 public LocatedFileStatus next() throws IOException { 1758 if (!hasNext()) { 1759 throw new NoSuchElementException("No more entry in " + f); 1760 } 1761 FileStatus result = stats[i++]; 1762 BlockLocation[] locs = result.isFile() ? 1763 getFileBlockLocations(result.getPath(), 0, result.getLen()) : 1764 null; 1765 return new LocatedFileStatus(result, locs); 1766 } 1767 }; 1768 } 1769 1770 /** 1771 * Returns a remote iterator so that followup calls are made on demand 1772 * while consuming the entries. Each file system implementation should 1773 * override this method and provide a more efficient implementation, if 1774 * possible. 
1775 * 1776 * @param p target path 1777 * @return remote iterator 1778 */ 1779 public RemoteIterator<FileStatus> listStatusIterator(final Path p) 1780 throws FileNotFoundException, IOException { 1781 return new RemoteIterator<FileStatus>() { 1782 private final FileStatus[] stats = listStatus(p); 1783 private int i = 0; 1784 1785 @Override 1786 public boolean hasNext() { 1787 return i<stats.length; 1788 } 1789 1790 @Override 1791 public FileStatus next() throws IOException { 1792 if (!hasNext()) { 1793 throw new NoSuchElementException("No more entry in " + p); 1794 } 1795 return stats[i++]; 1796 } 1797 }; 1798 } 1799 1800 /** 1801 * List the statuses and block locations of the files in the given path. 1802 * 1803 * If the path is a directory, 1804 * if recursive is false, returns files in the directory; 1805 * if recursive is true, return files in the subtree rooted at the path. 1806 * If the path is a file, return the file's status and block locations. 1807 * 1808 * @param f is the path 1809 * @param recursive if the subdirectories need to be traversed recursively 1810 * 1811 * @return an iterator that traverses statuses of the files 1812 * 1813 * @throws FileNotFoundException when the path does not exist; 1814 * IOException see specific implementation 1815 */ 1816 public RemoteIterator<LocatedFileStatus> listFiles( 1817 final Path f, final boolean recursive) 1818 throws FileNotFoundException, IOException { 1819 return new RemoteIterator<LocatedFileStatus>() { 1820 private Stack<RemoteIterator<LocatedFileStatus>> itors = 1821 new Stack<RemoteIterator<LocatedFileStatus>>(); 1822 private RemoteIterator<LocatedFileStatus> curItor = 1823 listLocatedStatus(f); 1824 private LocatedFileStatus curFile; 1825 1826 @Override 1827 public boolean hasNext() throws IOException { 1828 while (curFile == null) { 1829 if (curItor.hasNext()) { 1830 handleFileStat(curItor.next()); 1831 } else if (!itors.empty()) { 1832 curItor = itors.pop(); 1833 } else { 1834 return false; 1835 } 1836 
} 1837 return true; 1838 } 1839 1840 /** 1841 * Process the input stat. 1842 * If it is a file, return the file stat. 1843 * If it is a directory, traverse the directory if recursive is true; 1844 * ignore it if recursive is false. 1845 * @param stat input status 1846 * @throws IOException if any IO error occurs 1847 */ 1848 private void handleFileStat(LocatedFileStatus stat) throws IOException { 1849 if (stat.isFile()) { // file 1850 curFile = stat; 1851 } else if (recursive) { // directory 1852 itors.push(curItor); 1853 curItor = listLocatedStatus(stat.getPath()); 1854 } 1855 } 1856 1857 @Override 1858 public LocatedFileStatus next() throws IOException { 1859 if (hasNext()) { 1860 LocatedFileStatus result = curFile; 1861 curFile = null; 1862 return result; 1863 } 1864 throw new java.util.NoSuchElementException("No more entry in " + f); 1865 } 1866 }; 1867 } 1868 1869 /** Return the current user's home directory in this filesystem. 1870 * The default implementation returns "/user/$USER/". 1871 */ 1872 public Path getHomeDirectory() { 1873 return this.makeQualified( 1874 new Path("/user/"+System.getProperty("user.name"))); 1875 } 1876 1877 1878 /** 1879 * Set the current working directory for the given file system. All relative 1880 * paths will be resolved relative to it. 1881 * 1882 * @param new_dir 1883 */ 1884 public abstract void setWorkingDirectory(Path new_dir); 1885 1886 /** 1887 * Get the current working directory for the given file system 1888 * @return the directory pathname 1889 */ 1890 public abstract Path getWorkingDirectory(); 1891 1892 1893 /** 1894 * Note: with the new FilesContext class, getWorkingDirectory() 1895 * will be removed. 1896 * The working directory is implemented in FilesContext. 1897 * 1898 * Some file systems like LocalFileSystem have an initial workingDir 1899 * that we use as the starting workingDir. For other file systems 1900 * like HDFS there is no built in notion of an initial workingDir. 
1901 * 1902 * @return if there is built in notion of workingDir then it 1903 * is returned; else a null is returned. 1904 */ 1905 protected Path getInitialWorkingDirectory() { 1906 return null; 1907 } 1908 1909 /** 1910 * Call {@link #mkdirs(Path, FsPermission)} with default permission. 1911 */ 1912 public boolean mkdirs(Path f) throws IOException { 1913 return mkdirs(f, FsPermission.getDirDefault()); 1914 } 1915 1916 /** 1917 * Make the given file and all non-existent parents into 1918 * directories. Has the semantics of Unix 'mkdir -p'. 1919 * Existence of the directory hierarchy is not an error. 1920 * @param f path to create 1921 * @param permission to apply to f 1922 */ 1923 public abstract boolean mkdirs(Path f, FsPermission permission 1924 ) throws IOException; 1925 1926 /** 1927 * The src file is on the local disk. Add it to FS at 1928 * the given dst name and the source is kept intact afterwards 1929 * @param src path 1930 * @param dst path 1931 */ 1932 public void copyFromLocalFile(Path src, Path dst) 1933 throws IOException { 1934 copyFromLocalFile(false, src, dst); 1935 } 1936 1937 /** 1938 * The src files is on the local disk. Add it to FS at 1939 * the given dst name, removing the source afterwards. 1940 * @param srcs path 1941 * @param dst path 1942 */ 1943 public void moveFromLocalFile(Path[] srcs, Path dst) 1944 throws IOException { 1945 copyFromLocalFile(true, true, srcs, dst); 1946 } 1947 1948 /** 1949 * The src file is on the local disk. Add it to FS at 1950 * the given dst name, removing the source afterwards. 1951 * @param src path 1952 * @param dst path 1953 */ 1954 public void moveFromLocalFile(Path src, Path dst) 1955 throws IOException { 1956 copyFromLocalFile(true, src, dst); 1957 } 1958 1959 /** 1960 * The src file is on the local disk. Add it to FS at 1961 * the given dst name. 
1962 * delSrc indicates if the source should be removed 1963 * @param delSrc whether to delete the src 1964 * @param src path 1965 * @param dst path 1966 */ 1967 public void copyFromLocalFile(boolean delSrc, Path src, Path dst) 1968 throws IOException { 1969 copyFromLocalFile(delSrc, true, src, dst); 1970 } 1971 1972 /** 1973 * The src files are on the local disk. Add it to FS at 1974 * the given dst name. 1975 * delSrc indicates if the source should be removed 1976 * @param delSrc whether to delete the src 1977 * @param overwrite whether to overwrite an existing file 1978 * @param srcs array of paths which are source 1979 * @param dst path 1980 */ 1981 public void copyFromLocalFile(boolean delSrc, boolean overwrite, 1982 Path[] srcs, Path dst) 1983 throws IOException { 1984 Configuration conf = getConf(); 1985 FileUtil.copy(getLocal(conf), srcs, this, dst, delSrc, overwrite, conf); 1986 } 1987 1988 /** 1989 * The src file is on the local disk. Add it to FS at 1990 * the given dst name. 1991 * delSrc indicates if the source should be removed 1992 * @param delSrc whether to delete the src 1993 * @param overwrite whether to overwrite an existing file 1994 * @param src path 1995 * @param dst path 1996 */ 1997 public void copyFromLocalFile(boolean delSrc, boolean overwrite, 1998 Path src, Path dst) 1999 throws IOException { 2000 Configuration conf = getConf(); 2001 FileUtil.copy(getLocal(conf), src, this, dst, delSrc, overwrite, conf); 2002 } 2003 2004 /** 2005 * The src file is under FS, and the dst is on the local disk. 2006 * Copy it from FS control to the local dst name. 2007 * @param src path 2008 * @param dst path 2009 */ 2010 public void copyToLocalFile(Path src, Path dst) throws IOException { 2011 copyToLocalFile(false, src, dst); 2012 } 2013 2014 /** 2015 * The src file is under FS, and the dst is on the local disk. 2016 * Copy it from FS control to the local dst name. 
2017 * Remove the source afterwards 2018 * @param src path 2019 * @param dst path 2020 */ 2021 public void moveToLocalFile(Path src, Path dst) throws IOException { 2022 copyToLocalFile(true, src, dst); 2023 } 2024 2025 /** 2026 * The src file is under FS, and the dst is on the local disk. 2027 * Copy it from FS control to the local dst name. 2028 * delSrc indicates if the src will be removed or not. 2029 * @param delSrc whether to delete the src 2030 * @param src path 2031 * @param dst path 2032 */ 2033 public void copyToLocalFile(boolean delSrc, Path src, Path dst) 2034 throws IOException { 2035 copyToLocalFile(delSrc, src, dst, false); 2036 } 2037 2038 /** 2039 * The src file is under FS, and the dst is on the local disk. Copy it from FS 2040 * control to the local dst name. delSrc indicates if the src will be removed 2041 * or not. useRawLocalFileSystem indicates whether to use RawLocalFileSystem 2042 * as local file system or not. RawLocalFileSystem is non crc file system.So, 2043 * It will not create any crc files at local. 2044 * 2045 * @param delSrc 2046 * whether to delete the src 2047 * @param src 2048 * path 2049 * @param dst 2050 * path 2051 * @param useRawLocalFileSystem 2052 * whether to use RawLocalFileSystem as local file system or not. 2053 * 2054 * @throws IOException 2055 * - if any IO error 2056 */ 2057 public void copyToLocalFile(boolean delSrc, Path src, Path dst, 2058 boolean useRawLocalFileSystem) throws IOException { 2059 Configuration conf = getConf(); 2060 FileSystem local = null; 2061 if (useRawLocalFileSystem) { 2062 local = getLocal(conf).getRawFileSystem(); 2063 } else { 2064 local = getLocal(conf); 2065 } 2066 FileUtil.copy(this, src, local, dst, delSrc, conf); 2067 } 2068 2069 /** 2070 * Returns a local File that the user can write output to. The caller 2071 * provides both the eventual FS target name and the local working 2072 * file. If the FS is local, we write directly into the target. 
If 2073 * the FS is remote, we write into the tmp local area. 2074 * @param fsOutputFile path of output file 2075 * @param tmpLocalFile path of local tmp file 2076 */ 2077 public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) 2078 throws IOException { 2079 return tmpLocalFile; 2080 } 2081 2082 /** 2083 * Called when we're all done writing to the target. A local FS will 2084 * do nothing, because we've written to exactly the right place. A remote 2085 * FS will copy the contents of tmpLocalFile to the correct target at 2086 * fsOutputFile. 2087 * @param fsOutputFile path of output file 2088 * @param tmpLocalFile path to local tmp file 2089 */ 2090 public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) 2091 throws IOException { 2092 moveFromLocalFile(tmpLocalFile, fsOutputFile); 2093 } 2094 2095 /** 2096 * No more filesystem operations are needed. Will 2097 * release any held locks. 2098 */ 2099 @Override 2100 public void close() throws IOException { 2101 // delete all files that were marked as delete-on-exit. 2102 processDeleteOnExit(); 2103 CACHE.remove(this.key, this); 2104 } 2105 2106 /** Return the total size of all files in the filesystem.*/ 2107 public long getUsed() throws IOException{ 2108 long used = 0; 2109 FileStatus[] files = listStatus(new Path("/")); 2110 for(FileStatus file:files){ 2111 used += file.getLen(); 2112 } 2113 return used; 2114 } 2115 2116 /** 2117 * Get the block size for a particular file. 2118 * @param f the filename 2119 * @return the number of bytes in a block 2120 */ 2121 /** @deprecated Use getFileStatus() instead */ 2122 @Deprecated 2123 public long getBlockSize(Path f) throws IOException { 2124 return getFileStatus(f).getBlockSize(); 2125 } 2126 2127 /** 2128 * Return the number of bytes that large input files should be optimally 2129 * be split into to minimize i/o time. 
2130 * @deprecated use {@link #getDefaultBlockSize(Path)} instead 2131 */ 2132 @Deprecated 2133 public long getDefaultBlockSize() { 2134 // default to 32MB: large enough to minimize the impact of seeks 2135 return getConf().getLong("fs.local.block.size", 32 * 1024 * 1024); 2136 } 2137 2138 /** Return the number of bytes that large input files should be optimally 2139 * be split into to minimize i/o time. The given path will be used to 2140 * locate the actual filesystem. The full path does not have to exist. 2141 * @param f path of file 2142 * @return the default block size for the path's filesystem 2143 */ 2144 public long getDefaultBlockSize(Path f) { 2145 return getDefaultBlockSize(); 2146 } 2147 2148 /** 2149 * Get the default replication. 2150 * @deprecated use {@link #getDefaultReplication(Path)} instead 2151 */ 2152 @Deprecated 2153 public short getDefaultReplication() { return 1; } 2154 2155 /** 2156 * Get the default replication for a path. The given path will be used to 2157 * locate the actual filesystem. The full path does not have to exist. 2158 * @param path of the file 2159 * @return default replication for the path's filesystem 2160 */ 2161 public short getDefaultReplication(Path path) { 2162 return getDefaultReplication(); 2163 } 2164 2165 /** 2166 * Return a file status object that represents the path. 2167 * @param f The path we want information from 2168 * @return a FileStatus object 2169 * @throws FileNotFoundException when the path does not exist; 2170 * IOException see specific implementation 2171 */ 2172 public abstract FileStatus getFileStatus(Path f) throws IOException; 2173 2174 /** 2175 * Checks if the user can access a path. The mode specifies which access 2176 * checks to perform. If the requested permissions are granted, then the 2177 * method returns normally. If access is denied, then the method throws an 2178 * {@link AccessControlException}. 
2179 * <p/> 2180 * The default implementation of this method calls {@link #getFileStatus(Path)} 2181 * and checks the returned permissions against the requested permissions. 2182 * Note that the getFileStatus call will be subject to authorization checks. 2183 * Typically, this requires search (execute) permissions on each directory in 2184 * the path's prefix, but this is implementation-defined. Any file system 2185 * that provides a richer authorization model (such as ACLs) may override the 2186 * default implementation so that it checks against that model instead. 2187 * <p> 2188 * In general, applications should avoid using this method, due to the risk of 2189 * time-of-check/time-of-use race conditions. The permissions on a file may 2190 * change immediately after the access call returns. Most applications should 2191 * prefer running specific file system actions as the desired user represented 2192 * by a {@link UserGroupInformation}. 2193 * 2194 * @param path Path to check 2195 * @param mode type of access to check 2196 * @throws AccessControlException if access is denied 2197 * @throws FileNotFoundException if the path does not exist 2198 * @throws IOException see specific implementation 2199 */ 2200 @InterfaceAudience.LimitedPrivate({"HDFS", "Hive"}) 2201 public void access(Path path, FsAction mode) throws AccessControlException, 2202 FileNotFoundException, IOException { 2203 checkAccessPermissions(this.getFileStatus(path), mode); 2204 } 2205 2206 /** 2207 * This method provides the default implementation of 2208 * {@link #access(Path, FsAction)}. 
2209 * 2210 * @param stat FileStatus to check 2211 * @param mode type of access to check 2212 * @throws IOException for any error 2213 */ 2214 @InterfaceAudience.Private 2215 static void checkAccessPermissions(FileStatus stat, FsAction mode) 2216 throws IOException { 2217 FsPermission perm = stat.getPermission(); 2218 UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); 2219 String user = ugi.getShortUserName(); 2220 List<String> groups = Arrays.asList(ugi.getGroupNames()); 2221 if (user.equals(stat.getOwner())) { 2222 if (perm.getUserAction().implies(mode)) { 2223 return; 2224 } 2225 } else if (groups.contains(stat.getGroup())) { 2226 if (perm.getGroupAction().implies(mode)) { 2227 return; 2228 } 2229 } else { 2230 if (perm.getOtherAction().implies(mode)) { 2231 return; 2232 } 2233 } 2234 throw new AccessControlException(String.format( 2235 "Permission denied: user=%s, path=\"%s\":%s:%s:%s%s", user, stat.getPath(), 2236 stat.getOwner(), stat.getGroup(), stat.isDirectory() ? 
"d" : "-", perm)); 2237 } 2238 2239 /** 2240 * See {@link FileContext#fixRelativePart} 2241 */ 2242 protected Path fixRelativePart(Path p) { 2243 if (p.isUriPathAbsolute()) { 2244 return p; 2245 } else { 2246 return new Path(getWorkingDirectory(), p); 2247 } 2248 } 2249 2250 /** 2251 * See {@link FileContext#createSymlink(Path, Path, boolean)} 2252 */ 2253 public void createSymlink(final Path target, final Path link, 2254 final boolean createParent) throws AccessControlException, 2255 FileAlreadyExistsException, FileNotFoundException, 2256 ParentNotDirectoryException, UnsupportedFileSystemException, 2257 IOException { 2258 // Supporting filesystems should override this method 2259 throw new UnsupportedOperationException( 2260 "Filesystem does not support symlinks!"); 2261 } 2262 2263 /** 2264 * See {@link FileContext#getFileLinkStatus(Path)} 2265 */ 2266 public FileStatus getFileLinkStatus(final Path f) 2267 throws AccessControlException, FileNotFoundException, 2268 UnsupportedFileSystemException, IOException { 2269 // Supporting filesystems should override this method 2270 return getFileStatus(f); 2271 } 2272 2273 /** 2274 * See {@link AbstractFileSystem#supportsSymlinks()} 2275 */ 2276 public boolean supportsSymlinks() { 2277 return false; 2278 } 2279 2280 /** 2281 * See {@link FileContext#getLinkTarget(Path)} 2282 */ 2283 public Path getLinkTarget(Path f) throws IOException { 2284 // Supporting filesystems should override this method 2285 throw new UnsupportedOperationException( 2286 "Filesystem does not support symlinks!"); 2287 } 2288 2289 /** 2290 * See {@link AbstractFileSystem#getLinkTarget(Path)} 2291 */ 2292 protected Path resolveLink(Path f) throws IOException { 2293 // Supporting filesystems should override this method 2294 throw new UnsupportedOperationException( 2295 "Filesystem does not support symlinks!"); 2296 } 2297 2298 /** 2299 * Get the checksum of a file. 2300 * 2301 * @param f The file path 2302 * @return The file checksum. 
The default return value is null, 2303 * which indicates that no checksum algorithm is implemented 2304 * in the corresponding FileSystem. 2305 */ 2306 public FileChecksum getFileChecksum(Path f) throws IOException { 2307 return getFileChecksum(f, Long.MAX_VALUE); 2308 } 2309 2310 /** 2311 * Get the checksum of a file, from the beginning of the file till the 2312 * specific length. 2313 * @param f The file path 2314 * @param length The length of the file range for checksum calculation 2315 * @return The file checksum. 2316 */ 2317 public FileChecksum getFileChecksum(Path f, final long length) 2318 throws IOException { 2319 return null; 2320 } 2321 2322 /** 2323 * Set the verify checksum flag. This is only applicable if the 2324 * corresponding FileSystem supports checksum. By default doesn't do anything. 2325 * @param verifyChecksum 2326 */ 2327 public void setVerifyChecksum(boolean verifyChecksum) { 2328 //doesn't do anything 2329 } 2330 2331 /** 2332 * Set the write checksum flag. This is only applicable if the 2333 * corresponding FileSystem supports checksum. By default doesn't do anything. 2334 * @param writeChecksum 2335 */ 2336 public void setWriteChecksum(boolean writeChecksum) { 2337 //doesn't do anything 2338 } 2339 2340 /** 2341 * Returns a status object describing the use and capacity of the 2342 * file system. If the file system has multiple partitions, the 2343 * use and capacity of the root partition is reflected. 2344 * 2345 * @return a FsStatus object 2346 * @throws IOException 2347 * see specific implementation 2348 */ 2349 public FsStatus getStatus() throws IOException { 2350 return getStatus(null); 2351 } 2352 2353 /** 2354 * Returns a status object describing the use and capacity of the 2355 * file system. If the file system has multiple partitions, the 2356 * use and capacity of the partition pointed to by the specified 2357 * path is reflected. 2358 * @param p Path for which status should be obtained. null means 2359 * the default partition. 
2360 * @return a FsStatus object 2361 * @throws IOException 2362 * see specific implementation 2363 */ 2364 public FsStatus getStatus(Path p) throws IOException { 2365 return new FsStatus(Long.MAX_VALUE, 0, Long.MAX_VALUE); 2366 } 2367 2368 /** 2369 * Set permission of a path. 2370 * @param p 2371 * @param permission 2372 */ 2373 public void setPermission(Path p, FsPermission permission 2374 ) throws IOException { 2375 } 2376 2377 /** 2378 * Set owner of a path (i.e. a file or a directory). 2379 * The parameters username and groupname cannot both be null. 2380 * @param p The path 2381 * @param username If it is null, the original username remains unchanged. 2382 * @param groupname If it is null, the original groupname remains unchanged. 2383 */ 2384 public void setOwner(Path p, String username, String groupname 2385 ) throws IOException { 2386 } 2387 2388 /** 2389 * Set access time of a file 2390 * @param p The path 2391 * @param mtime Set the modification time of this file. 2392 * The number of milliseconds since Jan 1, 1970. 2393 * A value of -1 means that this call should not set modification time. 2394 * @param atime Set the access time of this file. 2395 * The number of milliseconds since Jan 1, 1970. 2396 * A value of -1 means that this call should not set access time. 2397 */ 2398 public void setTimes(Path p, long mtime, long atime 2399 ) throws IOException { 2400 } 2401 2402 /** 2403 * Create a snapshot with a default name. 2404 * @param path The directory where snapshots will be taken. 2405 * @return the snapshot path. 2406 */ 2407 public final Path createSnapshot(Path path) throws IOException { 2408 return createSnapshot(path, null); 2409 } 2410 2411 /** 2412 * Create a snapshot 2413 * @param path The directory where snapshots will be taken. 2414 * @param snapshotName The name of the snapshot 2415 * @return the snapshot path. 
2416 */ 2417 public Path createSnapshot(Path path, String snapshotName) 2418 throws IOException { 2419 throw new UnsupportedOperationException(getClass().getSimpleName() 2420 + " doesn't support createSnapshot"); 2421 } 2422 2423 /** 2424 * Rename a snapshot 2425 * @param path The directory path where the snapshot was taken 2426 * @param snapshotOldName Old name of the snapshot 2427 * @param snapshotNewName New name of the snapshot 2428 * @throws IOException 2429 */ 2430 public void renameSnapshot(Path path, String snapshotOldName, 2431 String snapshotNewName) throws IOException { 2432 throw new UnsupportedOperationException(getClass().getSimpleName() 2433 + " doesn't support renameSnapshot"); 2434 } 2435 2436 /** 2437 * Delete a snapshot of a directory 2438 * @param path The directory that the to-be-deleted snapshot belongs to 2439 * @param snapshotName The name of the snapshot 2440 */ 2441 public void deleteSnapshot(Path path, String snapshotName) 2442 throws IOException { 2443 throw new UnsupportedOperationException(getClass().getSimpleName() 2444 + " doesn't support deleteSnapshot"); 2445 } 2446 2447 /** 2448 * Modifies ACL entries of files and directories. This method can add new ACL 2449 * entries or modify the permissions on existing ACL entries. All existing 2450 * ACL entries that are not specified in this call are retained without 2451 * changes. (Modifications are merged into the current ACL.) 2452 * 2453 * @param path Path to modify 2454 * @param aclSpec List<AclEntry> describing modifications 2455 * @throws IOException if an ACL could not be modified 2456 */ 2457 public void modifyAclEntries(Path path, List<AclEntry> aclSpec) 2458 throws IOException { 2459 throw new UnsupportedOperationException(getClass().getSimpleName() 2460 + " doesn't support modifyAclEntries"); 2461 } 2462 2463 /** 2464 * Removes ACL entries from files and directories. Other ACL entries are 2465 * retained. 
2466 * 2467 * @param path Path to modify 2468 * @param aclSpec List<AclEntry> describing entries to remove 2469 * @throws IOException if an ACL could not be modified 2470 */ 2471 public void removeAclEntries(Path path, List<AclEntry> aclSpec) 2472 throws IOException { 2473 throw new UnsupportedOperationException(getClass().getSimpleName() 2474 + " doesn't support removeAclEntries"); 2475 } 2476 2477 /** 2478 * Removes all default ACL entries from files and directories. 2479 * 2480 * @param path Path to modify 2481 * @throws IOException if an ACL could not be modified 2482 */ 2483 public void removeDefaultAcl(Path path) 2484 throws IOException { 2485 throw new UnsupportedOperationException(getClass().getSimpleName() 2486 + " doesn't support removeDefaultAcl"); 2487 } 2488 2489 /** 2490 * Removes all but the base ACL entries of files and directories. The entries 2491 * for user, group, and others are retained for compatibility with permission 2492 * bits. 2493 * 2494 * @param path Path to modify 2495 * @throws IOException if an ACL could not be removed 2496 */ 2497 public void removeAcl(Path path) 2498 throws IOException { 2499 throw new UnsupportedOperationException(getClass().getSimpleName() 2500 + " doesn't support removeAcl"); 2501 } 2502 2503 /** 2504 * Fully replaces ACL of files and directories, discarding all existing 2505 * entries. 2506 * 2507 * @param path Path to modify 2508 * @param aclSpec List<AclEntry> describing modifications, must include entries 2509 * for user, group, and others for compatibility with permission bits. 2510 * @throws IOException if an ACL could not be modified 2511 */ 2512 public void setAcl(Path path, List<AclEntry> aclSpec) throws IOException { 2513 throw new UnsupportedOperationException(getClass().getSimpleName() 2514 + " doesn't support setAcl"); 2515 } 2516 2517 /** 2518 * Gets the ACL of a file or directory. 
2519 * 2520 * @param path Path to get 2521 * @return AclStatus describing the ACL of the file or directory 2522 * @throws IOException if an ACL could not be read 2523 */ 2524 public AclStatus getAclStatus(Path path) throws IOException { 2525 throw new UnsupportedOperationException(getClass().getSimpleName() 2526 + " doesn't support getAclStatus"); 2527 } 2528 2529 /** 2530 * Set an xattr of a file or directory. 2531 * The name must be prefixed with the namespace followed by ".". For example, 2532 * "user.attr". 2533 * <p/> 2534 * Refer to the HDFS extended attributes user documentation for details. 2535 * 2536 * @param path Path to modify 2537 * @param name xattr name. 2538 * @param value xattr value. 2539 * @throws IOException 2540 */ 2541 public void setXAttr(Path path, String name, byte[] value) 2542 throws IOException { 2543 setXAttr(path, name, value, EnumSet.of(XAttrSetFlag.CREATE, 2544 XAttrSetFlag.REPLACE)); 2545 } 2546 2547 /** 2548 * Set an xattr of a file or directory. 2549 * The name must be prefixed with the namespace followed by ".". For example, 2550 * "user.attr". 2551 * <p/> 2552 * Refer to the HDFS extended attributes user documentation for details. 2553 * 2554 * @param path Path to modify 2555 * @param name xattr name. 2556 * @param value xattr value. 2557 * @param flag xattr set flag 2558 * @throws IOException 2559 */ 2560 public void setXAttr(Path path, String name, byte[] value, 2561 EnumSet<XAttrSetFlag> flag) throws IOException { 2562 throw new UnsupportedOperationException(getClass().getSimpleName() 2563 + " doesn't support setXAttr"); 2564 } 2565 2566 /** 2567 * Get an xattr name and value for a file or directory. 2568 * The name must be prefixed with the namespace followed by ".". For example, 2569 * "user.attr". 2570 * <p/> 2571 * Refer to the HDFS extended attributes user documentation for details. 2572 * 2573 * @param path Path to get extended attribute 2574 * @param name xattr name. 2575 * @return byte[] xattr value. 
2576 * @throws IOException 2577 */ 2578 public byte[] getXAttr(Path path, String name) throws IOException { 2579 throw new UnsupportedOperationException(getClass().getSimpleName() 2580 + " doesn't support getXAttr"); 2581 } 2582 2583 /** 2584 * Get all of the xattr name/value pairs for a file or directory. 2585 * Only those xattrs which the logged-in user has permissions to view 2586 * are returned. 2587 * <p/> 2588 * Refer to the HDFS extended attributes user documentation for details. 2589 * 2590 * @param path Path to get extended attributes 2591 * @return Map<String, byte[]> describing the XAttrs of the file or directory 2592 * @throws IOException 2593 */ 2594 public Map<String, byte[]> getXAttrs(Path path) throws IOException { 2595 throw new UnsupportedOperationException(getClass().getSimpleName() 2596 + " doesn't support getXAttrs"); 2597 } 2598 2599 /** 2600 * Get all of the xattrs name/value pairs for a file or directory. 2601 * Only those xattrs which the logged-in user has permissions to view 2602 * are returned. 2603 * <p/> 2604 * Refer to the HDFS extended attributes user documentation for details. 2605 * 2606 * @param path Path to get extended attributes 2607 * @param names XAttr names. 2608 * @return Map<String, byte[]> describing the XAttrs of the file or directory 2609 * @throws IOException 2610 */ 2611 public Map<String, byte[]> getXAttrs(Path path, List<String> names) 2612 throws IOException { 2613 throw new UnsupportedOperationException(getClass().getSimpleName() 2614 + " doesn't support getXAttrs"); 2615 } 2616 2617 /** 2618 * Get all of the xattr names for a file or directory. 2619 * Only those xattr names which the logged-in user has permissions to view 2620 * are returned. 2621 * <p/> 2622 * Refer to the HDFS extended attributes user documentation for details. 
2623 * 2624 * @param path Path to get extended attributes 2625 * @return List<String> of the XAttr names of the file or directory 2626 * @throws IOException 2627 */ 2628 public List<String> listXAttrs(Path path) throws IOException { 2629 throw new UnsupportedOperationException(getClass().getSimpleName() 2630 + " doesn't support listXAttrs"); 2631 } 2632 2633 /** 2634 * Remove an xattr of a file or directory. 2635 * The name must be prefixed with the namespace followed by ".". For example, 2636 * "user.attr". 2637 * <p/> 2638 * Refer to the HDFS extended attributes user documentation for details. 2639 * 2640 * @param path Path to remove extended attribute 2641 * @param name xattr name 2642 * @throws IOException 2643 */ 2644 public void removeXAttr(Path path, String name) throws IOException { 2645 throw new UnsupportedOperationException(getClass().getSimpleName() 2646 + " doesn't support removeXAttr"); 2647 } 2648 2649 // making it volatile to be able to do a double checked locking 2650 private volatile static boolean FILE_SYSTEMS_LOADED = false; 2651 2652 private static final Map<String, Class<? extends FileSystem>> 2653 SERVICE_FILE_SYSTEMS = new HashMap<String, Class<? extends FileSystem>>(); 2654 2655 private static void loadFileSystems() { 2656 synchronized (FileSystem.class) { 2657 if (!FILE_SYSTEMS_LOADED) { 2658 ServiceLoader<FileSystem> serviceLoader = ServiceLoader.load(FileSystem.class); 2659 for (FileSystem fs : serviceLoader) { 2660 SERVICE_FILE_SYSTEMS.put(fs.getScheme(), fs.getClass()); 2661 } 2662 FILE_SYSTEMS_LOADED = true; 2663 } 2664 } 2665 } 2666 2667 public static Class<? extends FileSystem> getFileSystemClass(String scheme, 2668 Configuration conf) throws IOException { 2669 if (!FILE_SYSTEMS_LOADED) { 2670 loadFileSystems(); 2671 } 2672 Class<? extends FileSystem> clazz = null; 2673 if (conf != null) { 2674 clazz = (Class<? extends FileSystem>) conf.getClass("fs." 
+ scheme + ".impl", null); 2675 } 2676 if (clazz == null) { 2677 clazz = SERVICE_FILE_SYSTEMS.get(scheme); 2678 } 2679 if (clazz == null) { 2680 throw new IOException("No FileSystem for scheme: " + scheme); 2681 } 2682 return clazz; 2683 } 2684 2685 private static FileSystem createFileSystem(URI uri, Configuration conf 2686 ) throws IOException { 2687 Class<?> clazz = getFileSystemClass(uri.getScheme(), conf); 2688 FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf); 2689 fs.initialize(uri, conf); 2690 return fs; 2691 } 2692 2693 /** Caching FileSystem objects */ 2694 static class Cache { 2695 private final ClientFinalizer clientFinalizer = new ClientFinalizer(); 2696 2697 private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>(); 2698 private final Set<Key> toAutoClose = new HashSet<Key>(); 2699 2700 /** A variable that makes all objects in the cache unique */ 2701 private static AtomicLong unique = new AtomicLong(1); 2702 2703 FileSystem get(URI uri, Configuration conf) throws IOException{ 2704 Key key = new Key(uri, conf); 2705 return getInternal(uri, conf, key); 2706 } 2707 2708 /** The objects inserted into the cache using this method are all unique */ 2709 FileSystem getUnique(URI uri, Configuration conf) throws IOException{ 2710 Key key = new Key(uri, conf, unique.getAndIncrement()); 2711 return getInternal(uri, conf, key); 2712 } 2713 2714 private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{ 2715 FileSystem fs; 2716 synchronized (this) { 2717 fs = map.get(key); 2718 } 2719 if (fs != null) { 2720 return fs; 2721 } 2722 2723 fs = createFileSystem(uri, conf); 2724 synchronized (this) { // refetch the lock again 2725 FileSystem oldfs = map.get(key); 2726 if (oldfs != null) { // a file system is created while lock is releasing 2727 fs.close(); // close the new file system 2728 return oldfs; // return the old file system 2729 } 2730 2731 // now insert the new file system into the map 2732 if 
(map.isEmpty() 2733 && !ShutdownHookManager.get().isShutdownInProgress()) { 2734 ShutdownHookManager.get().addShutdownHook(clientFinalizer, SHUTDOWN_HOOK_PRIORITY); 2735 } 2736 fs.key = key; 2737 map.put(key, fs); 2738 if (conf.getBoolean("fs.automatic.close", true)) { 2739 toAutoClose.add(key); 2740 } 2741 return fs; 2742 } 2743 } 2744 2745 synchronized void remove(Key key, FileSystem fs) { 2746 if (map.containsKey(key) && fs == map.get(key)) { 2747 map.remove(key); 2748 toAutoClose.remove(key); 2749 } 2750 } 2751 2752 synchronized void closeAll() throws IOException { 2753 closeAll(false); 2754 } 2755 2756 /** 2757 * Close all FileSystem instances in the Cache. 2758 * @param onlyAutomatic only close those that are marked for automatic closing 2759 */ 2760 synchronized void closeAll(boolean onlyAutomatic) throws IOException { 2761 List<IOException> exceptions = new ArrayList<IOException>(); 2762 2763 // Make a copy of the keys in the map since we'll be modifying 2764 // the map while iterating over it, which isn't safe. 
2765 List<Key> keys = new ArrayList<Key>(); 2766 keys.addAll(map.keySet()); 2767 2768 for (Key key : keys) { 2769 final FileSystem fs = map.get(key); 2770 2771 if (onlyAutomatic && !toAutoClose.contains(key)) { 2772 continue; 2773 } 2774 2775 //remove from cache 2776 remove(key, fs); 2777 2778 if (fs != null) { 2779 try { 2780 fs.close(); 2781 } 2782 catch(IOException ioe) { 2783 exceptions.add(ioe); 2784 } 2785 } 2786 } 2787 2788 if (!exceptions.isEmpty()) { 2789 throw MultipleIOException.createIOException(exceptions); 2790 } 2791 } 2792 2793 private class ClientFinalizer implements Runnable { 2794 @Override 2795 public synchronized void run() { 2796 try { 2797 closeAll(true); 2798 } catch (IOException e) { 2799 LOG.info("FileSystem.Cache.closeAll() threw an exception:\n" + e); 2800 } 2801 } 2802 } 2803 2804 synchronized void closeAll(UserGroupInformation ugi) throws IOException { 2805 List<FileSystem> targetFSList = new ArrayList<FileSystem>(); 2806 //Make a pass over the list and collect the filesystems to close 2807 //we cannot close inline since close() removes the entry from the Map 2808 for (Map.Entry<Key, FileSystem> entry : map.entrySet()) { 2809 final Key key = entry.getKey(); 2810 final FileSystem fs = entry.getValue(); 2811 if (ugi.equals(key.ugi) && fs != null) { 2812 targetFSList.add(fs); 2813 } 2814 } 2815 List<IOException> exceptions = new ArrayList<IOException>(); 2816 //now make a pass over the target list and close each 2817 for (FileSystem fs : targetFSList) { 2818 try { 2819 fs.close(); 2820 } 2821 catch(IOException ioe) { 2822 exceptions.add(ioe); 2823 } 2824 } 2825 if (!exceptions.isEmpty()) { 2826 throw MultipleIOException.createIOException(exceptions); 2827 } 2828 } 2829 2830 /** FileSystem.Cache.Key */ 2831 static class Key { 2832 final String scheme; 2833 final String authority; 2834 final UserGroupInformation ugi; 2835 final long unique; // an artificial way to make a key unique 2836 2837 Key(URI uri, Configuration conf) throws 
IOException { 2838 this(uri, conf, 0); 2839 } 2840 2841 Key(URI uri, Configuration conf, long unique) throws IOException { 2842 scheme = uri.getScheme()==null ? 2843 "" : StringUtils.toLowerCase(uri.getScheme()); 2844 authority = uri.getAuthority()==null ? 2845 "" : StringUtils.toLowerCase(uri.getAuthority()); 2846 this.unique = unique; 2847 2848 this.ugi = UserGroupInformation.getCurrentUser(); 2849 } 2850 2851 @Override 2852 public int hashCode() { 2853 return (scheme + authority).hashCode() + ugi.hashCode() + (int)unique; 2854 } 2855 2856 static boolean isEqual(Object a, Object b) { 2857 return a == b || (a != null && a.equals(b)); 2858 } 2859 2860 @Override 2861 public boolean equals(Object obj) { 2862 if (obj == this) { 2863 return true; 2864 } 2865 if (obj != null && obj instanceof Key) { 2866 Key that = (Key)obj; 2867 return isEqual(this.scheme, that.scheme) 2868 && isEqual(this.authority, that.authority) 2869 && isEqual(this.ugi, that.ugi) 2870 && (this.unique == that.unique); 2871 } 2872 return false; 2873 } 2874 2875 @Override 2876 public String toString() { 2877 return "("+ugi.toString() + ")@" + scheme + "://" + authority; 2878 } 2879 } 2880 } 2881 2882 /** 2883 * Tracks statistics about how many reads, writes, and so forth have been 2884 * done in a FileSystem. 2885 * 2886 * Since there is only one of these objects per FileSystem, there will 2887 * typically be many threads writing to this object. Almost every operation 2888 * on an open file will involve a write to this object. In contrast, reading 2889 * statistics is done infrequently by most programs, and not at all by others. 2890 * Hence, this is optimized for writes. 2891 * 2892 * Each thread writes to its own thread-local area of memory. This removes 2893 * contention and allows us to scale up to many, many threads. To read 2894 * statistics, the reader thread totals up the contents of all of the 2895 * thread-local data areas. 
2896 */ 2897 public static final class Statistics { 2898 /** 2899 * Statistics data. 2900 * 2901 * There is only a single writer to thread-local StatisticsData objects. 2902 * Hence, volatile is adequate here-- we do not need AtomicLong or similar 2903 * to prevent lost updates. 2904 * The Java specification guarantees that updates to volatile longs will 2905 * be perceived as atomic with respect to other threads, which is all we 2906 * need. 2907 */ 2908 public static class StatisticsData { 2909 volatile long bytesRead; 2910 volatile long bytesWritten; 2911 volatile long readOps; 2912 volatile long largeReadOps; 2913 volatile long writeOps; 2914 /** 2915 * Stores a weak reference to the thread owning this StatisticsData. 2916 * This allows us to remove StatisticsData objects that pertain to 2917 * threads that no longer exist. 2918 */ 2919 final WeakReference<Thread> owner; 2920 2921 StatisticsData(WeakReference<Thread> owner) { 2922 this.owner = owner; 2923 } 2924 2925 /** 2926 * Add another StatisticsData object to this one. 2927 */ 2928 void add(StatisticsData other) { 2929 this.bytesRead += other.bytesRead; 2930 this.bytesWritten += other.bytesWritten; 2931 this.readOps += other.readOps; 2932 this.largeReadOps += other.largeReadOps; 2933 this.writeOps += other.writeOps; 2934 } 2935 2936 /** 2937 * Negate the values of all statistics. 
2938 */ 2939 void negate() { 2940 this.bytesRead = -this.bytesRead; 2941 this.bytesWritten = -this.bytesWritten; 2942 this.readOps = -this.readOps; 2943 this.largeReadOps = -this.largeReadOps; 2944 this.writeOps = -this.writeOps; 2945 } 2946 2947 @Override 2948 public String toString() { 2949 return bytesRead + " bytes read, " + bytesWritten + " bytes written, " 2950 + readOps + " read ops, " + largeReadOps + " large read ops, " 2951 + writeOps + " write ops"; 2952 } 2953 2954 public long getBytesRead() { 2955 return bytesRead; 2956 } 2957 2958 public long getBytesWritten() { 2959 return bytesWritten; 2960 } 2961 2962 public int getReadOps() { 2963 return (int) readOps; 2964 } 2965 2966 public long getNumReadOps() { 2967 return readOps; 2968 } 2969 2970 public int getLargeReadOps() { 2971 return (int) largeReadOps; 2972 } 2973 2974 public long getNumLargeReadOps() { 2975 return largeReadOps; 2976 } 2977 2978 public int getWriteOps() { 2979 return (int) writeOps; 2980 } 2981 2982 public long getNumWriteOps() { 2983 return writeOps; 2984 } 2985 } 2986 2987 private interface StatisticsAggregator<T> { 2988 void accept(StatisticsData data); 2989 T aggregate(); 2990 } 2991 2992 private final String scheme; 2993 2994 /** 2995 * rootData is data that doesn't belong to any thread, but will be added 2996 * to the totals. This is useful for making copies of Statistics objects, 2997 * and for storing data that pertains to threads that have been garbage 2998 * collected. Protected by the Statistics lock. 2999 */ 3000 private final StatisticsData rootData; 3001 3002 /** 3003 * Thread-local data. 3004 */ 3005 private final ThreadLocal<StatisticsData> threadData; 3006 3007 /** 3008 * List of all thread-local data areas. Protected by the Statistics lock. 
3009 */ 3010 private LinkedList<StatisticsData> allData; 3011 3012 public Statistics(String scheme) { 3013 this.scheme = scheme; 3014 this.rootData = new StatisticsData(null); 3015 this.threadData = new ThreadLocal<StatisticsData>(); 3016 this.allData = null; 3017 } 3018 3019 /** 3020 * Copy constructor. 3021 * 3022 * @param other The input Statistics object which is cloned. 3023 */ 3024 public Statistics(Statistics other) { 3025 this.scheme = other.scheme; 3026 this.rootData = new StatisticsData(null); 3027 other.visitAll(new StatisticsAggregator<Void>() { 3028 @Override 3029 public void accept(StatisticsData data) { 3030 rootData.add(data); 3031 } 3032 3033 public Void aggregate() { 3034 return null; 3035 } 3036 }); 3037 this.threadData = new ThreadLocal<StatisticsData>(); 3038 } 3039 3040 /** 3041 * Get or create the thread-local data associated with the current thread. 3042 */ 3043 public StatisticsData getThreadStatistics() { 3044 StatisticsData data = threadData.get(); 3045 if (data == null) { 3046 data = new StatisticsData( 3047 new WeakReference<Thread>(Thread.currentThread())); 3048 threadData.set(data); 3049 synchronized(this) { 3050 if (allData == null) { 3051 allData = new LinkedList<StatisticsData>(); 3052 } 3053 allData.add(data); 3054 } 3055 } 3056 return data; 3057 } 3058 3059 /** 3060 * Increment the bytes read in the statistics 3061 * @param newBytes the additional bytes read 3062 */ 3063 public void incrementBytesRead(long newBytes) { 3064 getThreadStatistics().bytesRead += newBytes; 3065 } 3066 3067 /** 3068 * Increment the bytes written in the statistics 3069 * @param newBytes the additional bytes written 3070 */ 3071 public void incrementBytesWritten(long newBytes) { 3072 getThreadStatistics().bytesWritten += newBytes; 3073 } 3074 3075 /** 3076 * Increment the number of read operations 3077 * @param count number of read operations 3078 */ 3079 public void incrementReadOps(int count) { 3080 getThreadStatistics().readOps += count; 3081 } 3082 
3083 /** 3084 * Increment the number of large read operations 3085 * @param count number of large read operations 3086 */ 3087 public void incrementLargeReadOps(int count) { 3088 getThreadStatistics().largeReadOps += count; 3089 } 3090 3091 /** 3092 * Increment the number of write operations 3093 * @param count number of write operations 3094 */ 3095 public void incrementWriteOps(int count) { 3096 getThreadStatistics().writeOps += count; 3097 } 3098 3099 /** 3100 * Apply the given aggregator to all StatisticsData objects associated with 3101 * this Statistics object. 3102 * 3103 * For each StatisticsData object, we will call accept on the visitor. 3104 * Finally, at the end, we will call aggregate to get the final total. 3105 * 3106 * @param The visitor to use. 3107 * @return The total. 3108 */ 3109 private synchronized <T> T visitAll(StatisticsAggregator<T> visitor) { 3110 visitor.accept(rootData); 3111 if (allData != null) { 3112 for (Iterator<StatisticsData> iter = allData.iterator(); 3113 iter.hasNext(); ) { 3114 StatisticsData data = iter.next(); 3115 visitor.accept(data); 3116 if (data.owner.get() == null) { 3117 /* 3118 * If the thread that created this thread-local data no 3119 * longer exists, remove the StatisticsData from our list 3120 * and fold the values into rootData. 
3121 */ 3122 rootData.add(data); 3123 iter.remove(); 3124 } 3125 } 3126 } 3127 return visitor.aggregate(); 3128 } 3129 3130 /** 3131 * Get the total number of bytes read 3132 * @return the number of bytes 3133 */ 3134 public long getBytesRead() { 3135 return visitAll(new StatisticsAggregator<Long>() { 3136 private long bytesRead = 0; 3137 3138 @Override 3139 public void accept(StatisticsData data) { 3140 bytesRead += data.bytesRead; 3141 } 3142 3143 public Long aggregate() { 3144 return bytesRead; 3145 } 3146 }); 3147 } 3148 3149 /** 3150 * Get the total number of bytes written 3151 * @return the number of bytes 3152 */ 3153 public long getBytesWritten() { 3154 return visitAll(new StatisticsAggregator<Long>() { 3155 private long bytesWritten = 0; 3156 3157 @Override 3158 public void accept(StatisticsData data) { 3159 bytesWritten += data.bytesWritten; 3160 } 3161 3162 public Long aggregate() { 3163 return bytesWritten; 3164 } 3165 }); 3166 } 3167 3168 /** 3169 * Get the number of file system read operations such as list files 3170 * @return number of read operations 3171 */ 3172 public int getReadOps() { 3173 return (int) getNumReadOps(); 3174 } 3175 3176 /** 3177 * Get the number of file system read operations such as list files 3178 * @return number of read operations as a long type 3179 */ 3180 public long getNumReadOps() { 3181 return visitAll(new StatisticsAggregator<Long>() { 3182 private long readOps = 0; 3183 3184 @Override 3185 public void accept(StatisticsData data) { 3186 readOps += data.readOps; 3187 readOps += data.largeReadOps; 3188 } 3189 3190 public Long aggregate() { 3191 return readOps; 3192 } 3193 }); 3194 } 3195 3196 /** 3197 * Get the number of large file system read operations such as list files 3198 * under a large directory 3199 * @return number of large read operations 3200 */ 3201 public int getLargeReadOps() { 3202 return (int) getNumLargeReadOps(); 3203 } 3204 3205 /** 3206 * Get the number of large file system read operations such as 
list files 3207 * under a large directory 3208 * @return number of large read operations as a long type 3209 */ 3210 public long getNumLargeReadOps() { 3211 return visitAll(new StatisticsAggregator<Long>() { 3212 private long largeReadOps = 0; 3213 3214 @Override 3215 public void accept(StatisticsData data) { 3216 largeReadOps += data.largeReadOps; 3217 } 3218 3219 public Long aggregate() { 3220 return largeReadOps; 3221 } 3222 }); 3223 } 3224 3225 /** 3226 * Get the number of file system write operations such as create, append 3227 * rename etc. 3228 * @return number of write operations 3229 */ 3230 public int getWriteOps() { 3231 return (int) getNumWriteOps(); 3232 } 3233 3234 /** 3235 * Get the number of file system write operations such as create, append 3236 * rename etc. 3237 * @return number of write operations as a long type 3238 */ 3239 public long getNumWriteOps() { 3240 return visitAll(new StatisticsAggregator<Long>() { 3241 private long writeOps = 0; 3242 3243 @Override 3244 public void accept(StatisticsData data) { 3245 writeOps += data.writeOps; 3246 } 3247 3248 public Long aggregate() { 3249 return writeOps; 3250 } 3251 }); 3252 } 3253 3254 3255 @Override 3256 public String toString() { 3257 return visitAll(new StatisticsAggregator<String>() { 3258 private StatisticsData total = new StatisticsData(null); 3259 3260 @Override 3261 public void accept(StatisticsData data) { 3262 total.add(data); 3263 } 3264 3265 public String aggregate() { 3266 return total.toString(); 3267 } 3268 }); 3269 } 3270 3271 /** 3272 * Resets all statistics to 0. 3273 * 3274 * In order to reset, we add up all the thread-local statistics data, and 3275 * set rootData to the negative of that. 3276 * 3277 * This may seem like a counterintuitive way to reset the statsitics. Why 3278 * can't we just zero out all the thread-local data? Well, thread-local 3279 * data can only be modified by the thread that owns it. 
If we tried to 3280 * modify the thread-local data from this thread, our modification might get 3281 * interleaved with a read-modify-write operation done by the thread that 3282 * owns the data. That would result in our update getting lost. 3283 * 3284 * The approach used here avoids this problem because it only ever reads 3285 * (not writes) the thread-local data. Both reads and writes to rootData 3286 * are done under the lock, so we're free to modify rootData from any thread 3287 * that holds the lock. 3288 */ 3289 public void reset() { 3290 visitAll(new StatisticsAggregator<Void>() { 3291 private StatisticsData total = new StatisticsData(null); 3292 3293 @Override 3294 public void accept(StatisticsData data) { 3295 total.add(data); 3296 } 3297 3298 public Void aggregate() { 3299 total.negate(); 3300 rootData.add(total); 3301 return null; 3302 } 3303 }); 3304 } 3305 3306 /** 3307 * Get the uri scheme associated with this statistics object. 3308 * @return the schema associated with this set of statistics 3309 */ 3310 public String getScheme() { 3311 return scheme; 3312 } 3313 } 3314 3315 /** 3316 * Get the Map of Statistics object indexed by URI Scheme. 
3317 * @return a Map having a key as URI scheme and value as Statistics object 3318 * @deprecated use {@link #getAllStatistics} instead 3319 */ 3320 @Deprecated 3321 public static synchronized Map<String, Statistics> getStatistics() { 3322 Map<String, Statistics> result = new HashMap<String, Statistics>(); 3323 for(Statistics stat: statisticsTable.values()) { 3324 result.put(stat.getScheme(), stat); 3325 } 3326 return result; 3327 } 3328 3329 /** 3330 * Return the FileSystem classes that have Statistics 3331 */ 3332 public static synchronized List<Statistics> getAllStatistics() { 3333 return new ArrayList<Statistics>(statisticsTable.values()); 3334 } 3335 3336 /** 3337 * Get the statistics for a particular file system 3338 * @param cls the class to lookup 3339 * @return a statistics object 3340 */ 3341 public static synchronized 3342 Statistics getStatistics(String scheme, Class<? extends FileSystem> cls) { 3343 Statistics result = statisticsTable.get(cls); 3344 if (result == null) { 3345 result = new Statistics(scheme); 3346 statisticsTable.put(cls, result); 3347 } 3348 return result; 3349 } 3350 3351 /** 3352 * Reset all statistics for all file systems 3353 */ 3354 public static synchronized void clearStatistics() { 3355 for(Statistics stat: statisticsTable.values()) { 3356 stat.reset(); 3357 } 3358 } 3359 3360 /** 3361 * Print all statistics for all file systems 3362 */ 3363 public static synchronized 3364 void printStatistics() throws IOException { 3365 for (Map.Entry<Class<? extends FileSystem>, Statistics> pair: 3366 statisticsTable.entrySet()) { 3367 System.out.println(" FileSystem " + pair.getKey().getName() + 3368 ": " + pair.getValue()); 3369 } 3370 } 3371 3372 // But this causes issues for existing MR1 class TaskRunner which needs to 3373 // create symlink using local file system. So enabling symlinks. 
3374 private static boolean symlinksEnabled = true; 3375 3376 private static Configuration conf = null; 3377 3378 @VisibleForTesting 3379 public static boolean areSymlinksEnabled() { 3380 return symlinksEnabled; 3381 } 3382 3383 @VisibleForTesting 3384 public static void enableSymlinks() { 3385 symlinksEnabled = true; 3386 } 3387 3388 /** 3389 * MapR addition: 3390 * Opens an FSDataInputStream at the indicated fid. 3391 * @param fid the fid to open 3392 * @param ips the list of ip/ports which this fid belongs to 3393 * @param chunkSize the chunkSize of the file corresponding to the fid 3394 * @param fileSize the size of the file corresponding to the fid 3395 */ 3396 public FSDataInputStream openFid(String fid, long[] ips, 3397 long chunkSize, long fileSize) throws IOException { 3398 throw new UnsupportedOperationException("See concrete FS for implementation"); 3399 } 3400 3401 /** 3402 * MapR addition: 3403 * Opens an FSDataInputStream at the indicated fid. 3404 * @param pfid the parent-fid of the file to open 3405 * @param file the file to be opened 3406 */ 3407 public FSDataInputStream openFid(String pfid, String file, long [] ips) 3408 throws IOException { 3409 throw new UnsupportedOperationException("See concrete FS for implementation"); 3410 } 3411 3412 /** 3413 * MapR - get Zookeeper connect string for the default cluster. 3414 */ 3415 public String getZkConnectString() throws IOException { 3416 throw new UnsupportedOperationException("See concrete FS for implementation"); 3417 } 3418 3419 /** 3420 * MapR - get jobTracker addresses given by cluster name 3421 * in mapred.job.tracker 3422 */ 3423 public InetSocketAddress[] getJobTrackerAddrs(Configuration conf) 3424 throws IOException { 3425 throw new UnsupportedOperationException("See concrete FS for implementation"); 3426 } 3427}