001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import static org.apache.hadoop.util.Time.now;
021
022import java.io.Closeable;
023import java.io.FileNotFoundException;
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.List;
028import java.util.concurrent.TimeUnit;
029import java.util.concurrent.locks.Condition;
030import java.util.concurrent.locks.ReentrantReadWriteLock;
031
032import org.apache.hadoop.HadoopIllegalArgumentException;
033import org.apache.hadoop.conf.Configuration;
034import org.apache.hadoop.fs.ContentSummary;
035import org.apache.hadoop.fs.FileAlreadyExistsException;
036import org.apache.hadoop.fs.Options;
037import org.apache.hadoop.fs.Options.Rename;
038import org.apache.hadoop.fs.ParentNotDirectoryException;
039import org.apache.hadoop.fs.Path;
040import org.apache.hadoop.fs.PathIsNotDirectoryException;
041import org.apache.hadoop.fs.UnresolvedLinkException;
042import org.apache.hadoop.fs.permission.AclEntry;
043import org.apache.hadoop.fs.permission.AclStatus;
044import org.apache.hadoop.fs.permission.FsAction;
045import org.apache.hadoop.fs.permission.FsPermission;
046import org.apache.hadoop.fs.permission.PermissionStatus;
047import org.apache.hadoop.hdfs.DFSConfigKeys;
048import org.apache.hadoop.hdfs.DFSUtil;
049import org.apache.hadoop.hdfs.DistributedFileSystem;
050import org.apache.hadoop.hdfs.protocol.AclException;
051import org.apache.hadoop.hdfs.protocol.Block;
052import org.apache.hadoop.hdfs.protocol.ClientProtocol;
053import org.apache.hadoop.hdfs.protocol.DirectoryListing;
054import org.apache.hadoop.hdfs.protocol.FSLimitException.MaxDirectoryItemsExceededException;
055import org.apache.hadoop.hdfs.protocol.FSLimitException.PathComponentTooLongException;
056import org.apache.hadoop.hdfs.protocol.FsAclPermission;
057import org.apache.hadoop.hdfs.protocol.HdfsConstants;
058import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
059import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
060import org.apache.hadoop.hdfs.protocol.LocatedBlock;
061import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
062import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
063import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException;
064import org.apache.hadoop.hdfs.protocol.SnapshotException;
065import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
066import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
067import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
068import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
069import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
070import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
071import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
072import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithCount;
073import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
074import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
075import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.Root;
076import org.apache.hadoop.hdfs.util.ByteArray;
077import org.apache.hadoop.hdfs.util.ChunkedArrayList;
078import org.apache.hadoop.hdfs.util.ReadOnlyList;
079
080import com.google.common.annotations.VisibleForTesting;
081import com.google.common.base.Preconditions;
082
083/*************************************************
084 * FSDirectory stores the filesystem directory state.
085 * It handles writing/loading values to disk, and logging
086 * changes as we go.
087 *
088 * It keeps the filename->blockset mapping always-current
089 * and logged to disk.
090 * 
091 *************************************************/
092public class FSDirectory implements Closeable {
093  private static INodeDirectorySnapshottable createRoot(FSNamesystem namesystem) {
094    final INodeDirectory r = new INodeDirectory(
095        INodeId.ROOT_INODE_ID,
096        INodeDirectory.ROOT_NAME,
097        namesystem.createFsOwnerPermissions(new FsPermission((short) 0755)),
098        0L);
099    r.addDirectoryWithQuotaFeature(
100        DirectoryWithQuotaFeature.DEFAULT_NAMESPACE_QUOTA,
101        DirectoryWithQuotaFeature.DEFAULT_DISKSPACE_QUOTA);
102    final INodeDirectorySnapshottable s = new INodeDirectorySnapshottable(r);
103    s.setSnapshotQuota(0);
104    return s;
105  }
106
  // NOTE(review): the use site of this flag is outside this view; judging by
  // the name it gates validation of reserved file names -- confirm there.
  @VisibleForTesting
  static boolean CHECK_RESERVED_FILE_NAMES = true;
  /** Name of the ".reserved" path component. */
  public final static String DOT_RESERVED_STRING = ".reserved";
  /** {@link Path#SEPARATOR} followed by {@link #DOT_RESERVED_STRING}. */
  public final static String DOT_RESERVED_PATH_PREFIX = Path.SEPARATOR
      + DOT_RESERVED_STRING;
  /** {@link #DOT_RESERVED_STRING} converted with DFSUtil.string2Bytes. */
  public final static byte[] DOT_RESERVED = 
      DFSUtil.string2Bytes(DOT_RESERVED_STRING);
  /** Name of the ".inodes" path component. */
  public final static String DOT_INODES_STRING = ".inodes";
  /** {@link #DOT_INODES_STRING} converted with DFSUtil.string2Bytes. */
  public final static byte[] DOT_INODES = 
      DFSUtil.string2Bytes(DOT_INODES_STRING);
  // Root of the directory tree; assigned in the constructor from createRoot().
  INodeDirectory rootDir;
  // Image passed to the constructor; the edit log is reached through it and
  // close() closes it.
  FSImage fsImage;  
  // Owning namesystem, assigned in the constructor.
  private final FSNamesystem namesystem;
  // Starts false; set through setReady(boolean) and polled by waitForReady().
  private volatile boolean ready = false;
  // Read from DFS_NAMENODE_MAX_COMPONENT_LENGTH_KEY in the constructor.
  private final int maxComponentLength;
  // Read from DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY; the constructor requires
  // it to be in the range 1..6,400,000.
  private final int maxDirItems;
  private final int lsLimit;  // max list limit
  private final int contentCountLimit; // max content summary counts per run
  private final INodeMap inodeMap; // Synchronized by dirLock
  private long yieldCount = 0; // keep track of lock yield count.

  // lock to protect the directory and BlockMap
  // (constructed in fair mode by the constructor)
  private final ReentrantReadWriteLock dirLock;
  // Condition of dirLock's write lock; signalled by setReady() and awaited
  // by waitForReady().
  private final Condition cond;
131
132  // utility methods to acquire and release read lock and write lock
133  void readLock() {
134    this.dirLock.readLock().lock();
135  }
136
137  void readUnlock() {
138    this.dirLock.readLock().unlock();
139  }
140
141  void writeLock() {
142    this.dirLock.writeLock().lock();
143  }
144
145  void writeUnlock() {
146    this.dirLock.writeLock().unlock();
147  }
148
149  boolean hasWriteLock() {
150    return this.dirLock.isWriteLockedByCurrentThread();
151  }
152
153  boolean hasReadLock() {
154    return this.dirLock.getReadHoldCount() > 0;
155  }
156
157  public int getReadHoldCount() {
158    return this.dirLock.getReadHoldCount();
159  }
160
161  public int getWriteHoldCount() {
162    return this.dirLock.getWriteHoldCount();
163  }
164
  /**
   * Caches frequently used file names used in {@link INode} to reuse 
   * byte[] objects and reduce heap usage.
   * The cache is created in the constructor with the threshold read from
   * DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY, and {@code setReady()} calls
   * {@code initialized()} on it once the directory becomes ready.
   */
  private final NameCache<ByteArray> nameCache;
170
171  FSDirectory(FSImage fsImage, FSNamesystem ns, Configuration conf) {
172    this.dirLock = new ReentrantReadWriteLock(true); // fair
173    this.cond = dirLock.writeLock().newCondition();
174    rootDir = createRoot(ns);
175    inodeMap = INodeMap.newInstance(rootDir);
176    this.fsImage = fsImage;
177    int configuredLimit = conf.getInt(
178        DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT);
179    this.lsLimit = configuredLimit>0 ?
180        configuredLimit : DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT;
181    this.contentCountLimit = conf.getInt(
182        DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY,
183        DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_DEFAULT);
184    
185    // filesystem limits
186    this.maxComponentLength = conf.getInt(
187        DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_KEY,
188        DFSConfigKeys.DFS_NAMENODE_MAX_COMPONENT_LENGTH_DEFAULT);
189    this.maxDirItems = conf.getInt(
190        DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY,
191        DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_DEFAULT);
192    // We need a maximum maximum because by default, PB limits message sizes
193    // to 64MB. This means we can only store approximately 6.7 million entries
194    // per directory, but let's use 6.4 million for some safety.
195    final int MAX_DIR_ITEMS = 64 * 100 * 1000;
196    Preconditions.checkArgument(
197        maxDirItems > 0 && maxDirItems <= MAX_DIR_ITEMS, "Cannot set "
198            + DFSConfigKeys.DFS_NAMENODE_MAX_DIRECTORY_ITEMS_KEY
199            + " to a value less than 0 or greater than " + MAX_DIR_ITEMS);
200
201    int threshold = conf.getInt(
202        DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY,
203        DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT);
204    NameNode.LOG.info("Caching file names occuring more than " + threshold
205        + " times");
206    nameCache = new NameCache<ByteArray>(threshold);
207    namesystem = ns;
208  }
209    
210  private FSNamesystem getFSNamesystem() {
211    return namesystem;
212  }
213
214  private BlockManager getBlockManager() {
215    return getFSNamesystem().getBlockManager();
216  }
217
218  /** @return the root directory inode. */
219  public INodeDirectory getRoot() {
220    return rootDir;
221  }
222
223  /**
224   * Notify that loading of this FSDirectory is complete, and
225   * it is ready for use 
226   */
227  void imageLoadComplete() {
228    Preconditions.checkState(!ready, "FSDirectory already loaded");
229    setReady();
230  }
231
232  void setReady() {
233    if(ready) return;
234    writeLock();
235    try {
236      setReady(true);
237      this.nameCache.initialized();
238      cond.signalAll();
239    } finally {
240      writeUnlock();
241    }
242  }
243  
244  //This is for testing purposes only
245  @VisibleForTesting
246  boolean isReady() {
247    return ready;
248  }
249
250  // exposed for unit tests
251  protected void setReady(boolean flag) {
252    ready = flag;
253  }
254
255  private void incrDeletedFileCount(long count) {
256    if (getFSNamesystem() != null)
257      NameNode.getNameNodeMetrics().incrFilesDeleted(count);
258  }
259    
260  /**
261   * Shutdown the filestore
262   */
263  @Override
264  public void close() throws IOException {
265    fsImage.close();
266  }
267
268  /**
269   * Block until the object is ready to be used.
270   */
271  void waitForReady() {
272    if (!ready) {
273      writeLock();
274      try {
275        while (!ready) {
276          try {
277            cond.await(5000, TimeUnit.MILLISECONDS);
278          } catch (InterruptedException ie) {
279          }
280        }
281      } finally {
282        writeUnlock();
283      }
284    }
285  }
286
287  /**
288   * Add the given filename to the fs.
289   * @throws FileAlreadyExistsException
290   * @throws QuotaExceededException
291   * @throws UnresolvedLinkException
292   * @throws SnapshotAccessControlException 
293   */
294  INodeFile addFile(String path, PermissionStatus permissions,
295      short replication, long preferredBlockSize, String clientName,
296      String clientMachine, DatanodeDescriptor clientNode)
297    throws FileAlreadyExistsException, QuotaExceededException,
298      UnresolvedLinkException, SnapshotAccessControlException, AclException {
299    waitForReady();
300
301    // Always do an implicit mkdirs for parent directory tree.
302    long modTime = now();
303    
304    Path parent = new Path(path).getParent();
305    if (parent == null) {
306      // Trying to add "/" as a file - this path has no
307      // parent -- avoids an NPE below.
308      return null;
309    }
310    
311    if (!mkdirs(parent.toString(), permissions, true, modTime)) {
312      return null;
313    }
314    INodeFile newNode = new INodeFile(namesystem.allocateNewInodeId(), null,
315        permissions, modTime, modTime, BlockInfo.EMPTY_ARRAY, replication,
316        preferredBlockSize);
317    newNode.toUnderConstruction(clientName, clientMachine, clientNode);
318
319    boolean added = false;
320    writeLock();
321    try {
322      added = addINode(path, newNode);
323    } finally {
324      writeUnlock();
325    }
326    if (!added) {
327      NameNode.stateChangeLog.info("DIR* addFile: failed to add " + path);
328      return null;
329    }
330
331    if(NameNode.stateChangeLog.isDebugEnabled()) {
332      NameNode.stateChangeLog.debug("DIR* addFile: " + path + " is added");
333    }
334    return newNode;
335  }
336
337  INodeFile unprotectedAddFile( long id,
338                            String path, 
339                            PermissionStatus permissions,
340                            List<AclEntry> aclEntries,
341                            short replication,
342                            long modificationTime,
343                            long atime,
344                            long preferredBlockSize,
345                            boolean underConstruction,
346                            String clientName,
347                            String clientMachine) {
348    final INodeFile newNode;
349    assert hasWriteLock();
350    if (underConstruction) {
351      newNode = new INodeFile(id, null, permissions, modificationTime,
352          modificationTime, BlockInfo.EMPTY_ARRAY, replication,
353          preferredBlockSize);
354      newNode.toUnderConstruction(clientName, clientMachine, null);
355
356    } else {
357      newNode = new INodeFile(id, null, permissions, modificationTime, atime,
358          BlockInfo.EMPTY_ARRAY, replication, preferredBlockSize);
359    }
360
361    try {
362      if (addINode(path, newNode)) {
363        if (aclEntries != null) {
364          AclStorage.updateINodeAcl(newNode, aclEntries,
365            Snapshot.CURRENT_STATE_ID);
366        }
367        return newNode;
368      }
369    } catch (IOException e) {
370      if(NameNode.stateChangeLog.isDebugEnabled()) {
371        NameNode.stateChangeLog.debug(
372            "DIR* FSDirectory.unprotectedAddFile: exception when add " + path
373                + " to the file system", e);
374      }
375    }
376    return null;
377  }
378
379  /**
380   * Add a block to the file. Returns a reference to the added block.
381   */
382  BlockInfo addBlock(String path, INodesInPath inodesInPath, Block block,
383      DatanodeStorageInfo[] targets) throws IOException {
384    waitForReady();
385
386    writeLock();
387    try {
388      final INodeFile fileINode = inodesInPath.getLastINode().asFile();
389      Preconditions.checkState(fileINode.isUnderConstruction());
390
391      // check quota limits and updated space consumed
392      updateCount(inodesInPath, 0, fileINode.getBlockDiskspace(), true);
393
394      // associate new last block for the file
395      BlockInfoUnderConstruction blockInfo =
396        new BlockInfoUnderConstruction(
397            block,
398            fileINode.getFileReplication(),
399            BlockUCState.UNDER_CONSTRUCTION,
400            targets);
401      getBlockManager().addBlockCollection(blockInfo, fileINode);
402      fileINode.addBlock(blockInfo);
403
404      if(NameNode.stateChangeLog.isDebugEnabled()) {
405        NameNode.stateChangeLog.debug("DIR* FSDirectory.addBlock: "
406            + path + " with " + block
407            + " block is added to the in-memory "
408            + "file system");
409      }
410      return blockInfo;
411    } finally {
412      writeUnlock();
413    }
414  }
415
416  /**
417   * Persist the block list for the inode.
418   */
419  void persistBlocks(String path, INodeFile file, boolean logRetryCache) {
420    Preconditions.checkArgument(file.isUnderConstruction());
421    waitForReady();
422
423    writeLock();
424    try {
425      fsImage.getEditLog().logUpdateBlocks(path, file, logRetryCache);
426      if(NameNode.stateChangeLog.isDebugEnabled()) {
427        NameNode.stateChangeLog.debug("DIR* FSDirectory.persistBlocks: "
428            +path+" with "+ file.getBlocks().length 
429            +" blocks is persisted to the file system");
430      }
431    } finally {
432      writeUnlock();
433    }
434  }
435  
436  /**
437   * Persist the new block (the last block of the given file).
438   */
439  void persistNewBlock(String path, INodeFile file) {
440    Preconditions.checkArgument(file.isUnderConstruction());
441    waitForReady();
442
443    writeLock();
444    try {
445      fsImage.getEditLog().logAddBlock(path, file);
446    } finally {
447      writeUnlock();
448    }
449    if (NameNode.stateChangeLog.isDebugEnabled()) {
450      NameNode.stateChangeLog.debug("DIR* FSDirectory.persistNewBlock: "
451          + path + " with new block " + file.getLastBlock().toString()
452          + ", current total block count is " + file.getBlocks().length);
453    }
454  }
455  
456  /**
457   * Close file.
458   */
459  void closeFile(String path, INodeFile file) {
460    waitForReady();
461    writeLock();
462    try {
463      // file is closed
464      fsImage.getEditLog().logCloseFile(path, file);
465      if (NameNode.stateChangeLog.isDebugEnabled()) {
466        NameNode.stateChangeLog.debug("DIR* FSDirectory.closeFile: "
467            +path+" with "+ file.getBlocks().length 
468            +" blocks is persisted to the file system");
469      }
470    } finally {
471      writeUnlock();
472    }
473  }
474
475  /**
476   * Remove a block from the file.
477   * @return Whether the block exists in the corresponding file
478   */
479  boolean removeBlock(String path, INodeFile fileNode, Block block)
480      throws IOException {
481    Preconditions.checkArgument(fileNode.isUnderConstruction());
482    waitForReady();
483
484    writeLock();
485    try {
486      return unprotectedRemoveBlock(path, fileNode, block);
487    } finally {
488      writeUnlock();
489    }
490  }
491  
492  boolean unprotectedRemoveBlock(String path,
493      INodeFile fileNode, Block block) throws IOException {
494    // modify file-> block and blocksMap
495    // fileNode should be under construction
496    boolean removed = fileNode.removeLastBlock(block);
497    if (!removed) {
498      return false;
499    }
500    getBlockManager().removeBlockFromMap(block);
501
502    if(NameNode.stateChangeLog.isDebugEnabled()) {
503      NameNode.stateChangeLog.debug("DIR* FSDirectory.removeBlock: "
504          +path+" with "+block
505          +" block is removed from the file system");
506    }
507
508    // update space consumed
509    final INodesInPath iip = rootDir.getINodesInPath4Write(path, true);
510    updateCount(iip, 0, -fileNode.getBlockDiskspace(), true);
511    return true;
512  }
513
514  /**
515   * @throws SnapshotAccessControlException 
516   * @see #unprotectedRenameTo(String, String, long)
517   * @deprecated Use {@link #renameTo(String, String, Rename...)} instead.
518   */
519  @Deprecated
520  boolean renameTo(String src, String dst, boolean logRetryCache) 
521      throws QuotaExceededException, UnresolvedLinkException, 
522      FileAlreadyExistsException, SnapshotAccessControlException, IOException {
523    if (NameNode.stateChangeLog.isDebugEnabled()) {
524      NameNode.stateChangeLog.debug("DIR* FSDirectory.renameTo: "
525          +src+" to "+dst);
526    }
527    waitForReady();
528    long now = now();
529    writeLock();
530    try {
531      if (!unprotectedRenameTo(src, dst, now))
532        return false;
533    } finally {
534      writeUnlock();
535    }
536    fsImage.getEditLog().logRename(src, dst, now, logRetryCache);
537    return true;
538  }
539
540  /**
541   * @see #unprotectedRenameTo(String, String, long, Options.Rename...)
542   */
543  void renameTo(String src, String dst, boolean logRetryCache, 
544      Options.Rename... options)
545      throws FileAlreadyExistsException, FileNotFoundException,
546      ParentNotDirectoryException, QuotaExceededException,
547      UnresolvedLinkException, IOException {
548    if (NameNode.stateChangeLog.isDebugEnabled()) {
549      NameNode.stateChangeLog.debug("DIR* FSDirectory.renameTo: " + src
550          + " to " + dst);
551    }
552    waitForReady();
553    long now = now();
554    writeLock();
555    try {
556      if (unprotectedRenameTo(src, dst, now, options)) {
557        incrDeletedFileCount(1);
558      }
559    } finally {
560      writeUnlock();
561    }
562    fsImage.getEditLog().logRename(src, dst, now, logRetryCache, options);
563  }
564
  /**
   * Change a path name in the in-memory tree using the legacy rename
   * semantics. The caller must already hold the write lock.
   * <p>
   * When {@code isDir(dst)} is true the source is moved underneath
   * {@code dst}, keeping its own last path component. Most validation
   * failures are reported by logging a warning and returning false rather
   * than by throwing. If the source cannot be attached at the destination,
   * the finally block restores it under its original parent with its
   * original name.
   * 
   * @param src source path
   * @param dst destination path
   * @param timestamp modification time applied to the parent of the source
   *          and the parent of the destination when the rename succeeds
   * @return true if rename succeeds, or if the effective destination equals
   *         the source; false otherwise
   * @throws QuotaExceededException if the operation violates any quota limit
   * @throws FileAlreadyExistsException if the src is a symlink that points to dst
   * @throws SnapshotAccessControlException if path is in RO snapshot
   * @deprecated See {@link #unprotectedRenameTo(String, String, long, Options.Rename...)}
   */
  @Deprecated
  boolean unprotectedRenameTo(String src, String dst, long timestamp)
    throws QuotaExceededException, UnresolvedLinkException, 
    FileAlreadyExistsException, SnapshotAccessControlException, IOException {
    assert hasWriteLock();
    INodesInPath srcIIP = rootDir.getINodesInPath4Write(src, false);
    final INode srcInode = srcIIP.getLastINode();
    
    // check the validation of the source
    if (srcInode == null) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + "failed to rename " + src + " to " + dst
          + " because source does not exist");
      return false;
    } 
    // a resolved path consisting of a single inode is the root itself
    if (srcIIP.getINodes().length == 1) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          +"failed to rename "+src+" to "+dst+ " because source is the root");
      return false;
    }
    
    // srcInode and its subtree cannot contain snapshottable directories with
    // snapshots
    List<INodeDirectorySnapshottable> snapshottableDirs = 
        new ArrayList<INodeDirectorySnapshottable>();
    checkSnapshot(srcInode, snapshottableDirs);
    
    // renaming onto a directory means moving into it under the source's name
    if (isDir(dst)) {
      dst += Path.SEPARATOR + new Path(src).getName();
    }
    
    // check the validity of the destination
    if (dst.equals(src)) {
      return true;
    }
    if (srcInode.isSymlink() && 
        dst.equals(srcInode.asSymlink().getSymlinkString())) {
      throw new FileAlreadyExistsException(
          "Cannot rename symlink "+src+" to its target "+dst);
    }
    
    // dst cannot be directory or a file under src
    // (charAt is in range: dst differs from src here, so a dst that starts
    // with src is strictly longer than src)
    if (dst.startsWith(src) && 
        dst.charAt(src.length()) == Path.SEPARATOR_CHAR) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + "failed to rename " + src + " to " + dst
          + " because destination starts with src");
      return false;
    }
    
    byte[][] dstComponents = INode.getPathComponents(dst);
    INodesInPath dstIIP = getExistingPathINodes(dstComponents);
    if (dstIIP.isSnapshot()) {
      throw new SnapshotAccessControlException(
          "Modification on RO snapshot is disallowed");
    }
    // this legacy variant never overwrites an existing destination
    if (dstIIP.getLastINode() != null) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
                                   +"failed to rename "+src+" to "+dst+ 
                                   " because destination exists");
      return false;
    }
    // index -2 is the second-to-last inode, i.e. the destination's parent
    INode dstParent = dstIIP.getINode(-2);
    if (dstParent == null) {
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          +"failed to rename "+src+" to "+dst+ 
          " because destination's parent does not exist");
      return false;
    }
    
    // Ensure dst has quota to accommodate rename
    verifyFsLimitsForRename(srcIIP, dstIIP);
    verifyQuotaForRename(srcIIP.getINodes(), dstIIP.getINodes());
    
    // 'added' stays false until the source is attached at the destination;
    // the finally block uses it to decide whether to undo the removal
    boolean added = false;
    INode srcChild = srcIIP.getLastINode();
    // original name, kept so the finally block can restore it on failure
    final byte[] srcChildName = srcChild.getLocalNameBytes();
    final boolean isSrcInSnapshot = srcChild.isInLatestSnapshot(
        srcIIP.getLatestSnapshotId());
    final boolean srcChildIsReference = srcChild.isReference();
    
    // Record the snapshot on srcChild. After the rename, before any new 
    // snapshot is taken on the dst tree, changes will be recorded in the latest
    // snapshot of the src tree.
    if (isSrcInSnapshot) {
      srcChild = srcChild.recordModification(srcIIP.getLatestSnapshotId());
      srcIIP.setLastINode(srcChild);
    }
    
    // check srcChild for reference
    // withCount is non-null when the source is in the latest snapshot or is
    // already a reference; null means it is moved as a plain inode
    final INodeReference.WithCount withCount;
    Quota.Counts oldSrcCounts = Quota.Counts.newInstance();
    // remembered so the finally block can rebuild the original reference
    int srcRefDstSnapshot = srcChildIsReference ? srcChild.asReference()
        .getDstSnapshotId() : Snapshot.CURRENT_STATE_ID;
    if (isSrcInSnapshot) {
      final INodeReference.WithName withName = 
          srcIIP.getINode(-2).asDirectory().replaceChild4ReferenceWithName(
              srcChild, srcIIP.getLatestSnapshotId()); 
      withCount = (INodeReference.WithCount) withName.getReferredINode();
      srcChild = withName;
      srcIIP.setLastINode(srcChild);
      // get the counts before rename
      withCount.getReferredINode().computeQuotaUsage(oldSrcCounts, true);
    } else if (srcChildIsReference) {
      // srcChild is reference but srcChild is not in latest snapshot
      withCount = (WithCount) srcChild.asReference().getReferredINode();
    } else {
      withCount = null;
    }

    try {
      // remove src
      final long removedSrc = removeLastINode(srcIIP);
      // -1 is treated as "the source could not be removed"
      if (removedSrc == -1) {
        NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
            + "failed to rename " + src + " to " + dst
            + " because the source can not be removed");
        return false;
      }
      
      if (dstParent.getParent() == null) {
        // src and dst file/dir are in the same directory, and the dstParent has
        // been replaced when we removed the src. Refresh the dstIIP and
        // dstParent.
        dstIIP = getExistingPathINodes(dstComponents);
        dstParent = dstIIP.getINode(-2);
      }
      
      // add src to the destination
      
      srcChild = srcIIP.getLastINode();
      final byte[] dstChildName = dstIIP.getLastLocalName();
      final INode toDst;
      if (withCount == null) {
        // plain inode: rename it in place and move it as is
        srcChild.setLocalName(dstChildName);
        toDst = srcChild;
      } else {
        // referenced inode: rename the referred inode and attach it to the
        // destination through a new DstReference
        withCount.getReferredINode().setLocalName(dstChildName);
        int dstSnapshotId = dstIIP.getLatestSnapshotId();
        final INodeReference.DstReference ref = new INodeReference.DstReference(
            dstParent.asDirectory(), withCount, dstSnapshotId);
        toDst = ref;
      }
      
      added = addLastINodeNoQuotaCheck(dstIIP, toDst);
      if (added) {
        if (NameNode.stateChangeLog.isDebugEnabled()) {
          NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedRenameTo: " 
              + src + " is renamed to " + dst);
        }
        // update modification time of dst and the parent of src
        final INode srcParent = srcIIP.getINode(-2);
        srcParent.updateModificationTime(timestamp, srcIIP.getLatestSnapshotId());
        dstParent = dstIIP.getINode(-2); // refresh dstParent
        dstParent.updateModificationTime(timestamp, dstIIP.getLatestSnapshotId());
        // update moved leases with new filename
        getFSNamesystem().unprotectedChangeLease(src, dst);     

        // update the quota usage in src tree
        if (isSrcInSnapshot) {
          // get the counts after rename
          Quota.Counts newSrcCounts = srcChild.computeQuotaUsage(
              Quota.Counts.newInstance(), false);
          // charge the source parent only the difference against the counts
          // taken before the rename
          newSrcCounts.subtract(oldSrcCounts);
          srcParent.addSpaceConsumed(newSrcCounts.get(Quota.NAMESPACE),
              newSrcCounts.get(Quota.DISKSPACE), false);
        }
        
        return true;
      }
    } finally {
      // Undo path: runs on every exit from the try block where the source
      // was not attached at the destination, including the early
      // "return false" above and any exception.
      if (!added) {
        final INodeDirectory srcParent = srcIIP.getINode(-2).asDirectory();
        final INode oldSrcChild = srcChild;
        // put it back
        if (withCount == null) {
          srcChild.setLocalName(srcChildName);
        } else if (!srcChildIsReference) { // src must be in snapshot
          // the withCount node will no longer be used thus no need to update
          // its reference number here
          final INode originalChild = withCount.getReferredINode();
          srcChild = originalChild;
          srcChild.setLocalName(srcChildName);
        } else {
          // the source was a reference to begin with: drop the current
          // reference and rebuild one with the original dst snapshot id
          withCount.removeReference(oldSrcChild.asReference());
          final INodeReference originalRef = new INodeReference.DstReference(
              srcParent, withCount, srcRefDstSnapshot);
          srcChild = originalRef;
          withCount.getReferredINode().setLocalName(srcChildName);
        }
        
        if (isSrcInSnapshot) {
          // srcParent must have snapshot feature since isSrcInSnapshot is true
          // and src node has been removed from srcParent 
          srcParent.undoRename4ScrParent(oldSrcChild.asReference(), srcChild);
        } else {
          // original srcChild is not in latest snapshot, we only need to add
          // the srcChild back
          addLastINodeNoQuotaCheck(srcIIP, srcChild);
        }
      }
    }
    // reached only when the source could not be added to the destination
    NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
        +"failed to rename "+src+" to "+dst);
    return false;
  }
782
  /**
   * Rename src to dst.
   * See {@link DistributedFileSystem#rename(Path, Path, Options.Rename...)}
   * for details related to rename semantics and exceptions.
   * <p>
   * The caller must hold the write lock (asserted on entry). The method first
   * validates the source and the destination, then removes the source inode
   * from its parent, optionally removes an existing destination, and finally
   * adds the source inode under the destination name. If adding fails, the
   * finally block puts the removed source (and destination) back.
   * 
   * @param src source path
   * @param dst destination path
   * @param timestamp modification time
   * @param options Rename options
   * @return true if an existing destination inode was removed as part of the
   *         rename; false if the rename succeeded without removing one
   * @throws FileAlreadyExistsException if src and dst are the same, if src is
   *         a symlink whose target is dst, or if dst exists and
   *         {@link Rename#OVERWRITE} was not given
   * @throws FileNotFoundException if src or the parent of dst does not exist
   * @throws ParentNotDirectoryException if the parent of dst is not a
   *         directory
   * @throws IOException if src or dst is the root, dst lies under src, src and
   *         dst are not both directories or both non-directories, dst is a
   *         non-empty directory, the source cannot be removed, or the source
   *         cannot be added under the destination
   */
  boolean unprotectedRenameTo(String src, String dst, long timestamp,
      Options.Rename... options) throws FileAlreadyExistsException,
      FileNotFoundException, ParentNotDirectoryException,
      QuotaExceededException, UnresolvedLinkException, IOException {
    assert hasWriteLock();
    // OVERWRITE is the only option inspected here
    boolean overwrite = false;
    if (null != options) {
      for (Rename option : options) {
        if (option == Rename.OVERWRITE) {
          overwrite = true;
        }
      }
    }
    String error = null;
    final INodesInPath srcIIP = rootDir.getINodesInPath4Write(src, false);
    final INode srcInode = srcIIP.getLastINode();
    // validate source
    if (srcInode == null) {
      error = "rename source " + src + " is not found.";
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new FileNotFoundException(error);
    }
    // a resolved path with a single inode is the root itself
    if (srcIIP.getINodes().length == 1) {
      error = "rename source cannot be the root";
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new IOException(error);
    }
    // srcInode and its subtree cannot contain snapshottable directories with
    // snapshots
    checkSnapshot(srcInode, null);
    
    // validate the destination
    if (dst.equals(src)) {
      throw new FileAlreadyExistsException(
          "The source "+src+" and destination "+dst+" are the same");
    }
    if (srcInode.isSymlink() && 
        dst.equals(srcInode.asSymlink().getSymlinkString())) {
      throw new FileAlreadyExistsException(
          "Cannot rename symlink "+src+" to its target "+dst);
    }
    // dst cannot be a directory or a file under src
    // (dst differs from src here, so a dst that starts with src is strictly
    // longer and charAt(src.length()) is in range)
    if (dst.startsWith(src) && 
        dst.charAt(src.length()) == Path.SEPARATOR_CHAR) {
      error = "Rename destination " + dst
          + " is a directory or file under source " + src;
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new IOException(error);
    }
    INodesInPath dstIIP = rootDir.getINodesInPath4Write(dst, false);
    if (dstIIP.getINodes().length == 1) {
      error = "rename destination cannot be the root";
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new IOException(error);
    }

    final INode dstInode = dstIIP.getLastINode();
    // filled by checkSnapshot with snapshottable directories (without
    // snapshots) found under an existing destination
    List<INodeDirectorySnapshottable> snapshottableDirs = 
        new ArrayList<INodeDirectorySnapshottable>();
    if (dstInode != null) { // Destination exists
      // It's OK to rename a file to a symlink and vice versa
      if (dstInode.isDirectory() != srcInode.isDirectory()) {
        error = "Source " + src + " and destination " + dst
            + " must both be directories";
        NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
            + error);
        throw new IOException(error);
      }
      if (!overwrite) { // If destination exists, overwrite flag must be true
        error = "rename destination " + dst + " already exists";
        NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
            + error);
        throw new FileAlreadyExistsException(error);
      }
      // an existing destination directory may only be overwritten when empty
      if (dstInode.isDirectory()) {
        final ReadOnlyList<INode> children = dstInode.asDirectory()
            .getChildrenList(Snapshot.CURRENT_STATE_ID);
        if (!children.isEmpty()) {
          error = "rename destination directory is not empty: " + dst;
          NameNode.stateChangeLog.warn(
              "DIR* FSDirectory.unprotectedRenameTo: " + error);
          throw new IOException(error);
        }
      }
      checkSnapshot(dstInode, snapshottableDirs);
    }

    INode dstParent = dstIIP.getINode(-2);
    if (dstParent == null) {
      error = "rename destination parent " + dst + " not found.";
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new FileNotFoundException(error);
    }
    if (!dstParent.isDirectory()) {
      error = "rename destination parent " + dst + " is a file.";
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new ParentNotDirectoryException(error);
    }

    // Ensure dst has quota to accommodate rename
    verifyFsLimitsForRename(srcIIP, dstIIP);
    verifyQuotaForRename(srcIIP.getINodes(), dstIIP.getINodes());

    // State captured before anything is modified; the undo path in the
    // finally block relies on srcChildName and srcChildIsReference.
    INode srcChild = srcIIP.getLastINode();
    final byte[] srcChildName = srcChild.getLocalNameBytes();
    final boolean isSrcInSnapshot = srcChild.isInLatestSnapshot(
        srcIIP.getLatestSnapshotId());
    final boolean srcChildIsReference = srcChild.isReference();
    
    // Record the snapshot on srcChild. After the rename, before any new 
    // snapshot is taken on the dst tree, changes will be recorded in the latest
    // snapshot of the src tree.
    if (isSrcInSnapshot) {
      srcChild = srcChild.recordModification(srcIIP.getLatestSnapshotId());
      srcIIP.setLastINode(srcChild);
    }
    
    // check srcChild for reference
    // withCount stays null when src is neither in the latest snapshot nor a
    // reference; in that case the inode itself is moved to dst.
    final INodeReference.WithCount withCount;
    int srcRefDstSnapshot = srcChildIsReference ? srcChild.asReference()
        .getDstSnapshotId() : Snapshot.CURRENT_STATE_ID;
    Quota.Counts oldSrcCounts = Quota.Counts.newInstance();    
    if (isSrcInSnapshot) {
      final INodeReference.WithName withName = srcIIP.getINode(-2).asDirectory()
          .replaceChild4ReferenceWithName(srcChild, srcIIP.getLatestSnapshotId()); 
      withCount = (INodeReference.WithCount) withName.getReferredINode();
      srcChild = withName;
      srcIIP.setLastINode(srcChild);
      // get the counts before rename
      withCount.getReferredINode().computeQuotaUsage(oldSrcCounts, true);
    } else if (srcChildIsReference) {
      // srcChild is reference but srcChild is not in latest snapshot
      withCount = (WithCount) srcChild.asReference().getReferredINode();
    } else {
      withCount = null;
    }
    
    // From here on the source is detached; undoRemoveSrc stays true until
    // the inode has been added under the destination.
    boolean undoRemoveSrc = true;
    final long removedSrc = removeLastINode(srcIIP);
    if (removedSrc == -1) {
      error = "Failed to rename " + src + " to " + dst
          + " because the source can not be removed";
      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
          + error);
      throw new IOException(error);
    }
    
    if (dstParent.getParent() == null) {
      // src and dst file/dir are in the same directory, and the dstParent has
      // been replaced when we removed the src. Refresh the dstIIP and
      // dstParent.
      dstIIP = rootDir.getINodesInPath4Write(dst, false);
    }
    
    // undoRemoveDst is true only between removing an existing destination
    // and completing the rename.
    boolean undoRemoveDst = false;
    INode removedDst = null;
    try {
      if (dstInode != null) { // dst exists remove it
        if (removeLastINode(dstIIP) != -1) {
          removedDst = dstIIP.getLastINode();
          undoRemoveDst = true;
        }
      }
      
      srcChild = srcIIP.getLastINode();

      final byte[] dstChildName = dstIIP.getLastLocalName();
      final INode toDst;
      if (withCount == null) {
        // plain inode: rename it in place and move it
        srcChild.setLocalName(dstChildName);
        toDst = srcChild;
      } else {
        // referenced inode: rename the referred inode and add a new
        // DstReference to it under the destination parent
        withCount.getReferredINode().setLocalName(dstChildName);
        int dstSnapshotId = dstIIP.getLatestSnapshotId();
        final INodeReference.DstReference ref = new INodeReference.DstReference(
            dstIIP.getINode(-2).asDirectory(), withCount, dstSnapshotId);
        toDst = ref;
      }

      // add src as dst to complete rename
      if (addLastINodeNoQuotaCheck(dstIIP, toDst)) {
        undoRemoveSrc = false;
        if (NameNode.stateChangeLog.isDebugEnabled()) {
          NameNode.stateChangeLog.debug(
              "DIR* FSDirectory.unprotectedRenameTo: " + src
              + " is renamed to " + dst);
        }

        final INode srcParent = srcIIP.getINode(-2);
        srcParent.updateModificationTime(timestamp, srcIIP.getLatestSnapshotId());
        dstParent = dstIIP.getINode(-2);
        dstParent.updateModificationTime(timestamp, dstIIP.getLatestSnapshotId());
        // update moved lease with new filename
        getFSNamesystem().unprotectedChangeLease(src, dst);

        // Collect the blocks and remove the lease for previous dst
        // filesDeleted stays -1 when no destination was removed, which makes
        // the method return false below.
        long filesDeleted = -1;
        if (removedDst != null) {
          undoRemoveDst = false;
          BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
          List<INode> removedINodes = new ChunkedArrayList<INode>();
          filesDeleted = removedDst.cleanSubtree(Snapshot.CURRENT_STATE_ID,
              dstIIP.getLatestSnapshotId(), collectedBlocks, removedINodes, true)
              .get(Quota.NAMESPACE);
          getFSNamesystem().removePathAndBlocks(src, collectedBlocks,
              removedINodes);
        }

        if (snapshottableDirs.size() > 0) {
          // There are snapshottable directories (without snapshots) to be
          // deleted. Need to update the SnapshotManager.
          namesystem.removeSnapshottableDirs(snapshottableDirs);
        }
        
        // update the quota usage in src tree
        if (isSrcInSnapshot) {
          // get the counts after rename
          Quota.Counts newSrcCounts = srcChild.computeQuotaUsage(
              Quota.Counts.newInstance(), false);
          newSrcCounts.subtract(oldSrcCounts);
          srcParent.addSpaceConsumed(newSrcCounts.get(Quota.NAMESPACE),
              newSrcCounts.get(Quota.DISKSPACE), false);
        }
        
        return filesDeleted >= 0;
      }
    } finally {
      // Runs on both the failure paths and the successful return above; the
      // two undo flags are false once the rename has completed.
      if (undoRemoveSrc) {
        // Rename failed - restore src
        final INodeDirectory srcParent = srcIIP.getINode(-2).asDirectory();
        final INode oldSrcChild = srcChild;
        // put it back
        if (withCount == null) {
          srcChild.setLocalName(srcChildName);
        } else if (!srcChildIsReference) { // src must be in snapshot
          // the withCount node will no longer be used thus no need to update
          // its reference number here
          final INode originalChild = withCount.getReferredINode();
          srcChild = originalChild;
          srcChild.setLocalName(srcChildName);
        } else {
          // src was already a reference: drop the current reference and
          // recreate a DstReference with the original dst snapshot id
          withCount.removeReference(oldSrcChild.asReference());
          final INodeReference originalRef = new INodeReference.DstReference(
              srcParent, withCount, srcRefDstSnapshot);
          srcChild = originalRef;
          withCount.getReferredINode().setLocalName(srcChildName);
        }
        
        if (srcParent.isWithSnapshot()) {
          srcParent.undoRename4ScrParent(oldSrcChild.asReference(), srcChild);
        } else {
          // srcParent is not an INodeDirectoryWithSnapshot, we only need to add
          // the srcChild back
          addLastINodeNoQuotaCheck(srcIIP, srcChild);
        }
      }
      if (undoRemoveDst) {
        // Rename failed - restore dst
        if (dstParent.isDirectory() && dstParent.asDirectory().isWithSnapshot()) {
          dstParent.asDirectory().undoRename4DstParent(removedDst,
              dstIIP.getLatestSnapshotId());
        } else {
          addLastINodeNoQuotaCheck(dstIIP, removedDst);
        }
        // re-register the restored reference with its WithCount node
        if (removedDst.isReference()) {
          final INodeReference removedDstRef = removedDst.asReference();
          final INodeReference.WithCount wc = 
              (WithCount) removedDstRef.getReferredINode().asReference();
          wc.addReference(removedDstRef);
        }
      }
    }
    // Only reached when addLastINodeNoQuotaCheck returned false.
    NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
        + "failed to rename " + src + " to " + dst);
    throw new IOException("rename from " + src + " to " + dst + " failed.");
  }
1075  
1076  /**
1077   * Set file replication
1078   * 
1079   * @param src file name
1080   * @param replication new replication
1081   * @param blockRepls block replications - output parameter
1082   * @return array of file blocks
1083   * @throws QuotaExceededException
1084   * @throws SnapshotAccessControlException 
1085   */
1086  Block[] setReplication(String src, short replication, short[] blockRepls)
1087      throws QuotaExceededException, UnresolvedLinkException,
1088      SnapshotAccessControlException {
1089    waitForReady();
1090    writeLock();
1091    try {
1092      final Block[] fileBlocks = unprotectedSetReplication(
1093          src, replication, blockRepls);
1094      if (fileBlocks != null)  // log replication change
1095        fsImage.getEditLog().logSetReplication(src, replication);
1096      return fileBlocks;
1097    } finally {
1098      writeUnlock();
1099    }
1100  }
1101
1102  Block[] unprotectedSetReplication(String src, short replication,
1103      short[] blockRepls) throws QuotaExceededException,
1104      UnresolvedLinkException, SnapshotAccessControlException {
1105    assert hasWriteLock();
1106
1107    final INodesInPath iip = rootDir.getINodesInPath4Write(src, true);
1108    final INode inode = iip.getLastINode();
1109    if (inode == null || !inode.isFile()) {
1110      return null;
1111    }
1112    INodeFile file = inode.asFile();
1113    final short oldBR = file.getBlockReplication();
1114
1115    // before setFileReplication, check for increasing block replication.
1116    // if replication > oldBR, then newBR == replication.
1117    // if replication < oldBR, we don't know newBR yet. 
1118    if (replication > oldBR) {
1119      long dsDelta = (replication - oldBR)*(file.diskspaceConsumed()/oldBR);
1120      updateCount(iip, 0, dsDelta, true);
1121    }
1122
1123    file = file.setFileReplication(replication, iip.getLatestSnapshotId(),
1124        inodeMap);
1125    
1126    final short newBR = file.getBlockReplication(); 
1127    // check newBR < oldBR case. 
1128    if (newBR < oldBR) {
1129      long dsDelta = (newBR - oldBR)*(file.diskspaceConsumed()/newBR);
1130      updateCount(iip, 0, dsDelta, true);
1131    }
1132
1133    if (blockRepls != null) {
1134      blockRepls[0] = oldBR;
1135      blockRepls[1] = newBR;
1136    }
1137    return file.getBlocks();
1138  }
1139
1140  /**
1141   * @param path the file path
1142   * @return the block size of the file. 
1143   */
1144  long getPreferredBlockSize(String path) throws UnresolvedLinkException,
1145      FileNotFoundException, IOException {
1146    readLock();
1147    try {
1148      return INodeFile.valueOf(rootDir.getNode(path, false), path
1149          ).getPreferredBlockSize();
1150    } finally {
1151      readUnlock();
1152    }
1153  }
1154
1155  boolean exists(String src) throws UnresolvedLinkException {
1156    src = normalizePath(src);
1157    readLock();
1158    try {
1159      INode inode = rootDir.getNode(src, false);
1160      if (inode == null) {
1161         return false;
1162      }
1163      return !inode.isFile() || inode.asFile().getBlocks() != null;
1164    } finally {
1165      readUnlock();
1166    }
1167  }
1168  
1169  void setPermission(String src, FsPermission permission)
1170      throws FileNotFoundException, UnresolvedLinkException,
1171      QuotaExceededException, SnapshotAccessControlException {
1172    writeLock();
1173    try {
1174      unprotectedSetPermission(src, permission);
1175    } finally {
1176      writeUnlock();
1177    }
1178    fsImage.getEditLog().logSetPermissions(src, permission);
1179  }
1180  
1181  void unprotectedSetPermission(String src, FsPermission permissions)
1182      throws FileNotFoundException, UnresolvedLinkException,
1183      QuotaExceededException, SnapshotAccessControlException {
1184    assert hasWriteLock();
1185    final INodesInPath inodesInPath = rootDir.getINodesInPath4Write(src, true);
1186    final INode inode = inodesInPath.getLastINode();
1187    if (inode == null) {
1188      throw new FileNotFoundException("File does not exist: " + src);
1189    }
1190    int snapshotId = inodesInPath.getLatestSnapshotId();
1191    inode.setPermission(permissions, snapshotId);
1192  }
1193
1194  void setOwner(String src, String username, String groupname)
1195      throws FileNotFoundException, UnresolvedLinkException,
1196      QuotaExceededException, SnapshotAccessControlException {
1197    writeLock();
1198    try {
1199      unprotectedSetOwner(src, username, groupname);
1200    } finally {
1201      writeUnlock();
1202    }
1203    fsImage.getEditLog().logSetOwner(src, username, groupname);
1204  }
1205
1206  void unprotectedSetOwner(String src, String username, String groupname)
1207      throws FileNotFoundException, UnresolvedLinkException,
1208      QuotaExceededException, SnapshotAccessControlException {
1209    assert hasWriteLock();
1210    final INodesInPath inodesInPath = rootDir.getINodesInPath4Write(src, true);
1211    INode inode = inodesInPath.getLastINode();
1212    if (inode == null) {
1213      throw new FileNotFoundException("File does not exist: " + src);
1214    }
1215    if (username != null) {
1216      inode = inode.setUser(username, inodesInPath.getLatestSnapshotId());
1217    }
1218    if (groupname != null) {
1219      inode.setGroup(groupname, inodesInPath.getLatestSnapshotId());
1220    }
1221  }
1222
1223  /**
1224   * Concat all the blocks from srcs to trg and delete the srcs files
1225   */
1226  void concat(String target, String [] srcs, boolean supportRetryCache) 
1227      throws UnresolvedLinkException, QuotaExceededException,
1228      SnapshotAccessControlException, SnapshotException {
1229    writeLock();
1230    try {
1231      // actual move
1232      waitForReady();
1233      long timestamp = now();
1234      unprotectedConcat(target, srcs, timestamp);
1235      // do the commit
1236      fsImage.getEditLog().logConcat(target, srcs, timestamp, 
1237          supportRetryCache);
1238    } finally {
1239      writeUnlock();
1240    }
1241  }
1242
1243  /**
1244   * Concat all the blocks from srcs to trg and delete the srcs files
1245   * @param target target file to move the blocks to
1246   * @param srcs list of file to move the blocks from
1247   */
1248  void unprotectedConcat(String target, String [] srcs, long timestamp) 
1249      throws UnresolvedLinkException, QuotaExceededException,
1250      SnapshotAccessControlException, SnapshotException {
1251    assert hasWriteLock();
1252    if (NameNode.stateChangeLog.isDebugEnabled()) {
1253      NameNode.stateChangeLog.debug("DIR* FSNamesystem.concat to "+target);
1254    }
1255    // do the move
1256    
1257    final INodesInPath trgIIP = rootDir.getINodesInPath4Write(target, true);
1258    final INode[] trgINodes = trgIIP.getINodes();
1259    final INodeFile trgInode = trgIIP.getLastINode().asFile();
1260    INodeDirectory trgParent = trgINodes[trgINodes.length-2].asDirectory();
1261    final int trgLatestSnapshot = trgIIP.getLatestSnapshotId();
1262    
1263    final INodeFile [] allSrcInodes = new INodeFile[srcs.length];
1264    for(int i = 0; i < srcs.length; i++) {
1265      final INodesInPath iip = getINodesInPath4Write(srcs[i]);
1266      final int latest = iip.getLatestSnapshotId();
1267      final INode inode = iip.getLastINode();
1268
1269      // check if the file in the latest snapshot
1270      if (inode.isInLatestSnapshot(latest)) {
1271        throw new SnapshotException("Concat: the source file " + srcs[i]
1272            + " is in snapshot " + latest);
1273      }
1274
1275      // check if the file has other references.
1276      if (inode.isReference() && ((INodeReference.WithCount)
1277          inode.asReference().getReferredINode()).getReferenceCount() > 1) {
1278        throw new SnapshotException("Concat: the source file " + srcs[i]
1279            + " is referred by some other reference in some snapshot.");
1280      }
1281
1282      allSrcInodes[i] = inode.asFile();
1283    }
1284    trgInode.concatBlocks(allSrcInodes);
1285    
1286    // since we are in the same dir - we can use same parent to remove files
1287    int count = 0;
1288    for(INodeFile nodeToRemove: allSrcInodes) {
1289      if(nodeToRemove == null) continue;
1290      
1291      nodeToRemove.setBlocks(null);
1292      trgParent.removeChild(nodeToRemove, trgLatestSnapshot);
1293      inodeMap.remove(nodeToRemove);
1294      count++;
1295    }
1296    
1297    // update inodeMap
1298    removeFromInodeMap(Arrays.asList(allSrcInodes));
1299    
1300    trgInode.setModificationTime(timestamp, trgLatestSnapshot);
1301    trgParent.updateModificationTime(timestamp, trgLatestSnapshot);
1302    // update quota on the parent directory ('count' files removed, 0 space)
1303    unprotectedUpdateCount(trgIIP, trgINodes.length-1, -count, 0);
1304  }
1305
1306  /**
1307   * Delete the target directory and collect the blocks under it
1308   * 
1309   * @param src Path of a directory to delete
1310   * @param collectedBlocks Blocks under the deleted directory
1311   * @param removedINodes INodes that should be removed from {@link #inodeMap}
1312   * @param logRetryCache Whether to record RPC IDs in editlog to support retry
1313   *                      cache rebuilding.
1314   * @return true on successful deletion; else false
1315   */
1316  boolean delete(String src, BlocksMapUpdateInfo collectedBlocks,
1317      List<INode> removedINodes, boolean logRetryCache) throws IOException {
1318    if (NameNode.stateChangeLog.isDebugEnabled()) {
1319      NameNode.stateChangeLog.debug("DIR* FSDirectory.delete: " + src);
1320    }
1321    waitForReady();
1322    long now = now();
1323    final long filesRemoved;
1324    writeLock();
1325    try {
1326      final INodesInPath inodesInPath = rootDir.getINodesInPath4Write(
1327          normalizePath(src), false);
1328      if (!deleteAllowed(inodesInPath, src) ) {
1329        filesRemoved = -1;
1330      } else {
1331        List<INodeDirectorySnapshottable> snapshottableDirs = 
1332            new ArrayList<INodeDirectorySnapshottable>();
1333        checkSnapshot(inodesInPath.getLastINode(), snapshottableDirs);
1334        filesRemoved = unprotectedDelete(inodesInPath, collectedBlocks,
1335            removedINodes, now);
1336        namesystem.removeSnapshottableDirs(snapshottableDirs);
1337      }
1338    } finally {
1339      writeUnlock();
1340    }
1341    if (filesRemoved < 0) {
1342      return false;
1343    }
1344    fsImage.getEditLog().logDelete(src, now, logRetryCache);
1345    incrDeletedFileCount(filesRemoved);
1346    // Blocks/INodes will be handled later by the caller of this method
1347    getFSNamesystem().removePathAndBlocks(src, null, null);
1348    return true;
1349  }
1350  
1351  private static boolean deleteAllowed(final INodesInPath iip,
1352      final String src) {
1353    final INode[] inodes = iip.getINodes(); 
1354    if (inodes == null || inodes.length == 0
1355        || inodes[inodes.length - 1] == null) {
1356      if(NameNode.stateChangeLog.isDebugEnabled()) {
1357        NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedDelete: "
1358            + "failed to remove " + src + " because it does not exist");
1359      }
1360      return false;
1361    } else if (inodes.length == 1) { // src is the root
1362      NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedDelete: "
1363          + "failed to remove " + src
1364          + " because the root is not allowed to be deleted");
1365      return false;
1366    }
1367    return true;
1368  }
1369  
1370  /**
1371   * @return true if the path is a non-empty directory; otherwise, return false.
1372   */
1373  boolean isNonEmptyDirectory(String path) throws UnresolvedLinkException {
1374    readLock();
1375    try {
1376      final INodesInPath inodesInPath = rootDir.getLastINodeInPath(path, false);
1377      final INode inode = inodesInPath.getINode(0);
1378      if (inode == null || !inode.isDirectory()) {
1379        //not found or not a directory
1380        return false;
1381      }
1382      final int s = inodesInPath.getPathSnapshotId();
1383      return !inode.asDirectory().getChildrenList(s).isEmpty();
1384    } finally {
1385      readUnlock();
1386    }
1387  }
1388
1389  /**
1390   * Delete a path from the name space
1391   * Update the count at each ancestor directory with quota
1392   * <br>
1393   * Note: This is to be used by {@link FSEditLog} only.
1394   * <br>
1395   * @param src a string representation of a path to an inode
1396   * @param mtime the time the inode is removed
1397   * @throws SnapshotAccessControlException if path is in RO snapshot
1398   */
1399  void unprotectedDelete(String src, long mtime) throws UnresolvedLinkException,
1400      QuotaExceededException, SnapshotAccessControlException, IOException {
1401    assert hasWriteLock();
1402    BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
1403    List<INode> removedINodes = new ChunkedArrayList<INode>();
1404
1405    final INodesInPath inodesInPath = rootDir.getINodesInPath4Write(
1406        normalizePath(src), false);
1407    long filesRemoved = -1;
1408    if (deleteAllowed(inodesInPath, src)) {
1409      List<INodeDirectorySnapshottable> snapshottableDirs = 
1410          new ArrayList<INodeDirectorySnapshottable>();
1411      checkSnapshot(inodesInPath.getLastINode(), snapshottableDirs);
1412      filesRemoved = unprotectedDelete(inodesInPath, collectedBlocks,
1413          removedINodes, mtime);
1414      namesystem.removeSnapshottableDirs(snapshottableDirs); 
1415    }
1416
1417    if (filesRemoved >= 0) {
1418      getFSNamesystem().removePathAndBlocks(src, collectedBlocks, 
1419          removedINodes);
1420    }
1421  }
1422  
  /**
   * Delete a path from the name space
   * Update the count at each ancestor directory with quota
   * <p>
   * The caller must hold the write lock (asserted on entry).
   * @param iip the inodes resolved from the path
   * @param collectedBlocks blocks collected from the deleted path
   * @param removedINodes inodes that should be removed from {@link #inodeMap}
   * @param mtime the time the inode is removed
   * @return the number of inodes deleted; 0 if no inodes are deleted; -1 if
   *         the last inode of the path does not exist or could not be removed
   *         from the namespace
   */ 
  long unprotectedDelete(INodesInPath iip, BlocksMapUpdateInfo collectedBlocks,
      List<INode> removedINodes, long mtime) throws QuotaExceededException {
    assert hasWriteLock();

    // check if target node exists
    INode targetNode = iip.getLastINode();
    if (targetNode == null) {
      return -1;
    }

    // record modification
    // (recordModification returns the inode to continue with, so the path is
    // updated to point at it)
    final int latestSnapshot = iip.getLatestSnapshotId();
    targetNode = targetNode.recordModification(latestSnapshot);
    iip.setLastINode(targetNode);

    // Remove the node from the namespace
    long removed = removeLastINode(iip);
    if (removed == -1) {
      return -1;
    }

    // set the parent's modification time
    final INodeDirectory parent = targetNode.getParent();
    parent.updateModificationTime(mtime, latestSnapshot);
    // nothing was counted as removed, so there is nothing to collect
    if (removed == 0) {
      return 0;
    }
    
    // collect block
    if (!targetNode.isInLatestSnapshot(latestSnapshot)) {
      // not captured by the latest snapshot: destroy the subtree outright
      targetNode.destroyAndCollectBlocks(collectedBlocks, removedINodes);
    } else {
      // captured by the latest snapshot: clean the current state only and
      // report the namespace count that cleanSubtree returned
      Quota.Counts counts = targetNode.cleanSubtree(Snapshot.CURRENT_STATE_ID,
          latestSnapshot, collectedBlocks, removedINodes, true);
      parent.addSpaceConsumed(-counts.get(Quota.NAMESPACE),
          -counts.get(Quota.DISKSPACE), true);
      removed = counts.get(Quota.NAMESPACE);
    }
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedDelete: "
          + targetNode.getFullPathName() + " is removed");
    }
    return removed;
  }
1476  
1477  /**
1478   * Check if the given INode (or one of its descendants) is snapshottable and
1479   * already has snapshots.
1480   * 
1481   * @param target The given INode
1482   * @param snapshottableDirs The list of directories that are snapshottable 
1483   *                          but do not have snapshots yet
1484   */
1485  private static void checkSnapshot(INode target,
1486      List<INodeDirectorySnapshottable> snapshottableDirs) throws IOException {
1487    if (target.isDirectory()) {
1488      INodeDirectory targetDir = target.asDirectory();
1489      if (targetDir.isSnapshottable()) {
1490        INodeDirectorySnapshottable ssTargetDir = 
1491            (INodeDirectorySnapshottable) targetDir;
1492        if (ssTargetDir.getNumSnapshots() > 0) {
1493          throw new IOException("The directory " + ssTargetDir.getFullPathName()
1494              + " cannot be deleted since " + ssTargetDir.getFullPathName()
1495              + " is snapshottable and already has snapshots");
1496        } else {
1497          if (snapshottableDirs != null) {
1498            snapshottableDirs.add(ssTargetDir);
1499          }
1500        }
1501      } 
1502      for (INode child : targetDir.getChildrenList(Snapshot.CURRENT_STATE_ID)) {
1503        checkSnapshot(child, snapshottableDirs);
1504      }
1505    }
1506  }
1507
1508  /**
1509   * Get a partial listing of the indicated directory
1510   *
1511   * We will stop when any of the following conditions is met:
1512   * 1) this.lsLimit files have been added
1513   * 2) needLocation is true AND enough files have been added such
1514   * that at least this.lsLimit block locations are in the response
1515   *
1516   * @param src the directory name
1517   * @param startAfter the name to start listing after
1518   * @param needLocation if block locations are returned
1519   * @return a partial listing starting after startAfter
1520   */
1521  DirectoryListing getListing(String src, byte[] startAfter,
1522      boolean needLocation) throws UnresolvedLinkException, IOException {
1523    String srcs = normalizePath(src);
1524
1525    readLock();
1526    try {
1527      if (srcs.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR)) {
1528        return getSnapshotsListing(srcs, startAfter);
1529      }
1530      final INodesInPath inodesInPath = rootDir.getLastINodeInPath(srcs, true);
1531      final int snapshot = inodesInPath.getPathSnapshotId();
1532      final INode targetNode = inodesInPath.getINode(0);
1533      if (targetNode == null)
1534        return null;
1535      
1536      if (!targetNode.isDirectory()) {
1537        return new DirectoryListing(
1538            new HdfsFileStatus[]{createFileStatus(HdfsFileStatus.EMPTY_NAME,
1539                targetNode, needLocation, snapshot)}, 0);
1540      }
1541
1542      final INodeDirectory dirInode = targetNode.asDirectory();
1543      final ReadOnlyList<INode> contents = dirInode.getChildrenList(snapshot);
1544      int startChild = INodeDirectory.nextChild(contents, startAfter);
1545      int totalNumChildren = contents.size();
1546      int numOfListing = Math.min(totalNumChildren-startChild, this.lsLimit);
1547      int locationBudget = this.lsLimit;
1548      int listingCnt = 0;
1549      HdfsFileStatus listing[] = new HdfsFileStatus[numOfListing];
1550      for (int i=0; i<numOfListing && locationBudget>0; i++) {
1551        INode cur = contents.get(startChild+i);
1552        listing[i] = createFileStatus(cur.getLocalNameBytes(), cur,
1553            needLocation, snapshot);
1554        listingCnt++;
1555        if (needLocation) {
1556            // Once we  hit lsLimit locations, stop.
1557            // This helps to prevent excessively large response payloads.
1558            // Approximate #locations with locatedBlockCount() * repl_factor
1559            LocatedBlocks blks = 
1560                ((HdfsLocatedFileStatus)listing[i]).getBlockLocations();
1561            locationBudget -= (blks == null) ? 0 :
1562               blks.locatedBlockCount() * listing[i].getReplication();
1563        }
1564      }
1565      // truncate return array if necessary
1566      if (listingCnt < numOfListing) {
1567          listing = Arrays.copyOf(listing, listingCnt);
1568      }
1569      return new DirectoryListing(
1570          listing, totalNumChildren-startChild-listingCnt);
1571    } finally {
1572      readUnlock();
1573    }
1574  }
1575  
1576  /**
1577   * Get a listing of all the snapshots of a snapshottable directory
1578   */
1579  private DirectoryListing getSnapshotsListing(String src, byte[] startAfter)
1580      throws UnresolvedLinkException, IOException {
1581    Preconditions.checkState(hasReadLock());
1582    Preconditions.checkArgument(
1583        src.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR), 
1584        "%s does not end with %s", src, HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR);
1585    
1586    final String dirPath = normalizePath(src.substring(0,
1587        src.length() - HdfsConstants.DOT_SNAPSHOT_DIR.length()));
1588    
1589    final INode node = this.getINode(dirPath);
1590    final INodeDirectorySnapshottable dirNode = INodeDirectorySnapshottable
1591        .valueOf(node, dirPath);
1592    final ReadOnlyList<Snapshot> snapshots = dirNode.getSnapshotList();
1593    int skipSize = ReadOnlyList.Util.binarySearch(snapshots, startAfter);
1594    skipSize = skipSize < 0 ? -skipSize - 1 : skipSize + 1;
1595    int numOfListing = Math.min(snapshots.size() - skipSize, this.lsLimit);
1596    final HdfsFileStatus listing[] = new HdfsFileStatus[numOfListing];
1597    for (int i = 0; i < numOfListing; i++) {
1598      Root sRoot = snapshots.get(i + skipSize).getRoot();
1599      listing[i] = createFileStatus(sRoot.getLocalNameBytes(), sRoot,
1600          Snapshot.CURRENT_STATE_ID);
1601    }
1602    return new DirectoryListing(
1603        listing, snapshots.size() - skipSize - numOfListing);
1604  }
1605
1606  /** Get the file info for a specific file.
1607   * @param src The string representation of the path to the file
1608   * @param resolveLink whether to throw UnresolvedLinkException 
1609   * @return object containing information regarding the file
1610   *         or null if file not found
1611   */
1612  HdfsFileStatus getFileInfo(String src, boolean resolveLink) 
1613      throws UnresolvedLinkException {
1614    String srcs = normalizePath(src);
1615    readLock();
1616    try {
1617      if (srcs.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR)) {
1618        return getFileInfo4DotSnapshot(srcs);
1619      }
1620      final INodesInPath inodesInPath = rootDir.getLastINodeInPath(srcs, resolveLink);
1621      final INode i = inodesInPath.getINode(0);
1622      return i == null? null: createFileStatus(HdfsFileStatus.EMPTY_NAME, i,
1623          inodesInPath.getPathSnapshotId());
1624    } finally {
1625      readUnlock();
1626    }
1627  }
1628  
1629  /**
1630   * Currently we only support "ls /xxx/.snapshot" which will return all the
1631   * snapshots of a directory. The FSCommand Ls will first call getFileInfo to
1632   * make sure the file/directory exists (before the real getListing call).
1633   * Since we do not have a real INode for ".snapshot", we return an empty
1634   * non-null HdfsFileStatus here.
1635   */
1636  private HdfsFileStatus getFileInfo4DotSnapshot(String src)
1637      throws UnresolvedLinkException {
1638    if (getINode4DotSnapshot(src) != null) {
1639      return new HdfsFileStatus(0, true, 0, 0, 0, 0, null, null, null, null,
1640          HdfsFileStatus.EMPTY_NAME, -1L, 0);
1641    }
1642    return null;
1643  }
1644
1645  private INode getINode4DotSnapshot(String src) throws UnresolvedLinkException {
1646    Preconditions.checkArgument(
1647        src.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR), 
1648        "%s does not end with %s", src, HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR);
1649    
1650    final String dirPath = normalizePath(src.substring(0,
1651        src.length() - HdfsConstants.DOT_SNAPSHOT_DIR.length()));
1652    
1653    final INode node = this.getINode(dirPath);
1654    if (node != null
1655        && node.isDirectory()
1656        && node.asDirectory() instanceof INodeDirectorySnapshottable) {
1657      return node;
1658    }
1659    return null;
1660  }
1661
1662  /**
1663   * Get the blocks associated with the file.
1664   */
1665  Block[] getFileBlocks(String src) throws UnresolvedLinkException {
1666    waitForReady();
1667    readLock();
1668    try {
1669      final INode i = rootDir.getNode(src, false);
1670      return i != null && i.isFile()? i.asFile().getBlocks(): null;
1671    } finally {
1672      readUnlock();
1673    }
1674  }
1675
1676
1677  INodesInPath getExistingPathINodes(byte[][] components)
1678      throws UnresolvedLinkException {
1679    return INodesInPath.resolve(rootDir, components);
1680  }
1681
1682  /**
1683   * Get {@link INode} associated with the file / directory.
1684   */
1685  public INode getINode(String src) throws UnresolvedLinkException {
1686    return getLastINodeInPath(src).getINode(0);
1687  }
1688
1689  /**
1690   * Get {@link INode} associated with the file / directory.
1691   */
1692  public INodesInPath getLastINodeInPath(String src)
1693       throws UnresolvedLinkException {
1694    readLock();
1695    try {
1696      return rootDir.getLastINodeInPath(src, true);
1697    } finally {
1698      readUnlock();
1699    }
1700  }
1701  
1702  /**
1703   * Get {@link INode} associated with the file / directory.
1704   */
1705  public INodesInPath getINodesInPath4Write(String src
1706      ) throws UnresolvedLinkException, SnapshotAccessControlException {
1707    readLock();
1708    try {
1709      return rootDir.getINodesInPath4Write(src, true);
1710    } finally {
1711      readUnlock();
1712    }
1713  }
1714
1715  /**
1716   * Get {@link INode} associated with the file / directory.
1717   * @throws SnapshotAccessControlException if path is in RO snapshot
1718   */
1719  public INode getINode4Write(String src) throws UnresolvedLinkException,
1720      SnapshotAccessControlException {
1721    readLock();
1722    try {
1723      return rootDir.getINode4Write(src, true);
1724    } finally {
1725      readUnlock();
1726    }
1727  }
1728
1729  /** 
1730   * Check whether the filepath could be created
1731   * @throws SnapshotAccessControlException if path is in RO snapshot
1732   */
1733  boolean isValidToCreate(String src) throws UnresolvedLinkException,
1734      SnapshotAccessControlException {
1735    String srcs = normalizePath(src);
1736    readLock();
1737    try {
1738      if (srcs.startsWith("/") && !srcs.endsWith("/")
1739          && rootDir.getINode4Write(srcs, false) == null) {
1740        return true;
1741      } else {
1742        return false;
1743      }
1744    } finally {
1745      readUnlock();
1746    }
1747  }
1748
1749  /**
1750   * Check whether the path specifies a directory
1751   */
1752  boolean isDir(String src) throws UnresolvedLinkException {
1753    src = normalizePath(src);
1754    readLock();
1755    try {
1756      INode node = rootDir.getNode(src, false);
1757      return node != null && node.isDirectory();
1758    } finally {
1759      readUnlock();
1760    }
1761  }
1762  
1763  /**
1764   * Check whether the path specifies a directory
1765   * @throws SnapshotAccessControlException if path is in RO snapshot
1766   */
1767  boolean isDirMutable(String src) throws UnresolvedLinkException,
1768      SnapshotAccessControlException {
1769    src = normalizePath(src);
1770    readLock();
1771    try {
1772      INode node = rootDir.getINode4Write(src, false);
1773      return node != null && node.isDirectory();
1774    } finally {
1775      readUnlock();
1776    }
1777  }
1778
1779  /** Updates namespace and diskspace consumed for all
1780   * directories until the parent directory of file represented by path.
1781   * 
1782   * @param path path for the file.
1783   * @param nsDelta the delta change of namespace
1784   * @param dsDelta the delta change of diskspace
1785   * @throws QuotaExceededException if the new count violates any quota limit
1786   * @throws FileNotFoundException if path does not exist.
1787   */
1788  void updateSpaceConsumed(String path, long nsDelta, long dsDelta)
1789      throws QuotaExceededException, FileNotFoundException,
1790          UnresolvedLinkException, SnapshotAccessControlException {
1791    writeLock();
1792    try {
1793      final INodesInPath iip = rootDir.getINodesInPath4Write(path, false);
1794      if (iip.getLastINode() == null) {
1795        throw new FileNotFoundException("Path not found: " + path);
1796      }
1797      updateCount(iip, nsDelta, dsDelta, true);
1798    } finally {
1799      writeUnlock();
1800    }
1801  }
1802  
1803  private void updateCount(INodesInPath iip, long nsDelta, long dsDelta,
1804      boolean checkQuota) throws QuotaExceededException {
1805    updateCount(iip, iip.getINodes().length - 1, nsDelta, dsDelta, checkQuota);
1806  }
1807
1808  /** update count of each inode with quota
1809   * 
1810   * @param iip inodes in a path
1811   * @param numOfINodes the number of inodes to update starting from index 0
1812   * @param nsDelta the delta change of namespace
1813   * @param dsDelta the delta change of diskspace
1814   * @param checkQuota if true then check if quota is exceeded
1815   * @throws QuotaExceededException if the new count violates any quota limit
1816   */
1817  private void updateCount(INodesInPath iip, int numOfINodes, 
1818                           long nsDelta, long dsDelta, boolean checkQuota)
1819                           throws QuotaExceededException {
1820    assert hasWriteLock();
1821    if (!ready) {
1822      //still initializing. do not check or update quotas.
1823      return;
1824    }
1825    final INode[] inodes = iip.getINodes();
1826    if (numOfINodes > inodes.length) {
1827      numOfINodes = inodes.length;
1828    }
1829    if (checkQuota) {
1830      verifyQuota(inodes, numOfINodes, nsDelta, dsDelta, null);
1831    }
1832    unprotectedUpdateCount(iip, numOfINodes, nsDelta, dsDelta);
1833  }
1834  
1835  /** 
1836   * update quota of each inode and check to see if quota is exceeded. 
1837   * See {@link #updateCount(INode[], int, long, long, boolean)}
1838   */ 
1839  private void updateCountNoQuotaCheck(INodesInPath inodesInPath,
1840      int numOfINodes, long nsDelta, long dsDelta) {
1841    assert hasWriteLock();
1842    try {
1843      updateCount(inodesInPath, numOfINodes, nsDelta, dsDelta, false);
1844    } catch (QuotaExceededException e) {
1845      NameNode.LOG.error("BUG: unexpected exception ", e);
1846    }
1847  }
1848  
1849  /**
1850   * updates quota without verification
1851   * callers responsibility is to make sure quota is not exceeded
1852   */
1853  private static void unprotectedUpdateCount(INodesInPath inodesInPath,
1854      int numOfINodes, long nsDelta, long dsDelta) {
1855    final INode[] inodes = inodesInPath.getINodes();
1856    for(int i=0; i < numOfINodes; i++) {
1857      if (inodes[i].isQuotaSet()) { // a directory with quota
1858        inodes[i].asDirectory().getDirectoryWithQuotaFeature()
1859            .addSpaceConsumed2Cache(nsDelta, dsDelta);
1860      }
1861    }
1862  }
1863  
1864  /** Return the name of the path represented by inodes at [0, pos] */
1865  static String getFullPathName(INode[] inodes, int pos) {
1866    StringBuilder fullPathName = new StringBuilder();
1867    if (inodes[0].isRoot()) {
1868      if (pos == 0) return Path.SEPARATOR;
1869    } else {
1870      fullPathName.append(inodes[0].getLocalName());
1871    }
1872    
1873    for (int i=1; i<=pos; i++) {
1874      fullPathName.append(Path.SEPARATOR_CHAR).append(inodes[i].getLocalName());
1875    }
1876    return fullPathName.toString();
1877  }
1878
1879  /**
1880   * @return the relative path of an inode from one of its ancestors,
1881   *         represented by an array of inodes.
1882   */
1883  private static INode[] getRelativePathINodes(INode inode, INode ancestor) {
1884    // calculate the depth of this inode from the ancestor
1885    int depth = 0;
1886    for (INode i = inode; i != null && !i.equals(ancestor); i = i.getParent()) {
1887      depth++;
1888    }
1889    INode[] inodes = new INode[depth];
1890
1891    // fill up the inodes in the path from this inode to root
1892    for (int i = 0; i < depth; i++) {
1893      if (inode == null) {
1894        NameNode.stateChangeLog.warn("Could not get full path."
1895            + " Corresponding file might have deleted already.");
1896        return null;
1897      }
1898      inodes[depth-i-1] = inode;
1899      inode = inode.getParent();
1900    }
1901    return inodes;
1902  }
1903  
1904  private static INode[] getFullPathINodes(INode inode) {
1905    return getRelativePathINodes(inode, null);
1906  }
1907  
1908  /** Return the full path name of the specified inode */
1909  static String getFullPathName(INode inode) {
1910    INode[] inodes = getFullPathINodes(inode);
1911    // inodes can be null only when its called without holding lock
1912    return inodes == null ? "" : getFullPathName(inodes, inodes.length - 1);
1913  }
1914  
1915  /**
1916   * Create a directory 
1917   * If ancestor directories do not exist, automatically create them.
1918
1919   * @param src string representation of the path to the directory
1920   * @param permissions the permission of the directory
1921   * @param isAutocreate if the permission of the directory should inherit
1922   *                          from its parent or not. u+wx is implicitly added to
1923   *                          the automatically created directories, and to the
1924   *                          given directory if inheritPermission is true
1925   * @param now creation time
1926   * @return true if the operation succeeds false otherwise
1927   * @throws FileNotFoundException if an ancestor or itself is a file
1928   * @throws QuotaExceededException if directory creation violates 
1929   *                                any quota limit
1930   * @throws UnresolvedLinkException if a symlink is encountered in src.                      
1931   * @throws SnapshotAccessControlException if path is in RO snapshot
1932   */
1933  boolean mkdirs(String src, PermissionStatus permissions,
1934      boolean inheritPermission, long now)
1935      throws FileAlreadyExistsException, QuotaExceededException, 
1936             UnresolvedLinkException, SnapshotAccessControlException,
1937             AclException {
1938    src = normalizePath(src);
1939    String[] names = INode.getPathNames(src);
1940    byte[][] components = INode.getPathComponents(names);
1941    final int lastInodeIndex = components.length - 1;
1942
1943    writeLock();
1944    try {
1945      INodesInPath iip = getExistingPathINodes(components);
1946      if (iip.isSnapshot()) {
1947        throw new SnapshotAccessControlException(
1948            "Modification on RO snapshot is disallowed");
1949      }
1950      INode[] inodes = iip.getINodes();
1951
1952      // find the index of the first null in inodes[]
1953      StringBuilder pathbuilder = new StringBuilder();
1954      int i = 1;
1955      for(; i < inodes.length && inodes[i] != null; i++) {
1956        pathbuilder.append(Path.SEPARATOR).append(names[i]);
1957        if (!inodes[i].isDirectory()) {
1958          throw new FileAlreadyExistsException("Parent path is not a directory: "
1959              + pathbuilder+ " "+inodes[i].getLocalName());
1960        }
1961      }
1962
1963      // default to creating parent dirs with the given perms
1964      PermissionStatus parentPermissions = permissions;
1965
1966      // if not inheriting and it's the last inode, there's no use in
1967      // computing perms that won't be used
1968      if (inheritPermission || (i < lastInodeIndex)) {
1969        // if inheriting (ie. creating a file or symlink), use the parent dir,
1970        // else the supplied permissions
1971        // NOTE: the permissions of the auto-created directories violate posix
1972        FsPermission parentFsPerm = inheritPermission
1973            ? inodes[i-1].getFsPermission() : permissions.getPermission();
1974        
1975        // ensure that the permissions allow user write+execute
1976        if (!parentFsPerm.getUserAction().implies(FsAction.WRITE_EXECUTE)) {
1977          parentFsPerm = new FsPermission(
1978              parentFsPerm.getUserAction().or(FsAction.WRITE_EXECUTE),
1979              parentFsPerm.getGroupAction(),
1980              parentFsPerm.getOtherAction()
1981          );
1982        }
1983        
1984        if (!parentPermissions.getPermission().equals(parentFsPerm)) {
1985          parentPermissions = new PermissionStatus(
1986              parentPermissions.getUserName(),
1987              parentPermissions.getGroupName(),
1988              parentFsPerm
1989          );
1990          // when inheriting, use same perms for entire path
1991          if (inheritPermission) permissions = parentPermissions;
1992        }
1993      }
1994      
1995      // create directories beginning from the first null index
1996      for(; i < inodes.length; i++) {
1997        pathbuilder.append(Path.SEPARATOR + names[i]);
1998        unprotectedMkdir(namesystem.allocateNewInodeId(), iip, i,
1999            components[i], (i < lastInodeIndex) ? parentPermissions
2000                : permissions, null, now);
2001        if (inodes[i] == null) {
2002          return false;
2003        }
2004        // Directory creation also count towards FilesCreated
2005        // to match count of FilesDeleted metric.
2006        if (getFSNamesystem() != null)
2007          NameNode.getNameNodeMetrics().incrFilesCreated();
2008
2009        final String cur = pathbuilder.toString();
2010        fsImage.getEditLog().logMkDir(cur, inodes[i]);
2011        if(NameNode.stateChangeLog.isDebugEnabled()) {
2012          NameNode.stateChangeLog.debug(
2013              "DIR* FSDirectory.mkdirs: created directory " + cur);
2014        }
2015      }
2016    } finally {
2017      writeUnlock();
2018    }
2019    return true;
2020  }
2021
2022  INode unprotectedMkdir(long inodeId, String src, PermissionStatus permissions,
2023                          List<AclEntry> aclEntries, long timestamp)
2024      throws QuotaExceededException, UnresolvedLinkException, AclException {
2025    assert hasWriteLock();
2026    byte[][] components = INode.getPathComponents(src);
2027    INodesInPath iip = getExistingPathINodes(components);
2028    INode[] inodes = iip.getINodes();
2029    final int pos = inodes.length - 1;
2030    unprotectedMkdir(inodeId, iip, pos, components[pos], permissions, aclEntries,
2031        timestamp);
2032    return inodes[pos];
2033  }
2034
2035  /** create a directory at index pos.
2036   * The parent path to the directory is at [0, pos-1].
2037   * All ancestors exist. Newly created one stored at index pos.
2038   */
2039  private void unprotectedMkdir(long inodeId, INodesInPath inodesInPath,
2040      int pos, byte[] name, PermissionStatus permission,
2041      List<AclEntry> aclEntries, long timestamp)
2042      throws QuotaExceededException, AclException {
2043    assert hasWriteLock();
2044    final INodeDirectory dir = new INodeDirectory(inodeId, name, permission,
2045        timestamp);
2046    if (addChild(inodesInPath, pos, dir, true)) {
2047      if (aclEntries != null) {
2048        AclStorage.updateINodeAcl(dir, aclEntries, Snapshot.CURRENT_STATE_ID);
2049      }
2050      inodesInPath.setINode(pos, dir);
2051    }
2052  }
2053  
2054  /**
2055   * Add the given child to the namespace.
2056   * @param src The full path name of the child node.
2057   * @throw QuotaExceededException is thrown if it violates quota limit
2058   */
2059  private boolean addINode(String src, INode child
2060      ) throws QuotaExceededException, UnresolvedLinkException {
2061    byte[][] components = INode.getPathComponents(src);
2062    child.setLocalName(components[components.length-1]);
2063    cacheName(child);
2064    writeLock();
2065    try {
2066      return addLastINode(getExistingPathINodes(components), child, true);
2067    } finally {
2068      writeUnlock();
2069    }
2070  }
2071
2072  /**
2073   * Verify quota for adding or moving a new INode with required 
2074   * namespace and diskspace to a given position.
2075   *  
2076   * @param inodes INodes corresponding to a path
2077   * @param pos position where a new INode will be added
2078   * @param nsDelta needed namespace
2079   * @param dsDelta needed diskspace
2080   * @param commonAncestor Last node in inodes array that is a common ancestor
2081   *          for a INode that is being moved from one location to the other.
2082   *          Pass null if a node is not being moved.
2083   * @throws QuotaExceededException if quota limit is exceeded.
2084   */
2085  private static void verifyQuota(INode[] inodes, int pos, long nsDelta,
2086      long dsDelta, INode commonAncestor) throws QuotaExceededException {
2087    if (nsDelta <= 0 && dsDelta <= 0) {
2088      // if quota is being freed or not being consumed
2089      return;
2090    }
2091
2092    // check existing components in the path
2093    for(int i = (pos > inodes.length? inodes.length: pos) - 1; i >= 0; i--) {
2094      if (commonAncestor == inodes[i]) {
2095        // Stop checking for quota when common ancestor is reached
2096        return;
2097      }
2098      final DirectoryWithQuotaFeature q
2099          = inodes[i].asDirectory().getDirectoryWithQuotaFeature();
2100      if (q != null) { // a directory with quota
2101        try {
2102          q.verifyQuota(nsDelta, dsDelta);
2103        } catch (QuotaExceededException e) {
2104          e.setPathName(getFullPathName(inodes, i));
2105          throw e;
2106        }
2107      }
2108    }
2109  }
2110  
2111  /**
2112   * Verify quota for rename operation where srcInodes[srcInodes.length-1] moves
2113   * dstInodes[dstInodes.length-1]
2114   * 
2115   * @param src directory from where node is being moved.
2116   * @param dst directory to where node is moved to.
2117   * @throws QuotaExceededException if quota limit is exceeded.
2118   */
2119  private void verifyQuotaForRename(INode[] src, INode[] dst)
2120      throws QuotaExceededException {
2121    if (!ready) {
2122      // Do not check quota if edits log is still being processed
2123      return;
2124    }
2125    int i = 0;
2126    for(; src[i] == dst[i]; i++);
2127    // src[i - 1] is the last common ancestor.
2128
2129    final Quota.Counts delta = src[src.length - 1].computeQuotaUsage();
2130    
2131    // Reduce the required quota by dst that is being removed
2132    final int dstIndex = dst.length - 1;
2133    if (dst[dstIndex] != null) {
2134      delta.subtract(dst[dstIndex].computeQuotaUsage());
2135    }
2136    verifyQuota(dst, dstIndex, delta.get(Quota.NAMESPACE),
2137        delta.get(Quota.DISKSPACE), src[i - 1]);
2138  }
2139
2140  /**
2141   * Checks file system limits (max component length and max directory items)
2142   * during a rename operation.
2143   *
2144   * @param srcIIP INodesInPath containing every inode in the rename source
2145   * @param dstIIP INodesInPath containing every inode in the rename destination
2146   * @throws PathComponentTooLongException child's name is too long.
2147   * @throws MaxDirectoryItemsExceededException too many children.
2148   */
2149  private void verifyFsLimitsForRename(INodesInPath srcIIP, INodesInPath dstIIP)
2150      throws PathComponentTooLongException, MaxDirectoryItemsExceededException {
2151    byte[] dstChildName = dstIIP.getLastLocalName();
2152    INode[] dstInodes = dstIIP.getINodes();
2153    int pos = dstInodes.length - 1;
2154    verifyMaxComponentLength(dstChildName, dstInodes, pos);
2155    // Do not enforce max directory items if renaming within same directory.
2156    if (srcIIP.getINode(-2) != dstIIP.getINode(-2)) {
2157      verifyMaxDirItems(dstInodes, pos);
2158    }
2159  }
2160
2161  /** Verify if the snapshot name is legal. */
2162  void verifySnapshotName(String snapshotName, String path)
2163      throws PathComponentTooLongException {
2164    if (snapshotName.contains(Path.SEPARATOR)) {
2165      throw new HadoopIllegalArgumentException(
2166          "Snapshot name cannot contain \"" + Path.SEPARATOR + "\"");
2167    }
2168    final byte[] bytes = DFSUtil.string2Bytes(snapshotName);
2169    verifyINodeName(bytes);
2170    verifyMaxComponentLength(bytes, path, 0);
2171  }
2172  
2173  /** Verify if the inode name is legal. */
2174  void verifyINodeName(byte[] childName) throws HadoopIllegalArgumentException {
2175    if (Arrays.equals(HdfsConstants.DOT_SNAPSHOT_DIR_BYTES, childName)) {
2176      String s = "\"" + HdfsConstants.DOT_SNAPSHOT_DIR + "\" is a reserved name.";
2177      if (!ready) {
2178        s += "  Please rename it before upgrade.";
2179      }
2180      throw new HadoopIllegalArgumentException(s);
2181    }
2182  }
2183
2184  /**
2185   * Verify child's name for fs limit.
2186   *
2187   * @param childName byte[] containing new child name
2188   * @param parentPath Object either INode[] or String containing parent path
2189   * @param pos int position of new child in path
2190   * @throws PathComponentTooLongException child's name is too long.
2191   */
2192  private void verifyMaxComponentLength(byte[] childName, Object parentPath,
2193      int pos) throws PathComponentTooLongException {
2194    if (maxComponentLength == 0) {
2195      return;
2196    }
2197
2198    final int length = childName.length;
2199    if (length > maxComponentLength) {
2200      final String p = parentPath instanceof INode[]?
2201          getFullPathName((INode[])parentPath, pos - 1): (String)parentPath;
2202      final PathComponentTooLongException e = new PathComponentTooLongException(
2203          maxComponentLength, length, p, DFSUtil.bytes2String(childName));
2204      if (ready) {
2205        throw e;
2206      } else {
2207        // Do not throw if edits log is still being processed
2208        NameNode.LOG.error("ERROR in FSDirectory.verifyINodeName", e);
2209      }
2210    }
2211  }
2212
2213  /**
2214   * Verify children size for fs limit.
2215   *
2216   * @param pathComponents INode[] containing full path of inodes to new child
2217   * @param pos int position of new child in pathComponents
2218   * @throws MaxDirectoryItemsExceededException too many children.
2219   */
2220  private void verifyMaxDirItems(INode[] pathComponents, int pos)
2221      throws MaxDirectoryItemsExceededException {
2222
2223    final INodeDirectory parent = pathComponents[pos-1].asDirectory();
2224    final int count = parent.getChildrenList(Snapshot.CURRENT_STATE_ID).size();
2225    if (count >= maxDirItems) {
2226      final MaxDirectoryItemsExceededException e
2227          = new MaxDirectoryItemsExceededException(maxDirItems, count);
2228      if (ready) {
2229        e.setPathName(getFullPathName(pathComponents, pos - 1));
2230        throw e;
2231      } else {
2232        // Do not throw if edits log is still being processed
2233        NameNode.LOG.error("FSDirectory.verifyMaxDirItems: "
2234            + e.getLocalizedMessage());
2235      }
2236    }
2237  }
2238  
2239  /**
2240   * The same as {@link #addChild(INodesInPath, int, INode, boolean)}
2241   * with pos = length - 1.
2242   */
2243  private boolean addLastINode(INodesInPath inodesInPath,
2244      INode inode, boolean checkQuota) throws QuotaExceededException {
2245    final int pos = inodesInPath.getINodes().length - 1;
2246    return addChild(inodesInPath, pos, inode, checkQuota);
2247  }
2248
  /** Add a node child to the inodes at index pos. 
   * Its ancestors are stored at [0, pos-1].
   * The quota counts of the ancestors are charged before the child is added
   * and rolled back if the add fails or throws.
   * @param iip inodes in the path
   * @param pos index at which the child is added
   * @param child the inode to add
   * @param checkQuota if true, verify quota and the file system limits
   * @return false if the child with this name already exists; 
   *         otherwise return true;
   * @throws QuotaExceededException is thrown if it violates quota limit
   */
  private boolean addChild(INodesInPath iip, int pos,
      INode child, boolean checkQuota) throws QuotaExceededException {
    final INode[] inodes = iip.getINodes();
    // Disallow creation of /.reserved. This may be created when loading
    // editlog/fsimage during upgrade since /.reserved was a valid name in older
    // release. This may also be called when a user tries to create a file
    // or directory /.reserved.
    if (pos == 1 && inodes[0] == rootDir && isReservedName(child)) {
      throw new HadoopIllegalArgumentException(
          "File name \"" + child.getLocalName() + "\" is reserved and cannot "
              + "be created. If this is during upgrade change the name of the "
              + "existing file or directory to another name before upgrading "
              + "to the new release.");
    }
    // The filesystem limits are not really quotas, so this check may appear
    // odd. It's because a rename operation deletes the src, tries to add
    // to the dest, if that fails, re-adds the src from whence it came.
    // The rename code disables the quota when it's restoring to the
    // original location because a quota violation would cause the item
    // to go "poof".  The fs limits must be bypassed for the same reason.
    if (checkQuota) {
      verifyMaxComponentLength(child.getLocalNameBytes(), inodes, pos);
      verifyMaxDirItems(inodes, pos);
    }
    // always verify inode name
    verifyINodeName(child.getLocalNameBytes());
    
    // Charge the child's usage to the ancestors up front.
    final Quota.Counts counts = child.computeQuotaUsage();
    updateCount(iip, pos,
        counts.get(Quota.NAMESPACE), counts.get(Quota.DISKSPACE), checkQuota);
    // A child that already has a parent is treated as being renamed.
    boolean isRename = (child.getParent() != null);
    final INodeDirectory parent = inodes[pos-1].asDirectory();
    boolean added = false;
    try {
      added = parent.addChild(child, true, iip.getLatestSnapshotId());
    } catch (QuotaExceededException e) {
      // Undo the charge made above before propagating the failure.
      updateCountNoQuotaCheck(iip, pos,
          -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE));
      throw e;
    }
    if (!added) {
      // The child was not added: undo the charge made above.
      updateCountNoQuotaCheck(iip, pos,
          -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE));
    } else {
      // Refresh the parent slot from the child's parent reference.
      iip.setINode(pos - 1, child.getParent());
      if (!isRename) {
        AclStorage.copyINodeDefaultAcl(child);
      }
      addToInodeMap(child);
    }
    return added;
  }
2307  
2308  private boolean addLastINodeNoQuotaCheck(INodesInPath inodesInPath, INode i) {
2309    try {
2310      return addLastINode(inodesInPath, i, false);
2311    } catch (QuotaExceededException e) {
2312      NameNode.LOG.warn("FSDirectory.addChildNoQuotaCheck - unexpected", e);
2313    }
2314    return false;
2315  }
2316  
2317  /**
2318   * Remove the last inode in the path from the namespace.
2319   * Count of each ancestor with quota is also updated.
2320   * @return -1 for failing to remove;
2321   *          0 for removing a reference whose referred inode has other 
2322   *            reference nodes;
2323   *         >0 otherwise. 
2324   */
2325  private long removeLastINode(final INodesInPath iip)
2326      throws QuotaExceededException {
2327    final int latestSnapshot = iip.getLatestSnapshotId();
2328    final INode last = iip.getLastINode();
2329    final INodeDirectory parent = iip.getINode(-2).asDirectory();
2330    if (!parent.removeChild(last, latestSnapshot)) {
2331      return -1;
2332    }
2333    INodeDirectory newParent = last.getParent();
2334    if (parent != newParent) {
2335      iip.setINode(-2, newParent);
2336    }
2337    
2338    if (!last.isInLatestSnapshot(latestSnapshot)) {
2339      final Quota.Counts counts = last.computeQuotaUsage();
2340      updateCountNoQuotaCheck(iip, iip.getINodes().length - 1,
2341          -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE));
2342
2343      if (INodeReference.tryRemoveReference(last) > 0) {
2344        return 0;
2345      } else {
2346        return counts.get(Quota.NAMESPACE);
2347      }
2348    }
2349    return 1;
2350  }
2351  
2352  /**
2353   */
2354  String normalizePath(String src) {
2355    if (src.length() > 1 && src.endsWith("/")) {
2356      src = src.substring(0, src.length() - 1);
2357    }
2358    return src;
2359  }
2360
2361  ContentSummary getContentSummary(String src) 
2362    throws FileNotFoundException, UnresolvedLinkException {
2363    String srcs = normalizePath(src);
2364    readLock();
2365    try {
2366      INode targetNode = rootDir.getNode(srcs, false);
2367      if (targetNode == null) {
2368        throw new FileNotFoundException("File does not exist: " + srcs);
2369      }
2370      else {
2371        // Make it relinquish locks everytime contentCountLimit entries are
2372        // processed. 0 means disabled. I.e. blocking for the entire duration.
2373        ContentSummaryComputationContext cscc =
2374
2375            new ContentSummaryComputationContext(this, getFSNamesystem(),
2376            contentCountLimit);
2377        ContentSummary cs = targetNode.computeAndConvertContentSummary(cscc);
2378        yieldCount += cscc.getYieldCount();
2379        return cs;
2380      }
2381    } finally {
2382      readUnlock();
2383    }
2384  }
2385
2386  @VisibleForTesting
2387  public long getYieldCount() {
2388    return yieldCount;
2389  }
2390
2391  public INodeMap getINodeMap() {
2392    return inodeMap;
2393  }
2394  
2395  /**
2396   * This method is always called with writeLock of FSDirectory held.
2397   */
2398  public final void addToInodeMap(INode inode) {
2399    if (inode instanceof INodeWithAdditionalFields) {
2400      inodeMap.put((INodeWithAdditionalFields)inode);
2401    }
2402  }
2403
2404  
2405  /**
2406   * This method is always called with writeLock of FSDirectory held.
2407   */
2408  public final void removeFromInodeMap(List<? extends INode> inodes) {
2409    if (inodes != null) {
2410      for (INode inode : inodes) {
2411        if (inode != null && inode instanceof INodeWithAdditionalFields) {
2412          inodeMap.remove(inode);
2413        }
2414      }
2415    }
2416  }
2417  
2418  /**
2419   * Get the inode from inodeMap based on its inode id.
2420   * @param id The given id
2421   * @return The inode associated with the given id
2422   */
2423  public INode getInode(long id) {
2424    readLock();
2425    try {
2426      return inodeMap.get(id);
2427    } finally {
2428      readUnlock();
2429    }
2430  }
2431  
2432  @VisibleForTesting
2433  int getInodeMapSize() {
2434    return inodeMap.size();
2435  }
2436  
2437  /**
2438   * See {@link ClientProtocol#setQuota(String, long, long)} for the contract.
2439   * Sets quota for for a directory.
2440   * @returns INodeDirectory if any of the quotas have changed. null other wise.
2441   * @throws FileNotFoundException if the path does not exist.
2442   * @throws PathIsNotDirectoryException if the path is not a directory.
2443   * @throws QuotaExceededException if the directory tree size is 
2444   *                                greater than the given quota
2445   * @throws UnresolvedLinkException if a symlink is encountered in src.
2446   * @throws SnapshotAccessControlException if path is in RO snapshot
2447   */
2448  INodeDirectory unprotectedSetQuota(String src, long nsQuota, long dsQuota)
2449      throws FileNotFoundException, PathIsNotDirectoryException,
2450      QuotaExceededException, UnresolvedLinkException,
2451      SnapshotAccessControlException {
2452    assert hasWriteLock();
2453    // sanity check
2454    if ((nsQuota < 0 && nsQuota != HdfsConstants.QUOTA_DONT_SET && 
2455         nsQuota < HdfsConstants.QUOTA_RESET) || 
2456        (dsQuota < 0 && dsQuota != HdfsConstants.QUOTA_DONT_SET && 
2457          dsQuota < HdfsConstants.QUOTA_RESET)) {
2458      throw new IllegalArgumentException("Illegal value for nsQuota or " +
2459                                         "dsQuota : " + nsQuota + " and " +
2460                                         dsQuota);
2461    }
2462    
2463    String srcs = normalizePath(src);
2464    final INodesInPath iip = rootDir.getINodesInPath4Write(srcs, true);
2465    INodeDirectory dirNode = INodeDirectory.valueOf(iip.getLastINode(), srcs);
2466    if (dirNode.isRoot() && nsQuota == HdfsConstants.QUOTA_RESET) {
2467      throw new IllegalArgumentException("Cannot clear namespace quota on root.");
2468    } else { // a directory inode
2469      final Quota.Counts oldQuota = dirNode.getQuotaCounts();
2470      final long oldNsQuota = oldQuota.get(Quota.NAMESPACE);
2471      final long oldDsQuota = oldQuota.get(Quota.DISKSPACE);
2472      if (nsQuota == HdfsConstants.QUOTA_DONT_SET) {
2473        nsQuota = oldNsQuota;
2474      }
2475      if (dsQuota == HdfsConstants.QUOTA_DONT_SET) {
2476        dsQuota = oldDsQuota;
2477      }        
2478      if (oldNsQuota == nsQuota && oldDsQuota == dsQuota) {
2479        return null;
2480      }
2481
2482      final int latest = iip.getLatestSnapshotId();
2483      dirNode = dirNode.recordModification(latest);
2484      dirNode.setQuota(nsQuota, dsQuota);
2485      return dirNode;
2486    }
2487  }
2488  
2489  /**
2490   * See {@link ClientProtocol#setQuota(String, long, long)} for the contract.
2491   * @throws SnapshotAccessControlException if path is in RO snapshot
2492   * @see #unprotectedSetQuota(String, long, long)
2493   */
2494  void setQuota(String src, long nsQuota, long dsQuota) 
2495      throws FileNotFoundException, PathIsNotDirectoryException,
2496      QuotaExceededException, UnresolvedLinkException,
2497      SnapshotAccessControlException {
2498    writeLock();
2499    try {
2500      INodeDirectory dir = unprotectedSetQuota(src, nsQuota, dsQuota);
2501      if (dir != null) {
2502        final Quota.Counts q = dir.getQuotaCounts();
2503        fsImage.getEditLog().logSetQuota(src,
2504            q.get(Quota.NAMESPACE), q.get(Quota.DISKSPACE));
2505      }
2506    } finally {
2507      writeUnlock();
2508    }
2509  }
2510  
2511  long totalInodes() {
2512    readLock();
2513    try {
2514      return rootDir.getDirectoryWithQuotaFeature().getSpaceConsumed()
2515          .get(Quota.NAMESPACE);
2516    } finally {
2517      readUnlock();
2518    }
2519  }
2520
2521  /**
2522   * Sets the access time on the file/directory. Logs it in the transaction log.
2523   */
2524  void setTimes(String src, INode inode, long mtime, long atime, boolean force,
2525      int latestSnapshotId) throws QuotaExceededException {
2526    boolean status = false;
2527    writeLock();
2528    try {
2529      status = unprotectedSetTimes(inode, mtime, atime, force, latestSnapshotId);
2530    } finally {
2531      writeUnlock();
2532    }
2533    if (status) {
2534      fsImage.getEditLog().logTimes(src, mtime, atime);
2535    }
2536  }
2537
2538  boolean unprotectedSetTimes(String src, long mtime, long atime, boolean force) 
2539      throws UnresolvedLinkException, QuotaExceededException {
2540    assert hasWriteLock();
2541    final INodesInPath i = getLastINodeInPath(src); 
2542    return unprotectedSetTimes(i.getLastINode(), mtime, atime, force,
2543        i.getLatestSnapshotId());
2544  }
2545
2546  private boolean unprotectedSetTimes(INode inode, long mtime,
2547      long atime, boolean force, int latest) throws QuotaExceededException {
2548    assert hasWriteLock();
2549    boolean status = false;
2550    if (mtime != -1) {
2551      inode = inode.setModificationTime(mtime, latest);
2552      status = true;
2553    }
2554    if (atime != -1) {
2555      long inodeTime = inode.getAccessTime();
2556
2557      // if the last access time update was within the last precision interval, then
2558      // no need to store access time
2559      if (atime <= inodeTime + getFSNamesystem().getAccessTimePrecision() && !force) {
2560        status =  false;
2561      } else {
2562        inode.setAccessTime(atime, latest);
2563        status = true;
2564      }
2565    } 
2566    return status;
2567  }
2568
2569  /**
2570   * Reset the entire namespace tree.
2571   */
2572  void reset() {
2573    writeLock();
2574    try {
2575      setReady(false);
2576      rootDir = createRoot(getFSNamesystem());
2577      inodeMap.clear();
2578      addToInodeMap(rootDir);
2579      nameCache.reset();
2580    } finally {
2581      writeUnlock();
2582    }
2583  }
2584
2585  /**
2586   * create an hdfs file status from an inode
2587   * 
2588   * @param path the local name
2589   * @param node inode
2590   * @param needLocation if block locations need to be included or not
2591   * @return a file status
2592   * @throws IOException if any error occurs
2593   */
2594  private HdfsFileStatus createFileStatus(byte[] path, INode node,
2595      boolean needLocation, int snapshot) throws IOException {
2596    if (needLocation) {
2597      return createLocatedFileStatus(path, node, snapshot);
2598    } else {
2599      return createFileStatus(path, node, snapshot);
2600    }
2601  }
2602  /**
2603   * Create FileStatus by file INode 
2604   */
2605   HdfsFileStatus createFileStatus(byte[] path, INode node,
2606       int snapshot) {
2607     long size = 0;     // length is zero for directories
2608     short replication = 0;
2609     long blocksize = 0;
2610     if (node.isFile()) {
2611       final INodeFile fileNode = node.asFile();
2612       size = fileNode.computeFileSize(snapshot);
2613       replication = fileNode.getFileReplication(snapshot);
2614       blocksize = fileNode.getPreferredBlockSize();
2615     }
2616     int childrenNum = node.isDirectory() ? 
2617         node.asDirectory().getChildrenNum(snapshot) : 0;
2618         
2619     return new HdfsFileStatus(
2620        size, 
2621        node.isDirectory(), 
2622        replication, 
2623        blocksize,
2624        node.getModificationTime(snapshot),
2625        node.getAccessTime(snapshot),
2626        getPermissionForFileStatus(node, snapshot),
2627        node.getUserName(snapshot),
2628        node.getGroupName(snapshot),
2629        node.isSymlink() ? node.asSymlink().getSymlink() : null,
2630        path,
2631        node.getId(),
2632        childrenNum);
2633  }
2634
2635  /**
2636   * Create FileStatus with location info by file INode
2637   */
2638  private HdfsLocatedFileStatus createLocatedFileStatus(byte[] path,
2639      INode node, int snapshot) throws IOException {
2640    assert hasReadLock();
2641    long size = 0; // length is zero for directories
2642    short replication = 0;
2643    long blocksize = 0;
2644    LocatedBlocks loc = null;
2645    if (node.isFile()) {
2646      final INodeFile fileNode = node.asFile();
2647      size = fileNode.computeFileSize(snapshot);
2648      replication = fileNode.getFileReplication(snapshot);
2649      blocksize = fileNode.getPreferredBlockSize();
2650
2651      final boolean inSnapshot = snapshot != Snapshot.CURRENT_STATE_ID; 
2652      final boolean isUc = inSnapshot ? false : fileNode.isUnderConstruction();
2653      final long fileSize = !inSnapshot && isUc ? 
2654          fileNode.computeFileSizeNotIncludingLastUcBlock() : size;
2655      loc = getFSNamesystem().getBlockManager().createLocatedBlocks(
2656          fileNode.getBlocks(), fileSize, isUc, 0L, size, false,
2657          inSnapshot);
2658      if (loc == null) {
2659        loc = new LocatedBlocks();
2660      }
2661    }
2662    int childrenNum = node.isDirectory() ? 
2663        node.asDirectory().getChildrenNum(snapshot) : 0;
2664        
2665    HdfsLocatedFileStatus status =
2666        new HdfsLocatedFileStatus(size, node.isDirectory(), replication,
2667          blocksize, node.getModificationTime(snapshot),
2668          node.getAccessTime(snapshot),
2669          getPermissionForFileStatus(node, snapshot),
2670          node.getUserName(snapshot), node.getGroupName(snapshot),
2671          node.isSymlink() ? node.asSymlink().getSymlink() : null, path,
2672          node.getId(), loc, childrenNum);
2673        // Set caching information for the located blocks.
2674    if (loc != null) {
2675      CacheManager cacheManager = namesystem.getCacheManager();
2676      for (LocatedBlock lb: loc.getLocatedBlocks()) {
2677        cacheManager.setCachedLocations(lb);
2678      }
2679    }
2680    return status;
2681  }
2682
2683  /**
2684   * Returns an inode's FsPermission for use in an outbound FileStatus.  If the
2685   * inode has an ACL, then this method will convert to a FsAclPermission.
2686   *
2687   * @param node INode to check
2688   * @param snapshot int snapshot ID
2689   * @return FsPermission from inode, with ACL bit on if the inode has an ACL
2690   */
2691  private static FsPermission getPermissionForFileStatus(INode node,
2692      int snapshot) {
2693    FsPermission perm = node.getFsPermission(snapshot);
2694    if (node.getAclFeature(snapshot) != null) {
2695      perm = new FsAclPermission(perm);
2696    }
2697    return perm;
2698  }
2699    
2700  /**
2701   * Add the given symbolic link to the fs. Record it in the edits log.
2702   */
2703  INodeSymlink addSymlink(String path, String target,
2704      PermissionStatus dirPerms, boolean createParent, boolean logRetryCache)
2705      throws UnresolvedLinkException, FileAlreadyExistsException,
2706      QuotaExceededException, SnapshotAccessControlException, AclException {
2707    waitForReady();
2708
2709    final long modTime = now();
2710    if (createParent) {
2711      final String parent = new Path(path).getParent().toString();
2712      if (!mkdirs(parent, dirPerms, true, modTime)) {
2713        return null;
2714      }
2715    }
2716    final String userName = dirPerms.getUserName();
2717    INodeSymlink newNode  = null;
2718    long id = namesystem.allocateNewInodeId();
2719    writeLock();
2720    try {
2721      newNode = unprotectedAddSymlink(id, path, target, modTime, modTime,
2722          new PermissionStatus(userName, null, FsPermission.getDefault()));
2723    } finally {
2724      writeUnlock();
2725    }
2726    if (newNode == null) {
2727      NameNode.stateChangeLog.info("DIR* addSymlink: failed to add " + path);
2728      return null;
2729    }
2730    fsImage.getEditLog().logSymlink(path, target, modTime, modTime, newNode,
2731        logRetryCache);
2732    
2733    if(NameNode.stateChangeLog.isDebugEnabled()) {
2734      NameNode.stateChangeLog.debug("DIR* addSymlink: " + path + " is added");
2735    }
2736    return newNode;
2737  }
2738
2739  /**
2740   * Add the specified path into the namespace. Invoked from edit log processing.
2741   */
2742  INodeSymlink unprotectedAddSymlink(long id, String path, String target,
2743      long mtime, long atime, PermissionStatus perm)
2744      throws UnresolvedLinkException, QuotaExceededException {
2745    assert hasWriteLock();
2746    final INodeSymlink symlink = new INodeSymlink(id, null, perm, mtime, atime,
2747        target);
2748    return addINode(path, symlink) ? symlink : null;
2749  }
2750
2751  void modifyAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
2752    writeLock();
2753    try {
2754      List<AclEntry> newAcl = unprotectedModifyAclEntries(src, aclSpec);
2755      fsImage.getEditLog().logSetAcl(src, newAcl);
2756    } finally {
2757      writeUnlock();
2758    }
2759  }
2760
2761  private List<AclEntry> unprotectedModifyAclEntries(String src,
2762      List<AclEntry> aclSpec) throws IOException {
2763    assert hasWriteLock();
2764    INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2765    INode inode = resolveLastINode(src, iip);
2766    int snapshotId = iip.getLatestSnapshotId();
2767    List<AclEntry> existingAcl = AclStorage.readINodeLogicalAcl(inode);
2768    List<AclEntry> newAcl = AclTransformation.mergeAclEntries(existingAcl,
2769      aclSpec);
2770    AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
2771    return newAcl;
2772  }
2773
2774  void removeAclEntries(String src, List<AclEntry> aclSpec) throws IOException {
2775    writeLock();
2776    try {
2777      List<AclEntry> newAcl = unprotectedRemoveAclEntries(src, aclSpec);
2778      fsImage.getEditLog().logSetAcl(src, newAcl);
2779    } finally {
2780      writeUnlock();
2781    }
2782  }
2783
2784  private List<AclEntry> unprotectedRemoveAclEntries(String src,
2785      List<AclEntry> aclSpec) throws IOException {
2786    assert hasWriteLock();
2787    INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2788    INode inode = resolveLastINode(src, iip);
2789    int snapshotId = iip.getLatestSnapshotId();
2790    List<AclEntry> existingAcl = AclStorage.readINodeLogicalAcl(inode);
2791    List<AclEntry> newAcl = AclTransformation.filterAclEntriesByAclSpec(
2792      existingAcl, aclSpec);
2793    AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
2794    return newAcl;
2795  }
2796
2797  void removeDefaultAcl(String src) throws IOException {
2798    writeLock();
2799    try {
2800      List<AclEntry> newAcl = unprotectedRemoveDefaultAcl(src);
2801      fsImage.getEditLog().logSetAcl(src, newAcl);
2802    } finally {
2803      writeUnlock();
2804    }
2805  }
2806
2807  private List<AclEntry> unprotectedRemoveDefaultAcl(String src)
2808      throws IOException {
2809    assert hasWriteLock();
2810    INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2811    INode inode = resolveLastINode(src, iip);
2812    int snapshotId = iip.getLatestSnapshotId();
2813    List<AclEntry> existingAcl = AclStorage.readINodeLogicalAcl(inode);
2814    List<AclEntry> newAcl = AclTransformation.filterDefaultAclEntries(
2815      existingAcl);
2816    AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
2817    return newAcl;
2818  }
2819
2820  void removeAcl(String src) throws IOException {
2821    writeLock();
2822    try {
2823      unprotectedRemoveAcl(src);
2824      fsImage.getEditLog().logSetAcl(src, AclFeature.EMPTY_ENTRY_LIST);
2825    } finally {
2826      writeUnlock();
2827    }
2828  }
2829
2830  private void unprotectedRemoveAcl(String src) throws IOException {
2831    assert hasWriteLock();
2832    INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2833    INode inode = resolveLastINode(src, iip);
2834    int snapshotId = iip.getLatestSnapshotId();
2835    AclStorage.removeINodeAcl(inode, snapshotId);
2836  }
2837
2838  void setAcl(String src, List<AclEntry> aclSpec) throws IOException {
2839    writeLock();
2840    try {
2841      List<AclEntry> newAcl = unprotectedSetAcl(src, aclSpec);
2842      fsImage.getEditLog().logSetAcl(src, newAcl);
2843    } finally {
2844      writeUnlock();
2845    }
2846  }
2847
2848  List<AclEntry> unprotectedSetAcl(String src, List<AclEntry> aclSpec)
2849      throws IOException {
2850    // ACL removal is logged to edits as OP_SET_ACL with an empty list.
2851    if (aclSpec.isEmpty()) {
2852      unprotectedRemoveAcl(src);
2853      return AclFeature.EMPTY_ENTRY_LIST;
2854    }
2855
2856    assert hasWriteLock();
2857    INodesInPath iip = rootDir.getINodesInPath4Write(normalizePath(src), true);
2858    INode inode = resolveLastINode(src, iip);
2859    int snapshotId = iip.getLatestSnapshotId();
2860    List<AclEntry> existingAcl = AclStorage.readINodeLogicalAcl(inode);
2861    List<AclEntry> newAcl = AclTransformation.replaceAclEntries(existingAcl,
2862      aclSpec);
2863    AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
2864    return newAcl;
2865  }
2866
2867  AclStatus getAclStatus(String src) throws IOException {
2868    String srcs = normalizePath(src);
2869    readLock();
2870    try {
2871      // There is no real inode for the path ending in ".snapshot", so return a
2872      // non-null, unpopulated AclStatus.  This is similar to getFileInfo.
2873      if (srcs.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR) &&
2874          getINode4DotSnapshot(srcs) != null) {
2875        return new AclStatus.Builder().owner("").group("").build();
2876      }
2877      INodesInPath iip = rootDir.getLastINodeInPath(srcs, true);
2878      INode inode = resolveLastINode(src, iip);
2879      int snapshotId = iip.getPathSnapshotId();
2880      List<AclEntry> acl = AclStorage.readINodeAcl(inode, snapshotId);
2881      return new AclStatus.Builder()
2882          .owner(inode.getUserName()).group(inode.getGroupName())
2883          .stickyBit(inode.getFsPermission(snapshotId).getStickyBit())
2884          .addEntries(acl).build();
2885    } finally {
2886      readUnlock();
2887    }
2888  }
2889
2890  private static INode resolveLastINode(String src, INodesInPath iip)
2891      throws FileNotFoundException {
2892    INode inode = iip.getLastINode();
2893    if (inode == null)
2894      throw new FileNotFoundException("cannot find " + src);
2895    return inode;
2896  }
2897
2898  /**
2899   * Caches frequently used file names to reuse file name objects and
2900   * reduce heap size.
2901   */
2902  void cacheName(INode inode) {
2903    // Name is cached only for files
2904    if (!inode.isFile()) {
2905      return;
2906    }
2907    ByteArray name = new ByteArray(inode.getLocalNameBytes());
2908    name = nameCache.put(name);
2909    if (name != null) {
2910      inode.setLocalName(name.getBytes());
2911    }
2912  }
2913  
2914  void shutdown() {
2915    nameCache.reset();
2916    inodeMap.clear();
2917  }
2918  
2919  /**
2920   * Given an INode get all the path complents leading to it from the root.
2921   * If an Inode corresponding to C is given in /A/B/C, the returned
2922   * patch components will be {root, A, B, C}
2923   */
2924  static byte[][] getPathComponents(INode inode) {
2925    List<byte[]> components = new ArrayList<byte[]>();
2926    components.add(0, inode.getLocalNameBytes());
2927    while(inode.getParent() != null) {
2928      components.add(0, inode.getParent().getLocalNameBytes());
2929      inode = inode.getParent();
2930    }
2931    return components.toArray(new byte[components.size()][]);
2932  }
2933  
2934  /**
2935   * @return path components for reserved path, else null.
2936   */
2937  static byte[][] getPathComponentsForReservedPath(String src) {
2938    return !isReservedName(src) ? null : INode.getPathComponents(src);
2939  }
2940  
2941  /**
2942   * Resolve the path of /.reserved/.inodes/<inodeid>/... to a regular path
2943   * 
2944   * @param src path that is being processed
2945   * @param pathComponents path components corresponding to the path
2946   * @param fsd FSDirectory
2947   * @return if the path indicates an inode, return path after replacing upto
2948   *         <inodeid> with the corresponding path of the inode, else the path
2949   *         in {@code src} as is.
2950   * @throws FileNotFoundException if inodeid is invalid
2951   */
2952  static String resolvePath(String src, byte[][] pathComponents, FSDirectory fsd)
2953      throws FileNotFoundException {
2954    if (pathComponents == null || pathComponents.length <= 3) {
2955      return src;
2956    }
2957    // Not /.reserved/.inodes
2958    if (!Arrays.equals(DOT_RESERVED, pathComponents[1])
2959        || !Arrays.equals(DOT_INODES, pathComponents[2])) { // Not .inodes path
2960      return src;
2961    }
2962    final String inodeId = DFSUtil.bytes2String(pathComponents[3]);
2963    long id = 0;
2964    try {
2965      id = Long.valueOf(inodeId);
2966    } catch (NumberFormatException e) {
2967      throw new FileNotFoundException("Invalid inode path: " + src);
2968    }
2969    if (id == INodeId.ROOT_INODE_ID && pathComponents.length == 4) {
2970      return Path.SEPARATOR;
2971    }
2972    INode inode = fsd.getInode(id);
2973    if (inode == null) {
2974      throw new FileNotFoundException(
2975          "File for given inode path does not exist: " + src);
2976    }
2977    
2978    // Handle single ".." for NFS lookup support.
2979    if ((pathComponents.length > 4)
2980        && DFSUtil.bytes2String(pathComponents[4]).equals("..")) {
2981      INode parent = inode.getParent();
2982      if (parent == null || parent.getId() == INodeId.ROOT_INODE_ID) {
2983        // inode is root, or its parent is root.
2984        return Path.SEPARATOR;
2985      } else {
2986        return parent.getFullPathName();
2987      }
2988    }
2989
2990    StringBuilder path = id == INodeId.ROOT_INODE_ID ? new StringBuilder()
2991        : new StringBuilder(inode.getFullPathName());
2992    for (int i = 4; i < pathComponents.length; i++) {
2993      path.append(Path.SEPARATOR).append(DFSUtil.bytes2String(pathComponents[i]));
2994    }
2995    if (NameNode.LOG.isDebugEnabled()) {
2996      NameNode.LOG.debug("Resolved path is " + path);
2997    }
2998    return path.toString();
2999  }
3000  
3001  /** Check if a given inode name is reserved */
3002  public static boolean isReservedName(INode inode) {
3003    return CHECK_RESERVED_FILE_NAMES
3004        && Arrays.equals(inode.getLocalNameBytes(), DOT_RESERVED);
3005  }
3006  
3007  /** Check if a given path is reserved */
3008  public static boolean isReservedName(String src) {
3009    return src.startsWith(DOT_RESERVED_PATH_PREFIX);
3010  }
3011}