001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import static org.apache.hadoop.util.Time.now; 021 022import java.io.DataInput; 023import java.io.DataInputStream; 024import java.io.File; 025import java.io.FileInputStream; 026import java.io.FileNotFoundException; 027import java.io.IOException; 028import java.security.DigestInputStream; 029import java.security.MessageDigest; 030import java.util.Arrays; 031import java.util.Collection; 032import java.util.Map; 033import java.util.TreeMap; 034 035import org.apache.commons.logging.Log; 036import org.apache.hadoop.classification.InterfaceAudience; 037import org.apache.hadoop.classification.InterfaceStability; 038import org.apache.hadoop.conf.Configuration; 039import org.apache.hadoop.fs.FileSystem; 040import org.apache.hadoop.fs.Path; 041import org.apache.hadoop.fs.PathIsNotDirectoryException; 042import org.apache.hadoop.fs.UnresolvedLinkException; 043import org.apache.hadoop.fs.permission.PermissionStatus; 044import org.apache.hadoop.hdfs.DFSUtil; 045import org.apache.hadoop.hdfs.protocol.HdfsConstants; 046import org.apache.hadoop.hdfs.protocol.LayoutFlags; 047import org.apache.hadoop.hdfs.protocol.LayoutVersion; 048import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; 049import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; 050import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; 051import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; 052import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 053import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException; 054import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList; 055import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable; 056import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; 057import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat; 058import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap; 059import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; 060import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 061import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter; 062import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; 063import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; 064import org.apache.hadoop.io.IOUtils; 065import org.apache.hadoop.io.MD5Hash; 066import org.apache.hadoop.io.Text; 067import org.apache.hadoop.util.StringUtils; 068 069import com.google.common.base.Preconditions; 070import com.google.common.annotations.VisibleForTesting; 071 072/** 073 * This class loads and stores the FSImage of the NameNode. The file 074 * src/main/proto/fsimage.proto describes the on-disk layout of the FSImage. 075 */ 076@InterfaceAudience.Private 077@InterfaceStability.Evolving 078public class FSImageFormat { 079 private static final Log LOG = FSImage.LOG; 080 081 // Static-only class 082 private FSImageFormat() {} 083 084 interface AbstractLoader { 085 MD5Hash getLoadedImageMd5(); 086 long getLoadedImageTxId(); 087 } 088 089 static class LoaderDelegator implements AbstractLoader { 090 private AbstractLoader impl; 091 private final Configuration conf; 092 private final FSNamesystem fsn; 093 094 LoaderDelegator(Configuration conf, FSNamesystem fsn) { 095 this.conf = conf; 096 this.fsn = fsn; 097 } 098 099 @Override 100 public MD5Hash getLoadedImageMd5() { 101 return impl.getLoadedImageMd5(); 102 } 103 104 @Override 105 public long getLoadedImageTxId() { 106 return impl.getLoadedImageTxId(); 107 } 108 109 public void load(File file) throws IOException { 110 Preconditions.checkState(impl == null, "Image already loaded!"); 111 112 FileInputStream is = null; 113 try { 114 is = new FileInputStream(file); 115 byte[] magic = new byte[FSImageUtil.MAGIC_HEADER.length]; 116 IOUtils.readFully(is, magic, 0, magic.length); 117 if (Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) { 118 FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader( 119 conf, fsn); 120 impl = loader; 121 loader.load(file); 122 } else { 123 Loader loader = new Loader(conf, fsn); 124 impl = loader; 125 loader.load(file); 126 } 127 128 } finally { 129 IOUtils.cleanup(LOG, is); 130 } 131 } 132 } 133 134 /** 135 * Construct a loader class to load the image. It chooses the loader based on 136 * the layout version. 137 */ 138 public static LoaderDelegator newLoader(Configuration conf, FSNamesystem fsn) { 139 return new LoaderDelegator(conf, fsn); 140 } 141 142 /** 143 * A one-shot class responsible for loading an image. The load() function 144 * should be called once, after which the getter methods may be used to retrieve 145 * information about the image that was loaded, if loading was successful. 146 */ 147 public static class Loader implements AbstractLoader { 148 private final Configuration conf; 149 /** which namesystem this loader is working for */ 150 private final FSNamesystem namesystem; 151 152 /** Set to true once a file has been loaded using this loader. */ 153 private boolean loaded = false; 154 155 /** The transaction ID of the last edit represented by the loaded file */ 156 private long imgTxId; 157 /** The MD5 sum of the loaded file */ 158 private MD5Hash imgDigest; 159 160 private Map<Integer, Snapshot> snapshotMap = null; 161 private final ReferenceMap referenceMap = new ReferenceMap(); 162 163 Loader(Configuration conf, FSNamesystem namesystem) { 164 this.conf = conf; 165 this.namesystem = namesystem; 166 } 167 168 /** 169 * Return the MD5 checksum of the image that has been loaded. 170 * @throws IllegalStateException if load() has not yet been called. 171 */ 172 @Override 173 public MD5Hash getLoadedImageMd5() { 174 checkLoaded(); 175 return imgDigest; 176 } 177 178 @Override 179 public long getLoadedImageTxId() { 180 checkLoaded(); 181 return imgTxId; 182 } 183 184 /** 185 * Throw IllegalStateException if load() has not yet been called. 186 */ 187 private void checkLoaded() { 188 if (!loaded) { 189 throw new IllegalStateException("Image not yet loaded!"); 190 } 191 } 192 193 /** 194 * Throw IllegalStateException if load() has already been called. 195 */ 196 private void checkNotLoaded() { 197 if (loaded) { 198 throw new IllegalStateException("Image already loaded!"); 199 } 200 } 201 202 public void load(File curFile) throws IOException { 203 checkNotLoaded(); 204 assert curFile != null : "curFile is null"; 205 206 StartupProgress prog = NameNode.getStartupProgress(); 207 Step step = new Step(StepType.INODES); 208 prog.beginStep(Phase.LOADING_FSIMAGE, step); 209 long startTime = now(); 210 211 // 212 // Load in bits 213 // 214 MessageDigest digester = MD5Hash.getDigester(); 215 DigestInputStream fin = new DigestInputStream( 216 new FileInputStream(curFile), digester); 217 218 DataInputStream in = new DataInputStream(fin); 219 try { 220 // read image version: first appeared in version -1 221 int imgVersion = in.readInt(); 222 if (getLayoutVersion() != imgVersion) { 223 throw new InconsistentFSStateException(curFile, 224 "imgVersion " + imgVersion + 225 " expected to be " + getLayoutVersion()); 226 } 227 boolean supportSnapshot = NameNodeLayoutVersion.supports( 228 LayoutVersion.Feature.SNAPSHOT, imgVersion); 229 if (NameNodeLayoutVersion.supports( 230 LayoutVersion.Feature.ADD_LAYOUT_FLAGS, imgVersion)) { 231 LayoutFlags.read(in); 232 } 233 234 // read namespaceID: first appeared in version -2 235 in.readInt(); 236 237 long numFiles = in.readLong(); 238 239 // read in the last generation stamp for legacy blocks. 240 long genstamp = in.readLong(); 241 namesystem.setGenerationStampV1(genstamp); 242 243 if (NameNodeLayoutVersion.supports( 244 LayoutVersion.Feature.SEQUENTIAL_BLOCK_ID, imgVersion)) { 245 // read the starting generation stamp for sequential block IDs 246 genstamp = in.readLong(); 247 namesystem.setGenerationStampV2(genstamp); 248 249 // read the last generation stamp for blocks created after 250 // the switch to sequential block IDs. 251 long stampAtIdSwitch = in.readLong(); 252 namesystem.setGenerationStampV1Limit(stampAtIdSwitch); 253 254 // read the max sequential block ID. 255 long maxSequentialBlockId = in.readLong(); 256 namesystem.setLastAllocatedBlockId(maxSequentialBlockId); 257 } else { 258 long startingGenStamp = namesystem.upgradeGenerationStampToV2(); 259 // This is an upgrade. 260 LOG.info("Upgrading to sequential block IDs. Generation stamp " + 261 "for new blocks set to " + startingGenStamp); 262 } 263 264 // read the transaction ID of the last edit represented by 265 // this image 266 if (NameNodeLayoutVersion.supports( 267 LayoutVersion.Feature.STORED_TXIDS, imgVersion)) { 268 imgTxId = in.readLong(); 269 } else { 270 imgTxId = 0; 271 } 272 273 // read the last allocated inode id in the fsimage 274 if (NameNodeLayoutVersion.supports( 275 LayoutVersion.Feature.ADD_INODE_ID, imgVersion)) { 276 long lastInodeId = in.readLong(); 277 namesystem.resetLastInodeId(lastInodeId); 278 if (LOG.isDebugEnabled()) { 279 LOG.debug("load last allocated InodeId from fsimage:" + lastInodeId); 280 } 281 } else { 282 if (LOG.isDebugEnabled()) { 283 LOG.debug("Old layout version doesn't have inode id." 284 + " Will assign new id for each inode."); 285 } 286 } 287 288 if (supportSnapshot) { 289 snapshotMap = namesystem.getSnapshotManager().read(in, this); 290 } 291 292 // read compression related info 293 FSImageCompression compression; 294 if (NameNodeLayoutVersion.supports( 295 LayoutVersion.Feature.FSIMAGE_COMPRESSION, imgVersion)) { 296 compression = FSImageCompression.readCompressionHeader(conf, in); 297 } else { 298 compression = FSImageCompression.createNoopCompression(); 299 } 300 in = compression.unwrapInputStream(fin); 301 302 LOG.info("Loading image file " + curFile + " using " + compression); 303 304 // load all inodes 305 LOG.info("Number of files = " + numFiles); 306 prog.setTotal(Phase.LOADING_FSIMAGE, step, numFiles); 307 Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step); 308 if (NameNodeLayoutVersion.supports( 309 LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, imgVersion)) { 310 if (supportSnapshot) { 311 loadLocalNameINodesWithSnapshot(numFiles, in, counter); 312 } else { 313 loadLocalNameINodes(numFiles, in, counter); 314 } 315 } else { 316 loadFullNameINodes(numFiles, in, counter); 317 } 318 319 loadFilesUnderConstruction(in, supportSnapshot, counter); 320 prog.endStep(Phase.LOADING_FSIMAGE, step); 321 // Now that the step is finished, set counter equal to total to adjust 322 // for possible under-counting due to reference inodes. 323 prog.setCount(Phase.LOADING_FSIMAGE, step, numFiles); 324 325 loadSecretManagerState(in); 326 327 loadCacheManagerState(in); 328 329 // make sure to read to the end of file 330 boolean eof = (in.read() == -1); 331 assert eof : "Should have reached the end of image file " + curFile; 332 } finally { 333 in.close(); 334 } 335 336 imgDigest = new MD5Hash(digester.digest()); 337 loaded = true; 338 339 LOG.info("Image file " + curFile + " of size " + curFile.length() + 340 " bytes loaded in " + (now() - startTime)/1000 + " seconds."); 341 } 342 343 /** Update the root node's attributes */ 344 private void updateRootAttr(INodeWithAdditionalFields root) { 345 final Quota.Counts q = root.getQuotaCounts(); 346 final long nsQuota = q.get(Quota.NAMESPACE); 347 final long dsQuota = q.get(Quota.DISKSPACE); 348 FSDirectory fsDir = namesystem.dir; 349 if (nsQuota != -1 || dsQuota != -1) { 350 fsDir.rootDir.getDirectoryWithQuotaFeature().setQuota(nsQuota, dsQuota); 351 } 352 fsDir.rootDir.cloneModificationTime(root); 353 fsDir.rootDir.clonePermissionStatus(root); 354 } 355 356 /** 357 * Load fsimage files when 1) only local names are stored, 358 * and 2) snapshot is supported. 359 * 360 * @param numFiles number of files expected to be read 361 * @param in Image input stream 362 * @param counter Counter to increment for namenode startup progress 363 */ 364 private void loadLocalNameINodesWithSnapshot(long numFiles, DataInput in, 365 Counter counter) throws IOException { 366 assert NameNodeLayoutVersion.supports( 367 LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, getLayoutVersion()); 368 assert NameNodeLayoutVersion.supports( 369 LayoutVersion.Feature.SNAPSHOT, getLayoutVersion()); 370 371 // load root 372 loadRoot(in, counter); 373 // load rest of the nodes recursively 374 loadDirectoryWithSnapshot(in, counter); 375 } 376 377 /** 378 * load fsimage files assuming only local names are stored. Used when 379 * snapshots are not supported by the layout version. 380 * 381 * @param numFiles number of files expected to be read 382 * @param in image input stream 383 * @param counter Counter to increment for namenode startup progress 384 * @throws IOException 385 */ 386 private void loadLocalNameINodes(long numFiles, DataInput in, Counter counter) 387 throws IOException { 388 assert NameNodeLayoutVersion.supports( 389 LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, getLayoutVersion()); 390 assert numFiles > 0; 391 392 // load root 393 loadRoot(in, counter); 394 // have loaded the first file (the root) 395 numFiles--; 396 397 // load rest of the nodes directory by directory 398 while (numFiles > 0) { 399 numFiles -= loadDirectory(in, counter); 400 } 401 if (numFiles != 0) { 402 throw new IOException("Read unexpect number of files: " + -numFiles); 403 } 404 } 405 406 /** 407 * Load information about root, and use the information to update the root 408 * directory of NameSystem. 409 * @param in The {@link DataInput} instance to read. 410 * @param counter Counter to increment for namenode startup progress 411 */ 412 private void loadRoot(DataInput in, Counter counter) 413 throws IOException { 414 // load root 415 if (in.readShort() != 0) { 416 throw new IOException("First node is not root"); 417 } 418 final INodeDirectory root = loadINode(null, false, in, counter) 419 .asDirectory(); 420 // update the root's attributes 421 updateRootAttr(root); 422 } 423 424 /** Load children nodes for the parent directory. */ 425 private int loadChildren(INodeDirectory parent, DataInput in, 426 Counter counter) throws IOException { 427 int numChildren = in.readInt(); 428 for (int i = 0; i < numChildren; i++) { 429 // load single inode 430 INode newNode = loadINodeWithLocalName(false, in, true, counter); 431 addToParent(parent, newNode); 432 } 433 return numChildren; 434 } 435 436 /** 437 * Load a directory when snapshot is supported. 438 * @param in The {@link DataInput} instance to read. 439 * @param counter Counter to increment for namenode startup progress 440 */ 441 private void loadDirectoryWithSnapshot(DataInput in, Counter counter) 442 throws IOException { 443 // Step 1. Identify the parent INode 444 long inodeId = in.readLong(); 445 final INodeDirectory parent = this.namesystem.dir.getInode(inodeId) 446 .asDirectory(); 447 448 // Check if the whole subtree has been saved (for reference nodes) 449 boolean toLoadSubtree = referenceMap.toProcessSubtree(parent.getId()); 450 if (!toLoadSubtree) { 451 return; 452 } 453 454 // Step 2. Load snapshots if parent is snapshottable 455 int numSnapshots = in.readInt(); 456 if (numSnapshots >= 0) { 457 final INodeDirectorySnapshottable snapshottableParent 458 = INodeDirectorySnapshottable.valueOf(parent, parent.getLocalName()); 459 // load snapshots and snapshotQuota 460 SnapshotFSImageFormat.loadSnapshotList(snapshottableParent, 461 numSnapshots, in, this); 462 if (snapshottableParent.getSnapshotQuota() > 0) { 463 // add the directory to the snapshottable directory list in 464 // SnapshotManager. Note that we only add root when its snapshot quota 465 // is positive. 466 this.namesystem.getSnapshotManager().addSnapshottable( 467 snapshottableParent); 468 } 469 } 470 471 // Step 3. Load children nodes under parent 472 loadChildren(parent, in, counter); 473 474 // Step 4. load Directory Diff List 475 SnapshotFSImageFormat.loadDirectoryDiffList(parent, in, this); 476 477 // Recursively load sub-directories, including snapshot copies of deleted 478 // directories 479 int numSubTree = in.readInt(); 480 for (int i = 0; i < numSubTree; i++) { 481 loadDirectoryWithSnapshot(in, counter); 482 } 483 } 484 485 /** 486 * Load all children of a directory 487 * 488 * @param in 489 * @param counter Counter to increment for namenode startup progress 490 * @return number of child inodes read 491 * @throws IOException 492 */ 493 private int loadDirectory(DataInput in, Counter counter) throws IOException { 494 String parentPath = FSImageSerialization.readString(in); 495 // Rename .snapshot paths if we're doing an upgrade 496 parentPath = renameReservedPathsOnUpgrade(parentPath, getLayoutVersion()); 497 final INodeDirectory parent = INodeDirectory.valueOf( 498 namesystem.dir.rootDir.getNode(parentPath, true), parentPath); 499 return loadChildren(parent, in, counter); 500 } 501 502 /** 503 * load fsimage files assuming full path names are stored 504 * 505 * @param numFiles total number of files to load 506 * @param in data input stream 507 * @param counter Counter to increment for namenode startup progress 508 * @throws IOException if any error occurs 509 */ 510 private void loadFullNameINodes(long numFiles, DataInput in, Counter counter) 511 throws IOException { 512 byte[][] pathComponents; 513 byte[][] parentPath = {{}}; 514 FSDirectory fsDir = namesystem.dir; 515 INodeDirectory parentINode = fsDir.rootDir; 516 for (long i = 0; i < numFiles; i++) { 517 pathComponents = FSImageSerialization.readPathComponents(in); 518 final INode newNode = loadINode( 519 pathComponents[pathComponents.length-1], false, in, counter); 520 521 if (isRoot(pathComponents)) { // it is the root 522 // update the root's attributes 523 updateRootAttr(newNode.asDirectory()); 524 continue; 525 } 526 527 namesystem.dir.addToInodeMap(newNode); 528 // check if the new inode belongs to the same parent 529 if(!isParent(pathComponents, parentPath)) { 530 parentINode = getParentINodeDirectory(pathComponents); 531 parentPath = getParent(pathComponents); 532 } 533 534 // add new inode 535 addToParent(parentINode, newNode); 536 } 537 } 538 539 private INodeDirectory getParentINodeDirectory(byte[][] pathComponents 540 ) throws FileNotFoundException, PathIsNotDirectoryException, 541 UnresolvedLinkException { 542 if (pathComponents.length < 2) { // root 543 return null; 544 } 545 // Gets the parent INode 546 final INodesInPath inodes = namesystem.dir.getExistingPathINodes( 547 pathComponents); 548 return INodeDirectory.valueOf(inodes.getINode(-2), pathComponents); 549 } 550 551 /** 552 * Add the child node to parent and, if child is a file, update block map. 553 * This method is only used for image loading so that synchronization, 554 * modification time update and space count update are not needed. 555 */ 556 private void addToParent(INodeDirectory parent, INode child) { 557 FSDirectory fsDir = namesystem.dir; 558 if (parent == fsDir.rootDir) { 559 child.setLocalName(renameReservedRootComponentOnUpgrade( 560 child.getLocalNameBytes(), getLayoutVersion())); 561 } 562 // NOTE: This does not update space counts for parents 563 if (!parent.addChild(child)) { 564 return; 565 } 566 namesystem.dir.cacheName(child); 567 568 if (child.isFile()) { 569 updateBlocksMap(child.asFile()); 570 } 571 } 572 573 public void updateBlocksMap(INodeFile file) { 574 // Add file->block mapping 575 final BlockInfo[] blocks = file.getBlocks(); 576 if (blocks != null) { 577 final BlockManager bm = namesystem.getBlockManager(); 578 for (int i = 0; i < blocks.length; i++) { 579 file.setBlock(i, bm.addBlockCollection(blocks[i], file)); 580 } 581 } 582 } 583 584 public INode loadINodeWithLocalName(boolean isSnapshotINode, DataInput in, 585 boolean updateINodeMap) throws IOException { 586 return loadINodeWithLocalName(isSnapshotINode, in, updateINodeMap, null); 587 } 588 589 public INode loadINodeWithLocalName(boolean isSnapshotINode, 590 DataInput in, boolean updateINodeMap, Counter counter) 591 throws IOException { 592 byte[] localName = FSImageSerialization.readLocalName(in); 593 localName = 594 renameReservedComponentOnUpgrade(localName, getLayoutVersion()); 595 INode inode = loadINode(localName, isSnapshotINode, in, counter); 596 if (updateINodeMap) { 597 namesystem.dir.addToInodeMap(inode); 598 } 599 return inode; 600 } 601 602 /** 603 * load an inode from fsimage except for its name 604 * 605 * @param in data input stream from which image is read 606 * @param counter Counter to increment for namenode startup progress 607 * @return an inode 608 */ 609 @SuppressWarnings("deprecation") 610 INode loadINode(final byte[] localName, boolean isSnapshotINode, 611 DataInput in, Counter counter) throws IOException { 612 final int imgVersion = getLayoutVersion(); 613 if (NameNodeLayoutVersion.supports( 614 LayoutVersion.Feature.SNAPSHOT, imgVersion)) { 615 namesystem.getFSDirectory().verifyINodeName(localName); 616 } 617 618 long inodeId = NameNodeLayoutVersion.supports( 619 LayoutVersion.Feature.ADD_INODE_ID, imgVersion) ? in.readLong() 620 : namesystem.allocateNewInodeId(); 621 622 final short replication = namesystem.getBlockManager().adjustReplication( 623 in.readShort()); 624 final long modificationTime = in.readLong(); 625 long atime = 0; 626 if (NameNodeLayoutVersion.supports( 627 LayoutVersion.Feature.FILE_ACCESS_TIME, imgVersion)) { 628 atime = in.readLong(); 629 } 630 final long blockSize = in.readLong(); 631 final int numBlocks = in.readInt(); 632 633 if (numBlocks >= 0) { 634 // file 635 636 // read blocks 637 BlockInfo[] blocks = new BlockInfo[numBlocks]; 638 for (int j = 0; j < numBlocks; j++) { 639 blocks[j] = new BlockInfo(replication); 640 blocks[j].readFields(in); 641 } 642 643 String clientName = ""; 644 String clientMachine = ""; 645 boolean underConstruction = false; 646 FileDiffList fileDiffs = null; 647 if (NameNodeLayoutVersion.supports( 648 LayoutVersion.Feature.SNAPSHOT, imgVersion)) { 649 // read diffs 650 fileDiffs = SnapshotFSImageFormat.loadFileDiffList(in, this); 651 652 if (isSnapshotINode) { 653 underConstruction = in.readBoolean(); 654 if (underConstruction) { 655 clientName = FSImageSerialization.readString(in); 656 clientMachine = FSImageSerialization.readString(in); 657 // convert the last block to BlockUC 658 if (blocks != null && blocks.length > 0) { 659 BlockInfo lastBlk = blocks[blocks.length - 1]; 660 blocks[blocks.length - 1] = new BlockInfoUnderConstruction( 661 lastBlk, replication); 662 } 663 } 664 } 665 } 666 667 final PermissionStatus permissions = PermissionStatus.read(in); 668 669 // return 670 if (counter != null) { 671 counter.increment(); 672 } 673 final INodeFile file = new INodeFile(inodeId, localName, permissions, 674 modificationTime, atime, blocks, replication, blockSize); 675 if (underConstruction) { 676 file.toUnderConstruction(clientName, clientMachine, null); 677 } 678 return fileDiffs == null ? file : new INodeFile(file, fileDiffs); 679 } else if (numBlocks == -1) { 680 //directory 681 682 //read quotas 683 final long nsQuota = in.readLong(); 684 long dsQuota = -1L; 685 if (NameNodeLayoutVersion.supports( 686 LayoutVersion.Feature.DISKSPACE_QUOTA, imgVersion)) { 687 dsQuota = in.readLong(); 688 } 689 690 //read snapshot info 691 boolean snapshottable = false; 692 boolean withSnapshot = false; 693 if (NameNodeLayoutVersion.supports( 694 LayoutVersion.Feature.SNAPSHOT, imgVersion)) { 695 snapshottable = in.readBoolean(); 696 if (!snapshottable) { 697 withSnapshot = in.readBoolean(); 698 } 699 } 700 701 final PermissionStatus permissions = PermissionStatus.read(in); 702 703 //return 704 if (counter != null) { 705 counter.increment(); 706 } 707 final INodeDirectory dir = new INodeDirectory(inodeId, localName, 708 permissions, modificationTime); 709 if (nsQuota >= 0 || dsQuota >= 0) { 710 dir.addDirectoryWithQuotaFeature(nsQuota, dsQuota); 711 } 712 if (withSnapshot) { 713 dir.addSnapshotFeature(null); 714 } 715 return snapshottable ? new INodeDirectorySnapshottable(dir) : dir; 716 } else if (numBlocks == -2) { 717 //symlink 718 if (!FileSystem.areSymlinksEnabled()) { 719 throw new IOException("Symlinks not supported - please remove symlink before upgrading to this version of HDFS"); 720 } 721 722 final String symlink = Text.readString(in); 723 final PermissionStatus permissions = PermissionStatus.read(in); 724 if (counter != null) { 725 counter.increment(); 726 } 727 return new INodeSymlink(inodeId, localName, permissions, 728 modificationTime, atime, symlink); 729 } else if (numBlocks == -3) { 730 //reference 731 // Intentionally do not increment counter, because it is too difficult at 732 // this point to assess whether or not this is a reference that counts 733 // toward quota. 734 735 final boolean isWithName = in.readBoolean(); 736 // lastSnapshotId for WithName node, dstSnapshotId for DstReference node 737 int snapshotId = in.readInt(); 738 739 final INodeReference.WithCount withCount 740 = referenceMap.loadINodeReferenceWithCount(isSnapshotINode, in, this); 741 742 if (isWithName) { 743 return new INodeReference.WithName(null, withCount, localName, 744 snapshotId); 745 } else { 746 final INodeReference ref = new INodeReference.DstReference(null, 747 withCount, snapshotId); 748 return ref; 749 } 750 } 751 752 throw new IOException("Unknown inode type: numBlocks=" + numBlocks); 753 } 754 755 /** Load {@link INodeFileAttributes}. */ 756 public INodeFileAttributes loadINodeFileAttributes(DataInput in) 757 throws IOException { 758 final int layoutVersion = getLayoutVersion(); 759 760 if (!NameNodeLayoutVersion.supports( 761 LayoutVersion.Feature.OPTIMIZE_SNAPSHOT_INODES, layoutVersion)) { 762 return loadINodeWithLocalName(true, in, false).asFile(); 763 } 764 765 final byte[] name = FSImageSerialization.readLocalName(in); 766 final PermissionStatus permissions = PermissionStatus.read(in); 767 final long modificationTime = in.readLong(); 768 final long accessTime = in.readLong(); 769 770 final short replication = namesystem.getBlockManager().adjustReplication( 771 in.readShort()); 772 final long preferredBlockSize = in.readLong(); 773 774 return new INodeFileAttributes.SnapshotCopy(name, permissions, null, modificationTime, 775 accessTime, replication, preferredBlockSize); 776 } 777 778 public INodeDirectoryAttributes loadINodeDirectoryAttributes(DataInput in) 779 throws IOException { 780 final int layoutVersion = getLayoutVersion(); 781 782 if (!NameNodeLayoutVersion.supports( 783 LayoutVersion.Feature.OPTIMIZE_SNAPSHOT_INODES, layoutVersion)) { 784 return loadINodeWithLocalName(true, in, false).asDirectory(); 785 } 786 787 final byte[] name = FSImageSerialization.readLocalName(in); 788 final PermissionStatus permissions = PermissionStatus.read(in); 789 final long modificationTime = in.readLong(); 790 791 //read quotas 792 final long nsQuota = in.readLong(); 793 final long dsQuota = in.readLong(); 794 795 return nsQuota == -1L && dsQuota == -1L? 796 new INodeDirectoryAttributes.SnapshotCopy(name, permissions, null, modificationTime) 797 : new INodeDirectoryAttributes.CopyWithQuota(name, permissions, 798 null, modificationTime, nsQuota, dsQuota); 799 } 800 801 private void loadFilesUnderConstruction(DataInput in, 802 boolean supportSnapshot, Counter counter) throws IOException { 803 FSDirectory fsDir = namesystem.dir; 804 int size = in.readInt(); 805 806 LOG.info("Number of files under construction = " + size); 807 808 for (int i = 0; i < size; i++) { 809 INodeFile cons = FSImageSerialization.readINodeUnderConstruction(in, 810 namesystem, getLayoutVersion()); 811 counter.increment(); 812 813 // verify that file exists in namespace 814 String path = cons.getLocalName(); 815 INodeFile oldnode = null; 816 boolean inSnapshot = false; 817 if (path != null && FSDirectory.isReservedName(path) && 818 NameNodeLayoutVersion.supports( 819 LayoutVersion.Feature.ADD_INODE_ID, getLayoutVersion())) { 820 // TODO: for HDFS-5428, we use reserved path for those INodeFileUC in 821 // snapshot. If we support INode ID in the layout version, we can use 822 // the inode id to find the oldnode. 823 oldnode = namesystem.dir.getInode(cons.getId()).asFile(); 824 inSnapshot = true; 825 } else { 826 final INodesInPath iip = fsDir.getLastINodeInPath(path); 827 oldnode = INodeFile.valueOf(iip.getINode(0), path); 828 } 829 830 FileUnderConstructionFeature uc = cons.getFileUnderConstructionFeature(); 831 oldnode.toUnderConstruction(uc.getClientName(), uc.getClientMachine(), 832 uc.getClientNode()); 833 if (oldnode.numBlocks() > 0) { 834 BlockInfo ucBlock = cons.getLastBlock(); 835 // we do not replace the inode, just replace the last block of oldnode 836 BlockInfo info = namesystem.getBlockManager().addBlockCollection( 837 ucBlock, oldnode); 838 oldnode.setBlock(oldnode.numBlocks() - 1, info); 839 } 840 841 if (!inSnapshot) { 842 namesystem.leaseManager.addLease(cons 843 .getFileUnderConstructionFeature().getClientName(), path); 844 } 845 } 846 } 847 848 private void loadSecretManagerState(DataInput in) 849 throws IOException { 850 int imgVersion = getLayoutVersion(); 851 852 if (!NameNodeLayoutVersion.supports( 853 LayoutVersion.Feature.DELEGATION_TOKEN, imgVersion)) { 854 //SecretManagerState is not available. 855 //This must not happen if security is turned on. 856 return; 857 } 858 namesystem.loadSecretManagerStateCompat(in); 859 } 860 861 private void loadCacheManagerState(DataInput in) throws IOException { 862 int imgVersion = getLayoutVersion(); 863 if (!NameNodeLayoutVersion.supports( 864 LayoutVersion.Feature.CACHING, imgVersion)) { 865 return; 866 } 867 namesystem.getCacheManager().loadStateCompat(in); 868 } 869 870 private int getLayoutVersion() { 871 return namesystem.getFSImage().getStorage().getLayoutVersion(); 872 } 873 874 private boolean isRoot(byte[][] path) { 875 return path.length == 1 && 876 path[0] == null; 877 } 878 879 private boolean isParent(byte[][] path, byte[][] parent) { 880 if (path == null || parent == null) 881 return false; 882 if (parent.length == 0 || path.length != parent.length + 1) 883 return false; 884 boolean isParent = true; 885 for (int i = 0; i < parent.length; i++) { 886 isParent = isParent && Arrays.equals(path[i], parent[i]); 887 } 888 return isParent; 889 } 890 891 /** 892 * Return string representing the parent of the given path. 893 */ 894 String getParent(String path) { 895 return path.substring(0, path.lastIndexOf(Path.SEPARATOR)); 896 } 897 898 byte[][] getParent(byte[][] path) { 899 byte[][] result = new byte[path.length - 1][]; 900 for (int i = 0; i < result.length; i++) { 901 result[i] = new byte[path[i].length]; 902 System.arraycopy(path[i], 0, result[i], 0, path[i].length); 903 } 904 return result; 905 } 906 907 public Snapshot getSnapshot(DataInput in) throws IOException { 908 return snapshotMap.get(in.readInt()); 909 } 910 } 911 912 @VisibleForTesting 913 public static final TreeMap<String, String> renameReservedMap = 914 new TreeMap<String, String>(); 915 916 /** 917 * Use the default key-value pairs that will be used to determine how to 918 * rename reserved paths on upgrade. 919 */ 920 @VisibleForTesting 921 public static void useDefaultRenameReservedPairs() { 922 renameReservedMap.clear(); 923 for (String key: HdfsConstants.RESERVED_PATH_COMPONENTS) { 924 renameReservedMap.put( 925 key, 926 key + "." + HdfsConstants.NAMENODE_LAYOUT_VERSION + "." 927 + "UPGRADE_RENAMED"); 928 } 929 } 930 931 /** 932 * Set the key-value pairs that will be used to determine how to rename 933 * reserved paths on upgrade. 934 */ 935 @VisibleForTesting 936 public static void setRenameReservedPairs(String renameReserved) { 937 // Clear and set the default values 938 useDefaultRenameReservedPairs(); 939 // Overwrite with provided values 940 setRenameReservedMapInternal(renameReserved); 941 } 942 943 private static void setRenameReservedMapInternal(String renameReserved) { 944 Collection<String> pairs = 945 StringUtils.getTrimmedStringCollection(renameReserved); 946 for (String p : pairs) { 947 String[] pair = StringUtils.split(p, '/', '='); 948 Preconditions.checkArgument(pair.length == 2, 949 "Could not parse key-value pair " + p); 950 String key = pair[0]; 951 String value = pair[1]; 952 Preconditions.checkArgument(DFSUtil.isReservedPathComponent(key), 953 "Unknown reserved path " + key); 954 Preconditions.checkArgument(DFSUtil.isValidNameForComponent(value), 955 "Invalid rename path for " + key + ": " + value); 956 LOG.info("Will rename reserved path " + key + " to " + value); 957 renameReservedMap.put(key, value); 958 } 959 } 960 961 /** 962 * When upgrading from an old version, the filesystem could contain paths 963 * that are now reserved in the new version (e.g. .snapshot). This renames 964 * these new reserved paths to a user-specified value to avoid collisions 965 * with the reserved name. 966 * 967 * @param path Old path potentially containing a reserved path 968 * @return New path with reserved path components renamed to user value 969 */ 970 static String renameReservedPathsOnUpgrade(String path, 971 final int layoutVersion) { 972 final String oldPath = path; 973 // If any known LVs aren't supported, we're doing an upgrade 974 if (!NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, layoutVersion)) { 975 String[] components = INode.getPathNames(path); 976 // Only need to worry about the root directory 977 if (components.length > 1) { 978 components[1] = DFSUtil.bytes2String( 979 renameReservedRootComponentOnUpgrade( 980 DFSUtil.string2Bytes(components[1]), 981 layoutVersion)); 982 path = DFSUtil.strings2PathString(components); 983 } 984 } 985 if (!NameNodeLayoutVersion.supports(Feature.SNAPSHOT, layoutVersion)) { 986 String[] components = INode.getPathNames(path); 987 // Special case the root path 988 if (components.length == 0) { 989 return path; 990 } 991 for (int i=0; i<components.length; i++) { 992 components[i] = DFSUtil.bytes2String( 993 renameReservedComponentOnUpgrade( 994 DFSUtil.string2Bytes(components[i]), 995 layoutVersion)); 996 } 997 path = DFSUtil.strings2PathString(components); 998 } 999 1000 if (!path.equals(oldPath)) { 1001 LOG.info("Upgrade process renamed reserved path " + oldPath + " to " 1002 + path); 1003 } 1004 return path; 1005 } 1006 1007 private final static String RESERVED_ERROR_MSG = 1008 FSDirectory.DOT_RESERVED_PATH_PREFIX + " is a reserved path and " 1009 + HdfsConstants.DOT_SNAPSHOT_DIR + " is a reserved path component in" 1010 + " this version of HDFS. Please rollback and delete or rename" 1011 + " this path, or upgrade with the " 1012 + StartupOption.RENAMERESERVED.getName() 1013 + " [key-value pairs]" 1014 + " option to automatically rename these paths during upgrade."; 1015 1016 /** 1017 * Same as {@link #renameReservedPathsOnUpgrade}, but for a single 1018 * byte array path component. 1019 */ 1020 private static byte[] renameReservedComponentOnUpgrade(byte[] component, 1021 final int layoutVersion) { 1022 // If the LV doesn't support snapshots, we're doing an upgrade 1023 if (!NameNodeLayoutVersion.supports(Feature.SNAPSHOT, layoutVersion)) { 1024 if (Arrays.equals(component, HdfsConstants.DOT_SNAPSHOT_DIR_BYTES)) { 1025 Preconditions.checkArgument( 1026 renameReservedMap != null && 1027 renameReservedMap.containsKey(HdfsConstants.DOT_SNAPSHOT_DIR), 1028 RESERVED_ERROR_MSG); 1029 component = 1030 DFSUtil.string2Bytes(renameReservedMap 1031 .get(HdfsConstants.DOT_SNAPSHOT_DIR)); 1032 } 1033 } 1034 return component; 1035 } 1036 1037 /** 1038 * Same as {@link #renameReservedPathsOnUpgrade}, but for a single 1039 * byte array path component. 1040 */ 1041 private static byte[] renameReservedRootComponentOnUpgrade(byte[] component, 1042 final int layoutVersion) { 1043 // If the LV doesn't support inode IDs, we're doing an upgrade 1044 if (!NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, layoutVersion)) { 1045 if (Arrays.equals(component, FSDirectory.DOT_RESERVED)) { 1046 Preconditions.checkArgument( 1047 renameReservedMap != null && 1048 renameReservedMap.containsKey(FSDirectory.DOT_RESERVED_STRING), 1049 RESERVED_ERROR_MSG); 1050 final String renameString = renameReservedMap 1051 .get(FSDirectory.DOT_RESERVED_STRING); 1052 component = 1053 DFSUtil.string2Bytes(renameString); 1054 LOG.info("Renamed root path " + FSDirectory.DOT_RESERVED_STRING 1055 + " to " + renameString); 1056 } 1057 } 1058 return component; 1059 } 1060}