/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.CopyOnWriteArrayList;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
import org.apache.hadoop.hdfs.server.common.Util;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.util.PersistentLongFile;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.DNS;
import org.apache.hadoop.util.Time;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

/**
 * NNStorage is responsible for management of the StorageDirectories used by
 * the NameNode.
 */
@InterfaceAudience.Private
public class NNStorage extends Storage implements Closeable,
    StorageErrorReporter {
  static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest";
  /** URI scheme of directories that are turned into StorageDirectories. */
  static final String LOCAL_URI_SCHEME = "file";

  /**
   * The filenames used for storing the images and edit logs.
   */
  public enum NameNodeFile {
    IMAGE            ("fsimage"),
    TIME             ("fstime"), // from "old" pre-HDFS-1073 format
    SEEN_TXID        ("seen_txid"),
    EDITS            ("edits"),
    IMAGE_NEW        ("fsimage.ckpt"),
    IMAGE_ROLLBACK   ("fsimage_rollback"),
    EDITS_NEW        ("edits.new"), // from "old" pre-HDFS-1073 format
    EDITS_INPROGRESS ("edits_inprogress"),
    EDITS_TMP        ("edits_tmp");

    private final String fileName;

    private NameNodeFile(String name) {
      this.fileName = name;
    }

    /** @return the base file name (without any txid suffix). */
    @VisibleForTesting
    public String getName() {
      return fileName;
    }
  }

  /**
   * Implementation of StorageDirType specific to namenode storage.
   * A Storage directory could be of type IMAGE which stores only fsimage,
   * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which
   * stores both fsimage and edits.
   */
  @VisibleForTesting
  public static enum NameNodeDirType implements StorageDirType {
    UNDEFINED,
    IMAGE,
    EDITS,
    IMAGE_AND_EDITS;

    @Override
    public StorageDirType getStorageDirType() {
      return this;
    }

    /**
     * An IMAGE_AND_EDITS directory counts as both IMAGE and EDITS; every
     * other type only matches itself.
     */
    @Override
    public boolean isOfType(StorageDirType type) {
      if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS)) {
        return true;
      }
      return this == type;
    }
  }

  protected String blockpoolID = ""; // id of the block pool

  /**
   * flag that controls if we try to restore failed storages
   */
  private boolean restoreFailedStorage = false;
  private final Object restorationLock = new Object();
  private boolean disablePreUpgradableLayoutCheck = false;

  /**
   * TxId of the last transaction that was included in the most
   * recent fsimage file. This does not include any transactions
   * that have since been written to the edit log.
   */
  protected volatile long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;

  /**
   * Time of the last checkpoint, in milliseconds since the epoch.
   */
  private long mostRecentCheckpointTime = 0;

  /**
   * list of failed (and thus removed) storages
   */
  final protected List<StorageDirectory> removedStorageDirs
    = new CopyOnWriteArrayList<StorageDirectory>();

  /**
   * Properties from old layout versions that may be needed
   * during upgrade only. Null until {@link #setFieldsFromProperties} has run.
   */
  private HashMap<String, String> deprecatedProperties;

  /**
   * Construct the NNStorage.
   * @param conf Namenode configuration.
   * @param imageDirs Directories the image can be stored in.
   * @param editsDirs Directories the editlog can be stored in.
   * @throws IOException if any directories are inaccessible.
   */
  public NNStorage(Configuration conf,
                   Collection<URI> imageDirs, Collection<URI> editsDirs)
      throws IOException {
    super(NodeType.NAME_NODE);

    storageDirs = new CopyOnWriteArrayList<StorageDirectory>();

    // setStorageDirectories works on its own copy of editsDirs, so the
    // caller's collection is left untouched.
    setStorageDirectories(imageDirs,
                          editsDirs,
                          FSNamesystem.getSharedEditsDirs(conf));
  }

  /**
   * Checks whether the directory holds an old-style "image/fsimage" file
   * whose layout version is not older than LAST_PRE_UPGRADE_LAYOUT_VERSION.
   *
   * @param sd storage directory to examine
   * @return false if the check is disabled, the "image" directory is absent,
   *         or the stored version is below LAST_PRE_UPGRADE_LAYOUT_VERSION;
   *         true otherwise
   * @throws IOException if the old image file cannot be opened or read
   */
  @Override // Storage
  public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException {
    if (disablePreUpgradableLayoutCheck) {
      return false;
    }

    File oldImageDir = new File(sd.getRoot(), "image");
    if (!oldImageDir.exists()) {
      return false;
    }
    // check the layout version inside the image file
    File oldF = new File(oldImageDir, "fsimage");
    // NOTE(review): "rws" opens the file for writing and, per the
    // RandomAccessFile contract, creates it when missing, although the file
    // is only read here. Kept as-is; confirm whether "r" would be acceptable.
    RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
    try {
      oldFile.seek(0);
      int oldVersion = oldFile.readInt();
      // Close explicitly so a failure to close is reported to the caller;
      // nulling the reference keeps the finally block from closing twice.
      oldFile.close();
      oldFile = null;
      if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION) {
        return false;
      }
    } finally {
      IOUtils.cleanup(LOG, oldFile);
    }
    return true;
  }

  /**
   * Unlocks all storage directories and forgets them.
   */
  @Override // Closeable
  public void close() throws IOException {
    unlockAll();
    storageDirs.clear();
  }

  /**
   * Set flag whether an attempt should be made to restore failed storage
   * directories at the next available opportunity.
   *
   * @param val Whether restoration attempt should be made.
   */
  void setRestoreFailedStorage(boolean val) {
    LOG.warn("set restore failed storage to " + val);
    restoreFailedStorage = val;
  }

  /**
   * @return Whether failed storage directories are to be restored.
   */
  boolean getRestoreFailedStorage() {
    return restoreFailedStorage;
  }

  /**
   * See if any of removed storages is "writable" again, and can be returned
   * into service. Does nothing unless restoration is enabled and at least
   * one directory has been removed.
   */
  void attemptRestoreRemovedStorage() {
    // if directory is "alive" - copy the images there...
    if (!restoreFailedStorage || removedStorageDirs.isEmpty()) {
      return; //nothing to restore
    }

    /* We don't want more than one thread trying to restore at a time */
    synchronized (this.restorationLock) {
      LOG.info("NNStorage.attemptRestoreRemovedStorage: check removed(failed) "+
               "storarge. removedStorages size = " + removedStorageDirs.size());
      // removedStorageDirs is a CopyOnWriteArrayList, so removing entries
      // while iterating over it does not disturb this iterator.
      for (Iterator<StorageDirectory> it
             = this.removedStorageDirs.iterator(); it.hasNext();) {
        StorageDirectory sd = it.next();
        File root = sd.getRoot();
        LOG.info("currently disabled dir " + root.getAbsolutePath() +
                 "; type="+sd.getStorageDirType()
                 + ";canwrite="+FileUtil.canWrite(root));
        if (root.exists() && FileUtil.canWrite(root)) {
          LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
          this.addStorageDir(sd); // restore
          this.removedStorageDirs.remove(sd);
        }
      }
    }
  }

  /**
   * @return A list of storage directories which are in the errored state.
   */
  List<StorageDirectory> getRemovedStorageDirs() {
    return this.removedStorageDirs;
  }

  /**
   * Same as {@link NNStorage#setStorageDirectories(Collection, Collection,
   * Collection)} with no shared edits directories.
   */
  @VisibleForTesting
  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
                                          Collection<URI> fsEditsDirs)
      throws IOException {
    setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList<URI>());
  }

  /**
   * Set the storage directories which will be used. This should only ever be
   * called from inside NNStorage. However, it needs to remain package private
   * for testing, as StorageDirectories need to be reinitialised after using
   * Mockito.spy() on this class, as Mockito doesn't work well with inner
   * classes, such as StorageDirectory in this case.
   *
   * Synchronized due to initialization of storageDirs and removedStorageDirs.
   *
   * A URI present in both fsNameDirs and fsEditsDirs becomes a single
   * IMAGE_AND_EDITS directory. Only URIs with the "file" scheme are added.
   * None of the passed collections is modified.
   *
   * @param fsNameDirs Locations to store images.
   * @param fsEditsDirs Locations to store edit logs.
   * @param sharedEditsDirs Locations that are shared; the matching storage
   *                        directories are created as shared.
   * @throws IOException if any URI has no scheme
   */
  @VisibleForTesting
  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
                                          Collection<URI> fsEditsDirs,
                                          Collection<URI> sharedEditsDirs)
      throws IOException {
    this.storageDirs.clear();
    this.removedStorageDirs.clear();

    // Entries that double as name dirs are dropped from this private copy,
    // so the caller's collection is never mutated (and may even be the same
    // object as fsNameDirs or be unmodifiable).
    List<URI> editsOnlyDirs = Lists.newArrayList(fsEditsDirs);

    // Add all name dirs with appropriate NameNodeDirType
    for (URI dirName : fsNameDirs) {
      checkSchemeConsistency(dirName);
      boolean isAlsoEdits = removeFirstMatch(editsOnlyDirs, dirName);
      NameNodeDirType dirType = isAlsoEdits
          ? NameNodeDirType.IMAGE_AND_EDITS
          : NameNodeDirType.IMAGE;
      addLocalStorageDir(dirName, dirType, sharedEditsDirs);
    }

    // Add edits dirs if they are different from name dirs
    for (URI dirName : editsOnlyDirs) {
      checkSchemeConsistency(dirName);
      addLocalStorageDir(dirName, NameNodeDirType.EDITS, sharedEditsDirs);
    }
  }

  /**
   * Removes the first URI in uris that compares equal to target.
   *
   * @return true if an element was removed
   */
  private static boolean removeFirstMatch(Collection<URI> uris, URI target) {
    for (Iterator<URI> it = uris.iterator(); it.hasNext();) {
      if (it.next().compareTo(target) == 0) {
        it.remove();
        return true;
      }
    }
    return false;
  }

  /**
   * Adds a storage directory for dirName, but only if the URI is of type
   * file://. The scheme must already have been checked to be non-null.
   */
  private void addLocalStorageDir(URI dirName, NameNodeDirType dirType,
                                  Collection<URI> sharedEditsDirs) {
    if (LOCAL_URI_SCHEME.equals(dirName.getScheme())) {
      this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
          dirType,
          sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
    }
  }

  /**
   * Return the storage directory corresponding to the passed URI
   * @param uri URI of a storage directory
   * @return The matching storage directory or null if none found
   */
  StorageDirectory getStorageDirectory(URI uri) {
    try {
      // Normalise through Util.fileAsURI so both sides compare in one form.
      uri = Util.fileAsURI(new File(uri));
      for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
        StorageDirectory sd = it.next();
        if (Util.fileAsURI(sd.getRoot()).equals(uri)) {
          return sd;
        }
      }
    } catch (IOException ioe) {
      LOG.warn("Error converting file to URI", ioe);
    }
    return null;
  }

  /**
   * Checks the consistency of a URI, in particular if the scheme
   * is specified
   * @param u URI whose consistency is being checked.
   * @throws IOException if the URI has no scheme
   */
  private static void checkSchemeConsistency(URI u) throws IOException {
    String scheme = u.getScheme();
    // the URI should have a proper scheme
    if (scheme == null) {
      throw new IOException("Undefined scheme for " + u);
    }
  }

  /**
   * Retrieve current directories of type IMAGE
   * @return Collection of URI representing image directories
   * @throws IOException in case of URI processing error
   */
  Collection<URI> getImageDirectories() throws IOException {
    return getDirectories(NameNodeDirType.IMAGE);
  }

  /**
   * Retrieve current directories of type EDITS
   * @return Collection of URI representing edits directories
   * @throws IOException in case of URI processing error
   */
  Collection<URI> getEditsDirectories() throws IOException {
    return getDirectories(NameNodeDirType.EDITS);
  }

  /**
   * Return number of storage directories of the given type.
   * @param dirType directory type, or null to count all directories
   * @return number of storage directories of type dirType
   */
  int getNumStorageDirs(NameNodeDirType dirType) {
    if (dirType == null) {
      return getNumStorageDirs();
    }
    int numDirs = 0;
    for (Iterator<StorageDirectory> it = dirIterator(dirType);
         it.hasNext(); it.next()) {
      numDirs++;
    }
    return numDirs;
  }

  /**
   * Return the list of locations being used for a specific purpose.
   * i.e. Image or edit log storage.
   *
   * @param dirType Purpose of locations requested, or null for all.
   * @return URIs of the roots of the matching storage directories
   * @throws IOException if a directory root cannot be converted to a URI
   */
  Collection<URI> getDirectories(NameNodeDirType dirType)
      throws IOException {
    List<URI> list = new ArrayList<URI>();
    Iterator<StorageDirectory> it = (dirType == null)
        ? dirIterator()
        : dirIterator(dirType);
    while (it.hasNext()) {
      StorageDirectory sd = it.next();
      try {
        list.add(Util.fileAsURI(sd.getRoot()));
      } catch (IOException e) {
        throw new IOException("Exception while processing " +
            "StorageDirectory " + sd.getRoot(), e);
      }
    }
    return list;
  }

  /**
   * Determine the last transaction ID noted in this storage directory.
   * This txid is stored in a special seen_txid file since it might not
   * correspond to the latest image or edit log. For example, an image-only
   * directory will have this txid incremented when edits logs roll, even
   * though the edits logs are in a different directory.
   *
   * @param sd StorageDirectory to check
   * @return If file exists and can be read, last recorded txid. If not, 0L.
   * @throws IOException On errors processing file pointed to by sd
   */
  static long readTransactionIdFile(StorageDirectory sd) throws IOException {
    File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
    return PersistentLongFile.readFile(txidFile, 0);
  }

  /**
   * Write the given transaction ID into the seen_txid file of a storage
   * directory.
   *
   * @param sd storage directory whose seen_txid file is written
   * @param txid transaction ID to record; must not be negative
   * @throws IOException if the file cannot be written
   * @throws IllegalArgumentException if txid is negative
   */
  void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException {
    Preconditions.checkArgument(txid >= 0, "bad txid: " + txid);

    File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
    PersistentLongFile.writeFile(txIdFile, txid);
  }

  /**
   * Set the transaction ID and time of the last checkpoint
   *
   * @param txid transaction id of the last checkpoint
   * @param time time of the last checkpoint, in millis since the epoch
   */
  void setMostRecentCheckpointInfo(long txid, long time) {
    this.mostRecentCheckpointTxId = txid;
    this.mostRecentCheckpointTime = time;
  }

  /**
   * @return the transaction ID of the last checkpoint.
   */
  public long getMostRecentCheckpointTxId() {
    return mostRecentCheckpointTxId;
  }

  /**
   * @return the time of the most recent checkpoint in millis since the epoch.
   */
  long getMostRecentCheckpointTime() {
    return mostRecentCheckpointTime;
  }

  /**
   * Write a small file in all available storage directories that
   * indicates that the namespace has reached some given transaction ID.
   *
   * This is used when the image is loaded to avoid accidental rollbacks
   * in the case where an edit log is fully deleted but there is no
   * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
   *
   * A directory that cannot be written is reported as errored and the
   * remaining directories are still processed.
   *
   * @param txid the txid that has been reached
   */
  public void writeTransactionIdFileToStorage(long txid) {
    // Write txid marker in all storage directories
    for (StorageDirectory sd : storageDirs) {
      try {
        writeTransactionIdFile(sd, txid);
      } catch (IOException e) {
        // Close any edits stream associated with this dir and remove directory
        LOG.warn("writeTransactionIdToStorage failed on " + sd,
            e);
        reportErrorsOnDirectory(sd);
      }
    }
  }

  /**
   * Return the name of the image file that is uploaded by periodic
   * checkpointing
   *
   * @param txid transaction ID embedded in the checkpoint file names
   * @return List of filenames to save checkpoints to, one per IMAGE
   *         directory.
   */
  public File[] getFsImageNameCheckpoint(long txid) {
    List<File> list = new ArrayList<File>();
    for (Iterator<StorageDirectory> it =
             dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
      list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid));
    }
    return list.toArray(new File[list.size()]);
  }

  /**
   * @return The first image file with the given txid and image type, or null
   *         if no readable IMAGE directory contains one.
   */
  public File getFsImageName(long txid, NameNodeFile nnf) {
    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
         it.hasNext();) {
      StorageDirectory sd = it.next();
      File fsImage = getStorageFile(sd, nnf, txid);
      if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
        return fsImage;
      }
    }
    return null;
  }

  /**
   * @return The first image file whose txid is the same with the given txid and
   * image type is one of the given types, or null if there is none.
   */
  public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) {
    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
         it.hasNext();) {
      StorageDirectory sd = it.next();
      for (NameNodeFile nnf : nnfs) {
        File fsImage = getStorageFile(sd, nnf, txid);
        if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
          return fsImage;
        }
      }
    }
    return null;
  }

  /**
   * @return The first {@link NameNodeFile#IMAGE} file with the given txid,
   *         or null if there is none.
   */
  public File getFsImageName(long txid) {
    return getFsImageName(txid, NameNodeFile.IMAGE);
  }

  /**
   * @return The image file for the most recent checkpoint txid, or null if
   *         there is none.
   */
  public File getHighestFsImageName() {
    return getFsImageName(getMostRecentCheckpointTxId());
  }

  /** Create new dfs name directory. Caution: this destroys all files
   * in this filesystem. */
  private void format(StorageDirectory sd) throws IOException {
    sd.clearDirectory(); // create current dir
    writeProperties(sd);
    writeTransactionIdFile(sd, 0);

    LOG.info("Storage directory " + sd.getRoot()
             + " has been successfully formatted.");
  }

  /** Formats every storage directory returned by dirIterator(). */
  private void formatAllStorageDirs() throws IOException {
    for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
      format(it.next());
    }
  }

  /**
   * Format all available storage directories, taking the storage info and
   * block pool ID from nsInfo.
   *
   * @param nsInfo namespace info; its layout version must be 0 or
   *               HdfsConstants.NAMENODE_LAYOUT_VERSION
   * @throws IOException if a directory cannot be formatted
   */
  public void format(NamespaceInfo nsInfo) throws IOException {
    Preconditions.checkArgument(nsInfo.getLayoutVersion() == 0 ||
        nsInfo.getLayoutVersion() == HdfsConstants.NAMENODE_LAYOUT_VERSION,
        "Bad layout version: %s", nsInfo.getLayoutVersion());

    this.setStorageInfo(nsInfo);
    this.blockpoolID = nsInfo.getBlockPoolID();
    formatAllStorageDirs();
  }

  /**
   * @return a NamespaceInfo built from a new namespace ID, a new cluster ID,
   *         a new block pool ID and a creation time of 0
   * @throws UnknownHostException if the block pool ID cannot be generated
   */
  public static NamespaceInfo newNamespaceInfo()
      throws UnknownHostException {
    return new NamespaceInfo(newNamespaceID(), newClusterID(),
        newBlockPoolID(), 0L);
  }

  /**
   * Format all available storage directories using the current
   * HdfsConstants.NAMENODE_LAYOUT_VERSION.
   */
  public void format() throws IOException {
    this.layoutVersion = HdfsConstants.NAMENODE_LAYOUT_VERSION;
    formatAllStorageDirs();
  }

  /**
   * Generate new namespaceID.
   *
   * namespaceID is a persistent attribute of the namespace.
   * It is generated when the namenode is formatted and remains the same
   * during the life cycle of the namenode.
   * When a datanode registers it receives it as the registrationID,
   * which is checked every time the datanode is communicating with the
   * namenode. Datanodes that do not 'know' the namespaceID are rejected.
   *
   * @return new namespaceID, never 0
   */
  private static int newNamespaceID() {
    int newID = 0;
    while (newID == 0) {
      newID = DFSUtil.getRandom().nextInt(0x7FFFFFFF);  // use 31 bits only
    }
    return newID;
  }

  /**
   * Loads the fields from a VERSION file's properties, including the block
   * pool ID (for layouts with federation support) and any deprecated
   * properties needed during upgrade.
   *
   * @throws IOException if the directory is not formatted (layout version 0)
   *         or the block pool ID is missing or inconsistent
   */
  @Override // Storage
  protected void setFieldsFromProperties(
      Properties props, StorageDirectory sd) throws IOException {
    super.setFieldsFromProperties(props, sd);
    if (layoutVersion == 0) {
      throw new IOException("NameNode directory "
                            + sd.getRoot() + " is not formatted.");
    }

    // Set Block pool ID in version with federation support
    if (NameNodeLayoutVersion.supports(
        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
      String sbpid = props.getProperty("blockpoolID");
      setBlockPoolID(sd.getRoot(), sbpid);
    }
    setDeprecatedPropertiesForUpgrade(props);
  }

  /**
   * Pull any properties out of the VERSION file that are from older
   * versions of HDFS and only necessary during upgrade.
   */
  private void setDeprecatedPropertiesForUpgrade(Properties props) {
    deprecatedProperties = new HashMap<String, String>();
    String md5 = props.getProperty(DEPRECATED_MESSAGE_DIGEST_PROPERTY);
    if (md5 != null) {
      deprecatedProperties.put(DEPRECATED_MESSAGE_DIGEST_PROPERTY, md5);
    }
  }

  /**
   * Return a property that was stored in an earlier version of HDFS.
   *
   * This should only be used during upgrades.
   *
   * @return the stored value, or null if the property is absent or no
   *         VERSION properties have been loaded yet
   */
  String getDeprecatedProperty(String prop) {
    assert getLayoutVersion() > HdfsConstants.NAMENODE_LAYOUT_VERSION :
      "getDeprecatedProperty should only be done when loading " +
      "storage from past versions during upgrade.";
    // The map is only created once properties have been read.
    if (deprecatedProperties == null) {
      return null;
    }
    return deprecatedProperties.get(prop);
  }

  /**
   * Write version file into the storage directory.
   *
   * The version file should always be written last.
   * Missing or corrupted version file indicates that
   * the checkpoint is not valid.
   *
   * @param props properties to populate
   * @param sd storage directory
   * @throws IOException
   */
  @Override // Storage
  protected void setPropertiesFromFields(Properties props,
                           StorageDirectory sd
                           ) throws IOException {
    super.setPropertiesFromFields(props, sd);
    // Set blockpoolID in version with federation support
    if (NameNodeLayoutVersion.supports(
        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
      props.setProperty("blockpoolID", blockpoolID);
    }
  }

  /**
   * Get the storage file of the given type and txid inside the 'current'
   * directory of sd.
   */
  static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) {
    return new File(sd.getCurrentDir(), getNameNodeFileName(type, imageTxId));
  }

  /**
   * Get a storage file for one of the files that doesn't need a txid associated
   * (e.g version, seen_txid)
   */
  static File getStorageFile(StorageDirectory sd, NameNodeFile type) {
    return new File(sd.getCurrentDir(), type.getName());
  }

  @VisibleForTesting
  public static String getCheckpointImageFileName(long txid) {
    return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid);
  }

  @VisibleForTesting
  public static String getImageFileName(long txid) {
    return getNameNodeFileName(NameNodeFile.IMAGE, txid);
  }

  @VisibleForTesting
  public static String getRollbackImageFileName(long txid) {
    return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
  }

  /**
   * Single place that builds "name_txid" file names; the txid is zero-padded
   * to 19 digits.
   */
  private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
    return String.format("%s_%019d", nnf.getName(), txid);
  }

  @VisibleForTesting
  public static String getInProgressEditsFileName(long startTxId) {
    return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId);
  }

  static File getInProgressEditsFile(StorageDirectory sd, long startTxId) {
    return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId));
  }

  static File getFinalizedEditsFile(StorageDirectory sd,
      long startTxId, long endTxId) {
    return new File(sd.getCurrentDir(),
        getFinalizedEditsFileName(startTxId, endTxId));
  }

  static File getTemporaryEditsFile(StorageDirectory sd,
      long startTxId, long endTxId, long timestamp) {
    return new File(sd.getCurrentDir(),
        getTemporaryEditsFileName(startTxId, endTxId, timestamp));
  }

  static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) {
    return getStorageFile(sd, nnf, txid);
  }

  @VisibleForTesting
  public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
    return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
                         startTxId, endTxId);
  }

  public static String getTemporaryEditsFileName(long startTxId, long endTxId,
      long timestamp) {
    return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(),
                         startTxId, endTxId, timestamp);
  }

  /**
   * Return the first readable finalized edits file for the given txid.
   *
   * @throws IOException if no such file exists
   */
  File findFinalizedEditsFile(long startTxId, long endTxId)
      throws IOException {
    File ret = findFile(NameNodeDirType.EDITS,
        getFinalizedEditsFileName(startTxId, endTxId));
    if (ret == null) {
      throw new IOException(
          "No edits file for txid " + startTxId + "-" + endTxId + " exists!");
    }
    return ret;
  }

  /**
   * Return the first readable image file for the given txid and image type, or
   * null if no such image can be found
   */
  File findImageFile(NameNodeFile nnf, long txid) {
    return findFile(NameNodeDirType.IMAGE,
        getNameNodeFileName(nnf, txid));
  }

  /**
   * Return the first readable storage file of the given name
   * across any of the 'current' directories in SDs of the
   * given type, or null if no such file exists.
   */
  private File findFile(NameNodeDirType dirType, String name) {
    for (StorageDirectory sd : dirIterable(dirType)) {
      File candidate = new File(sd.getCurrentDir(), name);
      if (FileUtil.canRead(sd.getCurrentDir()) &&
          candidate.exists()) {
        return candidate;
      }
    }
    return null;
  }

  /**
   * Disable the check for pre-upgradable layouts. Needed for BackupImage.
   * @param val Whether to disable the preupgradeable layout check.
   */
  void setDisablePreUpgradableLayoutCheck(boolean val) {
    disablePreUpgradableLayoutCheck = val;
  }

  /**
   * Marks a list of directories as having experienced an error.
   *
   * @param sds A list of storage directories to mark as errored.
   */
  void reportErrorsOnDirectories(List<StorageDirectory> sds) {
    for (StorageDirectory sd : sds) {
      reportErrorsOnDirectory(sd);
    }
  }

  /**
   * Reports that a directory has experienced an error: the directory is
   * unlocked (best effort), removed from the active storage directories and
   * remembered in the list of removed directories.
   *
   * @param sd A storage directory to mark as errored.
   */
  private void reportErrorsOnDirectory(StorageDirectory sd) {
    LOG.error("Error reported on storage directory " + sd);

    // Only build the directory listing when it will actually be logged.
    if (LOG.isDebugEnabled()) {
      LOG.debug("current list of storage dirs:" + listStorageDirectories());
    }

    LOG.warn("About to remove corresponding storage: "
             + sd.getRoot().getAbsolutePath());
    try {
      sd.unlock();
    } catch (Exception e) {
      // Best effort: the directory is being taken out of service anyway.
      LOG.warn("Unable to unlock bad storage directory: "
               +  sd.getRoot().getPath(), e);
    }

    // Only record the directory as removed if it was still active.
    if (this.storageDirs.remove(sd)) {
      this.removedStorageDirs.add(sd);
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("at the end current list of storage dirs:"
                + listStorageDirectories());
    }
  }

  /**
   * Processes the startup options for the clusterid and blockpoolid
   * for the upgrade. Does nothing unless startOpt is UPGRADE.
   * @param startOpt Startup options
   * @param layoutVersion Layout version for the upgrade
   * @throws IOException
   */
  void processStartupOptionsForUpgrade(StartupOption startOpt, int layoutVersion)
      throws IOException {
    if (startOpt == StartupOption.UPGRADE) {
      // If upgrade from a release that does not support federation,
      // if clusterId is provided in the startupOptions use it.
      // Else generate a new cluster ID
      if (!NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.FEDERATION, layoutVersion)) {
        if (startOpt.getClusterId() == null) {
          startOpt.setClusterId(newClusterID());
        }
        setClusterID(startOpt.getClusterId());
        setBlockPoolID(newBlockPoolID());
      } else {
        // Upgrade from one version of federation to another supported
        // version of federation doesn't require clusterID.
        // Warn the user if the current clusterid didn't match with the input
        // clusterid.
        if (startOpt.getClusterId() != null
            && !startOpt.getClusterId().equals(getClusterID())) {
          LOG.warn("Clusterid mismatch - current clusterid: " + getClusterID()
              + ", Ignoring given clusterid: " + startOpt.getClusterId());
        }
      }
      LOG.info("Using clusterid: " + getClusterID());
    }
  }

  /**
   * Report that an IOE has occurred on some file which may
   * or may not be within one of the NN image storage directories.
   * The first storage directory whose root is a path prefix of the file is
   * reported as errored; files outside every directory are ignored.
   */
  @Override
  public void reportErrorOnFile(File f) {
    // We use getAbsolutePath here instead of getCanonicalPath since we know
    // that there is some IO problem on that drive.
    // getCanonicalPath may need to call stat() or readlink() and it's likely
    // those calls would fail due to the same underlying IO problem.
    String absPath = f.getAbsolutePath();
    for (StorageDirectory sd : storageDirs) {
      String dirPath = sd.getRoot().getAbsolutePath();
      // Terminate with a separator so "/a/b" does not match "/a/bc/file".
      if (!dirPath.endsWith(File.separator)) {
        dirPath += File.separator;
      }
      if (absPath.startsWith(dirPath)) {
        reportErrorsOnDirectory(sd);
        return;
      }
    }
  }

  /**
   * Generate new clusterID.
   *
   * clusterID is a persistent attribute of the cluster.
   * It is generated when the cluster is created and remains the same
   * during the life cycle of the cluster. When a new name node is formatted,
   * if this is a new cluster, a new clusterID is generated and stored.
   * Subsequent name nodes must be given the same ClusterID during their
   * format to be in the same cluster.
   * When a datanode registers it receives the clusterID and sticks with it.
   * If at any point, name node or data node tries to join another cluster, it
   * will be rejected.
   *
   * @return new clusterID
   */
  public static String newClusterID() {
    return "CID-" + UUID.randomUUID().toString();
  }

  void setClusterID(String cid) {
    clusterID = cid;
  }

  /**
   * try to find current cluster id in the VERSION files
   * returns first cluster id found in any VERSION file
   * null in case none found
   * @return clusterId or null in case no cluster id found
   */
  public String determineClusterId() {
    Iterator<StorageDirectory> sdit = dirIterator(NameNodeDirType.IMAGE);
    while (sdit.hasNext()) {
      StorageDirectory sd = sdit.next();
      try {
        Properties props = readPropertiesFile(sd.getVersionFile());
        String cid = props.getProperty("clusterID");
        LOG.info("current cluster id for sd="+sd.getCurrentDir() +
            ";lv=" + layoutVersion + ";cid=" + cid);

        if (cid != null && !cid.equals("")) {
          return cid;
        }
      } catch (Exception e) {
        // Deliberately ignored: an unreadable directory is skipped and the
        // next one is tried.
        LOG.warn("this sd not available: " + e.getLocalizedMessage());
      }
    }
    LOG.warn("couldn't find any VERSION file containing valid ClusterId");
    return null;
  }

  /**
   * Generate new blockpoolID of the form "BP-random-ip-timestamp".
   *
   * @return new blockpoolID
   * @throws UnknownHostException if the default IP cannot be determined
   */
  static String newBlockPoolID() throws UnknownHostException {
    final String ip;
    try {
      ip = DNS.getDefaultIP("default");
    } catch (UnknownHostException e) {
      LOG.warn("Could not find ip address of \"default\" inteface.");
      throw e;
    }

    int rand = DFSUtil.getSecureRandom().nextInt(Integer.MAX_VALUE);
    return "BP-" + rand + "-"+ ip + "-" + Time.now();
  }

  /** Set block pool ID without any validation. */
  void setBlockPoolID(String bpid) {
    blockpoolID = bpid;
  }

  /**
   * Validate and set block pool ID.
   *
   * @param storage root of the storage directory the ID was read from
   * @param bpid block pool ID read from the VERSION file
   * @throws InconsistentFSStateException if bpid is null or empty, or differs
   *         from a block pool ID that has already been set
   */
  private void setBlockPoolID(File storage, String bpid)
      throws InconsistentFSStateException {
    if (bpid == null || bpid.equals("")) {
      throw new InconsistentFSStateException(storage, "file "
          + Storage.STORAGE_FILE_VERSION + " has no block pool Id.");
    }

    if (!blockpoolID.equals("") && !blockpoolID.equals(bpid)) {
      throw new InconsistentFSStateException(storage,
          "Unexepcted blockpoolID " + bpid + " . Expected " + blockpoolID);
    }
    setBlockPoolID(bpid);
  }

  public String getBlockPoolID() {
    return blockpoolID;
  }

  /**
   * Iterate over all current storage directories, inspecting them
   * with the given inspector.
   */
  void inspectStorageDirs(FSImageStorageInspector inspector)
      throws IOException {

    // Process each of the storage directories to find the pair of
    // newest image file and edit file
    for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
      StorageDirectory sd = it.next();
      inspector.inspectDirectory(sd);
    }
  }

  /**
   * Iterate over all of the storage dirs, reading their contents to determine
   * their layout versions. Returns an FSImageStorageInspector which has
   * inspected each directory.
   *
   * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
   * @throws IOException if no valid storage dirs are found or no valid layout version
   */
  FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes)
      throws IOException {
    Integer layoutVersion = null;
    boolean multipleLV = false;
    StringBuilder layoutVersions = new StringBuilder();

    // First determine what range of layout versions we're going to inspect
    for (Iterator<StorageDirectory> it = dirIterator(false);
         it.hasNext();) {
      StorageDirectory sd = it.next();
      if (!sd.getVersionFile().exists()) {
        FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
        continue;
      }
      readProperties(sd); // sets layoutVersion
      int lv = getLayoutVersion();
      if (layoutVersion == null) {
        layoutVersion = Integer.valueOf(lv);
      } else if (!layoutVersion.equals(lv)) {
        multipleLV = true;
      }
      layoutVersions.append("(").append(sd.getRoot()).append(", ").append(lv).append(") ");
    }

    if (layoutVersion == null) {
      throw new IOException("No storage directories contained VERSION information");
    }
    if (multipleLV) {
      throw new IOException(
          "Storage directories contain multiple layout versions: "
              + layoutVersions);
    }
    // If the storage directories are with the new layout version
    // (ie edits_<txnid>) then use the new inspector, which will ignore
    // the old format dirs.
    FSImageStorageInspector inspector;
    if (NameNodeLayoutVersion.supports(
        LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
      inspector = new FSImageTransactionalStorageInspector(fileTypes);
    } else {
      inspector = new FSImagePreTransactionalStorageInspector();
    }

    inspectStorageDirs(inspector);
    return inspector;
  }

  /**
   * @return a NamespaceInfo built from the current namespace ID, cluster ID,
   *         block pool ID and creation time
   */
  public NamespaceInfo getNamespaceInfo() {
    return new NamespaceInfo(
        getNamespaceID(),
        getClusterID(),
        getBlockPoolID(),
        getCTime());
  }
}