001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import java.io.Closeable;
021import java.io.File;
022import java.io.IOException;
023import java.io.RandomAccessFile;
024import java.net.URI;
025import java.net.UnknownHostException;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.EnumSet;
029import java.util.HashMap;
030import java.util.Iterator;
031import java.util.List;
032import java.util.Properties;
033import java.util.UUID;
034import java.util.concurrent.CopyOnWriteArrayList;
035
036import org.apache.hadoop.classification.InterfaceAudience;
037import org.apache.hadoop.conf.Configuration;
038import org.apache.hadoop.fs.FileUtil;
039import org.apache.hadoop.hdfs.DFSUtil;
040import org.apache.hadoop.hdfs.protocol.HdfsConstants;
041import org.apache.hadoop.hdfs.protocol.LayoutVersion;
042import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
044import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
045import org.apache.hadoop.hdfs.server.common.Storage;
046import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
047import org.apache.hadoop.hdfs.server.common.Util;
048import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
049import org.apache.hadoop.hdfs.util.PersistentLongFile;
050import org.apache.hadoop.io.IOUtils;
051import org.apache.hadoop.net.DNS;
052import org.apache.hadoop.util.Time;
053
054import com.google.common.annotations.VisibleForTesting;
055import com.google.common.base.Preconditions;
056import com.google.common.collect.Lists;
057
058/**
059 * NNStorage is responsible for management of the StorageDirectories used by
060 * the NameNode.
061 */
062@InterfaceAudience.Private
063public class NNStorage extends Storage implements Closeable,
064    StorageErrorReporter {
065  static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest";
066  static final String LOCAL_URI_SCHEME = "file";
067
068  //
069  // The filenames used for storing the images
070  //
071  public enum NameNodeFile {
072    IMAGE     ("fsimage"),
073    TIME      ("fstime"), // from "old" pre-HDFS-1073 format
074    SEEN_TXID ("seen_txid"),
075    EDITS     ("edits"),
076    IMAGE_NEW ("fsimage.ckpt"),
077    IMAGE_ROLLBACK("fsimage_rollback"),
078    EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
079    EDITS_INPROGRESS ("edits_inprogress"),
080    EDITS_TMP ("edits_tmp"),
081    IMAGE_LEGACY_OIV ("fsimage_legacy_oiv");  // For pre-PB format
082
083    private String fileName = null;
084    private NameNodeFile(String name) { this.fileName = name; }
085    @VisibleForTesting
086    public String getName() { return fileName; }
087  }
088
089  /**
090   * Implementation of StorageDirType specific to namenode storage
091   * A Storage directory could be of type IMAGE which stores only fsimage,
092   * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which
093   * stores both fsimage and edits.
094   */
095  @VisibleForTesting
096  public static enum NameNodeDirType implements StorageDirType {
097    UNDEFINED,
098    IMAGE,
099    EDITS,
100    IMAGE_AND_EDITS;
101
102    @Override
103    public StorageDirType getStorageDirType() {
104      return this;
105    }
106
107    @Override
108    public boolean isOfType(StorageDirType type) {
109      if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS))
110        return true;
111      return this == type;
112    }
113  }
114
115  protected String blockpoolID = ""; // id of the block pool
116  
  /**
   * Flag that controls whether we try to restore failed storage directories.
   */
120  private boolean restoreFailedStorage = false;
121  private final Object restorationLock = new Object();
122  private boolean disablePreUpgradableLayoutCheck = false;
123
124
125  /**
126   * TxId of the last transaction that was included in the most
127   * recent fsimage file. This does not include any transactions
128   * that have since been written to the edit log.
129   */
130  protected volatile long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;
131  
132  /**
133   * Time of the last checkpoint, in milliseconds since the epoch.
134   */
135  private long mostRecentCheckpointTime = 0;
136
  /**
   * List of failed (and thus removed) storage directories.
   */
140  final protected List<StorageDirectory> removedStorageDirs
141    = new CopyOnWriteArrayList<StorageDirectory>();
142
143  /**
144   * Properties from old layout versions that may be needed
145   * during upgrade only.
146   */
147  private HashMap<String, String> deprecatedProperties;
148
149  /**
150   * Construct the NNStorage.
151   * @param conf Namenode configuration.
152   * @param imageDirs Directories the image can be stored in.
153   * @param editsDirs Directories the editlog can be stored in.
154   * @throws IOException if any directories are inaccessible.
155   */
156  public NNStorage(Configuration conf, 
157                   Collection<URI> imageDirs, Collection<URI> editsDirs) 
158      throws IOException {
159    super(NodeType.NAME_NODE);
160
161    storageDirs = new CopyOnWriteArrayList<StorageDirectory>();
162    
163    // this may modify the editsDirs, so copy before passing in
164    setStorageDirectories(imageDirs, 
165                          Lists.newArrayList(editsDirs),
166                          FSNamesystem.getSharedEditsDirs(conf));
167  }
168
169  @Override // Storage
170  public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException {
171    if (disablePreUpgradableLayoutCheck) {
172      return false;
173    }
174
175    File oldImageDir = new File(sd.getRoot(), "image");
176    if (!oldImageDir.exists()) {
177      return false;
178    }
179    // check the layout version inside the image file
180    File oldF = new File(oldImageDir, "fsimage");
181    RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
182    try {
183      oldFile.seek(0);
184      int oldVersion = oldFile.readInt();
185      oldFile.close();
186      oldFile = null;
187      if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION)
188        return false;
189    } finally {
190      IOUtils.cleanup(LOG, oldFile);
191    }
192    return true;
193  }
194
195  @Override // Closeable
196  public void close() throws IOException {
197    unlockAll();
198    storageDirs.clear();
199  }
200
201  /**
202   * Set flag whether an attempt should be made to restore failed storage
203   * directories at the next available oppurtuinity.
204   *
205   * @param val Whether restoration attempt should be made.
206   */
207  void setRestoreFailedStorage(boolean val) {
208    LOG.warn("set restore failed storage to " + val);
209    restoreFailedStorage=val;
210  }
211
212  /**
213   * @return Whether failed storage directories are to be restored.
214   */
215  boolean getRestoreFailedStorage() {
216    return restoreFailedStorage;
217  }
218
219  /**
220   * See if any of removed storages is "writable" again, and can be returned
221   * into service.
222   */
223  void attemptRestoreRemovedStorage() {
224    // if directory is "alive" - copy the images there...
225    if(!restoreFailedStorage || removedStorageDirs.size() == 0)
226      return; //nothing to restore
227
228    /* We don't want more than one thread trying to restore at a time */
229    synchronized (this.restorationLock) {
230      LOG.info("NNStorage.attemptRestoreRemovedStorage: check removed(failed) "+
231               "storarge. removedStorages size = " + removedStorageDirs.size());
232      for(Iterator<StorageDirectory> it
233            = this.removedStorageDirs.iterator(); it.hasNext();) {
234        StorageDirectory sd = it.next();
235        File root = sd.getRoot();
236        LOG.info("currently disabled dir " + root.getAbsolutePath() +
237                 "; type="+sd.getStorageDirType() 
238                 + ";canwrite="+FileUtil.canWrite(root));
239        if(root.exists() && FileUtil.canWrite(root)) {
240          LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
241          this.addStorageDir(sd); // restore
242          this.removedStorageDirs.remove(sd);
243        }
244      }
245    }
246  }
247
248  /**
249   * @return A list of storage directories which are in the errored state.
250   */
251  List<StorageDirectory> getRemovedStorageDirs() {
252    return this.removedStorageDirs;
253  }
254  
255  /**
256   * See {@link NNStorage#setStorageDirectories(Collection, Collection, Collection)}
257   */
258  @VisibleForTesting
259  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
260                                          Collection<URI> fsEditsDirs)
261      throws IOException {
262    setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList<URI>());
263  }
264
265  /**
266   * Set the storage directories which will be used. This should only ever be
267   * called from inside NNStorage. However, it needs to remain package private
268   * for testing, as StorageDirectories need to be reinitialised after using
269   * Mockito.spy() on this class, as Mockito doesn't work well with inner
270   * classes, such as StorageDirectory in this case.
271   *
272   * Synchronized due to initialization of storageDirs and removedStorageDirs.
273   *
274   * @param fsNameDirs Locations to store images.
275   * @param fsEditsDirs Locations to store edit logs.
276   * @throws IOException
277   */
278  @VisibleForTesting
279  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
280                                          Collection<URI> fsEditsDirs,
281                                          Collection<URI> sharedEditsDirs)
282      throws IOException {
283    this.storageDirs.clear();
284    this.removedStorageDirs.clear();
285
286   // Add all name dirs with appropriate NameNodeDirType
287    for (URI dirName : fsNameDirs) {
288      checkSchemeConsistency(dirName);
289      boolean isAlsoEdits = false;
290      for (URI editsDirName : fsEditsDirs) {
291        if (editsDirName.compareTo(dirName) == 0) {
292          isAlsoEdits = true;
293          fsEditsDirs.remove(editsDirName);
294          break;
295        }
296      }
297      NameNodeDirType dirType = (isAlsoEdits) ?
298                          NameNodeDirType.IMAGE_AND_EDITS :
299                          NameNodeDirType.IMAGE;
300      // Add to the list of storage directories, only if the
301      // URI is of type file://
302      if(dirName.getScheme().compareTo("file") == 0) {
303        this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
304            dirType,
305            sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
306      }
307    }
308
309    // Add edits dirs if they are different from name dirs
310    for (URI dirName : fsEditsDirs) {
311      checkSchemeConsistency(dirName);
312      // Add to the list of storage directories, only if the
313      // URI is of type file://
314      if(dirName.getScheme().compareTo("file") == 0)
315        this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
316                    NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName)));
317    }
318  }
319
320  /**
321   * Return the storage directory corresponding to the passed URI
322   * @param uri URI of a storage directory
323   * @return The matching storage directory or null if none found
324   */
325  StorageDirectory getStorageDirectory(URI uri) {
326    try {
327      uri = Util.fileAsURI(new File(uri));
328      Iterator<StorageDirectory> it = dirIterator();
329      for (; it.hasNext(); ) {
330        StorageDirectory sd = it.next();
331        if (Util.fileAsURI(sd.getRoot()).equals(uri)) {
332          return sd;
333        }
334      }
335    } catch (IOException ioe) {
336      LOG.warn("Error converting file to URI", ioe);
337    }
338    return null;
339  }
340
341  /**
342   * Checks the consistency of a URI, in particular if the scheme
343   * is specified 
344   * @param u URI whose consistency is being checked.
345   */
346  private static void checkSchemeConsistency(URI u) throws IOException {
347    String scheme = u.getScheme();
348    // the URI should have a proper scheme
349    if(scheme == null) {
350      throw new IOException("Undefined scheme for " + u);
351    }
352  }
353
354  /**
355   * Retrieve current directories of type IMAGE
356   * @return Collection of URI representing image directories
357   * @throws IOException in case of URI processing error
358   */
359  Collection<URI> getImageDirectories() throws IOException {
360    return getDirectories(NameNodeDirType.IMAGE);
361  }
362
363  /**
364   * Retrieve current directories of type EDITS
365   * @return Collection of URI representing edits directories
366   * @throws IOException in case of URI processing error
367   */
368  Collection<URI> getEditsDirectories() throws IOException {
369    return getDirectories(NameNodeDirType.EDITS);
370  }
371
372  /**
373   * Return number of storage directories of the given type.
374   * @param dirType directory type
375   * @return number of storage directories of type dirType
376   */
377  int getNumStorageDirs(NameNodeDirType dirType) {
378    if(dirType == null)
379      return getNumStorageDirs();
380    Iterator<StorageDirectory> it = dirIterator(dirType);
381    int numDirs = 0;
382    for(; it.hasNext(); it.next())
383      numDirs++;
384    return numDirs;
385  }
386
387  /**
388   * Return the list of locations being used for a specific purpose.
389   * i.e. Image or edit log storage.
390   *
391   * @param dirType Purpose of locations requested.
392   * @throws IOException
393   */
394  Collection<URI> getDirectories(NameNodeDirType dirType)
395      throws IOException {
396    ArrayList<URI> list = new ArrayList<URI>();
397    Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() :
398                                    dirIterator(dirType);
399    for ( ;it.hasNext(); ) {
400      StorageDirectory sd = it.next();
401      try {
402        list.add(Util.fileAsURI(sd.getRoot()));
403      } catch (IOException e) {
404        throw new IOException("Exception while processing " +
405            "StorageDirectory " + sd.getRoot(), e);
406      }
407    }
408    return list;
409  }
410  
411  /**
412   * Determine the last transaction ID noted in this storage directory.
413   * This txid is stored in a special seen_txid file since it might not
414   * correspond to the latest image or edit log. For example, an image-only
415   * directory will have this txid incremented when edits logs roll, even
416   * though the edits logs are in a different directory.
417   *
418   * @param sd StorageDirectory to check
419   * @return If file exists and can be read, last recorded txid. If not, 0L.
420   * @throws IOException On errors processing file pointed to by sd
421   */
422  static long readTransactionIdFile(StorageDirectory sd) throws IOException {
423    File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
424    return PersistentLongFile.readFile(txidFile, 0);
425  }
426  
427  /**
428   * Write last checkpoint time into a separate file.
429   * @param sd storage directory
430   * @throws IOException
431   */
432  void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException {
433    Preconditions.checkArgument(txid >= 0, "bad txid: " + txid);
434    
435    File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
436    PersistentLongFile.writeFile(txIdFile, txid);
437  }
438
439  /**
440   * Set the transaction ID and time of the last checkpoint
441   * 
442   * @param txid transaction id of the last checkpoint
443   * @param time time of the last checkpoint, in millis since the epoch
444   */
445  void setMostRecentCheckpointInfo(long txid, long time) {
446    this.mostRecentCheckpointTxId = txid;
447    this.mostRecentCheckpointTime = time;
448  }
449
450  /**
451   * @return the transaction ID of the last checkpoint.
452   */
453  public long getMostRecentCheckpointTxId() {
454    return mostRecentCheckpointTxId;
455  }
456  
457  /**
458   * @return the time of the most recent checkpoint in millis since the epoch.
459   */
460  long getMostRecentCheckpointTime() {
461    return mostRecentCheckpointTime;
462  }
463
464  /**
465   * Write a small file in all available storage directories that
466   * indicates that the namespace has reached some given transaction ID.
467   * 
468   * This is used when the image is loaded to avoid accidental rollbacks
469   * in the case where an edit log is fully deleted but there is no
470   * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
471   * @param txid the txid that has been reached
472   */
473  public void writeTransactionIdFileToStorage(long txid) {
474    // Write txid marker in all storage directories
475    for (StorageDirectory sd : storageDirs) {
476      try {
477        writeTransactionIdFile(sd, txid);
478      } catch(IOException e) {
479        // Close any edits stream associated with this dir and remove directory
480        LOG.warn("writeTransactionIdToStorage failed on " + sd,
481            e);
482        reportErrorsOnDirectory(sd);
483      }
484    }
485  }
486
487  /**
488   * Return the name of the image file that is uploaded by periodic
489   * checkpointing
490   *
491   * @return List of filenames to save checkpoints to.
492   */
493  public File[] getFsImageNameCheckpoint(long txid) {
494    ArrayList<File> list = new ArrayList<File>();
495    for (Iterator<StorageDirectory> it =
496                 dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
497      list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid));
498    }
499    return list.toArray(new File[list.size()]);
500  }
501
502  /**
503   * @return The first image file with the given txid and image type.
504   */
505  public File getFsImageName(long txid, NameNodeFile nnf) {
506    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
507        it.hasNext();) {
508      StorageDirectory sd = it.next();
509      File fsImage = getStorageFile(sd, nnf, txid);
510      if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
511        return fsImage;
512      }
513    }
514    return null;
515  }
516
517  /**
518   * @return The first image file whose txid is the same with the given txid and
519   * image type is one of the given types.
520   */
521  public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) {
522    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
523        it.hasNext();) {
524      StorageDirectory sd = it.next();
525      for (NameNodeFile nnf : nnfs) {
526        File fsImage = getStorageFile(sd, nnf, txid);
527        if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
528          return fsImage;
529        }
530      }
531    }
532    return null;
533  }
534
535  public File getFsImageName(long txid) {
536    return getFsImageName(txid, NameNodeFile.IMAGE);
537  }
538
539  public File getHighestFsImageName() {
540    return getFsImageName(getMostRecentCheckpointTxId());
541  }
542
543  /** Create new dfs name directory.  Caution: this destroys all files
544   * in this filesystem. */
545  private void format(StorageDirectory sd) throws IOException {
546    sd.clearDirectory(); // create currrent dir
547    writeProperties(sd);
548    writeTransactionIdFile(sd, 0);
549
550    LOG.info("Storage directory " + sd.getRoot()
551             + " has been successfully formatted.");
552  }
553
554  /**
555   * Format all available storage directories.
556   */
557  public void format(NamespaceInfo nsInfo) throws IOException {
558    Preconditions.checkArgument(nsInfo.getLayoutVersion() == 0 ||
559        nsInfo.getLayoutVersion() == HdfsConstants.NAMENODE_LAYOUT_VERSION,
560        "Bad layout version: %s", nsInfo.getLayoutVersion());
561    
562    this.setStorageInfo(nsInfo);
563    this.blockpoolID = nsInfo.getBlockPoolID();
564    for (Iterator<StorageDirectory> it =
565                           dirIterator(); it.hasNext();) {
566      StorageDirectory sd = it.next();
567      format(sd);
568    }
569  }
570  
571  public static NamespaceInfo newNamespaceInfo()
572      throws UnknownHostException {
573    return new NamespaceInfo(newNamespaceID(), newClusterID(),
574        newBlockPoolID(), 0L);
575  }
576  
577  public void format() throws IOException {
578    this.layoutVersion = HdfsConstants.NAMENODE_LAYOUT_VERSION;
579    for (Iterator<StorageDirectory> it =
580                           dirIterator(); it.hasNext();) {
581      StorageDirectory sd = it.next();
582      format(sd);
583    }
584  }
585
586  /**
587   * Generate new namespaceID.
588   *
589   * namespaceID is a persistent attribute of the namespace.
590   * It is generated when the namenode is formatted and remains the same
591   * during the life cycle of the namenode.
592   * When a datanodes register they receive it as the registrationID,
593   * which is checked every time the datanode is communicating with the
594   * namenode. Datanodes that do not 'know' the namespaceID are rejected.
595   *
596   * @return new namespaceID
597   */
598  private static int newNamespaceID() {
599    int newID = 0;
600    while(newID == 0)
601      newID = DFSUtil.getRandom().nextInt(0x7FFFFFFF);  // use 31 bits only
602    return newID;
603  }
604
605  @Override // Storage
606  protected void setFieldsFromProperties(
607      Properties props, StorageDirectory sd) throws IOException {
608    super.setFieldsFromProperties(props, sd);
609    if (layoutVersion == 0) {
610      throw new IOException("NameNode directory "
611                            + sd.getRoot() + " is not formatted.");
612    }
613
614    // Set Block pool ID in version with federation support
615    if (NameNodeLayoutVersion.supports(
616        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
617      String sbpid = props.getProperty("blockpoolID");
618      setBlockPoolID(sd.getRoot(), sbpid);
619    }
620    setDeprecatedPropertiesForUpgrade(props);
621  }
622
623  /**
624   * Pull any properties out of the VERSION file that are from older
625   * versions of HDFS and only necessary during upgrade.
626   */
627  private void setDeprecatedPropertiesForUpgrade(Properties props) {
628    deprecatedProperties = new HashMap<String, String>();
629    String md5 = props.getProperty(DEPRECATED_MESSAGE_DIGEST_PROPERTY);
630    if (md5 != null) {
631      deprecatedProperties.put(DEPRECATED_MESSAGE_DIGEST_PROPERTY, md5);
632    }
633  }
634  
635  /**
636   * Return a property that was stored in an earlier version of HDFS.
637   * 
638   * This should only be used during upgrades.
639   */
640  String getDeprecatedProperty(String prop) {
641    assert getLayoutVersion() > HdfsConstants.NAMENODE_LAYOUT_VERSION :
642      "getDeprecatedProperty should only be done when loading " +
643      "storage from past versions during upgrade.";
644    return deprecatedProperties.get(prop);
645  }
646
647  /**
648   * Write version file into the storage directory.
649   *
650   * The version file should always be written last.
651   * Missing or corrupted version file indicates that
652   * the checkpoint is not valid.
653   *
654   * @param sd storage directory
655   * @throws IOException
656   */
657  @Override // Storage
658  protected void setPropertiesFromFields(Properties props,
659                           StorageDirectory sd
660                           ) throws IOException {
661    super.setPropertiesFromFields(props, sd);
662    // Set blockpoolID in version with federation support
663    if (NameNodeLayoutVersion.supports(
664        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
665      props.setProperty("blockpoolID", blockpoolID);
666    }
667  }
668  
669  static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) {
670    return new File(sd.getCurrentDir(),
671                    String.format("%s_%019d", type.getName(), imageTxId));
672  }
673  
674  /**
675   * Get a storage file for one of the files that doesn't need a txid associated
676   * (e.g version, seen_txid)
677   */
678  static File getStorageFile(StorageDirectory sd, NameNodeFile type) {
679    return new File(sd.getCurrentDir(), type.getName());
680  }
681
682  @VisibleForTesting
683  public static String getCheckpointImageFileName(long txid) {
684    return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid);
685  }
686
687  @VisibleForTesting
688  public static String getImageFileName(long txid) {
689    return getNameNodeFileName(NameNodeFile.IMAGE, txid);
690  }
691
692  @VisibleForTesting
693  public static String getRollbackImageFileName(long txid) {
694    return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
695  }
696
697  public static String getLegacyOIVImageFileName(long txid) {
698    return getNameNodeFileName(NameNodeFile.IMAGE_LEGACY_OIV, txid);
699  }
700
701  private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
702    return String.format("%s_%019d", nnf.getName(), txid);
703  }
704
705  @VisibleForTesting
706  public static String getInProgressEditsFileName(long startTxId) {
707    return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId);
708  }
709  
710  static File getInProgressEditsFile(StorageDirectory sd, long startTxId) {
711    return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId));
712  }
713  
714  static File getFinalizedEditsFile(StorageDirectory sd,
715      long startTxId, long endTxId) {
716    return new File(sd.getCurrentDir(),
717        getFinalizedEditsFileName(startTxId, endTxId));
718  }
719
720  static File getTemporaryEditsFile(StorageDirectory sd,
721      long startTxId, long endTxId, long timestamp) {
722    return new File(sd.getCurrentDir(),
723        getTemporaryEditsFileName(startTxId, endTxId, timestamp));
724  }
725
726  static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) {
727    return new File(sd.getCurrentDir(), getNameNodeFileName(nnf, txid));
728  }
729
730  @VisibleForTesting
731  public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
732    return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
733                         startTxId, endTxId);
734  }
735
736  public static String getTemporaryEditsFileName(long startTxId, long endTxId,
737      long timestamp) {
738    return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(),
739                         startTxId, endTxId, timestamp);
740  }
741  
742  /**
743   * Return the first readable finalized edits file for the given txid.
744   */
745  File findFinalizedEditsFile(long startTxId, long endTxId)
746  throws IOException {
747    File ret = findFile(NameNodeDirType.EDITS,
748        getFinalizedEditsFileName(startTxId, endTxId));
749    if (ret == null) {
750      throw new IOException(
751          "No edits file for txid " + startTxId + "-" + endTxId + " exists!");
752    }
753    return ret;
754  }
755    
756  /**
757   * Return the first readable image file for the given txid and image type, or
758   * null if no such image can be found
759   */
760  File findImageFile(NameNodeFile nnf, long txid) {
761    return findFile(NameNodeDirType.IMAGE,
762        getNameNodeFileName(nnf, txid));
763  }
764
765  /**
766   * Return the first readable storage file of the given name
767   * across any of the 'current' directories in SDs of the
768   * given type, or null if no such file exists.
769   */
770  private File findFile(NameNodeDirType dirType, String name) {
771    for (StorageDirectory sd : dirIterable(dirType)) {
772      File candidate = new File(sd.getCurrentDir(), name);
773      if (FileUtil.canRead(sd.getCurrentDir()) &&
774          candidate.exists()) {
775        return candidate;
776      }
777    }
778    return null;
779  }
780
781  /**
782   * Disable the check for pre-upgradable layouts. Needed for BackupImage.
783   * @param val Whether to disable the preupgradeable layout check.
784   */
785  void setDisablePreUpgradableLayoutCheck(boolean val) {
786    disablePreUpgradableLayoutCheck = val;
787  }
788
789  /**
790   * Marks a list of directories as having experienced an error.
791   *
792   * @param sds A list of storage directories to mark as errored.
793   */
794  void reportErrorsOnDirectories(List<StorageDirectory> sds) {
795    for (StorageDirectory sd : sds) {
796      reportErrorsOnDirectory(sd);
797    }
798  }
799
800  /**
801   * Reports that a directory has experienced an error.
802   * Notifies listeners that the directory is no longer
803   * available.
804   *
805   * @param sd A storage directory to mark as errored.
806   */
807  private void reportErrorsOnDirectory(StorageDirectory sd) {
808    LOG.error("Error reported on storage directory " + sd);
809
810    String lsd = listStorageDirectories();
811    LOG.debug("current list of storage dirs:" + lsd);
812
813    LOG.warn("About to remove corresponding storage: "
814             + sd.getRoot().getAbsolutePath());
815    try {
816      sd.unlock();
817    } catch (Exception e) {
818      LOG.warn("Unable to unlock bad storage directory: "
819               +  sd.getRoot().getPath(), e);
820    }
821
822    if (this.storageDirs.remove(sd)) {
823      this.removedStorageDirs.add(sd);
824    }
825    
826    lsd = listStorageDirectories();
827    LOG.debug("at the end current list of storage dirs:" + lsd);
828  }
829  
830  /** 
831   * Processes the startup options for the clusterid and blockpoolid 
832   * for the upgrade. 
833   * @param startOpt Startup options 
834   * @param layoutVersion Layout version for the upgrade 
835   * @throws IOException
836   */
837  void processStartupOptionsForUpgrade(StartupOption startOpt, int layoutVersion)
838      throws IOException {
839    if (startOpt == StartupOption.UPGRADE) {
840      // If upgrade from a release that does not support federation,
841      // if clusterId is provided in the startupOptions use it.
842      // Else generate a new cluster ID      
843      if (!NameNodeLayoutVersion.supports(
844          LayoutVersion.Feature.FEDERATION, layoutVersion)) {
845        if (startOpt.getClusterId() == null) {
846          startOpt.setClusterId(newClusterID());
847        }
848        setClusterID(startOpt.getClusterId());
849        setBlockPoolID(newBlockPoolID());
850      } else {
851        // Upgrade from one version of federation to another supported
852        // version of federation doesn't require clusterID.
853        // Warn the user if the current clusterid didn't match with the input
854        // clusterid.
855        if (startOpt.getClusterId() != null
856            && !startOpt.getClusterId().equals(getClusterID())) {
857          LOG.warn("Clusterid mismatch - current clusterid: " + getClusterID()
858              + ", Ignoring given clusterid: " + startOpt.getClusterId());
859        }
860      }
861      LOG.info("Using clusterid: " + getClusterID());
862    }
863  }
864  
865  /**
866   * Report that an IOE has occurred on some file which may
867   * or may not be within one of the NN image storage directories.
868   */
869  @Override
870  public void reportErrorOnFile(File f) {
871    // We use getAbsolutePath here instead of getCanonicalPath since we know
872    // that there is some IO problem on that drive.
873    // getCanonicalPath may need to call stat() or readlink() and it's likely
874    // those calls would fail due to the same underlying IO problem.
875    String absPath = f.getAbsolutePath();
876    for (StorageDirectory sd : storageDirs) {
877      String dirPath = sd.getRoot().getAbsolutePath();
878      if (!dirPath.endsWith(File.separator)) {
879        dirPath += File.separator;
880      }
881      if (absPath.startsWith(dirPath)) {
882        reportErrorsOnDirectory(sd);
883        return;
884      }
885    }
886    
887  }
888  
889  /**
890   * Generate new clusterID.
891   * 
892   * clusterID is a persistent attribute of the cluster.
893   * It is generated when the cluster is created and remains the same
894   * during the life cycle of the cluster.  When a new name node is formated, if 
895   * this is a new cluster, a new clusterID is geneated and stored.  Subsequent 
896   * name node must be given the same ClusterID during its format to be in the 
897   * same cluster.
898   * When a datanode register it receive the clusterID and stick with it.
899   * If at any point, name node or data node tries to join another cluster, it 
900   * will be rejected.
901   * 
902   * @return new clusterID
903   */ 
904  public static String newClusterID() {
905    return "CID-" + UUID.randomUUID().toString();
906  }
907
908  void setClusterID(String cid) {
909    clusterID = cid;
910  }
911
912  /**
913   * try to find current cluster id in the VERSION files
914   * returns first cluster id found in any VERSION file
915   * null in case none found
916   * @return clusterId or null in case no cluster id found
917   */
918  public String determineClusterId() {
919    String cid = null;
920    Iterator<StorageDirectory> sdit = dirIterator(NameNodeDirType.IMAGE);
921    while(sdit.hasNext()) {
922      StorageDirectory sd = sdit.next();
923      try {
924        Properties props = readPropertiesFile(sd.getVersionFile());
925        cid = props.getProperty("clusterID");
926        LOG.info("current cluster id for sd="+sd.getCurrentDir() + 
927            ";lv=" + layoutVersion + ";cid=" + cid);
928        
929        if(cid != null && !cid.equals(""))
930          return cid;
931      } catch (Exception e) {
932        LOG.warn("this sd not available: " + e.getLocalizedMessage());
933      } //ignore
934    }
935    LOG.warn("couldn't find any VERSION file containing valid ClusterId");
936    return null;
937  }
938
939  /**
940   * Generate new blockpoolID.
941   * 
942   * @return new blockpoolID
943   */ 
944  static String newBlockPoolID() throws UnknownHostException{
945    String ip = "unknownIP";
946    try {
947      ip = DNS.getDefaultIP("default");
948    } catch (UnknownHostException e) {
949      LOG.warn("Could not find ip address of \"default\" inteface.");
950      throw e;
951    }
952    
953    int rand = DFSUtil.getSecureRandom().nextInt(Integer.MAX_VALUE);
954    String bpid = "BP-" + rand + "-"+ ip + "-" + Time.now();
955    return bpid;
956  }
957
958  /** Validate and set block pool ID */
959  void setBlockPoolID(String bpid) {
960    blockpoolID = bpid;
961  }
962
963  /** Validate and set block pool ID */
964  private void setBlockPoolID(File storage, String bpid)
965      throws InconsistentFSStateException {
966    if (bpid == null || bpid.equals("")) {
967      throw new InconsistentFSStateException(storage, "file "
968          + Storage.STORAGE_FILE_VERSION + " has no block pool Id.");
969    }
970    
971    if (!blockpoolID.equals("") && !blockpoolID.equals(bpid)) {
972      throw new InconsistentFSStateException(storage,
973          "Unexepcted blockpoolID " + bpid + " . Expected " + blockpoolID);
974    }
975    setBlockPoolID(bpid);
976  }
977  
978  public String getBlockPoolID() {
979    return blockpoolID;
980  }
981
982  /**
983   * Iterate over all current storage directories, inspecting them
984   * with the given inspector.
985   */
986  void inspectStorageDirs(FSImageStorageInspector inspector)
987      throws IOException {
988
989    // Process each of the storage directories to find the pair of
990    // newest image file and edit file
991    for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
992      StorageDirectory sd = it.next();
993      inspector.inspectDirectory(sd);
994    }
995  }
996
997  /**
998   * Iterate over all of the storage dirs, reading their contents to determine
999   * their layout versions. Returns an FSImageStorageInspector which has
1000   * inspected each directory.
1001   * 
1002   * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
1003   * @throws IOException if no valid storage dirs are found or no valid layout version
1004   */
1005  FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes)
1006      throws IOException {
1007    Integer layoutVersion = null;
1008    boolean multipleLV = false;
1009    StringBuilder layoutVersions = new StringBuilder();
1010
1011    // First determine what range of layout versions we're going to inspect
1012    for (Iterator<StorageDirectory> it = dirIterator(false);
1013         it.hasNext();) {
1014      StorageDirectory sd = it.next();
1015      if (!sd.getVersionFile().exists()) {
1016        FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
1017        continue;
1018      }
1019      readProperties(sd); // sets layoutVersion
1020      int lv = getLayoutVersion();
1021      if (layoutVersion == null) {
1022        layoutVersion = Integer.valueOf(lv);
1023      } else if (!layoutVersion.equals(lv)) {
1024        multipleLV = true;
1025      }
1026      layoutVersions.append("(").append(sd.getRoot()).append(", ").append(lv).append(") ");
1027    }
1028    
1029    if (layoutVersion == null) {
1030      throw new IOException("No storage directories contained VERSION information");
1031    }
1032    if (multipleLV) {            
1033      throw new IOException(
1034          "Storage directories contain multiple layout versions: "
1035              + layoutVersions);
1036    }
1037    // If the storage directories are with the new layout version
1038    // (ie edits_<txnid>) then use the new inspector, which will ignore
1039    // the old format dirs.
1040    FSImageStorageInspector inspector;
1041    if (NameNodeLayoutVersion.supports(
1042        LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
1043      inspector = new FSImageTransactionalStorageInspector(fileTypes);
1044    } else {
1045      inspector = new FSImagePreTransactionalStorageInspector();
1046    }
1047    
1048    inspectStorageDirs(inspector);
1049    return inspector;
1050  }
1051
1052  public NamespaceInfo getNamespaceInfo() {
1053    return new NamespaceInfo(
1054        getNamespaceID(),
1055        getClusterID(),
1056        getBlockPoolID(),
1057        getCTime());
1058  }
1059}