001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import java.io.Closeable;
021import java.io.File;
022import java.io.IOException;
023import java.io.RandomAccessFile;
024import java.net.URI;
025import java.net.UnknownHostException;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.EnumSet;
029import java.util.HashMap;
030import java.util.Iterator;
031import java.util.List;
032import java.util.Properties;
033import java.util.UUID;
034import java.util.concurrent.CopyOnWriteArrayList;
035
036import org.apache.hadoop.classification.InterfaceAudience;
037import org.apache.hadoop.conf.Configuration;
038import org.apache.hadoop.fs.FileUtil;
039import org.apache.hadoop.hdfs.DFSUtil;
040import org.apache.hadoop.hdfs.protocol.HdfsConstants;
041import org.apache.hadoop.hdfs.protocol.LayoutVersion;
042import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
044import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
045import org.apache.hadoop.hdfs.server.common.Storage;
046import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
047import org.apache.hadoop.hdfs.server.common.Util;
048import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
049import org.apache.hadoop.hdfs.util.PersistentLongFile;
050import org.apache.hadoop.io.IOUtils;
051import org.apache.hadoop.net.DNS;
052import org.apache.hadoop.util.Time;
053
054import com.google.common.annotations.VisibleForTesting;
055import com.google.common.base.Preconditions;
056import com.google.common.collect.Lists;
057
058/**
059 * NNStorage is responsible for management of the StorageDirectories used by
060 * the NameNode.
061 */
062@InterfaceAudience.Private
063public class NNStorage extends Storage implements Closeable,
064    StorageErrorReporter {
  // Property key looked up by setDeprecatedPropertiesForUpgrade(); it is only
  // retained so that values stored by older layouts stay reachable on upgrade.
  static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest";
  // URI scheme string for local-filesystem ("file://") locations.
  static final String LOCAL_URI_SCHEME = "file";
067
068  //
069  // The filenames used for storing the images
070  //
071  public enum NameNodeFile {
072    IMAGE     ("fsimage"),
073    TIME      ("fstime"), // from "old" pre-HDFS-1073 format
074    SEEN_TXID ("seen_txid"),
075    EDITS     ("edits"),
076    IMAGE_NEW ("fsimage.ckpt"),
077    IMAGE_ROLLBACK("fsimage_rollback"),
078    EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
079    EDITS_INPROGRESS ("edits_inprogress"),
080    EDITS_TMP ("edits_tmp");
081
082    private String fileName = null;
083    private NameNodeFile(String name) { this.fileName = name; }
084    @VisibleForTesting
085    public String getName() { return fileName; }
086  }
087
088  /**
089   * Implementation of StorageDirType specific to namenode storage
090   * A Storage directory could be of type IMAGE which stores only fsimage,
091   * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which
092   * stores both fsimage and edits.
093   */
094  @VisibleForTesting
095  public static enum NameNodeDirType implements StorageDirType {
096    UNDEFINED,
097    IMAGE,
098    EDITS,
099    IMAGE_AND_EDITS;
100
101    @Override
102    public StorageDirType getStorageDirType() {
103      return this;
104    }
105
106    @Override
107    public boolean isOfType(StorageDirType type) {
108      if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS))
109        return true;
110      return this == type;
111    }
112  }
113
  protected String blockpoolID = ""; // id of the block pool
  
  /**
   * Flag that controls whether we try to restore failed storages;
   * consulted by attemptRestoreRemovedStorage().
   */
  private boolean restoreFailedStorage = false;
  // Monitor held by attemptRestoreRemovedStorage() so that only one thread
  // performs a restoration pass at a time.
  private final Object restorationLock = new Object();
  // When true, isPreUpgradableLayout() returns false without touching disk.
  private boolean disablePreUpgradableLayoutCheck = false;


  /**
   * TxId of the last transaction that was included in the most
   * recent fsimage file. This does not include any transactions
   * that have since been written to the edit log.
   */
  protected volatile long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;
  
  /**
   * Time of the last checkpoint, in milliseconds since the epoch.
   * NOTE(review): unlike mostRecentCheckpointTxId this field is not
   * volatile - confirm whether that asymmetry is intended.
   */
  private long mostRecentCheckpointTime = 0;

  /**
   * List of failed (and thus removed) storages. Directories are added by
   * reportErrorsOnDirectory() and taken out again by
   * attemptRestoreRemovedStorage().
   */
  final protected List<StorageDirectory> removedStorageDirs
    = new CopyOnWriteArrayList<StorageDirectory>();

  /**
   * Properties from old layout versions that may be needed
   * during upgrade only. Stays null until
   * setDeprecatedPropertiesForUpgrade() has run.
   */
  private HashMap<String, String> deprecatedProperties;
147
148  /**
149   * Construct the NNStorage.
150   * @param conf Namenode configuration.
151   * @param imageDirs Directories the image can be stored in.
152   * @param editsDirs Directories the editlog can be stored in.
153   * @throws IOException if any directories are inaccessible.
154   */
155  public NNStorage(Configuration conf, 
156                   Collection<URI> imageDirs, Collection<URI> editsDirs) 
157      throws IOException {
158    super(NodeType.NAME_NODE);
159
160    storageDirs = new CopyOnWriteArrayList<StorageDirectory>();
161    
162    // this may modify the editsDirs, so copy before passing in
163    setStorageDirectories(imageDirs, 
164                          Lists.newArrayList(editsDirs),
165                          FSNamesystem.getSharedEditsDirs(conf));
166  }
167
168  @Override // Storage
169  public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException {
170    if (disablePreUpgradableLayoutCheck) {
171      return false;
172    }
173
174    File oldImageDir = new File(sd.getRoot(), "image");
175    if (!oldImageDir.exists()) {
176      return false;
177    }
178    // check the layout version inside the image file
179    File oldF = new File(oldImageDir, "fsimage");
180    RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
181    try {
182      oldFile.seek(0);
183      int oldVersion = oldFile.readInt();
184      oldFile.close();
185      oldFile = null;
186      if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION)
187        return false;
188    } finally {
189      IOUtils.cleanup(LOG, oldFile);
190    }
191    return true;
192  }
193
194  @Override // Closeable
195  public void close() throws IOException {
196    unlockAll();
197    storageDirs.clear();
198  }
199
200  /**
201   * Set flag whether an attempt should be made to restore failed storage
202   * directories at the next available oppurtuinity.
203   *
204   * @param val Whether restoration attempt should be made.
205   */
206  void setRestoreFailedStorage(boolean val) {
207    LOG.warn("set restore failed storage to " + val);
208    restoreFailedStorage=val;
209  }
210
211  /**
212   * @return Whether failed storage directories are to be restored.
213   */
214  boolean getRestoreFailedStorage() {
215    return restoreFailedStorage;
216  }
217
218  /**
219   * See if any of removed storages is "writable" again, and can be returned
220   * into service.
221   */
222  void attemptRestoreRemovedStorage() {
223    // if directory is "alive" - copy the images there...
224    if(!restoreFailedStorage || removedStorageDirs.size() == 0)
225      return; //nothing to restore
226
227    /* We don't want more than one thread trying to restore at a time */
228    synchronized (this.restorationLock) {
229      LOG.info("NNStorage.attemptRestoreRemovedStorage: check removed(failed) "+
230               "storarge. removedStorages size = " + removedStorageDirs.size());
231      for(Iterator<StorageDirectory> it
232            = this.removedStorageDirs.iterator(); it.hasNext();) {
233        StorageDirectory sd = it.next();
234        File root = sd.getRoot();
235        LOG.info("currently disabled dir " + root.getAbsolutePath() +
236                 "; type="+sd.getStorageDirType() 
237                 + ";canwrite="+FileUtil.canWrite(root));
238        if(root.exists() && FileUtil.canWrite(root)) {
239          LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
240          this.addStorageDir(sd); // restore
241          this.removedStorageDirs.remove(sd);
242        }
243      }
244    }
245  }
246
247  /**
248   * @return A list of storage directories which are in the errored state.
249   */
250  List<StorageDirectory> getRemovedStorageDirs() {
251    return this.removedStorageDirs;
252  }
253  
254  /**
255   * See {@link NNStorage#setStorageDirectories(Collection, Collection, Collection)}
256   */
257  @VisibleForTesting
258  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
259                                          Collection<URI> fsEditsDirs)
260      throws IOException {
261    setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList<URI>());
262  }
263
264  /**
265   * Set the storage directories which will be used. This should only ever be
266   * called from inside NNStorage. However, it needs to remain package private
267   * for testing, as StorageDirectories need to be reinitialised after using
268   * Mockito.spy() on this class, as Mockito doesn't work well with inner
269   * classes, such as StorageDirectory in this case.
270   *
271   * Synchronized due to initialization of storageDirs and removedStorageDirs.
272   *
273   * @param fsNameDirs Locations to store images.
274   * @param fsEditsDirs Locations to store edit logs.
275   * @throws IOException
276   */
277  @VisibleForTesting
278  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
279                                          Collection<URI> fsEditsDirs,
280                                          Collection<URI> sharedEditsDirs)
281      throws IOException {
282    this.storageDirs.clear();
283    this.removedStorageDirs.clear();
284
285   // Add all name dirs with appropriate NameNodeDirType
286    for (URI dirName : fsNameDirs) {
287      checkSchemeConsistency(dirName);
288      boolean isAlsoEdits = false;
289      for (URI editsDirName : fsEditsDirs) {
290        if (editsDirName.compareTo(dirName) == 0) {
291          isAlsoEdits = true;
292          fsEditsDirs.remove(editsDirName);
293          break;
294        }
295      }
296      NameNodeDirType dirType = (isAlsoEdits) ?
297                          NameNodeDirType.IMAGE_AND_EDITS :
298                          NameNodeDirType.IMAGE;
299      // Add to the list of storage directories, only if the
300      // URI is of type file://
301      if(dirName.getScheme().compareTo("file") == 0) {
302        this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
303            dirType,
304            sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
305      }
306    }
307
308    // Add edits dirs if they are different from name dirs
309    for (URI dirName : fsEditsDirs) {
310      checkSchemeConsistency(dirName);
311      // Add to the list of storage directories, only if the
312      // URI is of type file://
313      if(dirName.getScheme().compareTo("file") == 0)
314        this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
315                    NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName)));
316    }
317  }
318
319  /**
320   * Return the storage directory corresponding to the passed URI
321   * @param uri URI of a storage directory
322   * @return The matching storage directory or null if none found
323   */
324  StorageDirectory getStorageDirectory(URI uri) {
325    try {
326      uri = Util.fileAsURI(new File(uri));
327      Iterator<StorageDirectory> it = dirIterator();
328      for (; it.hasNext(); ) {
329        StorageDirectory sd = it.next();
330        if (Util.fileAsURI(sd.getRoot()).equals(uri)) {
331          return sd;
332        }
333      }
334    } catch (IOException ioe) {
335      LOG.warn("Error converting file to URI", ioe);
336    }
337    return null;
338  }
339
340  /**
341   * Checks the consistency of a URI, in particular if the scheme
342   * is specified 
343   * @param u URI whose consistency is being checked.
344   */
345  private static void checkSchemeConsistency(URI u) throws IOException {
346    String scheme = u.getScheme();
347    // the URI should have a proper scheme
348    if(scheme == null) {
349      throw new IOException("Undefined scheme for " + u);
350    }
351  }
352
353  /**
354   * Retrieve current directories of type IMAGE
355   * @return Collection of URI representing image directories
356   * @throws IOException in case of URI processing error
357   */
358  Collection<URI> getImageDirectories() throws IOException {
359    return getDirectories(NameNodeDirType.IMAGE);
360  }
361
362  /**
363   * Retrieve current directories of type EDITS
364   * @return Collection of URI representing edits directories
365   * @throws IOException in case of URI processing error
366   */
367  Collection<URI> getEditsDirectories() throws IOException {
368    return getDirectories(NameNodeDirType.EDITS);
369  }
370
371  /**
372   * Return number of storage directories of the given type.
373   * @param dirType directory type
374   * @return number of storage directories of type dirType
375   */
376  int getNumStorageDirs(NameNodeDirType dirType) {
377    if(dirType == null)
378      return getNumStorageDirs();
379    Iterator<StorageDirectory> it = dirIterator(dirType);
380    int numDirs = 0;
381    for(; it.hasNext(); it.next())
382      numDirs++;
383    return numDirs;
384  }
385
386  /**
387   * Return the list of locations being used for a specific purpose.
388   * i.e. Image or edit log storage.
389   *
390   * @param dirType Purpose of locations requested.
391   * @throws IOException
392   */
393  Collection<URI> getDirectories(NameNodeDirType dirType)
394      throws IOException {
395    ArrayList<URI> list = new ArrayList<URI>();
396    Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() :
397                                    dirIterator(dirType);
398    for ( ;it.hasNext(); ) {
399      StorageDirectory sd = it.next();
400      try {
401        list.add(Util.fileAsURI(sd.getRoot()));
402      } catch (IOException e) {
403        throw new IOException("Exception while processing " +
404            "StorageDirectory " + sd.getRoot(), e);
405      }
406    }
407    return list;
408  }
409  
410  /**
411   * Determine the last transaction ID noted in this storage directory.
412   * This txid is stored in a special seen_txid file since it might not
413   * correspond to the latest image or edit log. For example, an image-only
414   * directory will have this txid incremented when edits logs roll, even
415   * though the edits logs are in a different directory.
416   *
417   * @param sd StorageDirectory to check
418   * @return If file exists and can be read, last recorded txid. If not, 0L.
419   * @throws IOException On errors processing file pointed to by sd
420   */
421  static long readTransactionIdFile(StorageDirectory sd) throws IOException {
422    File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
423    return PersistentLongFile.readFile(txidFile, 0);
424  }
425  
426  /**
427   * Write last checkpoint time into a separate file.
428   *
429   * @param sd
430   * @throws IOException
431   */
432  void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException {
433    Preconditions.checkArgument(txid >= 0, "bad txid: " + txid);
434    
435    File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
436    PersistentLongFile.writeFile(txIdFile, txid);
437  }
438
439  /**
440   * Set the transaction ID and time of the last checkpoint
441   * 
442   * @param txid transaction id of the last checkpoint
443   * @param time time of the last checkpoint, in millis since the epoch
444   */
445  void setMostRecentCheckpointInfo(long txid, long time) {
446    this.mostRecentCheckpointTxId = txid;
447    this.mostRecentCheckpointTime = time;
448  }
449
450  /**
451   * @return the transaction ID of the last checkpoint.
452   */
453  public long getMostRecentCheckpointTxId() {
454    return mostRecentCheckpointTxId;
455  }
456  
457  /**
458   * @return the time of the most recent checkpoint in millis since the epoch.
459   */
460  long getMostRecentCheckpointTime() {
461    return mostRecentCheckpointTime;
462  }
463
464  /**
465   * Write a small file in all available storage directories that
466   * indicates that the namespace has reached some given transaction ID.
467   * 
468   * This is used when the image is loaded to avoid accidental rollbacks
469   * in the case where an edit log is fully deleted but there is no
470   * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
471   * @param txid the txid that has been reached
472   */
473  public void writeTransactionIdFileToStorage(long txid) {
474    // Write txid marker in all storage directories
475    for (StorageDirectory sd : storageDirs) {
476      try {
477        writeTransactionIdFile(sd, txid);
478      } catch(IOException e) {
479        // Close any edits stream associated with this dir and remove directory
480        LOG.warn("writeTransactionIdToStorage failed on " + sd,
481            e);
482        reportErrorsOnDirectory(sd);
483      }
484    }
485  }
486
487  /**
488   * Return the name of the image file that is uploaded by periodic
489   * checkpointing
490   *
491   * @return List of filenames to save checkpoints to.
492   */
493  public File[] getFsImageNameCheckpoint(long txid) {
494    ArrayList<File> list = new ArrayList<File>();
495    for (Iterator<StorageDirectory> it =
496                 dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
497      list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid));
498    }
499    return list.toArray(new File[list.size()]);
500  }
501
502  /**
503   * @return The first image file with the given txid and image type.
504   */
505  public File getFsImageName(long txid, NameNodeFile nnf) {
506    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
507        it.hasNext();) {
508      StorageDirectory sd = it.next();
509      File fsImage = getStorageFile(sd, nnf, txid);
510      if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
511        return fsImage;
512      }
513    }
514    return null;
515  }
516
517  /**
518   * @return The first image file whose txid is the same with the given txid and
519   * image type is one of the given types.
520   */
521  public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) {
522    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
523        it.hasNext();) {
524      StorageDirectory sd = it.next();
525      for (NameNodeFile nnf : nnfs) {
526        File fsImage = getStorageFile(sd, nnf, txid);
527        if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
528          return fsImage;
529        }
530      }
531    }
532    return null;
533  }
534
535  public File getFsImageName(long txid) {
536    return getFsImageName(txid, NameNodeFile.IMAGE);
537  }
538
539  public File getHighestFsImageName() {
540    return getFsImageName(getMostRecentCheckpointTxId());
541  }
542
543  /** Create new dfs name directory.  Caution: this destroys all files
544   * in this filesystem. */
545  private void format(StorageDirectory sd) throws IOException {
546    sd.clearDirectory(); // create currrent dir
547    writeProperties(sd);
548    writeTransactionIdFile(sd, 0);
549
550    LOG.info("Storage directory " + sd.getRoot()
551             + " has been successfully formatted.");
552  }
553
554  /**
555   * Format all available storage directories.
556   */
557  public void format(NamespaceInfo nsInfo) throws IOException {
558    Preconditions.checkArgument(nsInfo.getLayoutVersion() == 0 ||
559        nsInfo.getLayoutVersion() == HdfsConstants.NAMENODE_LAYOUT_VERSION,
560        "Bad layout version: %s", nsInfo.getLayoutVersion());
561    
562    this.setStorageInfo(nsInfo);
563    this.blockpoolID = nsInfo.getBlockPoolID();
564    for (Iterator<StorageDirectory> it =
565                           dirIterator(); it.hasNext();) {
566      StorageDirectory sd = it.next();
567      format(sd);
568    }
569  }
570  
571  public static NamespaceInfo newNamespaceInfo()
572      throws UnknownHostException {
573    return new NamespaceInfo(newNamespaceID(), newClusterID(),
574        newBlockPoolID(), 0L);
575  }
576  
577  public void format() throws IOException {
578    this.layoutVersion = HdfsConstants.NAMENODE_LAYOUT_VERSION;
579    for (Iterator<StorageDirectory> it =
580                           dirIterator(); it.hasNext();) {
581      StorageDirectory sd = it.next();
582      format(sd);
583    }
584  }
585
586  /**
587   * Generate new namespaceID.
588   *
589   * namespaceID is a persistent attribute of the namespace.
590   * It is generated when the namenode is formatted and remains the same
591   * during the life cycle of the namenode.
592   * When a datanodes register they receive it as the registrationID,
593   * which is checked every time the datanode is communicating with the
594   * namenode. Datanodes that do not 'know' the namespaceID are rejected.
595   *
596   * @return new namespaceID
597   */
598  private static int newNamespaceID() {
599    int newID = 0;
600    while(newID == 0)
601      newID = DFSUtil.getRandom().nextInt(0x7FFFFFFF);  // use 31 bits only
602    return newID;
603  }
604
605  @Override // Storage
606  protected void setFieldsFromProperties(
607      Properties props, StorageDirectory sd) throws IOException {
608    super.setFieldsFromProperties(props, sd);
609    if (layoutVersion == 0) {
610      throw new IOException("NameNode directory "
611                            + sd.getRoot() + " is not formatted.");
612    }
613
614    // Set Block pool ID in version with federation support
615    if (NameNodeLayoutVersion.supports(
616        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
617      String sbpid = props.getProperty("blockpoolID");
618      setBlockPoolID(sd.getRoot(), sbpid);
619    }
620    setDeprecatedPropertiesForUpgrade(props);
621  }
622
623  /**
624   * Pull any properties out of the VERSION file that are from older
625   * versions of HDFS and only necessary during upgrade.
626   */
627  private void setDeprecatedPropertiesForUpgrade(Properties props) {
628    deprecatedProperties = new HashMap<String, String>();
629    String md5 = props.getProperty(DEPRECATED_MESSAGE_DIGEST_PROPERTY);
630    if (md5 != null) {
631      deprecatedProperties.put(DEPRECATED_MESSAGE_DIGEST_PROPERTY, md5);
632    }
633  }
634  
635  /**
636   * Return a property that was stored in an earlier version of HDFS.
637   * 
638   * This should only be used during upgrades.
639   */
640  String getDeprecatedProperty(String prop) {
641    assert getLayoutVersion() > HdfsConstants.NAMENODE_LAYOUT_VERSION :
642      "getDeprecatedProperty should only be done when loading " +
643      "storage from past versions during upgrade.";
644    return deprecatedProperties.get(prop);
645  }
646
647  /**
648   * Write version file into the storage directory.
649   *
650   * The version file should always be written last.
651   * Missing or corrupted version file indicates that
652   * the checkpoint is not valid.
653   *
654   * @param sd storage directory
655   * @throws IOException
656   */
657  @Override // Storage
658  protected void setPropertiesFromFields(Properties props,
659                           StorageDirectory sd
660                           ) throws IOException {
661    super.setPropertiesFromFields(props, sd);
662    // Set blockpoolID in version with federation support
663    if (NameNodeLayoutVersion.supports(
664        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
665      props.setProperty("blockpoolID", blockpoolID);
666    }
667  }
668  
669  static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) {
670    return new File(sd.getCurrentDir(),
671                    String.format("%s_%019d", type.getName(), imageTxId));
672  }
673  
674  /**
675   * Get a storage file for one of the files that doesn't need a txid associated
676   * (e.g version, seen_txid)
677   */
678  static File getStorageFile(StorageDirectory sd, NameNodeFile type) {
679    return new File(sd.getCurrentDir(), type.getName());
680  }
681
682  @VisibleForTesting
683  public static String getCheckpointImageFileName(long txid) {
684    return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid);
685  }
686
687  @VisibleForTesting
688  public static String getImageFileName(long txid) {
689    return getNameNodeFileName(NameNodeFile.IMAGE, txid);
690  }
691
692  @VisibleForTesting
693  public static String getRollbackImageFileName(long txid) {
694    return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
695  }
696
697  private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
698    return String.format("%s_%019d", nnf.getName(), txid);
699  }
700
701  @VisibleForTesting
702  public static String getInProgressEditsFileName(long startTxId) {
703    return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId);
704  }
705  
706  static File getInProgressEditsFile(StorageDirectory sd, long startTxId) {
707    return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId));
708  }
709  
710  static File getFinalizedEditsFile(StorageDirectory sd,
711      long startTxId, long endTxId) {
712    return new File(sd.getCurrentDir(),
713        getFinalizedEditsFileName(startTxId, endTxId));
714  }
715
716  static File getTemporaryEditsFile(StorageDirectory sd,
717      long startTxId, long endTxId, long timestamp) {
718    return new File(sd.getCurrentDir(),
719        getTemporaryEditsFileName(startTxId, endTxId, timestamp));
720  }
721
722  static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) {
723    return new File(sd.getCurrentDir(), getNameNodeFileName(nnf, txid));
724  }
725
726  @VisibleForTesting
727  public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
728    return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
729                         startTxId, endTxId);
730  }
731
732  public static String getTemporaryEditsFileName(long startTxId, long endTxId,
733      long timestamp) {
734    return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(),
735                         startTxId, endTxId, timestamp);
736  }
737  
738  /**
739   * Return the first readable finalized edits file for the given txid.
740   */
741  File findFinalizedEditsFile(long startTxId, long endTxId)
742  throws IOException {
743    File ret = findFile(NameNodeDirType.EDITS,
744        getFinalizedEditsFileName(startTxId, endTxId));
745    if (ret == null) {
746      throw new IOException(
747          "No edits file for txid " + startTxId + "-" + endTxId + " exists!");
748    }
749    return ret;
750  }
751    
752  /**
753   * Return the first readable image file for the given txid and image type, or
754   * null if no such image can be found
755   */
756  File findImageFile(NameNodeFile nnf, long txid) {
757    return findFile(NameNodeDirType.IMAGE,
758        getNameNodeFileName(nnf, txid));
759  }
760
761  /**
762   * Return the first readable storage file of the given name
763   * across any of the 'current' directories in SDs of the
764   * given type, or null if no such file exists.
765   */
766  private File findFile(NameNodeDirType dirType, String name) {
767    for (StorageDirectory sd : dirIterable(dirType)) {
768      File candidate = new File(sd.getCurrentDir(), name);
769      if (FileUtil.canRead(sd.getCurrentDir()) &&
770          candidate.exists()) {
771        return candidate;
772      }
773    }
774    return null;
775  }
776
777  /**
778   * Disable the check for pre-upgradable layouts. Needed for BackupImage.
779   * @param val Whether to disable the preupgradeable layout check.
780   */
781  void setDisablePreUpgradableLayoutCheck(boolean val) {
782    disablePreUpgradableLayoutCheck = val;
783  }
784
785  /**
786   * Marks a list of directories as having experienced an error.
787   *
788   * @param sds A list of storage directories to mark as errored.
789   * @throws IOException
790   */
791  void reportErrorsOnDirectories(List<StorageDirectory> sds) {
792    for (StorageDirectory sd : sds) {
793      reportErrorsOnDirectory(sd);
794    }
795  }
796
797  /**
798   * Reports that a directory has experienced an error.
799   * Notifies listeners that the directory is no longer
800   * available.
801   *
802   * @param sd A storage directory to mark as errored.
803   * @throws IOException
804   */
805  private void reportErrorsOnDirectory(StorageDirectory sd) {
806    LOG.error("Error reported on storage directory " + sd);
807
808    String lsd = listStorageDirectories();
809    LOG.debug("current list of storage dirs:" + lsd);
810
811    LOG.warn("About to remove corresponding storage: "
812             + sd.getRoot().getAbsolutePath());
813    try {
814      sd.unlock();
815    } catch (Exception e) {
816      LOG.warn("Unable to unlock bad storage directory: "
817               +  sd.getRoot().getPath(), e);
818    }
819
820    if (this.storageDirs.remove(sd)) {
821      this.removedStorageDirs.add(sd);
822    }
823    
824    lsd = listStorageDirectories();
825    LOG.debug("at the end current list of storage dirs:" + lsd);
826  }
827  
828  /** 
829   * Processes the startup options for the clusterid and blockpoolid 
830   * for the upgrade. 
831   * @param startOpt Startup options 
832   * @param layoutVersion Layout version for the upgrade 
833   * @throws IOException
834   */
835  void processStartupOptionsForUpgrade(StartupOption startOpt, int layoutVersion)
836      throws IOException {
837    if (startOpt == StartupOption.UPGRADE) {
838      // If upgrade from a release that does not support federation,
839      // if clusterId is provided in the startupOptions use it.
840      // Else generate a new cluster ID      
841      if (!NameNodeLayoutVersion.supports(
842          LayoutVersion.Feature.FEDERATION, layoutVersion)) {
843        if (startOpt.getClusterId() == null) {
844          startOpt.setClusterId(newClusterID());
845        }
846        setClusterID(startOpt.getClusterId());
847        setBlockPoolID(newBlockPoolID());
848      } else {
849        // Upgrade from one version of federation to another supported
850        // version of federation doesn't require clusterID.
851        // Warn the user if the current clusterid didn't match with the input
852        // clusterid.
853        if (startOpt.getClusterId() != null
854            && !startOpt.getClusterId().equals(getClusterID())) {
855          LOG.warn("Clusterid mismatch - current clusterid: " + getClusterID()
856              + ", Ignoring given clusterid: " + startOpt.getClusterId());
857        }
858      }
859      LOG.info("Using clusterid: " + getClusterID());
860    }
861  }
862  
863  /**
864   * Report that an IOE has occurred on some file which may
865   * or may not be within one of the NN image storage directories.
866   */
867  @Override
868  public void reportErrorOnFile(File f) {
869    // We use getAbsolutePath here instead of getCanonicalPath since we know
870    // that there is some IO problem on that drive.
871    // getCanonicalPath may need to call stat() or readlink() and it's likely
872    // those calls would fail due to the same underlying IO problem.
873    String absPath = f.getAbsolutePath();
874    for (StorageDirectory sd : storageDirs) {
875      String dirPath = sd.getRoot().getAbsolutePath();
876      if (!dirPath.endsWith(File.separator)) {
877        dirPath += File.separator;
878      }
879      if (absPath.startsWith(dirPath)) {
880        reportErrorsOnDirectory(sd);
881        return;
882      }
883    }
884    
885  }
886  
887  /**
888   * Generate new clusterID.
889   * 
   * clusterID is a persistent attribute of the cluster.
   * It is generated when the cluster is created and remains the same
   * during the life cycle of the cluster.  When a new name node is formatted,
   * if this is a new cluster, a new clusterID is generated and stored.
   * Subsequent name nodes must be given the same clusterID during their
   * format to be in the same cluster.
   * When a datanode registers, it receives the clusterID and sticks with it.
   * If at any point a name node or data node tries to join another cluster,
   * it will be rejected.
899   * 
900   * @return new clusterID
901   */ 
902  public static String newClusterID() {
903    return "CID-" + UUID.randomUUID().toString();
904  }
905
906  void setClusterID(String cid) {
907    clusterID = cid;
908  }
909
  /**
   * Try to find the current cluster id in the VERSION files of the
   * IMAGE storage directories.
   * Returns the first non-empty cluster id found, or null if none is found.
   * @return clusterId or null in case no cluster id found
   */
916  public String determineClusterId() {
917    String cid = null;
918    Iterator<StorageDirectory> sdit = dirIterator(NameNodeDirType.IMAGE);
919    while(sdit.hasNext()) {
920      StorageDirectory sd = sdit.next();
921      try {
922        Properties props = readPropertiesFile(sd.getVersionFile());
923        cid = props.getProperty("clusterID");
924        LOG.info("current cluster id for sd="+sd.getCurrentDir() + 
925            ";lv=" + layoutVersion + ";cid=" + cid);
926        
927        if(cid != null && !cid.equals(""))
928          return cid;
929      } catch (Exception e) {
930        LOG.warn("this sd not available: " + e.getLocalizedMessage());
931      } //ignore
932    }
933    LOG.warn("couldn't find any VERSION file containing valid ClusterId");
934    return null;
935  }
936
937  /**
938   * Generate new blockpoolID.
939   * 
940   * @return new blockpoolID
941   */ 
942  static String newBlockPoolID() throws UnknownHostException{
943    String ip = "unknownIP";
944    try {
945      ip = DNS.getDefaultIP("default");
946    } catch (UnknownHostException e) {
947      LOG.warn("Could not find ip address of \"default\" inteface.");
948      throw e;
949    }
950    
951    int rand = DFSUtil.getSecureRandom().nextInt(Integer.MAX_VALUE);
952    String bpid = "BP-" + rand + "-"+ ip + "-" + Time.now();
953    return bpid;
954  }
955
  /** Set the block pool ID; no validation is performed by this overload. */
957  void setBlockPoolID(String bpid) {
958    blockpoolID = bpid;
959  }
960
961  /** Validate and set block pool ID */
962  private void setBlockPoolID(File storage, String bpid)
963      throws InconsistentFSStateException {
964    if (bpid == null || bpid.equals("")) {
965      throw new InconsistentFSStateException(storage, "file "
966          + Storage.STORAGE_FILE_VERSION + " has no block pool Id.");
967    }
968    
969    if (!blockpoolID.equals("") && !blockpoolID.equals(bpid)) {
970      throw new InconsistentFSStateException(storage,
971          "Unexepcted blockpoolID " + bpid + " . Expected " + blockpoolID);
972    }
973    setBlockPoolID(bpid);
974  }
975  
976  public String getBlockPoolID() {
977    return blockpoolID;
978  }
979
980  /**
981   * Iterate over all current storage directories, inspecting them
982   * with the given inspector.
983   */
984  void inspectStorageDirs(FSImageStorageInspector inspector)
985      throws IOException {
986
987    // Process each of the storage directories to find the pair of
988    // newest image file and edit file
989    for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
990      StorageDirectory sd = it.next();
991      inspector.inspectDirectory(sd);
992    }
993  }
994
995  /**
996   * Iterate over all of the storage dirs, reading their contents to determine
997   * their layout versions. Returns an FSImageStorageInspector which has
998   * inspected each directory.
999   * 
1000   * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
1001   * @throws IOException if no valid storage dirs are found or no valid layout version
1002   */
1003  FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes)
1004      throws IOException {
1005    Integer layoutVersion = null;
1006    boolean multipleLV = false;
1007    StringBuilder layoutVersions = new StringBuilder();
1008
1009    // First determine what range of layout versions we're going to inspect
1010    for (Iterator<StorageDirectory> it = dirIterator(false);
1011         it.hasNext();) {
1012      StorageDirectory sd = it.next();
1013      if (!sd.getVersionFile().exists()) {
1014        FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
1015        continue;
1016      }
1017      readProperties(sd); // sets layoutVersion
1018      int lv = getLayoutVersion();
1019      if (layoutVersion == null) {
1020        layoutVersion = Integer.valueOf(lv);
1021      } else if (!layoutVersion.equals(lv)) {
1022        multipleLV = true;
1023      }
1024      layoutVersions.append("(").append(sd.getRoot()).append(", ").append(lv).append(") ");
1025    }
1026    
1027    if (layoutVersion == null) {
1028      throw new IOException("No storage directories contained VERSION information");
1029    }
1030    if (multipleLV) {            
1031      throw new IOException(
1032          "Storage directories contain multiple layout versions: "
1033              + layoutVersions);
1034    }
1035    // If the storage directories are with the new layout version
1036    // (ie edits_<txnid>) then use the new inspector, which will ignore
1037    // the old format dirs.
1038    FSImageStorageInspector inspector;
1039    if (NameNodeLayoutVersion.supports(
1040        LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
1041      inspector = new FSImageTransactionalStorageInspector(fileTypes);
1042    } else {
1043      inspector = new FSImagePreTransactionalStorageInspector();
1044    }
1045    
1046    inspectStorageDirs(inspector);
1047    return inspector;
1048  }
1049
1050  public NamespaceInfo getNamespaceInfo() {
1051    return new NamespaceInfo(
1052        getNamespaceID(),
1053        getClusterID(),
1054        getBlockPoolID(),
1055        getCTime());
1056  }
1057}