001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.common;
019
020import java.io.File;
021import java.io.FileOutputStream;
022import java.io.IOException;
023import java.io.RandomAccessFile;
024import java.lang.management.ManagementFactory;
025import java.nio.channels.FileLock;
026import java.nio.channels.OverlappingFileLockException;
027import java.util.ArrayList;
028import java.util.Iterator;
029import java.util.List;
030import java.util.Properties;
031
032import org.apache.commons.logging.Log;
033import org.apache.commons.logging.LogFactory;
034import org.apache.hadoop.classification.InterfaceAudience;
035import org.apache.hadoop.fs.FileUtil;
036import org.apache.hadoop.fs.Path;
037import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
038import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
039import org.apache.hadoop.util.ToolRunner;
040import org.apache.hadoop.util.VersionInfo;
041
042import com.google.common.base.Charsets;
043import com.google.common.base.Preconditions;
044
045
046
047/**
048 * Storage information file.
049 * <p>
050 * Local storage information is stored in a separate file VERSION.
 * It contains the type of the node, 
052 * the storage layout version, the namespace id, and 
053 * the fs state creation time.
054 * <p>
055 * Local storage can reside in multiple directories. 
056 * Each directory should contain the same VERSION file as the others.
057 * During startup Hadoop servers (name-node and data-nodes) read their local 
058 * storage information from them.
059 * <p>
 * The servers hold a lock for each storage directory while they run, so that 
 * other nodes are not able to start up while sharing the same storage.
062 * The locks are released when the servers stop (normally or abnormally).
063 * 
064 */
065@InterfaceAudience.Private
066public abstract class Storage extends StorageInfo {
  public static final Log LOG = LogFactory.getLog(Storage.class.getName());

  // last layout version that did not support upgrades
  public static final int LAST_PRE_UPGRADE_LAYOUT_VERSION = -3;
  
  // this corresponds to Hadoop-0.18
  public static final int LAST_UPGRADABLE_LAYOUT_VERSION = -16;
  protected static final String LAST_UPGRADABLE_HADOOP_VERSION = "Hadoop-0.18";
  
  /** Layout versions of 0.20.203 release */
  public static final int[] LAYOUT_VERSIONS_203 = {-19, -31};

  // Well-known file/directory names inside a storage directory. The *.tmp
  // names mark in-flight transitions (upgrade, rollback, finalize,
  // checkpoint); analyzeStorage()/doRecover() below use their presence to
  // detect and repair interrupted operations.
  public    static final String STORAGE_FILE_LOCK     = "in_use.lock";
  public    static final String STORAGE_DIR_CURRENT   = "current";
  public    static final String STORAGE_DIR_PREVIOUS  = "previous";
  public    static final String STORAGE_TMP_REMOVED   = "removed.tmp";
  public    static final String STORAGE_TMP_PREVIOUS  = "previous.tmp";
  public    static final String STORAGE_TMP_FINALIZED = "finalized.tmp";
  public    static final String STORAGE_TMP_LAST_CKPT = "lastcheckpoint.tmp";
  public    static final String STORAGE_PREVIOUS_CKPT = "previous.checkpoint";
  
  /**
   * The blocksBeingWritten directory which was used in some 1.x and earlier
   * releases.
   */
  public static final String STORAGE_1_BBW = "blocksBeingWritten";
093  
  /**
   * States a storage directory can be in, as diagnosed by
   * {@link StorageDirectory#analyzeStorage}. Most states name either a
   * transition that must be completed, or a failed transition that must be
   * recovered, by {@link StorageDirectory#doRecover} before use.
   */
  public enum StorageState {
    NON_EXISTENT,         // directory missing or inaccessible
    NOT_FORMATTED,        // directory exists but holds no formatted state
    COMPLETE_UPGRADE,     // finish interrupted upgrade (mv previous.tmp -> previous)
    RECOVER_UPGRADE,      // undo failed upgrade (mv previous.tmp -> current)
    COMPLETE_FINALIZE,    // finish interrupted finalize (rm finalized.tmp)
    COMPLETE_ROLLBACK,    // finish interrupted rollback (rm removed.tmp)
    RECOVER_ROLLBACK,     // undo failed rollback (mv removed.tmp -> current)
    COMPLETE_CHECKPOINT,  // finish checkpoint (mv lastcheckpoint.tmp -> previous.checkpoint)
    RECOVER_CHECKPOINT,   // undo failed checkpoint (mv lastcheckpoint.tmp -> current)
    NORMAL;               // consistent, no recovery needed
  }
106  
  /**
   * An interface to denote storage directory type
   * Implementations can define a type for storage directory by implementing
   * this interface.
   */
  @InterfaceAudience.Private
  public interface StorageDirType {
    /** @return the concrete type tag of this storage directory. */
    public StorageDirType getStorageDirType();
    /** @return true if this directory is of (or compatible with) {@code type}. */
    public boolean isOfType(StorageDirType type);
  }
  
  // All storage directories managed by this Storage instance; traversed by
  // DirIterator via the dirIterator(...) factory methods below.
  protected List<StorageDirectory> storageDirs = new ArrayList<StorageDirectory>();
119  
  /**
   * Iterator over {@code storageDirs} that can filter by directory type and
   * by whether a directory is shared. Supports {@link #remove()}.
   */
  private class DirIterator implements Iterator<StorageDirectory> {
    final StorageDirType dirType;  // null means "any type"
    final boolean includeShared;   // false skips shared directories
    int prevIndex; // for remove()
    int nextIndex; // for next()
    
    DirIterator(StorageDirType dirType, boolean includeShared) {
      this.dirType = dirType;
      this.nextIndex = 0;
      this.prevIndex = 0;
      this.includeShared = includeShared;
    }
    
    @Override
    public boolean hasNext() {
      if (storageDirs.isEmpty() || nextIndex >= storageDirs.size())
        return false;
      if (dirType != null || !includeShared) {
        // Advance nextIndex past entries that the filters reject.
        while (nextIndex < storageDirs.size()) {
          if (shouldReturnNextDir())
            break;
          nextIndex++;
        }
        if (nextIndex >= storageDirs.size())
         return false;
      }
      return true;
    }
    
    @Override
    public StorageDirectory next() {
      StorageDirectory sd = getStorageDir(nextIndex);
      prevIndex = nextIndex;
      nextIndex++;
      if (dirType != null || !includeShared) {
        // Pre-position nextIndex on the next matching entry (if any).
        while (nextIndex < storageDirs.size()) {
          if (shouldReturnNextDir())
            break;
          nextIndex++;
        }
      }
      return sd;
    }
    
    @Override
    public void remove() {
      nextIndex = prevIndex; // restore previous state
      storageDirs.remove(prevIndex); // remove last returned element
      hasNext(); // reset nextIndex to correct place
    }
    
    /** @return true if the directory at nextIndex passes both filters. */
    private boolean shouldReturnNextDir() {
      StorageDirectory sd = getStorageDir(nextIndex);
      return (dirType == null || sd.getStorageDirType().isOfType(dirType)) &&
          (includeShared || !sd.isShared());
    }
  }
177  
178  /**
179   * @return A list of the given File in every available storage directory,
180   * regardless of whether it might exist.
181   */
182  public List<File> getFiles(StorageDirType dirType, String fileName) {
183    ArrayList<File> list = new ArrayList<File>();
184    Iterator<StorageDirectory> it =
185      (dirType == null) ? dirIterator() : dirIterator(dirType);
186    for ( ;it.hasNext(); ) {
187      list.add(new File(it.next().getCurrentDir(), fileName));
188    }
189    return list;
190  }
191
192
  /**
   * Return default iterator
   * This iterator returns all entries in storageDirs
   * @return an iterator over every configured storage dir
   */
  public Iterator<StorageDirectory> dirIterator() {
    return dirIterator(null);
  }
  
  /**
   * Return iterator based on Storage Directory Type
   * This iterator selects entries in storageDirs of type dirType and returns
   * them via the Iterator
   * @return an iterator over the storage dirs of the given type
   */
  public Iterator<StorageDirectory> dirIterator(StorageDirType dirType) {
    return dirIterator(dirType, true);
  }
  
  /**
   * Return all entries in storageDirs, potentially excluding shared dirs.
   * @param includeShared whether or not to include shared dirs.
   * @return an iterator over the configured storage dirs.
   */
  public Iterator<StorageDirectory> dirIterator(boolean includeShared) {
    return dirIterator(null, includeShared);
  }
  
  /**
   * @param dirType all entries will be of this type of dir
   * @param includeShared true to include any shared directories,
   *        false otherwise
   * @return an iterator over the configured storage dirs.
   */
  public Iterator<StorageDirectory> dirIterator(StorageDirType dirType,
      boolean includeShared) {
    return new DirIterator(dirType, includeShared);
  }
  
  /**
   * @return an {@link Iterable} view over the dirs of the given type, for
   *         use in for-each loops; each call to iterator() starts afresh.
   */
  public Iterable<StorageDirectory> dirIterable(final StorageDirType dirType) {
    return new Iterable<StorageDirectory>() {
      @Override
      public Iterator<StorageDirectory> iterator() {
        return dirIterator(dirType);
      }
    };
  }
238  
239  
240  /**
241   * generate storage list (debug line)
242   */
243  public String listStorageDirectories() {
244    StringBuilder buf = new StringBuilder();
245    for (StorageDirectory sd : storageDirs) {
246      buf.append(sd.getRoot() + "(" + sd.getStorageDirType() + ");");
247    }
248    return buf.toString();
249  }
250  
251  /**
252   * One of the storage directories.
253   */
254  @InterfaceAudience.Private
255  public static class StorageDirectory implements FormatConfirmable {
    final File root;              // root directory
    // whether or not this dir is shared between two separate NNs for HA, or
    // between multiple block pools in the case of federation.
    final boolean isShared;
    final StorageDirType dirType; // storage dir type
    FileLock lock;                // storage lock; null when not held

    private String storageUuid = null;      // Storage directory identifier.
    
    /** Construct an untyped, non-shared (lockable) storage directory. */
    public StorageDirectory(File dir) {
      // default dirType is null
      this(dir, null, false);
    }
    
    /** Construct a typed, non-shared (lockable) storage directory. */
    public StorageDirectory(File dir, StorageDirType dirType) {
      this(dir, dirType, false);
    }
    
    /** Set the identifier of this storage directory. */
    public void setStorageUuid(String storageUuid) {
      this.storageUuid = storageUuid;
    }

    /** @return the identifier of this storage directory, or null if unset. */
    public String getStorageUuid() {
      return storageUuid;
    }

    /**
     * Constructor
     * @param dir directory corresponding to the storage
     * @param dirType storage directory type
     * @param isShared whether or not this dir is shared between two NNs. true
     *          disables locking on the storage directory, false enables locking
     */
    public StorageDirectory(File dir, StorageDirType dirType, boolean isShared) {
      this.root = dir;
      this.lock = null;
      this.dirType = dirType;
      this.isShared = isShared;
    }
295    
    /**
     * Get root directory of this storage
     */
    public File getRoot() {
      return root;
    }

    /**
     * Get storage directory type
     */
    public StorageDirType getStorageDirType() {
      return dirType;
    }    

    /**
     * Read storage properties from {@code from} and apply them to
     * {@code storage} via its setFieldsFromProperties hook.
     *
     * @param from the properties file to read (e.g. a VERSION file)
     * @param storage the Storage whose fields are populated
     * @throws IOException if the file cannot be read or parsed
     */
    public void read(File from, Storage storage) throws IOException {
      Properties props = readPropertiesFile(from);
      storage.setFieldsFromProperties(props, this);
    }
314
315    /**
316     * Clear and re-create storage directory.
317     * <p>
318     * Removes contents of the current directory and creates an empty directory.
319     * 
320     * This does not fully format storage directory. 
321     * It cannot write the version file since it should be written last after  
322     * all other storage type dependent files are written.
323     * Derived storage is responsible for setting specific storage values and
324     * writing the version file to disk.
325     * 
326     * @throws IOException
327     */
328    public void clearDirectory() throws IOException {
329      File curDir = this.getCurrentDir();
330      if (curDir.exists())
331        if (!(FileUtil.fullyDelete(curDir)))
332          throw new IOException("Cannot remove current directory: " + curDir);
333      if (!curDir.mkdirs())
334        throw new IOException("Cannot create directory " + curDir);
335    }
336
    /**
     * Directory {@code current} contains latest files defining
     * the file system meta-data.
     * 
     * @return the directory path
     */
    public File getCurrentDir() {
      return new File(root, STORAGE_DIR_CURRENT);
    }

    /**
     * File {@code VERSION} contains the following fields:
     * <ol>
     * <li>node type</li>
     * <li>layout version</li>
     * <li>namespaceID</li>
     * <li>fs state creation time</li>
     * <li>other fields specific for this node type</li>
     * </ol>
     * The version file is always written last during storage directory updates.
     * The existence of the version file indicates that all other files have
     * been successfully written in the storage directory, the storage is valid
     * and does not need to be recovered.
     * 
     * @return the version file path (under the {@code current} directory)
     */
    public File getVersionFile() {
      return new File(new File(root, STORAGE_DIR_CURRENT), STORAGE_FILE_VERSION);
    }

    /**
     * File {@code VERSION} from the {@code previous} directory.
     * 
     * @return the previous version file path
     */
    public File getPreviousVersionFile() {
      return new File(new File(root, STORAGE_DIR_PREVIOUS), STORAGE_FILE_VERSION);
    }

    /**
     * Directory {@code previous} contains the previous file system state,
     * which the system can be rolled back to.
     * 
     * @return the directory path
     */
    public File getPreviousDir() {
      return new File(root, STORAGE_DIR_PREVIOUS);
    }

    /**
     * {@code previous.tmp} is a transient directory, which holds
     * current file system state while the new state is saved into the new
     * {@code current} during upgrade.
     * If the saving succeeds {@code previous.tmp} will be moved to
     * {@code previous}, otherwise it will be renamed back to 
     * {@code current} by the recovery procedure during startup.
     * 
     * @return the directory path
     */
    public File getPreviousTmp() {
      return new File(root, STORAGE_TMP_PREVIOUS);
    }

    /**
     * {@code removed.tmp} is a transient directory, which holds
     * current file system state while the previous state is moved into
     * {@code current} during rollback.
     * If the moving succeeds {@code removed.tmp} will be removed,
     * otherwise it will be renamed back to 
     * {@code current} by the recovery procedure during startup.
     * 
     * @return the directory path
     */
    public File getRemovedTmp() {
      return new File(root, STORAGE_TMP_REMOVED);
    }

    /**
     * {@code finalized.tmp} is a transient directory, which holds
     * the {@code previous} file system state while it is being removed
     * in response to the finalize request.
     * Finalize operation will remove {@code finalized.tmp} when completed,
     * otherwise the removal will resume upon the system startup.
     * 
     * @return the directory path
     */
    public File getFinalizedTmp() {
      return new File(root, STORAGE_TMP_FINALIZED);
    }

    /**
     * {@code lastcheckpoint.tmp} is a transient directory, which holds
     * current file system state while the new state is saved into the new
     * {@code current} during regular namespace updates.
     * If the saving succeeds {@code lastcheckpoint.tmp} will be moved to
     * {@code previous.checkpoint}, otherwise it will be renamed back to 
     * {@code current} by the recovery procedure during startup.
     * 
     * @return the directory path
     */
    public File getLastCheckpointTmp() {
      return new File(root, STORAGE_TMP_LAST_CKPT);
    }

    /**
     * {@code previous.checkpoint} is a directory, which holds the previous
     * (before the last save) state of the storage directory.
     * The directory is created as a reference only, it does not play role
     * in state recovery procedures, and is recycled automatically, 
     * but it may be useful for manual recovery of a stale state of the system.
     * 
     * @return the directory path
     */
    public File getPreviousCheckpoint() {
      return new File(root, STORAGE_PREVIOUS_CKPT);
    }
453
454    /**
455     * Check consistency of the storage directory
456     * 
457     * @param startOpt a startup option.
458     *  
459     * @return state {@link StorageState} of the storage directory 
460     * @throws InconsistentFSStateException if directory state is not 
461     * consistent and cannot be recovered.
462     * @throws IOException
463     */
464    public StorageState analyzeStorage(StartupOption startOpt, Storage storage)
465        throws IOException {
466      assert root != null : "root is null";
467      String rootPath = root.getCanonicalPath();
468      try { // check that storage exists
469        if (!root.exists()) {
470          // storage directory does not exist
471          if (startOpt != StartupOption.FORMAT) {
472            LOG.warn("Storage directory " + rootPath + " does not exist");
473            return StorageState.NON_EXISTENT;
474          }
475          LOG.info(rootPath + " does not exist. Creating ...");
476          if (!root.mkdirs())
477            throw new IOException("Cannot create directory " + rootPath);
478        }
479        // or is inaccessible
480        if (!root.isDirectory()) {
481          LOG.warn(rootPath + "is not a directory");
482          return StorageState.NON_EXISTENT;
483        }
484        if (!FileUtil.canWrite(root)) {
485          LOG.warn("Cannot access storage directory " + rootPath);
486          return StorageState.NON_EXISTENT;
487        }
488      } catch(SecurityException ex) {
489        LOG.warn("Cannot access storage directory " + rootPath, ex);
490        return StorageState.NON_EXISTENT;
491      }
492
493      this.lock(); // lock storage if it exists
494
495      if (startOpt == HdfsServerConstants.StartupOption.FORMAT)
496        return StorageState.NOT_FORMATTED;
497
498      if (startOpt != HdfsServerConstants.StartupOption.IMPORT) {
499        storage.checkOldLayoutStorage(this);
500      }
501
502      // check whether current directory is valid
503      File versionFile = getVersionFile();
504      boolean hasCurrent = versionFile.exists();
505
506      // check which directories exist
507      boolean hasPrevious = getPreviousDir().exists();
508      boolean hasPreviousTmp = getPreviousTmp().exists();
509      boolean hasRemovedTmp = getRemovedTmp().exists();
510      boolean hasFinalizedTmp = getFinalizedTmp().exists();
511      boolean hasCheckpointTmp = getLastCheckpointTmp().exists();
512
513      if (!(hasPreviousTmp || hasRemovedTmp
514          || hasFinalizedTmp || hasCheckpointTmp)) {
515        // no temp dirs - no recovery
516        if (hasCurrent)
517          return StorageState.NORMAL;
518        if (hasPrevious)
519          throw new InconsistentFSStateException(root,
520                              "version file in current directory is missing.");
521        return StorageState.NOT_FORMATTED;
522      }
523
524      if ((hasPreviousTmp?1:0) + (hasRemovedTmp?1:0)
525          + (hasFinalizedTmp?1:0) + (hasCheckpointTmp?1:0) > 1)
526        // more than one temp dirs
527        throw new InconsistentFSStateException(root,
528                                               "too many temporary directories.");
529
530      // # of temp dirs == 1 should either recover or complete a transition
531      if (hasCheckpointTmp) {
532        return hasCurrent ? StorageState.COMPLETE_CHECKPOINT
533                          : StorageState.RECOVER_CHECKPOINT;
534      }
535
536      if (hasFinalizedTmp) {
537        if (hasPrevious)
538          throw new InconsistentFSStateException(root,
539                                                 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_FINALIZED
540                                                 + "cannot exist together.");
541        return StorageState.COMPLETE_FINALIZE;
542      }
543
544      if (hasPreviousTmp) {
545        if (hasPrevious)
546          throw new InconsistentFSStateException(root,
547                                                 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_PREVIOUS
548                                                 + " cannot exist together.");
549        if (hasCurrent)
550          return StorageState.COMPLETE_UPGRADE;
551        return StorageState.RECOVER_UPGRADE;
552      }
553      
554      assert hasRemovedTmp : "hasRemovedTmp must be true";
555      if (!(hasCurrent ^ hasPrevious))
556        throw new InconsistentFSStateException(root,
557                                               "one and only one directory " + STORAGE_DIR_CURRENT 
558                                               + " or " + STORAGE_DIR_PREVIOUS 
559                                               + " must be present when " + STORAGE_TMP_REMOVED
560                                               + " exists.");
561      if (hasCurrent)
562        return StorageState.COMPLETE_ROLLBACK;
563      return StorageState.RECOVER_ROLLBACK;
564    }
565
    /**
     * Complete or recover storage state from previously failed transition.
     * 
     * Each case finishes (or undoes) the directory rename/delete that an
     * interrupted transition left behind, as diagnosed by
     * {@link #analyzeStorage}; see the per-case comments.
     * 
     * @param curState specifies what/how the state should be recovered
     * @throws IOException if a rename/delete fails, or curState is not one
     *         of the recoverable states
     */
    public void doRecover(StorageState curState) throws IOException {
      File curDir = getCurrentDir();
      String rootPath = root.getCanonicalPath();
      switch(curState) {
      case COMPLETE_UPGRADE:  // mv previous.tmp -> previous
        LOG.info("Completing previous upgrade for storage directory " 
                 + rootPath);
        rename(getPreviousTmp(), getPreviousDir());
        return;
      case RECOVER_UPGRADE:   // mv previous.tmp -> current
        LOG.info("Recovering storage directory " + rootPath
                 + " from previous upgrade");
        if (curDir.exists())
          deleteDir(curDir);
        rename(getPreviousTmp(), curDir);
        return;
      case COMPLETE_ROLLBACK: // rm removed.tmp
        LOG.info("Completing previous rollback for storage directory "
                 + rootPath);
        deleteDir(getRemovedTmp());
        return;
      case RECOVER_ROLLBACK:  // mv removed.tmp -> current
        LOG.info("Recovering storage directory " + rootPath
                 + " from previous rollback");
        rename(getRemovedTmp(), curDir);
        return;
      case COMPLETE_FINALIZE: // rm finalized.tmp
        LOG.info("Completing previous finalize for storage directory "
                 + rootPath);
        deleteDir(getFinalizedTmp());
        return;
      case COMPLETE_CHECKPOINT: // mv lastcheckpoint.tmp -> previous.checkpoint
        LOG.info("Completing previous checkpoint for storage directory " 
                 + rootPath);
        File prevCkptDir = getPreviousCheckpoint();
        if (prevCkptDir.exists())
          deleteDir(prevCkptDir);
        rename(getLastCheckpointTmp(), prevCkptDir);
        return;
      case RECOVER_CHECKPOINT:  // mv lastcheckpoint.tmp -> current
        LOG.info("Recovering storage directory " + rootPath
                 + " from failed checkpoint");
        if (curDir.exists())
          deleteDir(curDir);
        rename(getLastCheckpointTmp(), curDir);
        return;
      default:
        throw new IOException("Unexpected FS state: " + curState);
      }
    }
622    
623    /**
624     * @return true if the storage directory should prompt the user prior
625     * to formatting (i.e if the directory appears to contain some data)
626     * @throws IOException if the SD cannot be accessed due to an IO error
627     */
628    @Override
629    public boolean hasSomeData() throws IOException {
630      // Its alright for a dir not to exist, or to exist (properly accessible)
631      // and be completely empty.
632      if (!root.exists()) return false;
633      
634      if (!root.isDirectory()) {
635        // a file where you expect a directory should not cause silent
636        // formatting
637        return true;
638      }
639      
640      if (FileUtil.listFiles(root).length == 0) {
641        // Empty dir can format without prompt.
642        return false;
643      }
644      
645      return true;
646    }
647    
648    public boolean isShared() {
649      return isShared;
650    }
651
652
653    /**
654     * Lock storage to provide exclusive access.
655     * 
656     * <p> Locking is not supported by all file systems.
657     * E.g., NFS does not consistently support exclusive locks.
658     * 
659     * <p> If locking is supported we guarantee exclusive access to the
660     * storage directory. Otherwise, no guarantee is given.
661     * 
662     * @throws IOException if locking fails
663     */
664    public void lock() throws IOException {
665      if (isShared()) {
666        LOG.info("Locking is disabled");
667        return;
668      }
669      FileLock newLock = tryLock();
670      if (newLock == null) {
671        String msg = "Cannot lock storage " + this.root 
672          + ". The directory is already locked";
673        LOG.info(msg);
674        throw new IOException(msg);
675      }
676      // Don't overwrite lock until success - this way if we accidentally
677      // call lock twice, the internal state won't be cleared by the second
678      // (failed) lock attempt
679      lock = newLock;
680    }
681
682    /**
683     * Attempts to acquire an exclusive lock on the storage.
684     * 
685     * @return A lock object representing the newly-acquired lock or
686     * <code>null</code> if storage is already locked.
687     * @throws IOException if locking fails.
688     */
689    FileLock tryLock() throws IOException {
690      boolean deletionHookAdded = false;
691      File lockF = new File(root, STORAGE_FILE_LOCK);
692      if (!lockF.exists()) {
693        lockF.deleteOnExit();
694        deletionHookAdded = true;
695      }
696      RandomAccessFile file = new RandomAccessFile(lockF, "rws");
697      String jvmName = ManagementFactory.getRuntimeMXBean().getName();
698      FileLock res = null;
699      try {
700        res = file.getChannel().tryLock();
701        file.write(jvmName.getBytes(Charsets.UTF_8));
702        LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName);
703      } catch(OverlappingFileLockException oe) {
704        // Cannot read from the locked file on Windows.
705        String lockingJvmName = Path.WINDOWS ? "" : (" " + file.readLine());
706        LOG.error("It appears that another namenode" + lockingJvmName
707            + " has already locked the storage directory");
708        file.close();
709        return null;
710      } catch(IOException e) {
711        LOG.error("Failed to acquire lock on " + lockF + ". If this storage directory is mounted via NFS, " 
712            + "ensure that the appropriate nfs lock services are running.", e);
713        file.close();
714        throw e;
715      }
716      if (res != null && !deletionHookAdded) {
717        // If the file existed prior to our startup, we didn't
718        // call deleteOnExit above. But since we successfully locked
719        // the dir, we can take care of cleaning it up.
720        lockF.deleteOnExit();
721      }
722      return res;
723    }
724
725    /**
726     * Unlock storage.
727     * 
728     * @throws IOException
729     */
730    public void unlock() throws IOException {
731      if (this.lock == null)
732        return;
733      this.lock.release();
734      lock.channel().close();
735      lock = null;
736    }
737    
738    @Override
739    public String toString() {
740      return "Storage Directory " + this.root;
741    }
742
    /**
     * Check whether underlying file system supports file locking.
     * 
     * Probes by attempting to take the lock twice: if the second attempt
     * fails while the first lock is held, locks are exclusive and therefore
     * supported; if both attempts succeed, locking is not enforced.
     *
     * @return <code>true</code> if exclusive locks are supported or
     *         <code>false</code> otherwise.
     * @throws IOException
     * @see StorageDirectory#lock()
     */
    public boolean isLockSupported() throws IOException {
      FileLock firstLock = null;
      FileLock secondLock = null;
      try {
        firstLock = lock;
        if(firstLock == null) {
          // Not currently locked: take the first lock ourselves. If even
          // that fails, another process holds it, so locking clearly works.
          firstLock = tryLock();
          if(firstLock == null)
            return true;
        }
        // With the first lock held, a second attempt must fail if the file
        // system enforces exclusive locks.
        secondLock = tryLock();
        if(secondLock == null)
          return true;
      } finally {
        // Release only locks acquired by this probe; leave a pre-existing
        // this.lock intact.
        if(firstLock != null && firstLock != lock) {
          firstLock.release();
          firstLock.channel().close();
        }
        if(secondLock != null) {
          secondLock.release();
          secondLock.channel().close();
        }
      }
      return false;
    }
776  }
777
  /**
   * Create empty storage info of the specified type
   */
  protected Storage(NodeType type) {
    super(type);
  }
  
  /** Create storage initialized from an existing {@link StorageInfo}. */
  protected Storage(StorageInfo storageInfo) {
    super(storageInfo);
  }
  
  /** @return the number of configured storage directories. */
  public int getNumStorageDirs() {
    return storageDirs.size();
  }
  
  /** @return the storage directory at position {@code idx}. */
  public StorageDirectory getStorageDir(int idx) {
    return storageDirs.get(idx);
  }
  
  /**
   * @return the storage directory, with the precondition that this storage
   * has exactly one storage directory
   */
  public StorageDirectory getSingularStorageDir() {
    Preconditions.checkState(storageDirs.size() == 1);
    return storageDirs.get(0);
  }
  
  /** Register an additional storage directory with this Storage. */
  protected void addStorageDir(StorageDirectory sd) {
    storageDirs.add(sd);
  }
809
  /**
   * Return true if the layout of the given storage directory is from a version
   * of Hadoop prior to the introduction of the "current" and "previous"
   * directories which allow upgrade and rollback.
   */
  public abstract boolean isPreUpgradableLayout(StorageDirectory sd)
  throws IOException;

  /**
   * Check if the given storage directory comes from a version of Hadoop
   * prior to when the directory layout changed (ie 0.13). If this is
   * the case, this method throws an IOException.
   */
  private void checkOldLayoutStorage(StorageDirectory sd) throws IOException {
    if (isPreUpgradableLayout(sd)) {
      // 0 is the sentinel for "too old to determine"; it always fails the
      // upgradability check and so always throws.
      checkVersionUpgradable(0);
    }
  }
828
829  /**
830   * Checks if the upgrade from the given old version is supported. If
831   * no upgrade is supported, it throws IncorrectVersionException.
832   * 
833   * @param oldVersion
834   */
835  public static void checkVersionUpgradable(int oldVersion) 
836                                     throws IOException {
837    if (oldVersion > LAST_UPGRADABLE_LAYOUT_VERSION) {
838      String msg = "*********** Upgrade is not supported from this " +
839                   " older version " + oldVersion + 
840                   " of storage to the current version." + 
841                   " Please upgrade to " + LAST_UPGRADABLE_HADOOP_VERSION +
842                   " or a later version and then upgrade to current" +
843                   " version. Old layout version is " + 
844                   (oldVersion == 0 ? "'too old'" : (""+oldVersion)) +
845                   " and latest layout version this software version can" +
846                   " upgrade from is " + LAST_UPGRADABLE_LAYOUT_VERSION +
847                   ". ************";
848      LOG.error(msg);
849      throw new IOException(msg); 
850    }
851    
852  }
853  
854  /**
855   * Iterate over each of the {@link FormatConfirmable} objects,
856   * potentially checking with the user whether it should be formatted.
857   * 
858   * If running in interactive mode, will prompt the user for each
859   * directory to allow them to format anyway. Otherwise, returns
860   * false, unless 'force' is specified.
861   * 
862   * @param force format regardless of whether dirs exist
863   * @param interactive prompt the user when a dir exists
864   * @return true if formatting should proceed
865   * @throws IOException if some storage cannot be accessed
866   */
867  public static boolean confirmFormat(
868      Iterable<? extends FormatConfirmable> items,
869      boolean force, boolean interactive) throws IOException {
870    for (FormatConfirmable item : items) {
871      if (!item.hasSomeData())
872        continue;
873      if (force) { // Don't confirm, always format.
874        System.err.println(
875            "Data exists in " + item + ". Formatting anyway.");
876        continue;
877      }
878      if (!interactive) { // Don't ask - always don't format
879        System.err.println(
880            "Running in non-interactive mode, and data appears to exist in " +
881            item + ". Not formatting.");
882        return false;
883      }
884      if (!ToolRunner.confirmPrompt("Re-format filesystem in " + item + " ?")) {
885        System.err.println("Format aborted in " + item);
886        return false;
887      }
888    }
889    
890    return true;
891  }
892  
893  /**
894   * Interface for classes which need to have the user confirm their
895   * formatting during NameNode -format and other similar operations.
896   * 
897   * This is currently a storage directory or journal manager.
898   */
899  @InterfaceAudience.Private
900  public interface FormatConfirmable {
901    /**
902     * @return true if the storage seems to have some valid data in it,
903     * and the user should be required to confirm the format. Otherwise,
904     * false.
905     * @throws IOException if the storage cannot be accessed at all.
906     */
907    public boolean hasSomeData() throws IOException;
908    
909    /**
910     * @return a string representation of the formattable item, suitable
911     * for display to the user inside a prompt
912     */
913    public String toString();
914  }
915  
916  /**
917   * Set common storage fields into the given properties object.
918   * Should be overloaded if additional fields need to be set.
919   * 
920   * @param props the Properties object to write into
921   */
922  protected void setPropertiesFromFields(Properties props, 
923                                         StorageDirectory sd)
924      throws IOException {
925    props.setProperty("layoutVersion", String.valueOf(layoutVersion));
926    props.setProperty("storageType", storageType.toString());
927    props.setProperty("namespaceID", String.valueOf(namespaceID));
928    // Set clusterID in version with federation support
929    if (versionSupportsFederation(getServiceLayoutFeatureMap())) {
930      props.setProperty("clusterID", clusterID);
931    }
932    props.setProperty("cTime", String.valueOf(cTime));
933  }
934
935  /**
936   * Write properties to the VERSION file in the given storage directory.
937   */
938  public void writeProperties(StorageDirectory sd) throws IOException {
939    writeProperties(sd.getVersionFile(), sd);
940  }
941  
942  public void writeProperties(File to, StorageDirectory sd) throws IOException {
943    Properties props = new Properties();
944    setPropertiesFromFields(props, sd);
945    writeProperties(to, sd, props);
946  }
947
948  public static void writeProperties(File to, StorageDirectory sd,
949      Properties props) throws IOException {
950    RandomAccessFile file = new RandomAccessFile(to, "rws");
951    FileOutputStream out = null;
952    try {
953      file.seek(0);
954      out = new FileOutputStream(file.getFD());
955      /*
956       * If server is interrupted before this line, 
957       * the version file will remain unchanged.
958       */
959      props.store(out, null);
960      /*
961       * Now the new fields are flushed to the head of the file, but file 
962       * length can still be larger then required and therefore the file can 
963       * contain whole or corrupted fields from its old contents in the end.
964       * If server is interrupted here and restarted later these extra fields
965       * either should not effect server behavior or should be handled
966       * by the server correctly.
967       */
968      file.setLength(out.getChannel().position());
969    } finally {
970      if (out != null) {
971        out.close();
972      }
973      file.close();
974    }
975  }
976
977  public static void rename(File from, File to) throws IOException {
978    if (!from.renameTo(to))
979      throw new IOException("Failed to rename " 
980                            + from.getCanonicalPath() + " to " + to.getCanonicalPath());
981  }
982
983  /**
984   * Recursively delete all the content of the directory first and then 
985   * the directory itself from the local filesystem.
986   * @param dir The directory to delete
987   * @throws IOException
988   */
989  public static void deleteDir(File dir) throws IOException {
990    if (!FileUtil.fullyDelete(dir))
991      throw new IOException("Failed to delete " + dir.getCanonicalPath());
992  }
993  
994  /**
995   * Write all data storage files.
996   * @throws IOException
997   */
998  public void writeAll() throws IOException {
999    this.layoutVersion = getServiceLayoutVersion();
1000    for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1001      writeProperties(it.next());
1002    }
1003  }
1004
1005  /**
1006   * Unlock all storage directories.
1007   * @throws IOException
1008   */
1009  public void unlockAll() throws IOException {
1010    for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1011      it.next().unlock();
1012    }
1013  }
1014
  /**
   * @return the build version of this software, i.e. the source revision
   * reported by {@link VersionInfo#getRevision()}
   */
  public static String getBuildVersion() {
    return VersionInfo.getRevision();
  }
1018
1019  public static String getRegistrationID(StorageInfo storage) {
1020    return "NS-" + Integer.toString(storage.getNamespaceID())
1021      + "-" + storage.getClusterID()
1022      + "-" + Long.toString(storage.getCTime());
1023  }
1024  
1025  public static boolean is203LayoutVersion(int layoutVersion) {
1026    for (int lv203 : LAYOUT_VERSIONS_203) {
1027      if (lv203 == layoutVersion) {
1028        return true;
1029      }
1030    }
1031    return false;
1032  }
1033}