001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import com.google.common.annotations.VisibleForTesting;
021import com.google.common.base.Joiner;
022import com.google.common.base.Preconditions;
023import com.google.common.collect.Lists;
024
025import org.apache.commons.logging.Log;
026import org.apache.commons.logging.LogFactory;
027import org.apache.hadoop.HadoopIllegalArgumentException;
028import org.apache.hadoop.classification.InterfaceAudience;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FileSystem;
031import org.apache.hadoop.fs.Trash;
032import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
033import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
034import org.apache.hadoop.ha.HAServiceStatus;
035import org.apache.hadoop.ha.HealthCheckFailedException;
036import org.apache.hadoop.ha.ServiceFailedException;
037import org.apache.hadoop.hdfs.DFSConfigKeys;
038import org.apache.hadoop.hdfs.DFSUtil;
039import org.apache.hadoop.hdfs.HAUtil;
040import org.apache.hadoop.hdfs.HdfsConfiguration;
041import org.apache.hadoop.hdfs.protocol.ClientProtocol;
042import org.apache.hadoop.hdfs.protocol.HdfsConstants;
043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
044import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
045import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
046import org.apache.hadoop.hdfs.server.namenode.ha.*;
047import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
048import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
049import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
050import org.apache.hadoop.hdfs.server.protocol.*;
051import org.apache.hadoop.ipc.Server;
052import org.apache.hadoop.ipc.StandbyException;
053import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
054import org.apache.hadoop.metrics2.util.MBeans;
055import org.apache.hadoop.net.NetUtils;
056import org.apache.hadoop.security.AccessControlException;
057import org.apache.hadoop.security.RefreshUserMappingsProtocol;
058import org.apache.hadoop.security.SecurityUtil;
059import org.apache.hadoop.security.UserGroupInformation;
060import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
061import org.apache.hadoop.ipc.RefreshCallQueueProtocol;
062import org.apache.hadoop.tools.GetUserMappingsProtocol;
063import org.apache.hadoop.util.ExitUtil.ExitException;
064import org.apache.hadoop.util.JvmPauseMonitor;
065import org.apache.hadoop.util.ServicePlugin;
066import org.apache.hadoop.util.StringUtils;
067
068import javax.management.ObjectName;
069
070import java.io.IOException;
071import java.io.PrintStream;
072import java.net.InetSocketAddress;
073import java.net.URI;
074import java.security.PrivilegedExceptionAction;
075import java.util.ArrayList;
076import java.util.Arrays;
077import java.util.Collection;
078import java.util.List;
079
080import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
081import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
082import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
083import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
084import static org.apache.hadoop.util.ExitUtil.terminate;
085import static org.apache.hadoop.util.ToolRunner.confirmPrompt;
086
087/**********************************************************
088 * NameNode serves as both directory namespace manager and
089 * "inode table" for the Hadoop DFS.  There is a single NameNode
090 * running in any DFS deployment.  (Well, except when there
091 * is a second backup/failover NameNode, or when using federated NameNodes.)
092 *
093 * The NameNode controls two critical tables:
094 *   1)  filename->blocksequence (namespace)
095 *   2)  block->machinelist ("inodes")
096 *
097 * The first table is stored on disk and is very precious.
098 * The second table is rebuilt every time the NameNode comes up.
099 *
100 * 'NameNode' refers to both this class as well as the 'NameNode server'.
101 * The 'FSNamesystem' class actually performs most of the filesystem
102 * management.  The majority of the 'NameNode' class itself is concerned
103 * with exposing the IPC interface and the HTTP server to the outside world,
104 * plus some configuration management.
105 *
106 * NameNode implements the
107 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
108 * allows clients to ask for DFS services.
109 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
110 * direct use by authors of DFS client code.  End-users should instead use the
111 * {@link org.apache.hadoop.fs.FileSystem} class.
112 *
113 * NameNode also implements the
114 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
115 * used by DataNodes that actually store DFS data blocks.  These
116 * methods are invoked repeatedly and automatically by all the
117 * DataNodes in a DFS deployment.
118 *
119 * NameNode also implements the
120 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
121 * used by secondary namenodes or rebalancing processes to get partial
122 * NameNode state, for example partial blocksMap etc.
123 **********************************************************/
124@InterfaceAudience.Private
125public class NameNode implements NameNodeStatusMXBean {
  // Force-load the HDFS default/site configuration resources before any
  // Configuration instance is created on behalf of this class.
  static{
    HdfsConfiguration.init();
  }

  /**
   * Categories of operations supported by the namenode.
   */
  public static enum OperationCategory {
    /** Operations that are state agnostic */
    UNCHECKED,
    /** Read operation that does not change the namespace state */
    READ,
    /** Write operation that changes the namespace state */
    WRITE,
    /** Operations related to checkpointing */
    CHECKPOINT,
    /** Operations related to {@link JournalProtocol} */
    JOURNAL
  }
145  
146  /**
147   * HDFS configuration can have three types of parameters:
148   * <ol>
149   * <li>Parameters that are common for all the name services in the cluster.</li>
150   * <li>Parameters that are specific to a name service. These keys are suffixed
151   * with nameserviceId in the configuration. For example,
152   * "dfs.namenode.rpc-address.nameservice1".</li>
153   * <li>Parameters that are specific to a single name node. These keys are suffixed
154   * with nameserviceId and namenodeId in the configuration. for example,
155   * "dfs.namenode.rpc-address.nameservice1.namenode1"</li>
156   * </ol>
157   * 
158   * In the latter cases, operators may specify the configuration without
159   * any suffix, with a nameservice suffix, or with a nameservice and namenode
160   * suffix. The more specific suffix will take precedence.
161   * 
162   * These keys are specific to a given namenode, and thus may be configured
163   * globally, for a nameservice, or for a specific namenode within a nameservice.
164   */
165  public static final String[] NAMENODE_SPECIFIC_KEYS = {
166    DFS_NAMENODE_RPC_ADDRESS_KEY,
167    DFS_NAMENODE_RPC_BIND_HOST_KEY,
168    DFS_NAMENODE_NAME_DIR_KEY,
169    DFS_NAMENODE_EDITS_DIR_KEY,
170    DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
171    DFS_NAMENODE_CHECKPOINT_DIR_KEY,
172    DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
173    DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
174    DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
175    DFS_NAMENODE_HTTP_ADDRESS_KEY,
176    DFS_NAMENODE_HTTPS_ADDRESS_KEY,
177    DFS_NAMENODE_KEYTAB_FILE_KEY,
178    DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
179    DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY,
180    DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
181    DFS_NAMENODE_BACKUP_ADDRESS_KEY,
182    DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
183    DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
184    DFS_NAMENODE_USER_NAME_KEY,
185    DFS_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY,
186    DFS_HA_FENCE_METHODS_KEY,
187    DFS_HA_ZKFC_PORT_KEY,
188    DFS_HA_FENCE_METHODS_KEY
189  };
190  
191  /**
192   * @see #NAMENODE_SPECIFIC_KEYS
193   * These keys are specific to a nameservice, but may not be overridden
194   * for a specific namenode.
195   */
196  public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
197    DFS_HA_AUTO_FAILOVER_ENABLED_KEY
198  };
199  
200  private static final String USAGE = "Usage: java NameNode ["
201      + StartupOption.BACKUP.getName() + "] | ["
202      + StartupOption.CHECKPOINT.getName() + "] | ["
203      + StartupOption.FORMAT.getName() + " ["
204      + StartupOption.CLUSTERID.getName() + " cid ] ["
205      + StartupOption.FORCE.getName() + "] ["
206      + StartupOption.NONINTERACTIVE.getName() + "] ] | ["
207      + StartupOption.UPGRADE.getName() + 
208        " [" + StartupOption.CLUSTERID.getName() + " cid]" +
209        " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | ["
210      + StartupOption.ROLLBACK.getName() + "] | ["
211      + StartupOption.ROLLINGUPGRADE.getName() + " <"
212      + RollingUpgradeStartupOption.DOWNGRADE.name().toLowerCase() + "|"
213      + RollingUpgradeStartupOption.ROLLBACK.name().toLowerCase() + "> ] | ["
214      + StartupOption.FINALIZE.getName() + "] | ["
215      + StartupOption.IMPORT.getName() + "] | ["
216      + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | ["
217      + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | ["
218      + StartupOption.RECOVER.getName() + " [ " + StartupOption.FORCE.getName()
219      + " ] ]";
220  
221  public long getProtocolVersion(String protocol, 
222                                 long clientVersion) throws IOException {
223    if (protocol.equals(ClientProtocol.class.getName())) {
224      return ClientProtocol.versionID; 
225    } else if (protocol.equals(DatanodeProtocol.class.getName())){
226      return DatanodeProtocol.versionID;
227    } else if (protocol.equals(NamenodeProtocol.class.getName())){
228      return NamenodeProtocol.versionID;
229    } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){
230      return RefreshAuthorizationPolicyProtocol.versionID;
231    } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){
232      return RefreshUserMappingsProtocol.versionID;
233    } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) {
234      return RefreshCallQueueProtocol.versionID;
235    } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){
236      return GetUserMappingsProtocol.versionID;
237    } else {
238      throw new IOException("Unknown protocol to name node: " + protocol);
239    }
240  }
241    
  /** Default NameNode RPC port. */
  public static final int DEFAULT_PORT = 8020;
  public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
  /** Log dedicated to namespace state-change messages. */
  public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
  /** Log dedicated to block state-change messages. */
  public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");
  /** Shared singleton HA states. */
  public static final HAState ACTIVE_STATE = new ActiveState();
  public static final HAState STANDBY_STATE = new StandbyState();
  
  protected FSNamesystem namesystem; 
  protected final Configuration conf;
  protected final NamenodeRole role;
  // Current HA state; volatile so readers see transitions without locking.
  private volatile HAState state;
  private final boolean haEnabled;
  private final HAContext haContext;
  protected final boolean allowStaleStandbyReads;

  
  /** httpServer */
  protected NameNodeHttpServer httpServer;
  // Trash emptier thread; created in startTrashEmptier().
  private Thread emptier;
  /** only used for testing purposes  */
  protected boolean stopRequested = false;
  /** Registration information of this name-node  */
  protected NamenodeRegistration nodeRegistration;
  /** Activated plug-ins. */
  private List<ServicePlugin> plugins;
  
  // RPC server; created in initialize().
  private NameNodeRpcServer rpcServer;

  // Monitors and logs long JVM pauses; started in initialize().
  private JvmPauseMonitor pauseMonitor;
  // JMX ObjectName for the status MXBean; unregistered in stop().
  private ObjectName nameNodeStatusBeanName;
  /**
   * The namenode address that clients will use to access this namenode
   * or the name service. For HA configurations using logical URI, it
   * will be the logical address.
   */
  private String clientNamenodeAddress;
  
  /** Format a new filesystem.  Destroys any filesystem that may already
   * exist at this location.  **/
  public static void format(Configuration conf) throws IOException {
    format(conf, true, true);
  }

  static NameNodeMetrics metrics;
  private static final StartupProgress startupProgress = new StartupProgress();
  /** Return the {@link FSNamesystem} object.
   * @return {@link FSNamesystem} object.
   */
  public FSNamesystem getNamesystem() {
    return namesystem;
  }

  /** @return the RPC server that serves all NameNode protocols. */
  public NamenodeProtocols getRpcServer() {
    return rpcServer;
  }
  
  /** Create the static metrics instance for the given role. */
  static void initMetrics(Configuration conf, NamenodeRole role) {
    metrics = NameNodeMetrics.create(conf, role);
  }

  /** @return the metrics instance created by {@link #initMetrics}. */
  public static NameNodeMetrics getNameNodeMetrics() {
    return metrics;
  }

  /**
   * Returns object used for reporting namenode startup progress.
   * 
   * @return StartupProgress for reporting namenode startup progress
   */
  public static StartupProgress getStartupProgress() {
    return startupProgress;
  }

  /**
   * Return the service name of the issued delegation token.
   *
   * @return The name service id in HA-mode, or the rpc address in non-HA mode
   */
  public String getTokenServiceName() {
    return getClientNamenodeAddress();
  }
323
324  /**
325   * Set the namenode address that will be used by clients to access this
326   * namenode or name service. This needs to be called before the config
327   * is overriden.
328   */
329  public void setClientNamenodeAddress(Configuration conf) {
330    String nnAddr = conf.get(FS_DEFAULT_NAME_KEY);
331    if (nnAddr == null) {
332      // default fs is not set.
333      clientNamenodeAddress = null;
334      return;
335    }
336
337    LOG.info(FS_DEFAULT_NAME_KEY + " is " + nnAddr);
338    URI nnUri = URI.create(nnAddr);
339
340    String nnHost = nnUri.getHost();
341    if (nnHost == null) {
342      clientNamenodeAddress = null;
343      return;
344    }
345
346    if (DFSUtil.getNameServiceIds(conf).contains(nnHost)) {
347      // host name is logical
348      clientNamenodeAddress = nnHost;
349    } else if (nnUri.getPort() > 0) {
350      // physical address with a valid port
351      clientNamenodeAddress = nnUri.getAuthority();
352    } else {
353      // the port is missing or 0. Figure out real bind address later.
354      clientNamenodeAddress = null;
355      return;
356    }
357    LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
358        + " this namenode/service.");
359  }
360
361  /**
362   * Get the namenode address to be used by clients.
363   * @return nn address
364   */
365  public String getClientNamenodeAddress() {
366    return clientNamenodeAddress;
367  }
368
369  public static InetSocketAddress getAddress(String address) {
370    return NetUtils.createSocketAddr(address, DEFAULT_PORT);
371  }
372  
373  /**
374   * Set the configuration property for the service rpc address
375   * to address
376   */
377  public static void setServiceAddress(Configuration conf,
378                                           String address) {
379    LOG.info("Setting ADDRESS " + address);
380    conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
381  }
382  
383  /**
384   * Fetches the address for services to use when connecting to namenode
385   * based on the value of fallback returns null if the special
386   * address is not specified or returns the default namenode address
387   * to be used by both clients and services.
388   * Services here are datanodes, backup node, any non client connection
389   */
390  public static InetSocketAddress getServiceAddress(Configuration conf,
391                                                        boolean fallback) {
392    String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
393    if (addr == null || addr.isEmpty()) {
394      return fallback ? getAddress(conf) : null;
395    }
396    return getAddress(addr);
397  }
398
399  public static InetSocketAddress getAddress(Configuration conf) {
400    URI filesystemURI = FileSystem.getDefaultUri(conf);
401    return getAddress(filesystemURI);
402  }
403
404
405  /**
406   * TODO:FEDERATION
407   * @param filesystemURI
408   * @return address of file system
409   */
410  public static InetSocketAddress getAddress(URI filesystemURI) {
411    String authority = filesystemURI.getAuthority();
412    if (authority == null) {
413      throw new IllegalArgumentException(String.format(
414          "Invalid URI for NameNode address (check %s): %s has no authority.",
415          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
416    }
417    if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
418        filesystemURI.getScheme())) {
419      throw new IllegalArgumentException(String.format(
420          "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
421          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
422          HdfsConstants.HDFS_URI_SCHEME));
423    }
424    return getAddress(authority);
425  }
426
427  public static URI getUri(InetSocketAddress namenode) {
428    int port = namenode.getPort();
429    String portString = port == DEFAULT_PORT ? "" : (":"+port);
430    return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 
431        + namenode.getHostName()+portString);
432  }
433
434  //
435  // Common NameNode methods implementation for the active name-node role.
436  //
437  public NamenodeRole getRole() {
438    return role;
439  }
440
441  boolean isRole(NamenodeRole that) {
442    return role.equals(that);
443  }
444
445  /**
446   * Given a configuration get the address of the service rpc server
447   * If the service rpc is not configured returns null
448   */
449  protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
450    return NameNode.getServiceAddress(conf, false);
451  }
452
453  protected InetSocketAddress getRpcServerAddress(Configuration conf) {
454    return getAddress(conf);
455  }
456  
457  /** Given a configuration get the bind host of the service rpc server
458   *  If the bind host is not configured returns null.
459   */
460  protected String getServiceRpcServerBindHost(Configuration conf) {
461    String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
462    if (addr == null || addr.isEmpty()) {
463      return null;
464    }
465    return addr;
466  }
467
468  /** Given a configuration get the bind host of the client rpc server
469   *  If the bind host is not configured returns null.
470   */
471  protected String getRpcServerBindHost(Configuration conf) {
472    String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
473    if (addr == null || addr.isEmpty()) {
474      return null;
475    }
476    return addr;
477  }
478   
479  /**
480   * Modifies the configuration passed to contain the service rpc address setting
481   */
482  protected void setRpcServiceServerAddress(Configuration conf,
483      InetSocketAddress serviceRPCAddress) {
484    setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
485  }
486
487  protected void setRpcServerAddress(Configuration conf,
488      InetSocketAddress rpcAddress) {
489    FileSystem.setDefaultUri(conf, getUri(rpcAddress));
490  }
491
492  protected InetSocketAddress getHttpServerAddress(Configuration conf) {
493    return getHttpAddress(conf);
494  }
495
496  /** @return the NameNode HTTP address. */
497  public static InetSocketAddress getHttpAddress(Configuration conf) {
498    return  NetUtils.createSocketAddr(
499        conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
500  }
501
502  protected void loadNamesystem(Configuration conf) throws IOException {
503    this.namesystem = FSNamesystem.loadFromDisk(conf);
504  }
505
506  NamenodeRegistration getRegistration() {
507    return nodeRegistration;
508  }
509
510  NamenodeRegistration setRegistration() {
511    nodeRegistration = new NamenodeRegistration(
512        NetUtils.getHostPortString(rpcServer.getRpcAddress()),
513        NetUtils.getHostPortString(getHttpAddress()),
514        getFSImage().getStorage(), getRole());
515    return nodeRegistration;
516  }
517
518  /* optimize ugi lookup for RPC operations to avoid a trip through
519   * UGI.getCurrentUser which is synch'ed
520   */
521  public static UserGroupInformation getRemoteUser() throws IOException {
522    UserGroupInformation ugi = Server.getRemoteUser();
523    return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
524  }
525
526
527  /**
528   * Login as the configured user for the NameNode.
529   */
530  void loginAsNameNodeUser(Configuration conf) throws IOException {
531    InetSocketAddress socAddr = getRpcServerAddress(conf);
532    SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
533        DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
534  }
535  
536  /**
537   * Initialize name-node.
538   * 
539   * @param conf the configuration
540   */
541  protected void initialize(Configuration conf) throws IOException {
542    if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
543      String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
544      if (intervals != null) {
545        conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
546          intervals);
547      }
548    }
549
550    UserGroupInformation.setConfiguration(conf);
551    loginAsNameNodeUser(conf);
552
553    NameNode.initMetrics(conf, this.getRole());
554    StartupProgressMetrics.register(startupProgress);
555
556    if (NamenodeRole.NAMENODE == role) {
557      startHttpServer(conf);
558    }
559    loadNamesystem(conf);
560
561    rpcServer = createRpcServer(conf);
562    if (clientNamenodeAddress == null) {
563      // This is expected for MiniDFSCluster. Set it now using 
564      // the RPC server's bind address.
565      clientNamenodeAddress = 
566          NetUtils.getHostPortString(rpcServer.getRpcAddress());
567      LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
568          + " this namenode/service.");
569    }
570    if (NamenodeRole.NAMENODE == role) {
571      httpServer.setNameNodeAddress(getNameNodeAddress());
572      httpServer.setFSImage(getFSImage());
573    }
574    
575    pauseMonitor = new JvmPauseMonitor(conf);
576    pauseMonitor.start();
577
578    startCommonServices(conf);
579  }
580  
581  /**
582   * Create the RPC server implementation. Used as an extension point for the
583   * BackupNode.
584   */
585  protected NameNodeRpcServer createRpcServer(Configuration conf)
586      throws IOException {
587    return new NameNodeRpcServer(conf, this);
588  }
589
590  /** Start the services common to active and standby states */
591  private void startCommonServices(Configuration conf) throws IOException {
592    namesystem.startCommonServices(conf, haContext);
593    registerNNSMXBean();
594    if (NamenodeRole.NAMENODE != role) {
595      startHttpServer(conf);
596      httpServer.setNameNodeAddress(getNameNodeAddress());
597      httpServer.setFSImage(getFSImage());
598    }
599    rpcServer.start();
600    plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
601        ServicePlugin.class);
602    for (ServicePlugin p: plugins) {
603      try {
604        p.start(this);
605      } catch (Throwable t) {
606        LOG.warn("ServicePlugin " + p + " could not be started", t);
607      }
608    }
609    LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
610    if (rpcServer.getServiceRpcAddress() != null) {
611      LOG.info(getRole() + " service RPC up at: "
612          + rpcServer.getServiceRpcAddress());
613    }
614  }
615  
616  private void stopCommonServices() {
617    if(rpcServer != null) rpcServer.stop();
618    if(namesystem != null) namesystem.close();
619    if (pauseMonitor != null) pauseMonitor.stop();
620    if (plugins != null) {
621      for (ServicePlugin p : plugins) {
622        try {
623          p.stop();
624        } catch (Throwable t) {
625          LOG.warn("ServicePlugin " + p + " could not be stopped", t);
626        }
627      }
628    }   
629    stopHttpServer();
630  }
631  
  /**
   * Start the daemon thread running the Trash emptier, unless trash is
   * disabled (interval of 0).
   *
   * @throws IOException if the configured interval is negative, or the
   *         filesystem cannot be obtained
   */
  private void startTrashEmptier(final Configuration conf) throws IOException {
    long trashInterval =
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
    if (trashInterval == 0) {
      // Trash is disabled; nothing to start.
      return;
    } else if (trashInterval < 0) {
      throw new IOException("Cannot start trash emptier with negative interval."
          + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
    }
    
    // This may be called from the transitionToActive code path, in which
    // case the current user is the administrator, not the NN. The trash
    // emptier needs to run as the NN. See HDFS-3972.
    FileSystem fs = SecurityUtil.doAsLoginUser(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws IOException {
            return FileSystem.get(conf);
          }
        });
    this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
    this.emptier.setDaemon(true);
    this.emptier.start();
  }
656  
  /** Interrupt the trash emptier thread, if running, and drop the reference. */
  private void stopTrashEmptier() {
    if (this.emptier != null) {
      emptier.interrupt();
      emptier = null;
    }
  }
  
  /** Create and start the HTTP server, then hand it the startup progress. */
  private void startHttpServer(final Configuration conf) throws IOException {
    httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf));
    httpServer.start();
    httpServer.setStartupProgress(startupProgress);
  }
  
  /** Stop the HTTP server; failures are logged, never propagated. */
  private void stopHttpServer() {
    try {
      if (httpServer != null) httpServer.stop();
    } catch (Exception e) {
      LOG.error("Exception while stopping httpserver", e);
    }
  }
677
678  /**
679   * Start NameNode.
680   * <p>
681   * The name-node can be started with one of the following startup options:
682   * <ul> 
683   * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
684   * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
685   * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
686   * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
687   * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster  
688   * upgrade and create a snapshot of the current file system state</li> 
689   * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node
690   * metadata</li>
691   * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the  
692   *            cluster back to the previous state</li>
693   * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize 
694   *            previous upgrade</li>
695   * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
696   * </ul>
697   * The option is passed via configuration field: 
698   * <tt>dfs.namenode.startup</tt>
699   * 
700   * The conf will be modified to reflect the actual ports on which 
701   * the NameNode is up and running if the user passes the port as
702   * <code>zero</code> in the conf.
703   * 
704   * @param conf  confirguration
705   * @throws IOException
706   */
707  public NameNode(Configuration conf) throws IOException {
708    this(conf, NamenodeRole.NAMENODE);
709  }
710
  /**
   * Construct a NameNode with the given role, resolving HA/nameservice
   * settings and starting all services. If startup fails, the partially
   * started services are stopped before the exception is rethrown.
   */
  protected NameNode(Configuration conf, NamenodeRole role) 
      throws IOException { 
    this.conf = conf;
    this.role = role;
    // Must happen before initializeGenericKeys() overrides the config
    // below (see setClientNamenodeAddress javadoc).
    setClientNamenodeAddress(conf);
    String nsId = getNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
    state = createHAState(getStartupOption(conf));
    this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
    this.haContext = createHAContext();
    try {
      initializeGenericKeys(conf, nsId, namenodeId);
      initialize(conf);
      // Enter the initial HA state while holding the HA write lock.
      try {
        haContext.writeLock();
        state.prepareToEnterState(haContext);
        state.enterState(haContext);
      } finally {
        haContext.writeUnlock();
      }
    } catch (IOException e) {
      this.stop();
      throw e;
    } catch (HadoopIllegalArgumentException e) {
      this.stop();
      throw e;
    }
  }
740
741  protected HAState createHAState(StartupOption startOpt) {
742    if (!haEnabled || startOpt == StartupOption.UPGRADE) {
743      return ACTIVE_STATE;
744    } else {
745      return STANDBY_STATE;
746    }
747  }
748
749  protected HAContext createHAContext() {
750    return new NameNodeHAContext();
751  }
752
753  /**
754   * Wait for service to finish.
755   * (Normally, it runs forever.)
756   */
757  public void join() {
758    try {
759      rpcServer.join();
760    } catch (InterruptedException ie) {
761      LOG.info("Caught interrupted exception ", ie);
762    }
763  }
764
765  /**
766   * Stop all NameNode threads and wait for all to finish.
767   */
768  public void stop() {
769    synchronized(this) {
770      if (stopRequested)
771        return;
772      stopRequested = true;
773    }
774    try {
775      if (state != null) {
776        state.exitState(haContext);
777      }
778    } catch (ServiceFailedException e) {
779      LOG.warn("Encountered exception while exiting state ", e);
780    } finally {
781      stopCommonServices();
782      if (metrics != null) {
783        metrics.shutdown();
784      }
785      if (namesystem != null) {
786        namesystem.shutdown();
787      }
788      if (nameNodeStatusBeanName != null) {
789        MBeans.unregister(nameNodeStatusBeanName);
790        nameNodeStatusBeanName = null;
791      }
792    }
793  }
794
795  synchronized boolean isStopRequested() {
796    return stopRequested;
797  }
798
799  /**
800   * Is the cluster currently in safe mode?
801   */
802  public boolean isInSafeMode() {
803    return namesystem.isInSafeMode();
804  }
805    
806  /** get FSImage */
807  @VisibleForTesting
808  public FSImage getFSImage() {
809    return namesystem.dir.fsImage;
810  }
811
812  /**
813   * @return NameNode RPC address
814   */
815  public InetSocketAddress getNameNodeAddress() {
816    return rpcServer.getRpcAddress();
817  }
818
819  /**
820   * @return NameNode RPC address in "host:port" string form
821   */
822  public String getNameNodeAddressHostPortString() {
823    return NetUtils.getHostPortString(rpcServer.getRpcAddress());
824  }
825
826  /**
827   * @return NameNode service RPC address if configured, the
828   *    NameNode RPC address otherwise
829   */
830  public InetSocketAddress getServiceRpcAddress() {
831    final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
832    return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
833  }
834
835  /**
836   * @return NameNode HTTP address, used by the Web UI, image transfer,
837   *    and HTTP-based file system clients like Hftp and WebHDFS
838   */
839  public InetSocketAddress getHttpAddress() {
840    return httpServer.getHttpAddress();
841  }
842
843  /**
844   * @return NameNode HTTPS address, used by the Web UI, image transfer,
845   *    and HTTP-based file system clients like Hftp and WebHDFS
846   */
847  public InetSocketAddress getHttpsAddress() {
848    return httpServer.getHttpsAddress();
849  }
850
851  /**
852   * Verify that configured directories exist, then
853   * Interactively confirm that formatting is desired 
854   * for each existing directory and format them.
855   * 
856   * @param conf
857   * @param force
858   * @return true if formatting was aborted, false otherwise
859   * @throws IOException
860   */
861  private static boolean format(Configuration conf, boolean force,
862      boolean isInteractive) throws IOException {
863    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
864    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
865    initializeGenericKeys(conf, nsId, namenodeId);
866    checkAllowFormat(conf);
867
868    if (UserGroupInformation.isSecurityEnabled()) {
869      InetSocketAddress socAddr = getAddress(conf);
870      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
871          DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
872    }
873    
874    Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
875    List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
876    List<URI> dirsToPrompt = new ArrayList<URI>();
877    dirsToPrompt.addAll(nameDirsToFormat);
878    dirsToPrompt.addAll(sharedDirs);
879    List<URI> editDirsToFormat = 
880                 FSNamesystem.getNamespaceEditsDirs(conf);
881
882    // if clusterID is not provided - see if you can find the current one
883    String clusterId = StartupOption.FORMAT.getClusterId();
884    if(clusterId == null || clusterId.equals("")) {
885      //Generate a new cluster id
886      clusterId = NNStorage.newClusterID();
887    }
888    System.out.println("Formatting using clusterid: " + clusterId);
889    
890    FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
891    try {
892      FSNamesystem fsn = new FSNamesystem(conf, fsImage);
893      fsImage.getEditLog().initJournalsForWrite();
894
895      if (!fsImage.confirmFormat(force, isInteractive)) {
896        return true; // aborted
897      }
898
899      fsImage.format(fsn, clusterId);
900    } catch (IOException ioe) {
901      LOG.warn("Encountered exception during format: ", ioe);
902      fsImage.close();
903      throw ioe;
904    }
905    return false;
906  }
907
908  public static void checkAllowFormat(Configuration conf) throws IOException {
909    if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, 
910        DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
911      throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
912                + " is set to false for this filesystem, so it "
913                + "cannot be formatted. You will need to set "
914                + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter "
915                + "to true in order to format this filesystem");
916    }
917  }
918  
  /** Convenience overload: initialize shared edits with force set to true. */
  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf) throws IOException {
    return initializeSharedEdits(conf, true);
  }
923  
  /**
   * Convenience overload: initialize shared edits without interactive
   * prompting.
   * @return true if the operation aborted, false otherwise
   */
  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf,
      boolean force) throws IOException {
    return initializeSharedEdits(conf, force, false);
  }
929
930  /**
931   * Clone the supplied configuration but remove the shared edits dirs.
932   *
933   * @param conf Supplies the original configuration.
934   * @return Cloned configuration without the shared edit dirs.
935   * @throws IOException on failure to generate the configuration.
936   */
937  private static Configuration getConfigurationWithoutSharedEdits(
938      Configuration conf)
939      throws IOException {
940    List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
941    String editsDirsString = Joiner.on(",").join(editsDirs);
942
943    Configuration confWithoutShared = new Configuration(conf);
944    confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
945    confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
946        editsDirsString);
947    return confWithoutShared;
948  }
949
950  /**
951   * Format a new shared edits dir and copy in enough edit log segments so that
952   * the standby NN can start up.
953   * 
954   * @param conf configuration
955   * @param force format regardless of whether or not the shared edits dir exists
956   * @param interactive prompt the user when a dir exists
957   * @return true if the command aborts, false otherwise
958   */
959  private static boolean initializeSharedEdits(Configuration conf,
960      boolean force, boolean interactive) throws IOException {
961    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
962    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
963    initializeGenericKeys(conf, nsId, namenodeId);
964    
965    if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
966      LOG.fatal("No shared edits directory configured for namespace " +
967          nsId + " namenode " + namenodeId);
968      return false;
969    }
970
971    if (UserGroupInformation.isSecurityEnabled()) {
972      InetSocketAddress socAddr = getAddress(conf);
973      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
974          DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
975    }
976
977    NNStorage existingStorage = null;
978    FSImage sharedEditsImage = null;
979    try {
980      FSNamesystem fsns =
981          FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));
982      
983      existingStorage = fsns.getFSImage().getStorage();
984      NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();
985      
986      List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);
987      
988      sharedEditsImage = new FSImage(conf,
989          Lists.<URI>newArrayList(),
990          sharedEditsDirs);
991      sharedEditsImage.getEditLog().initJournalsForWrite();
992      
993      if (!sharedEditsImage.confirmFormat(force, interactive)) {
994        return true; // abort
995      }
996      
997      NNStorage newSharedStorage = sharedEditsImage.getStorage();
998      // Call Storage.format instead of FSImage.format here, since we don't
999      // actually want to save a checkpoint - just prime the dirs with
1000      // the existing namespace info
1001      newSharedStorage.format(nsInfo);
1002      sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);
1003
1004      // Need to make sure the edit log segments are in good shape to initialize
1005      // the shared edits dir.
1006      fsns.getFSImage().getEditLog().close();
1007      fsns.getFSImage().getEditLog().initJournalsForWrite();
1008      fsns.getFSImage().getEditLog().recoverUnclosedStreams();
1009
1010      copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
1011          conf);
1012    } catch (IOException ioe) {
1013      LOG.error("Could not initialize shared edits dir", ioe);
1014      return true; // aborted
1015    } finally {
1016      if (sharedEditsImage != null) {
1017        try {
1018          sharedEditsImage.close();
1019        }  catch (IOException ioe) {
1020          LOG.warn("Could not close sharedEditsImage", ioe);
1021        }
1022      }
1023      // Have to unlock storage explicitly for the case when we're running in a
1024      // unit test, which runs in the same JVM as NNs.
1025      if (existingStorage != null) {
1026        try {
1027          existingStorage.unlockAll();
1028        } catch (IOException ioe) {
1029          LOG.warn("Could not unlock storage directories", ioe);
1030          return true; // aborted
1031        }
1032      }
1033    }
1034    return false; // did not abort
1035  }
1036
  /**
   * Copy every edit log segment after the last checkpoint from the local
   * edit log into the freshly-formatted shared edits dir, replaying ops
   * one at a time and preserving segment boundaries.
   *
   * @param fsns namesystem whose local edit log is the copy source
   * @param sharedEditsDirs target shared edits dirs (must be non-empty)
   * @param newSharedStorage storage backing the new shared dirs
   * @param conf configuration used to construct the target edit log
   * @throws IOException if reading or writing any segment fails
   */
  private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
      Configuration conf) throws IOException {
    Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
        "No shared edits specified");
    // Copy edit log segments into the new shared edits dir.
    List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
    FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
        sharedEditsUris);
    newSharedEditLog.initJournalsForWrite();
    newSharedEditLog.recoverUnclosedStreams();
    
    FSEditLog sourceEditLog = fsns.getFSImage().editLog;
    
    long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();
    
    Collection<EditLogInputStream> streams = null;
    try {
      streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);

      // Set the nextTxid to the CheckpointTxId+1
      newSharedEditLog.setNextTxId(fromTxId + 1);

      // Copy all edits after last CheckpointTxId to shared edits dir
      for (EditLogInputStream stream : streams) {
        LOG.debug("Beginning to copy stream " + stream + " to shared edits");
        FSEditLogOp op;
        boolean segmentOpen = false;
        while ((op = stream.readOp()) != null) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("copying op: " + op);
          }
          // Open a target segment lazily at the first op of each segment.
          if (!segmentOpen) {
            newSharedEditLog.startLogSegment(op.txid, false);
            segmentOpen = true;
          }

          newSharedEditLog.logEdit(op);

          // Mirror the source's segment boundary in the target log.
          if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
            newSharedEditLog.logSync();
            newSharedEditLog.endCurrentLogSegment(false);
            LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
                + stream);
            segmentOpen = false;
          }
        }

        // The stream may end without an explicit END_LOG_SEGMENT op; close
        // the in-progress target segment in that case.
        if (segmentOpen) {
          LOG.debug("ending log segment because of end of stream in " + stream);
          newSharedEditLog.logSync();
          newSharedEditLog.endCurrentLogSegment(false);
          segmentOpen = false;
        }
      }
    } finally {
      if (streams != null) {
        FSEditLog.closeAllStreams(streams);
      }
    }
  }
1098  
1099  @VisibleForTesting
1100  public static boolean doRollback(Configuration conf,
1101      boolean isConfirmationNeeded) throws IOException {
1102    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1103    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1104    initializeGenericKeys(conf, nsId, namenodeId);
1105
1106    FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
1107    System.err.print(
1108        "\"rollBack\" will remove the current state of the file system,\n"
1109        + "returning you to the state prior to initiating your recent.\n"
1110        + "upgrade. This action is permanent and cannot be undone. If you\n"
1111        + "are performing a rollback in an HA environment, you should be\n"
1112        + "certain that no NameNode process is running on any host.");
1113    if (isConfirmationNeeded) {
1114      if (!confirmPrompt("Roll back file system state?")) {
1115        System.err.println("Rollback aborted.");
1116        return true;
1117      }
1118    }
1119    nsys.dir.fsImage.doRollback(nsys);
1120    return false;
1121  }
1122
  /** Print the NameNode usage banner, followed by a blank line. */
  private static void printUsage(PrintStream out) {
    out.println(USAGE + "\n");
  }
1126
1127  @VisibleForTesting
1128  static StartupOption parseArguments(String args[]) {
1129    int argsLen = (args == null) ? 0 : args.length;
1130    StartupOption startOpt = StartupOption.REGULAR;
1131    for(int i=0; i < argsLen; i++) {
1132      String cmd = args[i];
1133      if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
1134        startOpt = StartupOption.FORMAT;
1135        for (i = i + 1; i < argsLen; i++) {
1136          if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
1137            i++;
1138            if (i >= argsLen) {
1139              // if no cluster id specified, return null
1140              LOG.fatal("Must specify a valid cluster ID after the "
1141                  + StartupOption.CLUSTERID.getName() + " flag");
1142              return null;
1143            }
1144            String clusterId = args[i];
1145            // Make sure an id is specified and not another flag
1146            if (clusterId.isEmpty() ||
1147                clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
1148                clusterId.equalsIgnoreCase(
1149                    StartupOption.NONINTERACTIVE.getName())) {
1150              LOG.fatal("Must specify a valid cluster ID after the "
1151                  + StartupOption.CLUSTERID.getName() + " flag");
1152              return null;
1153            }
1154            startOpt.setClusterId(clusterId);
1155          }
1156
1157          if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
1158            startOpt.setForceFormat(true);
1159          }
1160
1161          if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
1162            startOpt.setInteractiveFormat(false);
1163          }
1164        }
1165      } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
1166        startOpt = StartupOption.GENCLUSTERID;
1167      } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
1168        startOpt = StartupOption.REGULAR;
1169      } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
1170        startOpt = StartupOption.BACKUP;
1171      } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
1172        startOpt = StartupOption.CHECKPOINT;
1173      } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) {
1174        startOpt = StartupOption.UPGRADE;
1175        /* Can be followed by CLUSTERID with a required parameter or
1176         * RENAMERESERVED with an optional parameter
1177         */
1178        while (i + 1 < argsLen) {
1179          String flag = args[i + 1];
1180          if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
1181            if (i + 2 < argsLen) {
1182              i += 2;
1183              startOpt.setClusterId(args[i]);
1184            } else {
1185              LOG.fatal("Must specify a valid cluster ID after the "
1186                  + StartupOption.CLUSTERID.getName() + " flag");
1187              return null;
1188            }
1189          } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED
1190              .getName())) {
1191            if (i + 2 < argsLen) {
1192              FSImageFormat.setRenameReservedPairs(args[i + 2]);
1193              i += 2;
1194            } else {
1195              FSImageFormat.useDefaultRenameReservedPairs();
1196              i += 1;
1197            }
1198          } else {
1199            LOG.fatal("Unknown upgrade flag " + flag);
1200            return null;
1201          }
1202        }
1203      } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) {
1204        startOpt = StartupOption.ROLLINGUPGRADE;
1205        ++i;
1206        startOpt.setRollingUpgradeStartupOption(args[i]);
1207      } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
1208        startOpt = StartupOption.ROLLBACK;
1209      } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
1210        startOpt = StartupOption.FINALIZE;
1211      } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
1212        startOpt = StartupOption.IMPORT;
1213      } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
1214        startOpt = StartupOption.BOOTSTRAPSTANDBY;
1215        return startOpt;
1216      } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
1217        startOpt = StartupOption.INITIALIZESHAREDEDITS;
1218        for (i = i + 1 ; i < argsLen; i++) {
1219          if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
1220            startOpt.setInteractiveFormat(false);
1221          } else if (StartupOption.FORCE.getName().equals(args[i])) {
1222            startOpt.setForceFormat(true);
1223          } else {
1224            LOG.fatal("Invalid argument: " + args[i]);
1225            return null;
1226          }
1227        }
1228        return startOpt;
1229      } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
1230        if (startOpt != StartupOption.REGULAR) {
1231          throw new RuntimeException("Can't combine -recover with " +
1232              "other startup options.");
1233        }
1234        startOpt = StartupOption.RECOVER;
1235        while (++i < argsLen) {
1236          if (args[i].equalsIgnoreCase(
1237                StartupOption.FORCE.getName())) {
1238            startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
1239          } else {
1240            throw new RuntimeException("Error parsing recovery options: " + 
1241              "can't understand option \"" + args[i] + "\"");
1242          }
1243        }
1244      } else {
1245        return null;
1246      }
1247    }
1248    return startOpt;
1249  }
1250
  /** Record the chosen startup option in the configuration. */
  private static void setStartupOption(Configuration conf, StartupOption opt) {
    conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name());
  }
1254
  /**
   * Read back the startup option recorded by {@link #setStartupOption},
   * defaulting to REGULAR when the key is unset.
   */
  static StartupOption getStartupOption(Configuration conf) {
    return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
                                          StartupOption.REGULAR.toString()));
  }
1259
  /**
   * Run NameNode metadata recovery: load the namesystem from disk and save
   * a fresh namespace image. Prompts for confirmation unless the startup
   * option's force level is FORCE_ALL.
   *
   * @param startOpt the RECOVER startup option (carries the force level)
   * @param conf configuration (federation/HA keys are resolved in place)
   * @throws IOException if loading or saving the namespace fails
   */
  private static void doRecovery(StartupOption startOpt, Configuration conf)
      throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    // Recovery can permanently delete data, so require explicit consent
    // unless the user passed the strongest force flag.
    if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
      if (!confirmPrompt("You have selected Metadata Recovery mode.  " +
          "This mode is intended to recover lost metadata on a corrupt " +
          "filesystem.  Metadata recovery mode often permanently deletes " +
          "data from your HDFS filesystem.  Please back up your edit log " +
          "and fsimage before trying this!\n\n" +
          "Are you ready to proceed? (Y/N)\n")) {
        System.err.println("Recovery aborted at user request.\n");
        return;
      }
    }
    MetaRecoveryContext.LOG.info("starting recovery...");
    UserGroupInformation.setConfiguration(conf);
    NameNode.initMetrics(conf, startOpt.toNodeRole());
    FSNamesystem fsn = null;
    try {
      fsn = FSNamesystem.loadFromDisk(conf);
      // Persist whatever could be recovered as a new checkpoint.
      fsn.getFSImage().saveNamespace(fsn);
      MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
    } catch (IOException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } catch (RuntimeException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } finally {
      if (fsn != null)
        fsn.close();
    }
  }
1295
  /**
   * Parse the command line and either run a one-shot administrative action
   * (format, rollback, ...) — in which case the process terminates and null
   * is returned — or construct and return a running NameNode/BackupNode.
   *
   * @param argv command line arguments
   * @param conf configuration; a fresh HdfsConfiguration is created if null
   * @return the created node, or null when the option was handled in-line
   * @throws IOException on failure to start the requested service
   */
  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    LOG.info("createNameNode " + Arrays.asList(argv));
    if (conf == null)
      conf = new HdfsConfiguration();
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      printUsage(System.err);
      return null;
    }
    // Make the option visible to code that reads it back from conf later.
    setStartupOption(conf, startOpt);

    switch (startOpt) {
      case FORMAT: {
        boolean aborted = format(conf, startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case GENCLUSTERID: {
        System.err.println("Generating new cluster id:");
        System.out.println(NNStorage.newClusterID());
        terminate(0);
        return null;
      }
      case FINALIZE: {
        System.err.println("Use of the argument '" + StartupOption.FINALIZE +
            "' is no longer supported. To finalize an upgrade, start the NN " +
            " and then run `hdfs dfsadmin -finalizeUpgrade'");
        terminate(1);
        return null; // avoid javac warning
      }
      case ROLLBACK: {
        boolean aborted = doRollback(conf, true);
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BOOTSTRAPSTANDBY: {
        // Drop the option itself; the rest of argv belongs to the tool.
        String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
        int rc = BootstrapStandby.run(toolArgs, conf);
        terminate(rc);
        return null; // avoid warning
      }
      case INITIALIZESHAREDEDITS: {
        boolean aborted = initializeSharedEdits(conf,
            startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BACKUP:
      case CHECKPOINT: {
        NamenodeRole role = startOpt.toNodeRole();
        DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
        return new BackupNode(conf, role);
      }
      case RECOVER: {
        NameNode.doRecovery(startOpt, conf);
        return null;
      }
      default: {
        // REGULAR and any other option: start a normal NameNode.
        DefaultMetricsSystem.initialize("NameNode");
        return new NameNode(conf);
      }
    }
  }
1362
1363  /**
1364   * In federation configuration is set for a set of
1365   * namenode and secondary namenode/backup/checkpointer, which are
1366   * grouped under a logical nameservice ID. The configuration keys specific 
1367   * to them have suffix set to configured nameserviceId.
1368   * 
1369   * This method copies the value from specific key of format key.nameserviceId
1370   * to key, to set up the generic configuration. Once this is done, only
1371   * generic version of the configuration is read in rest of the code, for
1372   * backward compatibility and simpler code changes.
1373   * 
1374   * @param conf
1375   *          Configuration object to lookup specific key and to set the value
1376   *          to the key passed. Note the conf object is modified
1377   * @param nameserviceId name service Id (to distinguish federated NNs)
1378   * @param namenodeId the namenode ID (to distinguish HA NNs)
1379   * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
1380   */
1381  public static void initializeGenericKeys(Configuration conf,
1382      String nameserviceId, String namenodeId) {
1383    if ((nameserviceId != null && !nameserviceId.isEmpty()) || 
1384        (namenodeId != null && !namenodeId.isEmpty())) {
1385      if (nameserviceId != null) {
1386        conf.set(DFS_NAMESERVICE_ID, nameserviceId);
1387      }
1388      if (namenodeId != null) {
1389        conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
1390      }
1391      
1392      DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
1393          NAMENODE_SPECIFIC_KEYS);
1394      DFSUtil.setGenericConf(conf, nameserviceId, null,
1395          NAMESERVICE_SPECIFIC_KEYS);
1396    }
1397    
1398    // If the RPC address is set use it to (re-)configure the default FS
1399    if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
1400      URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
1401          + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
1402      conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
1403      LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
1404    }
1405  }
1406    
1407  /** 
1408   * Get the name service Id for the node
1409   * @return name service Id or null if federation is not configured
1410   */
1411  protected String getNameServiceId(Configuration conf) {
1412    return DFSUtil.getNamenodeNameServiceId(conf);
1413  }
1414  
1415  /**
1416   */
1417  public static void main(String argv[]) throws Exception {
1418    if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
1419      System.exit(0);
1420    }
1421
1422    try {
1423      StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
1424      NameNode namenode = createNameNode(argv, null);
1425      if (namenode != null) {
1426        namenode.join();
1427      }
1428    } catch (Throwable e) {
1429      LOG.fatal("Exception in namenode join", e);
1430      terminate(1, e);
1431    }
1432  }
1433
1434  synchronized void monitorHealth() 
1435      throws HealthCheckFailedException, AccessControlException {
1436    namesystem.checkSuperuserPrivilege();
1437    if (!haEnabled) {
1438      return; // no-op, if HA is not enabled
1439    }
1440    getNamesystem().checkAvailableResources();
1441    if (!getNamesystem().nameNodeHasResourcesAvailable()) {
1442      throw new HealthCheckFailedException(
1443          "The NameNode has no resources available");
1444    }
1445  }
1446  
1447  synchronized void transitionToActive() 
1448      throws ServiceFailedException, AccessControlException {
1449    namesystem.checkSuperuserPrivilege();
1450    if (!haEnabled) {
1451      throw new ServiceFailedException("HA for namenode is not enabled");
1452    }
1453    state.setState(haContext, ACTIVE_STATE);
1454  }
1455  
1456  synchronized void transitionToStandby() 
1457      throws ServiceFailedException, AccessControlException {
1458    namesystem.checkSuperuserPrivilege();
1459    if (!haEnabled) {
1460      throw new ServiceFailedException("HA for namenode is not enabled");
1461    }
1462    state.setState(haContext, STANDBY_STATE);
1463  }
1464
1465  synchronized HAServiceStatus getServiceStatus()
1466      throws ServiceFailedException, AccessControlException {
1467    namesystem.checkSuperuserPrivilege();
1468    if (!haEnabled) {
1469      throw new ServiceFailedException("HA for namenode is not enabled");
1470    }
1471    if (state == null) {
1472      return new HAServiceStatus(HAServiceState.INITIALIZING);
1473    }
1474    HAServiceState retState = state.getServiceState();
1475    HAServiceStatus ret = new HAServiceStatus(retState);
1476    if (retState == HAServiceState.STANDBY) {
1477      String safemodeTip = namesystem.getSafeModeTip();
1478      if (!safemodeTip.isEmpty()) {
1479        ret.setNotReadyToBecomeActive(
1480            "The NameNode is in safemode. " +
1481            safemodeTip);
1482      } else {
1483        ret.setReadyToBecomeActive();
1484      }
1485    } else if (retState == HAServiceState.ACTIVE) {
1486      ret.setReadyToBecomeActive();
1487    } else {
1488      ret.setNotReadyToBecomeActive("State is " + state);
1489    }
1490    return ret;
1491  }
1492
1493  synchronized HAServiceState getServiceState() {
1494    if (state == null) {
1495      return HAServiceState.INITIALIZING;
1496    }
1497    return state.getServiceState();
1498  }
1499
1500  /**
1501   * Register NameNodeStatusMXBean
1502   */
1503  private void registerNNSMXBean() {
1504    nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this);
1505  }
1506
1507  @Override // NameNodeStatusMXBean
1508  public String getNNRole() {
1509    String roleStr = "";
1510    NamenodeRole role = getRole();
1511    if (null != role) {
1512      roleStr = role.toString();
1513    }
1514    return roleStr;
1515  }
1516
1517  @Override // NameNodeStatusMXBean
1518  public String getState() {
1519    String servStateStr = "";
1520    HAServiceState servState = getServiceState();
1521    if (null != servState) {
1522      servStateStr = servState.toString();
1523    }
1524    return servStateStr;
1525  }
1526
  /** @return the NN RPC address in "host:port" form, for the status MXBean. */
  @Override // NameNodeStatusMXBean
  public String getHostAndPort() {
    return getNameNodeAddressHostPortString();
  }
1531
  /** @return whether Hadoop security is enabled for this process. */
  @Override // NameNodeStatusMXBean
  public boolean isSecurityEnabled() {
    return UserGroupInformation.isSecurityEnabled();
  }
1536
1537  /**
1538   * Shutdown the NN immediately in an ungraceful way. Used when it would be
1539   * unsafe for the NN to continue operating, e.g. during a failed HA state
1540   * transition.
1541   * 
1542   * @param t exception which warrants the shutdown. Printed to the NN log
1543   *          before exit.
1544   * @throws ExitException thrown only for testing.
1545   */
1546  protected synchronized void doImmediateShutdown(Throwable t)
1547      throws ExitException {
1548    String message = "Error encountered requiring NN shutdown. " +
1549        "Shutting down immediately.";
1550    try {
1551      LOG.fatal(message, t);
1552    } catch (Throwable ignored) {
1553      // This is unlikely to happen, but there's nothing we can do if it does.
1554    }
1555    terminate(1, t);
1556  }
1557  
1558  /**
1559   * Class used to expose {@link NameNode} as context to {@link HAState}
1560   */
1561  protected class NameNodeHAContext implements HAContext {
1562    @Override
1563    public void setState(HAState s) {
1564      state = s;
1565    }
1566
1567    @Override
1568    public HAState getState() {
1569      return state;
1570    }
1571
1572    @Override
1573    public void startActiveServices() throws IOException {
1574      try {
1575        namesystem.startActiveServices();
1576        startTrashEmptier(conf);
1577      } catch (Throwable t) {
1578        doImmediateShutdown(t);
1579      }
1580    }
1581
1582    @Override
1583    public void stopActiveServices() throws IOException {
1584      try {
1585        if (namesystem != null) {
1586          namesystem.stopActiveServices();
1587        }
1588        stopTrashEmptier();
1589      } catch (Throwable t) {
1590        doImmediateShutdown(t);
1591      }
1592    }
1593
1594    @Override
1595    public void startStandbyServices() throws IOException {
1596      try {
1597        namesystem.startStandbyServices(conf);
1598      } catch (Throwable t) {
1599        doImmediateShutdown(t);
1600      }
1601    }
1602
1603    @Override
1604    public void prepareToStopStandbyServices() throws ServiceFailedException {
1605      try {
1606        namesystem.prepareToStopStandbyServices();
1607      } catch (Throwable t) {
1608        doImmediateShutdown(t);
1609      }
1610    }
1611    
1612    @Override
1613    public void stopStandbyServices() throws IOException {
1614      try {
1615        if (namesystem != null) {
1616          namesystem.stopStandbyServices();
1617        }
1618      } catch (Throwable t) {
1619        doImmediateShutdown(t);
1620      }
1621    }
1622    
1623    @Override
1624    public void writeLock() {
1625      namesystem.writeLock();
1626      namesystem.lockRetryCache();
1627    }
1628    
1629    @Override
1630    public void writeUnlock() {
1631      namesystem.unlockRetryCache();
1632      namesystem.writeUnlock();
1633    }
1634    
1635    /** Check if an operation of given category is allowed */
1636    @Override
1637    public void checkOperation(final OperationCategory op)
1638        throws StandbyException {
1639      state.checkOperation(haContext, op);
1640    }
1641    
1642    @Override
1643    public boolean allowStaleReads() {
1644      return allowStaleStandbyReads;
1645    }
1646
1647  }
1648  
  /**
   * @return true if the current HA state is standby.
   * NOTE(review): dereferences {@code state} without a null check, unlike
   * getServiceState(); assumes the state is initialized before this is
   * called — confirm with callers.
   */
  public boolean isStandbyState() {
    return (state.equals(STANDBY_STATE));
  }
1652
1653  /**
1654   * Check that a request to change this node's HA state is valid.
1655   * In particular, verifies that, if auto failover is enabled, non-forced
1656   * requests from the HAAdmin CLI are rejected, and vice versa.
1657   *
1658   * @param req the request to check
1659   * @throws AccessControlException if the request is disallowed
1660   */
1661  void checkHaStateChange(StateChangeRequestInfo req)
1662      throws AccessControlException {
1663    boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
1664        DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
1665    switch (req.getSource()) {
1666    case REQUEST_BY_USER:
1667      if (autoHaEnabled) {
1668        throw new AccessControlException(
1669            "Manual HA control for this NameNode is disallowed, because " +
1670            "automatic HA is enabled.");
1671      }
1672      break;
1673    case REQUEST_BY_USER_FORCED:
1674      if (autoHaEnabled) {
1675        LOG.warn("Allowing manual HA control from " +
1676            Server.getRemoteAddress() +
1677            " even though automatic HA is enabled, because the user " +
1678            "specified the force flag");
1679      }
1680      break;
1681    case REQUEST_BY_ZKFC:
1682      if (!autoHaEnabled) {
1683        throw new AccessControlException(
1684            "Request from ZK failover controller at " +
1685            Server.getRemoteAddress() + " denied since automatic HA " +
1686            "is not enabled"); 
1687      }
1688      break;
1689    }
1690  }
1691}