001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import com.google.common.annotations.VisibleForTesting; 021import com.google.common.base.Joiner; 022import com.google.common.base.Preconditions; 023import com.google.common.collect.Lists; 024 025import org.apache.commons.logging.Log; 026import org.apache.commons.logging.LogFactory; 027import org.apache.hadoop.HadoopIllegalArgumentException; 028import org.apache.hadoop.classification.InterfaceAudience; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FileSystem; 031import org.apache.hadoop.fs.Trash; 032import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; 033import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; 034import org.apache.hadoop.ha.HAServiceStatus; 035import org.apache.hadoop.ha.HealthCheckFailedException; 036import org.apache.hadoop.ha.ServiceFailedException; 037import org.apache.hadoop.hdfs.DFSConfigKeys; 038import org.apache.hadoop.hdfs.DFSUtil; 039import org.apache.hadoop.hdfs.HAUtil; 040import org.apache.hadoop.hdfs.HdfsConfiguration; 041import org.apache.hadoop.hdfs.protocol.ClientProtocol; 042import 
org.apache.hadoop.hdfs.protocol.HdfsConstants; 043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; 044import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption; 045import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 046import org.apache.hadoop.hdfs.server.namenode.ha.*; 047import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; 048import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 049import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics; 050import org.apache.hadoop.hdfs.server.protocol.*; 051import org.apache.hadoop.ipc.Server; 052import org.apache.hadoop.ipc.StandbyException; 053import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; 054import org.apache.hadoop.metrics2.util.MBeans; 055import org.apache.hadoop.net.NetUtils; 056import org.apache.hadoop.security.AccessControlException; 057import org.apache.hadoop.security.RefreshUserMappingsProtocol; 058import org.apache.hadoop.security.SecurityUtil; 059import org.apache.hadoop.security.UserGroupInformation; 060import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; 061import org.apache.hadoop.ipc.RefreshCallQueueProtocol; 062import org.apache.hadoop.tools.GetUserMappingsProtocol; 063import org.apache.hadoop.util.ExitUtil.ExitException; 064import org.apache.hadoop.util.JvmPauseMonitor; 065import org.apache.hadoop.util.ServicePlugin; 066import org.apache.hadoop.util.StringUtils; 067 068import javax.management.ObjectName; 069 070import java.io.IOException; 071import java.io.PrintStream; 072import java.net.InetSocketAddress; 073import java.net.URI; 074import java.security.PrivilegedExceptionAction; 075import java.util.ArrayList; 076import java.util.Arrays; 077import java.util.Collection; 078import java.util.List; 079 080import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; 081import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; 082import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; 083import static org.apache.hadoop.hdfs.DFSConfigKeys.*; 084import static org.apache.hadoop.util.ExitUtil.terminate; 085import static org.apache.hadoop.util.ToolRunner.confirmPrompt; 086 087/********************************************************** 088 * NameNode serves as both directory namespace manager and 089 * "inode table" for the Hadoop DFS. There is a single NameNode 090 * running in any DFS deployment. (Well, except when there 091 * is a second backup/failover NameNode, or when using federated NameNodes.) 092 * 093 * The NameNode controls two critical tables: 094 * 1) filename->blocksequence (namespace) 095 * 2) block->machinelist ("inodes") 096 * 097 * The first table is stored on disk and is very precious. 098 * The second table is rebuilt every time the NameNode comes up. 099 * 100 * 'NameNode' refers to both this class as well as the 'NameNode server'. 101 * The 'FSNamesystem' class actually performs most of the filesystem 102 * management. The majority of the 'NameNode' class itself is concerned 103 * with exposing the IPC interface and the HTTP server to the outside world, 104 * plus some configuration management. 105 * 106 * NameNode implements the 107 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which 108 * allows clients to ask for DFS services. 109 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for 110 * direct use by authors of DFS client code. End-users should instead use the 111 * {@link org.apache.hadoop.fs.FileSystem} class. 112 * 113 * NameNode also implements the 114 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface, 115 * used by DataNodes that actually store DFS data blocks. These 116 * methods are invoked repeatedly and automatically by all the 117 * DataNodes in a DFS deployment. 
118 * 119 * NameNode also implements the 120 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface, 121 * used by secondary namenodes or rebalancing processes to get partial 122 * NameNode state, for example partial blocksMap etc. 123 **********************************************************/ 124@InterfaceAudience.Private 125public class NameNode implements NameNodeStatusMXBean { 126 static{ 127 HdfsConfiguration.init(); 128 } 129 130 /** 131 * Categories of operations supported by the namenode. 132 */ 133 public static enum OperationCategory { 134 /** Operations that are state agnostic */ 135 UNCHECKED, 136 /** Read operation that does not change the namespace state */ 137 READ, 138 /** Write operation that changes the namespace state */ 139 WRITE, 140 /** Operations related to checkpointing */ 141 CHECKPOINT, 142 /** Operations related to {@link JournalProtocol} */ 143 JOURNAL 144 } 145 146 /** 147 * HDFS configuration can have three types of parameters: 148 * <ol> 149 * <li>Parameters that are common for all the name services in the cluster.</li> 150 * <li>Parameters that are specific to a name service. These keys are suffixed 151 * with nameserviceId in the configuration. For example, 152 * "dfs.namenode.rpc-address.nameservice1".</li> 153 * <li>Parameters that are specific to a single name node. These keys are suffixed 154 * with nameserviceId and namenodeId in the configuration. for example, 155 * "dfs.namenode.rpc-address.nameservice1.namenode1"</li> 156 * </ol> 157 * 158 * In the latter cases, operators may specify the configuration without 159 * any suffix, with a nameservice suffix, or with a nameservice and namenode 160 * suffix. The more specific suffix will take precedence. 161 * 162 * These keys are specific to a given namenode, and thus may be configured 163 * globally, for a nameservice, or for a specific namenode within a nameservice. 
164 */ 165 public static final String[] NAMENODE_SPECIFIC_KEYS = { 166 DFS_NAMENODE_RPC_ADDRESS_KEY, 167 DFS_NAMENODE_RPC_BIND_HOST_KEY, 168 DFS_NAMENODE_NAME_DIR_KEY, 169 DFS_NAMENODE_EDITS_DIR_KEY, 170 DFS_NAMENODE_SHARED_EDITS_DIR_KEY, 171 DFS_NAMENODE_CHECKPOINT_DIR_KEY, 172 DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY, 173 DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, 174 DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY, 175 DFS_NAMENODE_HTTP_ADDRESS_KEY, 176 DFS_NAMENODE_HTTPS_ADDRESS_KEY, 177 DFS_NAMENODE_KEYTAB_FILE_KEY, 178 DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, 179 DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY, 180 DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY, 181 DFS_NAMENODE_BACKUP_ADDRESS_KEY, 182 DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY, 183 DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY, 184 DFS_NAMENODE_USER_NAME_KEY, 185 DFS_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY, 186 DFS_HA_FENCE_METHODS_KEY, 187 DFS_HA_ZKFC_PORT_KEY, 188 DFS_HA_FENCE_METHODS_KEY 189 }; 190 191 /** 192 * @see #NAMENODE_SPECIFIC_KEYS 193 * These keys are specific to a nameservice, but may not be overridden 194 * for a specific namenode. 
195 */ 196 public static final String[] NAMESERVICE_SPECIFIC_KEYS = { 197 DFS_HA_AUTO_FAILOVER_ENABLED_KEY 198 }; 199 200 private static final String USAGE = "Usage: java NameNode [" 201 + StartupOption.BACKUP.getName() + "] | [" 202 + StartupOption.CHECKPOINT.getName() + "] | [" 203 + StartupOption.FORMAT.getName() + " [" 204 + StartupOption.CLUSTERID.getName() + " cid ] [" 205 + StartupOption.FORCE.getName() + "] [" 206 + StartupOption.NONINTERACTIVE.getName() + "] ] | [" 207 + StartupOption.UPGRADE.getName() + 208 " [" + StartupOption.CLUSTERID.getName() + " cid]" + 209 " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | [" 210 + StartupOption.ROLLBACK.getName() + "] | [" 211 + StartupOption.ROLLINGUPGRADE.getName() + " <" 212 + RollingUpgradeStartupOption.DOWNGRADE.name().toLowerCase() + "|" 213 + RollingUpgradeStartupOption.ROLLBACK.name().toLowerCase() + "> ] | [" 214 + StartupOption.FINALIZE.getName() + "] | [" 215 + StartupOption.IMPORT.getName() + "] | [" 216 + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | [" 217 + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | [" 218 + StartupOption.RECOVER.getName() + " [ " + StartupOption.FORCE.getName() 219 + " ] ]"; 220 221 public long getProtocolVersion(String protocol, 222 long clientVersion) throws IOException { 223 if (protocol.equals(ClientProtocol.class.getName())) { 224 return ClientProtocol.versionID; 225 } else if (protocol.equals(DatanodeProtocol.class.getName())){ 226 return DatanodeProtocol.versionID; 227 } else if (protocol.equals(NamenodeProtocol.class.getName())){ 228 return NamenodeProtocol.versionID; 229 } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){ 230 return RefreshAuthorizationPolicyProtocol.versionID; 231 } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){ 232 return RefreshUserMappingsProtocol.versionID; 233 } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) { 234 return 
RefreshCallQueueProtocol.versionID; 235 } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){ 236 return GetUserMappingsProtocol.versionID; 237 } else { 238 throw new IOException("Unknown protocol to name node: " + protocol); 239 } 240 } 241 242 public static final int DEFAULT_PORT = 8020; 243 public static final Log LOG = LogFactory.getLog(NameNode.class.getName()); 244 public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange"); 245 public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange"); 246 public static final HAState ACTIVE_STATE = new ActiveState(); 247 public static final HAState STANDBY_STATE = new StandbyState(); 248 249 protected FSNamesystem namesystem; 250 protected final Configuration conf; 251 protected final NamenodeRole role; 252 private volatile HAState state; 253 private final boolean haEnabled; 254 private final HAContext haContext; 255 protected final boolean allowStaleStandbyReads; 256 257 258 /** httpServer */ 259 protected NameNodeHttpServer httpServer; 260 private Thread emptier; 261 /** only used for testing purposes */ 262 protected boolean stopRequested = false; 263 /** Registration information of this name-node */ 264 protected NamenodeRegistration nodeRegistration; 265 /** Activated plug-ins. */ 266 private List<ServicePlugin> plugins; 267 268 private NameNodeRpcServer rpcServer; 269 270 private JvmPauseMonitor pauseMonitor; 271 private ObjectName nameNodeStatusBeanName; 272 /** 273 * The namenode address that clients will use to access this namenode 274 * or the name service. For HA configurations using logical URI, it 275 * will be the logical address. 276 */ 277 private String clientNamenodeAddress; 278 279 /** Format a new filesystem. Destroys any filesystem that may already 280 * exist at this location. 
**/ 281 public static void format(Configuration conf) throws IOException { 282 format(conf, true, true); 283 } 284 285 static NameNodeMetrics metrics; 286 private static final StartupProgress startupProgress = new StartupProgress(); 287 /** Return the {@link FSNamesystem} object. 288 * @return {@link FSNamesystem} object. 289 */ 290 public FSNamesystem getNamesystem() { 291 return namesystem; 292 } 293 294 public NamenodeProtocols getRpcServer() { 295 return rpcServer; 296 } 297 298 static void initMetrics(Configuration conf, NamenodeRole role) { 299 metrics = NameNodeMetrics.create(conf, role); 300 } 301 302 public static NameNodeMetrics getNameNodeMetrics() { 303 return metrics; 304 } 305 306 /** 307 * Returns object used for reporting namenode startup progress. 308 * 309 * @return StartupProgress for reporting namenode startup progress 310 */ 311 public static StartupProgress getStartupProgress() { 312 return startupProgress; 313 } 314 315 /** 316 * Return the service name of the issued delegation token. 317 * 318 * @return The name service id in HA-mode, or the rpc address in non-HA mode 319 */ 320 public String getTokenServiceName() { 321 return getClientNamenodeAddress(); 322 } 323 324 /** 325 * Set the namenode address that will be used by clients to access this 326 * namenode or name service. This needs to be called before the config 327 * is overriden. 328 */ 329 public void setClientNamenodeAddress(Configuration conf) { 330 String nnAddr = conf.get(FS_DEFAULT_NAME_KEY); 331 if (nnAddr == null) { 332 // default fs is not set. 
333 clientNamenodeAddress = null; 334 return; 335 } 336 337 LOG.info(FS_DEFAULT_NAME_KEY + " is " + nnAddr); 338 URI nnUri = URI.create(nnAddr); 339 340 String nnHost = nnUri.getHost(); 341 if (nnHost == null) { 342 clientNamenodeAddress = null; 343 return; 344 } 345 346 if (DFSUtil.getNameServiceIds(conf).contains(nnHost)) { 347 // host name is logical 348 clientNamenodeAddress = nnHost; 349 } else if (nnUri.getPort() > 0) { 350 // physical address with a valid port 351 clientNamenodeAddress = nnUri.getAuthority(); 352 } else { 353 // the port is missing or 0. Figure out real bind address later. 354 clientNamenodeAddress = null; 355 return; 356 } 357 LOG.info("Clients are to use " + clientNamenodeAddress + " to access" 358 + " this namenode/service."); 359 } 360 361 /** 362 * Get the namenode address to be used by clients. 363 * @return nn address 364 */ 365 public String getClientNamenodeAddress() { 366 return clientNamenodeAddress; 367 } 368 369 public static InetSocketAddress getAddress(String address) { 370 return NetUtils.createSocketAddr(address, DEFAULT_PORT); 371 } 372 373 /** 374 * Set the configuration property for the service rpc address 375 * to address 376 */ 377 public static void setServiceAddress(Configuration conf, 378 String address) { 379 LOG.info("Setting ADDRESS " + address); 380 conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address); 381 } 382 383 /** 384 * Fetches the address for services to use when connecting to namenode 385 * based on the value of fallback returns null if the special 386 * address is not specified or returns the default namenode address 387 * to be used by both clients and services. 388 * Services here are datanodes, backup node, any non client connection 389 */ 390 public static InetSocketAddress getServiceAddress(Configuration conf, 391 boolean fallback) { 392 String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY); 393 if (addr == null || addr.isEmpty()) { 394 return fallback ? 
getAddress(conf) : null; 395 } 396 return getAddress(addr); 397 } 398 399 public static InetSocketAddress getAddress(Configuration conf) { 400 URI filesystemURI = FileSystem.getDefaultUri(conf); 401 return getAddress(filesystemURI); 402 } 403 404 405 /** 406 * TODO:FEDERATION 407 * @param filesystemURI 408 * @return address of file system 409 */ 410 public static InetSocketAddress getAddress(URI filesystemURI) { 411 String authority = filesystemURI.getAuthority(); 412 if (authority == null) { 413 throw new IllegalArgumentException(String.format( 414 "Invalid URI for NameNode address (check %s): %s has no authority.", 415 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString())); 416 } 417 if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase( 418 filesystemURI.getScheme())) { 419 throw new IllegalArgumentException(String.format( 420 "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.", 421 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(), 422 HdfsConstants.HDFS_URI_SCHEME)); 423 } 424 return getAddress(authority); 425 } 426 427 public static URI getUri(InetSocketAddress namenode) { 428 int port = namenode.getPort(); 429 String portString = port == DEFAULT_PORT ? "" : (":"+port); 430 return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 431 + namenode.getHostName()+portString); 432 } 433 434 // 435 // Common NameNode methods implementation for the active name-node role. 
436 // 437 public NamenodeRole getRole() { 438 return role; 439 } 440 441 boolean isRole(NamenodeRole that) { 442 return role.equals(that); 443 } 444 445 /** 446 * Given a configuration get the address of the service rpc server 447 * If the service rpc is not configured returns null 448 */ 449 protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) { 450 return NameNode.getServiceAddress(conf, false); 451 } 452 453 protected InetSocketAddress getRpcServerAddress(Configuration conf) { 454 return getAddress(conf); 455 } 456 457 /** Given a configuration get the bind host of the service rpc server 458 * If the bind host is not configured returns null. 459 */ 460 protected String getServiceRpcServerBindHost(Configuration conf) { 461 String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY); 462 if (addr == null || addr.isEmpty()) { 463 return null; 464 } 465 return addr; 466 } 467 468 /** Given a configuration get the bind host of the client rpc server 469 * If the bind host is not configured returns null. 470 */ 471 protected String getRpcServerBindHost(Configuration conf) { 472 String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY); 473 if (addr == null || addr.isEmpty()) { 474 return null; 475 } 476 return addr; 477 } 478 479 /** 480 * Modifies the configuration passed to contain the service rpc address setting 481 */ 482 protected void setRpcServiceServerAddress(Configuration conf, 483 InetSocketAddress serviceRPCAddress) { 484 setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress)); 485 } 486 487 protected void setRpcServerAddress(Configuration conf, 488 InetSocketAddress rpcAddress) { 489 FileSystem.setDefaultUri(conf, getUri(rpcAddress)); 490 } 491 492 protected InetSocketAddress getHttpServerAddress(Configuration conf) { 493 return getHttpAddress(conf); 494 } 495 496 /** @return the NameNode HTTP address. 
*/ 497 public static InetSocketAddress getHttpAddress(Configuration conf) { 498 return NetUtils.createSocketAddr( 499 conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT)); 500 } 501 502 protected void loadNamesystem(Configuration conf) throws IOException { 503 this.namesystem = FSNamesystem.loadFromDisk(conf); 504 } 505 506 NamenodeRegistration getRegistration() { 507 return nodeRegistration; 508 } 509 510 NamenodeRegistration setRegistration() { 511 nodeRegistration = new NamenodeRegistration( 512 NetUtils.getHostPortString(rpcServer.getRpcAddress()), 513 NetUtils.getHostPortString(getHttpAddress()), 514 getFSImage().getStorage(), getRole()); 515 return nodeRegistration; 516 } 517 518 /* optimize ugi lookup for RPC operations to avoid a trip through 519 * UGI.getCurrentUser which is synch'ed 520 */ 521 public static UserGroupInformation getRemoteUser() throws IOException { 522 UserGroupInformation ugi = Server.getRemoteUser(); 523 return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser(); 524 } 525 526 527 /** 528 * Login as the configured user for the NameNode. 529 */ 530 void loginAsNameNodeUser(Configuration conf) throws IOException { 531 InetSocketAddress socAddr = getRpcServerAddress(conf); 532 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 533 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName()); 534 } 535 536 /** 537 * Initialize name-node. 
538 * 539 * @param conf the configuration 540 */ 541 protected void initialize(Configuration conf) throws IOException { 542 if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) { 543 String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY); 544 if (intervals != null) { 545 conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS, 546 intervals); 547 } 548 } 549 550 UserGroupInformation.setConfiguration(conf); 551 loginAsNameNodeUser(conf); 552 553 NameNode.initMetrics(conf, this.getRole()); 554 StartupProgressMetrics.register(startupProgress); 555 556 if (NamenodeRole.NAMENODE == role) { 557 startHttpServer(conf); 558 } 559 loadNamesystem(conf); 560 561 rpcServer = createRpcServer(conf); 562 if (clientNamenodeAddress == null) { 563 // This is expected for MiniDFSCluster. Set it now using 564 // the RPC server's bind address. 565 clientNamenodeAddress = 566 NetUtils.getHostPortString(rpcServer.getRpcAddress()); 567 LOG.info("Clients are to use " + clientNamenodeAddress + " to access" 568 + " this namenode/service."); 569 } 570 if (NamenodeRole.NAMENODE == role) { 571 httpServer.setNameNodeAddress(getNameNodeAddress()); 572 httpServer.setFSImage(getFSImage()); 573 } 574 575 pauseMonitor = new JvmPauseMonitor(conf); 576 pauseMonitor.start(); 577 578 startCommonServices(conf); 579 } 580 581 /** 582 * Create the RPC server implementation. Used as an extension point for the 583 * BackupNode. 
584 */ 585 protected NameNodeRpcServer createRpcServer(Configuration conf) 586 throws IOException { 587 return new NameNodeRpcServer(conf, this); 588 } 589 590 /** Start the services common to active and standby states */ 591 private void startCommonServices(Configuration conf) throws IOException { 592 namesystem.startCommonServices(conf, haContext); 593 registerNNSMXBean(); 594 if (NamenodeRole.NAMENODE != role) { 595 startHttpServer(conf); 596 httpServer.setNameNodeAddress(getNameNodeAddress()); 597 httpServer.setFSImage(getFSImage()); 598 } 599 rpcServer.start(); 600 plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY, 601 ServicePlugin.class); 602 for (ServicePlugin p: plugins) { 603 try { 604 p.start(this); 605 } catch (Throwable t) { 606 LOG.warn("ServicePlugin " + p + " could not be started", t); 607 } 608 } 609 LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress()); 610 if (rpcServer.getServiceRpcAddress() != null) { 611 LOG.info(getRole() + " service RPC up at: " 612 + rpcServer.getServiceRpcAddress()); 613 } 614 } 615 616 private void stopCommonServices() { 617 if(rpcServer != null) rpcServer.stop(); 618 if(namesystem != null) namesystem.close(); 619 if (pauseMonitor != null) pauseMonitor.stop(); 620 if (plugins != null) { 621 for (ServicePlugin p : plugins) { 622 try { 623 p.stop(); 624 } catch (Throwable t) { 625 LOG.warn("ServicePlugin " + p + " could not be stopped", t); 626 } 627 } 628 } 629 stopHttpServer(); 630 } 631 632 private void startTrashEmptier(final Configuration conf) throws IOException { 633 long trashInterval = 634 conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT); 635 if (trashInterval == 0) { 636 return; 637 } else if (trashInterval < 0) { 638 throw new IOException("Cannot start trash emptier with negative interval." 
639 + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value."); 640 } 641 642 // This may be called from the transitionToActive code path, in which 643 // case the current user is the administrator, not the NN. The trash 644 // emptier needs to run as the NN. See HDFS-3972. 645 FileSystem fs = SecurityUtil.doAsLoginUser( 646 new PrivilegedExceptionAction<FileSystem>() { 647 @Override 648 public FileSystem run() throws IOException { 649 return FileSystem.get(conf); 650 } 651 }); 652 this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier"); 653 this.emptier.setDaemon(true); 654 this.emptier.start(); 655 } 656 657 private void stopTrashEmptier() { 658 if (this.emptier != null) { 659 emptier.interrupt(); 660 emptier = null; 661 } 662 } 663 664 private void startHttpServer(final Configuration conf) throws IOException { 665 httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf)); 666 httpServer.start(); 667 httpServer.setStartupProgress(startupProgress); 668 } 669 670 private void stopHttpServer() { 671 try { 672 if (httpServer != null) httpServer.stop(); 673 } catch (Exception e) { 674 LOG.error("Exception while stopping httpserver", e); 675 } 676 } 677 678 /** 679 * Start NameNode. 
680 * <p> 681 * The name-node can be started with one of the following startup options: 682 * <ul> 683 * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li> 684 * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li> 685 * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li> 686 * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li> 687 * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster 688 * upgrade and create a snapshot of the current file system state</li> 689 * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node 690 * metadata</li> 691 * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the 692 * cluster back to the previous state</li> 693 * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize 694 * previous upgrade</li> 695 * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li> 696 * </ul> 697 * The option is passed via configuration field: 698 * <tt>dfs.namenode.startup</tt> 699 * 700 * The conf will be modified to reflect the actual ports on which 701 * the NameNode is up and running if the user passes the port as 702 * <code>zero</code> in the conf. 
703 * 704 * @param conf confirguration 705 * @throws IOException 706 */ 707 public NameNode(Configuration conf) throws IOException { 708 this(conf, NamenodeRole.NAMENODE); 709 } 710 711 protected NameNode(Configuration conf, NamenodeRole role) 712 throws IOException { 713 this.conf = conf; 714 this.role = role; 715 setClientNamenodeAddress(conf); 716 String nsId = getNameServiceId(conf); 717 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 718 this.haEnabled = HAUtil.isHAEnabled(conf, nsId); 719 state = createHAState(getStartupOption(conf)); 720 this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); 721 this.haContext = createHAContext(); 722 try { 723 initializeGenericKeys(conf, nsId, namenodeId); 724 initialize(conf); 725 try { 726 haContext.writeLock(); 727 state.prepareToEnterState(haContext); 728 state.enterState(haContext); 729 } finally { 730 haContext.writeUnlock(); 731 } 732 } catch (IOException e) { 733 this.stop(); 734 throw e; 735 } catch (HadoopIllegalArgumentException e) { 736 this.stop(); 737 throw e; 738 } 739 } 740 741 protected HAState createHAState(StartupOption startOpt) { 742 if (!haEnabled || startOpt == StartupOption.UPGRADE) { 743 return ACTIVE_STATE; 744 } else { 745 return STANDBY_STATE; 746 } 747 } 748 749 protected HAContext createHAContext() { 750 return new NameNodeHAContext(); 751 } 752 753 /** 754 * Wait for service to finish. 755 * (Normally, it runs forever.) 756 */ 757 public void join() { 758 try { 759 rpcServer.join(); 760 } catch (InterruptedException ie) { 761 LOG.info("Caught interrupted exception ", ie); 762 } 763 } 764 765 /** 766 * Stop all NameNode threads and wait for all to finish. 
767 */ 768 public void stop() { 769 synchronized(this) { 770 if (stopRequested) 771 return; 772 stopRequested = true; 773 } 774 try { 775 if (state != null) { 776 state.exitState(haContext); 777 } 778 } catch (ServiceFailedException e) { 779 LOG.warn("Encountered exception while exiting state ", e); 780 } finally { 781 stopCommonServices(); 782 if (metrics != null) { 783 metrics.shutdown(); 784 } 785 if (namesystem != null) { 786 namesystem.shutdown(); 787 } 788 if (nameNodeStatusBeanName != null) { 789 MBeans.unregister(nameNodeStatusBeanName); 790 nameNodeStatusBeanName = null; 791 } 792 } 793 } 794 795 synchronized boolean isStopRequested() { 796 return stopRequested; 797 } 798 799 /** 800 * Is the cluster currently in safe mode? 801 */ 802 public boolean isInSafeMode() { 803 return namesystem.isInSafeMode(); 804 } 805 806 /** get FSImage */ 807 @VisibleForTesting 808 public FSImage getFSImage() { 809 return namesystem.dir.fsImage; 810 } 811 812 /** 813 * @return NameNode RPC address 814 */ 815 public InetSocketAddress getNameNodeAddress() { 816 return rpcServer.getRpcAddress(); 817 } 818 819 /** 820 * @return NameNode RPC address in "host:port" string form 821 */ 822 public String getNameNodeAddressHostPortString() { 823 return NetUtils.getHostPortString(rpcServer.getRpcAddress()); 824 } 825 826 /** 827 * @return NameNode service RPC address if configured, the 828 * NameNode RPC address otherwise 829 */ 830 public InetSocketAddress getServiceRpcAddress() { 831 final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress(); 832 return serviceAddr == null ? 
rpcServer.getRpcAddress() : serviceAddr; 833 } 834 835 /** 836 * @return NameNode HTTP address, used by the Web UI, image transfer, 837 * and HTTP-based file system clients like Hftp and WebHDFS 838 */ 839 public InetSocketAddress getHttpAddress() { 840 return httpServer.getHttpAddress(); 841 } 842 843 /** 844 * @return NameNode HTTPS address, used by the Web UI, image transfer, 845 * and HTTP-based file system clients like Hftp and WebHDFS 846 */ 847 public InetSocketAddress getHttpsAddress() { 848 return httpServer.getHttpsAddress(); 849 } 850 851 /** 852 * Verify that configured directories exist, then 853 * Interactively confirm that formatting is desired 854 * for each existing directory and format them. 855 * 856 * @param conf 857 * @param force 858 * @return true if formatting was aborted, false otherwise 859 * @throws IOException 860 */ 861 private static boolean format(Configuration conf, boolean force, 862 boolean isInteractive) throws IOException { 863 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 864 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 865 initializeGenericKeys(conf, nsId, namenodeId); 866 checkAllowFormat(conf); 867 868 if (UserGroupInformation.isSecurityEnabled()) { 869 InetSocketAddress socAddr = getAddress(conf); 870 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 871 DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName()); 872 } 873 874 Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf); 875 List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf); 876 List<URI> dirsToPrompt = new ArrayList<URI>(); 877 dirsToPrompt.addAll(nameDirsToFormat); 878 dirsToPrompt.addAll(sharedDirs); 879 List<URI> editDirsToFormat = 880 FSNamesystem.getNamespaceEditsDirs(conf); 881 882 // if clusterID is not provided - see if you can find the current one 883 String clusterId = StartupOption.FORMAT.getClusterId(); 884 if(clusterId == null || clusterId.equals("")) { 885 //Generate a new cluster id 886 clusterId = 
NNStorage.newClusterID(); 887 } 888 System.out.println("Formatting using clusterid: " + clusterId); 889 890 FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat); 891 try { 892 FSNamesystem fsn = new FSNamesystem(conf, fsImage); 893 fsImage.getEditLog().initJournalsForWrite(); 894 895 if (!fsImage.confirmFormat(force, isInteractive)) { 896 return true; // aborted 897 } 898 899 fsImage.format(fsn, clusterId); 900 } catch (IOException ioe) { 901 LOG.warn("Encountered exception during format: ", ioe); 902 fsImage.close(); 903 throw ioe; 904 } 905 return false; 906 } 907 908 public static void checkAllowFormat(Configuration conf) throws IOException { 909 if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, 910 DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) { 911 throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY 912 + " is set to false for this filesystem, so it " 913 + "cannot be formatted. You will need to set " 914 + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter " 915 + "to true in order to format this filesystem"); 916 } 917 } 918 919 @VisibleForTesting 920 public static boolean initializeSharedEdits(Configuration conf) throws IOException { 921 return initializeSharedEdits(conf, true); 922 } 923 924 @VisibleForTesting 925 public static boolean initializeSharedEdits(Configuration conf, 926 boolean force) throws IOException { 927 return initializeSharedEdits(conf, force, false); 928 } 929 930 /** 931 * Clone the supplied configuration but remove the shared edits dirs. 932 * 933 * @param conf Supplies the original configuration. 934 * @return Cloned configuration without the shared edit dirs. 935 * @throws IOException on failure to generate the configuration. 
  /**
   * Clone the supplied configuration but remove the shared edits dirs.
   *
   * @param conf Supplies the original configuration.
   * @return Cloned configuration without the shared edit dirs.
   * @throws IOException on failure to generate the configuration.
   */
  private static Configuration getConfigurationWithoutSharedEdits(
      Configuration conf)
      throws IOException {
    // Resolve the non-shared edits dirs (includeShared=false), then rewrite
    // the clone so it only knows about those.
    List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
    String editsDirsString = Joiner.on(",").join(editsDirs);

    Configuration confWithoutShared = new Configuration(conf);
    confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
    confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
        editsDirsString);
    return confWithoutShared;
  }

  /**
   * Format a new shared edits dir and copy in enough edit log segments so that
   * the standby NN can start up.
   *
   * Loads the existing namespace from the local (non-shared) dirs, formats the
   * shared storage with that namespace info, and copies every edit after the
   * most recent checkpoint into the shared dir.
   *
   * @param conf configuration
   * @param force format regardless of whether or not the shared edits dir exists
   * @param interactive prompt the user when a dir exists
   * @return true if the command aborts, false otherwise
   */
  private static boolean initializeSharedEdits(Configuration conf,
      boolean force, boolean interactive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);

    if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
      // Nothing to initialize; note this is reported as "did not abort".
      LOG.fatal("No shared edits directory configured for namespace " +
          nsId + " namenode " + namenodeId);
      return false;
    }

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_USER_NAME_KEY, socAddr.getHostName());
    }

    NNStorage existingStorage = null;
    FSImage sharedEditsImage = null;
    try {
      // Load from disk WITHOUT the shared dirs so we never touch them here.
      FSNamesystem fsns =
          FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));

      existingStorage = fsns.getFSImage().getStorage();
      NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();

      List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);

      // An FSImage with no image dirs — only the shared edits dirs.
      sharedEditsImage = new FSImage(conf,
          Lists.<URI>newArrayList(),
          sharedEditsDirs);
      sharedEditsImage.getEditLog().initJournalsForWrite();

      if (!sharedEditsImage.confirmFormat(force, interactive)) {
        return true; // abort
      }

      NNStorage newSharedStorage = sharedEditsImage.getStorage();
      // Call Storage.format instead of FSImage.format here, since we don't
      // actually want to save a checkpoint - just prime the dirs with
      // the existing namespace info
      newSharedStorage.format(nsInfo);
      sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);

      // Need to make sure the edit log segments are in good shape to initialize
      // the shared edits dir.
      fsns.getFSImage().getEditLog().close();
      fsns.getFSImage().getEditLog().initJournalsForWrite();
      fsns.getFSImage().getEditLog().recoverUnclosedStreams();

      copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
          conf);
    } catch (IOException ioe) {
      LOG.error("Could not initialize shared edits dir", ioe);
      return true; // aborted
    } finally {
      if (sharedEditsImage != null) {
        try {
          sharedEditsImage.close();
        } catch (IOException ioe) {
          LOG.warn("Could not close sharedEditsImage", ioe);
        }
      }
      // Have to unlock storage explicitly for the case when we're running in a
      // unit test, which runs in the same JVM as NNs.
      if (existingStorage != null) {
        try {
          existingStorage.unlockAll();
        } catch (IOException ioe) {
          LOG.warn("Could not unlock storage directories", ioe);
          // NOTE: this return inside finally overrides any earlier result.
          return true; // aborted
        }
      }
    }
    return false; // did not abort
  }
1080 + stream); 1081 segmentOpen = false; 1082 } 1083 } 1084 1085 if (segmentOpen) { 1086 LOG.debug("ending log segment because of end of stream in " + stream); 1087 newSharedEditLog.logSync(); 1088 newSharedEditLog.endCurrentLogSegment(false); 1089 segmentOpen = false; 1090 } 1091 } 1092 } finally { 1093 if (streams != null) { 1094 FSEditLog.closeAllStreams(streams); 1095 } 1096 } 1097 } 1098 1099 @VisibleForTesting 1100 public static boolean doRollback(Configuration conf, 1101 boolean isConfirmationNeeded) throws IOException { 1102 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1103 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1104 initializeGenericKeys(conf, nsId, namenodeId); 1105 1106 FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf)); 1107 System.err.print( 1108 "\"rollBack\" will remove the current state of the file system,\n" 1109 + "returning you to the state prior to initiating your recent.\n" 1110 + "upgrade. This action is permanent and cannot be undone. If you\n" 1111 + "are performing a rollback in an HA environment, you should be\n" 1112 + "certain that no NameNode process is running on any host."); 1113 if (isConfirmationNeeded) { 1114 if (!confirmPrompt("Roll back file system state?")) { 1115 System.err.println("Rollback aborted."); 1116 return true; 1117 } 1118 } 1119 nsys.dir.fsImage.doRollback(nsys); 1120 return false; 1121 } 1122 1123 private static void printUsage(PrintStream out) { 1124 out.println(USAGE + "\n"); 1125 } 1126 1127 @VisibleForTesting 1128 static StartupOption parseArguments(String args[]) { 1129 int argsLen = (args == null) ? 
0 : args.length; 1130 StartupOption startOpt = StartupOption.REGULAR; 1131 for(int i=0; i < argsLen; i++) { 1132 String cmd = args[i]; 1133 if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) { 1134 startOpt = StartupOption.FORMAT; 1135 for (i = i + 1; i < argsLen; i++) { 1136 if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { 1137 i++; 1138 if (i >= argsLen) { 1139 // if no cluster id specified, return null 1140 LOG.fatal("Must specify a valid cluster ID after the " 1141 + StartupOption.CLUSTERID.getName() + " flag"); 1142 return null; 1143 } 1144 String clusterId = args[i]; 1145 // Make sure an id is specified and not another flag 1146 if (clusterId.isEmpty() || 1147 clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) || 1148 clusterId.equalsIgnoreCase( 1149 StartupOption.NONINTERACTIVE.getName())) { 1150 LOG.fatal("Must specify a valid cluster ID after the " 1151 + StartupOption.CLUSTERID.getName() + " flag"); 1152 return null; 1153 } 1154 startOpt.setClusterId(clusterId); 1155 } 1156 1157 if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) { 1158 startOpt.setForceFormat(true); 1159 } 1160 1161 if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) { 1162 startOpt.setInteractiveFormat(false); 1163 } 1164 } 1165 } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) { 1166 startOpt = StartupOption.GENCLUSTERID; 1167 } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) { 1168 startOpt = StartupOption.REGULAR; 1169 } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) { 1170 startOpt = StartupOption.BACKUP; 1171 } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) { 1172 startOpt = StartupOption.CHECKPOINT; 1173 } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) { 1174 startOpt = StartupOption.UPGRADE; 1175 /* Can be followed by CLUSTERID with a required parameter or 1176 * RENAMERESERVED with an optional parameter 1177 */ 1178 while (i + 1 < 
argsLen) { 1179 String flag = args[i + 1]; 1180 if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { 1181 if (i + 2 < argsLen) { 1182 i += 2; 1183 startOpt.setClusterId(args[i]); 1184 } else { 1185 LOG.fatal("Must specify a valid cluster ID after the " 1186 + StartupOption.CLUSTERID.getName() + " flag"); 1187 return null; 1188 } 1189 } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED 1190 .getName())) { 1191 if (i + 2 < argsLen) { 1192 FSImageFormat.setRenameReservedPairs(args[i + 2]); 1193 i += 2; 1194 } else { 1195 FSImageFormat.useDefaultRenameReservedPairs(); 1196 i += 1; 1197 } 1198 } else { 1199 LOG.fatal("Unknown upgrade flag " + flag); 1200 return null; 1201 } 1202 } 1203 } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) { 1204 startOpt = StartupOption.ROLLINGUPGRADE; 1205 ++i; 1206 startOpt.setRollingUpgradeStartupOption(args[i]); 1207 } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) { 1208 startOpt = StartupOption.ROLLBACK; 1209 } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) { 1210 startOpt = StartupOption.FINALIZE; 1211 } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) { 1212 startOpt = StartupOption.IMPORT; 1213 } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) { 1214 startOpt = StartupOption.BOOTSTRAPSTANDBY; 1215 return startOpt; 1216 } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) { 1217 startOpt = StartupOption.INITIALIZESHAREDEDITS; 1218 for (i = i + 1 ; i < argsLen; i++) { 1219 if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) { 1220 startOpt.setInteractiveFormat(false); 1221 } else if (StartupOption.FORCE.getName().equals(args[i])) { 1222 startOpt.setForceFormat(true); 1223 } else { 1224 LOG.fatal("Invalid argument: " + args[i]); 1225 return null; 1226 } 1227 } 1228 return startOpt; 1229 } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) { 1230 if (startOpt != 
StartupOption.REGULAR) { 1231 throw new RuntimeException("Can't combine -recover with " + 1232 "other startup options."); 1233 } 1234 startOpt = StartupOption.RECOVER; 1235 while (++i < argsLen) { 1236 if (args[i].equalsIgnoreCase( 1237 StartupOption.FORCE.getName())) { 1238 startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE); 1239 } else { 1240 throw new RuntimeException("Error parsing recovery options: " + 1241 "can't understand option \"" + args[i] + "\""); 1242 } 1243 } 1244 } else { 1245 return null; 1246 } 1247 } 1248 return startOpt; 1249 } 1250 1251 private static void setStartupOption(Configuration conf, StartupOption opt) { 1252 conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name()); 1253 } 1254 1255 static StartupOption getStartupOption(Configuration conf) { 1256 return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY, 1257 StartupOption.REGULAR.toString())); 1258 } 1259 1260 private static void doRecovery(StartupOption startOpt, Configuration conf) 1261 throws IOException { 1262 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1263 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1264 initializeGenericKeys(conf, nsId, namenodeId); 1265 if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) { 1266 if (!confirmPrompt("You have selected Metadata Recovery mode. " + 1267 "This mode is intended to recover lost metadata on a corrupt " + 1268 "filesystem. Metadata recovery mode often permanently deletes " + 1269 "data from your HDFS filesystem. Please back up your edit log " + 1270 "and fsimage before trying this!\n\n" + 1271 "Are you ready to proceed? 
  /**
   * Run metadata recovery: prompt (unless forced), then load the namesystem
   * from disk and immediately save the namespace, logging success or failure.
   *
   * @param startOpt the RECOVER startup option (carries the force level)
   * @param conf configuration for this NameNode (modified by key resolution)
   * @throws IOException if loading or saving the namespace fails
   */
  private static void doRecovery(StartupOption startOpt, Configuration conf)
      throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    // Only prompt when the user did not pass a force level high enough.
    if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
      if (!confirmPrompt("You have selected Metadata Recovery mode. " +
          "This mode is intended to recover lost metadata on a corrupt " +
          "filesystem. Metadata recovery mode often permanently deletes " +
          "data from your HDFS filesystem. Please back up your edit log " +
          "and fsimage before trying this!\n\n" +
          "Are you ready to proceed? (Y/N)\n")) {
        System.err.println("Recovery aborted at user request.\n");
        return;
      }
    }
    MetaRecoveryContext.LOG.info("starting recovery...");
    UserGroupInformation.setConfiguration(conf);
    NameNode.initMetrics(conf, startOpt.toNodeRole());
    FSNamesystem fsn = null;
    try {
      fsn = FSNamesystem.loadFromDisk(conf);
      fsn.getFSImage().saveNamespace(fsn);
      MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
    } catch (IOException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } catch (RuntimeException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } finally {
      if (fsn != null)
        fsn.close();
    }
  }

  /**
   * Parse the command line and either run a one-shot admin action (format,
   * rollback, recovery, ...) — which calls terminate() and never returns
   * normally — or construct and return a running NameNode/BackupNode.
   *
   * @param argv raw command-line arguments
   * @param conf configuration; a fresh HdfsConfiguration when null
   * @return a started NameNode, or null if the invocation was an admin action
   *         (unreachable in practice because terminate() exits first)
   * @throws IOException on startup failure
   */
  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    LOG.info("createNameNode " + Arrays.asList(argv));
    if (conf == null)
      conf = new HdfsConfiguration();
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      printUsage(System.err);
      return null;
    }
    setStartupOption(conf, startOpt);

    switch (startOpt) {
      case FORMAT: {
        boolean aborted = format(conf, startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case GENCLUSTERID: {
        System.err.println("Generating new cluster id:");
        System.out.println(NNStorage.newClusterID());
        terminate(0);
        return null;
      }
      case FINALIZE: {
        System.err.println("Use of the argument '" + StartupOption.FINALIZE +
            "' is no longer supported. To finalize an upgrade, start the NN " +
            " and then run `hdfs dfsadmin -finalizeUpgrade'");
        terminate(1);
        return null; // avoid javac warning
      }
      case ROLLBACK: {
        boolean aborted = doRollback(conf, true);
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BOOTSTRAPSTANDBY: {
        // Strip the leading -bootstrapStandby flag; the tool owns the rest.
        String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
        int rc = BootstrapStandby.run(toolArgs, conf);
        terminate(rc);
        return null; // avoid warning
      }
      case INITIALIZESHAREDEDITS: {
        boolean aborted = initializeSharedEdits(conf,
            startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BACKUP:
      case CHECKPOINT: {
        NamenodeRole role = startOpt.toNodeRole();
        DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
        return new BackupNode(conf, role);
      }
      case RECOVER: {
        NameNode.doRecovery(startOpt, conf);
        return null;
      }
      default: {
        DefaultMetricsSystem.initialize("NameNode");
        return new NameNode(conf);
      }
    }
  }
  /**
   * In federation configuration is set for a set of
   * namenode and secondary namenode/backup/checkpointer, which are
   * grouped under a logical nameservice ID. The configuration keys specific
   * to them have suffix set to configured nameserviceId.
   *
   * This method copies the value from specific key of format key.nameserviceId
   * to key, to set up the generic configuration. Once this is done, only
   * generic version of the configuration is read in rest of the code, for
   * backward compatibility and simpler code changes.
   *
   * @param conf
   *          Configuration object to lookup specific key and to set the value
   *          to the key passed. Note the conf object is modified
   * @param nameserviceId name service Id (to distinguish federated NNs)
   * @param namenodeId the namenode ID (to distinguish HA NNs)
   * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
   */
  public static void initializeGenericKeys(Configuration conf,
      String nameserviceId, String namenodeId) {
    if ((nameserviceId != null && !nameserviceId.isEmpty()) ||
        (namenodeId != null && !namenodeId.isEmpty())) {
      if (nameserviceId != null) {
        conf.set(DFS_NAMESERVICE_ID, nameserviceId);
      }
      if (namenodeId != null) {
        conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
      }

      // Namenode-specific keys are resolved with both suffixes;
      // nameservice-wide keys with the nameservice suffix only.
      DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
          NAMENODE_SPECIFIC_KEYS);
      DFSUtil.setGenericConf(conf, nameserviceId, null,
          NAMESERVICE_SPECIFIC_KEYS);
    }

    // If the RPC address is set use it to (re-)configure the default FS
    if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
      URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
          + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
      conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
      LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
    }
  }

  /**
   * Get the name service Id for the node
   * @return name service Id or null if federation is not configured
   */
  protected String getNameServiceId(Configuration conf) {
    return DFSUtil.getNamenodeNameServiceId(conf);
  }

  /**
   * Command-line entry point: print help if requested, otherwise create a
   * NameNode and block in join() until it shuts down.
   */
  public static void main(String argv[]) throws Exception {
    if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
      System.exit(0);
    }

    try {
      StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
      NameNode namenode = createNameNode(argv, null);
      if (namenode != null) {
        namenode.join();
      }
    } catch (Throwable e) {
      LOG.fatal("Exception in namenode join", e);
      terminate(1, e);
    }
  }

  /**
   * HA health probe: succeeds silently when HA is disabled or resources are
   * available; otherwise throws HealthCheckFailedException.
   * Requires superuser privilege.
   */
  synchronized void monitorHealth()
      throws HealthCheckFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      return; // no-op, if HA is not enabled
    }
    getNamesystem().checkAvailableResources();
    if (!getNamesystem().nameNodeHasResourcesAvailable()) {
      throw new HealthCheckFailedException(
          "The NameNode has no resources available");
    }
  }

  /** Transition this NN to the active HA state (superuser only, HA required). */
  synchronized void transitionToActive()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, ACTIVE_STATE);
  }

  /** Transition this NN to the standby HA state (superuser only, HA required). */
  synchronized void transitionToStandby()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, STANDBY_STATE);
  }

  /**
   * Report the current HA service state plus readiness to become active.
   * A standby that is still in safemode is reported as not ready.
   */
  synchronized HAServiceStatus getServiceStatus()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    if (state == null) {
      return new HAServiceStatus(HAServiceState.INITIALIZING);
    }
    HAServiceState retState = state.getServiceState();
    HAServiceStatus ret = new HAServiceStatus(retState);
    if (retState == HAServiceState.STANDBY) {
      String safemodeTip = namesystem.getSafeModeTip();
      if (!safemodeTip.isEmpty()) {
        ret.setNotReadyToBecomeActive(
            "The NameNode is in safemode. " +
            safemodeTip);
      } else {
        ret.setReadyToBecomeActive();
      }
    } else if (retState == HAServiceState.ACTIVE) {
      ret.setReadyToBecomeActive();
    } else {
      ret.setNotReadyToBecomeActive("State is " + state);
    }
    return ret;
  }

  /** @return the current HA state, or INITIALIZING before a state is set. */
  synchronized HAServiceState getServiceState() {
    if (state == null) {
      return HAServiceState.INITIALIZING;
    }
    return state.getServiceState();
  }

  /**
   * Register NameNodeStatusMXBean
   */
  private void registerNNSMXBean() {
    nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this);
  }

  @Override // NameNodeStatusMXBean
  public String getNNRole() {
    // Empty string (not null) when the role is unset.
    String roleStr = "";
    NamenodeRole role = getRole();
    if (null != role) {
      roleStr = role.toString();
    }
    return roleStr;
  }

  @Override // NameNodeStatusMXBean
  public String getState() {
    // Empty string (not null) when the HA state is unset.
    String servStateStr = "";
    HAServiceState servState = getServiceState();
    if (null != servState) {
      servStateStr = servState.toString();
    }
    return servStateStr;
  }

  @Override // NameNodeStatusMXBean
  public String getHostAndPort() {
    return getNameNodeAddressHostPortString();
  }

  @Override // NameNodeStatusMXBean
  public boolean isSecurityEnabled() {
    return UserGroupInformation.isSecurityEnabled();
  }
  /**
   * Shutdown the NN immediately in an ungraceful way. Used when it would be
   * unsafe for the NN to continue operating, e.g. during a failed HA state
   * transition.
   *
   * @param t exception which warrants the shutdown. Printed to the NN log
   *          before exit.
   * @throws ExitException thrown only for testing.
   */
  protected synchronized void doImmediateShutdown(Throwable t)
      throws ExitException {
    String message = "Error encountered requiring NN shutdown. " +
        "Shutting down immediately.";
    try {
      LOG.fatal(message, t);
    } catch (Throwable ignored) {
      // This is unlikely to happen, but there's nothing we can do if it does.
    }
    terminate(1, t);
  }

  /**
   * Class used to expose {@link NameNode} as context to {@link HAState}
   *
   * Every service start/stop hook funnels any Throwable into
   * doImmediateShutdown: a failed HA transition must not leave the NN
   * half-transitioned.
   */
  protected class NameNodeHAContext implements HAContext {
    @Override
    public void setState(HAState s) {
      state = s;
    }

    @Override
    public HAState getState() {
      return state;
    }

    @Override
    public void startActiveServices() throws IOException {
      try {
        namesystem.startActiveServices();
        startTrashEmptier(conf);
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void stopActiveServices() throws IOException {
      try {
        if (namesystem != null) {
          namesystem.stopActiveServices();
        }
        stopTrashEmptier();
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void startStandbyServices() throws IOException {
      try {
        namesystem.startStandbyServices(conf);
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void prepareToStopStandbyServices() throws ServiceFailedException {
      try {
        namesystem.prepareToStopStandbyServices();
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void stopStandbyServices() throws IOException {
      try {
        if (namesystem != null) {
          namesystem.stopStandbyServices();
        }
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

    @Override
    public void writeLock() {
      // Lock order: namesystem write lock first, then the retry cache lock;
      // writeUnlock releases in the reverse order.
      namesystem.writeLock();
      namesystem.lockRetryCache();
    }

    @Override
    public void writeUnlock() {
      namesystem.unlockRetryCache();
      namesystem.writeUnlock();
    }

    /** Check if an operation of given category is allowed */
    @Override
    public void checkOperation(final OperationCategory op)
        throws StandbyException {
      state.checkOperation(haContext, op);
    }

    @Override
    public boolean allowStaleReads() {
      return allowStaleStandbyReads;
    }

  }

  /** @return true iff this NN is currently in the standby HA state. */
  public boolean isStandbyState() {
    return (state.equals(STANDBY_STATE));
  }

  /**
   * Check that a request to change this node's HA state is valid.
   * In particular, verifies that, if auto failover is enabled, non-forced
   * requests from the HAAdmin CLI are rejected, and vice versa.
   *
   * @param req the request to check
   * @throws AccessControlException if the request is disallowed
   */
  void checkHaStateChange(StateChangeRequestInfo req)
      throws AccessControlException {
    boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
        DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
    switch (req.getSource()) {
    case REQUEST_BY_USER:
      if (autoHaEnabled) {
        throw new AccessControlException(
            "Manual HA control for this NameNode is disallowed, because " +
            "automatic HA is enabled.");
      }
      break;
    case REQUEST_BY_USER_FORCED:
      if (autoHaEnabled) {
        // Allowed, but loudly logged: the user explicitly forced it.
        LOG.warn("Allowing manual HA control from " +
            Server.getRemoteAddress() +
            " even though automatic HA is enabled, because the user " +
            "specified the force flag");
      }
      break;
    case REQUEST_BY_ZKFC:
      if (!autoHaEnabled) {
        throw new AccessControlException(
            "Request from ZK failover controller at " +
            Server.getRemoteAddress() + " denied since automatic HA " +
            "is not enabled");
      }
      break;
    }
  }
}