001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; 021import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; 022import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT; 023import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; 024import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT; 025import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY; 026import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT; 027import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY; 028import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT; 029import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY; 030import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT; 031import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY; 032import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT; 033import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY; 034import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT; 035import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY; 036import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT; 037import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY; 038import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY; 039import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT; 040import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY; 041import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT; 042import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY; 043import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT; 044import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY; 045import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME; 046import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT; 047import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY; 048import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT; 049import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY; 050import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT; 051import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY; 052import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT; 
053import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY; 054import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY; 055import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY; 056import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS; 057import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT; 058import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD; 059import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT; 060import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT; 061import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY; 062import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT; 063import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY; 064import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY; 065import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT; 066import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY; 067import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY; 068import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT; 069import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY; 070import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT; 071import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY; 072import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT; 073import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY; 074import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY; 075import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT; 076import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY; 077import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT; 078import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY; 079import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY; 080import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT; 081import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY; 082import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT; 083import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY; 084import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT; 085import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY; 086import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT; 087import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY; 088import static org.apache.hadoop.util.Time.now; 089 090import java.io.BufferedWriter; 091import java.io.ByteArrayInputStream; 092import java.io.DataInput; 093import java.io.DataInputStream; 094import java.io.File; 095import java.io.FileNotFoundException; 096import java.io.FileOutputStream; 097import java.io.IOException; 098import java.io.OutputStreamWriter; 099import java.io.PrintWriter; 100import java.io.StringWriter; 101import java.lang.management.ManagementFactory; 102import java.net.InetAddress; 103import java.net.URI; 104import java.util.ArrayList; 105import java.util.Arrays; 106import java.util.Collection; 107import java.util.Collections; 108import java.util.Date; 
109import java.util.EnumSet; 110import java.util.HashMap; 111import java.util.HashSet; 112import java.util.Iterator; 113import java.util.LinkedHashSet; 114import java.util.List; 115import java.util.Map; 116import java.util.Set; 117import java.util.concurrent.TimeUnit; 118import java.util.concurrent.locks.ReentrantLock; 119import java.util.concurrent.locks.ReentrantReadWriteLock; 120 121import javax.management.NotCompliantMBeanException; 122import javax.management.ObjectName; 123import javax.management.StandardMBean; 124 125import org.apache.commons.logging.Log; 126import org.apache.commons.logging.LogFactory; 127import org.apache.commons.logging.impl.Log4JLogger; 128import org.apache.hadoop.HadoopIllegalArgumentException; 129import org.apache.hadoop.classification.InterfaceAudience; 130import org.apache.hadoop.conf.Configuration; 131import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; 132import org.apache.hadoop.fs.CacheFlag; 133import org.apache.hadoop.fs.ContentSummary; 134import org.apache.hadoop.fs.CreateFlag; 135import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException; 136import org.apache.hadoop.fs.FileAlreadyExistsException; 137import org.apache.hadoop.fs.FileStatus; 138import org.apache.hadoop.fs.FileSystem; 139import org.apache.hadoop.fs.FsServerDefaults; 140import org.apache.hadoop.fs.InvalidPathException; 141import org.apache.hadoop.fs.Options; 142import org.apache.hadoop.fs.Options.Rename; 143import org.apache.hadoop.fs.ParentNotDirectoryException; 144import org.apache.hadoop.fs.Path; 145import org.apache.hadoop.fs.UnresolvedLinkException; 146import org.apache.hadoop.fs.permission.AclEntry; 147import org.apache.hadoop.fs.permission.AclStatus; 148import org.apache.hadoop.fs.permission.FsAction; 149import org.apache.hadoop.fs.permission.FsPermission; 150import org.apache.hadoop.fs.permission.PermissionStatus; 151import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; 152import 
org.apache.hadoop.ha.ServiceFailedException; 153import org.apache.hadoop.hdfs.DFSConfigKeys; 154import org.apache.hadoop.hdfs.DFSUtil; 155import org.apache.hadoop.hdfs.HAUtil; 156import org.apache.hadoop.hdfs.HdfsConfiguration; 157import org.apache.hadoop.hdfs.StorageType; 158import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; 159import org.apache.hadoop.hdfs.protocol.Block; 160import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; 161import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; 162import org.apache.hadoop.hdfs.protocol.CachePoolEntry; 163import org.apache.hadoop.hdfs.protocol.CachePoolInfo; 164import org.apache.hadoop.hdfs.protocol.ClientProtocol; 165import org.apache.hadoop.hdfs.protocol.DatanodeID; 166import org.apache.hadoop.hdfs.protocol.DatanodeInfo; 167import org.apache.hadoop.hdfs.protocol.DirectoryListing; 168import org.apache.hadoop.hdfs.protocol.ExtendedBlock; 169import org.apache.hadoop.hdfs.protocol.HdfsConstants; 170import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; 171import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; 172import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; 173import org.apache.hadoop.hdfs.protocol.LocatedBlock; 174import org.apache.hadoop.hdfs.protocol.LocatedBlocks; 175import org.apache.hadoop.hdfs.protocol.QuotaExceededException; 176import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException; 177import org.apache.hadoop.hdfs.protocol.RollingUpgradeException; 178import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; 179import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; 180import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry; 181import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; 182import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure; 183import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; 184import 
org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode; 185import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; 186import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; 187import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState; 188import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection; 189import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; 190import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; 191import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; 192import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; 193import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; 194import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics; 195import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo; 196import org.apache.hadoop.hdfs.server.blockmanagement.OutOfV1GenerationStampsException; 197import org.apache.hadoop.hdfs.server.common.GenerationStamp; 198import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; 199import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; 200import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption; 201import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 202import org.apache.hadoop.hdfs.server.common.Storage; 203import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType; 204import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; 205import org.apache.hadoop.hdfs.server.common.Util; 206import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; 207import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo; 208import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; 209import 
org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; 210import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; 211import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; 212import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer; 213import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; 214import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer; 215import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean; 216import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; 217import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable; 218import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable.SnapshotDiffInfo; 219import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; 220import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager; 221import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; 222import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 223import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter; 224import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status; 225import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; 226import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; 227import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; 228import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; 229import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; 230import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; 231import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; 232import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; 233import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; 234import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; 235import 
org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks; 236import org.apache.hadoop.hdfs.server.protocol.StorageReport; 237import org.apache.hadoop.hdfs.util.ChunkedArrayList; 238import org.apache.hadoop.io.IOUtils; 239import org.apache.hadoop.io.Text; 240import org.apache.hadoop.ipc.RetriableException; 241import org.apache.hadoop.ipc.RetryCache; 242import org.apache.hadoop.ipc.RetryCache.CacheEntry; 243import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload; 244import org.apache.hadoop.ipc.Server; 245import org.apache.hadoop.ipc.StandbyException; 246import org.apache.hadoop.metrics2.annotation.Metric; 247import org.apache.hadoop.metrics2.annotation.Metrics; 248import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; 249import org.apache.hadoop.metrics2.util.MBeans; 250import org.apache.hadoop.net.NetworkTopology; 251import org.apache.hadoop.net.Node; 252import org.apache.hadoop.security.AccessControlException; 253import org.apache.hadoop.security.UserGroupInformation; 254import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; 255import org.apache.hadoop.security.token.SecretManager.InvalidToken; 256import org.apache.hadoop.security.token.Token; 257import org.apache.hadoop.security.token.TokenIdentifier; 258import org.apache.hadoop.security.token.delegation.DelegationKey; 259import org.apache.hadoop.util.Daemon; 260import org.apache.hadoop.util.DataChecksum; 261import org.apache.hadoop.util.StringUtils; 262import org.apache.hadoop.util.Time; 263import org.apache.hadoop.util.VersionInfo; 264import org.apache.log4j.Appender; 265import org.apache.log4j.AsyncAppender; 266import org.apache.log4j.Logger; 267import org.mortbay.util.ajax.JSON; 268 269import com.google.common.annotations.VisibleForTesting; 270import com.google.common.base.Charsets; 271import com.google.common.base.Preconditions; 272import com.google.common.collect.ImmutableMap; 273import com.google.common.collect.Lists; 274 
275/*************************************************** 276 * FSNamesystem does the actual bookkeeping work for the 277 * DataNode. 278 * 279 * It tracks several important tables. 280 * 281 * 1) valid fsname --> blocklist (kept on disk, logged) 282 * 2) Set of all valid blocks (inverted #1) 283 * 3) block --> machinelist (kept in memory, rebuilt dynamically from reports) 284 * 4) machine --> blocklist (inverted #2) 285 * 5) LRU cache of updated-heartbeat machines 286 ***************************************************/ 287@InterfaceAudience.Private 288@Metrics(context="dfs") 289public class FSNamesystem implements Namesystem, FSClusterStats, 290 FSNamesystemMBean, NameNodeMXBean { 291 public static final Log LOG = LogFactory.getLog(FSNamesystem.class); 292 293 private static final ThreadLocal<StringBuilder> auditBuffer = 294 new ThreadLocal<StringBuilder>() { 295 @Override 296 protected StringBuilder initialValue() { 297 return new StringBuilder(); 298 } 299 }; 300 301 @VisibleForTesting 302 public boolean isAuditEnabled() { 303 return !isDefaultAuditLogger || auditLog.isInfoEnabled(); 304 } 305 306 private HdfsFileStatus getAuditFileInfo(String path, boolean resolveSymlink) 307 throws IOException { 308 return (isAuditEnabled() && isExternalInvocation()) 309 ? 
dir.getFileInfo(path, resolveSymlink) : null; 310 } 311 312 private void logAuditEvent(boolean succeeded, String cmd, String src) 313 throws IOException { 314 logAuditEvent(succeeded, cmd, src, null, null); 315 } 316 317 private void logAuditEvent(boolean succeeded, String cmd, String src, 318 String dst, HdfsFileStatus stat) throws IOException { 319 if (isAuditEnabled() && isExternalInvocation()) { 320 logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(), 321 cmd, src, dst, stat); 322 } 323 } 324 325 private void logAuditEvent(boolean succeeded, 326 UserGroupInformation ugi, InetAddress addr, String cmd, String src, 327 String dst, HdfsFileStatus stat) { 328 FileStatus status = null; 329 if (stat != null) { 330 Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null; 331 Path path = dst != null ? new Path(dst) : new Path(src); 332 status = new FileStatus(stat.getLen(), stat.isDir(), 333 stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(), 334 stat.getAccessTime(), stat.getPermission(), stat.getOwner(), 335 stat.getGroup(), symlink, path); 336 } 337 for (AuditLogger logger : auditLoggers) { 338 if (logger instanceof HdfsAuditLogger) { 339 HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger; 340 hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst, 341 status, ugi, dtSecretManager); 342 } else { 343 logger.logAuditEvent(succeeded, ugi.toString(), addr, 344 cmd, src, dst, status); 345 } 346 } 347 } 348 349 /** 350 * Logger for audit events, noting successful FSNamesystem operations. Emits 351 * to FSNamesystem.audit at INFO. 
Each event causes a set of tab-separated 352 * <code>key=value</code> pairs to be written for the following properties: 353 * <code> 354 * ugi=<ugi in RPC> 355 * ip=<remote IP> 356 * cmd=<command> 357 * src=<src path> 358 * dst=<dst path (optional)> 359 * perm=<permissions (optional)> 360 * </code> 361 */ 362 public static final Log auditLog = LogFactory.getLog( 363 FSNamesystem.class.getName() + ".audit"); 364 365 static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100; 366 static int BLOCK_DELETION_INCREMENT = 1000; 367 private final boolean isPermissionEnabled; 368 private final UserGroupInformation fsOwner; 369 private final String fsOwnerShortUserName; 370 private final String supergroup; 371 private final boolean standbyShouldCheckpoint; 372 373 // Scan interval is not configurable. 374 private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL = 375 TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS); 376 final DelegationTokenSecretManager dtSecretManager; 377 private final boolean alwaysUseDelegationTokensForTests; 378 379 private static final Step STEP_AWAITING_REPORTED_BLOCKS = 380 new Step(StepType.AWAITING_REPORTED_BLOCKS); 381 382 // Tracks whether the default audit logger is the only configured audit 383 // logger; this allows isAuditEnabled() to return false in case the 384 // underlying logger is disabled, and avoid some unnecessary work. 385 private final boolean isDefaultAuditLogger; 386 private final List<AuditLogger> auditLoggers; 387 388 /** The namespace tree. */ 389 FSDirectory dir; 390 private final BlockManager blockManager; 391 private final SnapshotManager snapshotManager; 392 private final CacheManager cacheManager; 393 private final DatanodeStatistics datanodeStatistics; 394 395 private RollingUpgradeInfo rollingUpgradeInfo = null; 396 /** 397 * A flag that indicates whether the checkpointer should checkpoint a rollback 398 * fsimage. The edit log tailer sets this flag. 
The checkpoint will create a 399 * rollback fsimage if the flag is true, and then change the flag to false. 400 */ 401 private volatile boolean needRollbackFsImage; 402 403 // Block pool ID used by this namenode 404 private String blockPoolId; 405 406 final LeaseManager leaseManager = new LeaseManager(this); 407 408 volatile Daemon smmthread = null; // SafeModeMonitor thread 409 410 Daemon nnrmthread = null; // NamenodeResourceMonitor thread 411 412 Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread 413 /** 414 * When an active namenode will roll its own edit log, in # edits 415 */ 416 private final long editLogRollerThreshold; 417 /** 418 * Check interval of an active namenode's edit log roller thread 419 */ 420 private final int editLogRollerInterval; 421 422 private volatile boolean hasResourcesAvailable = false; 423 private volatile boolean fsRunning = true; 424 425 /** The start time of the namesystem. */ 426 private final long startTime = now(); 427 428 /** The interval of namenode checking for the disk space availability */ 429 private final long resourceRecheckInterval; 430 431 // The actual resource checker instance. 432 NameNodeResourceChecker nnResourceChecker; 433 434 private final FsServerDefaults serverDefaults; 435 private final boolean supportAppends; 436 private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure; 437 438 private volatile SafeModeInfo safeMode; // safe mode information 439 440 private final long maxFsObjects; // maximum number of fs objects 441 442 private final long minBlockSize; // minimum block size 443 private final long maxBlocksPerFile; // maximum # of blocks per file 444 445 /** 446 * The global generation stamp for legacy blocks with randomly 447 * generated block IDs. 448 */ 449 private final GenerationStamp generationStampV1 = new GenerationStamp(); 450 451 /** 452 * The global generation stamp for this file system. 
453 */ 454 private final GenerationStamp generationStampV2 = new GenerationStamp(); 455 456 /** 457 * The value of the generation stamp when the first switch to sequential 458 * block IDs was made. Blocks with generation stamps below this value 459 * have randomly allocated block IDs. Blocks with generation stamps above 460 * this value had sequentially allocated block IDs. Read from the fsImage 461 * (or initialized as an offset from the V1 (legacy) generation stamp on 462 * upgrade). 463 */ 464 private long generationStampV1Limit = 465 GenerationStamp.GRANDFATHER_GENERATION_STAMP; 466 467 /** 468 * The global block ID space for this file system. 469 */ 470 @VisibleForTesting 471 private final SequentialBlockIdGenerator blockIdGenerator; 472 473 // precision of access times. 474 private final long accessTimePrecision; 475 476 /** Lock to protect FSNamesystem. */ 477 private final FSNamesystemLock fsLock; 478 479 /** 480 * Used when this NN is in standby state to read from the shared edit log. 481 */ 482 private EditLogTailer editLogTailer = null; 483 484 /** 485 * Used when this NN is in standby state to perform checkpoints. 486 */ 487 private StandbyCheckpointer standbyCheckpointer; 488 489 /** 490 * Reference to the NN's HAContext object. This is only set once 491 * {@link #startCommonServices(Configuration, HAContext)} is called. 492 */ 493 private HAContext haContext; 494 495 private final boolean haEnabled; 496 497 /** flag indicating whether replication queues have been initialized */ 498 boolean initializedReplQueues = false; 499 500 /** 501 * Whether the namenode is in the middle of starting the active service 502 */ 503 private volatile boolean startingActiveService = false; 504 505 private INodeId inodeId; 506 507 private final RetryCache retryCache; 508 509 private final AclConfigFlag aclConfigFlag; 510 511 /** 512 * Set the last allocated inode id when fsimage or editlog is loaded. 
513 */ 514 public void resetLastInodeId(long newValue) throws IOException { 515 try { 516 inodeId.skipTo(newValue); 517 } catch(IllegalStateException ise) { 518 throw new IOException(ise); 519 } 520 } 521 522 /** Should only be used for tests to reset to any value */ 523 void resetLastInodeIdWithoutChecking(long newValue) { 524 inodeId.setCurrentValue(newValue); 525 } 526 527 /** @return the last inode ID. */ 528 public long getLastInodeId() { 529 return inodeId.getCurrentValue(); 530 } 531 532 /** Allocate a new inode ID. */ 533 public long allocateNewInodeId() { 534 return inodeId.nextValue(); 535 } 536 537 /** 538 * Clear all loaded data 539 */ 540 void clear() { 541 dir.reset(); 542 dtSecretManager.reset(); 543 generationStampV1.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP); 544 generationStampV2.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP); 545 blockIdGenerator.setCurrentValue( 546 SequentialBlockIdGenerator.LAST_RESERVED_BLOCK_ID); 547 generationStampV1Limit = GenerationStamp.GRANDFATHER_GENERATION_STAMP; 548 leaseManager.removeAllLeases(); 549 inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID); 550 snapshotManager.clearSnapshottableDirs(); 551 cacheManager.clear(); 552 } 553 554 @VisibleForTesting 555 LeaseManager getLeaseManager() { 556 return leaseManager; 557 } 558 559 boolean isHaEnabled() { 560 return haEnabled; 561 } 562 563 /** 564 * Check the supplied configuration for correctness. 565 * @param conf Supplies the configuration to validate. 566 * @throws IOException if the configuration could not be queried. 567 * @throws IllegalArgumentException if the configuration is invalid. 
568 */ 569 private static void checkConfiguration(Configuration conf) 570 throws IOException { 571 572 final Collection<URI> namespaceDirs = 573 FSNamesystem.getNamespaceDirs(conf); 574 final Collection<URI> editsDirs = 575 FSNamesystem.getNamespaceEditsDirs(conf); 576 final Collection<URI> requiredEditsDirs = 577 FSNamesystem.getRequiredNamespaceEditsDirs(conf); 578 final Collection<URI> sharedEditsDirs = 579 FSNamesystem.getSharedEditsDirs(conf); 580 581 for (URI u : requiredEditsDirs) { 582 if (u.toString().compareTo( 583 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) { 584 continue; 585 } 586 587 // Each required directory must also be in editsDirs or in 588 // sharedEditsDirs. 589 if (!editsDirs.contains(u) && 590 !sharedEditsDirs.contains(u)) { 591 throw new IllegalArgumentException( 592 "Required edits directory " + u.toString() + " not present in " + 593 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". " + 594 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" + 595 editsDirs.toString() + "; " + 596 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" + 597 requiredEditsDirs.toString() + ". " + 598 DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" + 599 sharedEditsDirs.toString() + "."); 600 } 601 } 602 603 if (namespaceDirs.size() == 1) { 604 LOG.warn("Only one image storage directory (" 605 + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of dataloss" 606 + " due to lack of redundant storage directories!"); 607 } 608 if (editsDirs.size() == 1) { 609 LOG.warn("Only one namespace edits storage directory (" 610 + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of dataloss" 611 + " due to lack of redundant storage directories!"); 612 } 613 } 614 615 /** 616 * Instantiates an FSNamesystem loaded from the image and edits 617 * directories specified in the passed Configuration. 
618 * 619 * @param conf the Configuration which specifies the storage directories 620 * from which to load 621 * @return an FSNamesystem which contains the loaded namespace 622 * @throws IOException if loading fails 623 */ 624 static FSNamesystem loadFromDisk(Configuration conf) throws IOException { 625 626 checkConfiguration(conf); 627 FSImage fsImage = new FSImage(conf, 628 FSNamesystem.getNamespaceDirs(conf), 629 FSNamesystem.getNamespaceEditsDirs(conf)); 630 FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false); 631 StartupOption startOpt = NameNode.getStartupOption(conf); 632 if (startOpt == StartupOption.RECOVER) { 633 namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER); 634 } 635 636 long loadStart = now(); 637 try { 638 namesystem.loadFSImage(startOpt); 639 } catch (IOException ioe) { 640 LOG.warn("Encountered exception loading fsimage", ioe); 641 fsImage.close(); 642 throw ioe; 643 } 644 long timeTakenToLoadFSImage = now() - loadStart; 645 LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs"); 646 NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics(); 647 if (nnMetrics != null) { 648 nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage); 649 } 650 return namesystem; 651 } 652 653 FSNamesystem(Configuration conf, FSImage fsImage) throws IOException { 654 this(conf, fsImage, false); 655 } 656 657 /** 658 * Create an FSNamesystem associated with the specified image. 659 * 660 * Note that this does not load any data off of disk -- if you would 661 * like that behavior, use {@link #loadFromDisk(Configuration)} 662 * 663 * @param conf configuration 664 * @param fsImage The FSImage to associate with 665 * @param ignoreRetryCache Whether or not should ignore the retry cache setup 666 * step. For Secondary NN this should be set to true. 
 * @throws IOException on bad configuration
 */
FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache)
    throws IOException {
  if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY,
      DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) {
    LOG.info("Enabling async auditlog");
    enableAsyncAuditLog();
  }
  // Fairness of the namesystem lock is configurable (undocumented key).
  boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true);
  LOG.info("fsLock is fair:" + fair);
  fsLock = new FSNamesystemLock(fair);
  try {
    resourceRecheckInterval = conf.getLong(
        DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
        DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT);

    this.blockManager = new BlockManager(this, this, conf);
    this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics();
    this.blockIdGenerator = new SequentialBlockIdGenerator(this.blockManager);

    this.fsOwner = UserGroupInformation.getCurrentUser();
    this.fsOwnerShortUserName = fsOwner.getShortUserName();
    this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY,
        DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
    this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY,
        DFS_PERMISSIONS_ENABLED_DEFAULT);
    LOG.info("fsOwner = " + fsOwner);
    LOG.info("supergroup = " + supergroup);
    LOG.info("isPermissionEnabled = " + isPermissionEnabled);

    // block allocation has to be persisted in HA using a shared edits directory
    // so that the standby has up-to-date namespace information
    String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
    this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);

    // Sanity check the HA-related config.
    if (nameserviceId != null) {
      LOG.info("Determined nameservice ID: " + nameserviceId);
    }
    LOG.info("HA Enabled: " + haEnabled);
    if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) {
      LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf));
      throw new IOException("Invalid configuration: a shared edits dir " +
          "must not be specified if HA is not enabled.");
    }

    // Get the checksum type from config
    String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT);
    DataChecksum.Type checksumType;
    try {
      checksumType = DataChecksum.Type.valueOf(checksumTypeStr);
    } catch (IllegalArgumentException iae) {
      throw new IOException("Invalid checksum type in "
          + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr);
    }

    // Server defaults handed back to clients via getServerDefaults().
    this.serverDefaults = new FsServerDefaults(
        conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT),
        conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT),
        conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT),
        (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT),
        conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT),
        conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT),
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT),
        checksumType);

    this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY,
        DFS_NAMENODE_MAX_OBJECTS_DEFAULT);

    this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY,
        DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT);
    this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT);
    this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
        DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);
    this.supportAppends =
        conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT);
    LOG.info("Append Enabled: " + supportAppends);

    this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf);

    this.standbyShouldCheckpoint = conf.getBoolean(
        DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
    // edit autoroll threshold is a multiple of the checkpoint threshold
    this.editLogRollerThreshold = (long)
        (conf.getFloat(
            DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
            DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
        conf.getLong(
            DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
            DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
    this.editLogRollerInterval = conf.getInt(
        DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
        DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
    this.inodeId = new INodeId();

    // For testing purposes, allow the DT secret manager to be started regardless
    // of whether security is enabled.
    alwaysUseDelegationTokensForTests = conf.getBoolean(
        DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
        DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);

    this.dtSecretManager = createDelegationTokenSecretManager(conf);
    this.dir = new FSDirectory(fsImage, this, conf);
    this.snapshotManager = new SnapshotManager(dir);
    this.cacheManager = new CacheManager(this, conf, blockManager);
    this.safeMode = new SafeModeInfo(conf);
    this.auditLoggers = initAuditLoggers(conf);
    this.isDefaultAuditLogger = auditLoggers.size() == 1 &&
        auditLoggers.get(0) instanceof DefaultAuditLogger;
    // Secondary NN passes ignoreRetryCache=true: it serves no client RPCs.
    this.retryCache = ignoreRetryCache ?
        null : initRetryCache(conf);
    this.aclConfigFlag = new AclConfigFlag(conf);
  } catch(IOException e) {
    LOG.error(getClass().getSimpleName() + " initialization failed.", e);
    // Release anything partially initialized (locks, threads, storage).
    close();
    throw e;
  } catch (RuntimeException re) {
    LOG.error(getClass().getSimpleName() + " initialization failed.", re);
    close();
    throw re;
  }
}

/** @return the retry cache, or null when disabled or skipped at construction. */
@VisibleForTesting
public RetryCache getRetryCache() {
  return retryCache;
}

// The lock/unlock/add helpers below are all no-ops when the retry cache
// is disabled (retryCache == null).
void lockRetryCache() {
  if (retryCache != null) {
    retryCache.lock();
  }
}

void unlockRetryCache() {
  if (retryCache != null) {
    retryCache.unlock();
  }
}

/** Whether or not retry cache is enabled */
boolean hasRetryCache() {
  return retryCache != null;
}

void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) {
  if (retryCache != null) {
    retryCache.addCacheEntryWithPayload(clientId, callId, payload);
  }
}

void addCacheEntry(byte[] clientId, int callId) {
  if (retryCache != null) {
    retryCache.addCacheEntry(clientId, callId);
  }
}

/**
 * Build the retry cache from configuration.
 * @return a configured RetryCache, or null when the cache is disabled.
 */
@VisibleForTesting
static RetryCache initRetryCache(Configuration conf) {
  boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY,
      DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT);
  LOG.info("Retry cache on namenode is " + (enable ?
      "enabled" : "disabled"));
  if (enable) {
    float heapPercent = conf.getFloat(
        DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY,
        DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT);
    long entryExpiryMillis = conf.getLong(
        DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY,
        DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT);
    LOG.info("Retry cache will use " + heapPercent
        + " of total heap and retry cache entry expiry time is "
        + entryExpiryMillis + " millis");
    // RetryCache expects nanoseconds.
    long entryExpiryNanos = entryExpiryMillis * 1000 * 1000;
    return new RetryCache("NameNodeRetryCache", heapPercent,
        entryExpiryNanos);
  }
  return null;
}

/**
 * Instantiate the audit loggers named in configuration; falls back to a
 * single DefaultAuditLogger when none are configured.
 * @return an unmodifiable, non-empty list of initialized loggers
 */
private List<AuditLogger> initAuditLoggers(Configuration conf) {
  // Initialize the custom access loggers if configured.
  Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY);
  List<AuditLogger> auditLoggers = Lists.newArrayList();
  if (alClasses != null && !alClasses.isEmpty()) {
    for (String className : alClasses) {
      try {
        AuditLogger logger;
        if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) {
          logger = new DefaultAuditLogger();
        } else {
          // Custom loggers are loaded reflectively by class name.
          logger = (AuditLogger) Class.forName(className).newInstance();
        }
        logger.initialize(conf);
        auditLoggers.add(logger);
      } catch (RuntimeException re) {
        throw re;
      } catch (Exception e) {
        // A misconfigured logger is fatal for NN startup.
        throw new RuntimeException(e);
      }
    }
  }

  // Make sure there is at least one logger installed.
  if (auditLoggers.isEmpty()) {
    auditLoggers.add(new DefaultAuditLogger());
  }
  return Collections.unmodifiableList(auditLoggers);
}

/**
 * Load (or format) the namespace image and open the edit log, under the
 * namesystem write lock. On failure the image is closed before rethrowing.
 */
private void loadFSImage(StartupOption startOpt) throws IOException {
  final FSImage fsImage = getFSImage();

  // format before starting up if requested
  if (startOpt == StartupOption.FORMAT) {

    fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id

    startOpt = StartupOption.REGULAR;
  }
  boolean success = false;
  writeLock();
  try {
    // We shouldn't be calling saveNamespace if we've come up in standby state.
    MetaRecoveryContext recovery = startOpt.createRecoveryContext();
    final boolean staleImage
        = fsImage.recoverTransitionRead(startOpt, this, recovery);
    if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt)) {
      rollingUpgradeInfo = null;
    }
    final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade();
    LOG.info("Need to save fs image? " + needToSave
        + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled
        + ", isRollingUpgrade=" + isRollingUpgrade() + ")");
    if (needToSave) {
      fsImage.saveNamespace(this);
    } else {
      // No need to save, so mark the phase done.
      StartupProgress prog = NameNode.getStartupProgress();
      prog.beginPhase(Phase.SAVING_CHECKPOINT);
      prog.endPhase(Phase.SAVING_CHECKPOINT);
    }
    // This will start a new log segment and write to the seen_txid file, so
    // we shouldn't do it when coming up in standby state
    if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE)) {
      fsImage.openEditLogForWrite();
    }
    success = true;
  } finally {
    if (!success) {
      fsImage.close();
    }
    writeUnlock();
  }
  dir.imageLoadComplete();
}

/** Start the delegation-token secret manager threads, if one exists. */
private void startSecretManager() {
  if (dtSecretManager != null) {
    try {
      dtSecretManager.startThreads();
    } catch (IOException e) {
      // Inability to start secret manager
      // can't be recovered from.
      throw new RuntimeException(e);
    }
  }
}

// NOTE(review): unlike startSecretManager(), this dereferences
// dtSecretManager without a null check — confirm callers guarantee it.
private void startSecretManagerIfNecessary() {
  boolean shouldRun = shouldUseDelegationTokens() &&
      !isInSafeMode() && getEditLog().isOpenForWrite();
  boolean running = dtSecretManager.isRunning();
  if (shouldRun && !running) {
    startSecretManager();
  }
}

private void stopSecretManager() {
  if (dtSecretManager != null) {
    dtSecretManager.stopThreads();
  }
}

/**
 * Start services common to both active and standby states
 * @param haContext the HA context supplied by the NameNode
 * @throws IOException if resource checking fails
 */
void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
  this.registerMBean(); // register the MBean for the FSNamesystemState
  writeLock();
  this.haContext = haContext;
  try {
    nnResourceChecker = new NameNodeResourceChecker(conf);
    checkAvailableResources();
    assert safeMode != null && !isPopulatingReplQueues();
    StartupProgress prog = NameNode.getStartupProgress();
    prog.beginPhase(Phase.SAFEMODE);
    prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS,
        getCompleteBlocksTotal());
    setBlockTotal();
    blockManager.activate(conf);
  } finally
  {
    writeUnlock();
  }

  registerMXBean();
  DefaultMetricsSystem.instance().register(this);
}

/**
 * Stop services common to both active and standby states
 */
void stopCommonServices() {
  writeLock();
  try {
    if (blockManager != null) blockManager.close();
  } finally {
    writeUnlock();
  }
  // Static clear: safe to call with a null cache.
  RetryCache.clear(retryCache);
}

/**
 * Start services required in active state
 * @throws IOException
 */
void startActiveServices() throws IOException {
  startingActiveService = true;
  LOG.info("Starting services required for active state");
  writeLock();
  try {
    FSEditLog editLog = dir.fsImage.getEditLog();

    if (!editLog.isOpenForWrite()) {
      // During startup, we're already open for write during initialization.
      editLog.initJournalsForWrite();
      // May need to recover
      editLog.recoverUnclosedStreams();

      LOG.info("Catching up to latest edits from old active before " +
          "taking over writer role in edits logs");
      editLogTailer.catchupDuringFailover();

      blockManager.setPostponeBlocksFromFuture(false);
      blockManager.getDatanodeManager().markAllDatanodesStale();
      blockManager.clearQueues();
      blockManager.processAllPendingDNMessages();

      // Only need to re-process the queue, If not in SafeMode.
      if (!isInSafeMode()) {
        LOG.info("Reprocessing replication and invalidation queues");
        initializeReplQueues();
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug("NameNode metadata after re-processing " +
            "replication and invalidation queues during failover:\n" +
            metaSaveAsString());
      }

      long nextTxId = dir.fsImage.getLastAppliedTxId() + 1;
      LOG.info("Will take over writing edit logs at txnid " +
          nextTxId);
      editLog.setNextTxId(nextTxId);

      dir.fsImage.editLog.openForWrite();
    }

    if (haEnabled) {
      // Renew all of the leases before becoming active.
      // This is because, while we were in standby mode,
      // the leases weren't getting renewed on this NN.
      // Give them all a fresh start here.
      leaseManager.renewAllLeases();
    }
    leaseManager.startMonitor();
    startSecretManagerIfNecessary();

    //ResourceMonitor required only at ActiveNN. See HDFS-2914
    this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
    nnrmthread.start();

    nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
        editLogRollerThreshold, editLogRollerInterval));
    nnEditLogRoller.start();

    cacheManager.startMonitorThread();
    blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
  } finally {
    writeUnlock();
    startingActiveService = false;
  }
}

/**
 * Initialize replication queues.
 */
private void initializeReplQueues() {
  LOG.info("initializing replication queues");
  blockManager.processMisReplicatedBlocks();
  initializedReplQueues = true;
}

/** @return true iff the HA context reports the ACTIVE service state. */
private boolean inActiveState() {
  return haContext != null &&
      haContext.getState().getServiceState() == HAServiceState.ACTIVE;
}

/**
 * @return Whether the namenode is transitioning to active state and is in the
 * middle of the {@link #startActiveServices()}
 */
public boolean inTransitionToActive() {
  return haEnabled && inActiveState() && startingActiveService;
}

private boolean shouldUseDelegationTokens() {
  return UserGroupInformation.isSecurityEnabled() ||
      alwaysUseDelegationTokensForTests;
}

/**
 * Stop services required in active state
 * @throws InterruptedException
 */
void stopActiveServices() {
  LOG.info("Stopping services started for active state");
  writeLock();
  try {
    stopSecretManager();
    if (leaseManager != null) {
      leaseManager.stopMonitor();
    }
    if (nnrmthread != null) {
      ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
      nnrmthread.interrupt();
    }
    if (nnEditLogRoller != null) {
      ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
      nnEditLogRoller.interrupt();
    }
    if (dir != null && dir.fsImage != null) {
      if (dir.fsImage.editLog != null) {
        dir.fsImage.editLog.close();
      }
      // Update the fsimage with the last txid that we wrote
      // so that the tailer starts from the right spot.
      dir.fsImage.updateLastAppliedTxIdFromWritten();
    }
    cacheManager.stopMonitorThread();
    cacheManager.clearDirectiveStats();
    blockManager.getDatanodeManager().clearPendingCachingCommands();
    blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
    // Don't want to keep replication queues when not in Active.
    blockManager.clearQueues();
    initializedReplQueues = false;
  } finally {
    writeUnlock();
  }
}

/**
 * Start services required in standby state
 *
 * @throws IOException
 */
void startStandbyServices(final Configuration conf) throws IOException {
  LOG.info("Starting services required for standby state");
  if (!dir.fsImage.editLog.isOpenForRead()) {
    // During startup, we're already open for read.
    dir.fsImage.editLog.initSharedJournalsForRead();
  }

  blockManager.setPostponeBlocksFromFuture(true);

  editLogTailer = new EditLogTailer(this, conf);
  editLogTailer.start();
  if (standbyShouldCheckpoint) {
    standbyCheckpointer = new StandbyCheckpointer(conf, this);
    standbyCheckpointer.start();
  }
}

/**
 * Called when the NN is in Standby state and the editlog tailer tails the
 * OP_ROLLING_UPGRADE_START.
 */
void triggerRollbackCheckpoint() {
  setNeedRollbackFsImage(true);
  if (standbyCheckpointer != null) {
    standbyCheckpointer.triggerRollbackCheckpoint();
  }
}

/**
 * Called while the NN is in Standby state, but just about to be
 * asked to enter Active state. This cancels any checkpoints
 * currently being taken.
 */
void prepareToStopStandbyServices() throws ServiceFailedException {
  if (standbyCheckpointer != null) {
    standbyCheckpointer.cancelAndPreventCheckpoints(
        "About to leave standby state");
  }
}

/** Stop services required in standby state */
void stopStandbyServices() throws IOException {
  LOG.info("Stopping services started for standby state");
  if (standbyCheckpointer != null) {
    standbyCheckpointer.stop();
  }
  if (editLogTailer != null) {
    editLogTailer.stop();
  }
  if (dir != null && dir.fsImage != null && dir.fsImage.editLog != null) {
    dir.fsImage.editLog.close();
  }
}

@Override
public void checkOperation(OperationCategory op) throws StandbyException {
  if (haContext != null) {
    // null in some unit tests
    haContext.checkOperation(op);
  }
}

/**
 * @throws RetriableException
 *           If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3)
 *           NameNode is in active state
 * @throws SafeModeException
 *           Otherwise if NameNode is in SafeMode.
 */
private void checkNameNodeSafeMode(String errorMsg)
    throws RetriableException, SafeModeException {
  if (isInSafeMode()) {
    SafeModeException se = new SafeModeException(errorMsg, safeMode);
    if (haEnabled && haContext != null
        && haContext.getState().getServiceState() == HAServiceState.ACTIVE
        && shouldRetrySafeMode(this.safeMode)) {
      throw new RetriableException(se);
    } else {
      throw se;
    }
  }
}

/**
 * We already know that the safemode is on. We will throw a RetriableException
 * if the safemode is not manual or caused by low resource.
 */
private boolean shouldRetrySafeMode(SafeModeInfo safeMode) {
  if (safeMode == null) {
    return false;
  } else {
    return !safeMode.isManual() && !safeMode.areResourcesLow();
  }
}

public static Collection<URI> getNamespaceDirs(Configuration conf) {
  return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY);
}

/**
 * Get all edits dirs which are required. If any shared edits dirs are
 * configured, these are also included in the set of required dirs.
 *
 * @param conf the HDFS configuration.
 * @return all required dirs.
 */
public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) {
  Set<URI> ret = new HashSet<URI>();
  ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY));
  ret.addAll(getSharedEditsDirs(conf));
  return ret;
}

/**
 * Resolve the storage directories configured under {@code propertyName}
 * into URIs. Under IMPORT startup, directories that come from the default
 * config resources are stripped out so only hdfs-site.xml entries remain.
 */
private static Collection<URI> getStorageDirs(Configuration conf,
    String propertyName) {
  Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName);
  StartupOption startOpt = NameNode.getStartupOption(conf);
  if(startOpt == StartupOption.IMPORT) {
    // In case of IMPORT this will get rid of default directories
    // but will retain directories specified in hdfs-site.xml
    // When importing image from a checkpoint, the name-node can
    // start with empty set of storage directories.
    Configuration cE = new HdfsConfiguration(false);
    cE.addResource("core-default.xml");
    cE.addResource("core-site.xml");
    cE.addResource("hdfs-default.xml");
    Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName);
    dirNames.removeAll(dirNames2);
    if(dirNames.isEmpty())
      LOG.warn("!!! WARNING !!!" +
          "\n\tThe NameNode currently runs without persistent storage." +
          "\n\tAny changes to the file system meta-data may be lost."
          +
          "\n\tRecommended actions:" +
          "\n\t\t- shutdown and restart NameNode with configured \""
          + propertyName + "\" in hdfs-site.xml;" +
          "\n\t\t- use Backup Node as a persistent and up-to-date storage " +
          "of the file system meta-data.");
  } else if (dirNames.isEmpty()) {
    // NOTE(review): the fallback default is DFS_NAMENODE_EDITS_DIR_DEFAULT
    // regardless of which property was asked for — confirm this is intended
    // for name-dir lookups too.
    dirNames = Collections.singletonList(
        DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT);
  }
  return Util.stringCollectionAsURIs(dirNames);
}

/**
 * Return an ordered list of edits directories to write to.
 * The list is ordered such that all shared edits directories
 * are ordered before non-shared directories, and any duplicates
 * are removed. The order they are specified in the configuration
 * is retained.
 * @return Collection of shared edits directories.
 * @throws IOException if multiple shared edits directories are configured
 */
public static List<URI> getNamespaceEditsDirs(Configuration conf)
    throws IOException {
  return getNamespaceEditsDirs(conf, true);
}

public static List<URI> getNamespaceEditsDirs(Configuration conf,
    boolean includeShared)
    throws IOException {
  // Use a LinkedHashSet so that order is maintained while we de-dup
  // the entries.
  LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>();

  if (includeShared) {
    List<URI> sharedDirs = getSharedEditsDirs(conf);

    // Fail until multiple shared edits directories are supported (HDFS-2782)
    if (sharedDirs.size() > 1) {
      throw new IOException(
          "Multiple shared edits directories are not yet supported");
    }

    // First add the shared edits dirs. It's critical that the shared dirs
    // are added first, since JournalSet syncs them in the order they are listed,
    // and we need to make sure all edits are in place in the shared storage
    // before they are replicated locally. See HDFS-2874.
    for (URI dir : sharedDirs) {
      if (!editsDirs.add(dir)) {
        LOG.warn("Edits URI " + dir + " listed multiple times in " +
            DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates.");
      }
    }
  }
  // Now add the non-shared dirs.
  for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) {
    if (!editsDirs.add(dir)) {
      LOG.warn("Edits URI " + dir + " listed multiple times in " +
          DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " +
          DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates.");
    }
  }

  if (editsDirs.isEmpty()) {
    // If this is the case, no edit dirs have been explicitly configured.
    // Image dirs are to be used for edits too.
    return Lists.newArrayList(getNamespaceDirs(conf));
  } else {
    return Lists.newArrayList(editsDirs);
  }
}

/**
 * Returns edit directories that are shared between primary and secondary.
 * @param conf configuration to read the shared-edits key from
 * @return Collection of edit directories.
 */
public static List<URI> getSharedEditsDirs(Configuration conf) {
  // don't use getStorageDirs here, because we want an empty default
  // rather than the dir in /tmp
  Collection<String> dirNames = conf.getTrimmedStringCollection(
      DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
  return Util.stringCollectionAsURIs(dirNames);
}

@Override
public void readLock() {
  this.fsLock.readLock().lock();
}
@Override
public void longReadLockInterruptibly() throws InterruptedException {
  // Acquire the long read lock first, then the normal read lock.
  this.fsLock.longReadLock().lockInterruptibly();
  try {
    this.fsLock.readLock().lockInterruptibly();
  } catch (InterruptedException ie) {
    // In the event we're interrupted while getting the normal FSNS read lock,
    // release the long read lock.
    this.fsLock.longReadLock().unlock();
    throw ie;
  }
}
@Override
public void longReadUnlock() {
  // Release in reverse order of acquisition.
  this.fsLock.readLock().unlock();
  this.fsLock.longReadLock().unlock();
}
@Override
public void readUnlock() {
  this.fsLock.readLock().unlock();
}
@Override
public void writeLock() {
  // The write path also takes the long read lock; see FSNamesystemLock.
  this.fsLock.longReadLock().lock();
  this.fsLock.writeLock().lock();
}
@Override
public void writeLockInterruptibly() throws InterruptedException {
  this.fsLock.longReadLock().lockInterruptibly();
  try {
    this.fsLock.writeLock().lockInterruptibly();
  } catch (InterruptedException ie) {
    // In the event we're interrupted while getting the normal FSNS write
    // lock, release the long read lock.
    this.fsLock.longReadLock().unlock();
    throw ie;
  }
}
@Override
public void writeUnlock() {
  this.fsLock.writeLock().unlock();
  this.fsLock.longReadLock().unlock();
}
@Override
public boolean hasWriteLock() {
  return this.fsLock.isWriteLockedByCurrentThread();
}
@Override
public boolean hasReadLock() {
  // Holding the write lock also counts as holding a read lock.
  return this.fsLock.getReadHoldCount() > 0 || hasWriteLock();
}

public int getReadHoldCount() {
  return this.fsLock.getReadHoldCount();
}

public int getWriteHoldCount() {
  return this.fsLock.getWriteHoldCount();
}

NamespaceInfo getNamespaceInfo() {
  readLock();
  try {
    return unprotectedGetNamespaceInfo();
  } finally {
    readUnlock();
  }
}

/**
 * Version of @see #getNamespaceInfo() that is not protected by a lock.
 */
NamespaceInfo unprotectedGetNamespaceInfo() {
  return new NamespaceInfo(dir.fsImage.getStorage().getNamespaceID(),
      getClusterId(), getBlockPoolId(),
      dir.fsImage.getStorage().getCTime());
}

/**
 * Close down this file system manager.
1435 * Causes heartbeat and lease daemons to stop; waits briefly for 1436 * them to finish, but a short timeout returns control back to caller. 1437 */ 1438 void close() { 1439 fsRunning = false; 1440 try { 1441 stopCommonServices(); 1442 if (smmthread != null) smmthread.interrupt(); 1443 } finally { 1444 // using finally to ensure we also wait for lease daemon 1445 try { 1446 stopActiveServices(); 1447 stopStandbyServices(); 1448 if (dir != null) { 1449 dir.close(); 1450 } 1451 } catch (IOException ie) { 1452 LOG.error("Error closing FSDirectory", ie); 1453 IOUtils.cleanup(LOG, dir); 1454 } 1455 } 1456 } 1457 1458 @Override 1459 public boolean isRunning() { 1460 return fsRunning; 1461 } 1462 1463 @Override 1464 public boolean isInStandbyState() { 1465 if (haContext == null || haContext.getState() == null) { 1466 // We're still starting up. In this case, if HA is 1467 // on for the cluster, we always start in standby. Otherwise 1468 // start in active. 1469 return haEnabled; 1470 } 1471 1472 return HAServiceState.STANDBY == haContext.getState().getServiceState(); 1473 } 1474 1475 /** 1476 * Dump all metadata into specified file 1477 */ 1478 void metaSave(String filename) throws IOException { 1479 checkSuperuserPrivilege(); 1480 checkOperation(OperationCategory.UNCHECKED); 1481 writeLock(); 1482 try { 1483 checkOperation(OperationCategory.UNCHECKED); 1484 File file = new File(System.getProperty("hadoop.log.dir"), filename); 1485 PrintWriter out = new PrintWriter(new BufferedWriter( 1486 new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8))); 1487 metaSave(out); 1488 out.flush(); 1489 out.close(); 1490 } finally { 1491 writeUnlock(); 1492 } 1493 } 1494 1495 private void metaSave(PrintWriter out) { 1496 assert hasWriteLock(); 1497 long totalInodes = this.dir.totalInodes(); 1498 long totalBlocks = this.getBlocksTotal(); 1499 out.println(totalInodes + " files and directories, " + totalBlocks 1500 + " blocks = " + (totalInodes + totalBlocks) + " total"); 
1501 1502 blockManager.metaSave(out); 1503 } 1504 1505 private String metaSaveAsString() { 1506 StringWriter sw = new StringWriter(); 1507 PrintWriter pw = new PrintWriter(sw); 1508 metaSave(pw); 1509 pw.flush(); 1510 return sw.toString(); 1511 } 1512 1513 1514 long getDefaultBlockSize() { 1515 return serverDefaults.getBlockSize(); 1516 } 1517 1518 FsServerDefaults getServerDefaults() throws StandbyException { 1519 checkOperation(OperationCategory.READ); 1520 return serverDefaults; 1521 } 1522 1523 long getAccessTimePrecision() { 1524 return accessTimePrecision; 1525 } 1526 1527 private boolean isAccessTimeSupported() { 1528 return accessTimePrecision > 0; 1529 } 1530 1531 ///////////////////////////////////////////////////////// 1532 // 1533 // These methods are called by HadoopFS clients 1534 // 1535 ///////////////////////////////////////////////////////// 1536 /** 1537 * Set permissions for an existing file. 1538 * @throws IOException 1539 */ 1540 void setPermission(String src, FsPermission permission) 1541 throws AccessControlException, FileNotFoundException, SafeModeException, 1542 UnresolvedLinkException, IOException { 1543 try { 1544 setPermissionInt(src, permission); 1545 } catch (AccessControlException e) { 1546 logAuditEvent(false, "setPermission", src); 1547 throw e; 1548 } 1549 } 1550 1551 private void setPermissionInt(String src, FsPermission permission) 1552 throws AccessControlException, FileNotFoundException, SafeModeException, 1553 UnresolvedLinkException, IOException { 1554 HdfsFileStatus resultingStat = null; 1555 FSPermissionChecker pc = getPermissionChecker(); 1556 checkOperation(OperationCategory.WRITE); 1557 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 1558 writeLock(); 1559 try { 1560 checkOperation(OperationCategory.WRITE); 1561 checkNameNodeSafeMode("Cannot set permission for " + src); 1562 src = FSDirectory.resolvePath(src, pathComponents, dir); 1563 checkOwner(pc, src); 1564 dir.setPermission(src, 
permission); 1565 resultingStat = getAuditFileInfo(src, false); 1566 } finally { 1567 writeUnlock(); 1568 } 1569 getEditLog().logSync(); 1570 logAuditEvent(true, "setPermission", src, null, resultingStat); 1571 } 1572 1573 /** 1574 * Set owner for an existing file. 1575 * @throws IOException 1576 */ 1577 void setOwner(String src, String username, String group) 1578 throws AccessControlException, FileNotFoundException, SafeModeException, 1579 UnresolvedLinkException, IOException { 1580 try { 1581 setOwnerInt(src, username, group); 1582 } catch (AccessControlException e) { 1583 logAuditEvent(false, "setOwner", src); 1584 throw e; 1585 } 1586 } 1587 1588 private void setOwnerInt(String src, String username, String group) 1589 throws AccessControlException, FileNotFoundException, SafeModeException, 1590 UnresolvedLinkException, IOException { 1591 HdfsFileStatus resultingStat = null; 1592 FSPermissionChecker pc = getPermissionChecker(); 1593 checkOperation(OperationCategory.WRITE); 1594 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 1595 writeLock(); 1596 try { 1597 checkOperation(OperationCategory.WRITE); 1598 checkNameNodeSafeMode("Cannot set owner for " + src); 1599 src = FSDirectory.resolvePath(src, pathComponents, dir); 1600 checkOwner(pc, src); 1601 if (!pc.isSuperUser()) { 1602 if (username != null && !pc.getUser().equals(username)) { 1603 throw new AccessControlException("Non-super user cannot change owner"); 1604 } 1605 if (group != null && !pc.containsGroup(group)) { 1606 throw new AccessControlException("User does not belong to " + group); 1607 } 1608 } 1609 dir.setOwner(src, username, group); 1610 resultingStat = getAuditFileInfo(src, false); 1611 } finally { 1612 writeUnlock(); 1613 } 1614 getEditLog().logSync(); 1615 logAuditEvent(true, "setOwner", src, null, resultingStat); 1616 } 1617 1618 /** 1619 * Get block locations within the specified range. 
 * @see ClientProtocol#getBlockLocations(String, long, long)
 */
LocatedBlocks getBlockLocations(String clientMachine, String src,
    long offset, long length) throws AccessControlException,
    FileNotFoundException, UnresolvedLinkException, IOException {
  LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true,
      true);
  if (blocks != null) {
    // Sort block replicas by proximity to the requesting client.
    blockManager.getDatanodeManager().sortLocatedBlocks(
        clientMachine, blocks.getLocatedBlocks());

    // The last block is returned separately and must be sorted as well.
    LocatedBlock lastBlock = blocks.getLastLocatedBlock();
    if (lastBlock != null) {
      ArrayList<LocatedBlock> lastBlockList = new ArrayList<LocatedBlock>();
      lastBlockList.add(lastBlock);
      blockManager.getDatanodeManager().sortLocatedBlocks(
          clientMachine, lastBlockList);
    }
  }
  return blocks;
}

/**
 * Get block locations within the specified range.
 * @see ClientProtocol#getBlockLocations(String, long, long)
 * @throws FileNotFoundException if the file does not exist
 * @throws UnresolvedLinkException if a symlink cannot be resolved
 * @throws IOException on other errors
 */
LocatedBlocks getBlockLocations(String src, long offset, long length,
    boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode)
    throws FileNotFoundException, UnresolvedLinkException, IOException {
  try {
    return getBlockLocationsInt(src, offset, length, doAccessTime,
        needBlockToken, checkSafeMode);
  } catch (AccessControlException e) {
    logAuditEvent(false, "open", src);
    throw e;
  }
}

private LocatedBlocks getBlockLocationsInt(String src, long offset,
    long length, boolean doAccessTime, boolean needBlockToken,
    boolean checkSafeMode)
    throws FileNotFoundException, UnresolvedLinkException, IOException {
  if (offset < 0) {
    throw new HadoopIllegalArgumentException(
        "Negative offset is not supported. File: " + src);
  }
  if (length < 0) {
    throw new HadoopIllegalArgumentException(
        "Negative length is not supported. File: " + src);
  }
  final LocatedBlocks ret = getBlockLocationsUpdateTimes(src,
      offset, length, doAccessTime, needBlockToken);
  logAuditEvent(true, "open", src);
  if (checkSafeMode && isInSafeMode()) {
    for (LocatedBlock b : ret.getLocatedBlocks()) {
      // if safemode & no block locations yet then throw safemodeException
      if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
        SafeModeException se = new SafeModeException(
            "Zero blocklocations for " + src, safeMode);
        if (haEnabled && haContext != null &&
            haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
          // On an HA active NN the client may retry; wrap accordingly.
          throw new RetriableException(se);
        } else {
          throw se;
        }
      }
    }
  }
  return ret;
}

/*
 * Get block locations within the specified range, updating the
 * access times if necessary.
 *
 * Two-attempt strategy: the first attempt runs under the read lock; if the
 * access time must be updated, the loop restarts under the write lock.
 */
private LocatedBlocks getBlockLocationsUpdateTimes(String src, long offset,
    long length, boolean doAccessTime, boolean needBlockToken)
    throws FileNotFoundException,
    UnresolvedLinkException, IOException {
  FSPermissionChecker pc = getPermissionChecker();
  byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
  for (int attempt = 0; attempt < 2; attempt++) {
    boolean isReadOp = (attempt == 0);
    if (isReadOp) { // first attempt is with readlock
      checkOperation(OperationCategory.READ);
      readLock();
    } else { // second attempt is with write lock
      checkOperation(OperationCategory.WRITE);
      writeLock(); // writelock is needed to set accesstime
    }
    src = FSDirectory.resolvePath(src, pathComponents, dir);
    try {
      // Re-check the operation category after acquiring the lock.
      if (isReadOp) {
        checkOperation(OperationCategory.READ);
      } else {
        checkOperation(OperationCategory.WRITE);
      }
      if (isPermissionEnabled) {
        checkPathAccess(pc, src, FsAction.READ);
      }

      // if the namenode is in safemode, then do not update access time
      if (isInSafeMode()) {
        doAccessTime = false;
      }

      final INodesInPath iip = dir.getLastINodeInPath(src);
      final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src);
      if (!iip.isSnapshot() //snapshots are readonly, so don't update atime.
          && doAccessTime && isAccessTimeSupported()) {
        final long now = now();
        if (now > inode.getAccessTime() + getAccessTimePrecision()) {
          // if we have to set access time but we only have the readlock, then
          // restart this entire operation with the writeLock.
          if (isReadOp) {
            continue;
          }
          dir.setTimes(src, inode, -1, now, false, iip.getLatestSnapshotId());
        }
      }
      final long fileSize = iip.isSnapshot() ?
          inode.computeFileSize(iip.getPathSnapshotId())
          : inode.computeFileSizeNotIncludingLastUcBlock();
      boolean isUc = inode.isUnderConstruction();
      if (iip.isSnapshot()) {
        // if src indicates a snapshot file, we need to make sure the returned
        // blocks do not exceed the size of the snapshot file.
        length = Math.min(length, fileSize - offset);
        isUc = false;
      }
      LocatedBlocks blocks =
          blockManager.createLocatedBlocks(inode.getBlocks(), fileSize,
              isUc, offset, length, needBlockToken, iip.isSnapshot());
      // Set caching information for the located blocks.
      for (LocatedBlock lb: blocks.getLocatedBlocks()) {
        cacheManager.setCachedLocations(lb);
      }
      return blocks;
    } finally {
      if (isReadOp) {
        readUnlock();
      } else {
        writeUnlock();
      }
    }
  }
  return null; // can never reach here
}

/**
 * Moves all the blocks from srcs and appends them to trg
 * To avoid rollbacks we will verify validitity of ALL of the args
 * before we start actual move.
1774 * 1775 * This does not support ".inodes" relative path 1776 * @param target 1777 * @param srcs 1778 * @throws IOException 1779 */ 1780 void concat(String target, String [] srcs) 1781 throws IOException, UnresolvedLinkException { 1782 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 1783 if (cacheEntry != null && cacheEntry.isSuccess()) { 1784 return; // Return previous response 1785 } 1786 1787 // Either there is no previous request in progres or it has failed 1788 if(FSNamesystem.LOG.isDebugEnabled()) { 1789 FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) + 1790 " to " + target); 1791 } 1792 1793 boolean success = false; 1794 try { 1795 concatInt(target, srcs, cacheEntry != null); 1796 success = true; 1797 } catch (AccessControlException e) { 1798 logAuditEvent(false, "concat", Arrays.toString(srcs), target, null); 1799 throw e; 1800 } finally { 1801 RetryCache.setState(cacheEntry, success); 1802 } 1803 } 1804 1805 private void concatInt(String target, String [] srcs, 1806 boolean logRetryCache) throws IOException, UnresolvedLinkException { 1807 // verify args 1808 if(target.isEmpty()) { 1809 throw new IllegalArgumentException("Target file name is empty"); 1810 } 1811 if(srcs == null || srcs.length == 0) { 1812 throw new IllegalArgumentException("No sources given"); 1813 } 1814 1815 // We require all files be in the same directory 1816 String trgParent = 1817 target.substring(0, target.lastIndexOf(Path.SEPARATOR_CHAR)); 1818 for (String s : srcs) { 1819 String srcParent = s.substring(0, s.lastIndexOf(Path.SEPARATOR_CHAR)); 1820 if (!srcParent.equals(trgParent)) { 1821 throw new IllegalArgumentException( 1822 "Sources and target are not in the same directory"); 1823 } 1824 } 1825 1826 HdfsFileStatus resultingStat = null; 1827 FSPermissionChecker pc = getPermissionChecker(); 1828 checkOperation(OperationCategory.WRITE); 1829 writeLock(); 1830 try { 1831 checkOperation(OperationCategory.WRITE); 1832 checkNameNodeSafeMode("Cannot concat 
" + target); 1833 concatInternal(pc, target, srcs, logRetryCache); 1834 resultingStat = getAuditFileInfo(target, false); 1835 } finally { 1836 writeUnlock(); 1837 } 1838 getEditLog().logSync(); 1839 logAuditEvent(true, "concat", Arrays.toString(srcs), target, resultingStat); 1840 } 1841 1842 /** See {@link #concat(String, String[])} */ 1843 private void concatInternal(FSPermissionChecker pc, String target, 1844 String[] srcs, boolean logRetryCache) throws IOException, 1845 UnresolvedLinkException { 1846 assert hasWriteLock(); 1847 1848 // write permission for the target 1849 if (isPermissionEnabled) { 1850 checkPathAccess(pc, target, FsAction.WRITE); 1851 1852 // and srcs 1853 for(String aSrc: srcs) { 1854 checkPathAccess(pc, aSrc, FsAction.READ); // read the file 1855 checkParentAccess(pc, aSrc, FsAction.WRITE); // for delete 1856 } 1857 } 1858 1859 // to make sure no two files are the same 1860 Set<INode> si = new HashSet<INode>(); 1861 1862 // we put the following prerequisite for the operation 1863 // replication and blocks sizes should be the same for ALL the blocks 1864 1865 // check the target 1866 final INodeFile trgInode = INodeFile.valueOf(dir.getINode4Write(target), 1867 target); 1868 if(trgInode.isUnderConstruction()) { 1869 throw new HadoopIllegalArgumentException("concat: target file " 1870 + target + " is under construction"); 1871 } 1872 // per design target shouldn't be empty and all the blocks same size 1873 if(trgInode.numBlocks() == 0) { 1874 throw new HadoopIllegalArgumentException("concat: target file " 1875 + target + " is empty"); 1876 } 1877 if (trgInode.isWithSnapshot()) { 1878 throw new HadoopIllegalArgumentException("concat: target file " 1879 + target + " is in a snapshot"); 1880 } 1881 1882 long blockSize = trgInode.getPreferredBlockSize(); 1883 1884 // check the end block to be full 1885 final BlockInfo last = trgInode.getLastBlock(); 1886 if(blockSize != last.getNumBytes()) { 1887 throw new HadoopIllegalArgumentException("The last 
block in " + target 1888 + " is not full; last block size = " + last.getNumBytes() 1889 + " but file block size = " + blockSize); 1890 } 1891 1892 si.add(trgInode); 1893 final short repl = trgInode.getFileReplication(); 1894 1895 // now check the srcs 1896 boolean endSrc = false; // final src file doesn't have to have full end block 1897 for(int i=0; i<srcs.length; i++) { 1898 String src = srcs[i]; 1899 if(i==srcs.length-1) 1900 endSrc=true; 1901 1902 final INodeFile srcInode = INodeFile.valueOf(dir.getINode4Write(src), src); 1903 if(src.isEmpty() 1904 || srcInode.isUnderConstruction() 1905 || srcInode.numBlocks() == 0) { 1906 throw new HadoopIllegalArgumentException("concat: source file " + src 1907 + " is invalid or empty or underConstruction"); 1908 } 1909 1910 // check replication and blocks size 1911 if(repl != srcInode.getBlockReplication()) { 1912 throw new HadoopIllegalArgumentException("concat: the soruce file " 1913 + src + " and the target file " + target 1914 + " should have the same replication: source replication is " 1915 + srcInode.getBlockReplication() 1916 + " but target replication is " + repl); 1917 } 1918 1919 //boolean endBlock=false; 1920 // verify that all the blocks are of the same length as target 1921 // should be enough to check the end blocks 1922 final BlockInfo[] srcBlocks = srcInode.getBlocks(); 1923 int idx = srcBlocks.length-1; 1924 if(endSrc) 1925 idx = srcBlocks.length-2; // end block of endSrc is OK not to be full 1926 if(idx >= 0 && srcBlocks[idx].getNumBytes() != blockSize) { 1927 throw new HadoopIllegalArgumentException("concat: the soruce file " 1928 + src + " and the target file " + target 1929 + " should have the same blocks sizes: target block size is " 1930 + blockSize + " but the size of source block " + idx + " is " 1931 + srcBlocks[idx].getNumBytes()); 1932 } 1933 1934 si.add(srcInode); 1935 } 1936 1937 // make sure no two files are the same 1938 if(si.size() < srcs.length+1) { // trg + srcs 1939 // it means at least 
two files are the same 1940 throw new HadoopIllegalArgumentException( 1941 "concat: at least two of the source files are the same"); 1942 } 1943 1944 if(NameNode.stateChangeLog.isDebugEnabled()) { 1945 NameNode.stateChangeLog.debug("DIR* NameSystem.concat: " + 1946 Arrays.toString(srcs) + " to " + target); 1947 } 1948 1949 dir.concat(target,srcs, logRetryCache); 1950 } 1951 1952 /** 1953 * stores the modification and access time for this inode. 1954 * The access time is precise upto an hour. The transaction, if needed, is 1955 * written to the edits log but is not flushed. 1956 */ 1957 void setTimes(String src, long mtime, long atime) 1958 throws IOException, UnresolvedLinkException { 1959 if (!isAccessTimeSupported() && atime != -1) { 1960 throw new IOException("Access time for hdfs is not configured. " + 1961 " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter."); 1962 } 1963 try { 1964 setTimesInt(src, mtime, atime); 1965 } catch (AccessControlException e) { 1966 logAuditEvent(false, "setTimes", src); 1967 throw e; 1968 } 1969 } 1970 1971 private void setTimesInt(String src, long mtime, long atime) 1972 throws IOException, UnresolvedLinkException { 1973 HdfsFileStatus resultingStat = null; 1974 FSPermissionChecker pc = getPermissionChecker(); 1975 checkOperation(OperationCategory.WRITE); 1976 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 1977 writeLock(); 1978 try { 1979 checkOperation(OperationCategory.WRITE); 1980 checkNameNodeSafeMode("Cannot set times " + src); 1981 src = FSDirectory.resolvePath(src, pathComponents, dir); 1982 1983 // Write access is required to set access and modification times 1984 if (isPermissionEnabled) { 1985 checkPathAccess(pc, src, FsAction.WRITE); 1986 } 1987 final INodesInPath iip = dir.getINodesInPath4Write(src); 1988 final INode inode = iip.getLastINode(); 1989 if (inode != null) { 1990 dir.setTimes(src, inode, mtime, atime, true, iip.getLatestSnapshotId()); 1991 
resultingStat = getAuditFileInfo(src, false); 1992 } else { 1993 throw new FileNotFoundException("File/Directory " + src + " does not exist."); 1994 } 1995 } finally { 1996 writeUnlock(); 1997 } 1998 logAuditEvent(true, "setTimes", src, null, resultingStat); 1999 } 2000 2001 /** 2002 * Create a symbolic link. 2003 */ 2004 @SuppressWarnings("deprecation") 2005 void createSymlink(String target, String link, 2006 PermissionStatus dirPerms, boolean createParent) 2007 throws IOException, UnresolvedLinkException { 2008 if (!FileSystem.areSymlinksEnabled()) { 2009 throw new UnsupportedOperationException("Symlinks not supported"); 2010 } 2011 if (!DFSUtil.isValidName(link)) { 2012 throw new InvalidPathException("Invalid link name: " + link); 2013 } 2014 if (FSDirectory.isReservedName(target)) { 2015 throw new InvalidPathException("Invalid target name: " + target); 2016 } 2017 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 2018 if (cacheEntry != null && cacheEntry.isSuccess()) { 2019 return; // Return previous response 2020 } 2021 boolean success = false; 2022 try { 2023 createSymlinkInt(target, link, dirPerms, createParent, cacheEntry != null); 2024 success = true; 2025 } catch (AccessControlException e) { 2026 logAuditEvent(false, "createSymlink", link, target, null); 2027 throw e; 2028 } finally { 2029 RetryCache.setState(cacheEntry, success); 2030 } 2031 } 2032 2033 private void createSymlinkInt(String target, String link, 2034 PermissionStatus dirPerms, boolean createParent, boolean logRetryCache) 2035 throws IOException, UnresolvedLinkException { 2036 if (NameNode.stateChangeLog.isDebugEnabled()) { 2037 NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target=" 2038 + target + " link=" + link); 2039 } 2040 HdfsFileStatus resultingStat = null; 2041 FSPermissionChecker pc = getPermissionChecker(); 2042 checkOperation(OperationCategory.WRITE); 2043 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(link); 2044 
writeLock(); 2045 try { 2046 checkOperation(OperationCategory.WRITE); 2047 checkNameNodeSafeMode("Cannot create symlink " + link); 2048 link = FSDirectory.resolvePath(link, pathComponents, dir); 2049 if (!createParent) { 2050 verifyParentDir(link); 2051 } 2052 if (!dir.isValidToCreate(link)) { 2053 throw new IOException("failed to create link " + link 2054 +" either because the filename is invalid or the file exists"); 2055 } 2056 if (isPermissionEnabled) { 2057 checkAncestorAccess(pc, link, FsAction.WRITE); 2058 } 2059 // validate that we have enough inodes. 2060 checkFsObjectLimit(); 2061 2062 // add symbolic link to namespace 2063 dir.addSymlink(link, target, dirPerms, createParent, logRetryCache); 2064 resultingStat = getAuditFileInfo(link, false); 2065 } finally { 2066 writeUnlock(); 2067 } 2068 getEditLog().logSync(); 2069 logAuditEvent(true, "createSymlink", link, target, resultingStat); 2070 } 2071 2072 /** 2073 * Set replication for an existing file. 2074 * 2075 * The NameNode sets new replication and schedules either replication of 2076 * under-replicated data blocks or removal of the excessive block copies 2077 * if the blocks are over-replicated. 
2078 * 2079 * @see ClientProtocol#setReplication(String, short) 2080 * @param src file name 2081 * @param replication new replication 2082 * @return true if successful; 2083 * false if file does not exist or is a directory 2084 */ 2085 boolean setReplication(final String src, final short replication) 2086 throws IOException { 2087 try { 2088 return setReplicationInt(src, replication); 2089 } catch (AccessControlException e) { 2090 logAuditEvent(false, "setReplication", src); 2091 throw e; 2092 } 2093 } 2094 2095 private boolean setReplicationInt(String src, final short replication) 2096 throws IOException { 2097 blockManager.verifyReplication(src, replication, null); 2098 final boolean isFile; 2099 FSPermissionChecker pc = getPermissionChecker(); 2100 checkOperation(OperationCategory.WRITE); 2101 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2102 writeLock(); 2103 try { 2104 checkOperation(OperationCategory.WRITE); 2105 checkNameNodeSafeMode("Cannot set replication for " + src); 2106 src = FSDirectory.resolvePath(src, pathComponents, dir); 2107 if (isPermissionEnabled) { 2108 checkPathAccess(pc, src, FsAction.WRITE); 2109 } 2110 2111 final short[] blockRepls = new short[2]; // 0: old, 1: new 2112 final Block[] blocks = dir.setReplication(src, replication, blockRepls); 2113 isFile = blocks != null; 2114 if (isFile) { 2115 blockManager.setReplication(blockRepls[0], blockRepls[1], src, blocks); 2116 } 2117 } finally { 2118 writeUnlock(); 2119 } 2120 2121 getEditLog().logSync(); 2122 if (isFile) { 2123 logAuditEvent(true, "setReplication", src); 2124 } 2125 return isFile; 2126 } 2127 2128 long getPreferredBlockSize(String filename) 2129 throws IOException, UnresolvedLinkException { 2130 FSPermissionChecker pc = getPermissionChecker(); 2131 checkOperation(OperationCategory.READ); 2132 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(filename); 2133 readLock(); 2134 try { 2135 checkOperation(OperationCategory.READ); 
2136 filename = FSDirectory.resolvePath(filename, pathComponents, dir); 2137 if (isPermissionEnabled) { 2138 checkTraverse(pc, filename); 2139 } 2140 return dir.getPreferredBlockSize(filename); 2141 } finally { 2142 readUnlock(); 2143 } 2144 } 2145 2146 /** 2147 * Verify that parent directory of src exists. 2148 */ 2149 private void verifyParentDir(String src) throws FileNotFoundException, 2150 ParentNotDirectoryException, UnresolvedLinkException { 2151 assert hasReadLock(); 2152 Path parent = new Path(src).getParent(); 2153 if (parent != null) { 2154 final INode parentNode = dir.getINode(parent.toString()); 2155 if (parentNode == null) { 2156 throw new FileNotFoundException("Parent directory doesn't exist: " 2157 + parent); 2158 } else if (!parentNode.isDirectory() && !parentNode.isSymlink()) { 2159 throw new ParentNotDirectoryException("Parent path is not a directory: " 2160 + parent); 2161 } 2162 } 2163 } 2164 2165 /** 2166 * Create a new file entry in the namespace. 2167 * 2168 * For description of parameters and exceptions thrown see 2169 * {@link ClientProtocol#create()}, except it returns valid file status upon 2170 * success 2171 * 2172 * For retryCache handling details see - 2173 * {@link #getFileStatus(boolean, CacheEntryWithPayload)} 2174 * 2175 */ 2176 HdfsFileStatus startFile(String src, PermissionStatus permissions, 2177 String holder, String clientMachine, EnumSet<CreateFlag> flag, 2178 boolean createParent, short replication, long blockSize) 2179 throws AccessControlException, SafeModeException, 2180 FileAlreadyExistsException, UnresolvedLinkException, 2181 FileNotFoundException, ParentNotDirectoryException, IOException { 2182 HdfsFileStatus status = null; 2183 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache, 2184 null); 2185 if (cacheEntry != null && cacheEntry.isSuccess()) { 2186 return (HdfsFileStatus) cacheEntry.getPayload(); 2187 } 2188 2189 try { 2190 status = startFileInt(src, permissions, holder, clientMachine, 
flag, 2191 createParent, replication, blockSize, cacheEntry != null); 2192 } catch (AccessControlException e) { 2193 logAuditEvent(false, "create", src); 2194 throw e; 2195 } finally { 2196 RetryCache.setState(cacheEntry, status != null, status); 2197 } 2198 return status; 2199 } 2200 2201 private HdfsFileStatus startFileInt(String src, PermissionStatus permissions, 2202 String holder, String clientMachine, EnumSet<CreateFlag> flag, 2203 boolean createParent, short replication, long blockSize, 2204 boolean logRetryCache) throws AccessControlException, SafeModeException, 2205 FileAlreadyExistsException, UnresolvedLinkException, 2206 FileNotFoundException, ParentNotDirectoryException, IOException { 2207 if (NameNode.stateChangeLog.isDebugEnabled()) { 2208 NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src 2209 + ", holder=" + holder 2210 + ", clientMachine=" + clientMachine 2211 + ", createParent=" + createParent 2212 + ", replication=" + replication 2213 + ", createFlag=" + flag.toString()); 2214 } 2215 if (!DFSUtil.isValidName(src)) { 2216 throw new InvalidPathException(src); 2217 } 2218 blockManager.verifyReplication(src, replication, clientMachine); 2219 2220 boolean skipSync = false; 2221 HdfsFileStatus stat = null; 2222 FSPermissionChecker pc = getPermissionChecker(); 2223 checkOperation(OperationCategory.WRITE); 2224 if (blockSize < minBlockSize) { 2225 throw new IOException("Specified block size is less than configured" + 2226 " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY 2227 + "): " + blockSize + " < " + minBlockSize); 2228 } 2229 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2230 boolean create = flag.contains(CreateFlag.CREATE); 2231 boolean overwrite = flag.contains(CreateFlag.OVERWRITE); 2232 writeLock(); 2233 try { 2234 checkOperation(OperationCategory.WRITE); 2235 checkNameNodeSafeMode("Cannot create file" + src); 2236 src = FSDirectory.resolvePath(src, pathComponents, dir); 2237 
startFileInternal(pc, src, permissions, holder, clientMachine, create, 2238 overwrite, createParent, replication, blockSize, logRetryCache); 2239 stat = dir.getFileInfo(src, false); 2240 } catch (StandbyException se) { 2241 skipSync = true; 2242 throw se; 2243 } finally { 2244 writeUnlock(); 2245 // There might be transactions logged while trying to recover the lease. 2246 // They need to be sync'ed even when an exception was thrown. 2247 if (!skipSync) { 2248 getEditLog().logSync(); 2249 } 2250 } 2251 logAuditEvent(true, "create", src, null, stat); 2252 return stat; 2253 } 2254 2255 /** 2256 * Create a new file or overwrite an existing file<br> 2257 * 2258 * Once the file is create the client then allocates a new block with the next 2259 * call using {@link NameNode#addBlock()}. 2260 * <p> 2261 * For description of parameters and exceptions thrown see 2262 * {@link ClientProtocol#create()} 2263 */ 2264 private void startFileInternal(FSPermissionChecker pc, String src, 2265 PermissionStatus permissions, String holder, String clientMachine, 2266 boolean create, boolean overwrite, boolean createParent, 2267 short replication, long blockSize, boolean logRetryEntry) 2268 throws FileAlreadyExistsException, AccessControlException, 2269 UnresolvedLinkException, FileNotFoundException, 2270 ParentNotDirectoryException, IOException { 2271 assert hasWriteLock(); 2272 // Verify that the destination does not exist as a directory already. 
2273 final INodesInPath iip = dir.getINodesInPath4Write(src); 2274 final INode inode = iip.getLastINode(); 2275 if (inode != null && inode.isDirectory()) { 2276 throw new FileAlreadyExistsException(src + 2277 " already exists as a directory"); 2278 } 2279 final INodeFile myFile = INodeFile.valueOf(inode, src, true); 2280 if (isPermissionEnabled) { 2281 if (overwrite && myFile != null) { 2282 checkPathAccess(pc, src, FsAction.WRITE); 2283 } else { 2284 checkAncestorAccess(pc, src, FsAction.WRITE); 2285 } 2286 } 2287 2288 if (!createParent) { 2289 verifyParentDir(src); 2290 } 2291 2292 try { 2293 if (myFile == null) { 2294 if (!create) { 2295 throw new FileNotFoundException("Can't overwrite non-existent " + 2296 src + " for client " + clientMachine); 2297 } 2298 } else { 2299 if (overwrite) { 2300 try { 2301 deleteInt(src, true, false); // File exists - delete if overwrite 2302 } catch (AccessControlException e) { 2303 logAuditEvent(false, "delete", src); 2304 throw e; 2305 } 2306 } else { 2307 // If lease soft limit time is expired, recover the lease 2308 recoverLeaseInternal(myFile, src, holder, clientMachine, false); 2309 throw new FileAlreadyExistsException(src + " for client " + 2310 clientMachine + " already exists"); 2311 } 2312 } 2313 2314 checkFsObjectLimit(); 2315 final DatanodeDescriptor clientNode = 2316 blockManager.getDatanodeManager().getDatanodeByHost(clientMachine); 2317 2318 INodeFile newNode = dir.addFile(src, permissions, replication, blockSize, 2319 holder, clientMachine, clientNode); 2320 if (newNode == null) { 2321 throw new IOException("Unable to add " + src + " to namespace"); 2322 } 2323 leaseManager.addLease(newNode.getFileUnderConstructionFeature() 2324 .getClientName(), src); 2325 2326 // record file record in log, record new generation stamp 2327 getEditLog().logOpenFile(src, newNode, logRetryEntry); 2328 if (NameNode.stateChangeLog.isDebugEnabled()) { 2329 NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added " + 2330 src + " 
inode " + newNode.getId() + " " + holder); 2331 } 2332 } catch (IOException ie) { 2333 NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " + 2334 ie.getMessage()); 2335 throw ie; 2336 } 2337 } 2338 2339 /** 2340 * Append to an existing file for append. 2341 * <p> 2342 * 2343 * The method returns the last block of the file if this is a partial block, 2344 * which can still be used for writing more data. The client uses the returned 2345 * block locations to form the data pipeline for this block.<br> 2346 * The method returns null if the last block is full. The client then 2347 * allocates a new block with the next call using {@link NameNode#addBlock()}. 2348 * <p> 2349 * 2350 * For description of parameters and exceptions thrown see 2351 * {@link ClientProtocol#append(String, String)} 2352 * 2353 * @return the last block locations if the block is partial or null otherwise 2354 */ 2355 private LocatedBlock appendFileInternal(FSPermissionChecker pc, String src, 2356 String holder, String clientMachine, boolean logRetryCache) 2357 throws AccessControlException, UnresolvedLinkException, 2358 FileNotFoundException, IOException { 2359 assert hasWriteLock(); 2360 // Verify that the destination does not exist as a directory already. 2361 final INodesInPath iip = dir.getINodesInPath4Write(src); 2362 final INode inode = iip.getLastINode(); 2363 if (inode != null && inode.isDirectory()) { 2364 throw new FileAlreadyExistsException("Cannot append to directory " + src 2365 + "; already exists as a directory."); 2366 } 2367 if (isPermissionEnabled) { 2368 checkPathAccess(pc, src, FsAction.WRITE); 2369 } 2370 2371 try { 2372 if (inode == null) { 2373 throw new FileNotFoundException("failed to append to non-existent file " 2374 + src + " for client " + clientMachine); 2375 } 2376 INodeFile myFile = INodeFile.valueOf(inode, src, true); 2377 // Opening an existing file for write - may need to recover lease. 
2378 recoverLeaseInternal(myFile, src, holder, clientMachine, false); 2379 2380 // recoverLeaseInternal may create a new InodeFile via 2381 // finalizeINodeFileUnderConstruction so we need to refresh 2382 // the referenced file. 2383 myFile = INodeFile.valueOf(dir.getINode(src), src, true); 2384 final BlockInfo lastBlock = myFile.getLastBlock(); 2385 // Check that the block has at least minimum replication. 2386 if(lastBlock != null && lastBlock.isComplete() && 2387 !getBlockManager().isSufficientlyReplicated(lastBlock)) { 2388 throw new IOException("append: lastBlock=" + lastBlock + 2389 " of src=" + src + " is not sufficiently replicated yet."); 2390 } 2391 final DatanodeDescriptor clientNode = 2392 blockManager.getDatanodeManager().getDatanodeByHost(clientMachine); 2393 return prepareFileForWrite(src, myFile, holder, clientMachine, clientNode, 2394 true, iip.getLatestSnapshotId(), logRetryCache); 2395 } catch (IOException ie) { 2396 NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage()); 2397 throw ie; 2398 } 2399 } 2400 2401 /** 2402 * Replace current node with a INodeUnderConstruction. 2403 * Recreate in-memory lease record. 
2404 * 2405 * @param src path to the file 2406 * @param file existing file object 2407 * @param leaseHolder identifier of the lease holder on this file 2408 * @param clientMachine identifier of the client machine 2409 * @param clientNode if the client is collocated with a DN, that DN's descriptor 2410 * @param writeToEditLog whether to persist this change to the edit log 2411 * @param logRetryCache whether to record RPC ids in editlog for retry cache 2412 * rebuilding 2413 * @return the last block locations if the block is partial or null otherwise 2414 * @throws UnresolvedLinkException 2415 * @throws IOException 2416 */ 2417 LocatedBlock prepareFileForWrite(String src, INodeFile file, 2418 String leaseHolder, String clientMachine, DatanodeDescriptor clientNode, 2419 boolean writeToEditLog, int latestSnapshot, boolean logRetryCache) 2420 throws IOException { 2421 file = file.recordModification(latestSnapshot); 2422 final INodeFile cons = file.toUnderConstruction(leaseHolder, clientMachine, 2423 clientNode); 2424 2425 leaseManager.addLease(cons.getFileUnderConstructionFeature() 2426 .getClientName(), src); 2427 2428 LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(cons); 2429 if (writeToEditLog) { 2430 getEditLog().logOpenFile(src, cons, logRetryCache); 2431 } 2432 return ret; 2433 } 2434 2435 /** 2436 * Recover lease; 2437 * Immediately revoke the lease of the current lease holder and start lease 2438 * recovery so that the file can be forced to be closed. 
2439 * 2440 * @param src the path of the file to start lease recovery 2441 * @param holder the lease holder's name 2442 * @param clientMachine the client machine's name 2443 * @return true if the file is already closed 2444 * @throws IOException 2445 */ 2446 boolean recoverLease(String src, String holder, String clientMachine) 2447 throws IOException { 2448 if (!DFSUtil.isValidName(src)) { 2449 throw new IOException("Invalid file name: " + src); 2450 } 2451 2452 boolean skipSync = false; 2453 FSPermissionChecker pc = getPermissionChecker(); 2454 checkOperation(OperationCategory.WRITE); 2455 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2456 writeLock(); 2457 try { 2458 checkOperation(OperationCategory.WRITE); 2459 checkNameNodeSafeMode("Cannot recover the lease of " + src); 2460 src = FSDirectory.resolvePath(src, pathComponents, dir); 2461 final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src); 2462 if (!inode.isUnderConstruction()) { 2463 return true; 2464 } 2465 if (isPermissionEnabled) { 2466 checkPathAccess(pc, src, FsAction.WRITE); 2467 } 2468 2469 recoverLeaseInternal(inode, src, holder, clientMachine, true); 2470 } catch (StandbyException se) { 2471 skipSync = true; 2472 throw se; 2473 } finally { 2474 writeUnlock(); 2475 // There might be transactions logged while trying to recover the lease. 2476 // They need to be sync'ed even when an exception was thrown. 2477 if (!skipSync) { 2478 getEditLog().logSync(); 2479 } 2480 } 2481 return false; 2482 } 2483 2484 private void recoverLeaseInternal(INodeFile fileInode, 2485 String src, String holder, String clientMachine, boolean force) 2486 throws IOException { 2487 assert hasWriteLock(); 2488 if (fileInode != null && fileInode.isUnderConstruction()) { 2489 // 2490 // If the file is under construction , then it must be in our 2491 // leases. Find the appropriate lease record. 
2492 // 2493 Lease lease = leaseManager.getLease(holder); 2494 // 2495 // We found the lease for this file. And surprisingly the original 2496 // holder is trying to recreate this file. This should never occur. 2497 // 2498 if (!force && lease != null) { 2499 Lease leaseFile = leaseManager.getLeaseByPath(src); 2500 if ((leaseFile != null && leaseFile.equals(lease)) || 2501 lease.getHolder().equals(holder)) { 2502 throw new AlreadyBeingCreatedException( 2503 "failed to create file " + src + " for " + holder + 2504 " for client " + clientMachine + 2505 " because current leaseholder is trying to recreate file."); 2506 } 2507 } 2508 // 2509 // Find the original holder. 2510 // 2511 FileUnderConstructionFeature uc = fileInode.getFileUnderConstructionFeature(); 2512 String clientName = uc.getClientName(); 2513 lease = leaseManager.getLease(clientName); 2514 if (lease == null) { 2515 throw new AlreadyBeingCreatedException( 2516 "failed to create file " + src + " for " + holder + 2517 " for client " + clientMachine + 2518 " because pendingCreates is non-null but no leases found."); 2519 } 2520 if (force) { 2521 // close now: no need to wait for soft lease expiration and 2522 // close only the file src 2523 LOG.info("recoverLease: " + lease + ", src=" + src + 2524 " from client " + clientName); 2525 internalReleaseLease(lease, src, holder); 2526 } else { 2527 assert lease.getHolder().equals(clientName) : 2528 "Current lease holder " + lease.getHolder() + 2529 " does not match file creator " + clientName; 2530 // 2531 // If the original holder has not renewed in the last SOFTLIMIT 2532 // period, then start lease recovery. 2533 // 2534 if (lease.expiredSoftLimit()) { 2535 LOG.info("startFile: recover " + lease + ", src=" + src + " client " 2536 + clientName); 2537 boolean isClosed = internalReleaseLease(lease, src, null); 2538 if(!isClosed) 2539 throw new RecoveryInProgressException( 2540 "Failed to close file " + src + 2541 ". Lease recovery is in progress. 
Try again later."); 2542 } else { 2543 final BlockInfo lastBlock = fileInode.getLastBlock(); 2544 if (lastBlock != null 2545 && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) { 2546 throw new RecoveryInProgressException("Recovery in progress, file [" 2547 + src + "], " + "lease owner [" + lease.getHolder() + "]"); 2548 } else { 2549 throw new AlreadyBeingCreatedException("Failed to create file [" 2550 + src + "] for [" + holder + "] for client [" + clientMachine 2551 + "], because this file is already being created by [" 2552 + clientName + "] on [" 2553 + uc.getClientMachine() + "]"); 2554 } 2555 } 2556 } 2557 } 2558 } 2559 2560 /** 2561 * Append to an existing file in the namespace. 2562 */ 2563 LocatedBlock appendFile(String src, String holder, String clientMachine) 2564 throws AccessControlException, SafeModeException, 2565 FileAlreadyExistsException, FileNotFoundException, 2566 ParentNotDirectoryException, IOException { 2567 LocatedBlock lb = null; 2568 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache, 2569 null); 2570 if (cacheEntry != null && cacheEntry.isSuccess()) { 2571 return (LocatedBlock) cacheEntry.getPayload(); 2572 } 2573 2574 boolean success = false; 2575 try { 2576 lb = appendFileInt(src, holder, clientMachine, cacheEntry != null); 2577 success = true; 2578 return lb; 2579 } catch (AccessControlException e) { 2580 logAuditEvent(false, "append", src); 2581 throw e; 2582 } finally { 2583 RetryCache.setState(cacheEntry, success, lb); 2584 } 2585 } 2586 2587 private LocatedBlock appendFileInt(String src, String holder, 2588 String clientMachine, boolean logRetryCache) 2589 throws AccessControlException, SafeModeException, 2590 FileAlreadyExistsException, FileNotFoundException, 2591 ParentNotDirectoryException, IOException { 2592 if (NameNode.stateChangeLog.isDebugEnabled()) { 2593 NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src 2594 + ", holder=" + holder 2595 + ", clientMachine=" + 
clientMachine); 2596 } 2597 boolean skipSync = false; 2598 if (!supportAppends) { 2599 throw new UnsupportedOperationException( 2600 "Append is not enabled on this NameNode. Use the " + 2601 DFS_SUPPORT_APPEND_KEY + " configuration option to enable it."); 2602 } 2603 2604 LocatedBlock lb = null; 2605 FSPermissionChecker pc = getPermissionChecker(); 2606 checkOperation(OperationCategory.WRITE); 2607 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2608 writeLock(); 2609 try { 2610 checkOperation(OperationCategory.WRITE); 2611 checkNameNodeSafeMode("Cannot append to file" + src); 2612 src = FSDirectory.resolvePath(src, pathComponents, dir); 2613 lb = appendFileInternal(pc, src, holder, clientMachine, logRetryCache); 2614 } catch (StandbyException se) { 2615 skipSync = true; 2616 throw se; 2617 } finally { 2618 writeUnlock(); 2619 // There might be transactions logged while trying to recover the lease. 2620 // They need to be sync'ed even when an exception was thrown. 2621 if (!skipSync) { 2622 getEditLog().logSync(); 2623 } 2624 } 2625 if (lb != null) { 2626 if (NameNode.stateChangeLog.isDebugEnabled()) { 2627 NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file " 2628 +src+" for "+holder+" at "+clientMachine 2629 +" block " + lb.getBlock() 2630 +" block size " + lb.getBlock().getNumBytes()); 2631 } 2632 } 2633 logAuditEvent(true, "append", src); 2634 return lb; 2635 } 2636 2637 ExtendedBlock getExtendedBlock(Block blk) { 2638 return new ExtendedBlock(blockPoolId, blk); 2639 } 2640 2641 void setBlockPoolId(String bpid) { 2642 blockPoolId = bpid; 2643 blockManager.setBlockPoolId(blockPoolId); 2644 } 2645 2646 /** 2647 * The client would like to obtain an additional block for the indicated 2648 * filename (which is being written-to). Return an array that consists 2649 * of the block, plus a set of machines. The first on this list should 2650 * be where the client writes data. 
Subsequent items in the list must 2651 * be provided in the connection to the first datanode. 2652 * 2653 * Make sure the previous blocks have been reported by datanodes and 2654 * are replicated. Will return an empty 2-elt array if we want the 2655 * client to "try again later". 2656 */ 2657 LocatedBlock getAdditionalBlock(String src, long fileId, String clientName, 2658 ExtendedBlock previous, Set<Node> excludedNodes, 2659 List<String> favoredNodes) 2660 throws LeaseExpiredException, NotReplicatedYetException, 2661 QuotaExceededException, SafeModeException, UnresolvedLinkException, 2662 IOException { 2663 long blockSize; 2664 int replication; 2665 DatanodeDescriptor clientNode = null; 2666 2667 if(NameNode.stateChangeLog.isDebugEnabled()) { 2668 NameNode.stateChangeLog.debug("BLOCK* NameSystem.getAdditionalBlock: " 2669 + src + " inodeId " + fileId + " for " + clientName); 2670 } 2671 2672 // Part I. Analyze the state of the file with respect to the input data. 2673 checkOperation(OperationCategory.READ); 2674 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2675 readLock(); 2676 try { 2677 checkOperation(OperationCategory.READ); 2678 src = FSDirectory.resolvePath(src, pathComponents, dir); 2679 LocatedBlock[] onRetryBlock = new LocatedBlock[1]; 2680 final INode[] inodes = analyzeFileState( 2681 src, fileId, clientName, previous, onRetryBlock).getINodes(); 2682 final INodeFile pendingFile = inodes[inodes.length - 1].asFile(); 2683 2684 if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) { 2685 // This is a retry. Just return the last block if having locations. 
2686 return onRetryBlock[0]; 2687 } 2688 if (pendingFile.getBlocks().length >= maxBlocksPerFile) { 2689 throw new IOException("File has reached the limit on maximum number of" 2690 + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY 2691 + "): " + pendingFile.getBlocks().length + " >= " 2692 + maxBlocksPerFile); 2693 } 2694 blockSize = pendingFile.getPreferredBlockSize(); 2695 clientNode = pendingFile.getFileUnderConstructionFeature().getClientNode(); 2696 replication = pendingFile.getFileReplication(); 2697 } finally { 2698 readUnlock(); 2699 } 2700 2701 // choose targets for the new block to be allocated. 2702 final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget( 2703 src, replication, clientNode, excludedNodes, blockSize, favoredNodes); 2704 2705 // Part II. 2706 // Allocate a new block, add it to the INode and the BlocksMap. 2707 Block newBlock = null; 2708 long offset; 2709 checkOperation(OperationCategory.WRITE); 2710 writeLock(); 2711 try { 2712 checkOperation(OperationCategory.WRITE); 2713 // Run the full analysis again, since things could have changed 2714 // while chooseTarget() was executing. 2715 LocatedBlock[] onRetryBlock = new LocatedBlock[1]; 2716 INodesInPath inodesInPath = 2717 analyzeFileState(src, fileId, clientName, previous, onRetryBlock); 2718 final INode[] inodes = inodesInPath.getINodes(); 2719 final INodeFile pendingFile = inodes[inodes.length - 1].asFile(); 2720 2721 if (onRetryBlock[0] != null) { 2722 if (onRetryBlock[0].getLocations().length > 0) { 2723 // This is a retry. Just return the last block if having locations. 
2724 return onRetryBlock[0]; 2725 } else { 2726 // add new chosen targets to already allocated block and return 2727 BlockInfo lastBlockInFile = pendingFile.getLastBlock(); 2728 ((BlockInfoUnderConstruction) lastBlockInFile) 2729 .setExpectedLocations(targets); 2730 offset = pendingFile.computeFileSize(); 2731 return makeLocatedBlock(lastBlockInFile, targets, offset); 2732 } 2733 } 2734 2735 // commit the last block and complete it if it has minimum replicas 2736 commitOrCompleteLastBlock(pendingFile, 2737 ExtendedBlock.getLocalBlock(previous)); 2738 2739 // allocate new block, record block locations in INode. 2740 newBlock = createNewBlock(); 2741 saveAllocatedBlock(src, inodesInPath, newBlock, targets); 2742 2743 dir.persistNewBlock(src, pendingFile); 2744 offset = pendingFile.computeFileSize(); 2745 } finally { 2746 writeUnlock(); 2747 } 2748 getEditLog().logSync(); 2749 2750 // Return located block 2751 return makeLocatedBlock(newBlock, targets, offset); 2752 } 2753 2754 INodesInPath analyzeFileState(String src, 2755 long fileId, 2756 String clientName, 2757 ExtendedBlock previous, 2758 LocatedBlock[] onRetryBlock) 2759 throws IOException { 2760 assert hasReadLock(); 2761 2762 checkBlock(previous); 2763 onRetryBlock[0] = null; 2764 checkOperation(OperationCategory.WRITE); 2765 checkNameNodeSafeMode("Cannot add block to " + src); 2766 2767 // have we exceeded the configured limit of fs objects. 2768 checkFsObjectLimit(); 2769 2770 Block previousBlock = ExtendedBlock.getLocalBlock(previous); 2771 final INodesInPath iip = dir.getINodesInPath4Write(src); 2772 final INodeFile pendingFile 2773 = checkLease(src, fileId, clientName, iip.getLastINode()); 2774 BlockInfo lastBlockInFile = pendingFile.getLastBlock(); 2775 if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) { 2776 // The block that the client claims is the current last block 2777 // doesn't match up with what we think is the last block. 
There are 2778 // four possibilities: 2779 // 1) This is the first block allocation of an append() pipeline 2780 // which started appending exactly at a block boundary. 2781 // In this case, the client isn't passed the previous block, 2782 // so it makes the allocateBlock() call with previous=null. 2783 // We can distinguish this since the last block of the file 2784 // will be exactly a full block. 2785 // 2) This is a retry from a client that missed the response of a 2786 // prior getAdditionalBlock() call, perhaps because of a network 2787 // timeout, or because of an HA failover. In that case, we know 2788 // by the fact that the client is re-issuing the RPC that it 2789 // never began to write to the old block. Hence it is safe to 2790 // to return the existing block. 2791 // 3) This is an entirely bogus request/bug -- we should error out 2792 // rather than potentially appending a new block with an empty 2793 // one in the middle, etc 2794 // 4) This is a retry from a client that timed out while 2795 // the prior getAdditionalBlock() is still being processed, 2796 // currently working on chooseTarget(). 2797 // There are no means to distinguish between the first and 2798 // the second attempts in Part I, because the first one hasn't 2799 // changed the namesystem state yet. 2800 // We run this analysis again in Part II where case 4 is impossible. 
2801 2802 BlockInfo penultimateBlock = pendingFile.getPenultimateBlock(); 2803 if (previous == null && 2804 lastBlockInFile != null && 2805 lastBlockInFile.getNumBytes() == pendingFile.getPreferredBlockSize() && 2806 lastBlockInFile.isComplete()) { 2807 // Case 1 2808 if (NameNode.stateChangeLog.isDebugEnabled()) { 2809 NameNode.stateChangeLog.debug( 2810 "BLOCK* NameSystem.allocateBlock: handling block allocation" + 2811 " writing to a file with a complete previous block: src=" + 2812 src + " lastBlock=" + lastBlockInFile); 2813 } 2814 } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) { 2815 if (lastBlockInFile.getNumBytes() != 0) { 2816 throw new IOException( 2817 "Request looked like a retry to allocate block " + 2818 lastBlockInFile + " but it already contains " + 2819 lastBlockInFile.getNumBytes() + " bytes"); 2820 } 2821 2822 // Case 2 2823 // Return the last block. 2824 NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + 2825 "caught retry for allocation of a new block in " + 2826 src + ". 
Returning previously allocated block " + lastBlockInFile); 2827 long offset = pendingFile.computeFileSize(); 2828 onRetryBlock[0] = makeLocatedBlock(lastBlockInFile, 2829 ((BlockInfoUnderConstruction)lastBlockInFile).getExpectedStorageLocations(), 2830 offset); 2831 return iip; 2832 } else { 2833 // Case 3 2834 throw new IOException("Cannot allocate block in " + src + ": " + 2835 "passed 'previous' block " + previous + " does not match actual " + 2836 "last block in file " + lastBlockInFile); 2837 } 2838 } 2839 2840 // Check if the penultimate block is minimally replicated 2841 if (!checkFileProgress(pendingFile, false)) { 2842 throw new NotReplicatedYetException("Not replicated yet: " + src); 2843 } 2844 return iip; 2845 } 2846 2847 LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs, 2848 long offset) throws IOException { 2849 LocatedBlock lBlk = new LocatedBlock( 2850 getExtendedBlock(blk), locs, offset, false); 2851 getBlockManager().setBlockToken( 2852 lBlk, BlockTokenSecretManager.AccessMode.WRITE); 2853 return lBlk; 2854 } 2855 2856 /** @see NameNode#getAdditionalDatanode(String, ExtendedBlock, DatanodeInfo[], DatanodeInfo[], int, String) */ 2857 LocatedBlock getAdditionalDatanode(String src, final ExtendedBlock blk, 2858 final DatanodeInfo[] existings, final String[] storageIDs, 2859 final Set<Node> excludes, 2860 final int numAdditionalNodes, final String clientName 2861 ) throws IOException { 2862 //check if the feature is enabled 2863 dtpReplaceDatanodeOnFailure.checkEnabled(); 2864 2865 final DatanodeDescriptor clientnode; 2866 final long preferredblocksize; 2867 final List<DatanodeStorageInfo> chosen; 2868 checkOperation(OperationCategory.READ); 2869 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2870 readLock(); 2871 try { 2872 checkOperation(OperationCategory.READ); 2873 //check safe mode 2874 checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk); 2875 src = 
FSDirectory.resolvePath(src, pathComponents, dir); 2876 2877 //check lease 2878 final INodeFile file = checkLease(src, clientName); 2879 clientnode = file.getFileUnderConstructionFeature().getClientNode(); 2880 preferredblocksize = file.getPreferredBlockSize(); 2881 2882 //find datanode storages 2883 final DatanodeManager dm = blockManager.getDatanodeManager(); 2884 chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs)); 2885 } finally { 2886 readUnlock(); 2887 } 2888 2889 // choose new datanodes. 2890 final DatanodeStorageInfo[] targets = blockManager.getBlockPlacementPolicy( 2891 ).chooseTarget(src, numAdditionalNodes, clientnode, chosen, true, 2892 // TODO: get storage type from the file 2893 excludes, preferredblocksize, StorageType.DEFAULT); 2894 final LocatedBlock lb = new LocatedBlock(blk, targets); 2895 blockManager.setBlockToken(lb, AccessMode.COPY); 2896 return lb; 2897 } 2898 2899 /** 2900 * The client would like to let go of the given block 2901 */ 2902 boolean abandonBlock(ExtendedBlock b, String src, String holder) 2903 throws LeaseExpiredException, FileNotFoundException, 2904 UnresolvedLinkException, IOException { 2905 if(NameNode.stateChangeLog.isDebugEnabled()) { 2906 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b 2907 + "of file " + src); 2908 } 2909 checkOperation(OperationCategory.WRITE); 2910 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2911 writeLock(); 2912 try { 2913 checkOperation(OperationCategory.WRITE); 2914 checkNameNodeSafeMode("Cannot abandon block " + b + " for fle" + src); 2915 src = FSDirectory.resolvePath(src, pathComponents, dir); 2916 2917 // 2918 // Remove the block from the pending creates list 2919 // 2920 INodeFile file = checkLease(src, holder); 2921 boolean removed = dir.removeBlock(src, file, 2922 ExtendedBlock.getLocalBlock(b)); 2923 if (!removed) { 2924 return true; 2925 } 2926 if(NameNode.stateChangeLog.isDebugEnabled()) { 2927 
NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " 2928 + b + " is removed from pendingCreates"); 2929 } 2930 dir.persistBlocks(src, file, false); 2931 } finally { 2932 writeUnlock(); 2933 } 2934 getEditLog().logSync(); 2935 2936 return true; 2937 } 2938 2939 /** make sure that we still have the lease on this file. */ 2940 private INodeFile checkLease(String src, String holder) 2941 throws LeaseExpiredException, UnresolvedLinkException, 2942 FileNotFoundException { 2943 return checkLease(src, INodeId.GRANDFATHER_INODE_ID, holder, 2944 dir.getINode(src)); 2945 } 2946 2947 private INodeFile checkLease(String src, long fileId, String holder, 2948 INode inode) throws LeaseExpiredException, FileNotFoundException { 2949 assert hasReadLock(); 2950 if (inode == null || !inode.isFile()) { 2951 Lease lease = leaseManager.getLease(holder); 2952 throw new LeaseExpiredException( 2953 "No lease on " + src + ": File does not exist. " 2954 + (lease != null ? lease.toString() 2955 : "Holder " + holder + " does not have any open files.")); 2956 } 2957 final INodeFile file = inode.asFile(); 2958 if (!file.isUnderConstruction()) { 2959 Lease lease = leaseManager.getLease(holder); 2960 throw new LeaseExpiredException( 2961 "No lease on " + src + ": File is not open for writing. " 2962 + (lease != null ? lease.toString() 2963 : "Holder " + holder + " does not have any open files.")); 2964 } 2965 // No further modification is allowed on a deleted file. 2966 // A file is considered deleted, if it has no parent or is marked 2967 // as deleted in the snapshot feature. 
2968 if (file.getParent() == null || (file.isWithSnapshot() && 2969 file.getFileWithSnapshotFeature().isCurrentFileDeleted())) { 2970 throw new FileNotFoundException(src); 2971 } 2972 String clientName = file.getFileUnderConstructionFeature().getClientName(); 2973 if (holder != null && !clientName.equals(holder)) { 2974 throw new LeaseExpiredException("Lease mismatch on " + src + " owned by " 2975 + clientName + " but is accessed by " + holder); 2976 } 2977 INodeId.checkId(fileId, file); 2978 return file; 2979 } 2980 2981 /** 2982 * Complete in-progress write to the given file. 2983 * @return true if successful, false if the client should continue to retry 2984 * (e.g if not all blocks have reached minimum replication yet) 2985 * @throws IOException on error (eg lease mismatch, file not open, file deleted) 2986 */ 2987 boolean completeFile(String src, String holder, 2988 ExtendedBlock last, long fileId) 2989 throws SafeModeException, UnresolvedLinkException, IOException { 2990 if (NameNode.stateChangeLog.isDebugEnabled()) { 2991 NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " + 2992 src + " for " + holder); 2993 } 2994 checkBlock(last); 2995 boolean success = false; 2996 checkOperation(OperationCategory.WRITE); 2997 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 2998 writeLock(); 2999 try { 3000 checkOperation(OperationCategory.WRITE); 3001 checkNameNodeSafeMode("Cannot complete file " + src); 3002 src = FSDirectory.resolvePath(src, pathComponents, dir); 3003 success = completeFileInternal(src, holder, 3004 ExtendedBlock.getLocalBlock(last), fileId); 3005 } finally { 3006 writeUnlock(); 3007 } 3008 getEditLog().logSync(); 3009 if (success) { 3010 NameNode.stateChangeLog.info("DIR* completeFile: " + src 3011 + " is closed by " + holder); 3012 } 3013 return success; 3014 } 3015 3016 private boolean completeFileInternal(String src, 3017 String holder, Block last, long fileId) throws SafeModeException, 3018 
UnresolvedLinkException, IOException { 3019 assert hasWriteLock(); 3020 final INodesInPath iip = dir.getLastINodeInPath(src); 3021 final INodeFile pendingFile; 3022 try { 3023 pendingFile = checkLease(src, fileId, holder, iip.getINode(0)); 3024 } catch (LeaseExpiredException lee) { 3025 final INode inode = dir.getINode(src); 3026 if (inode != null 3027 && inode.isFile() 3028 && !inode.asFile().isUnderConstruction()) { 3029 // This could be a retry RPC - i.e the client tried to close 3030 // the file, but missed the RPC response. Thus, it is trying 3031 // again to close the file. If the file still exists and 3032 // the client's view of the last block matches the actual 3033 // last block, then we'll treat it as a successful close. 3034 // See HDFS-3031. 3035 final Block realLastBlock = inode.asFile().getLastBlock(); 3036 if (Block.matchingIdAndGenStamp(last, realLastBlock)) { 3037 NameNode.stateChangeLog.info("DIR* completeFile: " + 3038 "request from " + holder + " to complete " + src + 3039 " which is already closed. But, it appears to be an RPC " + 3040 "retry. Returning success"); 3041 return true; 3042 } 3043 } 3044 throw lee; 3045 } 3046 // Check the state of the penultimate block. It should be completed 3047 // before attempting to complete the last one. 3048 if (!checkFileProgress(pendingFile, false)) { 3049 return false; 3050 } 3051 3052 // commit the last block and complete it if it has minimum replicas 3053 commitOrCompleteLastBlock(pendingFile, last); 3054 3055 if (!checkFileProgress(pendingFile, true)) { 3056 return false; 3057 } 3058 3059 finalizeINodeFileUnderConstruction(src, pendingFile, 3060 iip.getLatestSnapshotId()); 3061 return true; 3062 } 3063 3064 /** 3065 * Save allocated block at the given pending filename 3066 * 3067 * @param src path to the file 3068 * @param inodesInPath representing each of the components of src. 3069 * The last INode is the INode for the file. 
3070 * @throws QuotaExceededException If addition of block exceeds space quota 3071 */ 3072 BlockInfo saveAllocatedBlock(String src, INodesInPath inodes, 3073 Block newBlock, DatanodeStorageInfo[] targets) 3074 throws IOException { 3075 assert hasWriteLock(); 3076 BlockInfo b = dir.addBlock(src, inodes, newBlock, targets); 3077 NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + src + ". " 3078 + getBlockPoolId() + " " + b); 3079 DatanodeStorageInfo.incrementBlocksScheduled(targets); 3080 return b; 3081 } 3082 3083 /** 3084 * Create new block with a unique block id and a new generation stamp. 3085 */ 3086 Block createNewBlock() throws IOException { 3087 assert hasWriteLock(); 3088 Block b = new Block(nextBlockId(), 0, 0); 3089 // Increment the generation stamp for every new block. 3090 b.setGenerationStamp(nextGenerationStamp(false)); 3091 return b; 3092 } 3093 3094 /** 3095 * Check that the indicated file's blocks are present and 3096 * replicated. If not, return false. If checkall is true, then check 3097 * all blocks, otherwise check only penultimate block. 3098 */ 3099 boolean checkFileProgress(INodeFile v, boolean checkall) { 3100 readLock(); 3101 try { 3102 if (checkall) { 3103 // 3104 // check all blocks of the file. 
3105 // 3106 for (BlockInfo block: v.getBlocks()) { 3107 if (!block.isComplete()) { 3108 LOG.info("BLOCK* checkFileProgress: " + block 3109 + " has not reached minimal replication " 3110 + blockManager.minReplication); 3111 return false; 3112 } 3113 } 3114 } else { 3115 // 3116 // check the penultimate block of this file 3117 // 3118 BlockInfo b = v.getPenultimateBlock(); 3119 if (b != null && !b.isComplete()) { 3120 LOG.warn("BLOCK* checkFileProgress: " + b 3121 + " has not reached minimal replication " 3122 + blockManager.minReplication); 3123 return false; 3124 } 3125 } 3126 return true; 3127 } finally { 3128 readUnlock(); 3129 } 3130 } 3131 3132 //////////////////////////////////////////////////////////////// 3133 // Here's how to handle block-copy failure during client write: 3134 // -- As usual, the client's write should result in a streaming 3135 // backup write to a k-machine sequence. 3136 // -- If one of the backup machines fails, no worries. Fail silently. 3137 // -- Before client is allowed to close and finalize file, make sure 3138 // that the blocks are backed up. Namenode may have to issue specific backup 3139 // commands to make up for earlier datanode failures. Once all copies 3140 // are made, edit namespace and return to client. 3141 //////////////////////////////////////////////////////////////// 3142 3143 /** 3144 * Change the indicated filename. 3145 * @deprecated Use {@link #renameTo(String, String, Options.Rename...)} instead. 
   */
  @Deprecated
  boolean renameTo(String src, String dst)
      throws IOException, UnresolvedLinkException {
    // Retry-cache aware: a duplicate RPC short-circuits to the prior result.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return true; // Return previous response
    }
    boolean ret = false;
    try {
      ret = renameToInt(src, dst, cacheEntry != null);
    } catch (AccessControlException e) {
      logAuditEvent(false, "rename", src, dst, null);
      throw e;
    } finally {
      // Record outcome so a retried RPC can be answered from the cache.
      RetryCache.setState(cacheEntry, ret);
    }
    return ret;
  }

  // Worker for the deprecated renameTo: validates dst, performs the rename
  // under the write lock, syncs the edit log, and emits the audit event.
  private boolean renameToInt(String src, String dst, boolean logRetryCache)
      throws IOException, UnresolvedLinkException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src +
          " to " + dst);
    }
    if (!DFSUtil.isValidName(dst)) {
      throw new IOException("Invalid name: " + dst);
    }
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
    byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
    boolean status = false;
    HdfsFileStatus resultingStat = null;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot rename " + src);
      src = FSDirectory.resolvePath(src, srcComponents, dir);
      dst = FSDirectory.resolvePath(dst, dstComponents, dir);
      checkOperation(OperationCategory.WRITE);
      status = renameToInternal(pc, src, dst, logRetryCache);
      if (status) {
        resultingStat = getAuditFileInfo(dst, false);
      }
    } finally {
      writeUnlock();
    }
    // logSync is deliberately outside the lock to avoid blocking other ops.
    getEditLog().logSync();
    if (status) {
      logAuditEvent(true, "rename", src, dst, resultingStat);
    }
    return status;
  }

  /** @deprecated See {@link #renameTo(String, String)} */
  @Deprecated
  private boolean renameToInternal(FSPermissionChecker pc, String src,
      String dst, boolean logRetryCache) throws IOException,
      UnresolvedLinkException {
    assert hasWriteLock();
    if (isPermissionEnabled) {
      //We should not be doing this.  This is move() not renameTo().
      //but for now,
      //NOTE: yes, this is bad!  it's assuming much lower level behavior
      //      of rewriting the dst
      String actualdst = dir.isDir(dst)?
          dst + Path.SEPARATOR + new Path(src).getName(): dst;
      // Rename does not operates on link targets
      // Do not resolveLink when checking permissions of src and dst
      // Check write access to parent of src
      checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
      // Check write access to ancestor of dst
      checkPermission(pc, actualdst, false, FsAction.WRITE, null, null, null,
          false);
    }

    if (dir.renameTo(src, dst, logRetryCache)) {
      return true;
    }
    return false;
  }


  /** Rename src to dst */
  void renameTo(String src, String dst, Options.Rename... options)
      throws IOException, UnresolvedLinkException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - "
          + src + " to " + dst);
    }
    if (!DFSUtil.isValidName(dst)) {
      throw new InvalidPathException("Invalid name: " + dst);
    }
    final FSPermissionChecker pc = getPermissionChecker();

    checkOperation(OperationCategory.WRITE);
    // Retry-cache aware: a duplicate RPC returns without re-executing.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src);
    byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst);
    HdfsFileStatus resultingStat = null;
    boolean success = false;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot rename " + src);
      src = FSDirectory.resolvePath(src, srcComponents, dir);
      dst = FSDirectory.resolvePath(dst, dstComponents, dir);
      renameToInternal(pc, src, dst, cacheEntry != null, options);
      resultingStat = getAuditFileInfo(dst, false);
      success = true;
    } finally {
      writeUnlock();
      RetryCache.setState(cacheEntry, success);
    }
    getEditLog().logSync();
    if (resultingStat != null) {
      StringBuilder cmd = new StringBuilder("rename options=");
      for (Rename option : options) {
        cmd.append(option.value()).append(" ");
      }
      logAuditEvent(true, cmd.toString(), src, dst, resultingStat);
    }
  }

  // Worker for the options-based rename: permission checks then delegation
  // to FSDirectory. Unlike the deprecated variant, failures throw.
  private void renameToInternal(FSPermissionChecker pc, String src, String dst,
      boolean logRetryCache, Options.Rename... options) throws IOException {
    assert hasWriteLock();
    if (isPermissionEnabled) {
      // Rename does not operates on link targets
      // Do not resolveLink when checking permissions of src and dst
      // Check write access to parent of src
      checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
      // Check write access to ancestor of dst
      checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false);
    }

    dir.renameTo(src, dst, logRetryCache, options);
  }

  /**
   * Remove the indicated file from namespace.
   *
   * @see ClientProtocol#delete(String, boolean) for detailed description and
   * description of exceptions
   */
  boolean delete(String src, boolean recursive)
      throws AccessControlException, SafeModeException,
      UnresolvedLinkException, IOException {
    // Retry-cache aware: a duplicate RPC short-circuits to the prior result.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return true; // Return previous response
    }
    boolean ret = false;
    try {
      ret = deleteInt(src, recursive, cacheEntry != null);
    } catch (AccessControlException e) {
      logAuditEvent(false, "delete", src);
      throw e;
    } finally {
      RetryCache.setState(cacheEntry, ret);
    }
    return ret;
  }

  // Thin wrapper: performs the delete and emits the success audit event.
  private boolean deleteInt(String src, boolean recursive, boolean logRetryCache)
      throws AccessControlException, SafeModeException,
      UnresolvedLinkException, IOException {
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src);
    }
    boolean status = deleteInternal(src, recursive, true, logRetryCache);
    if (status) {
      logAuditEvent(true, "delete", src);
    }
    return status;
  }

  // Build a permission checker for the current remote user; wraps any
  // user-resolution failure as an AccessControlException.
  private FSPermissionChecker getPermissionChecker()
      throws AccessControlException {
    try {
      return new FSPermissionChecker(fsOwnerShortUserName, supergroup,
          getRemoteUser());
    } catch (IOException ioe) {
      throw new AccessControlException(ioe);
    }
  }

  /**
   * Remove a file/directory from the namespace.
   * <p>
   * For large directories, deletion is incremental. The blocks under
   * the directory are collected and deleted a small number at a time holding
   * the {@link FSNamesystem} lock.
   * <p>
   * For small directory or file the deletion is done in one shot.
   *
   * @see ClientProtocol#delete(String, boolean) for description of exceptions
   */
  private boolean deleteInternal(String src, boolean recursive,
      boolean enforcePermission, boolean logRetryCache)
      throws AccessControlException, SafeModeException, UnresolvedLinkException,
      IOException {
    BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
    List<INode> removedINodes = new ChunkedArrayList<INode>();
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    boolean ret = false;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot delete " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      if (!recursive && dir.isNonEmptyDirectory(src)) {
        throw new IOException(src + " is non empty");
      }
      if (enforcePermission && isPermissionEnabled) {
        checkPermission(pc, src, false, null, FsAction.WRITE, null,
            FsAction.ALL, false);
      }
      // Unlink the target directory from directory tree
      if (!dir.delete(src, collectedBlocks, removedINodes, logRetryCache)) {
        return false;
      }
      ret = true;
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    // Block removal happens outside the initial lock hold, in increments,
    // so other operations can interleave (see removeBlocks / HDFS-2938).
    removeBlocks(collectedBlocks); // Incremental deletion of blocks
    collectedBlocks.clear();

    dir.writeLock();
    try {
      dir.removeFromInodeMap(removedINodes);
    } finally {
      dir.writeUnlock();
    }
    removedINodes.clear();
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* Namesystem.delete: "
          + src +" is removed");
    }
    return ret;
  }

  /**
   * From the given list, incrementally remove the blocks from blockManager
   * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to
   * ensure that other waiters on the lock can get in. See HDFS-2938
   *
   * @param blocks
   *          An instance of {@link BlocksMapUpdateInfo} which contains a list
   *          of blocks that need to be removed from blocksMap
   */
  void removeBlocks(BlocksMapUpdateInfo blocks) {
    List<Block> toDeleteList = blocks.getToDeleteList();
    Iterator<Block> iter = toDeleteList.iterator();
    while (iter.hasNext()) {
      writeLock();
      try {
        for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) {
          blockManager.removeBlock(iter.next());
        }
      } finally {
        writeUnlock();
      }
    }
  }

  /**
   * Remove leases, inodes and blocks related to a given path
   * @param src The given path
   * @param blocks Containing the list of blocks to be deleted from blocksMap
   * @param removedINodes Containing the list of inodes to be removed from
   *                      inodesMap
   */
  void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
      List<INode> removedINodes) {
    assert hasWriteLock();
    leaseManager.removeLeaseWithPrefixPath(src);
    // remove inodes from inodesMap
    if (removedINodes != null) {
      dir.removeFromInodeMap(removedINodes);
      removedINodes.clear();
    }
    if (blocks == null) {
      return;
    }

    removeBlocksAndUpdateSafemodeTotal(blocks);
  }

  /**
   * Removes the blocks from blocksmap and updates the safemode blocks total
   *
   * @param blocks
   *          An instance of {@link BlocksMapUpdateInfo} which contains a list
   *          of
blocks that need to be removed from blocksMap 3449 */ 3450 void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) { 3451 assert hasWriteLock(); 3452 // In the case that we are a Standby tailing edits from the 3453 // active while in safe-mode, we need to track the total number 3454 // of blocks and safe blocks in the system. 3455 boolean trackBlockCounts = isSafeModeTrackingBlocks(); 3456 int numRemovedComplete = 0, numRemovedSafe = 0; 3457 3458 for (Block b : blocks.getToDeleteList()) { 3459 if (trackBlockCounts) { 3460 BlockInfo bi = getStoredBlock(b); 3461 if (bi.isComplete()) { 3462 numRemovedComplete++; 3463 if (bi.numNodes() >= blockManager.minReplication) { 3464 numRemovedSafe++; 3465 } 3466 } 3467 } 3468 blockManager.removeBlock(b); 3469 } 3470 if (trackBlockCounts) { 3471 if (LOG.isDebugEnabled()) { 3472 LOG.debug("Adjusting safe-mode totals for deletion." 3473 + "decreasing safeBlocks by " + numRemovedSafe 3474 + ", totalBlocks by " + numRemovedComplete); 3475 } 3476 adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete); 3477 } 3478 } 3479 3480 /** 3481 * @see SafeModeInfo#shouldIncrementallyTrackBlocks 3482 */ 3483 private boolean isSafeModeTrackingBlocks() { 3484 if (!haEnabled) { 3485 // Never track blocks incrementally in non-HA code. 3486 return false; 3487 } 3488 SafeModeInfo sm = this.safeMode; 3489 return sm != null && sm.shouldIncrementallyTrackBlocks(); 3490 } 3491 3492 /** 3493 * Get the file info for a specific file. 3494 * 3495 * @param src The string representation of the path to the file 3496 * @param resolveLink whether to throw UnresolvedLinkException 3497 * if src refers to a symlink 3498 * 3499 * @throws AccessControlException if access is denied 3500 * @throws UnresolvedLinkException if a symlink is encountered. 
   *
   * @return object containing information regarding the file
   *         or null if file not found
   * @throws StandbyException
   */
  HdfsFileStatus getFileInfo(String src, boolean resolveLink)
      throws AccessControlException, UnresolvedLinkException,
             StandbyException, IOException {
    if (!DFSUtil.isValidName(src)) {
      throw new InvalidPathException("Invalid file name: " + src);
    }
    HdfsFileStatus stat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      if (isPermissionEnabled) {
        checkPermission(pc, src, false, null, null, null, null, resolveLink);
      }
      stat = dir.getFileInfo(src, resolveLink);
    } catch (AccessControlException e) {
      logAuditEvent(false, "getfileinfo", src);
      throw e;
    } finally {
      readUnlock();
    }
    logAuditEvent(true, "getfileinfo", src);
    return stat;
  }

  /**
   * Returns true if the file is closed
   */
  boolean isFileClosed(String src)
      throws AccessControlException, UnresolvedLinkException,
      StandbyException, IOException {
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      if (isPermissionEnabled) {
        checkTraverse(pc, src);
      }
      // Closed == not currently under construction.
      return !INodeFile.valueOf(dir.getINode(src), src).isUnderConstruction();
    } catch (AccessControlException e) {
      if (isAuditEnabled() && isExternalInvocation()) {
        logAuditEvent(false, "isFileClosed", src);
      }
      throw e;
    } finally {
      readUnlock();
    }
  }

  /**
   * Create all the necessary directories
   */
  boolean mkdirs(String src, PermissionStatus permissions,
      boolean createParent) throws IOException, UnresolvedLinkException {
    boolean ret = false;
    try {
      ret = mkdirsInt(src, permissions, createParent);
    } catch (AccessControlException e) {
      logAuditEvent(false, "mkdirs", src);
      throw e;
    }
    return ret;
  }

  // Validates the path, takes the write lock, creates the directories and
  // audit-logs success. Audit of access denial is done by mkdirs() above.
  private boolean mkdirsInt(String src, PermissionStatus permissions,
      boolean createParent) throws IOException, UnresolvedLinkException {
    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src);
    }
    if (!DFSUtil.isValidName(src)) {
      throw new InvalidPathException(src);
    }
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    HdfsFileStatus resultingStat = null;
    boolean status = false;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create directory " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      status = mkdirsInternal(pc, src, permissions, createParent);
      if (status) {
        resultingStat = dir.getFileInfo(src, false);
      }
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();
    if (status) {
      logAuditEvent(true, "mkdirs", src, null, resultingStat);
    }
    return status;
  }

  /**
   * Create all the necessary directories
   */
  private boolean mkdirsInternal(FSPermissionChecker pc, String src,
      PermissionStatus permissions, boolean createParent)
      throws IOException, UnresolvedLinkException {
    assert hasWriteLock();
    if (isPermissionEnabled) {
      checkTraverse(pc, src);
    }
    if (dir.isDirMutable(src)) {
      // all the users of mkdirs() are used to expect 'true' even if
      // a new directory is not created.
      return true;
    }
    if (isPermissionEnabled) {
      checkAncestorAccess(pc, src, FsAction.WRITE);
    }
    if (!createParent) {
      verifyParentDir(src);
    }

    // validate that we have enough inodes. This is, at best, a
    // heuristic because the mkdirs() operation might need to
    // create multiple inodes.
    checkFsObjectLimit();

    if (!dir.mkdirs(src, permissions, false, now())) {
      throw new IOException("Failed to create directory: " + src);
    }
    return true;
  }

  /**
   * Get the content summary for a specific file/dir.
   *
   * @param src The string representation of the path to the file
   *
   * @throws AccessControlException if access is denied
   * @throws UnresolvedLinkException if a symlink is encountered.
   * @throws FileNotFoundException if no file exists
   * @throws StandbyException
   * @throws IOException for issues with writing to the audit log
   *
   * @return object containing information regarding the file
   *         or null if file not found
   */
  ContentSummary getContentSummary(String src) throws IOException {
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    boolean success = true;
    try {
      checkOperation(OperationCategory.READ);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      if (isPermissionEnabled) {
        checkPermission(pc, src, false, null, null, null, FsAction.READ_EXECUTE);
      }
      return dir.getContentSummary(src);

    } catch (AccessControlException ace) {
      success = false;
      throw ace;
    } finally {
      readUnlock();
      // Audited in the finally block so both outcomes are logged.
      logAuditEvent(success, "contentSummary", src);
    }
  }

  /**
   * Set the namespace quota and diskspace quota for a directory.
   * See {@link ClientProtocol#setQuota(String, long, long)} for the
   * contract.
   *
   * Note: This does not support ".inodes" relative path.
   */
  void setQuota(String path, long nsQuota, long dsQuota)
      throws IOException, UnresolvedLinkException {
    // Quota changes are superuser-only.
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set quota on " + path);
      dir.setQuota(path, nsQuota, dsQuota);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
  }

  /** Persist all metadata about this file.
   * @param src The string representation of the path
   * @param clientName The string representation of the client
   * @param lastBlockLength The length of the last block
   *                        under construction reported from client.
   * @throws IOException if path does not exist
   */
  void fsync(String src, String clientName, long lastBlockLength)
      throws IOException, UnresolvedLinkException {
    NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot fsync file " + src);
      src = FSDirectory.resolvePath(src, pathComponents, dir);
      // Verifies the caller still holds the lease on src.
      INodeFile pendingFile = checkLease(src, clientName);
      if (lastBlockLength > 0) {
        pendingFile.getFileUnderConstructionFeature().updateLengthOfLastBlock(
            pendingFile, lastBlockLength);
      }
      dir.persistBlocks(src, pendingFile, false);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
  }

  /**
   * Move a file that is being written to be immutable.
   * @param src The filename
   * @param lease The lease for the client creating the file
   * @param recoveryLeaseHolder reassign lease to this holder if the last block
   *        needs recovery; keep current holder if null.
   * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal
   *         replication;<br>
   *         RecoveryInProgressException if lease recovery is in progress.<br>
   *         IOException in case of an error.
   * @return true  if file has been successfully finalized and closed or
   *         false if block recovery has been initiated. Since the lease owner
   *         has been changed and logged, caller should call logSync().
   */
  boolean internalReleaseLease(Lease lease, String src,
      String recoveryLeaseHolder) throws AlreadyBeingCreatedException,
      IOException, UnresolvedLinkException {
    LOG.info("Recovering " + lease + ", src=" + src);
    assert !isInSafeMode();
    assert hasWriteLock();

    final INodesInPath iip = dir.getLastINodeInPath(src);
    final INodeFile pendingFile = iip.getINode(0).asFile();
    int nrBlocks = pendingFile.numBlocks();
    BlockInfo[] blocks = pendingFile.getBlocks();

    // Count the leading run of COMPLETE blocks; curBlock is left pointing at
    // the first non-COMPLETE block (if any).
    int nrCompleteBlocks;
    BlockInfo curBlock = null;
    for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
      curBlock = blocks[nrCompleteBlocks];
      if(!curBlock.isComplete())
        break;
      assert blockManager.checkMinReplication(curBlock) :
              "A COMPLETE block is not minimally replicated in " + src;
    }

    // If there are no incomplete blocks associated with this file,
    // then reap lease immediately and close the file.
    if(nrCompleteBlocks == nrBlocks) {
      finalizeINodeFileUnderConstruction(src, pendingFile,
          iip.getLatestSnapshotId());
      NameNode.stateChangeLog.warn("BLOCK*"
        + " internalReleaseLease: All existing blocks are COMPLETE,"
        + " lease removed, file closed.");
      return true;  // closed!
    }

    // Only the last and the penultimate blocks may be in non COMPLETE state.
    // If the penultimate block is not COMPLETE, then it must be COMMITTED.
    if(nrCompleteBlocks < nrBlocks - 2 ||
       nrCompleteBlocks == nrBlocks - 2 &&
         curBlock != null &&
         curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
      final String message = "DIR* NameSystem.internalReleaseLease: "
        + "attempt to release a create lock on "
        + src + " but file is already closed.";
      NameNode.stateChangeLog.warn(message);
      throw new IOException(message);
    }

    // The last block is not COMPLETE, and
    // that the penultimate block if exists is either COMPLETE or COMMITTED
    final BlockInfo lastBlock = pendingFile.getLastBlock();
    BlockUCState lastBlockState = lastBlock.getBlockUCState();
    BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
    boolean penultimateBlockMinReplication;
    BlockUCState penultimateBlockState;
    if (penultimateBlock == null) {
      penultimateBlockState = BlockUCState.COMPLETE;
      // If penultimate block doesn't exist then its minReplication is met
      penultimateBlockMinReplication = true;
    } else {
      penultimateBlockState = BlockUCState.COMMITTED;
      penultimateBlockMinReplication =
        blockManager.checkMinReplication(penultimateBlock);
    }
    assert penultimateBlockState == BlockUCState.COMPLETE ||
           penultimateBlockState == BlockUCState.COMMITTED :
           "Unexpected state of penultimate block in " + src;

    switch(lastBlockState) {
    case COMPLETE:
      assert false : "Already checked that the last block is incomplete";
      break;
    case COMMITTED:
      // Close file if committed blocks are minimally replicated
      if(penultimateBlockMinReplication &&
          blockManager.checkMinReplication(lastBlock)) {
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK*"
          + " internalReleaseLease: Committed blocks are minimally replicated,"
          + " lease removed, file closed.");
        return true;  // closed!
      }
      // Cannot close file right now, since some blocks
      // are not yet minimally replicated.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      String message = "DIR* NameSystem.internalReleaseLease: " +
          "Failed to release lease for file " + src +
          ". Committed blocks are waiting to be minimally replicated." +
          " Try again later.";
      NameNode.stateChangeLog.warn(message);
      throw new AlreadyBeingCreatedException(message);
    case UNDER_CONSTRUCTION:
    case UNDER_RECOVERY:
      final BlockInfoUnderConstruction uc = (BlockInfoUnderConstruction)lastBlock;
      // setup the last block locations from the blockManager if not known
      if (uc.getNumExpectedLocations() == 0) {
        uc.setExpectedLocations(blockManager.getStorages(lastBlock));
      }

      if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) {
        // There is no datanode reported to this block.
        // may be client have crashed before writing data to pipeline.
        // This blocks doesn't need any recovery.
        // We can remove this block and close the file.
        pendingFile.removeLastBlock(lastBlock);
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
            + "Removed empty last block and closed file.");
        return true;
      }
      // start recovery of the last block for this file
      long blockRecoveryId = nextGenerationStamp(isLegacyBlock(uc));
      lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
      uc.initializeBlockRecovery(blockRecoveryId);
      leaseManager.renewLease(lease);
      // Cannot close file right now, since the last block requires recovery.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      NameNode.stateChangeLog.warn(
                "DIR* NameSystem.internalReleaseLease: " +
                "File " + src + " has not been closed." +
               " Lease recovery is in progress. " +
                "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
      break;
    }
    return false;
  }

  /**
   * Reassigns the lease on src to newHolder, logging the reassignment to the
   * edit log first. Returns the original lease unchanged when newHolder is
   * null (i.e. keep the current holder).
   */
  private Lease reassignLease(Lease lease, String src, String newHolder,
      INodeFile pendingFile) {
    assert hasWriteLock();
    if(newHolder == null)
      return lease;
    // The following transaction is not synced. Make sure it's sync'ed later.
    logReassignLease(lease.getHolder(), src, newHolder);
    return reassignLeaseInternal(lease, src, newHolder, pendingFile);
  }

  // Updates the under-construction file's client name and moves the lease
  // to the new holder in the lease manager. Caller logs the edit.
  Lease reassignLeaseInternal(Lease lease, String src, String newHolder,
      INodeFile pendingFile) {
    assert hasWriteLock();
    pendingFile.getFileUnderConstructionFeature().setClientName(newHolder);
    return leaseManager.reassignLease(lease, src, newHolder);
  }

  // Commits (or completes) the last block of an under-construction file and,
  // if the reported length is shorter than the preferred block size, gives
  // back the over-reserved disk space.
  private void commitOrCompleteLastBlock(final INodeFile fileINode,
      final Block commitBlock) throws IOException {
    assert hasWriteLock();
    Preconditions.checkArgument(fileINode.isUnderConstruction());
    if (!blockManager.commitOrCompleteLastBlock(fileINode, commitBlock)) {
      return;
    }

    // Adjust disk space consumption if required
    final long diff = fileINode.getPreferredBlockSize() - commitBlock.getNumBytes();
    if (diff > 0) {
      try {
        String path = fileINode.getFullPathName();
        dir.updateSpaceConsumed(path, 0, -diff*fileINode.getFileReplication());
      } catch (IOException e) {
        // Best effort: a failed quota adjustment is logged, not fatal.
        LOG.warn("Unexpected exception while updating disk space.", e);
      }
    }
  }

  private void finalizeINodeFileUnderConstruction(String src,
      INodeFile pendingFile, int latestSnapshot) throws IOException,
      UnresolvedLinkException {
    assert hasWriteLock();
    FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature();
    Preconditions.checkArgument(uc != null);
    leaseManager.removeLease(uc.getClientName(), src);

    pendingFile = pendingFile.recordModification(latestSnapshot);

    // The file is no longer pending.
    // Create permanent INode, update blocks. No need to replace the inode here
    // since we just remove the uc feature from pendingFile
    final INodeFile newFile = pendingFile.toCompleteFile(now());

    // close file and persist block allocations for this file
    dir.closeFile(src, newFile);

    blockManager.checkReplication(newFile);
  }

  /** Looks up the stored BlockInfo for the given block in the block manager. */
  @VisibleForTesting
  BlockInfo getStoredBlock(Block block) {
    return blockManager.getStoredBlock(block);
  }

  @Override
  public boolean isInSnapshot(BlockInfoUnderConstruction blockUC) {
    assert hasReadLock();
    final BlockCollection bc = blockUC.getBlockCollection();
    if (bc == null || !(bc instanceof INodeFile)
        || !((INodeFile) bc).isUnderConstruction()) {
      return false;
    }

    INodeFile inodeUC = (INodeFile) bc;
    String fullName = inodeUC.getName();
    try {
      if (fullName != null && fullName.startsWith(Path.SEPARATOR)
          && dir.getINode(fullName) == inodeUC) {
        // If file exists in normal path then no need to look in snapshot
        return false;
      }
    } catch (UnresolvedLinkException e) {
      LOG.error("Error while resolving the link : " + fullName, e);
      return false;
    }
    /*
     * 1. if bc is an instance of INodeFileUnderConstructionWithSnapshot, and
     * bc is not in the current fsdirectory tree, bc must represent a snapshot
     * file.
     * 2. if fullName is not an absolute path, bc cannot be existent in the
     * current fsdirectory tree.
     * 3. if bc is not the current node associated with fullName, bc must be a
     * snapshot inode.
     */
    return true;
  }

  /**
   * Applies the result of a datanode-side block recovery: updates (or deletes)
   * the last block of the recovered file and, when closeFile is set, finalizes
   * and closes the file. Invoked by the primary datanode via RPC.
   */
  void commitBlockSynchronization(ExtendedBlock lastblock,
      long newgenerationstamp, long newlength,
      boolean closeFile, boolean deleteblock, DatanodeID[] newtargets,
      String[] newtargetstorages)
      throws IOException, UnresolvedLinkException {
    LOG.info("commitBlockSynchronization(lastblock=" + lastblock
             + ", newgenerationstamp=" + newgenerationstamp
             + ", newlength=" + newlength
             + ", newtargets=" + Arrays.asList(newtargets)
             + ", closeFile=" + closeFile
             + ", deleteBlock=" + deleteblock
             + ")");
    checkOperation(OperationCategory.WRITE);
    String src = "";
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // If a DN tries to commit to the standby, the recovery will
      // fail, and the next retry will succeed on the new NN.

      checkNameNodeSafeMode(
          "Cannot commitBlockSynchronization while in safe mode");
      final BlockInfo storedBlock = getStoredBlock(
          ExtendedBlock.getLocalBlock(lastblock));
      if (storedBlock == null) {
        if (deleteblock) {
          // This may be a retry attempt so ignore the failure
          // to locate the block.
          if (LOG.isDebugEnabled()) {
            LOG.debug("Block (=" + lastblock + ") not found");
          }
          return;
        } else {
          throw new IOException("Block (=" + lastblock + ") not found");
        }
      }
      INodeFile iFile = ((INode)storedBlock.getBlockCollection()).asFile();
      if (!iFile.isUnderConstruction() || storedBlock.isComplete()) {
        // Likely a retry whose first attempt already closed the file.
        if (LOG.isDebugEnabled()) {
          LOG.debug("Unexpected block (=" + lastblock
                    + ") since the file (=" + iFile.getLocalName()
                    + ") is not under construction");
        }
        return;
      }

      // Guard against stale recoveries: the reported generation stamp must
      // match the recovery id issued when this recovery was initiated.
      long recoveryId =
        ((BlockInfoUnderConstruction)storedBlock).getBlockRecoveryId();
      if(recoveryId != newgenerationstamp) {
        throw new IOException("The recovery id " + newgenerationstamp
                              + " does not match current recovery id "
                              + recoveryId + " for block " + lastblock);
      }

      if (deleteblock) {
        Block blockToDel = ExtendedBlock.getLocalBlock(lastblock);
        boolean remove = iFile.removeLastBlock(blockToDel);
        if (remove) {
          blockManager.removeBlockFromMap(storedBlock);
        }
      }
      else {
        // update last block
        storedBlock.setGenerationStamp(newgenerationstamp);
        storedBlock.setNumBytes(newlength);

        // find the DatanodeDescriptor objects
        // There should be no locations in the blockManager till now because the
        // file is underConstruction
        ArrayList<DatanodeDescriptor> trimmedTargets =
            new ArrayList<DatanodeDescriptor>(newtargets.length);
        ArrayList<String> trimmedStorages =
            new ArrayList<String>(newtargets.length);
        if (newtargets.length > 0) {
          for (int i = 0; i < newtargets.length; ++i) {
            // try to get targetNode
            DatanodeDescriptor targetNode =
                blockManager.getDatanodeManager().getDatanode(newtargets[i]);
            if (targetNode != null) {
              trimmedTargets.add(targetNode);
              trimmedStorages.add(newtargetstorages[i]);
            } else if (LOG.isDebugEnabled()) {
              LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found");
            }
          }
        }
        if ((closeFile) && !trimmedTargets.isEmpty()) {
          // the file is getting closed. Insert block locations into blockManager.
          // Otherwise fsck will report these blocks as MISSING, especially if the
          // blocksReceived from Datanodes take a long time to arrive.
          for (int i = 0; i < trimmedTargets.size(); i++) {
            trimmedTargets.get(i).addBlock(
              trimmedStorages.get(i), storedBlock);
          }
        }

        // add pipeline locations into the INodeUnderConstruction
        DatanodeStorageInfo[] trimmedStorageInfos =
            blockManager.getDatanodeManager().getDatanodeStorageInfos(
                trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]),
                trimmedStorages.toArray(new String[trimmedStorages.size()]));
        iFile.setLastBlock(storedBlock, trimmedStorageInfos);
      }

      if (closeFile) {
        src = closeFileCommitBlocks(iFile, storedBlock);
      } else {
        // If this commit does not want to close the file, persist blocks
        src = persistBlocks(iFile, false);
      }
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();
    if (closeFile) {
      LOG.info("commitBlockSynchronization(newblock=" + lastblock
          + ", file=" + src
          + ", newgenerationstamp=" + newgenerationstamp
          + ", newlength=" + newlength
          + ", newtargets=" + Arrays.asList(newtargets) + ") successful");
    } else {
      LOG.info("commitBlockSynchronization(" + lastblock + ") successful");
    }
  }

  /**
   *
   * @param pendingFile
   * @param storedBlock
   * @return Path of the file that was closed.
4096 * @throws IOException 4097 */ 4098 @VisibleForTesting 4099 String closeFileCommitBlocks(INodeFile pendingFile, BlockInfo storedBlock) 4100 throws IOException { 4101 String src = pendingFile.getFullPathName(); 4102 4103 // commit the last block and complete it if it has minimum replicas 4104 commitOrCompleteLastBlock(pendingFile, storedBlock); 4105 4106 //remove lease, close file 4107 finalizeINodeFileUnderConstruction(src, pendingFile, 4108 Snapshot.findLatestSnapshot(pendingFile, Snapshot.CURRENT_STATE_ID)); 4109 4110 return src; 4111 } 4112 4113 /** 4114 * Persist the block list for the given file. 4115 * 4116 * @param pendingFile 4117 * @return Path to the given file. 4118 * @throws IOException 4119 */ 4120 @VisibleForTesting 4121 String persistBlocks(INodeFile pendingFile, boolean logRetryCache) 4122 throws IOException { 4123 String src = pendingFile.getFullPathName(); 4124 dir.persistBlocks(src, pendingFile, logRetryCache); 4125 return src; 4126 } 4127 4128 /** 4129 * Renew the lease(s) held by the given client 4130 */ 4131 void renewLease(String holder) throws IOException { 4132 checkOperation(OperationCategory.WRITE); 4133 readLock(); 4134 try { 4135 checkOperation(OperationCategory.WRITE); 4136 checkNameNodeSafeMode("Cannot renew lease for " + holder); 4137 leaseManager.renewLease(holder); 4138 } finally { 4139 readUnlock(); 4140 } 4141 } 4142 4143 /** 4144 * Get a partial listing of the indicated directory 4145 * 4146 * @param src the directory name 4147 * @param startAfter the name to start after 4148 * @param needLocation if blockLocations need to be returned 4149 * @return a partial listing starting after startAfter 4150 * 4151 * @throws AccessControlException if access is denied 4152 * @throws UnresolvedLinkException if symbolic link is encountered 4153 * @throws IOException if other I/O error occurred 4154 */ 4155 DirectoryListing getListing(String src, byte[] startAfter, 4156 boolean needLocation) 4157 throws AccessControlException, 
UnresolvedLinkException, IOException { 4158 try { 4159 return getListingInt(src, startAfter, needLocation); 4160 } catch (AccessControlException e) { 4161 logAuditEvent(false, "listStatus", src); 4162 throw e; 4163 } 4164 } 4165 4166 private DirectoryListing getListingInt(String src, byte[] startAfter, 4167 boolean needLocation) 4168 throws AccessControlException, UnresolvedLinkException, IOException { 4169 DirectoryListing dl; 4170 FSPermissionChecker pc = getPermissionChecker(); 4171 checkOperation(OperationCategory.READ); 4172 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 4173 String startAfterString = new String(startAfter); 4174 readLock(); 4175 try { 4176 checkOperation(OperationCategory.READ); 4177 src = FSDirectory.resolvePath(src, pathComponents, dir); 4178 4179 // Get file name when startAfter is an INodePath 4180 if (FSDirectory.isReservedName(startAfterString)) { 4181 byte[][] startAfterComponents = FSDirectory 4182 .getPathComponentsForReservedPath(startAfterString); 4183 try { 4184 String tmp = FSDirectory.resolvePath(src, startAfterComponents, dir); 4185 byte[][] regularPath = INode.getPathComponents(tmp); 4186 startAfter = regularPath[regularPath.length - 1]; 4187 } catch (IOException e) { 4188 // Possibly the inode is deleted 4189 throw new DirectoryListingStartAfterNotFoundException( 4190 "Can't find startAfter " + startAfterString); 4191 } 4192 } 4193 4194 if (isPermissionEnabled) { 4195 if (dir.isDir(src)) { 4196 checkPathAccess(pc, src, FsAction.READ_EXECUTE); 4197 } else { 4198 checkTraverse(pc, src); 4199 } 4200 } 4201 logAuditEvent(true, "listStatus", src); 4202 dl = dir.getListing(src, startAfter, needLocation); 4203 } finally { 4204 readUnlock(); 4205 } 4206 return dl; 4207 } 4208 4209 ///////////////////////////////////////////////////////// 4210 // 4211 // These methods are called by datanodes 4212 // 4213 ///////////////////////////////////////////////////////// 4214 /** 4215 * Register Datanode. 
4216 * <p> 4217 * The purpose of registration is to identify whether the new datanode 4218 * serves a new data storage, and will report new data block copies, 4219 * which the namenode was not aware of; or the datanode is a replacement 4220 * node for the data storage that was previously served by a different 4221 * or the same (in terms of host:port) datanode. 4222 * The data storages are distinguished by their storageIDs. When a new 4223 * data storage is reported the namenode issues a new unique storageID. 4224 * <p> 4225 * Finally, the namenode returns its namespaceID as the registrationID 4226 * for the datanodes. 4227 * namespaceID is a persistent attribute of the name space. 4228 * The registrationID is checked every time the datanode is communicating 4229 * with the namenode. 4230 * Datanodes with inappropriate registrationID are rejected. 4231 * If the namenode stops, and then restarts it can restore its 4232 * namespaceID and will continue serving the datanodes that has previously 4233 * registered with the namenode without restarting the whole cluster. 4234 * 4235 * @see org.apache.hadoop.hdfs.server.datanode.DataNode 4236 */ 4237 void registerDatanode(DatanodeRegistration nodeReg) throws IOException { 4238 writeLock(); 4239 try { 4240 getBlockManager().getDatanodeManager().registerDatanode(nodeReg); 4241 checkSafeMode(); 4242 } finally { 4243 writeUnlock(); 4244 } 4245 } 4246 4247 /** 4248 * Get registrationID for datanodes based on the namespaceID. 4249 * 4250 * @see #registerDatanode(DatanodeRegistration) 4251 * @return registration ID 4252 */ 4253 String getRegistrationID() { 4254 return Storage.getRegistrationID(dir.fsImage.getStorage()); 4255 } 4256 4257 /** 4258 * The given node has reported in. 
This method should: 4259 * 1) Record the heartbeat, so the datanode isn't timed out 4260 * 2) Adjust usage stats for future block allocation 4261 * 4262 * If a substantial amount of time passed since the last datanode 4263 * heartbeat then request an immediate block report. 4264 * 4265 * @return an array of datanode commands 4266 * @throws IOException 4267 */ 4268 HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, 4269 StorageReport[] reports, long cacheCapacity, long cacheUsed, 4270 int xceiverCount, int xmitsInProgress, int failedVolumes) 4271 throws IOException { 4272 readLock(); 4273 try { 4274 //get datanode commands 4275 final int maxTransfer = blockManager.getMaxReplicationStreams() 4276 - xmitsInProgress; 4277 DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat( 4278 nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed, 4279 xceiverCount, maxTransfer, failedVolumes); 4280 4281 //create ha status 4282 final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat( 4283 haContext.getState().getServiceState(), 4284 getFSImage().getLastAppliedOrWrittenTxId()); 4285 4286 return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo); 4287 } finally { 4288 readUnlock(); 4289 } 4290 } 4291 4292 /** 4293 * Returns whether or not there were available resources at the last check of 4294 * resources. 4295 * 4296 * @return true if there were sufficient resources available, false otherwise. 4297 */ 4298 boolean nameNodeHasResourcesAvailable() { 4299 return hasResourcesAvailable; 4300 } 4301 4302 /** 4303 * Perform resource checks and cache the results. 
   */
  void checkAvailableResources() {
    // Note: this method declares no checked exceptions; a previous doc tag
    // claiming IOException was incorrect and has been dropped.
    Preconditions.checkState(nnResourceChecker != null,
        "nnResourceChecker not initialized");
    // Cache the result; readers use nameNodeHasResourcesAvailable().
    hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
  }

  /**
   * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if
   * there are found to be insufficient resources available, causes the NN to
   * enter safe mode. If resources are later found to have returned to
   * acceptable levels, this daemon will cause the NN to exit safe mode.
   */
  class NameNodeResourceMonitor implements Runnable  {
    boolean shouldNNRmRun = true;
    @Override
    public void run () {
      try {
        while (fsRunning && shouldNNRmRun) {
          checkAvailableResources();
          if(!nameNodeHasResourcesAvailable()) {
            String lowResourcesMsg = "NameNode low on available disk space. ";
            if (!isInSafeMode()) {
              FSNamesystem.LOG.warn(lowResourcesMsg + "Entering safe mode.");
            } else {
              FSNamesystem.LOG.warn(lowResourcesMsg + "Already in safe mode.");
            }
            // true => safemode entered because resources are low, not manual
            enterSafeMode(true);
          }
          try {
            Thread.sleep(resourceRecheckInterval);
          } catch (InterruptedException ie) {
            // Deliberately ignore
          }
        }
      } catch (Exception e) {
        FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e);
      }
    }

    public void stopMonitor() {
      shouldNNRmRun = false;
    }
  }

  /**
   * Periodically rolls the edit log when the open segment grows beyond
   * rollThreshold transactions, to bound recovery/tailing time.
   */
  class NameNodeEditLogRoller implements Runnable {

    private boolean shouldRun = true;
    private final long rollThreshold;
    private final long sleepIntervalMs;

    public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) {
      this.rollThreshold = rollThreshold;
      this.sleepIntervalMs = sleepIntervalMs;
    }

    @Override
    public void run() {
      while (fsRunning && shouldRun) {
        try {
          FSEditLog editLog = getFSImage().getEditLog();
          // Number of edits accumulated in the currently open segment.
          long numEdits =
              editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId();
          if (numEdits > rollThreshold) {
            FSNamesystem.LOG.info("NameNode rolling its own edit log because"
                + " number of edits in open segment exceeds threshold of "
                + rollThreshold);
            rollEditLog();
          }
          Thread.sleep(sleepIntervalMs);
        } catch (InterruptedException e) {
          FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName()
              + " was interrupted, exiting");
          break;
        } catch (Exception e) {
          // Keep the roller alive across transient failures.
          FSNamesystem.LOG.error("Swallowing exception in "
              + NameNodeEditLogRoller.class.getSimpleName() + ":", e);
        }
      }
    }

    public void stop() {
      shouldRun = false;
    }
  }

  /** @return the FSImage backing this namesystem. */
  public FSImage getFSImage() {
    return dir.fsImage;
  }

  /** @return the edit log of the backing FSImage. */
  public FSEditLog getEditLog() {
    return getFSImage().getEditLog();
  }

  /**
   * Verify that the given block (if any) belongs to this namenode's
   * block pool.
   *
   * @throws IOException if the block carries a foreign block pool id
   */
  private void checkBlock(ExtendedBlock block) throws IOException {
    if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) {
      throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId()
          + " - expected " + blockPoolId);
    }
  }

  @Metric({"MissingBlocks", "Number of missing blocks"})
  public long getMissingBlocksCount() {
    // not locking
    return blockManager.getMissingBlocksCount();
  }

  @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"})
  public int getExpiredHeartbeats() {
    return datanodeStatistics.getExpiredHeartbeats();
  }

  @Metric({"TransactionsSinceLastCheckpoint",
      "Number of transactions since last checkpoint"})
  public long getTransactionsSinceLastCheckpoint() {
    return getEditLog().getLastWrittenTxId() -
        getFSImage().getStorage().getMostRecentCheckpointTxId();
  }

  @Metric({"TransactionsSinceLastLogRoll",
      "Number of transactions since last edit log roll"})
  public long getTransactionsSinceLastLogRoll() {
    if (isInStandbyState() ||
        !getEditLog().isSegmentOpen()) {
      // Standby (or no open segment): report 0 rather than reading the
      // active-only segment bookkeeping.
      return 0;
    } else {
      return getEditLog().getLastWrittenTxId() -
          getEditLog().getCurSegmentTxId() + 1;
    }
  }

  @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"})
  public long getLastWrittenTransactionId() {
    return getEditLog().getLastWrittenTxId();
  }

  @Metric({"LastCheckpointTime",
      "Time in milliseconds since the epoch of the last checkpoint"})
  public long getLastCheckpointTime() {
    return getFSImage().getStorage().getMostRecentCheckpointTime();
  }

  /** @see ClientProtocol#getStats() */
  long[] getStats() {
    // Start from the datanode-level stats and overlay the block-level
    // counters at their well-known ClientProtocol indices.
    final long[] stats = datanodeStatistics.getStats();
    stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks();
    stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks();
    stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount();
    return stats;
  }

  @Override // FSNamesystemMBean
  @Metric({"CapacityTotal",
      "Total raw capacity of data nodes in bytes"})
  public long getCapacityTotal() {
    return datanodeStatistics.getCapacityTotal();
  }

  @Metric({"CapacityTotalGB",
      "Total raw capacity of data nodes in GB"})
  public float getCapacityTotalGB() {
    return DFSUtil.roundBytesToGB(getCapacityTotal());
  }

  @Override // FSNamesystemMBean
  @Metric({"CapacityUsed",
      "Total used capacity across all data nodes in bytes"})
  public long getCapacityUsed() {
    return datanodeStatistics.getCapacityUsed();
  }

  @Metric({"CapacityUsedGB",
      "Total used capacity across all data nodes in GB"})
  public float getCapacityUsedGB() {
    return DFSUtil.roundBytesToGB(getCapacityUsed());
  }

  @Override // FSNamesystemMBean
  @Metric({"CapacityRemaining", "Remaining capacity in bytes"})
  public long getCapacityRemaining() {
    return datanodeStatistics.getCapacityRemaining();
  }

  @Metric({"CapacityRemainingGB", "Remaining capacity in GB"})
  public float getCapacityRemainingGB() {
    return DFSUtil.roundBytesToGB(getCapacityRemaining());
  }

  @Metric({"CapacityUsedNonDFS",
      "Total space used by data nodes for non DFS purposes in bytes"})
  public long getCapacityUsedNonDFS() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }

  /**
   * Total number of connections.
   */
  @Override // FSNamesystemMBean
  @Metric
  public int getTotalLoad() {
    return datanodeStatistics.getXceiverCount();
  }

  @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" })
  public int getNumSnapshottableDirs() {
    return this.snapshotManager.getNumSnapshottableDirs();
  }

  @Metric({ "Snapshots", "The number of snapshots" })
  public int getNumSnapshots() {
    return this.snapshotManager.getNumSnapshots();
  }

  @Override
  public String getSnapshotStats() {
    // Serialized as JSON for the MBean consumer.
    Map<String, Object> info = new HashMap<String, Object>();
    info.put("SnapshottableDirectories", this.getNumSnapshottableDirs());
    info.put("Snapshots", this.getNumSnapshots());
    return JSON.toString(info);
  }

  /** @return the number of datanodes matching the given report type. */
  int getNumberOfDatanodes(DatanodeReportType type) {
    readLock();
    try {
      return getBlockManager().getDatanodeManager().getDatanodeListForReport(
          type).size();
    } finally {
      readUnlock();
    }
  }

  /**
   * Build a datanode report (copies of the descriptors) for the given type.
   * Requires superuser privilege.
   */
  DatanodeInfo[] datanodeReport(final DatanodeReportType type
      ) throws AccessControlException, StandbyException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    readLock();
    try {
      // Re-check under the lock in case of an HA state transition.
      checkOperation(OperationCategory.UNCHECKED);
      final DatanodeManager dm = getBlockManager().getDatanodeManager();
      final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type);

      DatanodeInfo[] arr
          = new DatanodeInfo[results.size()];
      // Copy into plain DatanodeInfo so internal descriptors do not escape.
      for (int i=0; i<arr.length; i++) {
        arr[i] = new DatanodeInfo(results.get(i));
      }
      return arr;
    } finally {
      readUnlock();
    }
  }

  /**
   * Save namespace image.
   * This will save current namespace into fsimage file and empty edits file.
   * Requires superuser privilege and safe mode.
   *
   * @throws AccessControlException if superuser privilege is violated.
   * @throws IOException if the namenode is not in safe mode, or saving the
   *           image fails.
   */
  void saveNamespace() throws AccessControlException, IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();

    // Retried RPCs return the cached outcome instead of saving twice.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    readLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);

      // Safe mode guarantees the namespace is not being modified while the
      // image is written.
      if (!isInSafeMode()) {
        throw new IOException("Safe mode should be turned ON "
            + "in order to create namespace image.");
      }
      getFSImage().saveNamespace(this);
      success = true;
    } finally {
      readUnlock();
      RetryCache.setState(cacheEntry, success);
    }
    LOG.info("New namespace image has been created");
  }

  /**
   * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again.
   * Requires superuser privilege.
   *
   * @throws AccessControlException if superuser privilege is violated.
   */
  boolean restoreFailedStorage(String arg) throws AccessControlException,
      StandbyException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    writeLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);

      // if it is disabled - enable it and vice versa.
      if(arg.equals("check"))
        return getFSImage().getStorage().getRestoreFailedStorage();

      boolean val = arg.equals("true"); // false if not
      getFSImage().getStorage().setRestoreFailedStorage(val);

      return val;
    } finally {
      writeUnlock();
    }
  }

  /** @return the time this namenode started, as a Date. */
  Date getStartTime() {
    return new Date(startTime);
  }

  /** Finalize a pending upgrade. Requires superuser privilege. */
  void finalizeUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    writeLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);
      getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState());
    } finally {
      writeUnlock();
    }
  }

  /** Re-read the include/exclude host lists. Requires superuser privilege. */
  void refreshNodes() throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration());
  }

  /** Set the balancer bandwidth on all datanodes. Requires superuser privilege. */
  void setBalancerBandwidth(long bandwidth) throws IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();
    getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth);
  }

  /**
   * SafeModeInfo contains information related to the safe mode.
   * <p>
   * An instance of {@link SafeModeInfo} is created when the name node
   * enters safe mode.
   * <p>
   * During name node startup {@link SafeModeInfo} counts the number of
   * <em>safe blocks</em>, those that have at least the minimal number of
   * replicas, and calculates the ratio of safe blocks to the total number
   * of blocks in the system, which is the size of blocks in
   * {@link FSNamesystem#blockManager}. When the ratio reaches the
   * {@link #threshold} it starts the SafeModeMonitor daemon in order
   * to monitor whether the safe mode {@link #extension} is passed.
   * Then it leaves safe mode and destroys itself.
   * <p>
   * If safe mode is turned on manually then the number of safe blocks is
   * not tracked because the name node is not intended to leave safe mode
   * automatically in the case.
   *
   * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean)
   */
  public class SafeModeInfo {
    // configuration fields
    /** Safe mode threshold condition %.*/
    private final double threshold;
    /** Safe mode minimum number of datanodes alive */
    private final int datanodeThreshold;
    /** Safe mode extension after the threshold. */
    private int extension;
    /** Min replication required by safe mode. */
    private final int safeReplication;
    /** threshold for populating needed replication queues */
    private final double replQueueThreshold;
    // internal fields
    /** Time when threshold was reached.
     * <br> -1 safe mode is off
     * <br> 0 safe mode is on, and threshold is not reached yet
     * <br> >0 safe mode is on, but we are in extension period
     */
    private long reached = -1;
    /** Total number of blocks. */
    int blockTotal;
    /** Number of safe blocks. */
    int blockSafe;
    /** Number of blocks needed to satisfy safe mode threshold condition */
    private int blockThreshold;
    /** Number of blocks needed before populating replication queues */
    private int blockReplQueueThreshold;
    /** time of the last status printout */
    private long lastStatusReport = 0;
    /** Was safemode entered automatically because available resources were low. */
    private boolean resourcesLow = false;
    /** Should safemode adjust its block totals as blocks come in */
    private boolean shouldIncrementallyTrackBlocks = false;
    /** counter for tracking startup progress of reported blocks */
    private Counter awaitingReportedBlocksCounter;

    /**
     * Creates SafeModeInfo when the name node enters
     * automatic safe mode at startup.
     *
     * @param conf configuration
     */
    private SafeModeInfo(Configuration conf) {
      this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY,
          DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT);
      // A threshold > 1 can never be satisfied; warn but keep the value so
      // that safe mode effectively becomes manual-exit only.
      if(threshold > 1.0) {
        LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold);
      }
      this.datanodeThreshold = conf.getInt(
          DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY,
          DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT);
      this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0);
      this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY,
          DFS_NAMENODE_REPLICATION_MIN_DEFAULT);

      LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold);
      LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold);
      LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + " = " + extension);

      // default to safe mode threshold (i.e., don't populate queues before leaving safe mode)
      this.replQueueThreshold =
          conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY,
              (float) threshold);
      this.blockTotal = 0;
      this.blockSafe = 0;
    }

    /**
     * In the HA case, the StandbyNode can be in safemode while the namespace
     * is modified by the edit log tailer. In this case, the number of total
     * blocks changes as edits are processed (eg blocks are added and deleted).
     * However, we don't want to do the incremental tracking during the
     * startup-time loading process -- only once the initial total has been
     * set after the image has been loaded.
     */
    private boolean shouldIncrementallyTrackBlocks() {
      return shouldIncrementallyTrackBlocks;
    }

    /**
     * Creates SafeModeInfo when safe mode is entered manually, or because
     * available resources are low.
     *
     * The {@link #threshold} is set to 1.5 so that it could never be reached.
     * {@link #blockTotal} is set to -1 to indicate that safe mode is manual.
     *
     * @see SafeModeInfo
     */
    private SafeModeInfo(boolean resourcesLow) {
      this.threshold = 1.5f; // this threshold can never be reached
      this.datanodeThreshold = Integer.MAX_VALUE;
      this.extension = Integer.MAX_VALUE;   // MAX_VALUE also marks "manual" — see isManual()
      this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication
      this.replQueueThreshold = 1.5f; // can never be reached
      this.blockTotal = -1;
      this.blockSafe = -1;
      this.resourcesLow = resourcesLow;
      enter();
      reportStatus("STATE* Safe mode is ON.", true);
    }

    /**
     * Check if safe mode is on.
     * @return true if in safe mode
     */
    private synchronized boolean isOn() {
      doConsistencyCheck();
      return this.reached >= 0;
    }

    /**
     * Enter safe mode.
     */
    private void enter() {
      this.reached = 0;
    }

    /**
     * Leave safe mode.
     * <p>
     * Check for invalid, under- & over-replicated blocks in the end of startup.
     */
    private synchronized void leave() {
      // if not done yet, initialize replication queues.
      // In the standby, do not populate repl queues
      if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) {
        initializeReplQueues();
      }
      long timeInSafemode = now() - startTime;
      NameNode.stateChangeLog.info("STATE* Leaving safe mode after "
          + timeInSafemode/1000 + " secs");
      NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);

      //Log the following only once (when transitioning from ON -> OFF)
      if (reached >= 0) {
        NameNode.stateChangeLog.info("STATE* Safe mode is OFF");
      }
      reached = -1;
      // Clearing the volatile field is what callers observe as "safe mode off".
      safeMode = null;
      final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology();
      NameNode.stateChangeLog.info("STATE* Network topology has "
          + nt.getNumOfRacks() + " racks and "
          + nt.getNumOfLeaves() + " datanodes");
      NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has "
          + blockManager.numOfUnderReplicatedBlocks() + " blocks");

      startSecretManagerIfNecessary();

      // If startup has not yet completed, end safemode phase.
      StartupProgress prog = NameNode.getStartupProgress();
      if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
        prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS);
        prog.endPhase(Phase.SAFEMODE);
      }
    }

    /**
     * Check whether we have reached the threshold for
     * initializing replication queues.
     */
    private synchronized boolean canInitializeReplQueues() {
      return shouldPopulateReplQueues()
          && blockSafe >= blockReplQueueThreshold;
    }

    /**
     * Safe mode can be turned off iff
     * the threshold is reached and
     * the extension time have passed.
     * @return true if can leave or false otherwise.
     */
    private synchronized boolean canLeave() {
      // reached == 0 means the block/datanode thresholds were never met.
      if (reached == 0) {
        return false;
      }

      // Still inside the configured extension window.
      if (now() - reached < extension) {
        reportStatus("STATE* Safe mode ON, in safe mode extension.", false);
        return false;
      }

      // Thresholds may have been un-met again (e.g. datanodes died).
      if (needEnter()) {
        reportStatus("STATE* Safe mode ON, thresholds not met.", false);
        return false;
      }

      return true;
    }

    /**
     * There is no need to enter safe mode
     * if DFS is empty or {@link #threshold} == 0
     */
    private boolean needEnter() {
      return (threshold != 0 && blockSafe < blockThreshold) ||
        (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) ||
        (!nameNodeHasResourcesAvailable());
    }

    /**
     * Check and trigger safe mode if needed.
     */
    private void checkMode() {
      // Have to have write-lock since leaving safemode initializes
      // repl queues, which requires write lock
      assert hasWriteLock();
      // if smmthread is already running, the block threshold must have been
      // reached before, there is no need to enter the safe mode again
      if (smmthread == null && needEnter()) {
        enter();
        // check if we are ready to initialize replication queues
        if (canInitializeReplQueues() && !isPopulatingReplQueues()
            && !haEnabled) {
          initializeReplQueues();
        }
        reportStatus("STATE* Safe mode ON.", false);
        return;
      }
      // the threshold is reached or was reached before
      if (!isOn() ||                           // safe mode is off
          extension <= 0 || threshold <= 0) {  // don't need to wait
        this.leave(); // leave safe mode
        return;
      }
      if (reached > 0) {  // threshold has already been reached before
        reportStatus("STATE* Safe mode ON.", false);
        return;
      }
      // start monitor
      reached = now();
      if (smmthread == null) {
        smmthread = new Daemon(new SafeModeMonitor());
        smmthread.start();
        reportStatus("STATE* Safe mode extension entered.", true);
      }

      // check if we are ready to initialize replication queues
      if (canInitializeReplQueues() && !isPopulatingReplQueues() && !haEnabled) {
        initializeReplQueues();
      }
    }

    /**
     * Set total number of blocks.
     */
    private synchronized void setBlockTotal(int total) {
      this.blockTotal = total;
      this.blockThreshold = (int) (blockTotal * threshold);
      this.blockReplQueueThreshold =
        (int) (blockTotal * replQueueThreshold);
      if (haEnabled) {
        // After we initialize the block count, any further namespace
        // modifications done while in safe mode need to keep track
        // of the number of total blocks in the system.
        this.shouldIncrementallyTrackBlocks = true;
      }
      if(blockSafe < 0)
        this.blockSafe = 0;
      checkMode();
    }

    /**
     * Increment number of safe blocks if current block has
     * reached minimal replication.
     * @param replication current replication
     */
    private synchronized void incrementSafeBlockCount(short replication) {
      // Only count the block the first time it reaches safeReplication.
      if (replication == safeReplication) {
        this.blockSafe++;

        // Report startup progress only if we haven't completed startup yet.
        StartupProgress prog = NameNode.getStartupProgress();
        if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
          if (this.awaitingReportedBlocksCounter == null) {
            this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE,
              STEP_AWAITING_REPORTED_BLOCKS);
          }
          this.awaitingReportedBlocksCounter.increment();
        }

        checkMode();
      }
    }

    /**
     * Decrement number of safe blocks if current block has
     * fallen below minimal replication.
     * @param replication current replication
     */
    private synchronized void decrementSafeBlockCount(short replication) {
      // Only uncount the block at the transition below safeReplication.
      if (replication == safeReplication-1) {
        this.blockSafe--;
        //blockSafe is set to -1 in manual / low resources safemode
        assert blockSafe >= 0 || isManual() || areResourcesLow();
        checkMode();
      }
    }

    /**
     * Check if safe mode was entered manually
     */
    private boolean isManual() {
      return extension == Integer.MAX_VALUE;
    }

    /**
     * Set manual safe mode.
     */
    private synchronized void setManual() {
      extension = Integer.MAX_VALUE;
    }

    /**
     * Check if safe mode was entered due to resources being low.
     */
    private boolean areResourcesLow() {
      return resourcesLow;
    }

    /**
     * Set that resources are low for this instance of safe mode.
     */
    private void setResourcesLow() {
      resourcesLow = true;
    }

    /**
     * A tip on how safe mode is to be turned off: manually or automatically.
     */
    String getTurnOffTip() {
      if(!isOn()) {
        return "Safe mode is OFF.";
      }

      //Manual OR low-resource safemode. (Admin intervention required)
      String adminMsg = "It was turned on manually. ";
      if (areResourcesLow()) {
        adminMsg = "Resources are low on NN. Please add or free up more "
          + "resources then turn off safe mode manually. NOTE: If you turn off"
          + " safe mode before adding resources, "
          + "the NN will immediately return to safe mode. ";
      }
      if (isManual() || areResourcesLow()) {
        return adminMsg
          + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
      }

      boolean thresholdsMet = true;
      int numLive = getNumLiveDataNodes();
      String msg = "";
      if (blockSafe < blockThreshold) {
        // NOTE(review): the "+ 1" looks like it overstates the needed count
        // by one — needEnter() only requires blockSafe >= blockThreshold.
        // Confirm before changing, as this is a user-visible message only.
        msg += String.format(
          "The reported blocks %d needs additional %d"
          + " blocks to reach the threshold %.4f of total blocks %d.\n",
          blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
        thresholdsMet = false;
      } else {
        msg += String.format("The reported blocks %d has reached the threshold"
            + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
      }
      if (numLive < datanodeThreshold) {
        msg += String.format(
          "The number of live datanodes %d needs an additional %d live "
          + "datanodes to reach the minimum number %d.\n",
          numLive, (datanodeThreshold - numLive), datanodeThreshold);
        thresholdsMet = false;
      } else {
        msg += String.format("The number of live datanodes %d has reached "
            + "the minimum number %d. ",
            numLive, datanodeThreshold);
      }
      msg += (reached > 0) ? "In safe mode extension. " : "";
      msg += "Safe mode will be turned off automatically ";

      if (!thresholdsMet) {
        msg += "once the thresholds have been reached.";
      } else if (reached + extension - now() > 0) {
        msg += ("in " + (reached + extension - now()) / 1000 + " seconds.");
      } else {
        msg += "soon.";
      }

      return msg;
    }

    /**
     * Print status every 20 seconds.
     */
    private void reportStatus(String msg, boolean rightNow) {
      long curTime = now();
      // Throttle to one report per 20s unless rightNow is set.
      if(!rightNow && (curTime - lastStatusReport < 20 * 1000))
        return;
      NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip());
      lastStatusReport = curTime;
    }

    @Override
    public String toString() {
      String resText = "Current safe blocks = "
        + blockSafe
        + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold
        + ". Minimal replication = " + safeReplication + ".";
      if (reached > 0)
        resText += " Threshold was reached " + new Date(reached) + ".";
      return resText;
    }

    /**
     * Checks consistency of the class state.
     * This is costly so only runs if asserts are enabled.
     */
    private void doConsistencyCheck() {
      boolean assertsOn = false;
      assert assertsOn = true; // set to true if asserts are on
      if (!assertsOn) return;

      if (blockTotal == -1 && blockSafe == -1) {
        return; // manual safe mode
      }
      int activeBlocks = blockManager.getActiveBlockCount();
      if ((blockTotal != activeBlocks) &&
          !(blockSafe >= 0 && blockSafe <= blockTotal)) {
        throw new AssertionError(
            " SafeMode: Inconsistent filesystem state: "
            + "SafeMode data: blockTotal=" + blockTotal
            + " blockSafe=" + blockSafe + "; "
            + "BlockManager data: active=" + activeBlocks);
      }
    }

    /**
     * Adjust blockSafe and blockTotal together; only active when HA
     * incremental tracking has been enabled by setBlockTotal().
     */
    private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) {
      if (!shouldIncrementallyTrackBlocks) {
        return;
      }
      assert haEnabled;

      if (LOG.isDebugEnabled()) {
        LOG.debug("Adjusting block totals from " +
            blockSafe + "/" + blockTotal + " to " +
            (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal));
      }
      assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " +
        blockSafe + " by " + deltaSafe + ": would be negative";
      assert blockTotal + deltaTotal >= 0 : "Can't reduce blockTotal " +
        blockTotal + " by " + deltaTotal + ": would be negative";

      blockSafe += deltaSafe;
      // setBlockTotal recomputes the thresholds and re-runs checkMode().
      setBlockTotal(blockTotal + deltaTotal);
    }
  }

  /**
   * Periodically check whether it is time to leave safe mode.
   * This thread starts when the threshold level is reached.
   *
   */
  class SafeModeMonitor implements Runnable {
    /** interval in msec for checking safe mode: {@value} */
    private static final long recheckInterval = 1000;

    /**
     */
    @Override
    public void run() {
      while (fsRunning) {
        // canLeave()/leave() require the write lock (repl queue init).
        writeLock();
        try {
          if (safeMode == null) { // Not in safe mode.
            break;
          }
          if (safeMode.canLeave()) {
            // Leave safe mode.
            safeMode.leave();
            smmthread = null;
            break;
          }
        } finally {
          writeUnlock();
        }

        try {
          Thread.sleep(recheckInterval);
        } catch (InterruptedException ie) {
          // Ignored
        }
      }
      if (!fsRunning) {
        LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread");
      }
    }
  }

  /**
   * Apply an admin safe mode action (enter/leave/get).
   * @return the resulting safe mode state
   */
  boolean setSafeMode(SafeModeAction action) throws IOException {
    if (action != SafeModeAction.SAFEMODE_GET) {
      checkSuperuserPrivilege();
      switch(action) {
      case SAFEMODE_LEAVE: // leave safe mode
        leaveSafeMode();
        break;
      case SAFEMODE_ENTER: // enter safe mode
        enterSafeMode(false);
        break;
      default:
        LOG.error("Unexpected safe mode action");
      }
    }
    return isInSafeMode();
  }

  @Override
  public void checkSafeMode() {
    // safeMode is volatile, and may be set to null at any time
    SafeModeInfo safeMode = this.safeMode;
    if (safeMode != null) {
      safeMode.checkMode();
    }
  }

  @Override
  public boolean isInSafeMode() {
    // safeMode is volatile, and may be set to null at any time
    SafeModeInfo safeMode = this.safeMode;
    if (safeMode == null)
      return false;
    return
safeMode.isOn(); 5190 } 5191 5192 @Override 5193 public boolean isInStartupSafeMode() { 5194 // safeMode is volatile, and may be set to null at any time 5195 SafeModeInfo safeMode = this.safeMode; 5196 if (safeMode == null) 5197 return false; 5198 // If the NN is in safemode, and not due to manual / low resources, we 5199 // assume it must be because of startup. If the NN had low resources during 5200 // startup, we assume it came out of startup safemode and it is now in low 5201 // resources safemode 5202 return !safeMode.isManual() && !safeMode.areResourcesLow() 5203 && safeMode.isOn(); 5204 } 5205 5206 /** 5207 * Check if replication queues are to be populated 5208 * @return true when node is HAState.Active and not in the very first safemode 5209 */ 5210 @Override 5211 public boolean isPopulatingReplQueues() { 5212 if (!shouldPopulateReplQueues()) { 5213 return false; 5214 } 5215 return initializedReplQueues; 5216 } 5217 5218 private boolean shouldPopulateReplQueues() { 5219 if(haContext == null || haContext.getState() == null) 5220 return false; 5221 return haContext.getState().shouldPopulateReplQueues(); 5222 } 5223 5224 @Override 5225 public void incrementSafeBlockCount(int replication) { 5226 // safeMode is volatile, and may be set to null at any time 5227 SafeModeInfo safeMode = this.safeMode; 5228 if (safeMode == null) 5229 return; 5230 safeMode.incrementSafeBlockCount((short)replication); 5231 } 5232 5233 @Override 5234 public void decrementSafeBlockCount(Block b) { 5235 // safeMode is volatile, and may be set to null at any time 5236 SafeModeInfo safeMode = this.safeMode; 5237 if (safeMode == null) // mostly true 5238 return; 5239 BlockInfo storedBlock = getStoredBlock(b); 5240 if (storedBlock.isComplete()) { 5241 safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas()); 5242 } 5243 } 5244 5245 /** 5246 * Adjust the total number of blocks safe and expected during safe mode. 
5247 * If safe mode is not currently on, this is a no-op. 5248 * @param deltaSafe the change in number of safe blocks 5249 * @param deltaTotal the change i nnumber of total blocks expected 5250 */ 5251 @Override 5252 public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) { 5253 // safeMode is volatile, and may be set to null at any time 5254 SafeModeInfo safeMode = this.safeMode; 5255 if (safeMode == null) 5256 return; 5257 safeMode.adjustBlockTotals(deltaSafe, deltaTotal); 5258 } 5259 5260 /** 5261 * Set the total number of blocks in the system. 5262 */ 5263 public void setBlockTotal() { 5264 // safeMode is volatile, and may be set to null at any time 5265 SafeModeInfo safeMode = this.safeMode; 5266 if (safeMode == null) 5267 return; 5268 safeMode.setBlockTotal((int)getCompleteBlocksTotal()); 5269 } 5270 5271 /** 5272 * Get the total number of blocks in the system. 5273 */ 5274 @Override // FSNamesystemMBean 5275 @Metric 5276 public long getBlocksTotal() { 5277 return blockManager.getTotalBlocks(); 5278 } 5279 5280 /** 5281 * Get the total number of COMPLETE blocks in the system. 5282 * For safe mode only complete blocks are counted. 
   */
  private long getCompleteBlocksTotal() {
    // Calculate number of blocks under construction
    long numUCBlocks = 0;
    readLock();
    try {
      // Walk every lease (each open-for-write file) and count its blocks
      // that are not yet COMPLETE; those are excluded from the total.
      for (Lease lease : leaseManager.getSortedLeases()) {
        for (String path : lease.getPaths()) {
          final INodeFile cons;
          try {
            cons = dir.getINode(path).asFile();
            Preconditions.checkState(cons.isUnderConstruction());
          } catch (UnresolvedLinkException e) {
            // Lease paths must resolve to files on this FS, never symlinks.
            throw new AssertionError("Lease files should reside on this FS");
          }
          BlockInfo[] blocks = cons.getBlocks();
          if(blocks == null)
            continue;
          for(BlockInfo b : blocks) {
            if(!b.isComplete())
              numUCBlocks++;
          }
        }
      }
      LOG.info("Number of blocks under construction: " + numUCBlocks);
      return getBlocksTotal() - numUCBlocks;
    } finally {
      readUnlock();
    }
  }

  /**
   * Enter safe mode. If resourcesLow is false, then we assume it is manual
   * @throws IOException
   */
  void enterSafeMode(boolean resourcesLow) throws IOException {
    writeLock();
    try {
      // Stop the secret manager, since rolling the master key would
      // try to write to the edit log
      stopSecretManager();

      // Ensure that any concurrent operations have been fully synced
      // before entering safe mode. This ensures that the FSImage
      // is entirely stable on disk as soon as we're in safe mode.
      boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
      // Before Editlog is in OpenForWrite mode, editLogStream will be null. So,
      // logSyncAll can be called only when Editlog is in OpenForWrite mode
      if (isEditlogOpenForWrite) {
        getEditLog().logSyncAll();
      }
      if (!isInSafeMode()) {
        // Not already in safe mode: enter it fresh.
        safeMode = new SafeModeInfo(resourcesLow);
        return;
      }
      // Already in safe mode: just record the (new) reason.
      if (resourcesLow) {
        safeMode.setResourcesLow();
      } else {
        safeMode.setManual();
      }
      // Sync again so the reason change above is durable before we report.
      if (isEditlogOpenForWrite) {
        getEditLog().logSyncAll();
      }
      NameNode.stateChangeLog.info("STATE* Safe mode is ON"
          + safeMode.getTurnOffTip());
    } finally {
      writeUnlock();
    }
  }

  /**
   * Leave safe mode.
   * @throws IOException
   */
  void leaveSafeMode() {
    writeLock();
    try {
      if (!isInSafeMode()) {
        NameNode.stateChangeLog.info("STATE* Safe mode is already OFF");
        return;
      }
      safeMode.leave();
    } finally {
      writeUnlock();
    }
  }

  /** @return the human-readable safe-mode status line, or "" when off. */
  String getSafeModeTip() {
    readLock();
    try {
      if (!isInSafeMode()) {
        return "";
      }
      return safeMode.getTurnOffTip();
    } finally {
      readUnlock();
    }
  }

  /**
   * Roll the edit log, returning the signature of the new checkpoint.
   * Requires superuser privilege and refuses while in safe mode.
   */
  CheckpointSignature rollEditLog() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.JOURNAL);
    writeLock();
    try {
      // Re-check under the lock: HA state may have changed since above.
      checkOperation(OperationCategory.JOURNAL);
      checkNameNodeSafeMode("Log not rolled");
      if (Server.isRpcInvocation()) {
        LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
      }
      return getFSImage().rollEditLog();
    } finally {
      writeUnlock();
    }
  }

  /**
   * Start a checkpoint on behalf of a backup node. Uses the retry cache so a
   * retried RPC returns the previously computed command instead of starting
   * a second checkpoint.
   */
  NamenodeCommand startCheckpoint(NamenodeRegistration backupNode,
      NamenodeRegistration activeNamenode) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return (NamenodeCommand) cacheEntry.getPayload();
    }
    writeLock();
NamenodeCommand cmd = null; 5408 try { 5409 checkOperation(OperationCategory.CHECKPOINT); 5410 checkNameNodeSafeMode("Checkpoint not started"); 5411 5412 LOG.info("Start checkpoint for " + backupNode.getAddress()); 5413 cmd = getFSImage().startCheckpoint(backupNode, activeNamenode); 5414 getEditLog().logSync(); 5415 return cmd; 5416 } finally { 5417 writeUnlock(); 5418 RetryCache.setState(cacheEntry, cmd != null, cmd); 5419 } 5420 } 5421 5422 public void processIncrementalBlockReport(final DatanodeID nodeID, 5423 final String poolId, final StorageReceivedDeletedBlocks srdb) 5424 throws IOException { 5425 writeLock(); 5426 try { 5427 blockManager.processIncrementalBlockReport(nodeID, srdb); 5428 } finally { 5429 writeUnlock(); 5430 } 5431 } 5432 5433 void endCheckpoint(NamenodeRegistration registration, 5434 CheckpointSignature sig) throws IOException { 5435 checkOperation(OperationCategory.CHECKPOINT); 5436 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 5437 if (cacheEntry != null && cacheEntry.isSuccess()) { 5438 return; // Return previous response 5439 } 5440 boolean success = false; 5441 readLock(); 5442 try { 5443 checkOperation(OperationCategory.CHECKPOINT); 5444 5445 checkNameNodeSafeMode("Checkpoint not ended"); 5446 LOG.info("End checkpoint for " + registration.getAddress()); 5447 getFSImage().endCheckpoint(sig); 5448 success = true; 5449 } finally { 5450 readUnlock(); 5451 RetryCache.setState(cacheEntry, success); 5452 } 5453 } 5454 5455 PermissionStatus createFsOwnerPermissions(FsPermission permission) { 5456 return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission); 5457 } 5458 5459 private void checkOwner(FSPermissionChecker pc, String path) 5460 throws AccessControlException, UnresolvedLinkException { 5461 checkPermission(pc, path, true, null, null, null, null); 5462 } 5463 5464 private void checkPathAccess(FSPermissionChecker pc, 5465 String path, FsAction access) throws AccessControlException, 5466 
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, access, null);
  }

  /** Verify {@code access} on the immediate parent of {@code path}. */
  private void checkParentAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, access, null, null);
  }

  /** Verify {@code access} on the deepest existing ancestor of {@code path}. */
  private void checkAncestorAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, access, null, null, null);
  }

  /** Verify that the caller may traverse every component of {@code path}. */
  private void checkTraverse(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, null, null);
  }

  @Override
  public void checkSuperuserPrivilege()
      throws AccessControlException {
    // When permissions are disabled every caller is effectively a superuser.
    if (isPermissionEnabled) {
      FSPermissionChecker pc = getPermissionChecker();
      pc.checkSuperuserPrivilege();
    }
  }

  /**
   * Check whether current user have permissions to access the path. For more
   * details of the parameters, see
   * {@link FSPermissionChecker#checkPermission()}.
   */
  private void checkPermission(FSPermissionChecker pc,
      String path, boolean doCheckOwner, FsAction ancestorAccess,
      FsAction parentAccess, FsAction access, FsAction subAccess)
      throws AccessControlException, UnresolvedLinkException {
    // Delegate to the full overload with symlink resolution enabled.
    checkPermission(pc, path, doCheckOwner, ancestorAccess,
        parentAccess, access, subAccess, true);
  }

  /**
   * Check whether current user have permissions to access the path. For more
   * details of the parameters, see
   * {@link FSPermissionChecker#checkPermission()}.
   */
  private void checkPermission(FSPermissionChecker pc,
      String path, boolean doCheckOwner, FsAction ancestorAccess,
      FsAction parentAccess, FsAction access, FsAction subAccess,
      boolean resolveLink)
      throws AccessControlException, UnresolvedLinkException {
    // Superusers bypass all checks.
    if (!pc.isSuperUser()) {
      dir.waitForReady();
      readLock();
      try {
        pc.checkPermission(path, dir.rootDir, doCheckOwner, ancestorAccess,
            parentAccess, access, subAccess, resolveLink);
      } finally {
        readUnlock();
      }
    }
  }

  /**
   * Check to see if we have exceeded the limit on the number
   * of inodes.
   */
  void checkFsObjectLimit() throws IOException {
    // maxFsObjects == 0 means "unlimited".
    if (maxFsObjects != 0 &&
        maxFsObjects <= dir.totalInodes() + getBlocksTotal()) {
      throw new IOException("Exceeded the configured number of objects " +
          maxFsObjects + " in the filesystem.");
    }
  }

  /**
   * Get the total number of objects in the system.
   */
  @Override // FSNamesystemMBean
  public long getMaxObjects() {
    return maxFsObjects;
  }

  @Override // FSNamesystemMBean
  @Metric
  public long getFilesTotal() {
    readLock();
    try {
      return this.dir.totalInodes();
    } finally {
      readUnlock();
    }
  }

  @Override // FSNamesystemMBean
  @Metric
  public long getPendingReplicationBlocks() {
    return blockManager.getPendingReplicationBlocksCount();
  }

  @Override // FSNamesystemMBean
  @Metric
  public long getUnderReplicatedBlocks() {
    return blockManager.getUnderReplicatedBlocksCount();
  }

  /** Returns number of blocks with corrupt replicas */
  @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"})
  public long getCorruptReplicaBlocks() {
    return blockManager.getCorruptReplicaBlocksCount();
  }

  @Override // FSNamesystemMBean
  @Metric
  public long getScheduledReplicationBlocks() {
    return blockManager.getScheduledReplicationBlocksCount();
  }

  @Override
  @Metric
  public long getPendingDeletionBlocks() {
    return blockManager.getPendingDeletionBlocksCount();
  }

  @Metric
  public long getExcessBlocks() {
    return blockManager.getExcessBlocksCount();
  }

  // HA-only metric
  @Metric
  public long getPostponedMisreplicatedBlocks() {
    return blockManager.getPostponedMisreplicatedBlocksCount();
  }

  // HA-only metric
  @Metric
  public int getPendingDataNodeMessageCount() {
    return blockManager.getPendingDataNodeMessageCount();
  }

  // HA-only metric
  @Metric
  public String getHAState() {
    return haContext.getState().toString();
  }

  // HA-only metric: ms since the standby last applied edits; 0 when not
  // in standby state or no tailer is running.
  @Metric
  public long getMillisSinceLastLoadedEdits() {
    if (isInStandbyState() && editLogTailer != null) {
      return now() - editLogTailer.getLastLoadTimestamp();
    } else {
      return 0;
    }
  }

  @Metric
  public int getBlockCapacity() {
    return blockManager.getCapacity();
  }

  @Override // FSNamesystemMBean
  public String getFSState() {
    return isInSafeMode() ? "safeMode" : "Operational";
  }

  private ObjectName mbeanName;
  private ObjectName mxbeanName;

  /**
   * Register the FSNamesystem MBean using the name
   * "hadoop:service=NameNode,name=FSNamesystemState"
   */
  private void registerMBean() {
    // We can only implement one MXBean interface, so we keep the old one.
    try {
      StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class);
      mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean);
    } catch (NotCompliantMBeanException e) {
      throw new RuntimeException("Bad MBean setup", e);
    }

    LOG.info("Registered FSNamesystemState MBean");
  }

  /**
   * shutdown FSNamesystem
   */
  void shutdown() {
    // Unregister JMX beans first, then tear down subordinate managers.
    if (mbeanName != null) {
      MBeans.unregister(mbeanName);
      mbeanName = null;
    }
    if (mxbeanName != null) {
      MBeans.unregister(mxbeanName);
      mxbeanName = null;
    }
    if (dir != null) {
      dir.shutdown();
    }
    if (blockManager != null) {
      blockManager.shutdown();
    }
  }


  @Override // FSNamesystemMBean
  public int getNumLiveDataNodes() {
    return getBlockManager().getDatanodeManager().getNumLiveDataNodes();
  }

  @Override // FSNamesystemMBean
  public int getNumDeadDataNodes() {
    return getBlockManager().getDatanodeManager().getNumDeadDataNodes();
  }

  /** @return the number of live datanodes that are decommissioned. */
  @Override // FSNamesystemMBean
  public int getNumDecomLiveDataNodes() {
    final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
    int liveDecommissioned = 0;
    for (DatanodeDescriptor node : live) {
      liveDecommissioned += node.isDecommissioned() ? 1 : 0;
    }
    return liveDecommissioned;
  }

  /** @return the number of dead datanodes that are decommissioned. */
  @Override // FSNamesystemMBean
  public int getNumDecomDeadDataNodes() {
    final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
    getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true);
    int deadDecommissioned = 0;
    for (DatanodeDescriptor node : dead) {
      deadDecommissioned += node.isDecommissioned() ?
          1 : 0;
    }
    return deadDecommissioned;
  }

  @Override // FSNamesystemMBean
  public int getNumDecommissioningDataNodes() {
    return getBlockManager().getDatanodeManager().getDecommissioningNodes()
        .size();
  }

  @Override // FSNamesystemMBean
  @Metric({"StaleDataNodes",
    "Number of datanodes marked stale due to delayed heartbeat"})
  public int getNumStaleDataNodes() {
    return getBlockManager().getDatanodeManager().getNumStaleNodes();
  }

  /**
   * Sets the current generation stamp for legacy blocks
   */
  void setGenerationStampV1(long stamp) {
    generationStampV1.setCurrentValue(stamp);
  }

  /**
   * Gets the current generation stamp for legacy blocks
   */
  long getGenerationStampV1() {
    return generationStampV1.getCurrentValue();
  }

  /**
   * Sets the current generation stamp for this filesystem
   */
  void setGenerationStampV2(long stamp) {
    generationStampV2.setCurrentValue(stamp);
  }

  /**
   * Gets the current generation stamp for this filesystem
   */
  long getGenerationStampV2() {
    return generationStampV2.getCurrentValue();
  }

  /**
   * Upgrades the generation stamp for the filesystem
   * by reserving a sufficient range for all existing blocks.
   * Should be invoked only during the first upgrade to
   * sequential block IDs.
   */
  long upgradeGenerationStampToV2() {
    // Must only run once, while the V2 stamp is still at its reserved value.
    Preconditions.checkState(generationStampV2.getCurrentValue() ==
        GenerationStamp.LAST_RESERVED_STAMP);

    // Skip past the whole V1 range so V1 and V2 stamps never collide.
    generationStampV2.skipTo(
        generationStampV1.getCurrentValue() +
        HdfsConstants.RESERVED_GENERATION_STAMPS_V1);

    generationStampV1Limit = generationStampV2.getCurrentValue();
    return generationStampV2.getCurrentValue();
  }

  /**
   * Sets the generation stamp that delineates random and sequentially
   * allocated block IDs.
   * @param stamp
   */
  void setGenerationStampV1Limit(long stamp) {
    // May only be set once, from the grandfather sentinel value.
    Preconditions.checkState(generationStampV1Limit ==
        GenerationStamp.GRANDFATHER_GENERATION_STAMP);
    generationStampV1Limit = stamp;
  }

  /**
   * Gets the value of the generation stamp that delineates sequential
   * and random block IDs.
   */
  long getGenerationStampAtblockIdSwitch() {
    return generationStampV1Limit;
  }

  @VisibleForTesting
  SequentialBlockIdGenerator getBlockIdGenerator() {
    return blockIdGenerator;
  }

  /**
   * Sets the maximum allocated block ID for this filesystem. This is
   * the basis for allocating new block IDs.
   */
  void setLastAllocatedBlockId(long blockId) {
    blockIdGenerator.skipTo(blockId);
  }

  /**
   * Gets the maximum sequentially allocated block ID for this filesystem
   */
  long getLastAllocatedBlockId() {
    return blockIdGenerator.getCurrentValue();
  }

  /**
   * Increments, logs and then returns the stamp
   */
  long nextGenerationStamp(boolean legacyBlock)
      throws IOException, SafeModeException {
    assert hasWriteLock();
    checkNameNodeSafeMode("Cannot get next generation stamp");

    long gs;
    if (legacyBlock) {
      gs = getNextGenerationStampV1();
      getEditLog().logGenerationStampV1(gs);
    } else {
      gs = getNextGenerationStampV2();
      getEditLog().logGenerationStampV2(gs);
    }

    // NB: callers sync the log
    return gs;
  }

  @VisibleForTesting
  long getNextGenerationStampV1() throws IOException {
    long genStampV1 = generationStampV1.nextValue();

    if (genStampV1 >= generationStampV1Limit) {
      // We ran out of generation stamps for legacy blocks. In practice, it
      // is extremely unlikely as we reserved 1T v1 generation stamps. The
      // result is that we can no longer append to the legacy blocks that
      // were created before the upgrade to sequential block IDs.
      throw new OutOfV1GenerationStampsException();
    }

    return genStampV1;
  }

  @VisibleForTesting
  long getNextGenerationStampV2() {
    return generationStampV2.nextValue();
  }

  long getGenerationStampV1Limit() {
    return generationStampV1Limit;
  }

  /**
   * Determine whether the block ID was randomly generated (legacy) or
   * sequentially generated. The generation stamp value is used to
   * make the distinction.
   * @param block
   * @return true if the block ID was randomly generated, false otherwise.
   */
  boolean isLegacyBlock(Block block) {
    return block.getGenerationStamp() < getGenerationStampV1Limit();
  }

  /**
   * Increments, logs and then returns the block ID
   */
  private long nextBlockId() throws IOException {
    assert hasWriteLock();
    checkNameNodeSafeMode("Cannot get next block ID");
    final long blockId = blockIdGenerator.nextValue();
    getEditLog().logAllocateBlockId(blockId);
    // NB: callers sync the log
    return blockId;
  }

  /**
   * Validate that {@code block} is a known under-construction block whose
   * lease is held by {@code clientName}, and return its file inode.
   * Caller must hold the write lock.
   */
  private INodeFile checkUCBlock(ExtendedBlock block,
      String clientName) throws IOException {
    assert hasWriteLock();
    checkNameNodeSafeMode("Cannot get a new generation stamp and an "
        + "access token for block " + block);

    // check stored block state
    BlockInfo storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block));
    if (storedBlock == null ||
        storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) {
      throw new IOException(block +
          " does not exist or is not under Construction" + storedBlock);
    }

    // check file inode
    final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile();
    if (file == null || !file.isUnderConstruction()) {
      throw new IOException("The file " + storedBlock +
          " belonged to does not exist or it is not under construction.");
    }

    // check lease
    if (clientName == null
        || !clientName.equals(file.getFileUnderConstructionFeature()
            .getClientName())) {
      throw new LeaseExpiredException("Lease mismatch: " + block +
          " is accessed by a non lease holder " + clientName);
    }

    return file;
  }

  /**
   * Client is reporting some bad block locations.
   */
  void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
    checkOperation(OperationCategory.WRITE);
    NameNode.stateChangeLog.info("*DIR* reportBadBlocks");
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // Mark every reported replica location as corrupt.
      for (int i = 0; i < blocks.length; i++) {
        ExtendedBlock blk = blocks[i].getBlock();
        DatanodeInfo[] nodes = blocks[i].getLocations();
        String[] storageIDs = blocks[i].getStorageIDs();
        for (int j = 0; j < nodes.length; j++) {
          blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j],
              storageIDs == null ? null: storageIDs[j],
              "client machine reported it");
        }
      }
    } finally {
      writeUnlock();
    }
  }

  /**
   * Get a new generation stamp together with an access token for
   * a block under construction
   *
   * This method is called for recovering a failed pipeline or setting up
   * a pipeline to append to a block.
5935 * 5936 * @param block a block 5937 * @param clientName the name of a client 5938 * @return a located block with a new generation stamp and an access token 5939 * @throws IOException if any error occurs 5940 */ 5941 LocatedBlock updateBlockForPipeline(ExtendedBlock block, 5942 String clientName) throws IOException { 5943 LocatedBlock locatedBlock; 5944 checkOperation(OperationCategory.WRITE); 5945 writeLock(); 5946 try { 5947 checkOperation(OperationCategory.WRITE); 5948 5949 // check vadility of parameters 5950 checkUCBlock(block, clientName); 5951 5952 // get a new generation stamp and an access token 5953 block.setGenerationStamp( 5954 nextGenerationStamp(isLegacyBlock(block.getLocalBlock()))); 5955 locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]); 5956 blockManager.setBlockToken(locatedBlock, AccessMode.WRITE); 5957 } finally { 5958 writeUnlock(); 5959 } 5960 // Ensure we record the new generation stamp 5961 getEditLog().logSync(); 5962 return locatedBlock; 5963 } 5964 5965 /** 5966 * Update a pipeline for a block under construction 5967 * 5968 * @param clientName the name of the client 5969 * @param oldBlock and old block 5970 * @param newBlock a new block with a new generation stamp and length 5971 * @param newNodes datanodes in the pipeline 5972 * @throws IOException if any error occurs 5973 */ 5974 void updatePipeline(String clientName, ExtendedBlock oldBlock, 5975 ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs) 5976 throws IOException { 5977 checkOperation(OperationCategory.WRITE); 5978 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 5979 if (cacheEntry != null && cacheEntry.isSuccess()) { 5980 return; // Return previous response 5981 } 5982 LOG.info("updatePipeline(block=" + oldBlock 5983 + ", newGenerationStamp=" + newBlock.getGenerationStamp() 5984 + ", newLength=" + newBlock.getNumBytes() 5985 + ", newNodes=" + Arrays.asList(newNodes) 5986 + ", clientName=" + clientName 5987 + ")"); 5988 
writeLock(); 5989 boolean success = false; 5990 try { 5991 checkOperation(OperationCategory.WRITE); 5992 checkNameNodeSafeMode("Pipeline not updated"); 5993 assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and " 5994 + oldBlock + " has different block identifier"; 5995 updatePipelineInternal(clientName, oldBlock, newBlock, newNodes, 5996 newStorageIDs, cacheEntry != null); 5997 success = true; 5998 } finally { 5999 writeUnlock(); 6000 RetryCache.setState(cacheEntry, success); 6001 } 6002 getEditLog().logSync(); 6003 LOG.info("updatePipeline(" + oldBlock + ") successfully to " + newBlock); 6004 } 6005 6006 /** @see #updatePipeline(String, ExtendedBlock, ExtendedBlock, DatanodeID[]) */ 6007 private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock, 6008 ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs, 6009 boolean logRetryCache) 6010 throws IOException { 6011 assert hasWriteLock(); 6012 // check the vadility of the block and lease holder name 6013 final INodeFile pendingFile = checkUCBlock(oldBlock, clientName); 6014 final BlockInfoUnderConstruction blockinfo 6015 = (BlockInfoUnderConstruction)pendingFile.getLastBlock(); 6016 6017 // check new GS & length: this is not expected 6018 if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() || 6019 newBlock.getNumBytes() < blockinfo.getNumBytes()) { 6020 String msg = "Update " + oldBlock + " (len = " + 6021 blockinfo.getNumBytes() + ") to an older state: " + newBlock + 6022 " (len = " + newBlock.getNumBytes() +")"; 6023 LOG.warn(msg); 6024 throw new IOException(msg); 6025 } 6026 6027 // Update old block with the new generation stamp and new length 6028 blockinfo.setNumBytes(newBlock.getNumBytes()); 6029 blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp()); 6030 6031 // find the DatanodeDescriptor objects 6032 final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager() 6033 .getDatanodeStorageInfos(newNodes, 
newStorageIDs); 6034 blockinfo.setExpectedLocations(storages); 6035 6036 String src = pendingFile.getFullPathName(); 6037 dir.persistBlocks(src, pendingFile, logRetryCache); 6038 } 6039 6040 // rename was successful. If any part of the renamed subtree had 6041 // files that were being written to, update with new filename. 6042 void unprotectedChangeLease(String src, String dst) { 6043 assert hasWriteLock(); 6044 leaseManager.changeLease(src, dst); 6045 } 6046 6047 /** 6048 * @return all the under-construction files in the lease map 6049 */ 6050 Map<String, INodeFile> getFilesUnderConstruction() { 6051 synchronized (leaseManager) { 6052 return leaseManager.getINodesUnderConstruction(); 6053 } 6054 } 6055 6056 /** 6057 * Register a Backup name-node, verifying that it belongs 6058 * to the correct namespace, and adding it to the set of 6059 * active journals if necessary. 6060 * 6061 * @param bnReg registration of the new BackupNode 6062 * @param nnReg registration of this NameNode 6063 * @throws IOException if the namespace IDs do not match 6064 */ 6065 void registerBackupNode(NamenodeRegistration bnReg, 6066 NamenodeRegistration nnReg) throws IOException { 6067 writeLock(); 6068 try { 6069 if(getFSImage().getStorage().getNamespaceID() 6070 != bnReg.getNamespaceID()) 6071 throw new IOException("Incompatible namespaceIDs: " 6072 + " Namenode namespaceID = " 6073 + getFSImage().getStorage().getNamespaceID() + "; " 6074 + bnReg.getRole() + 6075 " node namespaceID = " + bnReg.getNamespaceID()); 6076 if (bnReg.getRole() == NamenodeRole.BACKUP) { 6077 getFSImage().getEditLog().registerBackupNode( 6078 bnReg, nnReg); 6079 } 6080 } finally { 6081 writeUnlock(); 6082 } 6083 } 6084 6085 /** 6086 * Release (unregister) backup node. 6087 * <p> 6088 * Find and remove the backup stream corresponding to the node. 
   * @param registration
   * @throws IOException
   */
  void releaseBackupNode(NamenodeRegistration registration)
    throws IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      if(getFSImage().getStorage().getNamespaceID()
         != registration.getNamespaceID())
        throw new IOException("Incompatible namespaceIDs: "
            + " Namenode namespaceID = "
            + getFSImage().getStorage().getNamespaceID() + "; "
            + registration.getRole() +
            " node namespaceID = " + registration.getNamespaceID());
      getEditLog().releaseBackupStream(registration);
    } finally {
      writeUnlock();
    }
  }

  /** A (path, block) pair describing one corrupt file block. */
  static class CorruptFileBlockInfo {
    final String path;
    final Block block;

    public CorruptFileBlockInfo(String p, Block b) {
      path = p;
      block = b;
    }

    @Override
    public String toString() {
      return block.getBlockName() + "\t" + path;
    }
  }
  /**
   * @param path Restrict corrupt files to this portion of namespace.
   * @param startBlockAfter Support for continuation; the set of files we return
   * back is ordered by blockid; startBlockAfter tells where to start from
   * @return a list in which each entry describes a corrupt file/block
   * @throws AccessControlException
   * @throws IOException
   */
  Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
      String[] cookieTab) throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      if (!isPopulatingReplQueues()) {
        throw new IOException("Cannot run listCorruptFileBlocks because " +
                              "replication queues have not been initialized.");
      }
      // print a limited # of corrupt files per call
      int count = 0;
      ArrayList<CorruptFileBlockInfo> corruptFiles = new ArrayList<CorruptFileBlockInfo>();

      final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator();

      if (cookieTab == null) {
        cookieTab = new String[] { null };
      }
      // The cookie encodes how many corrupt blocks previous calls already
      // skipped; resume from there.
      int skip = getIntCookie(cookieTab[0]);
      for (int i = 0; i < skip && blkIterator.hasNext(); i++) {
        blkIterator.next();
      }

      while (blkIterator.hasNext()) {
        Block blk = blkIterator.next();
        final INode inode = (INode)blockManager.getBlockCollection(blk);
        skip++;
        // Only report blocks with no live replicas at all.
        if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
          String src = FSDirectory.getFullPathName(inode);
          if (src.startsWith(path)){
            corruptFiles.add(new CorruptFileBlockInfo(src, blk));
            count++;
            if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED)
              break;
          }
        }
      }
      // Hand the updated skip count back to the caller as the next cookie.
      cookieTab[0] = String.valueOf(skip);
      LOG.info("list corrupt file blocks returned: " + count);
      return corruptFiles;
    } finally {
      readUnlock();
    }
  }

  /**
   * Convert string cookie to integer.
   */
  private static int getIntCookie(String cookie){
    int c;
    // Null or malformed cookies both mean "start from the beginning".
    if(cookie == null){
      c = 0;
    } else {
      try{
        c = Integer.parseInt(cookie);
      }catch (NumberFormatException e) {
        c = 0;
      }
    }
    // Never return a negative skip count.
    c = Math.max(0, c);
    return c;
  }

  /**
   * Create delegation token secret manager
   */
  private DelegationTokenSecretManager createDelegationTokenSecretManager(
      Configuration conf) {
    return new DelegationTokenSecretManager(conf.getLong(
        DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY,
        DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT),
        conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY,
            DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT),
        conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
            DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT),
        DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL,
        conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
            DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT),
        this);
  }

  /**
   * Returns the DelegationTokenSecretManager instance in the namesystem.
   * @return delegation token secret manager object
   */
  DelegationTokenSecretManager getDelegationTokenSecretManager() {
    return dtSecretManager;
  }

  /**
   * @param renewer
   * @return Token<DelegationTokenIdentifier>
   * @throws IOException
   */
  Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
      throws IOException {
    Token<DelegationTokenIdentifier> token;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot issue delegation token");
      if (!isAllowedDelegationTokenOp()) {
        throw new IOException(
          "Delegation Token can be issued only with kerberos or web authentication");
      }
      if (dtSecretManager == null || !dtSecretManager.isRunning()) {
        LOG.warn("trying to get DT with no secret manager running");
        return null;
      }

      UserGroupInformation ugi = getRemoteUser();
      String user = ugi.getUserName();
      Text owner = new Text(user);
      Text realUser = null;
      // Record the real (proxy) user when the request is made via a proxy.
      if (ugi.getRealUser() != null) {
        realUser = new Text(ugi.getRealUser().getUserName());
      }
      DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner,
        renewer, realUser);
      token = new Token<DelegationTokenIdentifier>(
        dtId, dtSecretManager);
      long expiryTime = dtSecretManager.getTokenExpiryTime(dtId);
      getEditLog().logGetDelegationToken(dtId, expiryTime);
    } finally {
      writeUnlock();
    }
    // Sync outside the lock so the token issuance is durable.
    getEditLog().logSync();
    return token;
  }

  /**
   *
   * @param token
   * @return New expiryTime of the token
   * @throws InvalidToken
   * @throws IOException
   */
  long renewDelegationToken(Token<DelegationTokenIdentifier> token)
      throws InvalidToken, IOException {
    long expiryTime;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);

      checkNameNodeSafeMode("Cannot renew delegation token");
      if (!isAllowedDelegationTokenOp()) {
        throw new IOException(
            "Delegation Token can be renewed only with kerberos or web authentication");
      }
      String renewer = getRemoteUser().getShortUserName();
      expiryTime = dtSecretManager.renewToken(token, renewer);
      // Decode the identifier so the renewal can be logged to the edit log.
      DelegationTokenIdentifier id = new DelegationTokenIdentifier();
      ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
      DataInputStream in = new DataInputStream(buf);
      id.readFields(in);
      getEditLog().logRenewDelegationToken(id, expiryTime);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    return expiryTime;
  }

  /**
   *
   * @param token
   * @throws IOException
   */
  void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);

      checkNameNodeSafeMode("Cannot cancel delegation token");
      String canceller = getRemoteUser().getUserName();
      DelegationTokenIdentifier id = dtSecretManager
        .cancelToken(token, canceller);
      getEditLog().logCancelDelegationToken(id);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
  }

  /** Snapshot the secret manager state for inclusion in an FSImage. */
  SecretManagerState saveSecretManagerState() {
    return dtSecretManager.saveSecretManagerState();
  }

  /**
   * @param in load the state of secret manager from input stream
   */
  void loadSecretManagerStateCompat(DataInput in) throws IOException {
    dtSecretManager.loadSecretManagerStateCompat(in);
  }

  /** Load secret manager state from protobuf-based FSImage sections. */
  void loadSecretManagerState(SecretManagerSection s,
      List<SecretManagerSection.DelegationKey> keys,
      List<SecretManagerSection.PersistToken> tokens) throws IOException {
    dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens));
  }

  /**
  /**
   * Log the updateMasterKey operation to edit logs
   *
   * @param key new delegation key.
   */
  public void logUpdateMasterKey(DelegationKey key) {

    assert !isInSafeMode() :
      "this should never be called while in safemode, since we stop " +
      "the DT manager before entering safemode!";
    // No need to hold FSN lock since we don't access any internal
    // structures, and this is stopped before the FSN shuts itself
    // down, etc.
    getEditLog().logUpdateMasterKey(key);
    getEditLog().logSync();
  }

  /**
   * Log the cancellation of expired tokens to edit logs
   *
   * @param id token identifier to cancel
   */
  public void logExpireDelegationToken(DelegationTokenIdentifier id) {
    assert !isInSafeMode() :
      "this should never be called while in safemode, since we stop " +
      "the DT manager before entering safemode!";
    // No need to hold FSN lock since we don't access any internal
    // structures, and this is stopped before the FSN shuts itself
    // down, etc.
    getEditLog().logCancelDelegationToken(id);
  }

  /**
   * Journal a lease-holder reassignment. Caller must hold the FSN write lock.
   */
  private void logReassignLease(String leaseHolder, String src,
      String newHolder) {
    assert hasWriteLock();
    getEditLog().logReassignLease(leaseHolder, src, newHolder);
  }

  /**
   * Delegation token operations are allowed when security is disabled, or
   * when the connection's authentication method permits delegation.
   *
   * @return true if delegation token operation is allowed
   */
  private boolean isAllowedDelegationTokenOp() throws IOException {
    return !UserGroupInformation.isSecurityEnabled()
      || getConnectionAuthenticationMethod().allowsDelegation();
  }

  /**
   * Returns authentication method used to establish the connection
   * @return AuthenticationMethod used to establish connection
   * @throws IOException
   */
  private AuthenticationMethod getConnectionAuthenticationMethod()
      throws IOException {
    UserGroupInformation ugi = getRemoteUser();
    AuthenticationMethod authMethod = ugi.getAuthenticationMethod();
    if (authMethod == AuthenticationMethod.PROXY) {
      // For proxy users, what matters is how the real user authenticated.
      authMethod = ugi.getRealUser().getAuthenticationMethod();
    }
    return authMethod;
  }
  /**
   * Client invoked methods are invoked over RPC and will be in
   * RPC call context even if the client exits.
   */
  private boolean isExternalInvocation() {
    return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation();
  }

  /** @return the remote client address — RPC source first, then WebHDFS. */
  private static InetAddress getRemoteIp() {
    InetAddress ip = Server.getRemoteIp();
    if (ip != null) {
      return ip;
    }
    return NamenodeWebHdfsMethods.getRemoteIp();
  }

  // optimize ugi lookup for RPC operations to avoid a trip through
  // UGI.getCurrentUser which is synch'ed
  private static UserGroupInformation getRemoteUser() throws IOException {
    return NameNode.getRemoteUser();
  }

  /**
   * Log fsck event in the audit log
   */
  void logFsckEvent(String src, InetAddress remoteAddress) throws IOException {
    if (isAuditEnabled()) {
      logAuditEvent(true, getRemoteUser(),
                    remoteAddress,
                    "fsck", src, null, null);
    }
  }

  /**
   * Register NameNodeMXBean
   */
  private void registerMXBean() {
    mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this);
  }

  /**
   * NameNodeMXBean implementations follow: Namenode information exposed
   * over JMX interfaces.
   */
  @Override // NameNodeMXBean
  public String getVersion() {
    return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision();
  }

  @Override // NameNodeMXBean
  public long getUsed() {
    return this.getCapacityUsed();
  }

  @Override // NameNodeMXBean
  public long getFree() {
    return this.getCapacityRemaining();
  }

  @Override // NameNodeMXBean
  public long getTotal() {
    return this.getCapacityTotal();
  }

  @Override // NameNodeMXBean
  public String getSafemode() {
    // Empty string means safe mode is OFF.
    if (!this.isInSafeMode())
      return "";
    return "Safe mode is ON. " + this.getSafeModeTip();
  }

  @Override // NameNodeMXBean
  public boolean isUpgradeFinalized() {
    return this.getFSImage().isUpgradeFinalized();
  }

  @Override // NameNodeMXBean
  public long getNonDfsUsedSpace() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }

  @Override // NameNodeMXBean
  public float getPercentUsed() {
    return datanodeStatistics.getCapacityUsedPercent();
  }

  @Override // NameNodeMXBean
  public long getBlockPoolUsedSpace() {
    return datanodeStatistics.getBlockPoolUsed();
  }

  @Override // NameNodeMXBean
  public float getPercentBlockPoolUsed() {
    return datanodeStatistics.getPercentBlockPoolUsed();
  }

  @Override // NameNodeMXBean
  public float getPercentRemaining() {
    return datanodeStatistics.getCapacityRemainingPercent();
  }

  @Override // NameNodeMXBean
  public long getCacheCapacity() {
    return datanodeStatistics.getCacheCapacity();
  }

  @Override // NameNodeMXBean
  public long getCacheUsed() {
    return datanodeStatistics.getCacheUsed();
  }

  @Override // NameNodeMXBean
  public long getTotalBlocks() {
    return getBlocksTotal();
  }

  @Override // NameNodeMXBean
  @Metric
  public long getTotalFiles() {
    return getFilesTotal();
  }

  @Override // NameNodeMXBean
  public long getNumberOfMissingBlocks() {
    return getMissingBlocksCount();
  }

  @Override // NameNodeMXBean
  public int getThreads() {
    return ManagementFactory.getThreadMXBean().getThreadCount();
  }
  /**
   * Returned information is a JSON representation of map with host name as the
   * key and value is a map of live node attribute keys to its values
   */
  @Override // NameNodeMXBean
  public String getLiveNodes() {
    final Map<String, Map<String,Object>> info =
      new HashMap<String, Map<String,Object>>();
    final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
    for (DatanodeDescriptor node : live) {
      Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
          .put("infoAddr", node.getInfoAddr())
          .put("infoSecureAddr", node.getInfoSecureAddr())
          .put("xferaddr", node.getXferAddr())
          .put("lastContact", getLastContact(node))
          .put("usedSpace", getDfsUsed(node))
          .put("adminState", node.getAdminState().toString())
          .put("nonDfsUsedSpace", node.getNonDfsUsed())
          .put("capacity", node.getCapacity())
          .put("numBlocks", node.numBlocks())
          .put("version", node.getSoftwareVersion())
          .put("used", node.getDfsUsed())
          .put("remaining", node.getRemaining())
          .put("blockScheduled", node.getBlocksScheduled())
          .put("blockPoolUsed", node.getBlockPoolUsed())
          .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
          .put("volfails", node.getVolumeFailures())
          .build();

      info.put(node.getHostName(), innerinfo);
    }
    return JSON.toString(info);
  }

  /**
   * Returned information is a JSON representation of map with host name as the
   * key and value is a map of dead node attribute keys to its values
   */
  @Override // NameNodeMXBean
  public String getDeadNodes() {
    final Map<String, Map<String, Object>> info =
      new HashMap<String, Map<String, Object>>();
    final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
    for (DatanodeDescriptor node : dead) {
      Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
          .put("lastContact", getLastContact(node))
          .put("decommissioned", node.isDecommissioned())
          .put("xferaddr", node.getXferAddr())
          .build();
      info.put(node.getHostName(), innerinfo);
    }
    return JSON.toString(info);
  }

  /**
   * Returned information is a JSON representation of map with host name as the
   * key and value is a map of decommissioning node attribute keys to its values
   */
  @Override // NameNodeMXBean
  public String getDecomNodes() {
    final Map<String, Map<String, Object>> info =
      new HashMap<String, Map<String, Object>>();
    final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager(
        ).getDecommissioningNodes();
    for (DatanodeDescriptor node : decomNodeList) {
      Map<String, Object> innerinfo = ImmutableMap
          .<String, Object> builder()
          .put("xferaddr", node.getXferAddr())
          .put("underReplicatedBlocks",
              node.decommissioningStatus.getUnderReplicatedBlocks())
          .put("decommissionOnlyReplicas",
              node.decommissioningStatus.getDecommissionOnlyReplicas())
          .put("underReplicateInOpenFiles",
              node.decommissioningStatus.getUnderReplicatedInOpenFiles())
          .build();
      info.put(node.getHostName(), innerinfo);
    }
    return JSON.toString(info);
  }

  /** @return seconds elapsed since the given live node's last heartbeat. */
  private long getLastContact(DatanodeDescriptor alivenode) {
    return (Time.now() - alivenode.getLastUpdate())/1000;
  }

  /** @return DFS space used on the given live node, in bytes. */
  private long getDfsUsed(DatanodeDescriptor alivenode) {
    return alivenode.getDfsUsed();
  }

  @Override // NameNodeMXBean
  public String getClusterId() {
    return dir.fsImage.getStorage().getClusterID();
  }

  @Override // NameNodeMXBean
  public String getBlockPoolId() {
    return blockPoolId;
  }
  @Override // NameNodeMXBean
  public String getNameDirStatuses() {
    // Two buckets: "active" name dirs and "failed" (removed) ones.
    Map<String, Map<File, StorageDirType>> statusMap =
      new HashMap<String, Map<File, StorageDirType>>();

    Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>();
    for (Iterator<StorageDirectory> it
        = getFSImage().getStorage().dirIterator(); it.hasNext();) {
      StorageDirectory st = it.next();
      activeDirs.put(st.getRoot(), st.getStorageDirType());
    }
    statusMap.put("active", activeDirs);

    List<Storage.StorageDirectory> removedStorageDirs
        = getFSImage().getStorage().getRemovedStorageDirs();
    Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>();
    for (StorageDirectory st : removedStorageDirs) {
      failedDirs.put(st.getRoot(), st.getStorageDirType());
    }
    statusMap.put("failed", failedDirs);

    return JSON.toString(statusMap);
  }

  @Override // NameNodeMXBean
  public String getNodeUsage() {
    float median = 0;
    float max = 0;
    float min = 0;
    float dev = 0;

    final Map<String, Map<String,Object>> info =
      new HashMap<String, Map<String,Object>>();
    final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(live, null, true);

    if (live.size() > 0) {
      float totalDfsUsed = 0;
      float[] usages = new float[live.size()];
      int i = 0;
      for (DatanodeDescriptor dn : live) {
        usages[i++] = dn.getDfsUsedPercent();
        totalDfsUsed += dn.getDfsUsedPercent();
      }
      // totalDfsUsed now holds the mean usage percentage across live nodes.
      totalDfsUsed /= live.size();
      Arrays.sort(usages);
      median = usages[usages.length / 2];
      max = usages[usages.length - 1];
      min = usages[0];

      // Population standard deviation of per-node usage percentages.
      for (i = 0; i < usages.length; i++) {
        dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
      }
      dev = (float) Math.sqrt(dev / usages.length);
    }

    final Map<String, Object> innerInfo = new HashMap<String, Object>();
    innerInfo.put("min", StringUtils.format("%.2f%%", min));
    innerInfo.put("median", StringUtils.format("%.2f%%", median));
    innerInfo.put("max", StringUtils.format("%.2f%%", max));
    innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev));
    info.put("nodeUsage", innerInfo);

    return JSON.toString(info);
  }

  @Override // NameNodeMXBean
  public String getNameJournalStatus() {
    List<Map<String, String>> jasList = new ArrayList<Map<String, String>>();
    FSEditLog log = getFSImage().getEditLog();
    if (log != null) {
      boolean openForWrite = log.isOpenForWrite();
      for (JournalAndStream jas : log.getJournals()) {
        final Map<String, String> jasMap = new HashMap<String, String>();
        String manager = jas.getManager().toString();

        jasMap.put("required", String.valueOf(jas.isRequired()));
        jasMap.put("disabled", String.valueOf(jas.isDisabled()));
        jasMap.put("manager", manager);

        if (jas.isDisabled()) {
          jasMap.put("stream", "Failed");
        } else if (openForWrite) {
          EditLogOutputStream elos = jas.getCurrentStream();
          if (elos != null) {
            jasMap.put("stream", elos.generateReport());
          } else {
            jasMap.put("stream", "not currently writing");
          }
        } else {
          jasMap.put("stream", "open for read");
        }
        jasList.add(jasMap);
      }
    }
    return JSON.toString(jasList);
  }

  @Override // NameNodeMXBean
  public String getJournalTransactionInfo() {
    Map<String, String> txnIdMap = new HashMap<String, String>();
    txnIdMap.put("LastAppliedOrWrittenTxId",
        Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId()));
    txnIdMap.put("MostRecentCheckpointTxId",
        Long.toString(this.getFSImage().getMostRecentCheckpointTxId()));
    return JSON.toString(txnIdMap);
  }

  @Override // NameNodeMXBean
  public String getNNStarted() {
    return getStartTime().toString();
  }

  @Override // NameNodeMXBean
  public String getCompileInfo() {
    return VersionInfo.getDate() + " by " + VersionInfo.getUser() +
        " from " + VersionInfo.getBranch();
  }

  /** @return the block manager. */
  public BlockManager getBlockManager() {
    return blockManager;
  }
  /** @return the FSDirectory. */
  public FSDirectory getFSDirectory() {
    return dir;
  }

  /** @return the cache manager. */
  public CacheManager getCacheManager() {
    return cacheManager;
  }

  @Override // NameNodeMXBean
  public String getCorruptFiles() {
    List<String> list = new ArrayList<String>();
    Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks;
    try {
      corruptFileBlocks = listCorruptFileBlocks("/", null);
      int corruptFileCount = corruptFileBlocks.size();
      if (corruptFileCount != 0) {
        for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) {
          list.add(c.toString());
        }
      }
    } catch (IOException e) {
      // Best-effort for JMX: report what we can, never throw to the bean.
      LOG.warn("Get corrupt file blocks returned error: " + e.getMessage());
    }
    return JSON.toString(list);
  }

  @Override //NameNodeMXBean
  public int getDistinctVersionCount() {
    return blockManager.getDatanodeManager().getDatanodesSoftwareVersions()
      .size();
  }

  @Override //NameNodeMXBean
  public Map<String, Integer> getDistinctVersions() {
    return blockManager.getDatanodeManager().getDatanodesSoftwareVersions();
  }

  @Override //NameNodeMXBean
  public String getSoftwareVersion() {
    return VersionInfo.getVersion();
  }

  /**
   * Verifies that the given identifier and password are valid and match.
   * @param identifier Token identifier.
   * @param password Password in the token.
   */
  public synchronized void verifyToken(DelegationTokenIdentifier identifier,
      byte[] password) throws InvalidToken, RetriableException {
    try {
      getDelegationTokenSecretManager().verifyToken(identifier, password);
    } catch (InvalidToken it) {
      if (inTransitionToActive()) {
        // Token may simply not be replayed yet during failover; let the
        // client retry rather than fail hard.
        throw new RetriableException(it);
      }
      throw it;
    }
  }

  @Override
  public boolean isGenStampInFuture(Block block) {
    // Legacy (pre-sequential) blocks compare against the V1 stamp.
    if (isLegacyBlock(block)) {
      return block.getGenerationStamp() > getGenerationStampV1();
    } else {
      return block.getGenerationStamp() > getGenerationStampV2();
    }
  }

  @VisibleForTesting
  public EditLogTailer getEditLogTailer() {
    return editLogTailer;
  }

  @VisibleForTesting
  public void setEditLogTailerForTests(EditLogTailer tailer) {
    this.editLogTailer = tailer;
  }

  @VisibleForTesting
  void setFsLockForTests(ReentrantReadWriteLock lock) {
    this.fsLock.coarseLock = lock;
  }

  @VisibleForTesting
  public ReentrantReadWriteLock getFsLockForTests() {
    return fsLock.coarseLock;
  }

  @VisibleForTesting
  public ReentrantLock getLongReadLockForTests() {
    return fsLock.longReadLock;
  }

  @VisibleForTesting
  public SafeModeInfo getSafeModeInfoForTests() {
    return safeMode;
  }

  @VisibleForTesting
  public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
    this.nnResourceChecker = nnResourceChecker;
  }

  @Override
  public boolean isAvoidingStaleDataNodesForWrite() {
    return this.blockManager.getDatanodeManager()
      .shouldAvoidStaleDataNodesForWrite();
  }

  @Override // FSClusterStats
  public int getNumDatanodesInService() {
    return getNumLiveDataNodes() - getNumDecomLiveDataNodes();
  }

  public SnapshotManager getSnapshotManager() {
    return snapshotManager;
  }
snapshot on a directroy. */ 6875 void allowSnapshot(String path) throws SafeModeException, IOException { 6876 checkOperation(OperationCategory.WRITE); 6877 writeLock(); 6878 try { 6879 checkOperation(OperationCategory.WRITE); 6880 checkNameNodeSafeMode("Cannot allow snapshot for " + path); 6881 checkSuperuserPrivilege(); 6882 6883 dir.writeLock(); 6884 try { 6885 snapshotManager.setSnapshottable(path, true); 6886 } finally { 6887 dir.writeUnlock(); 6888 } 6889 getEditLog().logAllowSnapshot(path); 6890 } finally { 6891 writeUnlock(); 6892 } 6893 getEditLog().logSync(); 6894 6895 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 6896 logAuditEvent(true, "allowSnapshot", path, null, null); 6897 } 6898 } 6899 6900 /** Disallow snapshot on a directory. */ 6901 void disallowSnapshot(String path) throws SafeModeException, IOException { 6902 checkOperation(OperationCategory.WRITE); 6903 writeLock(); 6904 try { 6905 checkOperation(OperationCategory.WRITE); 6906 checkNameNodeSafeMode("Cannot disallow snapshot for " + path); 6907 checkSuperuserPrivilege(); 6908 6909 dir.writeLock(); 6910 try { 6911 snapshotManager.resetSnapshottable(path); 6912 } finally { 6913 dir.writeUnlock(); 6914 } 6915 getEditLog().logDisallowSnapshot(path); 6916 } finally { 6917 writeUnlock(); 6918 } 6919 getEditLog().logSync(); 6920 6921 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 6922 logAuditEvent(true, "disallowSnapshot", path, null, null); 6923 } 6924 } 6925 6926 /** 6927 * Create a snapshot 6928 * @param snapshotRoot The directory path where the snapshot is taken 6929 * @param snapshotName The name of the snapshot 6930 */ 6931 String createSnapshot(String snapshotRoot, String snapshotName) 6932 throws SafeModeException, IOException { 6933 checkOperation(OperationCategory.WRITE); 6934 final FSPermissionChecker pc = getPermissionChecker(); 6935 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache, 6936 null); 6937 if (cacheEntry != null && 
cacheEntry.isSuccess()) { 6938 return (String) cacheEntry.getPayload(); 6939 } 6940 String snapshotPath = null; 6941 writeLock(); 6942 try { 6943 checkOperation(OperationCategory.WRITE); 6944 checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot); 6945 if (isPermissionEnabled) { 6946 checkOwner(pc, snapshotRoot); 6947 } 6948 6949 if (snapshotName == null || snapshotName.isEmpty()) { 6950 snapshotName = Snapshot.generateDefaultSnapshotName(); 6951 } 6952 if(snapshotName != null){ 6953 if (!DFSUtil.isValidNameForComponent(snapshotName)) { 6954 throw new InvalidPathException("Invalid snapshot name: " 6955 + snapshotName); 6956 } 6957 } 6958 dir.verifySnapshotName(snapshotName, snapshotRoot); 6959 dir.writeLock(); 6960 try { 6961 snapshotPath = snapshotManager.createSnapshot(snapshotRoot, snapshotName); 6962 } finally { 6963 dir.writeUnlock(); 6964 } 6965 getEditLog().logCreateSnapshot(snapshotRoot, snapshotName, 6966 cacheEntry != null); 6967 } finally { 6968 writeUnlock(); 6969 RetryCache.setState(cacheEntry, snapshotPath != null, snapshotPath); 6970 } 6971 getEditLog().logSync(); 6972 6973 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 6974 logAuditEvent(true, "createSnapshot", snapshotRoot, snapshotPath, null); 6975 } 6976 return snapshotPath; 6977 } 6978 6979 /** 6980 * Rename a snapshot 6981 * @param path The directory path where the snapshot was taken 6982 * @param snapshotOldName Old snapshot name 6983 * @param snapshotNewName New snapshot name 6984 * @throws SafeModeException 6985 * @throws IOException 6986 */ 6987 void renameSnapshot(String path, String snapshotOldName, 6988 String snapshotNewName) throws SafeModeException, IOException { 6989 checkOperation(OperationCategory.WRITE); 6990 final FSPermissionChecker pc = getPermissionChecker(); 6991 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 6992 if (cacheEntry != null && cacheEntry.isSuccess()) { 6993 return; // Return previous response 6994 } 6995 writeLock(); 
  /**
   * Rename a snapshot
   * @param path The directory path where the snapshot was taken
   * @param snapshotOldName Old snapshot name
   * @param snapshotNewName New snapshot name
   * @throws SafeModeException
   * @throws IOException
   */
  void renameSnapshot(String path, String snapshotOldName,
      String snapshotNewName) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = getPermissionChecker();
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    writeLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot rename snapshot for " + path);
      if (isPermissionEnabled) {
        // Only the snapshottable directory's owner may rename its snapshots.
        checkOwner(pc, path);
      }
      dir.verifySnapshotName(snapshotNewName, path);

      snapshotManager.renameSnapshot(path, snapshotOldName, snapshotNewName);
      getEditLog().logRenameSnapshot(path, snapshotOldName, snapshotNewName,
          cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      // Record the outcome so a retried RPC short-circuits above.
      RetryCache.setState(cacheEntry, success);
    }
    getEditLog().logSync();

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName);
      String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName);
      logAuditEvent(true, "renameSnapshot", oldSnapshotRoot, newSnapshotRoot, null);
    }
  }

  /**
   * Get the list of snapshottable directories that are owned
   * by the current user. Return all the snapshottable directories if the
   * current user is a super user.
   * @return The list of all the current snapshottable directories
   * @throws IOException
   */
  public SnapshottableDirectoryStatus[] getSnapshottableDirListing()
      throws IOException {
    SnapshottableDirectoryStatus[] status = null;
    checkOperation(OperationCategory.READ);
    final FSPermissionChecker checker = getPermissionChecker();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      // A null user means "list everything" (superuser).
      final String user = checker.isSuperUser()? null : checker.getUser();
      status = snapshotManager.getSnapshottableDirListing(user);
    } finally {
      readUnlock();
    }
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "listSnapshottableDirectory", null, null, null);
    }
    return status;
  }
  /**
   * Get the difference between two snapshots (or between a snapshot and the
   * current status) of a snapshottable directory.
   *
   * @param path The full path of the snapshottable directory.
   * @param fromSnapshot Name of the snapshot to calculate the diff from. Null
   *          or empty string indicates the current tree.
   * @param toSnapshot Name of the snapshot to calculated the diff to. Null or
   *          empty string indicates the current tree.
   * @return A report about the difference between {@code fromSnapshot} and
   *         {@code toSnapshot}. Modified/deleted/created/renamed files and
   *         directories belonging to the snapshottable directories are listed
   *         and labeled as M/-/+/R respectively.
   * @throws IOException
   */
  SnapshotDiffReport getSnapshotDiffReport(String path,
      String fromSnapshot, String toSnapshot) throws IOException {
    SnapshotDiffInfo diffs = null;
    checkOperation(OperationCategory.READ);
    final FSPermissionChecker pc = getPermissionChecker();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      if (isPermissionEnabled) {
        // Caller needs read access on both endpoints of the diff.
        checkSubtreeReadPermission(pc, path, fromSnapshot);
        checkSubtreeReadPermission(pc, path, toSnapshot);
      }
      diffs = snapshotManager.diff(path, fromSnapshot, toSnapshot);
    } finally {
      readUnlock();
    }

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "computeSnapshotDiff", null, null, null);
    }
    // A null diff means the two endpoints are identical: return an empty report.
    return diffs != null ? diffs.generateReport() : new SnapshotDiffReport(
        path, fromSnapshot, toSnapshot,
        Collections.<DiffReportEntry> emptyList());
  }

  /**
   * Check read permission on the snapshot subtree, or on the current tree
   * when {@code snapshot} is null.
   */
  private void checkSubtreeReadPermission(final FSPermissionChecker pc,
      final String snapshottablePath, final String snapshot)
      throws AccessControlException, UnresolvedLinkException {
    final String fromPath = snapshot == null?
        snapshottablePath: Snapshot.getSnapshotPath(snapshottablePath, snapshot);
    checkPermission(pc, fromPath, false, null, null, FsAction.READ, FsAction.READ);
  }

  /**
   * Delete a snapshot of a snapshottable directory
   * @param snapshotRoot The snapshottable directory
   * @param snapshotName The name of the to-be-deleted snapshot
   * @throws SafeModeException
   * @throws IOException
   */
  void deleteSnapshot(String snapshotRoot, String snapshotName)
      throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = getPermissionChecker();

    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot);
      if (isPermissionEnabled) {
        checkOwner(pc, snapshotRoot);
      }

      List<INode> removedINodes = new ChunkedArrayList<INode>();
      dir.writeLock();
      try {
        snapshotManager.deleteSnapshot(snapshotRoot, snapshotName,
            collectedBlocks, removedINodes);
        dir.removeFromInodeMap(removedINodes);
      } finally {
        dir.writeUnlock();
      }
      removedINodes.clear();
      getEditLog().logDeleteSnapshot(snapshotRoot, snapshotName,
          cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      RetryCache.setState(cacheEntry, success);
    }
    getEditLog().logSync();

    // Block removal is deferred until after the lock is released and the
    // edit is durable.
    removeBlocks(collectedBlocks);
    collectedBlocks.clear();

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
      logAuditEvent(true, "deleteSnapshot", rootPath, null, null);
    }
  }

  /**
   * Remove a list of INodeDirectorySnapshottable from the SnapshotManager
   * @param toRemove the list of INodeDirectorySnapshottable to be removed
   */
  void removeSnapshottableDirs(List<INodeDirectorySnapshottable> toRemove) {
    if (snapshotManager != null) {
      snapshotManager.removeSnapshottable(toRemove);
    }
  }

  /** @return current rolling upgrade status, refreshing the rollback-image flag. */
  RollingUpgradeInfo queryRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.READ);
    readLock();
    try {
      if (rollingUpgradeInfo != null) {
        boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage();
        rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage);
      }
      return rollingUpgradeInfo;
    } finally {
      readUnlock();
    }
  }
  /** Start a rolling upgrade; superuser only. */
  RollingUpgradeInfo startRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      long startTime = now();
      if (!haEnabled) { // for non-HA, we require NN to be in safemode
        startRollingUpgradeInternalForNonHA(startTime);
      } else { // for HA, NN cannot be in safemode
        checkNameNodeSafeMode("Failed to start rolling upgrade");
        startRollingUpgradeInternal(startTime);
      }

      getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime());
      if (haEnabled) {
        // roll the edit log to make sure the standby NameNode can tail
        getFSImage().rollEditLog();
      }
    } finally {
      writeUnlock();
    }

    getEditLog().logSync();
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "startRollingUpgrade", null, null, null);
    }
    return rollingUpgradeInfo;
  }

  /**
   * Update internal state to indicate that a rolling upgrade is in progress.
   * @param startTime start time of the rolling upgrade, in milliseconds
   */
  void startRollingUpgradeInternal(long startTime)
      throws IOException {
    checkRollingUpgrade("start rolling upgrade");
    getFSImage().checkUpgrade(this);
    setRollingUpgradeInfo(false, startTime);
  }

  /**
   * Update internal state to indicate that a rolling upgrade is in progress for
   * non-HA setup. This requires the namesystem is in SafeMode and after doing a
   * checkpoint for rollback the namesystem will quit the safemode automatically
   */
  private void startRollingUpgradeInternalForNonHA(long startTime)
      throws IOException {
    Preconditions.checkState(!haEnabled);
    if (!isInSafeMode()) {
      throw new IOException("Safe mode should be turned ON "
          + "in order to create namespace image.");
    }
    checkRollingUpgrade("start rolling upgrade");
    getFSImage().checkUpgrade(this);
    // in non-HA setup, we do an extra ckpt to generate a rollback image
    getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
    LOG.info("Successfully saved namespace for preparing rolling upgrade.");

    // leave SafeMode automatically
    setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
    setRollingUpgradeInfo(true, startTime);
  }

  /** Install a fresh RollingUpgradeInfo (finalize time 0 = not finalized). */
  void setRollingUpgradeInfo(boolean createdRollbackImages, long startTime) {
    rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId,
        createdRollbackImages, startTime, 0L);
  }

  public void setCreatedRollbackImages(boolean created) {
    if (rollingUpgradeInfo != null) {
      rollingUpgradeInfo.setCreatedRollbackImages(created);
    }
  }

  public RollingUpgradeInfo getRollingUpgradeInfo() {
    return rollingUpgradeInfo;
  }

  public boolean isNeedRollbackFsImage() {
    return needRollbackFsImage;
  }

  public void setNeedRollbackFsImage(boolean needRollbackFsImage) {
    this.needRollbackFsImage = needRollbackFsImage;
  }

  @Override // NameNodeMXBean
  public RollingUpgradeInfo.Bean getRollingUpgradeStatus() {
    readLock();
    try {
      RollingUpgradeInfo upgradeInfo = getRollingUpgradeInfo();
      if (upgradeInfo != null) {
        return new RollingUpgradeInfo.Bean(upgradeInfo);
      }
      return null;
    } finally {
      readUnlock();
    }
  }

  /** Is rolling upgrade in progress? */
  public boolean isRollingUpgrade() {
    return rollingUpgradeInfo != null;
  }

  /** Reject the given action if a rolling upgrade is already in progress. */
  void checkRollingUpgrade(String action) throws RollingUpgradeException {
    if (isRollingUpgrade()) {
      throw new RollingUpgradeException("Failed to " + action
          + " since a rolling upgrade is already in progress."
          + " Existing rolling upgrade info:\n" + rollingUpgradeInfo);
    }
  }

  /** Finalize the rolling upgrade; superuser only. */
  RollingUpgradeInfo finalizeRollingUpgrade() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    final RollingUpgradeInfo returnInfo;
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Failed to finalize rolling upgrade");

      returnInfo = finalizeRollingUpgradeInternal(now());
      getEditLog().logFinalizeRollingUpgrade(returnInfo.getFinalizeTime());
      getFSImage().saveNamespace(this);
      getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
          NameNodeFile.IMAGE);
    } finally {
      writeUnlock();
    }

    // getEditLog().logSync() is not needed since it does saveNamespace

    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "finalizeRollingUpgrade", null, null, null);
    }
    return returnInfo;
  }

  /** Clear the in-memory rolling upgrade state and build the finalized info. */
  RollingUpgradeInfo finalizeRollingUpgradeInternal(long finalizeTime)
      throws RollingUpgradeException {
    if (!isRollingUpgrade()) {
      throw new RollingUpgradeException(
          "Failed to finalize rolling upgrade since there is no rolling upgrade in progress.");
    }

    final long startTime = rollingUpgradeInfo.getStartTime();
    rollingUpgradeInfo = null;
    return new RollingUpgradeInfo(blockPoolId, false, startTime, finalizeTime);
  }

  long addCacheDirective(CacheDirectiveInfo directive, EnumSet<CacheFlag> flags)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    final FSPermissionChecker pc = isPermissionEnabled ?
7331 getPermissionChecker() : null; 7332 CacheEntryWithPayload cacheEntry = 7333 RetryCache.waitForCompletion(retryCache, null); 7334 if (cacheEntry != null && cacheEntry.isSuccess()) { 7335 return (Long) cacheEntry.getPayload(); 7336 } 7337 boolean success = false; 7338 if (!flags.contains(CacheFlag.FORCE)) { 7339 cacheManager.waitForRescanIfNeeded(); 7340 } 7341 writeLock(); 7342 Long result = null; 7343 try { 7344 checkOperation(OperationCategory.WRITE); 7345 if (isInSafeMode()) { 7346 throw new SafeModeException( 7347 "Cannot add cache directive", safeMode); 7348 } 7349 if (directive.getId() != null) { 7350 throw new IOException("addDirective: you cannot specify an ID " + 7351 "for this operation."); 7352 } 7353 CacheDirectiveInfo effectiveDirective = 7354 cacheManager.addDirective(directive, pc, flags); 7355 getEditLog().logAddCacheDirectiveInfo(effectiveDirective, 7356 cacheEntry != null); 7357 result = effectiveDirective.getId(); 7358 success = true; 7359 } finally { 7360 writeUnlock(); 7361 if (success) { 7362 getEditLog().logSync(); 7363 } 7364 if (isAuditEnabled() && isExternalInvocation()) { 7365 logAuditEvent(success, "addCacheDirective", null, null, null); 7366 } 7367 RetryCache.setState(cacheEntry, success, result); 7368 } 7369 return result; 7370 } 7371 7372 void modifyCacheDirective(CacheDirectiveInfo directive, 7373 EnumSet<CacheFlag> flags) throws IOException { 7374 checkOperation(OperationCategory.WRITE); 7375 final FSPermissionChecker pc = isPermissionEnabled ? 
7376 getPermissionChecker() : null; 7377 boolean success = false; 7378 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 7379 if (cacheEntry != null && cacheEntry.isSuccess()) { 7380 return; 7381 } 7382 if (!flags.contains(CacheFlag.FORCE)) { 7383 cacheManager.waitForRescanIfNeeded(); 7384 } 7385 writeLock(); 7386 try { 7387 checkOperation(OperationCategory.WRITE); 7388 if (isInSafeMode()) { 7389 throw new SafeModeException( 7390 "Cannot add cache directive", safeMode); 7391 } 7392 cacheManager.modifyDirective(directive, pc, flags); 7393 getEditLog().logModifyCacheDirectiveInfo(directive, 7394 cacheEntry != null); 7395 success = true; 7396 } finally { 7397 writeUnlock(); 7398 if (success) { 7399 getEditLog().logSync(); 7400 } 7401 if (isAuditEnabled() && isExternalInvocation()) { 7402 logAuditEvent(success, "modifyCacheDirective", null, null, null); 7403 } 7404 RetryCache.setState(cacheEntry, success); 7405 } 7406 } 7407 7408 void removeCacheDirective(Long id) throws IOException { 7409 checkOperation(OperationCategory.WRITE); 7410 final FSPermissionChecker pc = isPermissionEnabled ? 
7411 getPermissionChecker() : null; 7412 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 7413 if (cacheEntry != null && cacheEntry.isSuccess()) { 7414 return; 7415 } 7416 boolean success = false; 7417 writeLock(); 7418 try { 7419 checkOperation(OperationCategory.WRITE); 7420 if (isInSafeMode()) { 7421 throw new SafeModeException( 7422 "Cannot remove cache directives", safeMode); 7423 } 7424 cacheManager.removeDirective(id, pc); 7425 getEditLog().logRemoveCacheDirectiveInfo(id, cacheEntry != null); 7426 success = true; 7427 } finally { 7428 writeUnlock(); 7429 if (isAuditEnabled() && isExternalInvocation()) { 7430 logAuditEvent(success, "removeCacheDirective", null, null, 7431 null); 7432 } 7433 RetryCache.setState(cacheEntry, success); 7434 } 7435 getEditLog().logSync(); 7436 } 7437 7438 BatchedListEntries<CacheDirectiveEntry> listCacheDirectives( 7439 long startId, CacheDirectiveInfo filter) throws IOException { 7440 checkOperation(OperationCategory.READ); 7441 final FSPermissionChecker pc = isPermissionEnabled ? 7442 getPermissionChecker() : null; 7443 BatchedListEntries<CacheDirectiveEntry> results; 7444 cacheManager.waitForRescanIfNeeded(); 7445 readLock(); 7446 boolean success = false; 7447 try { 7448 checkOperation(OperationCategory.READ); 7449 results = 7450 cacheManager.listCacheDirectives(startId, filter, pc); 7451 success = true; 7452 } finally { 7453 readUnlock(); 7454 if (isAuditEnabled() && isExternalInvocation()) { 7455 logAuditEvent(success, "listCacheDirectives", null, null, 7456 null); 7457 } 7458 } 7459 return results; 7460 } 7461 7462 public void addCachePool(CachePoolInfo req) throws IOException { 7463 checkOperation(OperationCategory.WRITE); 7464 final FSPermissionChecker pc = isPermissionEnabled ? 
7465 getPermissionChecker() : null; 7466 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 7467 if (cacheEntry != null && cacheEntry.isSuccess()) { 7468 return; // Return previous response 7469 } 7470 writeLock(); 7471 boolean success = false; 7472 try { 7473 checkOperation(OperationCategory.WRITE); 7474 if (isInSafeMode()) { 7475 throw new SafeModeException( 7476 "Cannot add cache pool " + req.getPoolName(), safeMode); 7477 } 7478 if (pc != null) { 7479 pc.checkSuperuserPrivilege(); 7480 } 7481 CachePoolInfo info = cacheManager.addCachePool(req); 7482 getEditLog().logAddCachePool(info, cacheEntry != null); 7483 success = true; 7484 } finally { 7485 writeUnlock(); 7486 if (isAuditEnabled() && isExternalInvocation()) { 7487 logAuditEvent(success, "addCachePool", req.getPoolName(), null, null); 7488 } 7489 RetryCache.setState(cacheEntry, success); 7490 } 7491 7492 getEditLog().logSync(); 7493 } 7494 7495 public void modifyCachePool(CachePoolInfo req) throws IOException { 7496 checkOperation(OperationCategory.WRITE); 7497 final FSPermissionChecker pc = 7498 isPermissionEnabled ? 
getPermissionChecker() : null; 7499 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 7500 if (cacheEntry != null && cacheEntry.isSuccess()) { 7501 return; // Return previous response 7502 } 7503 writeLock(); 7504 boolean success = false; 7505 try { 7506 checkOperation(OperationCategory.WRITE); 7507 if (isInSafeMode()) { 7508 throw new SafeModeException( 7509 "Cannot modify cache pool " + req.getPoolName(), safeMode); 7510 } 7511 if (pc != null) { 7512 pc.checkSuperuserPrivilege(); 7513 } 7514 cacheManager.modifyCachePool(req); 7515 getEditLog().logModifyCachePool(req, cacheEntry != null); 7516 success = true; 7517 } finally { 7518 writeUnlock(); 7519 if (isAuditEnabled() && isExternalInvocation()) { 7520 logAuditEvent(success, "modifyCachePool", req.getPoolName(), null, null); 7521 } 7522 RetryCache.setState(cacheEntry, success); 7523 } 7524 7525 getEditLog().logSync(); 7526 } 7527 7528 public void removeCachePool(String cachePoolName) throws IOException { 7529 checkOperation(OperationCategory.WRITE); 7530 final FSPermissionChecker pc = 7531 isPermissionEnabled ? 
getPermissionChecker() : null; 7532 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 7533 if (cacheEntry != null && cacheEntry.isSuccess()) { 7534 return; // Return previous response 7535 } 7536 writeLock(); 7537 boolean success = false; 7538 try { 7539 checkOperation(OperationCategory.WRITE); 7540 if (isInSafeMode()) { 7541 throw new SafeModeException( 7542 "Cannot remove cache pool " + cachePoolName, safeMode); 7543 } 7544 if (pc != null) { 7545 pc.checkSuperuserPrivilege(); 7546 } 7547 cacheManager.removeCachePool(cachePoolName); 7548 getEditLog().logRemoveCachePool(cachePoolName, cacheEntry != null); 7549 success = true; 7550 } finally { 7551 writeUnlock(); 7552 if (isAuditEnabled() && isExternalInvocation()) { 7553 logAuditEvent(success, "removeCachePool", cachePoolName, null, null); 7554 } 7555 RetryCache.setState(cacheEntry, success); 7556 } 7557 7558 getEditLog().logSync(); 7559 } 7560 7561 public BatchedListEntries<CachePoolEntry> listCachePools(String prevKey) 7562 throws IOException { 7563 final FSPermissionChecker pc = 7564 isPermissionEnabled ? 
getPermissionChecker() : null; 7565 BatchedListEntries<CachePoolEntry> results; 7566 checkOperation(OperationCategory.READ); 7567 boolean success = false; 7568 cacheManager.waitForRescanIfNeeded(); 7569 readLock(); 7570 try { 7571 checkOperation(OperationCategory.READ); 7572 results = cacheManager.listCachePools(pc, prevKey); 7573 success = true; 7574 } finally { 7575 readUnlock(); 7576 if (isAuditEnabled() && isExternalInvocation()) { 7577 logAuditEvent(success, "listCachePools", null, null, null); 7578 } 7579 } 7580 return results; 7581 } 7582 7583 void modifyAclEntries(String src, List<AclEntry> aclSpec) throws IOException { 7584 aclConfigFlag.checkForApiCall(); 7585 HdfsFileStatus resultingStat = null; 7586 FSPermissionChecker pc = getPermissionChecker(); 7587 checkOperation(OperationCategory.WRITE); 7588 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 7589 writeLock(); 7590 try { 7591 checkOperation(OperationCategory.WRITE); 7592 checkNameNodeSafeMode("Cannot modify ACL entries on " + src); 7593 src = FSDirectory.resolvePath(src, pathComponents, dir); 7594 checkOwner(pc, src); 7595 dir.modifyAclEntries(src, aclSpec); 7596 resultingStat = getAuditFileInfo(src, false); 7597 } finally { 7598 writeUnlock(); 7599 } 7600 getEditLog().logSync(); 7601 logAuditEvent(true, "modifyAclEntries", src, null, resultingStat); 7602 } 7603 7604 void removeAclEntries(String src, List<AclEntry> aclSpec) throws IOException { 7605 aclConfigFlag.checkForApiCall(); 7606 HdfsFileStatus resultingStat = null; 7607 FSPermissionChecker pc = getPermissionChecker(); 7608 checkOperation(OperationCategory.WRITE); 7609 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 7610 writeLock(); 7611 try { 7612 checkOperation(OperationCategory.WRITE); 7613 checkNameNodeSafeMode("Cannot remove ACL entries on " + src); 7614 src = FSDirectory.resolvePath(src, pathComponents, dir); 7615 checkOwner(pc, src); 7616 dir.removeAclEntries(src, aclSpec); 7617 
resultingStat = getAuditFileInfo(src, false); 7618 } finally { 7619 writeUnlock(); 7620 } 7621 getEditLog().logSync(); 7622 logAuditEvent(true, "removeAclEntries", src, null, resultingStat); 7623 } 7624 7625 void removeDefaultAcl(String src) throws IOException { 7626 aclConfigFlag.checkForApiCall(); 7627 HdfsFileStatus resultingStat = null; 7628 FSPermissionChecker pc = getPermissionChecker(); 7629 checkOperation(OperationCategory.WRITE); 7630 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 7631 writeLock(); 7632 try { 7633 checkOperation(OperationCategory.WRITE); 7634 checkNameNodeSafeMode("Cannot remove default ACL entries on " + src); 7635 src = FSDirectory.resolvePath(src, pathComponents, dir); 7636 checkOwner(pc, src); 7637 dir.removeDefaultAcl(src); 7638 resultingStat = getAuditFileInfo(src, false); 7639 } finally { 7640 writeUnlock(); 7641 } 7642 getEditLog().logSync(); 7643 logAuditEvent(true, "removeDefaultAcl", src, null, resultingStat); 7644 } 7645 7646 void removeAcl(String src) throws IOException { 7647 aclConfigFlag.checkForApiCall(); 7648 HdfsFileStatus resultingStat = null; 7649 FSPermissionChecker pc = getPermissionChecker(); 7650 checkOperation(OperationCategory.WRITE); 7651 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 7652 writeLock(); 7653 try { 7654 checkOperation(OperationCategory.WRITE); 7655 checkNameNodeSafeMode("Cannot remove ACL on " + src); 7656 src = FSDirectory.resolvePath(src, pathComponents, dir); 7657 checkOwner(pc, src); 7658 dir.removeAcl(src); 7659 resultingStat = getAuditFileInfo(src, false); 7660 } finally { 7661 writeUnlock(); 7662 } 7663 getEditLog().logSync(); 7664 logAuditEvent(true, "removeAcl", src, null, resultingStat); 7665 } 7666 7667 void setAcl(String src, List<AclEntry> aclSpec) throws IOException { 7668 aclConfigFlag.checkForApiCall(); 7669 HdfsFileStatus resultingStat = null; 7670 FSPermissionChecker pc = getPermissionChecker(); 7671 
checkOperation(OperationCategory.WRITE); 7672 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 7673 writeLock(); 7674 try { 7675 checkOperation(OperationCategory.WRITE); 7676 checkNameNodeSafeMode("Cannot set ACL on " + src); 7677 src = FSDirectory.resolvePath(src, pathComponents, dir); 7678 checkOwner(pc, src); 7679 dir.setAcl(src, aclSpec); 7680 resultingStat = getAuditFileInfo(src, false); 7681 } finally { 7682 writeUnlock(); 7683 } 7684 getEditLog().logSync(); 7685 logAuditEvent(true, "setAcl", src, null, resultingStat); 7686 } 7687 7688 AclStatus getAclStatus(String src) throws IOException { 7689 aclConfigFlag.checkForApiCall(); 7690 FSPermissionChecker pc = getPermissionChecker(); 7691 checkOperation(OperationCategory.READ); 7692 readLock(); 7693 try { 7694 checkOperation(OperationCategory.READ); 7695 if (isPermissionEnabled) { 7696 checkPermission(pc, src, false, null, null, null, null); 7697 } 7698 return dir.getAclStatus(src); 7699 } finally { 7700 readUnlock(); 7701 } 7702 } 7703 7704 /** 7705 * Default AuditLogger implementation; used when no access logger is 7706 * defined in the config file. It can also be explicitly listed in the 7707 * config file. 
7708 */ 7709 private static class DefaultAuditLogger extends HdfsAuditLogger { 7710 7711 private boolean logTokenTrackingId; 7712 7713 @Override 7714 public void initialize(Configuration conf) { 7715 logTokenTrackingId = conf.getBoolean( 7716 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY, 7717 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT); 7718 } 7719 7720 @Override 7721 public void logAuditEvent(boolean succeeded, String userName, 7722 InetAddress addr, String cmd, String src, String dst, 7723 FileStatus status, UserGroupInformation ugi, 7724 DelegationTokenSecretManager dtSecretManager) { 7725 if (auditLog.isInfoEnabled()) { 7726 final StringBuilder sb = auditBuffer.get(); 7727 sb.setLength(0); 7728 sb.append("allowed=").append(succeeded).append("\t"); 7729 sb.append("ugi=").append(userName).append("\t"); 7730 sb.append("ip=").append(addr).append("\t"); 7731 sb.append("cmd=").append(cmd).append("\t"); 7732 sb.append("src=").append(src).append("\t"); 7733 sb.append("dst=").append(dst).append("\t"); 7734 if (null == status) { 7735 sb.append("perm=null"); 7736 } else { 7737 sb.append("perm="); 7738 sb.append(status.getOwner()).append(":"); 7739 sb.append(status.getGroup()).append(":"); 7740 sb.append(status.getPermission()); 7741 } 7742 if (logTokenTrackingId) { 7743 sb.append("\t").append("trackingId="); 7744 String trackingId = null; 7745 if (ugi != null && dtSecretManager != null 7746 && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) { 7747 for (TokenIdentifier tid: ugi.getTokenIdentifiers()) { 7748 if (tid instanceof DelegationTokenIdentifier) { 7749 DelegationTokenIdentifier dtid = 7750 (DelegationTokenIdentifier)tid; 7751 trackingId = dtSecretManager.getTokenTrackingId(dtid); 7752 break; 7753 } 7754 } 7755 } 7756 sb.append(trackingId); 7757 } 7758 logAuditMessage(sb.toString()); 7759 } 7760 } 7761 7762 public void logAuditMessage(String message) { 7763 auditLog.info(message); 7764 } 7765 } 7766 7767 private static 
void enableAsyncAuditLog() { 7768 if (!(auditLog instanceof Log4JLogger)) { 7769 LOG.warn("Log4j is required to enable async auditlog"); 7770 return; 7771 } 7772 Logger logger = ((Log4JLogger)auditLog).getLogger(); 7773 @SuppressWarnings("unchecked") 7774 List<Appender> appenders = Collections.list(logger.getAllAppenders()); 7775 // failsafe against trying to async it more than once 7776 if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) { 7777 AsyncAppender asyncAppender = new AsyncAppender(); 7778 // change logger to have an async appender containing all the 7779 // previously configured appenders 7780 for (Appender appender : appenders) { 7781 logger.removeAppender(appender); 7782 asyncAppender.addAppender(appender); 7783 } 7784 logger.addAppender(asyncAppender); 7785 } 7786 } 7787 7788} 7789