001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.blockmanagement; 019 020import java.util.ArrayList; 021import java.util.Collection; 022import java.util.Collections; 023import java.util.HashMap; 024import java.util.Iterator; 025import java.util.LinkedList; 026import java.util.List; 027import java.util.Map; 028import java.util.Queue; 029 030import com.google.common.annotations.VisibleForTesting; 031import org.apache.commons.logging.Log; 032import org.apache.commons.logging.LogFactory; 033import org.apache.hadoop.classification.InterfaceAudience; 034import org.apache.hadoop.classification.InterfaceStability; 035import org.apache.hadoop.hdfs.protocol.Block; 036import org.apache.hadoop.hdfs.protocol.DatanodeID; 037import org.apache.hadoop.hdfs.protocol.DatanodeInfo; 038import org.apache.hadoop.hdfs.server.namenode.CachedBlock; 039import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; 040import org.apache.hadoop.hdfs.server.protocol.StorageReport; 041import org.apache.hadoop.hdfs.util.LightWeightHashSet; 042import org.apache.hadoop.util.IntrusiveCollection; 043import org.apache.hadoop.util.Time; 044 045import com.google.common.annotations.VisibleForTesting; 046 047/** 048 * This class extends the DatanodeInfo class with ephemeral information (eg 049 * health, capacity, what blocks are associated with the Datanode) that is 050 * private to the Namenode, ie this class is not exposed to clients. 051 */ 052@InterfaceAudience.Private 053@InterfaceStability.Evolving 054public class DatanodeDescriptor extends DatanodeInfo { 055 public static final Log LOG = LogFactory.getLog(DatanodeDescriptor.class); 056 public static final DatanodeDescriptor[] EMPTY_ARRAY = {}; 057 058 // Stores status of decommissioning. 059 // If node is not decommissioning, do not use this object for anything. 060 public final DecommissioningStatus decommissioningStatus = new DecommissioningStatus(); 061 062 /** Block and targets pair */ 063 @InterfaceAudience.Private 064 @InterfaceStability.Evolving 065 public static class BlockTargetPair { 066 public final Block block; 067 public final DatanodeStorageInfo[] targets; 068 069 BlockTargetPair(Block block, DatanodeStorageInfo[] targets) { 070 this.block = block; 071 this.targets = targets; 072 } 073 } 074 075 /** A BlockTargetPair queue. */ 076 private static class BlockQueue<E> { 077 private final Queue<E> blockq = new LinkedList<E>(); 078 079 /** Size of the queue */ 080 synchronized int size() {return blockq.size();} 081 082 /** Enqueue */ 083 synchronized boolean offer(E e) { 084 return blockq.offer(e); 085 } 086 087 /** Dequeue */ 088 synchronized List<E> poll(int numBlocks) { 089 if (numBlocks <= 0 || blockq.isEmpty()) { 090 return null; 091 } 092 093 List<E> results = new ArrayList<E>(); 094 for(; !blockq.isEmpty() && numBlocks > 0; numBlocks--) { 095 results.add(blockq.poll()); 096 } 097 return results; 098 } 099 100 /** 101 * Returns <tt>true</tt> if the queue contains the specified element. 102 */ 103 boolean contains(E e) { 104 return blockq.contains(e); 105 } 106 107 synchronized void clear() { 108 blockq.clear(); 109 } 110 } 111 112 private final Map<String, DatanodeStorageInfo> storageMap = 113 new HashMap<String, DatanodeStorageInfo>(); 114 115 /** 116 * A list of CachedBlock objects on this datanode. 117 */ 118 public static class CachedBlocksList extends IntrusiveCollection<CachedBlock> { 119 public enum Type { 120 PENDING_CACHED, 121 CACHED, 122 PENDING_UNCACHED 123 } 124 125 private final DatanodeDescriptor datanode; 126 127 private final Type type; 128 129 CachedBlocksList(DatanodeDescriptor datanode, Type type) { 130 this.datanode = datanode; 131 this.type = type; 132 } 133 134 public DatanodeDescriptor getDatanode() { 135 return datanode; 136 } 137 138 public Type getType() { 139 return type; 140 } 141 } 142 143 /** 144 * The blocks which we want to cache on this DataNode. 145 */ 146 private final CachedBlocksList pendingCached = 147 new CachedBlocksList(this, CachedBlocksList.Type.PENDING_CACHED); 148 149 /** 150 * The blocks which we know are cached on this datanode. 151 * This list is updated by periodic cache reports. 152 */ 153 private final CachedBlocksList cached = 154 new CachedBlocksList(this, CachedBlocksList.Type.CACHED); 155 156 /** 157 * The blocks which we want to uncache on this DataNode. 158 */ 159 private final CachedBlocksList pendingUncached = 160 new CachedBlocksList(this, CachedBlocksList.Type.PENDING_UNCACHED); 161 162 public CachedBlocksList getPendingCached() { 163 return pendingCached; 164 } 165 166 public CachedBlocksList getCached() { 167 return cached; 168 } 169 170 public CachedBlocksList getPendingUncached() { 171 return pendingUncached; 172 } 173 174 /** 175 * The time when the last batch of caching directives was sent, in 176 * monotonic milliseconds. 177 */ 178 private long lastCachingDirectiveSentTimeMs; 179 180 // isAlive == heartbeats.contains(this) 181 // This is an optimization, because contains takes O(n) time on Arraylist 182 public boolean isAlive = false; 183 public boolean needKeyUpdate = false; 184 185 186 // A system administrator can tune the balancer bandwidth parameter 187 // (dfs.balance.bandwidthPerSec) dynamically by calling 188 // "dfsadmin -setBalanacerBandwidth <newbandwidth>", at which point the 189 // following 'bandwidth' variable gets updated with the new value for each 190 // node. Once the heartbeat command is issued to update the value on the 191 // specified datanode, this value will be set back to 0. 192 private long bandwidth; 193 194 /** A queue of blocks to be replicated by this datanode */ 195 private final BlockQueue<BlockTargetPair> replicateBlocks = new BlockQueue<BlockTargetPair>(); 196 /** A queue of blocks to be recovered by this datanode */ 197 private final BlockQueue<BlockInfoUnderConstruction> recoverBlocks = 198 new BlockQueue<BlockInfoUnderConstruction>(); 199 /** A set of blocks to be invalidated by this datanode */ 200 private final LightWeightHashSet<Block> invalidateBlocks = new LightWeightHashSet<Block>(); 201 202 /* Variables for maintaining number of blocks scheduled to be written to 203 * this storage. This count is approximate and might be slightly bigger 204 * in case of errors (e.g. datanode does not report if an error occurs 205 * while writing the block). 206 */ 207 private int currApproxBlocksScheduled = 0; 208 private int prevApproxBlocksScheduled = 0; 209 private long lastBlocksScheduledRollTime = 0; 210 private static final int BLOCKS_SCHEDULED_ROLL_INTERVAL = 600*1000; //10min 211 private int volumeFailures = 0; 212 213 /** 214 * When set to true, the node is not in include list and is not allowed 215 * to communicate with the namenode 216 */ 217 private boolean disallowed = false; 218 219 /** 220 * DatanodeDescriptor constructor 221 * @param nodeID id of the data node 222 */ 223 public DatanodeDescriptor(DatanodeID nodeID) { 224 super(nodeID); 225 updateHeartbeat(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0); 226 } 227 228 /** 229 * DatanodeDescriptor constructor 230 * @param nodeID id of the data node 231 * @param networkLocation location of the data node in network 232 */ 233 public DatanodeDescriptor(DatanodeID nodeID, 234 String networkLocation) { 235 super(nodeID, networkLocation); 236 updateHeartbeat(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0); 237 } 238 239 /** 240 * Add data-node to the block. Add block to the head of the list of blocks 241 * belonging to the data-node. 242 */ 243 public boolean addBlock(String storageID, BlockInfo b) { 244 DatanodeStorageInfo s = getStorageInfo(storageID); 245 if (s != null) { 246 return s.addBlock(b); 247 } 248 return false; 249 } 250 251 @VisibleForTesting 252 public DatanodeStorageInfo getStorageInfo(String storageID) { 253 synchronized (storageMap) { 254 return storageMap.get(storageID); 255 } 256 } 257 DatanodeStorageInfo[] getStorageInfos() { 258 synchronized (storageMap) { 259 final Collection<DatanodeStorageInfo> storages = storageMap.values(); 260 return storages.toArray(new DatanodeStorageInfo[storages.size()]); 261 } 262 } 263 264 boolean hasStaleStorages() { 265 synchronized (storageMap) { 266 for (DatanodeStorageInfo storage : storageMap.values()) { 267 if (storage.areBlockContentsStale()) { 268 return true; 269 } 270 } 271 return false; 272 } 273 } 274 275 /** 276 * Remove block from the list of blocks belonging to the data-node. Remove 277 * data-node from the block. 278 */ 279 boolean removeBlock(BlockInfo b) { 280 int index = b.findStorageInfo(this); 281 // if block exists on this datanode 282 if (index >= 0) { 283 DatanodeStorageInfo s = b.getStorageInfo(index); 284 if (s != null) { 285 return s.removeBlock(b); 286 } 287 } 288 return false; 289 } 290 291 /** 292 * Remove block from the list of blocks belonging to the data-node. Remove 293 * data-node from the block. 294 */ 295 boolean removeBlock(String storageID, BlockInfo b) { 296 DatanodeStorageInfo s = getStorageInfo(storageID); 297 if (s != null) { 298 return s.removeBlock(b); 299 } 300 return false; 301 } 302 303 /** 304 * Replace specified old block with a new one in the DataNodeDescriptor. 305 * 306 * @param oldBlock - block to be replaced 307 * @param newBlock - a replacement block 308 * @return the new block 309 */ 310 public BlockInfo replaceBlock(BlockInfo oldBlock, BlockInfo newBlock) { 311 int index = oldBlock.findStorageInfo(this); 312 DatanodeStorageInfo s = oldBlock.getStorageInfo(index); 313 boolean done = s.removeBlock(oldBlock); 314 assert done : "Old block should belong to the data-node when replacing"; 315 316 done = s.addBlock(newBlock); 317 assert done : "New block should not belong to the data-node when replacing"; 318 return newBlock; 319 } 320 321 public void resetBlocks() { 322 setCapacity(0); 323 setRemaining(0); 324 setBlockPoolUsed(0); 325 setDfsUsed(0); 326 setXceiverCount(0); 327 this.invalidateBlocks.clear(); 328 this.volumeFailures = 0; 329 // pendingCached, cached, and pendingUncached are protected by the 330 // FSN lock. 331 this.pendingCached.clear(); 332 this.cached.clear(); 333 this.pendingUncached.clear(); 334 } 335 336 public void clearBlockQueues() { 337 synchronized (invalidateBlocks) { 338 this.invalidateBlocks.clear(); 339 this.recoverBlocks.clear(); 340 this.replicateBlocks.clear(); 341 } 342 // pendingCached, cached, and pendingUncached are protected by the 343 // FSN lock. 344 this.pendingCached.clear(); 345 this.cached.clear(); 346 this.pendingUncached.clear(); 347 } 348 349 public int numBlocks() { 350 int blocks = 0; 351 for (DatanodeStorageInfo entry : getStorageInfos()) { 352 blocks += entry.numBlocks(); 353 } 354 return blocks; 355 } 356 357 /** 358 * Updates stats from datanode heartbeat. 359 */ 360 public void updateHeartbeat(StorageReport[] reports, long cacheCapacity, 361 long cacheUsed, int xceiverCount, int volFailures) { 362 long totalCapacity = 0; 363 long totalRemaining = 0; 364 long totalBlockPoolUsed = 0; 365 long totalDfsUsed = 0; 366 367 setCacheCapacity(cacheCapacity); 368 setCacheUsed(cacheUsed); 369 setXceiverCount(xceiverCount); 370 setLastUpdate(Time.now()); 371 this.volumeFailures = volFailures; 372 for (StorageReport report : reports) { 373 DatanodeStorageInfo storage = updateStorage(report.getStorage()); 374 storage.receivedHeartbeat(report); 375 totalCapacity += report.getCapacity(); 376 totalRemaining += report.getRemaining(); 377 totalBlockPoolUsed += report.getBlockPoolUsed(); 378 totalDfsUsed += report.getDfsUsed(); 379 } 380 rollBlocksScheduled(getLastUpdate()); 381 382 // Update total metrics for the node. 383 setCapacity(totalCapacity); 384 setRemaining(totalRemaining); 385 setBlockPoolUsed(totalBlockPoolUsed); 386 setDfsUsed(totalDfsUsed); 387 } 388 389 private static class BlockIterator implements Iterator<BlockInfo> { 390 private int index = 0; 391 private final List<Iterator<BlockInfo>> iterators; 392 393 private BlockIterator(final DatanodeStorageInfo... storages) { 394 List<Iterator<BlockInfo>> iterators = new ArrayList<Iterator<BlockInfo>>(); 395 for (DatanodeStorageInfo e : storages) { 396 iterators.add(e.getBlockIterator()); 397 } 398 this.iterators = Collections.unmodifiableList(iterators); 399 } 400 401 @Override 402 public boolean hasNext() { 403 update(); 404 return !iterators.isEmpty() && iterators.get(index).hasNext(); 405 } 406 407 @Override 408 public BlockInfo next() { 409 update(); 410 return iterators.get(index).next(); 411 } 412 413 @Override 414 public void remove() { 415 throw new UnsupportedOperationException("Remove unsupported."); 416 } 417 418 private void update() { 419 while(index < iterators.size() - 1 && !iterators.get(index).hasNext()) { 420 index++; 421 } 422 } 423 } 424 425 Iterator<BlockInfo> getBlockIterator() { 426 return new BlockIterator(getStorageInfos()); 427 } 428 Iterator<BlockInfo> getBlockIterator(final String storageID) { 429 return new BlockIterator(getStorageInfo(storageID)); 430 } 431 432 /** 433 * Store block replication work. 434 */ 435 void addBlockToBeReplicated(Block block, DatanodeStorageInfo[] targets) { 436 assert(block != null && targets != null && targets.length > 0); 437 replicateBlocks.offer(new BlockTargetPair(block, targets)); 438 } 439 440 /** 441 * Store block recovery work. 442 */ 443 void addBlockToBeRecovered(BlockInfoUnderConstruction block) { 444 if(recoverBlocks.contains(block)) { 445 // this prevents adding the same block twice to the recovery queue 446 BlockManager.LOG.info(block + " is already in the recovery queue"); 447 return; 448 } 449 recoverBlocks.offer(block); 450 } 451 452 /** 453 * Store block invalidation work. 454 */ 455 void addBlocksToBeInvalidated(List<Block> blocklist) { 456 assert(blocklist != null && blocklist.size() > 0); 457 synchronized (invalidateBlocks) { 458 for(Block blk : blocklist) { 459 invalidateBlocks.add(blk); 460 } 461 } 462 } 463 464 /** 465 * The number of work items that are pending to be replicated 466 */ 467 int getNumberOfBlocksToBeReplicated() { 468 return replicateBlocks.size(); 469 } 470 471 /** 472 * The number of block invalidation items that are pending to 473 * be sent to the datanode 474 */ 475 int getNumberOfBlocksToBeInvalidated() { 476 synchronized (invalidateBlocks) { 477 return invalidateBlocks.size(); 478 } 479 } 480 481 public List<BlockTargetPair> getReplicationCommand(int maxTransfers) { 482 return replicateBlocks.poll(maxTransfers); 483 } 484 485 public BlockInfoUnderConstruction[] getLeaseRecoveryCommand(int maxTransfers) { 486 List<BlockInfoUnderConstruction> blocks = recoverBlocks.poll(maxTransfers); 487 if(blocks == null) 488 return null; 489 return blocks.toArray(new BlockInfoUnderConstruction[blocks.size()]); 490 } 491 492 /** 493 * Remove the specified number of blocks to be invalidated 494 */ 495 public Block[] getInvalidateBlocks(int maxblocks) { 496 synchronized (invalidateBlocks) { 497 Block[] deleteList = invalidateBlocks.pollToArray(new Block[Math.min( 498 invalidateBlocks.size(), maxblocks)]); 499 return deleteList.length == 0 ? null : deleteList; 500 } 501 } 502 503 /** 504 * @return Approximate number of blocks currently scheduled to be written 505 * to this datanode. 506 */ 507 public int getBlocksScheduled() { 508 return currApproxBlocksScheduled + prevApproxBlocksScheduled; 509 } 510 511 /** Increment the number of blocks scheduled. */ 512 void incrementBlocksScheduled() { 513 currApproxBlocksScheduled++; 514 } 515 516 /** Decrement the number of blocks scheduled. */ 517 void decrementBlocksScheduled() { 518 if (prevApproxBlocksScheduled > 0) { 519 prevApproxBlocksScheduled--; 520 } else if (currApproxBlocksScheduled > 0) { 521 currApproxBlocksScheduled--; 522 } 523 // its ok if both counters are zero. 524 } 525 526 /** Adjusts curr and prev number of blocks scheduled every few minutes. */ 527 private void rollBlocksScheduled(long now) { 528 if (now - lastBlocksScheduledRollTime > BLOCKS_SCHEDULED_ROLL_INTERVAL) { 529 prevApproxBlocksScheduled = currApproxBlocksScheduled; 530 currApproxBlocksScheduled = 0; 531 lastBlocksScheduledRollTime = now; 532 } 533 } 534 535 @Override 536 public int hashCode() { 537 // Super implementation is sufficient 538 return super.hashCode(); 539 } 540 541 @Override 542 public boolean equals(Object obj) { 543 // Sufficient to use super equality as datanodes are uniquely identified 544 // by DatanodeID 545 return (this == obj) || super.equals(obj); 546 } 547 548 /** Decommissioning status */ 549 public class DecommissioningStatus { 550 private int underReplicatedBlocks; 551 private int decommissionOnlyReplicas; 552 private int underReplicatedInOpenFiles; 553 private long startTime; 554 555 synchronized void set(int underRep, 556 int onlyRep, int underConstruction) { 557 if (isDecommissionInProgress() == false) { 558 return; 559 } 560 underReplicatedBlocks = underRep; 561 decommissionOnlyReplicas = onlyRep; 562 underReplicatedInOpenFiles = underConstruction; 563 } 564 565 /** @return the number of under-replicated blocks */ 566 public synchronized int getUnderReplicatedBlocks() { 567 if (isDecommissionInProgress() == false) { 568 return 0; 569 } 570 return underReplicatedBlocks; 571 } 572 /** @return the number of decommission-only replicas */ 573 public synchronized int getDecommissionOnlyReplicas() { 574 if (isDecommissionInProgress() == false) { 575 return 0; 576 } 577 return decommissionOnlyReplicas; 578 } 579 /** @return the number of under-replicated blocks in open files */ 580 public synchronized int getUnderReplicatedInOpenFiles() { 581 if (isDecommissionInProgress() == false) { 582 return 0; 583 } 584 return underReplicatedInOpenFiles; 585 } 586 /** Set start time */ 587 public synchronized void setStartTime(long time) { 588 startTime = time; 589 } 590 /** @return start time */ 591 public synchronized long getStartTime() { 592 if (isDecommissionInProgress() == false) { 593 return 0; 594 } 595 return startTime; 596 } 597 } // End of class DecommissioningStatus 598 599 /** 600 * Set the flag to indicate if this datanode is disallowed from communicating 601 * with the namenode. 602 */ 603 public void setDisallowed(boolean flag) { 604 disallowed = flag; 605 } 606 /** Is the datanode disallowed from communicating with the namenode? */ 607 public boolean isDisallowed() { 608 return disallowed; 609 } 610 611 /** 612 * @return number of failed volumes in the datanode. 613 */ 614 public int getVolumeFailures() { 615 return volumeFailures; 616 } 617 618 /** 619 * @param nodeReg DatanodeID to update registration for. 620 */ 621 @Override 622 public void updateRegInfo(DatanodeID nodeReg) { 623 super.updateRegInfo(nodeReg); 624 625 // must re-process IBR after re-registration 626 for(DatanodeStorageInfo storage : getStorageInfos()) { 627 storage.setBlockReportCount(0); 628 } 629 } 630 631 /** 632 * @return balancer bandwidth in bytes per second for this datanode 633 */ 634 public long getBalancerBandwidth() { 635 return this.bandwidth; 636 } 637 638 /** 639 * @param bandwidth balancer bandwidth in bytes per second for this datanode 640 */ 641 public void setBalancerBandwidth(long bandwidth) { 642 this.bandwidth = bandwidth; 643 } 644 645 @Override 646 public String dumpDatanode() { 647 StringBuilder sb = new StringBuilder(super.dumpDatanode()); 648 int repl = replicateBlocks.size(); 649 if (repl > 0) { 650 sb.append(" ").append(repl).append(" blocks to be replicated;"); 651 } 652 int inval = invalidateBlocks.size(); 653 if (inval > 0) { 654 sb.append(" ").append(inval).append(" blocks to be invalidated;"); 655 } 656 int recover = recoverBlocks.size(); 657 if (recover > 0) { 658 sb.append(" ").append(recover).append(" blocks to be recovered;"); 659 } 660 return sb.toString(); 661 } 662 663 DatanodeStorageInfo updateStorage(DatanodeStorage s) { 664 synchronized (storageMap) { 665 DatanodeStorageInfo storage = storageMap.get(s.getStorageID()); 666 if (storage == null) { 667 LOG.info("Adding new storage ID " + s.getStorageID() + 668 " for DN " + getXferAddr()); 669 storage = new DatanodeStorageInfo(this, s); 670 storageMap.put(s.getStorageID(), storage); 671 } else if (storage.getState() != s.getState() || 672 storage.getStorageType() != s.getStorageType()) { 673 // For backwards compatibility, make sure that the type and 674 // state are updated. Some reports from older datanodes do 675 // not include these fields so we may have assumed defaults. 676 // This check can be removed in the next major release after 677 // 2.4. 678 storage.updateFromStorage(s); 679 storageMap.put(storage.getStorageID(), storage); 680 } 681 return storage; 682 } 683 } 684 685 /** 686 * @return The time at which we last sent caching directives to this 687 * DataNode, in monotonic milliseconds. 688 */ 689 public long getLastCachingDirectiveSentTimeMs() { 690 return this.lastCachingDirectiveSentTimeMs; 691 } 692 693 /** 694 * @param time The time at which we last sent caching directives to this 695 * DataNode, in monotonic milliseconds. 696 */ 697 public void setLastCachingDirectiveSentTimeMs(long time) { 698 this.lastCachingDirectiveSentTimeMs = time; 699 } 700 701 /** 702 * checks whether atleast first block report has been received 703 * @return 704 */ 705 public boolean checkBlockReportReceived() { 706 if(this.getStorageInfos().length == 0) { 707 return false; 708 } 709 for(DatanodeStorageInfo storageInfo: this.getStorageInfos()) { 710 if(storageInfo.getBlockReportCount() == 0 ) 711 return false; 712 } 713 return true; 714 } 715} 716