001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.blockmanagement;
019
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.namenode.CachedBlock;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.apache.hadoop.hdfs.util.LightWeightHashSet;
import org.apache.hadoop.util.IntrusiveCollection;
import org.apache.hadoop.util.Time;

import com.google.common.annotations.VisibleForTesting;
046
047/**
048 * This class extends the DatanodeInfo class with ephemeral information (eg
049 * health, capacity, what blocks are associated with the Datanode) that is
050 * private to the Namenode, ie this class is not exposed to clients.
051 */
052@InterfaceAudience.Private
053@InterfaceStability.Evolving
054public class DatanodeDescriptor extends DatanodeInfo {
055  public static final Log LOG = LogFactory.getLog(DatanodeDescriptor.class);
056  public static final DatanodeDescriptor[] EMPTY_ARRAY = {};
057
058  // Stores status of decommissioning.
059  // If node is not decommissioning, do not use this object for anything.
060  public final DecommissioningStatus decommissioningStatus = new DecommissioningStatus();
061  
062  /** Block and targets pair */
063  @InterfaceAudience.Private
064  @InterfaceStability.Evolving
065  public static class BlockTargetPair {
066    public final Block block;
067    public final DatanodeStorageInfo[] targets;    
068
069    BlockTargetPair(Block block, DatanodeStorageInfo[] targets) {
070      this.block = block;
071      this.targets = targets;
072    }
073  }
074
075  /** A BlockTargetPair queue. */
076  private static class BlockQueue<E> {
077    private final Queue<E> blockq = new LinkedList<E>();
078
079    /** Size of the queue */
080    synchronized int size() {return blockq.size();}
081
082    /** Enqueue */
083    synchronized boolean offer(E e) { 
084      return blockq.offer(e);
085    }
086
087    /** Dequeue */
088    synchronized List<E> poll(int numBlocks) {
089      if (numBlocks <= 0 || blockq.isEmpty()) {
090        return null;
091      }
092
093      List<E> results = new ArrayList<E>();
094      for(; !blockq.isEmpty() && numBlocks > 0; numBlocks--) {
095        results.add(blockq.poll());
096      }
097      return results;
098    }
099
100    /**
101     * Returns <tt>true</tt> if the queue contains the specified element.
102     */
103    boolean contains(E e) {
104      return blockq.contains(e);
105    }
106
107    synchronized void clear() {
108      blockq.clear();
109    }
110  }
111
112  private final Map<String, DatanodeStorageInfo> storageMap = 
113      new HashMap<String, DatanodeStorageInfo>();
114
115  /**
116   * A list of CachedBlock objects on this datanode.
117   */
118  public static class CachedBlocksList extends IntrusiveCollection<CachedBlock> {
119    public enum Type {
120      PENDING_CACHED,
121      CACHED,
122      PENDING_UNCACHED
123    }
124
125    private final DatanodeDescriptor datanode;
126
127    private final Type type;
128
129    CachedBlocksList(DatanodeDescriptor datanode, Type type) {
130      this.datanode = datanode;
131      this.type = type;
132    }
133
134    public DatanodeDescriptor getDatanode() {
135      return datanode;
136    }
137
138    public Type getType() {
139      return type;
140    }
141  }
142
143  /**
144   * The blocks which we want to cache on this DataNode.
145   */
146  private final CachedBlocksList pendingCached = 
147      new CachedBlocksList(this, CachedBlocksList.Type.PENDING_CACHED);
148
149  /**
150   * The blocks which we know are cached on this datanode.
151   * This list is updated by periodic cache reports.
152   */
153  private final CachedBlocksList cached = 
154      new CachedBlocksList(this, CachedBlocksList.Type.CACHED);
155
156  /**
157   * The blocks which we want to uncache on this DataNode.
158   */
159  private final CachedBlocksList pendingUncached = 
160      new CachedBlocksList(this, CachedBlocksList.Type.PENDING_UNCACHED);
161
162  public CachedBlocksList getPendingCached() {
163    return pendingCached;
164  }
165
166  public CachedBlocksList getCached() {
167    return cached;
168  }
169
170  public CachedBlocksList getPendingUncached() {
171    return pendingUncached;
172  }
173
174  /**
175   * The time when the last batch of caching directives was sent, in
176   * monotonic milliseconds.
177   */
178  private long lastCachingDirectiveSentTimeMs;
179
180  // isAlive == heartbeats.contains(this)
181  // This is an optimization, because contains takes O(n) time on Arraylist
182  public boolean isAlive = false;
183  public boolean needKeyUpdate = false;
184
185  
186  // A system administrator can tune the balancer bandwidth parameter
187  // (dfs.balance.bandwidthPerSec) dynamically by calling
188  // "dfsadmin -setBalanacerBandwidth <newbandwidth>", at which point the
189  // following 'bandwidth' variable gets updated with the new value for each
190  // node. Once the heartbeat command is issued to update the value on the
191  // specified datanode, this value will be set back to 0.
192  private long bandwidth;
193
194  /** A queue of blocks to be replicated by this datanode */
195  private final BlockQueue<BlockTargetPair> replicateBlocks = new BlockQueue<BlockTargetPair>();
196  /** A queue of blocks to be recovered by this datanode */
197  private final BlockQueue<BlockInfoUnderConstruction> recoverBlocks =
198                                new BlockQueue<BlockInfoUnderConstruction>();
199  /** A set of blocks to be invalidated by this datanode */
200  private final LightWeightHashSet<Block> invalidateBlocks = new LightWeightHashSet<Block>();
201
202  /* Variables for maintaining number of blocks scheduled to be written to
203   * this storage. This count is approximate and might be slightly bigger
204   * in case of errors (e.g. datanode does not report if an error occurs
205   * while writing the block).
206   */
207  private int currApproxBlocksScheduled = 0;
208  private int prevApproxBlocksScheduled = 0;
209  private long lastBlocksScheduledRollTime = 0;
210  private static final int BLOCKS_SCHEDULED_ROLL_INTERVAL = 600*1000; //10min
211  private int volumeFailures = 0;
212  
213  /** 
214   * When set to true, the node is not in include list and is not allowed
215   * to communicate with the namenode
216   */
217  private boolean disallowed = false;
218
219  /**
220   * DatanodeDescriptor constructor
221   * @param nodeID id of the data node
222   */
223  public DatanodeDescriptor(DatanodeID nodeID) {
224    super(nodeID);
225    updateHeartbeat(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0);
226  }
227
228  /**
229   * DatanodeDescriptor constructor
230   * @param nodeID id of the data node
231   * @param networkLocation location of the data node in network
232   */
233  public DatanodeDescriptor(DatanodeID nodeID, 
234                            String networkLocation) {
235    super(nodeID, networkLocation);
236    updateHeartbeat(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0);
237  }
238
239  /**
240   * Add data-node to the block. Add block to the head of the list of blocks
241   * belonging to the data-node.
242   */
243  public boolean addBlock(String storageID, BlockInfo b) {
244    DatanodeStorageInfo s = getStorageInfo(storageID);
245    if (s != null) {
246      return s.addBlock(b);
247    }
248    return false;
249  }
250
251  @VisibleForTesting
252  public DatanodeStorageInfo getStorageInfo(String storageID) {
253    synchronized (storageMap) {
254      return storageMap.get(storageID);
255    }
256  }
257  DatanodeStorageInfo[] getStorageInfos() {
258    synchronized (storageMap) {
259      final Collection<DatanodeStorageInfo> storages = storageMap.values();
260      return storages.toArray(new DatanodeStorageInfo[storages.size()]);
261    }
262  }
263
264  boolean hasStaleStorages() {
265    synchronized (storageMap) {
266      for (DatanodeStorageInfo storage : storageMap.values()) {
267        if (storage.areBlockContentsStale()) {
268          return true;
269        }
270      }
271      return false;
272    }
273  }
274
275  /**
276   * Remove block from the list of blocks belonging to the data-node. Remove
277   * data-node from the block.
278   */
279  boolean removeBlock(BlockInfo b) {
280    int index = b.findStorageInfo(this);
281    // if block exists on this datanode
282    if (index >= 0) {
283      DatanodeStorageInfo s = b.getStorageInfo(index);
284      if (s != null) {
285        return s.removeBlock(b);
286      }
287    }
288    return false;
289  }
290  
291  /**
292   * Remove block from the list of blocks belonging to the data-node. Remove
293   * data-node from the block.
294   */
295  boolean removeBlock(String storageID, BlockInfo b) {
296    DatanodeStorageInfo s = getStorageInfo(storageID);
297    if (s != null) {
298      return s.removeBlock(b);
299    }
300    return false;
301  }
302
303  /**
304   * Replace specified old block with a new one in the DataNodeDescriptor.
305   *
306   * @param oldBlock - block to be replaced
307   * @param newBlock - a replacement block
308   * @return the new block
309   */
310  public BlockInfo replaceBlock(BlockInfo oldBlock, BlockInfo newBlock) {
311    int index = oldBlock.findStorageInfo(this);
312    DatanodeStorageInfo s = oldBlock.getStorageInfo(index);
313    boolean done = s.removeBlock(oldBlock);
314    assert done : "Old block should belong to the data-node when replacing";
315
316    done = s.addBlock(newBlock);
317    assert done : "New block should not belong to the data-node when replacing";
318    return newBlock;
319  }
320
321  public void resetBlocks() {
322    setCapacity(0);
323    setRemaining(0);
324    setBlockPoolUsed(0);
325    setDfsUsed(0);
326    setXceiverCount(0);
327    this.invalidateBlocks.clear();
328    this.volumeFailures = 0;
329    // pendingCached, cached, and pendingUncached are protected by the
330    // FSN lock.
331    this.pendingCached.clear();
332    this.cached.clear();
333    this.pendingUncached.clear();
334  }
335  
336  public void clearBlockQueues() {
337    synchronized (invalidateBlocks) {
338      this.invalidateBlocks.clear();
339      this.recoverBlocks.clear();
340      this.replicateBlocks.clear();
341    }
342    // pendingCached, cached, and pendingUncached are protected by the
343    // FSN lock.
344    this.pendingCached.clear();
345    this.cached.clear();
346    this.pendingUncached.clear();
347  }
348
349  public int numBlocks() {
350    int blocks = 0;
351    for (DatanodeStorageInfo entry : getStorageInfos()) {
352      blocks += entry.numBlocks();
353    }
354    return blocks;
355  }
356
357  /**
358   * Updates stats from datanode heartbeat.
359   */
360  public void updateHeartbeat(StorageReport[] reports, long cacheCapacity,
361      long cacheUsed, int xceiverCount, int volFailures) {
362    long totalCapacity = 0;
363    long totalRemaining = 0;
364    long totalBlockPoolUsed = 0;
365    long totalDfsUsed = 0;
366
367    setCacheCapacity(cacheCapacity);
368    setCacheUsed(cacheUsed);
369    setXceiverCount(xceiverCount);
370    setLastUpdate(Time.now());    
371    this.volumeFailures = volFailures;
372    for (StorageReport report : reports) {
373      DatanodeStorageInfo storage = updateStorage(report.getStorage());
374      storage.receivedHeartbeat(report);
375      totalCapacity += report.getCapacity();
376      totalRemaining += report.getRemaining();
377      totalBlockPoolUsed += report.getBlockPoolUsed();
378      totalDfsUsed += report.getDfsUsed();
379    }
380    rollBlocksScheduled(getLastUpdate());
381
382    // Update total metrics for the node.
383    setCapacity(totalCapacity);
384    setRemaining(totalRemaining);
385    setBlockPoolUsed(totalBlockPoolUsed);
386    setDfsUsed(totalDfsUsed);
387  }
388
389  private static class BlockIterator implements Iterator<BlockInfo> {
390    private int index = 0;
391    private final List<Iterator<BlockInfo>> iterators;
392    
393    private BlockIterator(final DatanodeStorageInfo... storages) {
394      List<Iterator<BlockInfo>> iterators = new ArrayList<Iterator<BlockInfo>>();
395      for (DatanodeStorageInfo e : storages) {
396        iterators.add(e.getBlockIterator());
397      }
398      this.iterators = Collections.unmodifiableList(iterators);
399    }
400
401    @Override
402    public boolean hasNext() {
403      update();
404      return !iterators.isEmpty() && iterators.get(index).hasNext();
405    }
406
407    @Override
408    public BlockInfo next() {
409      update();
410      return iterators.get(index).next();
411    }
412    
413    @Override
414    public void remove() {
415      throw new UnsupportedOperationException("Remove unsupported.");
416    }
417    
418    private void update() {
419      while(index < iterators.size() - 1 && !iterators.get(index).hasNext()) {
420        index++;
421      }
422    }
423  }
424
425  Iterator<BlockInfo> getBlockIterator() {
426    return new BlockIterator(getStorageInfos());
427  }
428  Iterator<BlockInfo> getBlockIterator(final String storageID) {
429    return new BlockIterator(getStorageInfo(storageID));
430  }
431
432  /**
433   * Store block replication work.
434   */
435  void addBlockToBeReplicated(Block block, DatanodeStorageInfo[] targets) {
436    assert(block != null && targets != null && targets.length > 0);
437    replicateBlocks.offer(new BlockTargetPair(block, targets));
438  }
439
440  /**
441   * Store block recovery work.
442   */
443  void addBlockToBeRecovered(BlockInfoUnderConstruction block) {
444    if(recoverBlocks.contains(block)) {
445      // this prevents adding the same block twice to the recovery queue
446      BlockManager.LOG.info(block + " is already in the recovery queue");
447      return;
448    }
449    recoverBlocks.offer(block);
450  }
451
452  /**
453   * Store block invalidation work.
454   */
455  void addBlocksToBeInvalidated(List<Block> blocklist) {
456    assert(blocklist != null && blocklist.size() > 0);
457    synchronized (invalidateBlocks) {
458      for(Block blk : blocklist) {
459        invalidateBlocks.add(blk);
460      }
461    }
462  }
463  
464  /**
465   * The number of work items that are pending to be replicated
466   */
467  int getNumberOfBlocksToBeReplicated() {
468    return replicateBlocks.size();
469  }
470
471  /**
472   * The number of block invalidation items that are pending to 
473   * be sent to the datanode
474   */
475  int getNumberOfBlocksToBeInvalidated() {
476    synchronized (invalidateBlocks) {
477      return invalidateBlocks.size();
478    }
479  }
480
481  public List<BlockTargetPair> getReplicationCommand(int maxTransfers) {
482    return replicateBlocks.poll(maxTransfers);
483  }
484
485  public BlockInfoUnderConstruction[] getLeaseRecoveryCommand(int maxTransfers) {
486    List<BlockInfoUnderConstruction> blocks = recoverBlocks.poll(maxTransfers);
487    if(blocks == null)
488      return null;
489    return blocks.toArray(new BlockInfoUnderConstruction[blocks.size()]);
490  }
491
492  /**
493   * Remove the specified number of blocks to be invalidated
494   */
495  public Block[] getInvalidateBlocks(int maxblocks) {
496    synchronized (invalidateBlocks) {
497      Block[] deleteList = invalidateBlocks.pollToArray(new Block[Math.min(
498          invalidateBlocks.size(), maxblocks)]);
499      return deleteList.length == 0 ? null : deleteList;
500    }
501  }
502
503  /**
504   * @return Approximate number of blocks currently scheduled to be written 
505   * to this datanode.
506   */
507  public int getBlocksScheduled() {
508    return currApproxBlocksScheduled + prevApproxBlocksScheduled;
509  }
510
511  /** Increment the number of blocks scheduled. */
512  void incrementBlocksScheduled() {
513    currApproxBlocksScheduled++;
514  }
515  
516  /** Decrement the number of blocks scheduled. */
517  void decrementBlocksScheduled() {
518    if (prevApproxBlocksScheduled > 0) {
519      prevApproxBlocksScheduled--;
520    } else if (currApproxBlocksScheduled > 0) {
521      currApproxBlocksScheduled--;
522    } 
523    // its ok if both counters are zero.
524  }
525  
526  /** Adjusts curr and prev number of blocks scheduled every few minutes. */
527  private void rollBlocksScheduled(long now) {
528    if (now - lastBlocksScheduledRollTime > BLOCKS_SCHEDULED_ROLL_INTERVAL) {
529      prevApproxBlocksScheduled = currApproxBlocksScheduled;
530      currApproxBlocksScheduled = 0;
531      lastBlocksScheduledRollTime = now;
532    }
533  }
534  
535  @Override
536  public int hashCode() {
537    // Super implementation is sufficient
538    return super.hashCode();
539  }
540  
541  @Override
542  public boolean equals(Object obj) {
543    // Sufficient to use super equality as datanodes are uniquely identified
544    // by DatanodeID
545    return (this == obj) || super.equals(obj);
546  }
547
548  /** Decommissioning status */
549  public class DecommissioningStatus {
550    private int underReplicatedBlocks;
551    private int decommissionOnlyReplicas;
552    private int underReplicatedInOpenFiles;
553    private long startTime;
554    
555    synchronized void set(int underRep,
556        int onlyRep, int underConstruction) {
557      if (isDecommissionInProgress() == false) {
558        return;
559      }
560      underReplicatedBlocks = underRep;
561      decommissionOnlyReplicas = onlyRep;
562      underReplicatedInOpenFiles = underConstruction;
563    }
564
565    /** @return the number of under-replicated blocks */
566    public synchronized int getUnderReplicatedBlocks() {
567      if (isDecommissionInProgress() == false) {
568        return 0;
569      }
570      return underReplicatedBlocks;
571    }
572    /** @return the number of decommission-only replicas */
573    public synchronized int getDecommissionOnlyReplicas() {
574      if (isDecommissionInProgress() == false) {
575        return 0;
576      }
577      return decommissionOnlyReplicas;
578    }
579    /** @return the number of under-replicated blocks in open files */
580    public synchronized int getUnderReplicatedInOpenFiles() {
581      if (isDecommissionInProgress() == false) {
582        return 0;
583      }
584      return underReplicatedInOpenFiles;
585    }
586    /** Set start time */
587    public synchronized void setStartTime(long time) {
588      startTime = time;
589    }
590    /** @return start time */
591    public synchronized long getStartTime() {
592      if (isDecommissionInProgress() == false) {
593        return 0;
594      }
595      return startTime;
596    }
597  }  // End of class DecommissioningStatus
598
599  /**
600   * Set the flag to indicate if this datanode is disallowed from communicating
601   * with the namenode.
602   */
603  public void setDisallowed(boolean flag) {
604    disallowed = flag;
605  }
606  /** Is the datanode disallowed from communicating with the namenode? */
607  public boolean isDisallowed() {
608    return disallowed;
609  }
610
611  /**
612   * @return number of failed volumes in the datanode.
613   */
614  public int getVolumeFailures() {
615    return volumeFailures;
616  }
617
618  /**
619   * @param nodeReg DatanodeID to update registration for.
620   */
621  @Override
622  public void updateRegInfo(DatanodeID nodeReg) {
623    super.updateRegInfo(nodeReg);
624    
625    // must re-process IBR after re-registration
626    for(DatanodeStorageInfo storage : getStorageInfos()) {
627      storage.setBlockReportCount(0);
628    }
629  }
630
631  /**
632   * @return balancer bandwidth in bytes per second for this datanode
633   */
634  public long getBalancerBandwidth() {
635    return this.bandwidth;
636  }
637
638  /**
639   * @param bandwidth balancer bandwidth in bytes per second for this datanode
640   */
641  public void setBalancerBandwidth(long bandwidth) {
642    this.bandwidth = bandwidth;
643  }
644
645  @Override
646  public String dumpDatanode() {
647    StringBuilder sb = new StringBuilder(super.dumpDatanode());
648    int repl = replicateBlocks.size();
649    if (repl > 0) {
650      sb.append(" ").append(repl).append(" blocks to be replicated;");
651    }
652    int inval = invalidateBlocks.size();
653    if (inval > 0) {
654      sb.append(" ").append(inval).append(" blocks to be invalidated;");      
655    }
656    int recover = recoverBlocks.size();
657    if (recover > 0) {
658      sb.append(" ").append(recover).append(" blocks to be recovered;");
659    }
660    return sb.toString();
661  }
662
663  DatanodeStorageInfo updateStorage(DatanodeStorage s) {
664    synchronized (storageMap) {
665      DatanodeStorageInfo storage = storageMap.get(s.getStorageID());
666      if (storage == null) {
667        LOG.info("Adding new storage ID " + s.getStorageID() +
668                 " for DN " + getXferAddr());
669        storage = new DatanodeStorageInfo(this, s);
670        storageMap.put(s.getStorageID(), storage);
671      } else if (storage.getState() != s.getState() ||
672                 storage.getStorageType() != s.getStorageType()) {
673        // For backwards compatibility, make sure that the type and
674        // state are updated. Some reports from older datanodes do
675        // not include these fields so we may have assumed defaults.
676        // This check can be removed in the next major release after
677        // 2.4.
678        storage.updateFromStorage(s);
679        storageMap.put(storage.getStorageID(), storage);
680      }
681      return storage;
682    }
683  }
684
685  /**
686   * @return   The time at which we last sent caching directives to this 
687   *           DataNode, in monotonic milliseconds.
688   */
689  public long getLastCachingDirectiveSentTimeMs() {
690    return this.lastCachingDirectiveSentTimeMs;
691  }
692
693  /**
694   * @param time  The time at which we last sent caching directives to this 
695   *              DataNode, in monotonic milliseconds.
696   */
697  public void setLastCachingDirectiveSentTimeMs(long time) {
698    this.lastCachingDirectiveSentTimeMs = time;
699  }
700  
701  /**
702   * checks whether atleast first block report has been received
703   * @return
704   */
705  public boolean checkBlockReportReceived() {
706    if(this.getStorageInfos().length == 0) {
707      return false;
708    }
709    for(DatanodeStorageInfo storageInfo: this.getStorageInfos()) {
710      if(storageInfo.getBlockReportCount() == 0 )
711        return false;
712    }
713    return true;
714 }
715}
716