001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.net;
019
020import java.util.ArrayList;
021import java.util.List;
022import java.util.Collection;
023import java.util.List;
024import java.util.Random;
025import java.util.concurrent.locks.ReadWriteLock;
026import java.util.concurrent.locks.ReentrantReadWriteLock;
027
028import org.apache.commons.logging.Log;
029import org.apache.commons.logging.LogFactory;
030import org.apache.hadoop.classification.InterfaceAudience;
031import org.apache.hadoop.classification.InterfaceStability;
032import org.apache.hadoop.conf.Configuration;
033import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
034import org.apache.hadoop.util.ReflectionUtils;
035
/** The class represents a cluster of computers with a tree hierarchical
 * network topology.
 * For example, a cluster may consist of many data centers filled
 * with racks of computers.
 * In a network topology, leaves represent data nodes (computers) and inner
 * nodes represent switches/routers that manage traffic in/out of data centers
 * or racks.
 *
 */
045@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
046@InterfaceStability.Unstable
047public class NetworkTopology {
048  public final static String DEFAULT_RACK = "/default-rack";
049  public final static int DEFAULT_HOST_LEVEL = 2;
050  public static final Log LOG = 
051    LogFactory.getLog(NetworkTopology.class);
052    
053  public static class InvalidTopologyException extends RuntimeException {
054    private static final long serialVersionUID = 1L;
055    public InvalidTopologyException(String msg) {
056      super(msg);
057    }
058  }
059  
060  /**
061   * Get an instance of NetworkTopology based on the value of the configuration
062   * parameter net.topology.impl.
063   * 
064   * @param conf the configuration to be used
065   * @return an instance of NetworkTopology
066   */
067  public static NetworkTopology getInstance(Configuration conf){
068    return ReflectionUtils.newInstance(
069        conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
070        NetworkTopology.class, NetworkTopology.class), conf);
071  }
072
073  /** InnerNode represents a switch/router of a data center or rack.
074   * Different from a leaf node, it has non-null children.
075   */
076  static class InnerNode extends NodeBase {
077    protected List<Node> children=new ArrayList<Node>();
078    private int numOfLeaves;
079        
080    /** Construct an InnerNode from a path-like string */
081    InnerNode(String path) {
082      super(path);
083    }
084        
085    /** Construct an InnerNode from its name and its network location */
086    InnerNode(String name, String location) {
087      super(name, location);
088    }
089        
090    /** Construct an InnerNode
091     * from its name, its network location, its parent, and its level */
092    InnerNode(String name, String location, InnerNode parent, int level) {
093      super(name, location, parent, level);
094    }
095        
096    /** @return its children */
097    List<Node> getChildren() {return children;}
098        
099    /** @return the number of children this node has */
100    int getNumOfChildren() {
101      return children.size();
102    }
103        
104    /** Judge if this node represents a rack 
105     * @return true if it has no child or its children are not InnerNodes
106     */ 
107    boolean isRack() {
108      if (children.isEmpty()) {
109        return true;
110      }
111            
112      Node firstChild = children.get(0);
113      if (firstChild instanceof InnerNode) {
114        return false;
115      }
116            
117      return true;
118    }
119        
120    /** Judge if this node is an ancestor of node <i>n</i>
121     * 
122     * @param n a node
123     * @return true if this node is an ancestor of <i>n</i>
124     */
125    boolean isAncestor(Node n) {
126      return getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) ||
127        (n.getNetworkLocation()+NodeBase.PATH_SEPARATOR_STR).
128        startsWith(getPath(this)+NodeBase.PATH_SEPARATOR_STR);
129    }
130        
131    /** Judge if this node is the parent of node <i>n</i>
132     * 
133     * @param n a node
134     * @return true if this node is the parent of <i>n</i>
135     */
136    boolean isParent(Node n) {
137      return n.getNetworkLocation().equals(getPath(this));
138    }
139        
140    /* Return a child name of this node who is an ancestor of node <i>n</i> */
141    private String getNextAncestorName(Node n) {
142      if (!isAncestor(n)) {
143        throw new IllegalArgumentException(
144                                           this + "is not an ancestor of " + n);
145      }
146      String name = n.getNetworkLocation().substring(getPath(this).length());
147      if (name.charAt(0) == PATH_SEPARATOR) {
148        name = name.substring(1);
149      }
150      int index=name.indexOf(PATH_SEPARATOR);
151      if (index !=-1)
152        name = name.substring(0, index);
153      return name;
154    }
155        
156    /** Add node <i>n</i> to the subtree of this node 
157     * @param n node to be added
158     * @return true if the node is added; false otherwise
159     */
160    boolean add(Node n) {
161      if (!isAncestor(n))
162        throw new IllegalArgumentException(n.getName()+", which is located at "
163                +n.getNetworkLocation()+", is not a decendent of "
164                +getPath(this));
165      if (isParent(n)) {
166        // this node is the parent of n; add n directly
167        n.setParent(this);
168        n.setLevel(this.level+1);
169        for(int i=0; i<children.size(); i++) {
170          if (children.get(i).getName().equals(n.getName())) {
171            children.set(i, n);
172            return false;
173          }
174        }
175        children.add(n);
176        numOfLeaves++;
177        return true;
178      } else {
179        // find the next ancestor node
180        String parentName = getNextAncestorName(n);
181        InnerNode parentNode = null;
182        for(int i=0; i<children.size(); i++) {
183          if (children.get(i).getName().equals(parentName)) {
184            parentNode = (InnerNode)children.get(i);
185            break;
186          }
187        }
188        if (parentNode == null) {
189          // create a new InnerNode
190          parentNode = createParentNode(parentName);
191          children.add(parentNode);
192        }
193        // add n to the subtree of the next ancestor node
194        if (parentNode.add(n)) {
195          numOfLeaves++;
196          return true;
197        } else {
198          return false;
199        }
200      }
201    }
202
203    /**
204     * Creates a parent node to be added to the list of children.  
205     * Creates a node using the InnerNode four argument constructor specifying 
206     * the name, location, parent, and level of this node.
207     * 
208     * <p>To be overridden in subclasses for specific InnerNode implementations,
209     * as alternative to overriding the full {@link #add(Node)} method.
210     * 
211     * @param parentName The name of the parent node
212     * @return A new inner node
213     * @see InnerNode#InnerNode(String, String, InnerNode, int)
214     */
215    protected InnerNode createParentNode(String parentName) {
216      return new InnerNode(parentName, getPath(this), this, this.getLevel()+1);
217    }
218
219    /** Remove node <i>n</i> from the subtree of this node
220     * @param n node to be deleted 
221     * @return true if the node is deleted; false otherwise
222     */
223    boolean remove(Node n) {
224      String parent = n.getNetworkLocation();
225      String currentPath = getPath(this);
226      if (!isAncestor(n))
227        throw new IllegalArgumentException(n.getName()
228                                           +", which is located at "
229                                           +parent+", is not a descendent of "+currentPath);
230      if (isParent(n)) {
231        // this node is the parent of n; remove n directly
232        for(int i=0; i<children.size(); i++) {
233          if (children.get(i).getName().equals(n.getName())) {
234            children.remove(i);
235            numOfLeaves--;
236            n.setParent(null);
237            return true;
238          }
239        }
240        return false;
241      } else {
242        // find the next ancestor node: the parent node
243        String parentName = getNextAncestorName(n);
244        InnerNode parentNode = null;
245        int i;
246        for(i=0; i<children.size(); i++) {
247          if (children.get(i).getName().equals(parentName)) {
248            parentNode = (InnerNode)children.get(i);
249            break;
250          }
251        }
252        if (parentNode==null) {
253          return false;
254        }
255        // remove n from the parent node
256        boolean isRemoved = parentNode.remove(n);
257        // if the parent node has no children, remove the parent node too
258        if (isRemoved) {
259          if (parentNode.getNumOfChildren() == 0) {
260            children.remove(i);
261          }
262          numOfLeaves--;
263        }
264        return isRemoved;
265      }
266    } // end of remove
267        
268    /** Given a node's string representation, return a reference to the node
269     * @param loc string location of the form /rack/node
270     * @return null if the node is not found or the childnode is there but
271     * not an instance of {@link InnerNode}
272     */
273    private Node getLoc(String loc) {
274      if (loc == null || loc.length() == 0) return this;
275            
276      String[] path = loc.split(PATH_SEPARATOR_STR, 2);
277      Node childnode = null;
278      for(int i=0; i<children.size(); i++) {
279        if (children.get(i).getName().equals(path[0])) {
280          childnode = children.get(i);
281        }
282      }
283      if (childnode == null) return null; // non-existing node
284      if (path.length == 1) return childnode;
285      if (childnode instanceof InnerNode) {
286        return ((InnerNode)childnode).getLoc(path[1]);
287      } else {
288        return null;
289      }
290    }
291        
292    /** get <i>leafIndex</i> leaf of this subtree 
293     * if it is not in the <i>excludedNode</i>
294     *
295     * @param leafIndex an indexed leaf of the node
296     * @param excludedNode an excluded node (can be null)
297     * @return
298     */
299    Node getLeaf(int leafIndex, Node excludedNode) {
300      int count=0;
301      // check if the excluded node a leaf
302      boolean isLeaf =
303        excludedNode == null || !(excludedNode instanceof InnerNode);
304      // calculate the total number of excluded leaf nodes
305      int numOfExcludedLeaves =
306        isLeaf ? 1 : ((InnerNode)excludedNode).getNumOfLeaves();
307      if (isLeafParent()) { // children are leaves
308        if (isLeaf) { // excluded node is a leaf node
309          int excludedIndex = children.indexOf(excludedNode);
310          if (excludedIndex != -1 && leafIndex >= 0) {
311            // excluded node is one of the children so adjust the leaf index
312            leafIndex = leafIndex>=excludedIndex ? leafIndex+1 : leafIndex;
313          }
314        }
315        // range check
316        if (leafIndex<0 || leafIndex>=this.getNumOfChildren()) {
317          return null;
318        }
319        return children.get(leafIndex);
320      } else {
321        for(int i=0; i<children.size(); i++) {
322          InnerNode child = (InnerNode)children.get(i);
323          if (excludedNode == null || excludedNode != child) {
324            // not the excludedNode
325            int numOfLeaves = child.getNumOfLeaves();
326            if (excludedNode != null && child.isAncestor(excludedNode)) {
327              numOfLeaves -= numOfExcludedLeaves;
328            }
329            if (count+numOfLeaves > leafIndex) {
330              // the leaf is in the child subtree
331              return child.getLeaf(leafIndex-count, excludedNode);
332            } else {
333              // go to the next child
334              count = count+numOfLeaves;
335            }
336          } else { // it is the excluededNode
337            // skip it and set the excludedNode to be null
338            excludedNode = null;
339          }
340        }
341        return null;
342      }
343    }
344    
345    protected boolean isLeafParent() {
346      return isRack();
347    }
348
349    /**
350      * Determine if children a leaves, default implementation calls {@link #isRack()}
351      * <p>To be overridden in subclasses for specific InnerNode implementations,
352      * as alternative to overriding the full {@link #getLeaf(int, Node)} method.
353      * 
354      * @return true if children are leaves, false otherwise
355      */
356    protected boolean areChildrenLeaves() {
357      return isRack();
358    }
359
360    /**
361     * Get number of leaves.
362     */
363    int getNumOfLeaves() {
364      return numOfLeaves;
365    }
366  } // end of InnerNode
367
368  /**
369   * the root cluster map
370   */
371  InnerNode clusterMap;
372  /** Depth of all leaf nodes */
373  private int depthOfAllLeaves = -1;
374  /** rack counter */
375  protected int numOfRacks = 0;
376  /** the lock used to manage access */
377  protected ReadWriteLock netlock = new ReentrantReadWriteLock();
378
379  public NetworkTopology() {
380    clusterMap = new InnerNode(InnerNode.ROOT);
381  }
382
383  /** Add a leaf node
384   * Update node counter & rack counter if necessary
385   * @param node node to be added; can be null
386   * @exception IllegalArgumentException if add a node to a leave 
387                                         or node to be added is not a leaf
388   */
389  public void add(Node node) {
390    if (node==null) return;
391    String oldTopoStr = this.toString();
392    if( node instanceof InnerNode ) {
393      throw new IllegalArgumentException(
394        "Not allow to add an inner node: "+NodeBase.getPath(node));
395    }
396    int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1;
397    netlock.writeLock().lock();
398    try {
399      if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) {
400        LOG.error("Error: can't add leaf node " + NodeBase.getPath(node) +
401            " at depth " + newDepth + " to topology:\n" + oldTopoStr);
402        throw new InvalidTopologyException("Failed to add " + NodeBase.getPath(node) +
403            ": You cannot have a rack and a non-rack node at the same " +
404            "level of the network topology.");
405      }
406      Node rack = getNodeForNetworkLocation(node);
407      if (rack != null && !(rack instanceof InnerNode)) {
408        throw new IllegalArgumentException("Unexpected data node " 
409                                           + node.toString() 
410                                           + " at an illegal network location");
411      }
412      if (clusterMap.add(node)) {
413        LOG.info("Adding a new node: "+NodeBase.getPath(node));
414        if (rack == null) {
415          numOfRacks++;
416        }
417        if (!(node instanceof InnerNode)) {
418          if (depthOfAllLeaves == -1) {
419            depthOfAllLeaves = node.getLevel();
420          }
421        }
422      }
423      if(LOG.isDebugEnabled()) {
424        LOG.debug("NetworkTopology became:\n" + this.toString());
425      }
426    } finally {
427      netlock.writeLock().unlock();
428    }
429  }
430  
431  /**
432   * Return a reference to the node given its string representation.
433   * Default implementation delegates to {@link #getNode(String)}.
434   * 
435   * <p>To be overridden in subclasses for specific NetworkTopology 
436   * implementations, as alternative to overriding the full {@link #add(Node)}
437   *  method.
438   * 
439   * @param node The string representation of this node's network location is
440   * used to retrieve a Node object. 
441   * @return a reference to the node; null if the node is not in the tree
442   * 
443   * @see #add(Node)
444   * @see #getNode(String)
445   */
446  protected Node getNodeForNetworkLocation(Node node) {
447    return getNode(node.getNetworkLocation());
448  }
449  
450  /**
451   * Given a string representation of a rack, return its children
452   * @param loc a path-like string representation of a rack
453   * @return a newly allocated list with all the node's children
454   */
455  public List<Node> getDatanodesInRack(String loc) {
456    netlock.readLock().lock();
457    try {
458      loc = NodeBase.normalize(loc);
459      if (!NodeBase.ROOT.equals(loc)) {
460        loc = loc.substring(1);
461      }
462      InnerNode rack = (InnerNode) clusterMap.getLoc(loc);
463      if (rack == null) {
464        return null;
465      }
466      return new ArrayList<Node>(rack.getChildren());
467    } finally {
468      netlock.readLock().unlock();
469    }
470  }
471
472  /** Remove a node
473   * Update node counter and rack counter if necessary
474   * @param node node to be removed; can be null
475   */ 
476  public void remove(Node node) {
477    if (node==null) return;
478    if( node instanceof InnerNode ) {
479      throw new IllegalArgumentException(
480        "Not allow to remove an inner node: "+NodeBase.getPath(node));
481    }
482    LOG.info("Removing a node: "+NodeBase.getPath(node));
483    netlock.writeLock().lock();
484    try {
485      if (clusterMap.remove(node)) {
486        InnerNode rack = (InnerNode)getNode(node.getNetworkLocation());
487        if (rack == null) {
488          numOfRacks--;
489        }
490      }
491      if(LOG.isDebugEnabled()) {
492        LOG.debug("NetworkTopology became:\n" + this.toString());
493      }
494    } finally {
495      netlock.writeLock().unlock();
496    }
497  }
498
499  /** Check if the tree contains node <i>node</i>
500   * 
501   * @param node a node
502   * @return true if <i>node</i> is already in the tree; false otherwise
503   */
504  public boolean contains(Node node) {
505    if (node == null) return false;
506    netlock.readLock().lock();
507    try {
508      Node parent = node.getParent();
509      for (int level = node.getLevel(); parent != null && level > 0;
510           parent = parent.getParent(), level--) {
511        if (parent == clusterMap) {
512          return true;
513        }
514      }
515    } finally {
516      netlock.readLock().unlock();
517    }
518    return false; 
519  }
520    
521  /** Given a string representation of a node, return its reference
522   * 
523   * @param loc
524   *          a path-like string representation of a node
525   * @return a reference to the node; null if the node is not in the tree
526   */
527  public Node getNode(String loc) {
528    netlock.readLock().lock();
529    try {
530      loc = NodeBase.normalize(loc);
531      if (!NodeBase.ROOT.equals(loc))
532        loc = loc.substring(1);
533      return clusterMap.getLoc(loc);
534    } finally {
535      netlock.readLock().unlock();
536    }
537  }
538  
539  /** Given a string representation of a rack for a specific network
540   *  location
541   * 
542   * To be overridden in subclasses for specific NetworkTopology 
543   * implementations, as alternative to overriding the full 
544   * {@link #getRack(String)} method.
545   * @param loc
546   *          a path-like string representation of a network location
547   * @return a rack string
548   */
549  public String getRack(String loc) {
550    return loc;
551  }
552  
553  /** @return the total number of racks */
554  public int getNumOfRacks() {
555    netlock.readLock().lock();
556    try {
557      return numOfRacks;
558    } finally {
559      netlock.readLock().unlock();
560    }
561  }
562
563  /** @return the total number of leaf nodes */
564  public int getNumOfLeaves() {
565    netlock.readLock().lock();
566    try {
567      return clusterMap.getNumOfLeaves();
568    } finally {
569      netlock.readLock().unlock();
570    }
571  }
572
573  /** Return the distance between two nodes
574   * It is assumed that the distance from one node to its parent is 1
575   * The distance between two nodes is calculated by summing up their distances
576   * to their closest common ancestor.
577   * @param node1 one node
578   * @param node2 another node
579   * @return the distance between node1 and node2 which is zero if they are the same
580   *  or {@link Integer#MAX_VALUE} if node1 or node2 do not belong to the cluster
581   */
582  public int getDistance(Node node1, Node node2) {
583    if (node1 == node2) {
584      return 0;
585    }
586    Node n1=node1, n2=node2;
587    int dis = 0;
588    netlock.readLock().lock();
589    try {
590      int level1=node1.getLevel(), level2=node2.getLevel();
591      while(n1!=null && level1>level2) {
592        n1 = n1.getParent();
593        level1--;
594        dis++;
595      }
596      while(n2!=null && level2>level1) {
597        n2 = n2.getParent();
598        level2--;
599        dis++;
600      }
601      while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) {
602        n1=n1.getParent();
603        n2=n2.getParent();
604        dis+=2;
605      }
606    } finally {
607      netlock.readLock().unlock();
608    }
609    if (n1==null) {
610      LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node1));
611      return Integer.MAX_VALUE;
612    }
613    if (n2==null) {
614      LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node2));
615      return Integer.MAX_VALUE;
616    }
617    return dis+2;
618  }
619
620  /** Check if two nodes are on the same rack
621   * @param node1 one node (can be null)
622   * @param node2 another node (can be null)
623   * @return true if node1 and node2 are on the same rack; false otherwise
624   * @exception IllegalArgumentException when either node1 or node2 is null, or
625   * node1 or node2 do not belong to the cluster
626   */
627  public boolean isOnSameRack( Node node1,  Node node2) {
628    if (node1 == null || node2 == null) {
629      return false;
630    }
631      
632    netlock.readLock().lock();
633    try {
634      return isSameParents(node1, node2);
635    } finally {
636      netlock.readLock().unlock();
637    }
638  }
639  
640  /**
641   * Check if network topology is aware of NodeGroup
642   */
643  public boolean isNodeGroupAware() {
644    return false;
645  }
646  
647  /** 
648   * Return false directly as not aware of NodeGroup, to be override in sub-class
649   */
650  public boolean isOnSameNodeGroup(Node node1, Node node2) {
651    return false;
652  }
653
654  /**
655   * Compare the parents of each node for equality
656   * 
657   * <p>To be overridden in subclasses for specific NetworkTopology 
658   * implementations, as alternative to overriding the full 
659   * {@link #isOnSameRack(Node, Node)} method.
660   * 
661   * @param node1 the first node to compare
662   * @param node2 the second node to compare
663   * @return true if their parents are equal, false otherwise
664   * 
665   * @see #isOnSameRack(Node, Node)
666   */
667  protected boolean isSameParents(Node node1, Node node2) {
668    return node1.getParent()==node2.getParent();
669  }
670
671  final protected static Random r = new Random();
672  /** randomly choose one node from <i>scope</i>
673   * if scope starts with ~, choose one from the all nodes except for the
674   * ones in <i>scope</i>; otherwise, choose one from <i>scope</i>
675   * @param scope range of nodes from which a node will be chosen
676   * @return the chosen node
677   */
678  public Node chooseRandom(String scope) {
679    netlock.readLock().lock();
680    try {
681      if (scope.startsWith("~")) {
682        return chooseRandom(NodeBase.ROOT, scope.substring(1));
683      } else {
684        return chooseRandom(scope, null);
685      }
686    } finally {
687      netlock.readLock().unlock();
688    }
689  }
690
691  private Node chooseRandom(String scope, String excludedScope){
692    if (excludedScope != null) {
693      if (scope.startsWith(excludedScope)) {
694        return null;
695      }
696      if (!excludedScope.startsWith(scope)) {
697        excludedScope = null;
698      }
699    }
700    Node node = getNode(scope);
701    if (!(node instanceof InnerNode)) {
702      return node;
703    }
704    InnerNode innerNode = (InnerNode)node;
705    int numOfDatanodes = innerNode.getNumOfLeaves();
706    if (excludedScope == null) {
707      node = null;
708    } else {
709      node = getNode(excludedScope);
710      if (!(node instanceof InnerNode)) {
711        numOfDatanodes -= 1;
712      } else {
713        numOfDatanodes -= ((InnerNode)node).getNumOfLeaves();
714      }
715    }
716    if (numOfDatanodes == 0) {
717      throw new InvalidTopologyException(
718          "Failed to find datanode (scope=\"" + String.valueOf(scope) +
719          "\" excludedScope=\"" + String.valueOf(excludedScope) + "\").");
720    }
721    int leaveIndex = r.nextInt(numOfDatanodes);
722    return innerNode.getLeaf(leaveIndex, node);
723  }
724
725  /** return leaves in <i>scope</i>
726   * @param scope a path string
727   * @return leaves nodes under specific scope
728   */
729  public List<Node> getLeaves(String scope) {
730    Node node = getNode(scope);
731    List<Node> leafNodes = new ArrayList<Node>();
732    if (!(node instanceof InnerNode)) {
733      leafNodes.add(node);
734    } else {
735      InnerNode innerNode = (InnerNode) node;
736      for (int i=0;i<innerNode.getNumOfLeaves();i++) {
737        leafNodes.add(innerNode.getLeaf(i, null));
738      }
739    }
740    return leafNodes;
741  }
742
743  /** return the number of leaves in <i>scope</i> but not in <i>excludedNodes</i>
744   * if scope starts with ~, return the number of nodes that are not
745   * in <i>scope</i> and <i>excludedNodes</i>; 
746   * @param scope a path string that may start with ~
747   * @param excludedNodes a list of nodes
748   * @return number of available nodes
749   */
750  public int countNumOfAvailableNodes(String scope,
751                                      Collection<Node> excludedNodes) {
752    boolean isExcluded=false;
753    if (scope.startsWith("~")) {
754      isExcluded=true;
755      scope=scope.substring(1);
756    }
757    scope = NodeBase.normalize(scope);
758    int count=0; // the number of nodes in both scope & excludedNodes
759    netlock.readLock().lock();
760    try {
761      for(Node node:excludedNodes) {
762        if ((NodeBase.getPath(node)+NodeBase.PATH_SEPARATOR_STR).
763            startsWith(scope+NodeBase.PATH_SEPARATOR_STR)) {
764          count++;
765        }
766      }
767      Node n=getNode(scope);
768      int scopeNodeCount=1;
769      if (n instanceof InnerNode) {
770        scopeNodeCount=((InnerNode)n).getNumOfLeaves();
771      }
772      if (isExcluded) {
773        return clusterMap.getNumOfLeaves()-
774          scopeNodeCount-excludedNodes.size()+count;
775      } else {
776        return scopeNodeCount-count;
777      }
778    } finally {
779      netlock.readLock().unlock();
780    }
781  }
782
783  /** convert a network tree to a string */
784  @Override
785  public String toString() {
786    // print the number of racks
787    StringBuilder tree = new StringBuilder();
788    tree.append("Number of racks: ");
789    tree.append(numOfRacks);
790    tree.append("\n");
791    // print the number of leaves
792    int numOfLeaves = getNumOfLeaves();
793    tree.append("Expected number of leaves:");
794    tree.append(numOfLeaves);
795    tree.append("\n");
796    // print nodes
797    for(int i=0; i<numOfLeaves; i++) {
798      tree.append(NodeBase.getPath(clusterMap.getLeaf(i, null)));
799      tree.append("\n");
800    }
801    return tree.toString();
802  }
803  
804  /**
805   * Divide networklocation string into two parts by last separator, and get 
806   * the first part here.
807   * 
808   * @param networkLocation
809   * @return
810   */
811  public static String getFirstHalf(String networkLocation) {
812    int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
813    return networkLocation.substring(0, index);
814  }
815
816  /**
817   * Divide networklocation string into two parts by last separator, and get 
818   * the second part here.
819   * 
820   * @param networkLocation
821   * @return
822   */
823  public static String getLastHalf(String networkLocation) {
824    int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
825    return networkLocation.substring(index);
826  }
827
828  /** swap two array items */
829  static protected void swap(Node[] nodes, int i, int j) {
830    Node tempNode;
831    tempNode = nodes[j];
832    nodes[j] = nodes[i];
833    nodes[i] = tempNode;
834  }
835  
836  /** Sort nodes array by their distances to <i>reader</i>
837   * It linearly scans the array, if a local node is found, swap it with
838   * the first element of the array.
839   * If a local rack node is found, swap it with the first element following
840   * the local node.
841   * If neither local node or local rack node is found, put a random replica
842   * location at position 0.
843   * It leaves the rest nodes untouched.
844   * @param reader the node that wishes to read a block from one of the nodes
845   * @param nodes the list of nodes containing data for the reader
846   */
847  public void pseudoSortByDistance( Node reader, Node[] nodes ) {
848    int tempIndex = 0;
849    int localRackNode = -1;
850    if (reader != null ) {
851      //scan the array to find the local node & local rack node
852      for(int i=0; i<nodes.length; i++) {
853        if(tempIndex == 0 && reader == nodes[i]) { //local node
854          //swap the local node and the node at position 0
855          if( i != 0 ) {
856            swap(nodes, tempIndex, i);
857          }
858          tempIndex=1;
859          if(localRackNode != -1 ) {
860            if(localRackNode == 0) {
861              localRackNode = i;
862            }
863            break;
864          }
865        } else if(localRackNode == -1 && isOnSameRack(reader, nodes[i])) {
866          //local rack
867          localRackNode = i;
868          if(tempIndex != 0 ) break;
869        }
870      }
871
872      // swap the local rack node and the node at position tempIndex
873      if(localRackNode != -1 && localRackNode != tempIndex ) {
874        swap(nodes, tempIndex, localRackNode);
875        tempIndex++;
876      }
877    }
878    
879    // put a random node at position 0 if it is not a local/local-rack node
880    if(tempIndex == 0 && localRackNode == -1 && nodes.length != 0) {
881      swap(nodes, 0, r.nextInt(nodes.length));
882    }
883  }
884  
885}