001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs;
019
020import java.io.BufferedOutputStream;
021import java.io.DataInputStream;
022import java.io.DataOutputStream;
023import java.io.FileInputStream;
024import java.io.IOException;
025import java.net.InetSocketAddress;
026
027import org.apache.commons.lang.mutable.MutableBoolean;
028import org.apache.commons.logging.LogFactory;
029import org.apache.commons.logging.Log;
030import org.apache.hadoop.classification.InterfaceAudience;
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.hdfs.client.ShortCircuitCache;
033import org.apache.hadoop.hdfs.client.ShortCircuitCache.ShortCircuitReplicaCreator;
034import org.apache.hadoop.hdfs.client.ShortCircuitReplica;
035import org.apache.hadoop.hdfs.client.ShortCircuitReplicaInfo;
036import org.apache.hadoop.hdfs.ShortCircuitShm.Slot;
037import org.apache.hadoop.hdfs.ShortCircuitShm.SlotId;
038import org.apache.hadoop.hdfs.net.DomainPeer;
039import org.apache.hadoop.hdfs.net.Peer;
040import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
041import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
042import org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException;
043import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
044import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
045import org.apache.hadoop.hdfs.protocolPB.PBHelper;
046import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
047import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
048import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
049import org.apache.hadoop.io.IOUtils;
050import org.apache.hadoop.ipc.RemoteException;
051import org.apache.hadoop.net.unix.DomainSocket;
052import org.apache.hadoop.security.AccessControlException;
053import org.apache.hadoop.security.UserGroupInformation;
054import org.apache.hadoop.security.token.SecretManager.InvalidToken;
055import org.apache.hadoop.security.token.Token;
056import org.apache.hadoop.util.Time;
057
058import com.google.common.annotations.VisibleForTesting;
059import com.google.common.base.Preconditions;
060
061
062/** 
063 * Utility class to create BlockReader implementations.
064 */
065@InterfaceAudience.Private
066public class BlockReaderFactory implements ShortCircuitReplicaCreator {
  /** Logger used by this factory for trace, debug and warning output. */
  static final Log LOG = LogFactory.getLog(BlockReaderFactory.class);

  /**
   * Optional hook consulted first by createShortCircuitReplicaInfo().  When
   * it is non-null and returns a non-null ShortCircuitReplicaInfo, that
   * result is returned instead of contacting the DataNode.  Null by default.
   */
  @VisibleForTesting
  static ShortCircuitReplicaCreator
      createShortCircuitReplicaInfoCallback = null;

  /** The client configuration passed to the constructor. */
  private final DFSClient.Conf conf;
074
075  /**
076   * The file name, for logging and debugging purposes.
077   */
078  private String fileName;
079
080  /**
081   * The block ID and block pool ID to use.
082   */
083  private ExtendedBlock block;
084
085  /**
086   * The block token to use for security purposes.
087   */
088  private Token<BlockTokenIdentifier> token;
089
090  /**
091   * The offset within the block to start reading at.
092   */
093  private long startOffset;
094
095  /**
096   * If false, we won't try to verify the block checksum.
097   */
098  private boolean verifyChecksum;
099
100  /**
101   * The name of this client.
102   */
103  private String clientName; 
104
105  /**
106   * The DataNode we're talking to.
107   */
108  private DatanodeInfo datanode;
109
110  /**
111   * If false, we won't try short-circuit local reads.
112   */
113  private boolean allowShortCircuitLocalReads;
114
115  /**
116   * The ClientContext to use for things like the PeerCache.
117   */
118  private ClientContext clientContext;
119
120  /**
121   * Number of bytes to read.  -1 indicates no limit.
122   */
123  private long length = -1;
124
125  /**
126   * Caching strategy to use when reading the block.
127   */
128  private CachingStrategy cachingStrategy;
129
130  /**
131   * Socket address to use to connect to peer.
132   */
133  private InetSocketAddress inetSocketAddress;
134
135  /**
136   * Remote peer factory to use to create a peer, if needed.
137   */
138  private RemotePeerFactory remotePeerFactory;
139
140  /**
   * UserGroupInformation to use for legacy block reader local objects, if needed.
142   */
143  private UserGroupInformation userGroupInformation;
144
145  /**
146   * Configuration to use for legacy block reader local objects, if needed.
147   */
148  private Configuration configuration;
149
150  /**
151   * Information about the domain socket path we should use to connect to the
152   * local peer-- or null if we haven't examined the local domain socket.
153   */
154  private DomainSocketFactory.PathInfo pathInfo;
155
156  /**
157   * The remaining number of times that we'll try to pull a socket out of the
158   * cache.
159   */
160  private int remainingCacheTries;
161
  /**
   * Create a factory whose options are filled in through the set* methods.
   *
   * @param conf  The client configuration.  Its nCachedConnRetry value is the
   *              initial budget of attempts to reuse a cached peer.
   */
  public BlockReaderFactory(DFSClient.Conf conf) {
    this.conf = conf;
    this.remainingCacheTries = conf.nCachedConnRetry;
  }

  /**
   * Set the file name, used for logging and debugging purposes.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setFileName(String fileName) {
    this.fileName = fileName;
    return this;
  }

  /**
   * Set the block (block ID and block pool ID) to read.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setBlock(ExtendedBlock block) {
    this.block = block;
    return this;
  }

  /**
   * Set the block token to use for security purposes.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setBlockToken(Token<BlockTokenIdentifier> token) {
    this.token = token;
    return this;
  }

  /**
   * Set the offset within the block to start reading at.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setStartOffset(long startOffset) {
    this.startOffset = startOffset;
    return this;
  }

  /**
   * Set whether the block checksum should be verified.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setVerifyChecksum(boolean verifyChecksum) {
    this.verifyChecksum = verifyChecksum;
    return this;
  }

  /**
   * Set the name of this client.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setClientName(String clientName) {
    this.clientName = clientName;
    return this;
  }

  /**
   * Set the DataNode we are talking to.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setDatanodeInfo(DatanodeInfo datanode) {
    this.datanode = datanode;
    return this;
  }

  /**
   * Set whether short-circuit local reads may be attempted.  build() only
   * tries them when this flag and conf.shortCircuitLocalReads are both true.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setAllowShortCircuitLocalReads(
      boolean allowShortCircuitLocalReads) {
    this.allowShortCircuitLocalReads = allowShortCircuitLocalReads;
    return this;
  }

  /**
   * Set the ClientContext, which supplies the PeerCache, the
   * ShortCircuitCache and the DomainSocketFactory used by this factory.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setClientCacheContext(
      ClientContext clientContext) {
    this.clientContext = clientContext;
    return this;
  }

  /**
   * Set the number of bytes to read.  -1 (the default) indicates no limit.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setLength(long length) {
    this.length = length;
    return this;
  }

  /**
   * Set the caching strategy to use when reading the block.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setCachingStrategy(
      CachingStrategy cachingStrategy) {
    this.cachingStrategy = cachingStrategy;
    return this;
  }

  /**
   * Set the socket address to use to connect to the peer.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setInetSocketAddress (
      InetSocketAddress inetSocketAddress) {
    this.inetSocketAddress = inetSocketAddress;
    return this;
  }

  /**
   * Set the UserGroupInformation to use for legacy block reader local
   * objects, if needed.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setUserGroupInformation(
      UserGroupInformation userGroupInformation) {
    this.userGroupInformation = userGroupInformation;
    return this;
  }

  /**
   * Set the factory used to create a new TCP peer when no cached peer is
   * used.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setRemotePeerFactory(
      RemotePeerFactory remotePeerFactory) {
    this.remotePeerFactory = remotePeerFactory;
    return this;
  }

  /**
   * Set the Configuration to use for legacy block reader local objects.
   * build() requires this to be non-null.
   *
   * @return this factory, to allow call chaining.
   */
  public BlockReaderFactory setConfiguration(
      Configuration configuration) {
    this.configuration = configuration;
    return this;
  }
248
249  /**
250   * Build a BlockReader with the given options.
251   *
252   * This function will do the best it can to create a block reader that meets
253   * all of our requirements.  We prefer short-circuit block readers
254   * (BlockReaderLocal and BlockReaderLocalLegacy) over remote ones, since the
255   * former avoid the overhead of socket communication.  If short-circuit is
256   * unavailable, our next fallback is data transfer over UNIX domain sockets,
257   * if dfs.client.domain.socket.data.traffic has been enabled.  If that doesn't
258   * work, we will try to create a remote block reader that operates over TCP
259   * sockets.
260   *
261   * There are a few caches that are important here.
262   *
263   * The ShortCircuitCache stores file descriptor objects which have been passed
264   * from the DataNode. 
265   *
266   * The DomainSocketFactory stores information about UNIX domain socket paths
267   * that we not been able to use in the past, so that we don't waste time
268   * retrying them over and over.  (Like all the caches, it does have a timeout,
269   * though.)
270   *
271   * The PeerCache stores peers that we have used in the past.  If we can reuse
272   * one of these peers, we avoid the overhead of re-opening a socket.  However,
273   * if the socket has been timed out on the remote end, our attempt to reuse
274   * the socket may end with an IOException.  For that reason, we limit our
275   * attempts at socket reuse to dfs.client.cached.conn.retry times.  After
276   * that, we create new sockets.  This avoids the problem where a thread tries
277   * to talk to a peer that it hasn't talked to in a while, and has to clean out
278   * every entry in a socket cache full of stale entries.
279   *
280   * @return The new BlockReader.  We will not return null.
281   *
282   * @throws InvalidToken
283   *             If the block token was invalid.
284   *         InvalidEncryptionKeyException
285   *             If the encryption key was invalid.
286   *         Other IOException
287   *             If there was another problem.
288   */
289  public BlockReader build() throws IOException {
290    BlockReader reader = null;
291
292    Preconditions.checkNotNull(configuration);
293    if (conf.shortCircuitLocalReads && allowShortCircuitLocalReads) {
294      if (clientContext.getUseLegacyBlockReaderLocal()) {
295        reader = getLegacyBlockReaderLocal();
296        if (reader != null) {
297          if (LOG.isTraceEnabled()) {
298            LOG.trace(this + ": returning new legacy block reader local.");
299          }
300          return reader;
301        }
302      } else {
303        reader = getBlockReaderLocal();
304        if (reader != null) {
305          if (LOG.isTraceEnabled()) {
306            LOG.trace(this + ": returning new block reader local.");
307          }
308          return reader;
309        }
310      }
311    }
312    if (conf.domainSocketDataTraffic) {
313      reader = getRemoteBlockReaderFromDomain();
314      if (reader != null) {
315        if (LOG.isTraceEnabled()) {
316          LOG.trace(this + ": returning new remote block reader using " +
317              "UNIX domain socket on " + pathInfo.getPath());
318        }
319        return reader;
320      }
321    }
322    Preconditions.checkState(!DFSInputStream.tcpReadsDisabledForTesting,
323        "TCP reads were disabled for testing, but we failed to " +
324        "do a non-TCP read.");
325    return getRemoteBlockReaderFromTcp();
326  }
327
328  /**
329   * Get {@link BlockReaderLocalLegacy} for short circuited local reads.
330   * This block reader implements the path-based style of local reads
331   * first introduced in HDFS-2246.
332   */
333  private BlockReader getLegacyBlockReaderLocal() throws IOException {
334    if (LOG.isTraceEnabled()) {
335      LOG.trace(this + ": trying to construct BlockReaderLocalLegacy");
336    }
337    if (!DFSClient.isLocalAddress(inetSocketAddress)) {
338      if (LOG.isTraceEnabled()) {
339        LOG.trace(this + ": can't construct BlockReaderLocalLegacy because " +
340            "the address " + inetSocketAddress + " is not local");
341      }
342      return null;
343    }
344    if (clientContext.getDisableLegacyBlockReaderLocal()) {
345      if (LOG.isTraceEnabled()) {
346        LOG.trace(this + ": can't construct BlockReaderLocalLegacy because " +
347            "disableLegacyBlockReaderLocal is set.");
348      }
349      return null;
350    }
351    IOException ioe = null;
352    try {
353      return BlockReaderLocalLegacy.newBlockReader(conf,
354          userGroupInformation, configuration, fileName, block, token,
355          datanode, startOffset, length);
356    } catch (RemoteException remoteException) {
357      ioe = remoteException.unwrapRemoteException(
358                InvalidToken.class, AccessControlException.class);
359    } catch (IOException e) {
360      ioe = e;
361    }
362    if ((!(ioe instanceof AccessControlException)) &&
363        isSecurityException(ioe)) {
364      // Handle security exceptions.
365      // We do not handle AccessControlException here, since
366      // BlockReaderLocalLegacy#newBlockReader uses that exception to indicate
367      // that the user is not in dfs.block.local-path-access.user, a condition
368      // which requires us to disable legacy SCR.
369      throw ioe;
370    }
371    LOG.warn(this + ": error creating legacy BlockReaderLocal.  " +
372        "Disabling legacy local reads.", ioe);
373    clientContext.setDisableLegacyBlockReaderLocal();
374    return null;
375  }
376
377  private BlockReader getBlockReaderLocal() throws InvalidToken {
378    if (LOG.isTraceEnabled()) {
379      LOG.trace(this + ": trying to construct a BlockReaderLocal " +
380          "for short-circuit reads.");
381    }
382    if (pathInfo == null) {
383      pathInfo = clientContext.getDomainSocketFactory().
384                      getPathInfo(inetSocketAddress, conf);
385    }
386    if (!pathInfo.getPathState().getUsableForShortCircuit()) {
387      if (LOG.isTraceEnabled()) {
388        LOG.trace(this + ": " + pathInfo + " is not " +
389            "usable for short circuit; giving up on BlockReaderLocal.");
390      }
391      return null;
392    }
393    ShortCircuitCache cache = clientContext.getShortCircuitCache();
394    ExtendedBlockId key = new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId());
395    ShortCircuitReplicaInfo info = cache.fetchOrCreate(key, this);
396    InvalidToken exc = info.getInvalidTokenException();
397    if (exc != null) {
398      if (LOG.isTraceEnabled()) {
399        LOG.trace(this + ": got InvalidToken exception while trying to " +
400            "construct BlockReaderLocal via " + pathInfo.getPath());
401      }
402      throw exc;
403    }
404    if (info.getReplica() == null) {
405      if (LOG.isTraceEnabled()) {
406        LOG.trace(this + ": failed to get ShortCircuitReplica.  " +
407            "Cannot construct BlockReaderLocal via " + pathInfo.getPath());
408      }
409      return null;
410    }
411    return new BlockReaderLocal.Builder(conf).
412        setFilename(fileName).
413        setBlock(block).
414        setStartOffset(startOffset).
415        setShortCircuitReplica(info.getReplica()).
416        setVerifyChecksum(verifyChecksum).
417        setCachingStrategy(cachingStrategy).
418        build();
419  }
420
421  /**
422   * Fetch a pair of short-circuit block descriptors from a local DataNode.
423   *
424   * @return    Null if we could not communicate with the datanode,
425   *            a new ShortCircuitReplicaInfo object otherwise.
426   *            ShortCircuitReplicaInfo objects may contain either an InvalidToken
427   *            exception, or a ShortCircuitReplica object ready to use.
428   */
429  @Override
430  public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() {
431    if (createShortCircuitReplicaInfoCallback != null) {
432      ShortCircuitReplicaInfo info =
433        createShortCircuitReplicaInfoCallback.createShortCircuitReplicaInfo();
434      if (info != null) return info;
435    }
436    if (LOG.isTraceEnabled()) {
437      LOG.trace(this + ": trying to create ShortCircuitReplicaInfo.");
438    }
439    BlockReaderPeer curPeer;
440    while (true) {
441      curPeer = nextDomainPeer();
442      if (curPeer == null) break;
443      if (curPeer.fromCache) remainingCacheTries--;
444      DomainPeer peer = (DomainPeer)curPeer.peer;
445      Slot slot = null;
446      ShortCircuitCache cache = clientContext.getShortCircuitCache();
447      try {
448        MutableBoolean usedPeer = new MutableBoolean(false);
449        slot = cache.allocShmSlot(datanode, peer, usedPeer,
450            new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId()),
451            clientName);
452        if (usedPeer.booleanValue()) {
453          if (LOG.isTraceEnabled()) {
454            LOG.trace(this + ": allocShmSlot used up our previous socket " +
455              peer.getDomainSocket() + ".  Allocating a new one...");
456          }
457          curPeer = nextDomainPeer();
458          if (curPeer == null) break;
459          peer = (DomainPeer)curPeer.peer;
460        }
461        ShortCircuitReplicaInfo info = requestFileDescriptors(peer, slot);
462        clientContext.getPeerCache().put(datanode, peer);
463        return info;
464      } catch (IOException e) {
465        if (slot != null) {
466          cache.freeSlot(slot);
467        }
468        if (curPeer.fromCache) {
469          // Handle an I/O error we got when using a cached socket.
470          // These are considered less serious, because the socket may be stale.
471          if (LOG.isDebugEnabled()) {
472            LOG.debug(this + ": closing stale domain peer " + peer, e);
473          }
474          IOUtils.cleanup(LOG, peer);
475        } else {
476          // Handle an I/O error we got when using a newly created socket.
477          // We temporarily disable the domain socket path for a few minutes in
478          // this case, to prevent wasting more time on it.
479          LOG.warn(this + ": I/O error requesting file descriptors.  " + 
480              "Disabling domain socket " + peer.getDomainSocket(), e);
481          IOUtils.cleanup(LOG, peer);
482          clientContext.getDomainSocketFactory()
483              .disableDomainSocketPath(pathInfo.getPath());
484          return null;
485        }
486      }
487    }
488    return null;
489  }
490
491  /**
492   * Request file descriptors from a DomainPeer.
493   *
494   * @param peer   The peer to use for communication.
495   * @param slot   If non-null, the shared memory slot to associate with the 
496   *               new ShortCircuitReplica.
497   * 
498   * @return  A ShortCircuitReplica object if we could communicate with the
499   *          datanode; null, otherwise. 
500   * @throws  IOException If we encountered an I/O exception while communicating
501   *          with the datanode.
502   */
503  private ShortCircuitReplicaInfo requestFileDescriptors(DomainPeer peer,
504          Slot slot) throws IOException {
505    ShortCircuitCache cache = clientContext.getShortCircuitCache();
506    final DataOutputStream out =
507        new DataOutputStream(new BufferedOutputStream(peer.getOutputStream()));
508    SlotId slotId = slot == null ? null : slot.getSlotId();
509    new Sender(out).requestShortCircuitFds(block, token, slotId, 1);
510    DataInputStream in = new DataInputStream(peer.getInputStream());
511    BlockOpResponseProto resp = BlockOpResponseProto.parseFrom(
512        PBHelper.vintPrefixed(in));
513    DomainSocket sock = peer.getDomainSocket();
514    switch (resp.getStatus()) {
515    case SUCCESS:
516      byte buf[] = new byte[1];
517      FileInputStream fis[] = new FileInputStream[2];
518      sock.recvFileInputStreams(fis, buf, 0, buf.length);
519      ShortCircuitReplica replica = null;
520      try {
521        ExtendedBlockId key =
522            new ExtendedBlockId(block.getBlockId(), block.getBlockPoolId());
523        replica = new ShortCircuitReplica(key, fis[0], fis[1], cache,
524            Time.monotonicNow(), slot);
525      } catch (IOException e) {
526        // This indicates an error reading from disk, or a format error.  Since
527        // it's not a socket communication problem, we return null rather than
528        // throwing an exception.
529        LOG.warn(this + ": error creating ShortCircuitReplica.", e);
530        return null;
531      } finally {
532        if (replica == null) {
533          IOUtils.cleanup(DFSClient.LOG, fis[0], fis[1]);
534        }
535      }
536      return new ShortCircuitReplicaInfo(replica);
537    case ERROR_UNSUPPORTED:
538      if (!resp.hasShortCircuitAccessVersion()) {
539        LOG.warn("short-circuit read access is disabled for " +
540            "DataNode " + datanode + ".  reason: " + resp.getMessage());
541        clientContext.getDomainSocketFactory()
542            .disableShortCircuitForPath(pathInfo.getPath());
543      } else {
544        LOG.warn("short-circuit read access for the file " +
545            fileName + " is disabled for DataNode " + datanode +
546            ".  reason: " + resp.getMessage());
547      }
548      return null;
549    case ERROR_ACCESS_TOKEN:
550      String msg = "access control error while " +
551          "attempting to set up short-circuit access to " +
552          fileName + resp.getMessage();
553      if (LOG.isDebugEnabled()) {
554        LOG.debug(this + ":" + msg);
555      }
556      return new ShortCircuitReplicaInfo(new InvalidToken(msg));
557    default:
558      LOG.warn(this + ": unknown response code " + resp.getStatus() +
559          " while attempting to set up short-circuit access. " +
560          resp.getMessage());
561      clientContext.getDomainSocketFactory()
562          .disableShortCircuitForPath(pathInfo.getPath());
563      return null;
564    }
565  }
566
567  /**
568   * Get a RemoteBlockReader that communicates over a UNIX domain socket.
569   *
570   * @return The new BlockReader, or null if we failed to create the block
571   * reader.
572   *
573   * @throws InvalidToken    If the block token was invalid.
574   * Potentially other security-related execptions.
575   */
576  private BlockReader getRemoteBlockReaderFromDomain() throws IOException {
577    if (pathInfo == null) {
578      pathInfo = clientContext.getDomainSocketFactory().
579                      getPathInfo(inetSocketAddress, conf);
580    }
581    if (!pathInfo.getPathState().getUsableForDataTransfer()) {
582      if (LOG.isTraceEnabled()) {
583        LOG.trace(this + ": not trying to create a remote block reader " +
584            "because the UNIX domain socket at " + pathInfo +
585            " is not usable.");
586      }
587      return null;
588    }
589    if (LOG.isTraceEnabled()) {
590      LOG.trace(this + ": trying to create a remote block reader from the " +
591          "UNIX domain socket at " + pathInfo.getPath());
592    }
593
594    while (true) {
595      BlockReaderPeer curPeer = nextDomainPeer();
596      if (curPeer == null) break;
597      if (curPeer.fromCache) remainingCacheTries--;
598      DomainPeer peer = (DomainPeer)curPeer.peer;
599      BlockReader blockReader = null;
600      try {
601        blockReader = getRemoteBlockReader(peer);
602        return blockReader;
603      } catch (IOException ioe) {
604        IOUtils.cleanup(LOG, peer);
605        if (isSecurityException(ioe)) {
606          if (LOG.isTraceEnabled()) {
607            LOG.trace(this + ": got security exception while constructing " +
608                "a remote block reader from the unix domain socket at " +
609                pathInfo.getPath(), ioe);
610          }
611          throw ioe;
612        }
613        if (curPeer.fromCache) {
614          // Handle an I/O error we got when using a cached peer.  These are
615          // considered less serious, because the underlying socket may be stale.
616          if (LOG.isDebugEnabled()) {
617            LOG.debug("Closed potentially stale domain peer " + peer, ioe);
618          }
619        } else {
620          // Handle an I/O error we got when using a newly created domain peer.
621          // We temporarily disable the domain socket path for a few minutes in
622          // this case, to prevent wasting more time on it.
623          LOG.warn("I/O error constructing remote block reader.  Disabling " +
624              "domain socket " + peer.getDomainSocket(), ioe);
625          clientContext.getDomainSocketFactory()
626              .disableDomainSocketPath(pathInfo.getPath());
627          return null;
628        }
629      } finally {
630        if (blockReader == null) {
631          IOUtils.cleanup(LOG, peer);
632        }
633      }
634    }
635    return null;
636  }
637
638  /**
639   * Get a RemoteBlockReader that communicates over a TCP socket.
640   *
641   * @return The new BlockReader.  We will not return null, but instead throw
642   *         an exception if this fails.
643   *
644   * @throws InvalidToken
645   *             If the block token was invalid.
646   *         InvalidEncryptionKeyException
647   *             If the encryption key was invalid.
648   *         Other IOException
649   *             If there was another problem.
650   */
651  private BlockReader getRemoteBlockReaderFromTcp() throws IOException {
652    if (LOG.isTraceEnabled()) {
653      LOG.trace(this + ": trying to create a remote block reader from a " +
654          "TCP socket");
655    }
656    BlockReader blockReader = null;
657    while (true) {
658      BlockReaderPeer curPeer = null;
659      Peer peer = null;
660      try {
661        curPeer = nextTcpPeer();
662        if (curPeer == null) break;
663        if (curPeer.fromCache) remainingCacheTries--;
664        peer = curPeer.peer;
665        blockReader = getRemoteBlockReader(peer);
666        return blockReader;
667      } catch (IOException ioe) {
668        if (isSecurityException(ioe)) {
669          if (LOG.isTraceEnabled()) {
670            LOG.trace(this + ": got security exception while constructing " +
671                "a remote block reader from " + peer, ioe);
672          }
673          throw ioe;
674        }
675        if ((curPeer != null) && curPeer.fromCache) {
676          // Handle an I/O error we got when using a cached peer.  These are
677          // considered less serious, because the underlying socket may be
678          // stale.
679          if (LOG.isDebugEnabled()) {
680            LOG.debug("Closed potentially stale remote peer " + peer, ioe);
681          }
682        } else {
683          // Handle an I/O error we got when using a newly created peer.
684          LOG.warn("I/O error constructing remote block reader.", ioe);
685          throw ioe;
686        }
687      } finally {
688        if (blockReader == null) {
689          IOUtils.cleanup(LOG, peer);
690        }
691      }
692    }
693    return null;
694  }
695
  /**
   * A Peer paired with a flag recording whether it was taken from the
   * PeerCache or newly created.
   */
  public static class BlockReaderPeer {
    /** The peer to communicate over. */
    final Peer peer;
    /** True if the peer came from the PeerCache; false if newly created. */
    final boolean fromCache;
    
    BlockReaderPeer(Peer peer, boolean fromCache) {
      this.peer = peer;
      this.fromCache = fromCache;
    }
  }
705
706  /**
707   * Get the next DomainPeer-- either from the cache or by creating it.
708   *
709   * @return the next DomainPeer, or null if we could not construct one.
710   */
711  private BlockReaderPeer nextDomainPeer() {
712    if (remainingCacheTries > 0) {
713      Peer peer = clientContext.getPeerCache().get(datanode, true);
714      if (peer != null) {
715        if (LOG.isTraceEnabled()) {
716          LOG.trace("nextDomainPeer: reusing existing peer " + peer);
717        }
718        return new BlockReaderPeer(peer, true);
719      }
720    }
721    DomainSocket sock = clientContext.getDomainSocketFactory().
722        createSocket(pathInfo, conf.socketTimeout);
723    if (sock == null) return null;
724    return new BlockReaderPeer(new DomainPeer(sock), false);
725  }
726
727  /**
728   * Get the next TCP-based peer-- either from the cache or by creating it.
729   *
730   * @return the next Peer, or null if we could not construct one.
731   *
732   * @throws IOException  If there was an error while constructing the peer
733   *                      (such as an InvalidEncryptionKeyException)
734   */
735  private BlockReaderPeer nextTcpPeer() throws IOException {
736    if (remainingCacheTries > 0) {
737      Peer peer = clientContext.getPeerCache().get(datanode, false);
738      if (peer != null) {
739        if (LOG.isTraceEnabled()) {
740          LOG.trace("nextTcpPeer: reusing existing peer " + peer);
741        }
742        return new BlockReaderPeer(peer, true);
743      }
744    }
745    try {
746      Peer peer = remotePeerFactory.newConnectedPeer(inetSocketAddress);
747      if (LOG.isTraceEnabled()) {
748        LOG.trace("nextTcpPeer: created newConnectedPeer " + peer);
749      }
750      return new BlockReaderPeer(peer, false);
751    } catch (IOException e) {
752      if (LOG.isTraceEnabled()) {
753        LOG.trace("nextTcpPeer: failed to create newConnectedPeer " +
754                  "connected to " + datanode);
755      }
756      throw e;
757    }
758  }
759
760  /**
761   * Determine if an exception is security-related.
762   *
763   * We need to handle these exceptions differently than other IOExceptions.
764   * They don't indicate a communication problem.  Instead, they mean that there
765   * is some action the client needs to take, such as refetching block tokens,
766   * renewing encryption keys, etc.
767   *
768   * @param ioe    The exception
769   * @return       True only if the exception is security-related.
770   */
771  private static boolean isSecurityException(IOException ioe) {
772    return (ioe instanceof InvalidToken) ||
773            (ioe instanceof InvalidEncryptionKeyException) ||
774            (ioe instanceof InvalidBlockTokenException) ||
775            (ioe instanceof AccessControlException);
776  }
777
778  @SuppressWarnings("deprecation")
779  private BlockReader getRemoteBlockReader(Peer peer) throws IOException {
780    if (conf.useLegacyBlockReader) {
781      return RemoteBlockReader.newBlockReader(fileName,
782          block, token, startOffset, length, conf.ioBufferSize,
783          verifyChecksum, clientName, peer, datanode,
784          clientContext.getPeerCache(), cachingStrategy);
785    } else {
786      return RemoteBlockReader2.newBlockReader(
787          fileName, block, token, startOffset, length,
788          verifyChecksum, clientName, peer, datanode,
789          clientContext.getPeerCache(), cachingStrategy);
790    }
791  }
792
793  @Override
794  public String toString() {
795    return "BlockReaderFactory(fileName=" + fileName + ", block=" + block + ")";
796  }
797
798  /**
799   * File name to print when accessing a block directly (from servlets)
800   * @param s Address of the block location
801   * @param poolId Block pool ID of the block
802   * @param blockId Block ID of the block
803   * @return string that has a file name for debug purposes
804   */
805  public static String getFileName(final InetSocketAddress s,
806      final String poolId, final long blockId) {
807    return s.toString() + ":" + poolId + ":" + blockId;
808  }
809}