/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_BLOCK_WRITE_RETRIES_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_BLOCK_WRITE_RETRIES_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CACHED_CONN_RETRY_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CACHED_CONN_RETRY_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CACHE_DROP_BEHIND_READS;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CACHE_DROP_BEHIND_WRITES;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CACHE_READAHEAD;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_DATANODE_RESTART_TIMEOUT_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_DATANODE_RESTART_TIMEOUT_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_READ_PREFETCH_SIZE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_WINDOW_BASE;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_USE_DN_HOSTNAME;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_USE_DN_HOSTNAME_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CONTEXT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_CONTEXT_DEFAULT;

import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.NetworkInterface;
import java.net.Socket;
import java.net.SocketException;
import java.net.SocketAddress;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import javax.net.SocketFactory;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.BlockStorageLocation;
import org.apache.hadoop.fs.CacheFlag;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.fs.HdfsBlockLocation;
import org.apache.hadoop.fs.InvalidPathException;
import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.Options.ChecksumOpt;
import org.apache.hadoop.fs.ParentNotDirectoryException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.fs.VolumeId;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclStatus;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
import org.apache.hadoop.hdfs.protocol.AclException;
import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.hdfs.net.TcpPeerServer;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveIterator;
import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.CachePoolIterator;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
import org.apache.hadoop.hdfs.protocol.UnresolvedPathException;
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferEncryptor;
import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
import org.apache.hadoop.hdfs.protocol.datatransfer.Op;
import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure;
import org.apache.hadoop.hdfs.protocol.datatransfer.TrustedChannelResolver;
import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpBlockChecksumResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.EnumSetWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.retry.LossyRetryInvocationHandler;
import org.apache.hadoop.ipc.Client;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.DNS;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenRenewer;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.DataChecksum.Type;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Time;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.net.InetAddresses;

/********************************************************
 * DFSClient can connect to a Hadoop Filesystem and
 * perform basic file tasks.  It uses the ClientProtocol
 * to communicate with a NameNode daemon, and connects
 * directly to DataNodes to read/write block data.
 *
 * Hadoop DFS users should obtain an instance of
 * DistributedFileSystem, which uses DFSClient to handle
 * filesystem tasks.
 *
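 * A minimal usage sketch (illustrative only; it assumes a reachable
 * NameNode at the configured default filesystem address):
 * <pre>
 *   Configuration conf = new HdfsConfiguration();
 *   DFSClient client = new DFSClient(NameNode.getAddress(conf), conf);
 *   try {
 *     // "/tmp/example.txt" is a hypothetical path
 *     OutputStream out = client.create("/tmp/example.txt", true);
 *     out.write("hello".getBytes("UTF-8"));
 *     out.close();
 *   } finally {
 *     client.close();
 *   }
 * </pre>
 *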
 ********************************************************/
@InterfaceAudience.Private
public class DFSClient implements java.io.Closeable, RemotePeerFactory {
  public static final Log LOG = LogFactory.getLog(DFSClient.class);
  public static final long SERVER_DEFAULTS_VALIDITY_PERIOD = 60 * 60 * 1000L; // 1 hour
  static final int TCP_WINDOW_SIZE = 128 * 1024; // 128 KB

  private final Configuration conf;
  private final Conf dfsClientConf;
  final ClientProtocol namenode;
  /* The service used for delegation tokens */
  private Text dtService;

  final UserGroupInformation ugi;
  volatile boolean clientRunning = true;
  volatile long lastLeaseRenewal;
  private volatile FsServerDefaults serverDefaults;
  private volatile long serverDefaultsLastUpdate;
  final String clientName;
  final SocketFactory socketFactory;
  final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;
  final FileSystem.Statistics stats;
  private final String authority;
  private final Random r = new Random();
  private SocketAddress[] localInterfaceAddrs;
  private DataEncryptionKey encryptionKey;
  final TrustedChannelResolver trustedChannelResolver;
  private final CachingStrategy defaultReadCachingStrategy;
  private final CachingStrategy defaultWriteCachingStrategy;
  private final ClientContext clientContext;
  private volatile long hedgedReadThresholdMillis;
  private static final DFSHedgedReadMetrics HEDGED_READ_METRIC =
      new DFSHedgedReadMetrics();
  private static ThreadPoolExecutor HEDGED_READ_THREAD_POOL;

  /**
   * DFSClient configuration
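   *
   * <p>Values are read once from the supplied {@link Configuration} at
   * construction time. An illustrative override (the values below are
   * hypothetical, not recommendations):
   * <pre>
   *   Configuration conf = new HdfsConfiguration();
   *   conf.setInt(DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_KEY, 5);
   *   conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 256L * 1024 * 1024);
   *   DFSClient.Conf clientConf = new DFSClient.Conf(conf);
   * </pre>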
   */
  public static class Conf {
    final int hdfsTimeout;    // timeout value for a DFS operation.

    final int maxFailoverAttempts;
    final int maxRetryAttempts;
    final int failoverSleepBaseMillis;
    final int failoverSleepMaxMillis;
    final int maxBlockAcquireFailures;
    final int confTime;
    final int ioBufferSize;
    final ChecksumOpt defaultChecksumOpt;
    final int writePacketSize;
    final int socketTimeout;
    final int socketCacheCapacity;
    final long socketCacheExpiry;
    final long excludedNodesCacheExpiry;
    /** Wait time window (in msec) if BlockMissingException is caught */
    final int timeWindow;
    final int nCachedConnRetry;
    final int nBlockWriteRetry;
    final int nBlockWriteLocateFollowingRetry;
    final long defaultBlockSize;
    final long prefetchSize;
    final short defaultReplication;
    final String taskId;
    final FsPermission uMask;
    final boolean connectToDnViaHostname;
    final boolean getHdfsBlocksMetadataEnabled;
    final int getFileBlockStorageLocationsNumThreads;
    final int getFileBlockStorageLocationsTimeoutMs;
    final int retryTimesForGetLastBlockLength;
    final int retryIntervalForGetLastBlockLength;
    final long datanodeRestartTimeout;

    final boolean useLegacyBlockReader;
    final boolean useLegacyBlockReaderLocal;
    final String domainSocketPath;
    final boolean skipShortCircuitChecksums;
    final int shortCircuitBufferSize;
    final boolean shortCircuitLocalReads;
    final boolean domainSocketDataTraffic;
    final int shortCircuitStreamsCacheSize;
    final long shortCircuitStreamsCacheExpiryMs;
    final int shortCircuitSharedMemoryWatcherInterruptCheckMs;

    final boolean shortCircuitMmapEnabled;
    final int shortCircuitMmapCacheSize;
    final long shortCircuitMmapCacheExpiryMs;
    final long shortCircuitMmapCacheRetryTimeout;
    final long shortCircuitCacheStaleThresholdMs;

    public Conf(Configuration conf) {
      // The hdfsTimeout is currently the same as the ipc timeout
      hdfsTimeout = Client.getTimeout(conf);
      maxFailoverAttempts = conf.getInt(
          DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY,
          DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT);
      maxRetryAttempts = conf.getInt(
          DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY,
          DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT);
      failoverSleepBaseMillis = conf.getInt(
          DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY,
          DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT);
      failoverSleepMaxMillis = conf.getInt(
          DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY,
          DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT);

      maxBlockAcquireFailures = conf.getInt(
          DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_KEY,
          DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT);
      confTime = conf.getInt(DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY,
          HdfsServerConstants.WRITE_TIMEOUT);
      ioBufferSize = conf.getInt(
          CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY,
          CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT);
      defaultChecksumOpt = getChecksumOptFromConf(conf);
      socketTimeout = conf.getInt(DFS_CLIENT_SOCKET_TIMEOUT_KEY,
          HdfsServerConstants.READ_TIMEOUT);
      // dfs.write.packet.size is an internal config variable
      writePacketSize = conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY,
          DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT);
      defaultBlockSize = conf.getLongBytes(DFS_BLOCK_SIZE_KEY,
          DFS_BLOCK_SIZE_DEFAULT);
      defaultReplication = (short) conf.getInt(
          DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT);
      taskId = conf.get("mapreduce.task.attempt.id", "NONMAPREDUCE");
      socketCacheCapacity = conf.getInt(DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY,
          DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT);
      socketCacheExpiry = conf.getLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY,
          DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT);
      excludedNodesCacheExpiry = conf.getLong(
          DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL,
          DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT);
      prefetchSize = conf.getLong(DFS_CLIENT_READ_PREFETCH_SIZE_KEY,
          10 * defaultBlockSize);
      timeWindow = conf.getInt(DFS_CLIENT_RETRY_WINDOW_BASE, 3000);
      nCachedConnRetry = conf.getInt(DFS_CLIENT_CACHED_CONN_RETRY_KEY,
          DFS_CLIENT_CACHED_CONN_RETRY_DEFAULT);
      nBlockWriteRetry = conf.getInt(DFS_CLIENT_BLOCK_WRITE_RETRIES_KEY,
          DFS_CLIENT_BLOCK_WRITE_RETRIES_DEFAULT);
      nBlockWriteLocateFollowingRetry = conf.getInt(
          DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_KEY,
          DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_DEFAULT);
      uMask = FsPermission.getUMask(conf);
      connectToDnViaHostname = conf.getBoolean(DFS_CLIENT_USE_DN_HOSTNAME,
          DFS_CLIENT_USE_DN_HOSTNAME_DEFAULT);
      getHdfsBlocksMetadataEnabled = conf.getBoolean(
          DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED,
          DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT);
      getFileBlockStorageLocationsNumThreads = conf.getInt(
          DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_NUM_THREADS,
          DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_NUM_THREADS_DEFAULT);
      getFileBlockStorageLocationsTimeoutMs = conf.getInt(
          DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_MS,
          DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_MS_DEFAULT);
      retryTimesForGetLastBlockLength = conf.getInt(
          DFSConfigKeys.DFS_CLIENT_RETRY_TIMES_GET_LAST_BLOCK_LENGTH,
          DFSConfigKeys.DFS_CLIENT_RETRY_TIMES_GET_LAST_BLOCK_LENGTH_DEFAULT);
      retryIntervalForGetLastBlockLength = conf.getInt(
          DFSConfigKeys.DFS_CLIENT_RETRY_INTERVAL_GET_LAST_BLOCK_LENGTH,
          DFSConfigKeys.DFS_CLIENT_RETRY_INTERVAL_GET_LAST_BLOCK_LENGTH_DEFAULT);

      useLegacyBlockReader = conf.getBoolean(
          DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER,
          DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT);
      useLegacyBlockReaderLocal = conf.getBoolean(
          DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL,
          DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT);
      shortCircuitLocalReads = conf.getBoolean(
          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT);
      domainSocketDataTraffic = conf.getBoolean(
          DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC,
          DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT);
      domainSocketPath = conf.getTrimmed(
          DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
          DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_DEFAULT);

      if (BlockReaderLocal.LOG.isDebugEnabled()) {
        BlockReaderLocal.LOG.debug(
            DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL
            + " = " + useLegacyBlockReaderLocal);
        BlockReaderLocal.LOG.debug(
            DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY
            + " = " + shortCircuitLocalReads);
        BlockReaderLocal.LOG.debug(
            DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC
            + " = " + domainSocketDataTraffic);
        BlockReaderLocal.LOG.debug(
            DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY
            + " = " + domainSocketPath);
      }

      skipShortCircuitChecksums = conf.getBoolean(
          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT);
      shortCircuitBufferSize = conf.getInt(
          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY,
          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT);
      shortCircuitStreamsCacheSize = conf.getInt(
          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY,
          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT);
      shortCircuitStreamsCacheExpiryMs = conf.getLong(
          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY,
          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT);
      shortCircuitMmapEnabled = conf.getBoolean(
          DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED,
          DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED_DEFAULT);
      shortCircuitMmapCacheSize = conf.getInt(
          DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE,
          DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT);
      shortCircuitMmapCacheExpiryMs = conf.getLong(
          DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS,
          DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT);
      shortCircuitMmapCacheRetryTimeout = conf.getLong(
          DFSConfigKeys.DFS_CLIENT_MMAP_RETRY_TIMEOUT_MS,
          DFSConfigKeys.DFS_CLIENT_MMAP_RETRY_TIMEOUT_MS_DEFAULT);
      shortCircuitCacheStaleThresholdMs = conf.getLong(
          DFSConfigKeys.DFS_CLIENT_SHORT_CIRCUIT_REPLICA_STALE_THRESHOLD_MS,
          DFSConfigKeys.DFS_CLIENT_SHORT_CIRCUIT_REPLICA_STALE_THRESHOLD_MS_DEFAULT);
      shortCircuitSharedMemoryWatcherInterruptCheckMs = conf.getInt(
          DFSConfigKeys.DFS_SHORT_CIRCUIT_SHARED_MEMORY_WATCHER_INTERRUPT_CHECK_MS,
          DFSConfigKeys.DFS_SHORT_CIRCUIT_SHARED_MEMORY_WATCHER_INTERRUPT_CHECK_MS_DEFAULT);

      datanodeRestartTimeout = conf.getLong(
          DFS_CLIENT_DATANODE_RESTART_TIMEOUT_KEY,
          DFS_CLIENT_DATANODE_RESTART_TIMEOUT_DEFAULT) * 1000;
    }

    private DataChecksum.Type getChecksumType(Configuration conf) {
      final String checksum = conf.get(
          DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY,
          DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT);
      try {
        return DataChecksum.Type.valueOf(checksum);
      } catch(IllegalArgumentException iae) {
        LOG.warn("Bad checksum type: " + checksum + ". Using default "
            + DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT);
        return DataChecksum.Type.valueOf(
            DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT);
      }
    }

    // Construct a checksum option from conf
    private ChecksumOpt getChecksumOptFromConf(Configuration conf) {
      DataChecksum.Type type = getChecksumType(conf);
      int bytesPerChecksum = conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY,
          DFS_BYTES_PER_CHECKSUM_DEFAULT);
      return new ChecksumOpt(type, bytesPerChecksum);
    }

    // Create a DataChecksum with the default option.
    private DataChecksum createChecksum() throws IOException {
      return createChecksum(null);
    }

    private DataChecksum createChecksum(ChecksumOpt userOpt)
        throws IOException {
      // Fill in any missing field with the default.
      ChecksumOpt myOpt = ChecksumOpt.processChecksumOpt(
          defaultChecksumOpt, userOpt);
      DataChecksum dataChecksum = DataChecksum.newDataChecksum(
          myOpt.getChecksumType(),
          myOpt.getBytesPerChecksum());
      if (dataChecksum == null) {
        throw new IOException("Invalid checksum type specified: "
            + myOpt.getChecksumType().name());
      }
      return dataChecksum;
    }
  }

  public Conf getConf() {
    return dfsClientConf;
  }

  Configuration getConfiguration() {
    return conf;
  }

  /**
   * A map from file names to {@link DFSOutputStream} objects
   * that are currently being written by this client.
   * Note that a file can only be written by a single client.
   */
  private final Map<String, DFSOutputStream> filesBeingWritten
      = new HashMap<String, DFSOutputStream>();

  /**
   * Same as this(NameNode.getAddress(conf), conf);
   * @see #DFSClient(InetSocketAddress, Configuration)
   * @deprecated Deprecated at 0.21
   */
  @Deprecated
  public DFSClient(Configuration conf) throws IOException {
    this(NameNode.getAddress(conf), conf);
  }

  public DFSClient(InetSocketAddress address, Configuration conf) throws IOException {
    this(NameNode.getUri(address), conf);
  }

  /**
   * Same as this(nameNodeUri, conf, null);
   * @see #DFSClient(URI, Configuration, FileSystem.Statistics)
   */
  public DFSClient(URI nameNodeUri, Configuration conf
      ) throws IOException {
    this(nameNodeUri, conf, null);
  }

  /**
   * Same as this(nameNodeUri, null, conf, stats);
   * @see #DFSClient(URI, ClientProtocol, Configuration, FileSystem.Statistics)
   */
  public DFSClient(URI nameNodeUri, Configuration conf,
                   FileSystem.Statistics stats)
    throws IOException {
    this(nameNodeUri, null, conf, stats);
  }

  /**
   * Create a new DFSClient connected to the given nameNodeUri or rpcNamenode.
   * If HA is enabled and a positive value is set for
   * {@link DFSConfigKeys#DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY} in the
   * configuration, the DFSClient will use {@link LossyRetryInvocationHandler}
   * as its RetryInvocationHandler. Otherwise one of nameNodeUri or rpcNamenode
   * must be null.
   */
  @VisibleForTesting
  public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode,
      Configuration conf, FileSystem.Statistics stats)
    throws IOException {
    // Copy only the required DFSClient configuration
    this.dfsClientConf = new Conf(conf);
    if (this.dfsClientConf.useLegacyBlockReaderLocal) {
      LOG.debug("Using legacy short-circuit local reads.");
    }
    this.conf = conf;
    this.stats = stats;
    this.socketFactory = NetUtils.getSocketFactory(conf, ClientProtocol.class);
    this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf);

    this.ugi = UserGroupInformation.getCurrentUser();

    this.authority = nameNodeUri == null? "null": nameNodeUri.getAuthority();
    this.clientName = "DFSClient_" + dfsClientConf.taskId + "_" +
        DFSUtil.getRandom().nextInt() + "_" + Thread.currentThread().getId();

    int numResponseToDrop = conf.getInt(
        DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY,
        DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_DEFAULT);
    NameNodeProxies.ProxyAndInfo<ClientProtocol> proxyInfo = null;
    if (numResponseToDrop > 0) {
      // This case is used for testing.
      LOG.warn(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY
          + " is set to " + numResponseToDrop
          + ", this hacked client will proactively drop responses");
      proxyInfo = NameNodeProxies.createProxyWithLossyRetryHandler(conf,
          nameNodeUri, ClientProtocol.class, numResponseToDrop);
    }

    if (proxyInfo != null) {
      this.dtService = proxyInfo.getDelegationTokenService();
      this.namenode = proxyInfo.getProxy();
    } else if (rpcNamenode != null) {
      // This case is used for testing.
      Preconditions.checkArgument(nameNodeUri == null);
      this.namenode = rpcNamenode;
      dtService = null;
    } else {
      Preconditions.checkArgument(nameNodeUri != null,
          "null URI");
      proxyInfo = NameNodeProxies.createProxy(conf, nameNodeUri,
          ClientProtocol.class);
      this.dtService = proxyInfo.getDelegationTokenService();
      this.namenode = proxyInfo.getProxy();
    }

    String localInterfaces[] =
      conf.getTrimmedStrings(DFSConfigKeys.DFS_CLIENT_LOCAL_INTERFACES);
    localInterfaceAddrs = getLocalInterfaceAddrs(localInterfaces);
    if (LOG.isDebugEnabled() && 0 != localInterfaces.length) {
      LOG.debug("Using local interfaces [" +
      Joiner.on(',').join(localInterfaces)+ "] with addresses [" +
      Joiner.on(',').join(localInterfaceAddrs) + "]");
    }

    Boolean readDropBehind = (conf.get(DFS_CLIENT_CACHE_DROP_BEHIND_READS) == null) ?
        null : conf.getBoolean(DFS_CLIENT_CACHE_DROP_BEHIND_READS, false);
    Long readahead = (conf.get(DFS_CLIENT_CACHE_READAHEAD) == null) ?
        null : conf.getLong(DFS_CLIENT_CACHE_READAHEAD, 0);
    Boolean writeDropBehind = (conf.get(DFS_CLIENT_CACHE_DROP_BEHIND_WRITES) == null) ?
        null : conf.getBoolean(DFS_CLIENT_CACHE_DROP_BEHIND_WRITES, false);
    this.defaultReadCachingStrategy =
        new CachingStrategy(readDropBehind, readahead);
    this.defaultWriteCachingStrategy =
        new CachingStrategy(writeDropBehind, readahead);
    this.clientContext = ClientContext.get(
        conf.get(DFS_CLIENT_CONTEXT, DFS_CLIENT_CONTEXT_DEFAULT),
        dfsClientConf);
    this.hedgedReadThresholdMillis = conf.getLong(
        DFSConfigKeys.DFS_DFSCLIENT_HEDGED_READ_THRESHOLD_MILLIS,
        DFSConfigKeys.DEFAULT_DFSCLIENT_HEDGED_READ_THRESHOLD_MILLIS);
    int numThreads = conf.getInt(
        DFSConfigKeys.DFS_DFSCLIENT_HEDGED_READ_THREADPOOL_SIZE,
        DFSConfigKeys.DEFAULT_DFSCLIENT_HEDGED_READ_THREADPOOL_SIZE);
    if (numThreads > 0) {
      this.initThreadsNumForHedgedReads(numThreads);
    }
    this.trustedChannelResolver = TrustedChannelResolver.getInstance(getConfiguration());
  }

  /**
   * Return the socket addresses to use with each configured
   * local interface. Local interfaces may be specified by IP
   * address, IP address range using CIDR notation, interface
   * name (e.g. eth0) or sub-interface name (e.g. eth0:0).
   * The socket addresses consist of the IPs for the interfaces
   * and the ephemeral port (port 0). If an IP, IP range, or
   * interface name matches an interface with sub-interfaces
   * only the IP of the interface is used. Sub-interfaces can
   * be used by specifying them explicitly (by IP or name).
   *
   * @return SocketAddresses for the configured local interfaces,
   *    or an empty array if none are configured
   * @throws UnknownHostException if a given interface name is invalid
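   *
   * For example (the interface names and subnet are hypothetical), setting
   * {@code dfs.client.local.interfaces} to {@code "eth0,10.0.0.0/24"} yields
   * one socket address per matching interface or IP, each with port 0.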
   */
  private static SocketAddress[] getLocalInterfaceAddrs(
      String interfaceNames[]) throws UnknownHostException {
    List<SocketAddress> localAddrs = new ArrayList<SocketAddress>();
    for (String interfaceName : interfaceNames) {
      if (InetAddresses.isInetAddress(interfaceName)) {
        localAddrs.add(new InetSocketAddress(interfaceName, 0));
      } else if (NetUtils.isValidSubnet(interfaceName)) {
        for (InetAddress addr : NetUtils.getIPs(interfaceName, false)) {
          localAddrs.add(new InetSocketAddress(addr, 0));
        }
      } else {
        for (String ip : DNS.getIPs(interfaceName, false)) {
          localAddrs.add(new InetSocketAddress(ip, 0));
        }
      }
    }
    return localAddrs.toArray(new SocketAddress[localAddrs.size()]);
  }

  /**
   * Select one of the configured local interfaces at random. We use a random
   * interface because other policies like round-robin are less effective
   * given that we cache connections to datanodes.
   *
   * @return one of the local interface addresses at random, or null if no
   *    local interfaces are configured
   */
  SocketAddress getRandomLocalInterfaceAddr() {
    if (localInterfaceAddrs.length == 0) {
      return null;
    }
    final int idx = r.nextInt(localInterfaceAddrs.length);
    final SocketAddress addr = localInterfaceAddrs[idx];
    if (LOG.isDebugEnabled()) {
      LOG.debug("Using local interface " + addr);
    }
    return addr;
  }

  /**
   * Return the number of times the client should go back to the namenode
   * to retrieve block locations when reading.
   */
  int getMaxBlockAcquireFailures() {
    return dfsClientConf.maxBlockAcquireFailures;
  }

  /**
   * Return the timeout that clients should use when writing to datanodes.
   * @param numNodes the number of nodes in the pipeline.
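   *
   * Worked example: with a base write timeout T and a per-node extension E
   * (the configured socket write timeout and
   * {@link HdfsServerConstants}'s extension constant, respectively), a
   * 3-node pipeline waits T + 3*E; a non-positive base disables the timeout
   * and this method returns 0.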
   */
  int getDatanodeWriteTimeout(int numNodes) {
    return (dfsClientConf.confTime > 0) ?
      (dfsClientConf.confTime + HdfsServerConstants.WRITE_TIMEOUT_EXTENSION * numNodes) : 0;
  }

  int getDatanodeReadTimeout(int numNodes) {
    return dfsClientConf.socketTimeout > 0 ?
        (HdfsServerConstants.READ_TIMEOUT_EXTENSION * numNodes +
            dfsClientConf.socketTimeout) : 0;
  }

  int getHdfsTimeout() {
    return dfsClientConf.hdfsTimeout;
  }

  @VisibleForTesting
  public String getClientName() {
    return clientName;
  }

  void checkOpen() throws IOException {
    if (!clientRunning) {
      throw new IOException("Filesystem closed");
    }
  }

  /** Return the lease renewer instance. The renewer thread won't start
   *  until the first output stream is created. The same instance will
   *  be returned until all output streams are closed.
   */
  public LeaseRenewer getLeaseRenewer() throws IOException {
    return LeaseRenewer.getInstance(authority, ugi, this);
  }

  /** Get a lease and start automatic renewal */
  private void beginFileLease(final String src, final DFSOutputStream out)
      throws IOException {
    getLeaseRenewer().put(src, out, this);
  }

  /** Stop renewal of lease for the file. */
  void endFileLease(final String src) throws IOException {
    getLeaseRenewer().closeFile(src, this);
  }

  /** Put a file. Only called from LeaseRenewer, where proper locking is
   *  enforced to consistently update its local dfsclients array and
   *  client's filesBeingWritten map.
   */
  void putFileBeingWritten(final String src, final DFSOutputStream out) {
    synchronized(filesBeingWritten) {
      filesBeingWritten.put(src, out);
      // Update the last lease renewal time only when there were no
      // writes. Once there is one write stream open, the lease renewer
      // thread keeps it updated well within anyone's expiration time.
      if (lastLeaseRenewal == 0) {
        updateLastLeaseRenewal();
      }
    }
  }

  /** Remove a file. Only called from LeaseRenewer. */
  void removeFileBeingWritten(final String src) {
    synchronized(filesBeingWritten) {
      filesBeingWritten.remove(src);
      if (filesBeingWritten.isEmpty()) {
        lastLeaseRenewal = 0;
      }
    }
  }

  /** Is the file-being-written map empty? */
  boolean isFilesBeingWrittenEmpty() {
    synchronized(filesBeingWritten) {
      return filesBeingWritten.isEmpty();
    }
  }

  /** @return true if the client is running */
  boolean isClientRunning() {
    return clientRunning;
  }

  long getLastLeaseRenewal() {
    return lastLeaseRenewal;
  }

  void updateLastLeaseRenewal() {
    synchronized(filesBeingWritten) {
      if (filesBeingWritten.isEmpty()) {
        return;
      }
      lastLeaseRenewal = Time.now();
    }
  }

  /**
   * Renew leases.
   * @return true if the lease was renewed. May return false if this
   * client has been closed or has no files open.
   **/
  boolean renewLease() throws IOException {
    if (clientRunning && !isFilesBeingWrittenEmpty()) {
      try {
        namenode.renewLease(clientName);
        updateLastLeaseRenewal();
        return true;
      } catch (IOException e) {
        // Abort if the lease has already expired.
        final long elapsed = Time.now() - getLastLeaseRenewal();
        if (elapsed > HdfsConstants.LEASE_HARDLIMIT_PERIOD) {
          LOG.warn("Failed to renew lease for " + clientName + " for "
              + (elapsed/1000) + " seconds (>= hard-limit ="
              + (HdfsConstants.LEASE_HARDLIMIT_PERIOD/1000) + " seconds.) "
              + "Closing all files being written ...", e);
          closeAllFilesBeingWritten(true);
        } else {
          // Let the lease renewer handle it and retry.
          throw e;
        }
      }
    }
    return false;
  }

  /**
   * Close connections to the Namenode.
   */
  void closeConnectionToNamenode() {
    RPC.stopProxy(namenode);
  }

  /** Abort and release resources held.  Ignore all errors. */
  void abort() {
    clientRunning = false;
    closeAllFilesBeingWritten(true);
    try {
      // remove the reference to this client and stop the renewer
      // if there are no more clients under the renewer.
      getLeaseRenewer().closeClient(this);
    } catch (IOException ioe) {
      LOG.info("Exception occurred while aborting the client " + ioe);
    }
    closeConnectionToNamenode();
  }

  /** Close/abort all files being written. */
  private void closeAllFilesBeingWritten(final boolean abort) {
    for(;;) {
      final String src;
      final DFSOutputStream out;
      synchronized(filesBeingWritten) {
        if (filesBeingWritten.isEmpty()) {
          return;
        }
        src = filesBeingWritten.keySet().iterator().next();
        out = filesBeingWritten.remove(src);
      }
      if (out != null) {
        try {
          if (abort) {
            out.abort();
          } else {
            out.close();
          }
        } catch(IOException ie) {
          LOG.error("Failed to " + (abort? "abort": "close") + " file " + src,
              ie);
        }
      }
    }
  }

  /**
   * Close the file system, abandoning all of the leases and files being
   * created, and close connections to the namenode.
   */
  @Override
  public synchronized void close() throws IOException {
    if(clientRunning) {
      closeAllFilesBeingWritten(false);
      clientRunning = false;
      getLeaseRenewer().closeClient(this);
      // close connections to the namenode
      closeConnectionToNamenode();
    }
  }

  /**
   * Get the default block size for this cluster
   * @return the default block size in bytes
   */
  public long getDefaultBlockSize() {
    return dfsClientConf.defaultBlockSize;
  }

  /**
   * @see ClientProtocol#getPreferredBlockSize(String)
   */
  public long getBlockSize(String f) throws IOException {
    try {
      return namenode.getPreferredBlockSize(f);
    } catch (IOException ie) {
      LOG.warn("Problem getting block size", ie);
      throw ie;
    }
  }

  /**
   * Get server default values for a number of configuration params.
   * @see ClientProtocol#getServerDefaults()
   */
  public FsServerDefaults getServerDefaults() throws IOException {
    long now = Time.now();
    if (now - serverDefaultsLastUpdate > SERVER_DEFAULTS_VALIDITY_PERIOD) {
      serverDefaults = namenode.getServerDefaults();
      serverDefaultsLastUpdate = now;
    }
    return serverDefaults;
  }

  /**
   * Get a canonical token service name for this client's tokens.  Null should
   * be returned if the client is not using tokens.
   * @return the token service for the client
   */
  @InterfaceAudience.LimitedPrivate( { "HDFS" })
  public String getCanonicalServiceName() {
    return (dtService != null) ? dtService.toString() : null;
  }

  /**
   * @see ClientProtocol#getDelegationToken(Text)
   */
  public Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
      throws IOException {
    assert dtService != null;
    Token<DelegationTokenIdentifier> token =
      namenode.getDelegationToken(renewer);

    if (token != null) {
      token.setService(this.dtService);
      LOG.info("Created " + DelegationTokenIdentifier.stringifyToken(token));
    } else {
      LOG.info("Cannot get delegation token from " + renewer);
    }
    return token;
  }

  /**
   * Renew a delegation token
   * @param token the token to renew
   * @return the new expiration time
   * @throws InvalidToken
   * @throws IOException
   * @deprecated Use Token.renew instead.
   */
  @Deprecated
  public long renewDelegationToken(Token<DelegationTokenIdentifier> token)
      throws InvalidToken, IOException {
    LOG.info("Renewing " + DelegationTokenIdentifier.stringifyToken(token));
    try {
      return token.renew(conf);
    } catch (InterruptedException ie) {
      throw new RuntimeException("caught interrupted", ie);
    } catch (RemoteException re) {
      throw re.unwrapRemoteException(InvalidToken.class,
                                     AccessControlException.class);
    }
  }

  private static final Map<String, Boolean> localAddrMap = Collections
      .synchronizedMap(new HashMap<String, Boolean>());

  static boolean isLocalAddress(InetSocketAddress targetAddr) {
    InetAddress addr = targetAddr.getAddress();
    Boolean cached = localAddrMap.get(addr.getHostAddress());
    if (cached != null) {
      if (LOG.isTraceEnabled()) {
        LOG.trace("Address " + targetAddr +
                  (cached ? " is local" : " is not local"));
      }
      return cached;
    }

    boolean local = NetUtils.isLocalAddress(addr);

    if (LOG.isTraceEnabled()) {
      LOG.trace("Address " + targetAddr +
                (local ? " is local" : " is not local"));
    }
    localAddrMap.put(addr.getHostAddress(), local);
    return local;
  }

  /**
   * Should the block access token be refetched on an exception
   *
   * @param ex Exception received
   * @param targetAddr Target datanode address from where exception was received
   * @return true if the block access token has expired or is invalid and it
   *         should be refetched
   */
  private static boolean tokenRefetchNeeded(IOException ex,
      InetSocketAddress targetAddr) {
    /*
     * Get a new access token and retry. Retry is needed in 2 cases. 1) When
     * both NN and DN re-started while DFSClient holding a cached access token.
     * 2) In the case that NN fails to update its access key at pre-set interval
     * (by a wide margin) and subsequently restarts. In this case, DN
     * re-registers itself with NN and receives a new access key, but DN will
     * delete the old access key from its memory since it's considered expired
     * based on the estimated expiration date.
     */
    if (ex instanceof InvalidBlockTokenException || ex instanceof InvalidToken) {
      LOG.info("Access token was invalid when connecting to " + targetAddr
          + " : " + ex);
      return true;
    }
    return false;
  }

  /**
   * Cancel a delegation token
   * @param token the token to cancel
   * @throws InvalidToken
   * @throws IOException
   * @deprecated Use Token.cancel instead.
   */
  @Deprecated
  public void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
      throws InvalidToken, IOException {
    LOG.info("Cancelling " + DelegationTokenIdentifier.stringifyToken(token));
    try {
      token.cancel(conf);
    } catch (InterruptedException ie) {
      throw new RuntimeException("caught interrupted", ie);
    } catch (RemoteException re) {
      throw re.unwrapRemoteException(InvalidToken.class,
                                     AccessControlException.class);
    }
  }

  @InterfaceAudience.Private
  public static class Renewer extends TokenRenewer {

    static {
      // Ensure that HDFS Configuration files are loaded before trying to use
      // the renewer.
      HdfsConfiguration.init();
    }

    @Override
    public boolean handleKind(Text kind) {
      return DelegationTokenIdentifier.HDFS_DELEGATION_KIND.equals(kind);
    }

    @SuppressWarnings("unchecked")
    @Override
    public long renew(Token<?> token, Configuration conf) throws IOException {
      Token<DelegationTokenIdentifier> delToken =
        (Token<DelegationTokenIdentifier>) token;
      ClientProtocol nn = getNNProxy(delToken, conf);
      try {
        return nn.renewDelegationToken(delToken);
      } catch (RemoteException re) {
        throw re.unwrapRemoteException(InvalidToken.class,
                                       AccessControlException.class);
      }
    }

    @SuppressWarnings("unchecked")
    @Override
    public void cancel(Token<?> token, Configuration conf) throws IOException {
      Token<DelegationTokenIdentifier> delToken =
          (Token<DelegationTokenIdentifier>) token;
      LOG.info("Cancelling " +
               DelegationTokenIdentifier.stringifyToken(delToken));
      ClientProtocol nn = getNNProxy(delToken, conf);
      try {
        nn.cancelDelegationToken(delToken);
      } catch (RemoteException re) {
        throw re.unwrapRemoteException(InvalidToken.class,
            AccessControlException.class);
      }
    }

    private static ClientProtocol getNNProxy(
        Token<DelegationTokenIdentifier> token, Configuration conf)
        throws IOException {
      URI uri = HAUtil.getServiceUriFromToken(HdfsConstants.HDFS_URI_SCHEME,
              token);
      if (HAUtil.isTokenForLogicalUri(token) &&
          !HAUtil.isLogicalUri(conf, uri)) {
        // If the token is for a logical nameservice, but the configuration
        // we have disagrees about that, we can't actually renew it.
        // This can be the case in MR, for example, if the RM doesn't
        // have all of the HA clusters configured in its configuration.
        throw new IOException("Unable to map logical nameservice URI '" +
            uri + "' to a NameNode. Local configuration does not have " +
            "a failover proxy provider configured.");
      }

      NameNodeProxies.ProxyAndInfo<ClientProtocol> info =
        NameNodeProxies.createProxy(conf, uri, ClientProtocol.class);
      assert info.getDelegationTokenService().equals(token.getService()) :
        "Returned service '" + info.getDelegationTokenService().toString() +
        "' doesn't match expected service '" +
        token.getService().toString() + "'";

      return info.getProxy();
    }

    @Override
    public boolean isManaged(Token<?> token) throws IOException {
      return true;
    }
  }

  /**
   * Report corrupt blocks that were discovered by the client.
   * @see ClientProtocol#reportBadBlocks(LocatedBlock[])
   */
  public void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
    namenode.reportBadBlocks(blocks);
  }

  public short getDefaultReplication() {
    return dfsClientConf.defaultReplication;
  }

  public LocatedBlocks getLocatedBlocks(String src, long start)
      throws IOException {
    return getLocatedBlocks(src, start, dfsClientConf.prefetchSize);
  }

  /*
   * This is just a wrapper around callGetBlockLocations, but non-static so that
   * we can stub it out for tests.
   */
  @VisibleForTesting
  public LocatedBlocks getLocatedBlocks(String src, long start, long length)
      throws IOException {
    return callGetBlockLocations(namenode, src, start, length);
  }

  /**
   * @see ClientProtocol#getBlockLocations(String, long, long)
   */
  static LocatedBlocks callGetBlockLocations(ClientProtocol namenode,
      String src, long start, long length)
      throws IOException {
    try {
      return namenode.getBlockLocations(src, start, length);
    } catch(RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     FileNotFoundException.class,
                                     UnresolvedPathException.class);
    }
  }

  /**
   * Recover a file's lease
   * @param src a file's path
   * @return true if the file is already closed
   * @throws IOException
   */
  boolean recoverLease(String src) throws IOException {
    checkOpen();

    try {
      return namenode.recoverLease(src, clientName);
    } catch (RemoteException re) {
      throw re.unwrapRemoteException(FileNotFoundException.class,
                                     AccessControlException.class,
                                     UnresolvedPathException.class);
    }
  }

  /**
   * Get block location info about a file.
   *
   * getBlockLocations() returns a list of hostnames that store
   * data for a specific file region.  It returns a set of hostnames
   * for every block within the indicated region.
   *
   * This function is very useful when writing code that considers
   * data-placement when performing operations.  For example, the
   * MapReduce system tries to schedule tasks on the same machines
   * as the data-block the task processes.
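   *
   * A sketch of typical use (the path and length are illustrative):
   * <pre>
   *   BlockLocation[] locs = client.getBlockLocations("/data/part-0", 0, len);
   *   for (BlockLocation loc : locs) {
   *     String[] hosts = loc.getHosts(); // candidate hosts for this block
   *   }
   * </pre>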
   */
  public BlockLocation[] getBlockLocations(String src, long start,
      long length) throws IOException, UnresolvedLinkException {
    LocatedBlocks blocks = getLocatedBlocks(src, start, length);
    BlockLocation[] locations = DFSUtil.locatedBlocks2Locations(blocks);
    HdfsBlockLocation[] hdfsLocations = new HdfsBlockLocation[locations.length];
    for (int i = 0; i < locations.length; i++) {
      hdfsLocations[i] = new HdfsBlockLocation(locations[i], blocks.get(i));
    }
    return hdfsLocations;
  }

  /**
   * Get block location information about a list of {@link HdfsBlockLocation}.
   * Used by {@link DistributedFileSystem#getFileBlockStorageLocations(List)} to
   * get {@link BlockStorageLocation}s for blocks returned by
   * {@link DistributedFileSystem#getFileBlockLocations(org.apache.hadoop.fs.FileStatus, long, long)}.
   *
   * This is done by making a round of RPCs to the associated datanodes,
   * asking for the volume of each block replica. The returned array of
   * {@link BlockStorageLocation} exposes this information as a
   * {@link VolumeId}.
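   *
   * An illustrative call (it assumes the feature is switched on via
   * {@link DFSConfigKeys#DFS_HDFS_BLOCKS_METADATA_ENABLED} on both the
   * client and the datanodes):
   * <pre>
   *   BlockLocation[] locs = client.getBlockLocations(src, 0, len);
   *   BlockStorageLocation[] vols =
   *       client.getBlockStorageLocations(Arrays.asList(locs));
   * </pre>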
1202   * 
1203   * @param blockLocations
1204   *          target blocks on which to query volume location information
1205   * @return volumeBlockLocations original block array augmented with additional
1206   *         volume location information for each replica.
1207   */
1208  public BlockStorageLocation[] getBlockStorageLocations(
1209      List<BlockLocation> blockLocations) throws IOException,
1210      UnsupportedOperationException, InvalidBlockTokenException {
1211    if (!getConf().getHdfsBlocksMetadataEnabled) {
1212      throw new UnsupportedOperationException("Datanode-side support for " +
1213          "getVolumeBlockLocations() must also be enabled in the client " +
1214          "configuration.");
1215    }
1216    // Downcast blockLocations and fetch out required LocatedBlock(s)
1217    List<LocatedBlock> blocks = new ArrayList<LocatedBlock>();
1218    for (BlockLocation loc : blockLocations) {
1219      if (!(loc instanceof HdfsBlockLocation)) {
1220        throw new ClassCastException("DFSClient#getVolumeBlockLocations " +
1221            "expected to be passed HdfsBlockLocations");
1222      }
1223      HdfsBlockLocation hdfsLoc = (HdfsBlockLocation) loc;
1224      blocks.add(hdfsLoc.getLocatedBlock());
1225    }
1226    
1227    // Re-group the LocatedBlocks to be grouped by datanodes, with the values
1228    // a list of the LocatedBlocks on the datanode.
1229    Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks = 
1230        new LinkedHashMap<DatanodeInfo, List<LocatedBlock>>();
1231    for (LocatedBlock b : blocks) {
1232      for (DatanodeInfo info : b.getLocations()) {
1233        if (!datanodeBlocks.containsKey(info)) {
1234          datanodeBlocks.put(info, new ArrayList<LocatedBlock>());
1235        }
1236        List<LocatedBlock> l = datanodeBlocks.get(info);
1237        l.add(b);
1238      }
1239    }
1240        
1241    // Make RPCs to the datanodes to get volume locations for its replicas
1242    Map<DatanodeInfo, HdfsBlocksMetadata> metadatas = BlockStorageLocationUtil
1243        .queryDatanodesForHdfsBlocksMetadata(conf, datanodeBlocks,
1244            getConf().getFileBlockStorageLocationsNumThreads,
1245            getConf().getFileBlockStorageLocationsTimeoutMs,
1246            getConf().connectToDnViaHostname);
1247    
1248    if (LOG.isTraceEnabled()) {
1249      LOG.trace("metadata returned: "
1250          + Joiner.on("\n").withKeyValueSeparator("=").join(metadatas));
1251    }
1252    
1253    // Regroup the returned VolumeId metadata to again be grouped by
1254    // LocatedBlock rather than by datanode
1255    Map<LocatedBlock, List<VolumeId>> blockVolumeIds = BlockStorageLocationUtil
1256        .associateVolumeIdsWithBlocks(blocks, metadatas);
1257    
1258    // Combine original BlockLocations with new VolumeId information
1259    BlockStorageLocation[] volumeBlockLocations = BlockStorageLocationUtil
1260        .convertToVolumeBlockLocations(blocks, blockVolumeIds);
1261
1262    return volumeBlockLocations;
1263  }
1264  
1265  public DFSInputStream open(String src) 
1266      throws IOException, UnresolvedLinkException {
1267    return open(src, dfsClientConf.ioBufferSize, true, null);
1268  }
1269
1270  /**
1271   * Create an input stream that obtains the list of block locations from
1272   * the namenode and then reads from the appropriate datanodes, verifying
1273   * checksums and failing over between replicas as needed.
1275   * @deprecated Use {@link #open(String, int, boolean)} instead.
1276   */
1277  @Deprecated
1278  public DFSInputStream open(String src, int buffersize, boolean verifyChecksum,
1279                             FileSystem.Statistics stats)
1280      throws IOException, UnresolvedLinkException {
1281    return open(src, buffersize, verifyChecksum);
1282  }
1283  
1284
1285  /**
1286   * Create an input stream that obtains the list of block locations from
1287   * the namenode and then reads from the appropriate datanodes, verifying
1288   * checksums (when requested) and failing over between replicas as needed.
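   * <p>For example (a sketch; {@code client} is an assumed open DFSClient):
   * <pre>{@code
   * DFSInputStream in = client.open("/user/alice/data.bin", 4096, true);
   * try {
   *   byte[] buf = new byte[4096];
   *   int n;
   *   while ((n = in.read(buf, 0, buf.length)) > 0) {
   *     // consume buf[0..n)
   *   }
   * } finally {
   *   in.close();
   * }
   * }</pre>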
1290   */
1291  public DFSInputStream open(String src, int buffersize, boolean verifyChecksum)
1292      throws IOException, UnresolvedLinkException {
1293    checkOpen();
1294    // Get block info from namenode
1295    return new DFSInputStream(this, src, buffersize, verifyChecksum);
1296  }
1297
1298  /**
1299   * Get the namenode associated with this DFSClient object
1300   * @return the namenode associated with this DFSClient object
1301   */
1302  public ClientProtocol getNamenode() {
1303    return namenode;
1304  }
1305  
1306  /**
1307   * Call {@link #create(String, boolean, short, long, Progressable)} with
1308   * default <code>replication</code> and <code>blockSize</code> and null
1309   * <code>progress</code>.
1310   */
1311  public OutputStream create(String src, boolean overwrite) 
1312      throws IOException {
1313    return create(src, overwrite, dfsClientConf.defaultReplication,
1314        dfsClientConf.defaultBlockSize, null);
1315  }
1316    
1317  /**
1318   * Call {@link #create(String, boolean, short, long, Progressable)} with
1319   * default <code>replication</code> and <code>blockSize</code>.
1320   */
1321  public OutputStream create(String src, 
1322                             boolean overwrite,
1323                             Progressable progress) throws IOException {
1324    return create(src, overwrite, dfsClientConf.defaultReplication,
1325        dfsClientConf.defaultBlockSize, progress);
1326  }
1327    
1328  /**
1329   * Call {@link #create(String, boolean, short, long, Progressable)} with
1330   * null <code>progress</code>.
1331   */
1332  public OutputStream create(String src, 
1333                             boolean overwrite, 
1334                             short replication,
1335                             long blockSize) throws IOException {
1336    return create(src, overwrite, replication, blockSize, null);
1337  }
1338
1339  /**
1340   * Call {@link #create(String, boolean, short, long, Progressable, int)}
1341   * with default bufferSize.
1342   */
1343  public OutputStream create(String src, boolean overwrite, short replication,
1344      long blockSize, Progressable progress) throws IOException {
1345    return create(src, overwrite, replication, blockSize, progress,
1346        dfsClientConf.ioBufferSize);
1347  }
1348
1349  /**
1350   * Call {@link #create(String, FsPermission, EnumSet, short, long, 
1351   * Progressable, int, ChecksumOpt)} with default <code>permission</code>
1352   * {@link FsPermission#getFileDefault()}.
1353   * 
1354   * @param src File name
1355   * @param overwrite overwrite an existing file if true
1356   * @param replication replication factor for the file
1357   * @param blockSize maximum block size
1358   * @param progress interface for reporting client progress
1359   * @param buffersize underlying buffersize
1360   * 
1361   * @return output stream
1362   */
1363  public OutputStream create(String src,
1364                             boolean overwrite,
1365                             short replication,
1366                             long blockSize,
1367                             Progressable progress,
1368                             int buffersize)
1369      throws IOException {
1370    return create(src, FsPermission.getFileDefault(),
1371        overwrite ? EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE)
1372            : EnumSet.of(CreateFlag.CREATE), replication, blockSize, progress,
1373        buffersize, null);
1374  }
1375
1376  /**
1377   * Call {@link #create(String, FsPermission, EnumSet, boolean, short, 
1378   * long, Progressable, int, ChecksumOpt)} with <code>createParent</code>
1379   *  set to true.
1380   */
1381  public DFSOutputStream create(String src, 
1382                             FsPermission permission,
1383                             EnumSet<CreateFlag> flag, 
1384                             short replication,
1385                             long blockSize,
1386                             Progressable progress,
1387                             int buffersize,
1388                             ChecksumOpt checksumOpt)
1389      throws IOException {
1390    return create(src, permission, flag, true,
1391        replication, blockSize, progress, buffersize, checksumOpt, null);
1392  }
1393
1394  /**
1395   * Create a new dfs file with the specified block replication 
1396   * with write-progress reporting and return an output stream for writing
1397   * into the file.  
1398   * 
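   * <p>A minimal sketch (assumed names) creating a 3-way replicated file with
   * a 128 MB block size and default checksum options:
   * <pre>{@code
   * DFSOutputStream out = client.create("/tmp/example",
   *     FsPermission.getFileDefault(),
   *     EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
   *     true, (short) 3, 128L * 1024 * 1024, null, 4096, null);
   * out.write(data);
   * out.close();
   * }</pre>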
1399   * @param src File name
1400   * @param permission The permission of the file being created.
1401   *          If null, use default permission {@link FsPermission#getFileDefault()}
1402   * @param flag whether to create a new file, overwrite an existing one,
1403   *          or append to an existing file
1404   * @param createParent create missing parent directory if true
1405   * @param replication block replication
1406   * @param blockSize maximum block size
1407   * @param progress interface for reporting client progress
1408   * @param buffersize underlying buffer size 
1409   * @param checksumOpt checksum options
1410   * 
1411   * @return output stream
1412   * 
1413   * @see ClientProtocol#create(String, FsPermission, String, EnumSetWritable,
1414   * boolean, short, long) for detailed description of exceptions thrown
1415   */
1416  public DFSOutputStream create(String src, 
1417                             FsPermission permission,
1418                             EnumSet<CreateFlag> flag, 
1419                             boolean createParent,
1420                             short replication,
1421                             long blockSize,
1422                             Progressable progress,
1423                             int buffersize,
1424                             ChecksumOpt checksumOpt) throws IOException {
1425    return create(src, permission, flag, createParent, replication, blockSize, 
1426        progress, buffersize, checksumOpt, null);
1427  }
1428
1429  /**
1430   * Same as {@link #create(String, FsPermission, EnumSet, boolean, short, long,
1431   * Progressable, int, ChecksumOpt)} with the addition of favoredNodes, a hint
1432   * as to where the namenode should place the file blocks.
1433   * The favored nodes hint is not persisted in HDFS, so it may be honored only
1434   * at creation time; balancing or re-replication may later move blocks away
1435   * from the favored nodes. A value of null means no favored nodes for this
1436   * create.
1437   */
1438  public DFSOutputStream create(String src, 
1439                             FsPermission permission,
1440                             EnumSet<CreateFlag> flag, 
1441                             boolean createParent,
1442                             short replication,
1443                             long blockSize,
1444                             Progressable progress,
1445                             int buffersize,
1446                             ChecksumOpt checksumOpt,
1447                             InetSocketAddress[] favoredNodes) throws IOException {
1448    checkOpen();
1449    if (permission == null) {
1450      permission = FsPermission.getFileDefault();
1451    }
1452    FsPermission masked = permission.applyUMask(dfsClientConf.uMask);
1453    if(LOG.isDebugEnabled()) {
1454      LOG.debug(src + ": masked=" + masked);
1455    }
1456    String[] favoredNodeStrs = null;
1457    if (favoredNodes != null) {
1458      favoredNodeStrs = new String[favoredNodes.length];
1459      for (int i = 0; i < favoredNodes.length; i++) {
1460        favoredNodeStrs[i] = 
1461            favoredNodes[i].getHostName() + ":" 
1462                         + favoredNodes[i].getPort();
1463      }
1464    }
1465    final DFSOutputStream result = DFSOutputStream.newStreamForCreate(this,
1466        src, masked, flag, createParent, replication, blockSize, progress,
1467        buffersize, dfsClientConf.createChecksum(checksumOpt), favoredNodeStrs);
1468    beginFileLease(src, result);
1469    return result;
1470  }
1471  
1472  /**
1473   * Append to an existing file if {@link CreateFlag#APPEND} is present
1474   */
1475  private DFSOutputStream primitiveAppend(String src, EnumSet<CreateFlag> flag,
1476      int buffersize, Progressable progress) throws IOException {
1477    if (flag.contains(CreateFlag.APPEND)) {
1478      HdfsFileStatus stat = getFileInfo(src);
1479      if (stat == null) { // No file to append to
1480        // New file needs to be created if create option is present
1481        if (!flag.contains(CreateFlag.CREATE)) {
1482          throw new FileNotFoundException("failed to append to non-existent file "
1483              + src + " on client " + clientName);
1484        }
1485        return null;
1486      }
1487      return callAppend(stat, src, buffersize, progress);
1488    }
1489    return null;
1490  }
1491  
1492  /**
1493   * Same as {@link #create(String, FsPermission, EnumSet, short, long,
1494   * Progressable, int, ChecksumOpt)} except that the permission
1495   * is absolute (i.e. it has already been masked with the umask).
1496   */
1497  public DFSOutputStream primitiveCreate(String src, 
1498                             FsPermission absPermission,
1499                             EnumSet<CreateFlag> flag,
1500                             boolean createParent,
1501                             short replication,
1502                             long blockSize,
1503                             Progressable progress,
1504                             int buffersize,
1505                             ChecksumOpt checksumOpt)
1506      throws IOException, UnresolvedLinkException {
1507    checkOpen();
1508    CreateFlag.validate(flag);
1509    DFSOutputStream result = primitiveAppend(src, flag, buffersize, progress);
1510    if (result == null) {
1511      DataChecksum checksum = dfsClientConf.createChecksum(checksumOpt);
1512      result = DFSOutputStream.newStreamForCreate(this, src, absPermission,
1513          flag, createParent, replication, blockSize, progress, buffersize,
1514          checksum);
1515    }
1516    beginFileLease(src, result);
1517    return result;
1518  }
1519  
1520  /**
1521   * Creates a symbolic link.
1522   * 
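   * <p>For example, {@code client.createSymlink("/data/current", "/data/latest",
   * true)} makes {@code /data/latest} a link that resolves to
   * {@code /data/current}.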
1523   * @see ClientProtocol#createSymlink(String, String, FsPermission, boolean)
1524   */
1525  public void createSymlink(String target, String link, boolean createParent)
1526      throws IOException {
1527    try {
1528      FsPermission dirPerm = 
1529          FsPermission.getDefault().applyUMask(dfsClientConf.uMask); 
1530      namenode.createSymlink(target, link, dirPerm, createParent);
1531    } catch (RemoteException re) {
1532      throw re.unwrapRemoteException(AccessControlException.class,
1533                                     FileAlreadyExistsException.class, 
1534                                     FileNotFoundException.class,
1535                                     ParentNotDirectoryException.class,
1536                                     NSQuotaExceededException.class, 
1537                                     DSQuotaExceededException.class,
1538                                     UnresolvedPathException.class,
1539                                     SnapshotAccessControlException.class);
1540    }
1541  }
1542
1543  /**
1544   * Resolve the *first* symlink, if any, in the path.
1545   * 
1546   * @see ClientProtocol#getLinkTarget(String)
1547   */
1548  public String getLinkTarget(String path) throws IOException { 
1549    checkOpen();
1550    try {
1551      return namenode.getLinkTarget(path);
1552    } catch (RemoteException re) {
1553      throw re.unwrapRemoteException(AccessControlException.class,
1554                                     FileNotFoundException.class);
1555    }
1556  }
1557
1558  /** Get the output stream returned by an append call to the given file. */
1559  private DFSOutputStream callAppend(HdfsFileStatus stat, String src,
1560      int buffersize, Progressable progress) throws IOException {
1561    LocatedBlock lastBlock = null;
1562    try {
1563      lastBlock = namenode.append(src, clientName);
1564    } catch(RemoteException re) {
1565      throw re.unwrapRemoteException(AccessControlException.class,
1566                                     FileNotFoundException.class,
1567                                     SafeModeException.class,
1568                                     DSQuotaExceededException.class,
1569                                     UnsupportedOperationException.class,
1570                                     UnresolvedPathException.class,
1571                                     SnapshotAccessControlException.class);
1572    }
1573    return DFSOutputStream.newStreamForAppend(this, src, buffersize, progress,
1574        lastBlock, stat, dfsClientConf.createChecksum());
1575  }
1576  
1577  /**
1578   * Append to an existing HDFS file.  
1579   * 
1580   * @param src file name
1581   * @param buffersize buffer size
1582   * @param progress for reporting write-progress; null is acceptable.
1583   * @param statistics file system statistics; null is acceptable.
1584   * @return an output stream for writing into the file
1585   * 
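   * <p>A short sketch (assumed names):
   * <pre>{@code
   * HdfsDataOutputStream out = client.append("/logs/app.log", 4096, null, null);
   * out.write(moreBytes);
   * out.close();
   * }</pre>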
1586   * @see ClientProtocol#append(String, String) 
1587   */
1588  public HdfsDataOutputStream append(final String src, final int buffersize,
1589      final Progressable progress, final FileSystem.Statistics statistics
1590      ) throws IOException {
1591    final DFSOutputStream out = append(src, buffersize, progress);
1592    return new HdfsDataOutputStream(out, statistics, out.getInitialLen());
1593  }
1594
1595  private DFSOutputStream append(String src, int buffersize, Progressable progress) 
1596      throws IOException {
1597    checkOpen();
1598    HdfsFileStatus stat = getFileInfo(src);
1599    if (stat == null) { // No file found
1600      throw new FileNotFoundException("failed to append to non-existent file "
1601          + src + " on client " + clientName);
1602    }
1603    final DFSOutputStream result = callAppend(stat, src, buffersize, progress);
1604    beginFileLease(src, result);
1605    return result;
1606  }
1607
1608  /**
1609   * Set replication for an existing file.
1610   * @param src file name
1611   * @param replication the new replication factor
1612   * 
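   * <p>For example, {@code client.setReplication("/data/f", (short) 5)} asks
   * the namenode to bring the file up to five replicas.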
1613   * @see ClientProtocol#setReplication(String, short)
1614   */
1615  public boolean setReplication(String src, short replication)
1616      throws IOException {
1617    try {
1618      return namenode.setReplication(src, replication);
1619    } catch(RemoteException re) {
1620      throw re.unwrapRemoteException(AccessControlException.class,
1621                                     FileNotFoundException.class,
1622                                     SafeModeException.class,
1623                                     DSQuotaExceededException.class,
1624                                     UnresolvedPathException.class,
1625                                     SnapshotAccessControlException.class);
1626    }
1627  }
1628
1629  /**
1630   * Rename file or directory.
1631   * @see ClientProtocol#rename(String, String)
1632   * @deprecated Use {@link #rename(String, String, Options.Rename...)} instead.
1633   */
1634  @Deprecated
1635  public boolean rename(String src, String dst) throws IOException {
1636    checkOpen();
1637    try {
1638      return namenode.rename(src, dst);
1639    } catch(RemoteException re) {
1640      throw re.unwrapRemoteException(AccessControlException.class,
1641                                     NSQuotaExceededException.class,
1642                                     DSQuotaExceededException.class,
1643                                     UnresolvedPathException.class,
1644                                     SnapshotAccessControlException.class);
1645    }
1646  }
1647
1648  /**
1649   * Move the blocks from the source files to the target file and delete the sources.
1650   * See {@link ClientProtocol#concat(String, String [])}. 
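   * <p>For example, {@code client.concat("/data/all",
   * new String[] {"/data/part-0", "/data/part-1"})} moves the blocks of the
   * two parts onto the end of {@code /data/all} and deletes the source files.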
1651   */
1652  public void concat(String trg, String [] srcs) throws IOException {
1653    checkOpen();
1654    try {
1655      namenode.concat(trg, srcs);
1656    } catch(RemoteException re) {
1657      throw re.unwrapRemoteException(AccessControlException.class,
1658                                     UnresolvedPathException.class,
1659                                     SnapshotAccessControlException.class);
1660    }
1661  }
1662  /**
1663   * Rename file or directory.
1664   * @see ClientProtocol#rename2(String, String, Options.Rename...)
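   * <p>For example, {@code client.rename("/tmp/a", "/tmp/b",
   * Options.Rename.OVERWRITE)} replaces {@code /tmp/b} if it already exists.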
1665   */
1666  public void rename(String src, String dst, Options.Rename... options)
1667      throws IOException {
1668    checkOpen();
1669    try {
1670      namenode.rename2(src, dst, options);
1671    } catch(RemoteException re) {
1672      throw re.unwrapRemoteException(AccessControlException.class,
1673                                     DSQuotaExceededException.class,
1674                                     FileAlreadyExistsException.class,
1675                                     FileNotFoundException.class,
1676                                     ParentNotDirectoryException.class,
1677                                     SafeModeException.class,
1678                                     NSQuotaExceededException.class,
1679                                     UnresolvedPathException.class,
1680                                     SnapshotAccessControlException.class);
1681    }
1682  }
1683  /**
1684   * Delete a file or directory. See {@link ClientProtocol#delete(String, boolean)}.
1685   * @deprecated Use {@link #delete(String, boolean)} instead.
1686   */
1687  @Deprecated
1688  public boolean delete(String src) throws IOException {
1689    checkOpen();
1690    return namenode.delete(src, true);
1691  }
1692
1693  /**
1694   * Delete a file or directory.
1695   * Deletes the contents of the directory if it is non-empty and
1696   * recursive is set to true.
1697   *
1698   * @see ClientProtocol#delete(String, boolean)
1699   */
1700  public boolean delete(String src, boolean recursive) throws IOException {
1701    checkOpen();
1702    try {
1703      return namenode.delete(src, recursive);
1704    } catch(RemoteException re) {
1705      throw re.unwrapRemoteException(AccessControlException.class,
1706                                     FileNotFoundException.class,
1707                                     SafeModeException.class,
1708                                     UnresolvedPathException.class,
1709                                     SnapshotAccessControlException.class);
1710    }
1711  }
1712  
1713  /** Check whether a path exists; implemented using getFileInfo(src).
1714   */
1715  public boolean exists(String src) throws IOException {
1716    checkOpen();
1717    return getFileInfo(src) != null;
1718  }
1719
1720  /**
1721   * Get a partial listing of the indicated directory.
1722   * No block locations are fetched.
1723   */
1724  public DirectoryListing listPaths(String src,  byte[] startAfter)
1725    throws IOException {
1726    return listPaths(src, startAfter, false);
1727  }
1728  
1729  /**
1730   * Get a partial listing of the indicated directory
1731   *
1732   * It is recommended to pass HdfsFileStatus.EMPTY_NAME as startAfter
1733   * if the application wants to fetch the listing starting from
1734   * the first entry in the directory.
1735   *
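   * <p>A sketch of paging through a large directory (assumed names):
   * <pre>{@code
   * byte[] cookie = HdfsFileStatus.EMPTY_NAME;
   * DirectoryListing page;
   * do {
   *   page = client.listPaths("/big/dir", cookie);
   *   for (HdfsFileStatus stat : page.getPartialListing()) {
   *     // process stat
   *   }
   *   cookie = page.getLastName();
   * } while (page.hasMore());
   * }</pre>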
1736   * @see ClientProtocol#getListing(String, byte[], boolean)
1737   */
1738  public DirectoryListing listPaths(String src,  byte[] startAfter,
1739      boolean needLocation) 
1740    throws IOException {
1741    checkOpen();
1742    try {
1743      return namenode.getListing(src, startAfter, needLocation);
1744    } catch(RemoteException re) {
1745      throw re.unwrapRemoteException(AccessControlException.class,
1746                                     FileNotFoundException.class,
1747                                     UnresolvedPathException.class);
1748    }
1749  }
1750
1751  /**
1752   * Get the file info for a specific file or directory.
1753   * @param src The string representation of the path to the file
1754   * @return object containing information regarding the file
1755   *         or null if file not found
1756   *         
1757   * @see ClientProtocol#getFileInfo(String) for description of exceptions
1758   */
1759  public HdfsFileStatus getFileInfo(String src) throws IOException {
1760    checkOpen();
1761    try {
1762      return namenode.getFileInfo(src);
1763    } catch(RemoteException re) {
1764      throw re.unwrapRemoteException(AccessControlException.class,
1765                                     FileNotFoundException.class,
1766                                     UnresolvedPathException.class);
1767    }
1768  }
1769  
1770  /**
1771   * Get the close status of a file.
1772   * @return true if the file is already closed
1773   */
1774  public boolean isFileClosed(String src) throws IOException {
1775    checkOpen();
1776    try {
1777      return namenode.isFileClosed(src);
1778    } catch(RemoteException re) {
1779      throw re.unwrapRemoteException(AccessControlException.class,
1780                                     FileNotFoundException.class,
1781                                     UnresolvedPathException.class);
1782    }
1783  }
1784  
1785  /**
1786   * Get the file info for a specific file or directory. If src
1787   * refers to a symlink then the FileStatus of the link is returned.
1788   * @param src path to a file or directory.
1789   * 
1790   * For description of exceptions thrown 
1791   * @see ClientProtocol#getFileLinkInfo(String)
1792   */
1793  public HdfsFileStatus getFileLinkInfo(String src) throws IOException {
1794    checkOpen();
1795    try {
1796      return namenode.getFileLinkInfo(src);
1797    } catch(RemoteException re) {
1798      throw re.unwrapRemoteException(AccessControlException.class,
1799                                     UnresolvedPathException.class);
1800    }
1801  }
1802
1803  /**
1804   * Get the checksum of a file.
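   * <p>For example, a sketch of a content comparison of two files, meaningful
   * only when both were written with the same block size and checksum
   * parameters:
   * <pre>{@code
   * boolean same = client.getFileChecksum("/a").equals(client.getFileChecksum("/b"));
   * }</pre>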
1805   * @param src The file path
1806   * @return The checksum 
1807   * @see DistributedFileSystem#getFileChecksum(Path)
1808   */
1809  public MD5MD5CRC32FileChecksum getFileChecksum(String src) throws IOException {
1810    checkOpen();
1811    return getFileChecksum(src, clientName, namenode, socketFactory,
1812        dfsClientConf.socketTimeout, getDataEncryptionKey(),
1813        dfsClientConf.connectToDnViaHostname);
1814  }
1815  
1816  @InterfaceAudience.Private
1817  public void clearDataEncryptionKey() {
1818    LOG.debug("Clearing encryption key");
1819    synchronized (this) {
1820      encryptionKey = null;
1821    }
1822  }
1823  
1824  /**
1825   * @return true if data sent between this client and DNs should be encrypted,
1826   *         false otherwise.
1827   * @throws IOException in the event of error communicating with the NN
1828   */
1829  boolean shouldEncryptData() throws IOException {
1830    FsServerDefaults d = getServerDefaults();
1831    return d == null ? false : d.getEncryptDataTransfer();
1832  }
1833  
1834  @InterfaceAudience.Private
1835  public DataEncryptionKey getDataEncryptionKey()
1836      throws IOException {
1837    if (shouldEncryptData() && 
1838        !this.trustedChannelResolver.isTrusted()) {
1839      synchronized (this) {
1840        if (encryptionKey == null ||
1841            encryptionKey.expiryDate < Time.now()) {
1842          LOG.debug("Getting new encryption token from NN");
1843          encryptionKey = namenode.getDataEncryptionKey();
1844        }
1845        return encryptionKey;
1846      }
1847    } else {
1848      return null;
1849    }
1850  }
1851
1852  /**
1853   * Get the checksum of a file.
1854   * @param src The file path
1855   * @param clientName the name of the client requesting the checksum.
1856   * @param namenode the RPC proxy for the namenode
1857   * @param socketFactory to create sockets to connect to DNs
1858   * @param socketTimeout timeout to use when connecting and waiting for a response
1859   * @param encryptionKey the key needed to communicate with DNs in this cluster
1860   * @param connectToDnViaHostname whether the client should use hostnames instead of IPs
1861   * @return The checksum 
1862   */
1863  private static MD5MD5CRC32FileChecksum getFileChecksum(String src,
1864      String clientName,
1865      ClientProtocol namenode, SocketFactory socketFactory, int socketTimeout,
1866      DataEncryptionKey encryptionKey, boolean connectToDnViaHostname)
1867      throws IOException {
1868    //get all block locations
1869    LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, Long.MAX_VALUE);
1870    if (null == blockLocations) {
1871      throw new FileNotFoundException("File does not exist: " + src);
1872    }
1873    List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
1874    final DataOutputBuffer md5out = new DataOutputBuffer();
1875    int bytesPerCRC = -1;
1876    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
1877    long crcPerBlock = 0;
1878    boolean refetchBlocks = false;
1879    int lastRetriedIndex = -1;
1880
1881    //get block checksum for each block
1882    for(int i = 0; i < locatedblocks.size(); i++) {
1883      if (refetchBlocks) {  // refetch to get fresh tokens
1884        blockLocations = callGetBlockLocations(namenode, src, 0, Long.MAX_VALUE);
1885        if (null == blockLocations) {
1886          throw new FileNotFoundException("File does not exist: " + src);
1887        }
1888        locatedblocks = blockLocations.getLocatedBlocks();
1889        refetchBlocks = false;
1890      }
1891      LocatedBlock lb = locatedblocks.get(i);
1892      final ExtendedBlock block = lb.getBlock();
1893      final DatanodeInfo[] datanodes = lb.getLocations();
1894      
1895      //try each datanode location of the block
1896      final int timeout = 3000 * datanodes.length + socketTimeout;
1897      boolean done = false;
1898      for(int j = 0; !done && j < datanodes.length; j++) {
1899        DataOutputStream out = null;
1900        DataInputStream in = null;
1901        
1902        try {
1903          //connect to a datanode
1904          IOStreamPair pair = connectToDN(socketFactory, connectToDnViaHostname,
1905              encryptionKey, datanodes[j], timeout);
1906          out = new DataOutputStream(new BufferedOutputStream(pair.out,
1907              HdfsConstants.SMALL_BUFFER_SIZE));
1908          in = new DataInputStream(pair.in);
1909
1910          if (LOG.isDebugEnabled()) {
1911            LOG.debug("write to " + datanodes[j] + ": "
1912                + Op.BLOCK_CHECKSUM + ", block=" + block);
1913          }
1914          // get block MD5
1915          new Sender(out).blockChecksum(block, lb.getBlockToken());
1916
1917          final BlockOpResponseProto reply =
1918            BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));
1919
1920          if (reply.getStatus() != Status.SUCCESS) {
1921            if (reply.getStatus() == Status.ERROR_ACCESS_TOKEN) {
1922              throw new InvalidBlockTokenException();
1923            } else {
1924              throw new IOException("Bad response " + reply + " for block "
1925                  + block + " from datanode " + datanodes[j]);
1926            }
1927          }
1928          
1929          OpBlockChecksumResponseProto checksumData =
1930            reply.getChecksumResponse();
1931
1932          //read byte-per-checksum
1933          final int bpc = checksumData.getBytesPerCrc();
1934          if (i == 0) { //first block
1935            bytesPerCRC = bpc;
1936          }
1937          else if (bpc != bytesPerCRC) {
1938            throw new IOException("Byte-per-checksum not matched: bpc=" + bpc
1939                + " but bytesPerCRC=" + bytesPerCRC);
1940          }
1941          
1942          //read crc-per-block
1943          final long cpb = checksumData.getCrcPerBlock();
1944          if (locatedblocks.size() > 1 && i == 0) {
1945            crcPerBlock = cpb;
1946          }
1947
1948          //read md5
1949          final MD5Hash md5 = new MD5Hash(
1950              checksumData.getMd5().toByteArray());
1951          md5.write(md5out);
1952          
1953          // read crc-type
1954          final DataChecksum.Type ct;
1955          if (checksumData.hasCrcType()) {
1956            ct = PBHelper.convert(checksumData
1957                .getCrcType());
1958          } else {
1959            LOG.debug("Retrieving checksum from an earlier-version DataNode: " +
1960                      "inferring checksum by reading first byte");
1961            ct = inferChecksumTypeByReading(
1962                clientName, socketFactory, socketTimeout, lb, datanodes[j],
1963                encryptionKey, connectToDnViaHostname);
1964          }
1965
1966          if (i == 0) { // first block
1967            crcType = ct;
1968          } else if (crcType != DataChecksum.Type.MIXED
1969              && crcType != ct) {
1970            // if crc types are mixed in a file
1971            crcType = DataChecksum.Type.MIXED;
1972          }
1973
1974          done = true;
1975
1976          if (LOG.isDebugEnabled()) {
1977            if (i == 0) {
1978              LOG.debug("set bytesPerCRC=" + bytesPerCRC
1979                  + ", crcPerBlock=" + crcPerBlock);
1980            }
1981            LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
1982          }
1983        } catch (InvalidBlockTokenException ibte) {
1984          if (i > lastRetriedIndex) {
1985            if (LOG.isDebugEnabled()) {
1986              LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM "
1987                  + "for file " + src + " for block " + block
1988                  + " from datanode " + datanodes[j]
1989                  + ". Will retry the block once.");
1990            }
1991            lastRetriedIndex = i;
1992            done = true; // actually it's not done; but we'll retry
1993            i--; // repeat at i-th block
1994            refetchBlocks = true;
1995            break;
1996          }
1997        } catch (IOException ie) {
1998          LOG.warn("src=" + src + ", datanodes["+j+"]=" + datanodes[j], ie);
1999        } finally {
2000          IOUtils.closeStream(in);
2001          IOUtils.closeStream(out);
2002        }
2003      }
2004
2005      if (!done) {
2006        throw new IOException("Failed to get block MD5 for " + block);
2007      }
2008    }
2009
2010    //compute file MD5
2011    final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData()); 
2012    switch (crcType) {
2013      case CRC32:
2014        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC,
2015            crcPerBlock, fileMD5);
2016      case CRC32C:
2017        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC,
2018            crcPerBlock, fileMD5);
2019      default:
2020        // If there is no block allocated for the file,
2021        // return one with the magic entry that matches what previous
2022        // hdfs versions return.
2023        if (locatedblocks.size() == 0) {
2024          return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
2025        }
2026
2027        // we should never get here since the validity was checked
2028        // when getCrcType() was called above.
2029        return null;
2030    }
2031  }
2032
2033  /**
2034   * Connect to the given datanode's data transfer port, and return
2035   * the resulting IOStreamPair. This includes encryption wrapping, etc.
2036   */
2037  private static IOStreamPair connectToDN(
2038      SocketFactory socketFactory, boolean connectToDnViaHostname,
2039      DataEncryptionKey encryptionKey, DatanodeInfo dn, int timeout)
2040      throws IOException
2041  {
2042    boolean success = false;
2043    Socket sock = null;
2044    try {
2045      sock = socketFactory.createSocket();
2046      String dnAddr = dn.getXferAddr(connectToDnViaHostname);
2047      if (LOG.isDebugEnabled()) {
2048        LOG.debug("Connecting to datanode " + dnAddr);
2049      }
2050      NetUtils.connect(sock, NetUtils.createSocketAddr(dnAddr), timeout);
2051      sock.setSoTimeout(timeout);
2052  
2053      OutputStream unbufOut = NetUtils.getOutputStream(sock);
2054      InputStream unbufIn = NetUtils.getInputStream(sock);
2055      IOStreamPair ret;
2056      if (encryptionKey != null) {
2057        ret = DataTransferEncryptor.getEncryptedStreams(
2058                unbufOut, unbufIn, encryptionKey);
2059      } else {
2060        ret = new IOStreamPair(unbufIn, unbufOut);        
2061      }
2062      success = true;
2063      return ret;
2064    } finally {
2065      if (!success) {
2066        IOUtils.closeSocket(sock);
2067      }
2068    }
2069  }
2070  
2071  /**
2072   * Infer the checksum type for a replica by sending an OP_READ_BLOCK
2073   * for the first byte of that replica. This is used for compatibility
2074   * with older HDFS versions which did not include the checksum type in
2075   * OpBlockChecksumResponseProto.
2076   *
2077   * @param clientName the name of the DFSClient requesting the checksum
2078   * @param socketFactory used to create sockets to connect to DNs
2079   * @param socketTimeout timeout for connecting and waiting for a response
2080   * @param lb the located block
2081   * @param dn the datanode to read from
2082   * @return the inferred checksum type
2083   * @throws IOException if an error occurs
2084   */
2085  private static Type inferChecksumTypeByReading(
2086      String clientName, SocketFactory socketFactory, int socketTimeout,
2087      LocatedBlock lb, DatanodeInfo dn,
2088      DataEncryptionKey encryptionKey, boolean connectToDnViaHostname)
2089      throws IOException {
2090    IOStreamPair pair = connectToDN(socketFactory, connectToDnViaHostname,
2091        encryptionKey, dn, socketTimeout);
2092
2093    try {
2094      DataOutputStream out = new DataOutputStream(new BufferedOutputStream(pair.out,
2095          HdfsConstants.SMALL_BUFFER_SIZE));
2096      DataInputStream in = new DataInputStream(pair.in);
2097  
2098      new Sender(out).readBlock(lb.getBlock(), lb.getBlockToken(), clientName,
2099          0, 1, true, CachingStrategy.newDefaultStrategy());
2100      final BlockOpResponseProto reply =
2101          BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));
2102      
2103      if (reply.getStatus() != Status.SUCCESS) {
2104        if (reply.getStatus() == Status.ERROR_ACCESS_TOKEN) {
2105          throw new InvalidBlockTokenException();
2106        } else {
2107          throw new IOException("Bad response " + reply + " trying to read "
2108              + lb.getBlock() + " from datanode " + dn);
2109        }
2110      }
2111      
2112      return PBHelper.convert(reply.getReadOpChecksumInfo().getChecksum().getType());
2113    } finally {
2114      IOUtils.cleanup(null, pair.in, pair.out);
2115    }
2116  }
2117
2118  /**
2119   * Set permissions to a file or directory.
2120   * @param src path name.
2121   * @param permission
2122   * 
2123   * @see ClientProtocol#setPermission(String, FsPermission)
2124   */
2125  public void setPermission(String src, FsPermission permission)
2126      throws IOException {
2127    checkOpen();
2128    try {
2129      namenode.setPermission(src, permission);
2130    } catch(RemoteException re) {
2131      throw re.unwrapRemoteException(AccessControlException.class,
2132                                     FileNotFoundException.class,
2133                                     SafeModeException.class,
2134                                     UnresolvedPathException.class,
2135                                     SnapshotAccessControlException.class);
2136    }
2137  }
2138
2139  /**
2140   * Set file or directory owner.
2141   * @param src path name.
2142   * @param username user id.
2143   * @param groupname user group.
2144   * 
2145   * @see ClientProtocol#setOwner(String, String, String)
2146   */
2147  public void setOwner(String src, String username, String groupname)
2148      throws IOException {
2149    checkOpen();
2150    try {
2151      namenode.setOwner(src, username, groupname);
2152    } catch(RemoteException re) {
2153      throw re.unwrapRemoteException(AccessControlException.class,
2154                                     FileNotFoundException.class,
2155                                     SafeModeException.class,
2156                                     UnresolvedPathException.class,
2157                                     SnapshotAccessControlException.class);                                   
2158    }
2159  }
2160
2161  /**
2162   * @see ClientProtocol#getStats()
2163   */
2164  public FsStatus getDiskStatus() throws IOException {
2165    long rawNums[] = namenode.getStats();
2166    return new FsStatus(rawNums[0], rawNums[1], rawNums[2]);
2167  }
2168
2169  /**
2170   * Returns count of blocks with no good replicas left. Normally should be 
2171   * zero.
2172   * @throws IOException
2173   */ 
2174  public long getMissingBlocksCount() throws IOException {
2175    return namenode.getStats()[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX];
2176  }
2177  
2178  /**
2179   * Returns count of blocks with one or more replicas missing.
2180   * @throws IOException
2181   */ 
2182  public long getUnderReplicatedBlocksCount() throws IOException {
2183    return namenode.getStats()[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX];
2184  }
2185  
2186  /**
2187   * Returns count of blocks with at least one replica marked corrupt. 
2188   * @throws IOException
2189   */ 
2190  public long getCorruptBlocksCount() throws IOException {
2191    return namenode.getStats()[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX];
2192  }
2193  
2194  /**
2195   * @return a list in which each entry describes a corrupt file/block
2196   * @throws IOException
2197   */
2198  public CorruptFileBlocks listCorruptFileBlocks(String path,
2199                                                 String cookie)
2200    throws IOException {
2201    return namenode.listCorruptFileBlocks(path, cookie);
2202  }
2203
2204  public DatanodeInfo[] datanodeReport(DatanodeReportType type)
2205      throws IOException {
2206    return namenode.getDatanodeReport(type);
2207  }
2208    
2209  /**
2210   * Enter, leave or get safe mode.
2211   * 
2212   * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction,boolean)
2213   */
2214  public boolean setSafeMode(SafeModeAction action) throws IOException {
2215    return setSafeMode(action, false);
2216  }
2217  
2218  /**
2219   * Enter, leave or get safe mode.
2220   * 
2221   * @param action
2222   *          One of SafeModeAction.SAFEMODE_GET, SafeModeAction.SAFEMODE_ENTER
2223   *          and SafeModeAction.SAFEMODE_LEAVE
2224   * @param isChecked
2225   *          If true, check only the active namenode's safemode status;
2226   *          otherwise check the first namenode's status.
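   * <p>For example (sketch), to query the active namenode's safemode status:
   * <pre>{@code
   * boolean inSafeMode = client.setSafeMode(SafeModeAction.SAFEMODE_GET, true);
   * }</pre>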
2227   * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean)
2228   */
2229  public boolean setSafeMode(SafeModeAction action, boolean isChecked) throws IOException {
2230    return namenode.setSafeMode(action, isChecked);    
2231  }
2232 
2233  /**
2234   * Create one snapshot.
2235   * 
2236   * @param snapshotRoot The directory where the snapshot is to be taken
2237   * @param snapshotName Name of the snapshot
2238   * @return the snapshot path.
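   * <p>For example, {@code client.createSnapshot("/data", "s0")} returns a
   * path of the form {@code /data/.snapshot/s0}; the directory must first be
   * made snapshottable, e.g. via {@link #allowSnapshot(String)}.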
2239   * @see ClientProtocol#createSnapshot(String, String)
2240   */
2241  public String createSnapshot(String snapshotRoot, String snapshotName)
2242      throws IOException {
2243    checkOpen();
2244    try {
2245      return namenode.createSnapshot(snapshotRoot, snapshotName);
2246    } catch(RemoteException re) {
2247      throw re.unwrapRemoteException();
2248    }
2249  }
2250  
2251  /**
2252   * Delete a snapshot of a snapshottable directory.
2253   * 
2254   * @param snapshotRoot The snapshottable directory that the 
2255   *                    to-be-deleted snapshot belongs to
2256   * @param snapshotName The name of the to-be-deleted snapshot
2257   * @throws IOException
2258   * @see ClientProtocol#deleteSnapshot(String, String)
2259   */
2260  public void deleteSnapshot(String snapshotRoot, String snapshotName)
2261      throws IOException {
2262    try {
2263      namenode.deleteSnapshot(snapshotRoot, snapshotName);
2264    } catch(RemoteException re) {
2265      throw re.unwrapRemoteException();
2266    }
2267  }
2268  
2269  /**
2270   * Rename a snapshot.
2271   * @param snapshotDir The directory path where the snapshot was taken
2272   * @param snapshotOldName Old name of the snapshot
2273   * @param snapshotNewName New name of the snapshot
2274   * @throws IOException
2275   * @see ClientProtocol#renameSnapshot(String, String, String)
2276   */
2277  public void renameSnapshot(String snapshotDir, String snapshotOldName,
2278      String snapshotNewName) throws IOException {
2279    checkOpen();
2280    try {
2281      namenode.renameSnapshot(snapshotDir, snapshotOldName, snapshotNewName);
2282    } catch(RemoteException re) {
2283      throw re.unwrapRemoteException();
2284    }
2285  }
2286  
2287  /**
2288   * Get all the current snapshottable directories.
2289   * @return All the current snapshottable directories
2290   * @throws IOException
2291   * @see ClientProtocol#getSnapshottableDirListing()
2292   */
2293  public SnapshottableDirectoryStatus[] getSnapshottableDirListing()
2294      throws IOException {
2295    checkOpen();
2296    try {
2297      return namenode.getSnapshottableDirListing();
2298    } catch(RemoteException re) {
2299      throw re.unwrapRemoteException();
2300    }
2301  }
2302
2303  /**
2304   * Allow snapshot on a directory.
2305   * 
2306   * @see ClientProtocol#allowSnapshot(String snapshotRoot)
2307   */
2308  public void allowSnapshot(String snapshotRoot) throws IOException {
2309    checkOpen();
2310    try {
2311      namenode.allowSnapshot(snapshotRoot);
2312    } catch (RemoteException re) {
2313      throw re.unwrapRemoteException();
2314    }
2315  }
2316  
2317  /**
2318   * Disallow snapshot on a directory.
2319   * 
2320   * @see ClientProtocol#disallowSnapshot(String snapshotRoot)
2321   */
2322  public void disallowSnapshot(String snapshotRoot) throws IOException {
2323    checkOpen();
2324    try {
2325      namenode.disallowSnapshot(snapshotRoot);
2326    } catch (RemoteException re) {
2327      throw re.unwrapRemoteException();
2328    }
2329  }
2330  
2331  /**
2332   * Get the difference between two snapshots, or between a snapshot and the
2333   * current tree of a directory.
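   * <p>For example, {@code client.getSnapshotDiffReport("/data", "s0", "s1")}
   * reports the changes between snapshots {@code s0} and {@code s1}; by
   * convention an empty snapshot name refers to the current tree.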
2334   * @see ClientProtocol#getSnapshotDiffReport(String, String, String)
2335   */
2336  public SnapshotDiffReport getSnapshotDiffReport(String snapshotDir,
2337      String fromSnapshot, String toSnapshot) throws IOException {
2338    checkOpen();
2339    try {
2340      return namenode.getSnapshotDiffReport(snapshotDir,
2341          fromSnapshot, toSnapshot);
2342    } catch(RemoteException re) {
2343      throw re.unwrapRemoteException();
2344    }
2345  }
2346
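  /**
   * Add a new cache directive. A minimal sketch (assumed pool name and path;
   * the pool must already exist, see {@link #addCachePool(CachePoolInfo)}):
   * <pre>{@code
   * CacheDirectiveInfo info = new CacheDirectiveInfo.Builder()
   *     .setPath(new Path("/hot/table"))
   *     .setPool("analytics")
   *     .build();
   * long id = client.addCacheDirective(info, EnumSet.noneOf(CacheFlag.class));
   * }</pre>
   * @see ClientProtocol#addCacheDirective(CacheDirectiveInfo, EnumSet)
   */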
2347  public long addCacheDirective(
2348      CacheDirectiveInfo info, EnumSet<CacheFlag> flags) throws IOException {
2349    checkOpen();
2350    try {
2351      return namenode.addCacheDirective(info, flags);
2352    } catch (RemoteException re) {
2353      throw re.unwrapRemoteException();
2354    }
2355  }
2356  
2357  public void modifyCacheDirective(
2358      CacheDirectiveInfo info, EnumSet<CacheFlag> flags) throws IOException {
2359    checkOpen();
2360    try {
2361      namenode.modifyCacheDirective(info, flags);
2362    } catch (RemoteException re) {
2363      throw re.unwrapRemoteException();
2364    }
2365  }
2366
2367  public void removeCacheDirective(long id)
2368      throws IOException {
2369    checkOpen();
2370    try {
2371      namenode.removeCacheDirective(id);
2372    } catch (RemoteException re) {
2373      throw re.unwrapRemoteException();
2374    }
2375  }
2376  
2377  public RemoteIterator<CacheDirectiveEntry> listCacheDirectives(
2378      CacheDirectiveInfo filter) throws IOException {
2379    return new CacheDirectiveIterator(namenode, filter);
2380  }
2381
2382  public void addCachePool(CachePoolInfo info) throws IOException {
2383    checkOpen();
2384    try {
2385      namenode.addCachePool(info);
2386    } catch (RemoteException re) {
2387      throw re.unwrapRemoteException();
2388    }
2389  }
2390
2391  public void modifyCachePool(CachePoolInfo info) throws IOException {
2392    checkOpen();
2393    try {
2394      namenode.modifyCachePool(info);
2395    } catch (RemoteException re) {
2396      throw re.unwrapRemoteException();
2397    }
2398  }
2399
2400  public void removeCachePool(String poolName) throws IOException {
2401    checkOpen();
2402    try {
2403      namenode.removeCachePool(poolName);
2404    } catch (RemoteException re) {
2405      throw re.unwrapRemoteException();
2406    }
2407  }
2408
2409  public RemoteIterator<CachePoolEntry> listCachePools() throws IOException {
2410    return new CachePoolIterator(namenode);
2411  }
2412
2413  /**
2414   * Save namespace image.
2415   * 
2416   * @see ClientProtocol#saveNamespace()
2417   */
2418  void saveNamespace() throws AccessControlException, IOException {
2419    try {
2420      namenode.saveNamespace();
2421    } catch(RemoteException re) {
2422      throw re.unwrapRemoteException(AccessControlException.class);
2423    }
2424  }
2425
2426  /**
2427   * Rolls the edit log on the active NameNode.
2428   * @return the txid of the new log segment 
2429   *
2430   * @see ClientProtocol#rollEdits()
2431   */
2432  long rollEdits() throws AccessControlException, IOException {
2433    try {
2434      return namenode.rollEdits();
2435    } catch(RemoteException re) {
2436      throw re.unwrapRemoteException(AccessControlException.class);
2437    }
2438  }
2439
2440  @VisibleForTesting
2441  ExtendedBlock getPreviousBlock(String file) {
2442    return filesBeingWritten.get(file).getBlock();
2443  }
2444  
2445  /**
2446   * Enable or disable restoring failed storage.
2447   * 
2448   * @see ClientProtocol#restoreFailedStorage(String arg)
2449   */
2450  boolean restoreFailedStorage(String arg)
2451      throws AccessControlException, IOException {
2452    return namenode.restoreFailedStorage(arg);
2453  }
2454
2455  /**
2456   * Refresh the hosts and exclude files.  (Rereads them.)
2457   * See {@link ClientProtocol#refreshNodes()} 
2458   * for more details.
2459   * 
2460   * @see ClientProtocol#refreshNodes()
2461   */
2462  public void refreshNodes() throws IOException {
2463    namenode.refreshNodes();
2464  }
2465
2466  /**
2467   * Dumps DFS data structures into specified file.
2468   * 
2469   * @see ClientProtocol#metaSave(String)
2470   */
2471  public void metaSave(String pathname) throws IOException {
2472    namenode.metaSave(pathname);
2473  }
2474
2475  /**
2476   * Requests the namenode to tell all datanodes to use a new, non-persistent
2477   * bandwidth value for dfs.balance.bandwidthPerSec.
2478   * See {@link ClientProtocol#setBalancerBandwidth(long)} 
2479   * for more details.
2480   * 
2481   * @see ClientProtocol#setBalancerBandwidth(long)
2482   */
2483  public void setBalancerBandwidth(long bandwidth) throws IOException {
2484    namenode.setBalancerBandwidth(bandwidth);
2485  }
2486    
2487  /**
2488   * @see ClientProtocol#finalizeUpgrade()
2489   */
2490  public void finalizeUpgrade() throws IOException {
2491    namenode.finalizeUpgrade();
2492  }
2493
2494  RollingUpgradeInfo rollingUpgrade(RollingUpgradeAction action) throws IOException {
2495    return namenode.rollingUpgrade(action);
2496  }
2497
2498  /** Create a directory (and any missing parents) with default permission. */
2500  @Deprecated
2501  public boolean mkdirs(String src) throws IOException {
2502    return mkdirs(src, null, true);
2503  }
2504
2505  /**
2506   * Create a directory (or hierarchy of directories) with the given
2507   * name and permission.
2508   *
2509   * @param src The path of the directory being created
2510   * @param permission The permission of the directory being created.
2511   * If permission == null, use {@link FsPermission#getDefault()}.
2512   * @param createParent create missing parent directory if true
2513   * 
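   * <p>For example, {@code client.mkdirs("/a/b/c", null, true)} creates the
   * whole hierarchy with default (umask-adjusted) permissions.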
2514   * @return true if the operation succeeds
2515   * 
2516   * @see ClientProtocol#mkdirs(String, FsPermission, boolean)
2517   */
2518  public boolean mkdirs(String src, FsPermission permission,
2519      boolean createParent) throws IOException {
2520    if (permission == null) {
2521      permission = FsPermission.getDefault();
2522    }
2523    FsPermission masked = permission.applyUMask(dfsClientConf.uMask);
2524    return primitiveMkdir(src, masked, createParent);
2525  }
2526
2527  /**
2528   * Same as {@link #mkdirs(String, FsPermission, boolean)} except
2529   * that the permission has already been masked against the umask.
2530   */
2531  public boolean primitiveMkdir(String src, FsPermission absPermission)
2532    throws IOException {
2533    return primitiveMkdir(src, absPermission, true);
2534  }
2535
2536  /**
2537   * Same as {@link #mkdirs(String, FsPermission, boolean)} except
2538   * that the permission has already been masked against the umask.
2539   */
2540  public boolean primitiveMkdir(String src, FsPermission absPermission, 
2541    boolean createParent)
2542    throws IOException {
2543    checkOpen();
2544    if (absPermission == null) {
2545      absPermission = 
2546        FsPermission.getDefault().applyUMask(dfsClientConf.uMask);
2547    } 
2548
2549    if(LOG.isDebugEnabled()) {
2550      LOG.debug(src + ": masked=" + absPermission);
2551    }
2552    try {
2553      return namenode.mkdirs(src, absPermission, createParent);
2554    } catch(RemoteException re) {
2555      throw re.unwrapRemoteException(AccessControlException.class,
2556                                     InvalidPathException.class,
2557                                     FileAlreadyExistsException.class,
2558                                     FileNotFoundException.class,
2559                                     ParentNotDirectoryException.class,
2560                                     SafeModeException.class,
2561                                     NSQuotaExceededException.class,
2562                                     DSQuotaExceededException.class,
2563                                     UnresolvedPathException.class,
2564                                     SnapshotAccessControlException.class);
2565    }
2566  }
2567  
2568  /**
2569   * Get {@link ContentSummary} rooted at the specified directory.
2570   * @param src The string representation of the path
2571   * 
2572   * @see ClientProtocol#getContentSummary(String)
2573   */
2574  ContentSummary getContentSummary(String src) throws IOException {
2575    try {
2576      return namenode.getContentSummary(src);
2577    } catch(RemoteException re) {
2578      throw re.unwrapRemoteException(AccessControlException.class,
2579                                     FileNotFoundException.class,
2580                                     UnresolvedPathException.class);
2581    }
2582  }
2583
2584  /**
2585   * Sets or resets quotas for a directory.
2586   * @see ClientProtocol#setQuota(String, long, long)
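   * <p>For example, {@code setQuota("/user/alice", 100000,
   * HdfsConstants.QUOTA_DONT_SET)} caps the namespace at 100000 names while
   * leaving the diskspace quota unchanged; passing
   * {@code HdfsConstants.QUOTA_RESET} removes a quota.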
2587   */
2588  void setQuota(String src, long namespaceQuota, long diskspaceQuota) 
2589      throws IOException {
2590    // sanity check
2591    if ((namespaceQuota <= 0 && namespaceQuota != HdfsConstants.QUOTA_DONT_SET &&
2592         namespaceQuota != HdfsConstants.QUOTA_RESET) ||
2593        (diskspaceQuota <= 0 && diskspaceQuota != HdfsConstants.QUOTA_DONT_SET &&
2594         diskspaceQuota != HdfsConstants.QUOTA_RESET)) {
2595      throw new IllegalArgumentException("Invalid values for quota : " +
2596                                         namespaceQuota + " and " +
2597                                         diskspaceQuota);
2598    }
2600    try {
2601      namenode.setQuota(src, namespaceQuota, diskspaceQuota);
2602    } catch(RemoteException re) {
2603      throw re.unwrapRemoteException(AccessControlException.class,
2604                                     FileNotFoundException.class,
2605                                     NSQuotaExceededException.class,
2606                                     DSQuotaExceededException.class,
2607                                     UnresolvedPathException.class,
2608                                     SnapshotAccessControlException.class);
2609    }
2610  }
2611
2612  /**
2613   * Set the modification and access time of a file.
2614   * 
2615   * @see ClientProtocol#setTimes(String, long, long)
2616   */
2617  public void setTimes(String src, long mtime, long atime) throws IOException {
2618    checkOpen();
2619    try {
2620      namenode.setTimes(src, mtime, atime);
2621    } catch(RemoteException re) {
2622      throw re.unwrapRemoteException(AccessControlException.class,
2623                                     FileNotFoundException.class,
2624                                     UnresolvedPathException.class,
2625                                     SnapshotAccessControlException.class);
2626    }
2627  }
2628
2629  /**
2630   * @deprecated use {@link HdfsDataInputStream} instead.
2631   */
2632  @Deprecated
2633  public static class DFSDataInputStream extends HdfsDataInputStream {
2634
2635    public DFSDataInputStream(DFSInputStream in) throws IOException {
2636      super(in);
2637    }
2638  }
2639
2640  void reportChecksumFailure(String file, ExtendedBlock blk, DatanodeInfo dn) {
2641    DatanodeInfo [] dnArr = { dn };
2642    LocatedBlock [] lblocks = { new LocatedBlock(blk, dnArr) };
2643    reportChecksumFailure(file, lblocks);
2644  }
2645    
2646  // just reports checksum failure and ignores any exception during the report.
2647  void reportChecksumFailure(String file, LocatedBlock lblocks[]) {
2648    try {
2649      reportBadBlocks(lblocks);
2650    } catch (IOException ie) {
2651      LOG.info("Found corruption while reading " + file
2652          + ". Error repairing corrupt blocks. Bad blocks remain.", ie);
2653    }
2654  }
2655
2656  @Override
2657  public String toString() {
2658    return getClass().getSimpleName() + "[clientName=" + clientName
2659        + ", ugi=" + ugi + "]"; 
2660  }
2661
2662  public CachingStrategy getDefaultReadCachingStrategy() {
2663    return defaultReadCachingStrategy;
2664  }
2665
2666  public CachingStrategy getDefaultWriteCachingStrategy() {
2667    return defaultWriteCachingStrategy;
2668  }
2669
2670  public ClientContext getClientContext() {
2671    return clientContext;
2672  }
2673
2674  public void modifyAclEntries(String src, List<AclEntry> aclSpec)
2675      throws IOException {
2676    checkOpen();
2677    try {
2678      namenode.modifyAclEntries(src, aclSpec);
2679    } catch(RemoteException re) {
2680      throw re.unwrapRemoteException(AccessControlException.class,
2681                                     AclException.class,
2682                                     FileNotFoundException.class,
2683                                     NSQuotaExceededException.class,
2684                                     SafeModeException.class,
2685                                     SnapshotAccessControlException.class,
2686                                     UnresolvedPathException.class);
2687    }
2688  }
2689
2690  public void removeAclEntries(String src, List<AclEntry> aclSpec)
2691      throws IOException {
2692    checkOpen();
2693    try {
2694      namenode.removeAclEntries(src, aclSpec);
2695    } catch(RemoteException re) {
2696      throw re.unwrapRemoteException(AccessControlException.class,
2697                                     AclException.class,
2698                                     FileNotFoundException.class,
2699                                     NSQuotaExceededException.class,
2700                                     SafeModeException.class,
2701                                     SnapshotAccessControlException.class,
2702                                     UnresolvedPathException.class);
2703    }
2704  }

  /**
   * Removes all default ACL entries from a directory.
   *
   * @param src path of the directory
   * @throws IOException if the namenode rejects or fails the operation
   */
  public void removeDefaultAcl(String src) throws IOException {
    checkOpen();
    try {
      namenode.removeDefaultAcl(src);
    } catch (RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     AclException.class,
                                     FileNotFoundException.class,
                                     NSQuotaExceededException.class,
                                     SafeModeException.class,
                                     SnapshotAccessControlException.class,
                                     UnresolvedPathException.class);
    }
  }

  /**
   * Removes the entire ACL of a file or directory, retaining only the base
   * permission bits.
   *
   * @param src path of the file or directory
   * @throws IOException if the namenode rejects or fails the operation
   */
  public void removeAcl(String src) throws IOException {
    checkOpen();
    try {
      namenode.removeAcl(src);
    } catch (RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     AclException.class,
                                     FileNotFoundException.class,
                                     NSQuotaExceededException.class,
                                     SafeModeException.class,
                                     SnapshotAccessControlException.class,
                                     UnresolvedPathException.class);
    }
  }

  /**
   * Fully replaces the ACL of a file or directory with the supplied entries,
   * discarding all existing entries.
   *
   * @param src path of the file or directory
   * @param aclSpec the new ACL
   * @throws IOException if the namenode rejects or fails the operation
   */
  public void setAcl(String src, List<AclEntry> aclSpec) throws IOException {
    checkOpen();
    try {
      namenode.setAcl(src, aclSpec);
    } catch (RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     AclException.class,
                                     FileNotFoundException.class,
                                     NSQuotaExceededException.class,
                                     SafeModeException.class,
                                     SnapshotAccessControlException.class,
                                     UnresolvedPathException.class);
    }
  }

  /**
   * Returns the ACL of a file or directory.
   *
   * @param src path of the file or directory
   * @return the ACL status of the file or directory
   * @throws IOException if the namenode rejects or fails the operation
   */
  public AclStatus getAclStatus(String src) throws IOException {
    checkOpen();
    try {
      return namenode.getAclStatus(src);
    } catch (RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     AclException.class,
                                     FileNotFoundException.class,
                                     UnresolvedPathException.class);
    }
  }
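
  // Illustrative sketch (hedged): reading back the ACL maintained by the
  // calls above, typically via FileSystem#getAclStatus. The path is
  // hypothetical.
  //
  //   AclStatus status = fs.getAclStatus(new Path("/hypothetical/dir"));
  //   for (AclEntry entry : status.getEntries()) {
  //     System.out.println(entry); // e.g. "user:hypotheticalUser:rw-"
  //   }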

  /**
   * Creates a new {@link Peer} connected to the given address, using this
   * client's socket factory, socket timeout, and data encryption key. If
   * connection setup fails, any partially created socket or peer is closed
   * before the exception propagates; on success, ownership of the connection
   * passes to the caller.
   */
  @Override // RemotePeerFactory
  public Peer newConnectedPeer(InetSocketAddress addr) throws IOException {
    Peer peer = null;
    boolean success = false;
    Socket sock = null;
    try {
      sock = socketFactory.createSocket();
      NetUtils.connect(sock, addr,
        getRandomLocalInterfaceAddr(),
        dfsClientConf.socketTimeout);
      peer = TcpPeerServer.peerFromSocketAndKey(sock,
          getDataEncryptionKey());
      success = true;
      return peer;
    } finally {
      if (!success) {
        // Connection setup failed; release the half-open resources.
        IOUtils.cleanup(LOG, peer);
        IOUtils.closeSocket(sock);
      }
    }
  }

  /**
   * Creates the hedged reads thread pool, HEDGED_READ_THREAD_POOL, if it
   * does not already exist.
   *
   * @param num maximum number of threads for the hedged reads thread pool;
   * if zero or negative, pool creation is skipped
   */
  private synchronized void initThreadsNumForHedgedReads(int num) {
    if (num <= 0 || HEDGED_READ_THREAD_POOL != null) {
      return;
    }
    HEDGED_READ_THREAD_POOL = new ThreadPoolExecutor(1, num, 60,
        TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
        new Daemon.DaemonFactory() {
          private final AtomicInteger threadIndex = new AtomicInteger(0);

          @Override
          public Thread newThread(Runnable r) {
            Thread t = super.newThread(r);
            t.setName("hedgedRead-" + threadIndex.getAndIncrement());
            return t;
          }
        },
        new ThreadPoolExecutor.CallerRunsPolicy() {

      @Override
      public void rejectedExecution(Runnable runnable,
          ThreadPoolExecutor e) {
        LOG.info("Execution rejected; running in the current thread");
        HEDGED_READ_METRIC.incHedgedReadOpsInCurThread();
        // CallerRunsPolicy runs the rejected task in the submitting thread.
        super.rejectedExecution(runnable, e);
      }
    });
    HEDGED_READ_THREAD_POOL.allowCoreThreadTimeOut(true);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Using hedged reads; pool threads=" + num);
    }
  }
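
  // Illustrative sketch (hedged): hedged reads are switched on through client
  // configuration before the DFSClient is constructed; the values below are
  // examples only.
  //
  //   Configuration conf = new Configuration();
  //   conf.setInt("dfs.client.hedged.read.threadpool.size", 10);
  //   conf.setLong("dfs.client.hedged.read.threshold.millis", 500);
  //   FileSystem fs = FileSystem.get(conf); // reads may now be hedged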

  /**
   * Returns the delay, in milliseconds, waited on the first read before a
   * hedged read is started against a different datanode.
   */
  long getHedgedReadTimeout() {
    return this.hedgedReadThresholdMillis;
  }

  @VisibleForTesting
  void setHedgedReadTimeout(long timeoutMillis) {
    this.hedgedReadThresholdMillis = timeoutMillis;
  }

  ThreadPoolExecutor getHedgedReadsThreadPool() {
    return HEDGED_READ_THREAD_POOL;
  }

  boolean isHedgedReadsEnabled() {
    return (HEDGED_READ_THREAD_POOL != null) &&
      HEDGED_READ_THREAD_POOL.getMaximumPoolSize() > 0;
  }

  DFSHedgedReadMetrics getHedgedReadMetrics() {
    return HEDGED_READ_METRIC;
  }
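
  // Illustrative sketch (hedged): tests can observe hedged-read behavior
  // through these metrics; the accessor names on DFSHedgedReadMetrics are
  // assumed here.
  //
  //   DFSHedgedReadMetrics metrics = client.getHedgedReadMetrics();
  //   LOG.info("hedged read ops so far: " + metrics.getHedgedReadOps());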
}