/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.datanode.fsdataset;


import java.io.File;
import java.io.FileDescriptor;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataStorage;
import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
import org.apache.hadoop.hdfs.server.datanode.Replica;
import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipelineInterface;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetFactory;
import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * This is a service provider interface for the underlying storage that
 * stores replicas for a data node.
 * The default implementation stores replicas on local drives.
 */
@InterfaceAudience.Private
public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
  /**
   * A factory for creating {@link FsDatasetSpi} objects.
   */
  public static abstract class Factory<D extends FsDatasetSpi<?>> {
    /**
     * @return the factory class configured under
     *         {@link DFSConfigKeys#DFS_DATANODE_FSDATASET_FACTORY_KEY},
     *         instantiated reflectively; defaults to {@link FsDatasetFactory}.
     */
    public static Factory<?> getFactory(Configuration conf) {
      @SuppressWarnings("rawtypes")
      final Class<? extends Factory> clazz = conf.getClass(
          DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY,
          FsDatasetFactory.class,
          Factory.class);
      return ReflectionUtils.newInstance(clazz, conf);
    }

    /** Create a new object. */
    public abstract D newInstance(DataNode datanode, DataStorage storage,
        Configuration conf) throws IOException;

    /** Does the factory create simulated objects? */
    public boolean isSimulated() {
      return false;
    }
  }

  /**
   * Create rolling logs.
   *
   * @param bpid the block pool id for which the logs are created
   * @param prefix the prefix of the log names.
   * @return rolling logs
   */
  public RollingLogs createRollingLogs(String bpid, String prefix
      ) throws IOException;

  /** @return a list of volumes. */
  public List<V> getVolumes();

  /** @return a storage with the given storage ID */
  public DatanodeStorage getStorage(final String storageUuid);

  /** @return one or more storage reports for attached volumes. */
  public StorageReport[] getStorageReports(String bpid)
      throws IOException;

  /** @return the volume that contains a replica of the block. */
  public V getVolume(ExtendedBlock b);

  /** @return a volume information map (name => info). */
  public Map<String, Object> getVolumeInfoMap();

  /** @return a list of finalized blocks for the given block pool. */
  public List<FinalizedReplica> getFinalizedBlocks(String bpid);

  /**
   * Check whether the in-memory block record matches the block on the disk,
   * and, in case that they are not matched, update the record or mark it
   * as corrupted.
   */
  public void checkAndUpdate(String bpid, long blockId, File diskFile,
      File diskMetaFile, FsVolumeSpi vol);

  /**
   * @param b - the block
   * @return a stream if the meta-data of the block exists;
   *         otherwise, return null.
   * @throws IOException
   */
  public LengthInputStream getMetaDataInputStream(ExtendedBlock b
      ) throws IOException;

  /**
   * Returns the specified block's on-disk length (excluding metadata)
   * @return the specified block's on-disk length (excluding metadata)
   * @throws IOException on error
   */
  public long getLength(ExtendedBlock b) throws IOException;

  /**
   * Get reference to the replica meta info in the replicasMap.
   * To be called from methods that are synchronized on {@link FSDataset}
   * NOTE(review): the {@code FSDataset} link above looks stale — confirm
   * against the current default implementation class name.
   * @return replica from the replicas map
   */
  @Deprecated
  public Replica getReplica(String bpid, long blockId);

  /**
   * @return replica meta information
   */
  public String getReplicaString(String bpid, long blockId);

  /**
   * @param bpid block pool id
   * @param blkid block id
   * @return the generation stamp stored with the block.
   */
  public Block getStoredBlock(String bpid, long blkid) throws IOException;

  /**
   * Returns an input stream at specified offset of the specified block
   * @param b block
   * @param seekOffset offset with in the block to seek to
   * @return an input stream to read the contents of the specified block,
   *         starting at the offset
   * @throws IOException
   */
  public InputStream getBlockInputStream(ExtendedBlock b, long seekOffset)
      throws IOException;

  /**
   * Returns an input stream at specified offset of the specified block
   * The block is still in the tmp directory and is not finalized
   * @param b block
   * @param blkoff offset within the block's data file to seek to
   * @param ckoff offset within the block's checksum file to seek to
   * @return an input stream to read the contents of the specified block,
   *         starting at the offset
   * @throws IOException
   */
  public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, long blkoff,
      long ckoff) throws IOException;

  /**
   * Creates a temporary replica and returns the meta information of the replica
   *
   * @param b block
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaInPipelineInterface createTemporary(ExtendedBlock b
      ) throws IOException;

  /**
   * Creates a RBW replica and returns the meta info of the replica
   *
   * @param b block
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaInPipelineInterface createRbw(ExtendedBlock b
      ) throws IOException;

  /**
   * Recovers a RBW replica and returns the meta info of the replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param minBytesRcvd the minimum number of bytes that the replica could have
   * @param maxBytesRcvd the maximum number of bytes that the replica could have
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaInPipelineInterface recoverRbw(ExtendedBlock b,
      long newGS, long minBytesRcvd, long maxBytesRcvd) throws IOException;

  /**
   * Convert a temporary replica to a RBW.
   * @param temporary the temporary replica being converted
   * @return the result RBW
   */
  public ReplicaInPipelineInterface convertTemporaryToRbw(
      ExtendedBlock temporary) throws IOException;

  /**
   * Append to a finalized replica and returns the meta info of the replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the meta info of the replica which is being written to
   * @throws IOException
   */
  public ReplicaInPipelineInterface append(ExtendedBlock b, long newGS,
      long expectedBlockLen) throws IOException;

  /**
   * Recover a failed append to a finalized replica
   * and returns the meta info of the replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the meta info of the replica which is being written to
   * @throws IOException
   */
  public ReplicaInPipelineInterface recoverAppend(ExtendedBlock b, long newGS,
      long expectedBlockLen) throws IOException;

  /**
   * Recover a failed pipeline close
   * It bumps the replica's generation stamp and finalize it if RBW replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the storage uuid of the replica.
   * @throws IOException
   */
  public String recoverClose(ExtendedBlock b, long newGS, long expectedBlockLen
      ) throws IOException;

  /**
   * Finalizes the block previously opened for writing using writeToBlock.
   * The block size is what is in the parameter b and it must match the amount
   * of data written
   * @throws IOException
   */
  public void finalizeBlock(ExtendedBlock b) throws IOException;

  /**
   * Unfinalizes the block previously opened for writing using writeToBlock.
   * The temporary file associated with this block is deleted.
   * @throws IOException
   */
  public void unfinalizeBlock(ExtendedBlock b) throws IOException;

  /**
   * Returns one block report per volume.
   * @param bpid Block Pool Id
   * @return - a map of DatanodeStorage to block report for the volume.
   */
  public Map<DatanodeStorage, BlockListAsLongs> getBlockReports(String bpid);

  /**
   * Returns the cache report - the full list of cached block IDs of a
   * block pool.
   * @param bpid Block Pool Id
   * @return the cache report - the full list of cached block IDs.
   */
  public List<Long> getCacheReport(String bpid);

  /** Does the dataset contain the block? */
  public boolean contains(ExtendedBlock block);

  /**
   * Is the block valid?
   * @return - true if the specified block is valid
   */
  public boolean isValidBlock(ExtendedBlock b);

  /**
   * Is the block a valid RBW?
   * @return - true if the specified block is a valid RBW
   */
  public boolean isValidRbw(ExtendedBlock b);

  /**
   * Invalidates the specified blocks
   * @param bpid Block pool Id
   * @param invalidBlks - the blocks to be invalidated
   * @throws IOException
   */
  public void invalidate(String bpid, Block invalidBlks[]) throws IOException;

  /**
   * Caches the specified blocks
   * @param bpid Block pool id
   * @param blockIds - block ids to cache
   */
  public void cache(String bpid, long[] blockIds);

  /**
   * Uncaches the specified blocks
   * @param bpid Block pool id
   * @param blockIds - block ids to uncache
   */
  public void uncache(String bpid, long[] blockIds);

  /**
   * Determine if the specified block is cached.
   * @param bpid Block pool id
   * @param blockId - block id
   * @return true if the block is cached
   */
  public boolean isCached(String bpid, long blockId);

  /**
   * Check if all the data directories are healthy
   * @throws DiskErrorException
   */
  public void checkDataDir() throws DiskErrorException;

  /**
   * Shutdown the FSDataset
   */
  public void shutdown();

  /**
   * Sets the file pointer of the checksum stream so that the last checksum
   * will be overwritten
   * @param b block
   * @param outs The streams for the data file and checksum file
   * @param checksumSize number of bytes each checksum has
   * @throws IOException
   */
  public void adjustCrcChannelPosition(ExtendedBlock b,
      ReplicaOutputStreams outs, int checksumSize) throws IOException;

  /**
   * Checks how many valid storage volumes there are in the DataNode.
   * @return true if more than the minimum number of valid volumes are left
   *         in the FSDataSet.
   */
  public boolean hasEnoughResource();

  /**
   * Get visible length of the specified replica.
   */
  long getReplicaVisibleLength(final ExtendedBlock block) throws IOException;

  /**
   * Initialize a replica recovery.
   * @return actual state of the replica on this data-node or
   *         null if data-node does not have the replica.
   */
  public ReplicaRecoveryInfo initReplicaRecovery(RecoveringBlock rBlock
      ) throws IOException;

  /**
   * Update replica's generation stamp and length and finalize it.
   * @return the ID of storage that stores the block
   */
  public String updateReplicaUnderRecovery(ExtendedBlock oldBlock,
      long recoveryId, long newLength) throws IOException;

  /**
   * add new block pool ID
   * @param bpid Block pool Id
   * @param conf Configuration
   */
  public void addBlockPool(String bpid, Configuration conf) throws IOException;

  /**
   * Shutdown and remove the block pool from underlying storage.
   * @param bpid Block pool Id to be removed
   */
  public void shutdownBlockPool(String bpid);

  /**
   * Deletes the block pool directories. If force is false, directories are
   * deleted only if no block files exist for the block pool. If force
   * is true entire directory for the blockpool is deleted along with its
   * contents.
   * @param bpid BlockPool Id to be deleted.
   * @param force If force is false, directories are deleted only if no
   *        block files exist for the block pool, otherwise entire
   *        directory for the blockpool is deleted along with its contents.
   * @throws IOException
   */
  public void deleteBlockPool(String bpid, boolean force) throws IOException;

  /**
   * Get {@link BlockLocalPathInfo} for the given block.
   */
  public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b
      ) throws IOException;

  /**
   * Get a {@link HdfsBlocksMetadata} corresponding to the list of blocks in
   * <code>blocks</code>.
   *
   * @param bpid pool to query
   * @param blockIds List of block ids for which to return metadata
   * @return metadata Metadata for the list of blocks
   * @throws IOException
   */
  public HdfsBlocksMetadata getHdfsBlocksMetadata(String bpid,
      long[] blockIds) throws IOException;

  /**
   * Enable 'trash' for the given dataset. When trash is enabled, files are
   * moved to a separate trash directory instead of being deleted immediately.
   * This can be useful for example during rolling upgrades.
   * @param bpid Block pool id
   */
  public void enableTrash(String bpid);

  /**
   * Restore trash for the given block pool.
   * @param bpid Block pool id
   */
  public void restoreTrash(String bpid);

  /**
   * @param bpid Block pool id
   * @return true when trash is enabled
   */
  public boolean trashEnabled(String bpid);

  /**
   * submit a sync_file_range request to AsyncDiskService
   */
  public void submitBackgroundSyncFileRangeRequest(final ExtendedBlock block,
      final FileDescriptor fd, final long offset, final long nbytes,
      final int flags);
}