001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.datanode.fsdataset;
019
020
021import java.io.File;
022import java.io.FileDescriptor;
023import java.io.IOException;
024import java.io.InputStream;
025import java.util.List;
026import java.util.Map;
027
028import org.apache.hadoop.classification.InterfaceAudience;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.hdfs.DFSConfigKeys;
031import org.apache.hadoop.hdfs.protocol.Block;
032import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
033import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
034import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
035import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
036import org.apache.hadoop.hdfs.server.datanode.DataNode;
037import org.apache.hadoop.hdfs.server.datanode.DataStorage;
038import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
039import org.apache.hadoop.hdfs.server.datanode.Replica;
040import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipelineInterface;
041import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetFactory;
042import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
043import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
044import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
045import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
046import org.apache.hadoop.hdfs.server.protocol.StorageReport;
047import org.apache.hadoop.util.DiskChecker.DiskErrorException;
048import org.apache.hadoop.util.ReflectionUtils;
049
050/**
051 * This is a service provider interface for the underlying storage that
052 * stores replicas for a data node.
053 * The default implementation stores replicas on local drives. 
054 */
055@InterfaceAudience.Private
056public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
057  /**
058   * A factory for creating {@link FsDatasetSpi} objects.
059   */
060  public static abstract class Factory<D extends FsDatasetSpi<?>> {
061    /** @return the configured factory. */
062    public static Factory<?> getFactory(Configuration conf) {
063      @SuppressWarnings("rawtypes")
064      final Class<? extends Factory> clazz = conf.getClass(
065          DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY,
066          FsDatasetFactory.class,
067          Factory.class);
068      return ReflectionUtils.newInstance(clazz, conf);
069    }
070
071    /** Create a new object. */
072    public abstract D newInstance(DataNode datanode, DataStorage storage,
073        Configuration conf) throws IOException;
074
075    /** Does the factory create simulated objects? */
076    public boolean isSimulated() {
077      return false;
078    }
079  }
080
081  /**
082   * Create rolling logs.
083   *
084   * @param prefix the prefix of the log names.
085   * @return rolling logs
086   */
087  public RollingLogs createRollingLogs(String bpid, String prefix
088      ) throws IOException;
089
090  /** @return a list of volumes. */
091  public List<V> getVolumes();
092
093  /** @return a storage with the given storage ID */
094  public DatanodeStorage getStorage(final String storageUuid);
095
096  /** @return one or more storage reports for attached volumes. */
097  public StorageReport[] getStorageReports(String bpid)
098      throws IOException;
099
100  /** @return the volume that contains a replica of the block. */
101  public V getVolume(ExtendedBlock b);
102
103  /** @return a volume information map (name => info). */
104  public Map<String, Object> getVolumeInfoMap();
105
106  /** @return a list of finalized blocks for the given block pool. */
107  public List<FinalizedReplica> getFinalizedBlocks(String bpid);
108
109  /**
110   * Check whether the in-memory block record matches the block on the disk,
111   * and, in case that they are not matched, update the record or mark it
112   * as corrupted.
113   */
114  public void checkAndUpdate(String bpid, long blockId, File diskFile,
115      File diskMetaFile, FsVolumeSpi vol);
116
117  /**
118   * @param b - the block
119   * @return a stream if the meta-data of the block exists;
120   *         otherwise, return null.
121   * @throws IOException
122   */
123  public LengthInputStream getMetaDataInputStream(ExtendedBlock b
124      ) throws IOException;
125
126  /**
127   * Returns the specified block's on-disk length (excluding metadata)
128   * @return   the specified block's on-disk length (excluding metadta)
129   * @throws IOException on error
130   */
131  public long getLength(ExtendedBlock b) throws IOException;
132
133  /**
134   * Get reference to the replica meta info in the replicasMap. 
135   * To be called from methods that are synchronized on {@link FSDataset}
136   * @return replica from the replicas map
137   */
138  @Deprecated
139  public Replica getReplica(String bpid, long blockId);
140
141  /**
142   * @return replica meta information
143   */
144  public String getReplicaString(String bpid, long blockId);
145
146  /**
147   * @return the generation stamp stored with the block.
148   */
149  public Block getStoredBlock(String bpid, long blkid) throws IOException;
150  
151  /**
152   * Returns an input stream at specified offset of the specified block
153   * @param b block
154   * @param seekOffset offset with in the block to seek to
155   * @return an input stream to read the contents of the specified block,
156   *  starting at the offset
157   * @throws IOException
158   */
159  public InputStream getBlockInputStream(ExtendedBlock b, long seekOffset)
160            throws IOException;
161
162  /**
163   * Returns an input stream at specified offset of the specified block
164   * The block is still in the tmp directory and is not finalized
165   * @return an input stream to read the contents of the specified block,
166   *  starting at the offset
167   * @throws IOException
168   */
169  public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, long blkoff,
170      long ckoff) throws IOException;
171
172  /**
173   * Creates a temporary replica and returns the meta information of the replica
174   * 
175   * @param b block
176   * @return the meta info of the replica which is being written to
177   * @throws IOException if an error occurs
178   */
179  public ReplicaInPipelineInterface createTemporary(ExtendedBlock b
180      ) throws IOException;
181
182  /**
183   * Creates a RBW replica and returns the meta info of the replica
184   * 
185   * @param b block
186   * @return the meta info of the replica which is being written to
187   * @throws IOException if an error occurs
188   */
189  public ReplicaInPipelineInterface createRbw(ExtendedBlock b
190      ) throws IOException;
191
192  /**
193   * Recovers a RBW replica and returns the meta info of the replica
194   * 
195   * @param b block
196   * @param newGS the new generation stamp for the replica
197   * @param minBytesRcvd the minimum number of bytes that the replica could have
198   * @param maxBytesRcvd the maximum number of bytes that the replica could have
199   * @return the meta info of the replica which is being written to
200   * @throws IOException if an error occurs
201   */
202  public ReplicaInPipelineInterface recoverRbw(ExtendedBlock b, 
203      long newGS, long minBytesRcvd, long maxBytesRcvd) throws IOException;
204
205  /**
206   * Covert a temporary replica to a RBW.
207   * @param temporary the temporary replica being converted
208   * @return the result RBW
209   */
210  public ReplicaInPipelineInterface convertTemporaryToRbw(
211      ExtendedBlock temporary) throws IOException;
212
213  /**
214   * Append to a finalized replica and returns the meta info of the replica
215   * 
216   * @param b block
217   * @param newGS the new generation stamp for the replica
218   * @param expectedBlockLen the number of bytes the replica is expected to have
219   * @return the meata info of the replica which is being written to
220   * @throws IOException
221   */
222  public ReplicaInPipelineInterface append(ExtendedBlock b, long newGS,
223      long expectedBlockLen) throws IOException;
224
225  /**
226   * Recover a failed append to a finalized replica
227   * and returns the meta info of the replica
228   * 
229   * @param b block
230   * @param newGS the new generation stamp for the replica
231   * @param expectedBlockLen the number of bytes the replica is expected to have
232   * @return the meta info of the replica which is being written to
233   * @throws IOException
234   */
235  public ReplicaInPipelineInterface recoverAppend(ExtendedBlock b, long newGS,
236      long expectedBlockLen) throws IOException;
237  
238  /**
239   * Recover a failed pipeline close
240   * It bumps the replica's generation stamp and finalize it if RBW replica
241   * 
242   * @param b block
243   * @param newGS the new generation stamp for the replica
244   * @param expectedBlockLen the number of bytes the replica is expected to have
245   * @return the storage uuid of the replica.
246   * @throws IOException
247   */
248  public String recoverClose(ExtendedBlock b, long newGS, long expectedBlockLen
249      ) throws IOException;
250  
251  /**
252   * Finalizes the block previously opened for writing using writeToBlock.
253   * The block size is what is in the parameter b and it must match the amount
254   *  of data written
255   * @throws IOException
256   */
257  public void finalizeBlock(ExtendedBlock b) throws IOException;
258
259  /**
260   * Unfinalizes the block previously opened for writing using writeToBlock.
261   * The temporary file associated with this block is deleted.
262   * @throws IOException
263   */
264  public void unfinalizeBlock(ExtendedBlock b) throws IOException;
265
266  /**
267   * Returns one block report per volume.
268   * @param bpid Block Pool Id
269   * @return - a map of DatanodeStorage to block report for the volume.
270   */
271  public Map<DatanodeStorage, BlockListAsLongs> getBlockReports(String bpid);
272
273  /**
274   * Returns the cache report - the full list of cached block IDs of a
275   * block pool.
276   * @param   bpid Block Pool Id
277   * @return  the cache report - the full list of cached block IDs.
278   */
279  public List<Long> getCacheReport(String bpid);
280
281  /** Does the dataset contain the block? */
282  public boolean contains(ExtendedBlock block);
283
284  /**
285   * Is the block valid?
286   * @return - true if the specified block is valid
287   */
288  public boolean isValidBlock(ExtendedBlock b);
289
290  /**
291   * Is the block a valid RBW?
292   * @return - true if the specified block is a valid RBW
293   */
294  public boolean isValidRbw(ExtendedBlock b);
295
296  /**
297   * Invalidates the specified blocks
298   * @param bpid Block pool Id
299   * @param invalidBlks - the blocks to be invalidated
300   * @throws IOException
301   */
302  public void invalidate(String bpid, Block invalidBlks[]) throws IOException;
303
304  /**
305   * Caches the specified blocks
306   * @param bpid Block pool id
307   * @param blockIds - block ids to cache
308   */
309  public void cache(String bpid, long[] blockIds);
310
311  /**
312   * Uncaches the specified blocks
313   * @param bpid Block pool id
314   * @param blockIds - blocks ids to uncache
315   */
316  public void uncache(String bpid, long[] blockIds);
317
318  /**
319   * Determine if the specified block is cached.
320   * @param bpid Block pool id
321   * @param blockIds - block id
322   * @return true if the block is cached
323   */
324  public boolean isCached(String bpid, long blockId);
325
326    /**
327     * Check if all the data directories are healthy
328     * @throws DiskErrorException
329     */
330  public void checkDataDir() throws DiskErrorException;
331
332  /**
333   * Shutdown the FSDataset
334   */
335  public void shutdown();
336
337  /**
338   * Sets the file pointer of the checksum stream so that the last checksum
339   * will be overwritten
340   * @param b block
341   * @param outs The streams for the data file and checksum file
342   * @param checksumSize number of bytes each checksum has
343   * @throws IOException
344   */
345  public void adjustCrcChannelPosition(ExtendedBlock b,
346      ReplicaOutputStreams outs, int checksumSize) throws IOException;
347
348  /**
349   * Checks how many valid storage volumes there are in the DataNode.
350   * @return true if more than the minimum number of valid volumes are left 
351   * in the FSDataSet.
352   */
353  public boolean hasEnoughResource();
354
355  /**
356   * Get visible length of the specified replica.
357   */
358  long getReplicaVisibleLength(final ExtendedBlock block) throws IOException;
359
360  /**
361   * Initialize a replica recovery.
362   * @return actual state of the replica on this data-node or 
363   * null if data-node does not have the replica.
364   */
365  public ReplicaRecoveryInfo initReplicaRecovery(RecoveringBlock rBlock
366      ) throws IOException;
367
368  /**
369   * Update replica's generation stamp and length and finalize it.
370   * @return the ID of storage that stores the block
371   */
372  public String updateReplicaUnderRecovery(ExtendedBlock oldBlock,
373      long recoveryId, long newLength) throws IOException;
374
375  /**
376   * add new block pool ID
377   * @param bpid Block pool Id
378   * @param conf Configuration
379   */
380  public void addBlockPool(String bpid, Configuration conf) throws IOException;
381  
382  /**
383   * Shutdown and remove the block pool from underlying storage.
384   * @param bpid Block pool Id to be removed
385   */
386  public void shutdownBlockPool(String bpid) ;
387  
388  /**
389   * Deletes the block pool directories. If force is false, directories are 
390   * deleted only if no block files exist for the block pool. If force 
391   * is true entire directory for the blockpool is deleted along with its
392   * contents.
393   * @param bpid BlockPool Id to be deleted.
394   * @param force If force is false, directories are deleted only if no
395   *        block files exist for the block pool, otherwise entire 
396   *        directory for the blockpool is deleted along with its contents.
397   * @throws IOException
398   */
399  public void deleteBlockPool(String bpid, boolean force) throws IOException;
400  
401  /**
402   * Get {@link BlockLocalPathInfo} for the given block.
403   */
404  public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b
405      ) throws IOException;
406
407  /**
408   * Get a {@link HdfsBlocksMetadata} corresponding to the list of blocks in 
409   * <code>blocks</code>.
410   * 
411   * @param bpid pool to query
412   * @param blockIds List of block ids for which to return metadata
413   * @return metadata Metadata for the list of blocks
414   * @throws IOException
415   */
416  public HdfsBlocksMetadata getHdfsBlocksMetadata(String bpid,
417      long[] blockIds) throws IOException;
418
419  /**
420   * Enable 'trash' for the given dataset. When trash is enabled, files are
421   * moved to a separate trash directory instead of being deleted immediately.
422   * This can be useful for example during rolling upgrades.
423   */
424  public void enableTrash(String bpid);
425
426  /**
427   * Restore trash
428   */
429  public void restoreTrash(String bpid);
430
431  /**
432   * @return true when trash is enabled
433   */
434  public boolean trashEnabled(String bpid);
435
436  /**
437   * submit a sync_file_range request to AsyncDiskService
438   */
439  public void submitBackgroundSyncFileRangeRequest(final ExtendedBlock block,
440      final FileDescriptor fd, final long offset, final long nbytes,
441      final int flags);
442}
443