001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.util;
020
021import java.io.DataInputStream;
022import java.io.DataOutputStream;
023import java.io.IOException;
024import java.nio.ByteBuffer;
025import java.util.zip.CRC32;
026import java.util.zip.Checksum;
027
028import org.apache.hadoop.classification.InterfaceAudience;
029import org.apache.hadoop.classification.InterfaceStability;
030import org.apache.hadoop.fs.ChecksumException;
031
032/**
033 * This class provides interface and utilities for processing checksums for
034 * DFS data transfers.
035 */
036@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
037@InterfaceStability.Evolving
038public class DataChecksum implements Checksum {
039  
  // Numeric checksum type identifiers. Each value is the id given to the
  // Type enum constant of the same name, and is the value written as the
  // type byte of a checksum header.
  public static final int CHECKSUM_NULL    = 0;
  public static final int CHECKSUM_CRC32   = 1;
  public static final int CHECKSUM_CRC32C  = 2;
  public static final int CHECKSUM_DEFAULT = 3; 
  public static final int CHECKSUM_MIXED   = 4;
046 
047  /** The checksum types */
048  public static enum Type {
049    NULL  (CHECKSUM_NULL, 0),
050    CRC32 (CHECKSUM_CRC32, 4),
051    CRC32C(CHECKSUM_CRC32C, 4),
052    DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
053    MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
054
055    public final int id;
056    public final int size;
057    
058    private Type(int id, int size) {
059      this.id = id;
060      this.size = size;
061    }
062
063    /** @return the type corresponding to the id. */
064    public static Type valueOf(int id) {
065      if (id < 0 || id >= values().length) {
066        throw new IllegalArgumentException("id=" + id
067            + " out of range [0, " + values().length + ")");
068      }
069      return values()[id];
070    }
071  }
072
073  /**
074   * Create a Crc32 Checksum object. The implementation of the Crc32 algorithm
075   * is chosen depending on the platform.
076   */
077  public static Checksum newCrc32() {
078    //return Shell.isJava7OrAbove()? new CRC32(): new PureJavaCrc32();
079    // Reverting to usage of PureJavaCrc32 (new version as of 2.5.1). No use for native CRC32
080    return new PureJavaCrc32();
081  }
082
083  public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
084    if ( bytesPerChecksum <= 0 ) {
085      return null;
086    }
087    
088    switch ( type ) {
089    case NULL :
090      return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
091    case CRC32 :
092      return new DataChecksum(type, newCrc32(), bytesPerChecksum );
093    case CRC32C:
094      return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
095    default:
096      return null;  
097    }
098  }
099  
100  /**
101   * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
102   * @return DataChecksum of the type in the array or null in case of an error.
103   */
104  public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
105    if (offset < 0 || bytes.length < offset + getChecksumHeaderSize()) {
106      return null;
107    }
108    
109    // like readInt():
110    int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 
111                           ( (bytes[offset+2] & 0xff) << 16 ) |
112                           ( (bytes[offset+3] & 0xff) << 8 )  |
113                           ( (bytes[offset+4] & 0xff) );
114    return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
115  }
116  
117  /**
118   * This constructs a DataChecksum by reading HEADER_LEN bytes from input
119   * stream <i>in</i>
120   */
121  public static DataChecksum newDataChecksum( DataInputStream in )
122                                 throws IOException {
123    int type = in.readByte();
124    int bpc = in.readInt();
125    DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
126    if ( summer == null ) {
127      throw new IOException( "Could not create DataChecksum of type " +
128                             type + " with bytesPerChecksum " + bpc );
129    }
130    return summer;
131  }
132  
133  /**
134   * Writes the checksum header to the output stream <i>out</i>.
135   */
136  public void writeHeader( DataOutputStream out ) 
137                           throws IOException { 
138    out.writeByte( type.id );
139    out.writeInt( bytesPerChecksum );
140  }
141
142  public byte[] getHeader() {
143    byte[] header = new byte[getChecksumHeaderSize()];
144    header[0] = (byte) (type.id & 0xff);
145    // Writing in buffer just like DataOutput.WriteInt()
146    header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff);
147    header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff);
148    header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff);
149    header[1+3] = (byte) (bytesPerChecksum & 0xff);
150    return header;
151  }
152  
153  /**
154   * Writes the current checksum to the stream.
155   * If <i>reset</i> is true, then resets the checksum.
156   * @return number of bytes written. Will be equal to getChecksumSize();
157   */
158   public int writeValue( DataOutputStream out, boolean reset )
159                          throws IOException {
160     if ( type.size <= 0 ) {
161       return 0;
162     }
163
164     if ( type.size == 4 ) {
165       out.writeInt( (int) summer.getValue() );
166     } else {
167       throw new IOException( "Unknown Checksum " + type );
168     }
169     
170     if ( reset ) {
171       reset();
172     }
173     
174     return type.size;
175   }
176   
177   /**
178    * Writes the current checksum to a buffer.
179    * If <i>reset</i> is true, then resets the checksum.
180    * @return number of bytes written. Will be equal to getChecksumSize();
181    */
182    public int writeValue( byte[] buf, int offset, boolean reset )
183                           throws IOException {
184      if ( type.size <= 0 ) {
185        return 0;
186      }
187
188      if ( type.size == 4 ) {
189        int checksum = (int) summer.getValue();
190        buf[offset+0] = (byte) ((checksum >>> 24) & 0xff);
191        buf[offset+1] = (byte) ((checksum >>> 16) & 0xff);
192        buf[offset+2] = (byte) ((checksum >>> 8) & 0xff);
193        buf[offset+3] = (byte) (checksum & 0xff);
194      } else {
195        throw new IOException( "Unknown Checksum " + type );
196      }
197      
198      if ( reset ) {
199        reset();
200      }
201      
202      return type.size;
203    }
204   
205   /**
206    * Compares the checksum located at buf[offset] with the current checksum.
207    * @return true if the checksum matches and false otherwise.
208    */
209   public boolean compare( byte buf[], int offset ) {
210     if ( type.size == 4 ) {
211       int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 
212                      ( (buf[offset+1] & 0xff) << 16 ) |
213                      ( (buf[offset+2] & 0xff) << 8 )  |
214                      ( (buf[offset+3] & 0xff) );
215       return checksum == (int) summer.getValue();
216     }
217     return type.size == 0;
218   }
219   
  // Checksum algorithm of this instance; its id and size drive the header
  // and value encoding.
  private final Type type;
  // Underlying Checksum implementation that the Checksum interface methods
  // and the chunked operations delegate to.
  private final Checksum summer;
  // Number of data bytes covered by each checksum in the chunked operations.
  private final int bytesPerChecksum;
  // Number of bytes passed through update() since construction or the last
  // reset().
  private int inSum = 0;
224  
225  private DataChecksum( Type type, Checksum checksum, int chunkSize ) {
226    this.type = type;
227    summer = checksum;
228    bytesPerChecksum = chunkSize;
229  }
230  
231  /** @return the checksum algorithm type. */
232  public Type getChecksumType() {
233    return type;
234  }
235  /** @return the size for a checksum. */
236  public int getChecksumSize() {
237    return type.size;
238  }
239  /** @return the required checksum size given the data length. */
240  public int getChecksumSize(int dataSize) {
241    return ((dataSize - 1)/getBytesPerChecksum() + 1) * getChecksumSize(); 
242  }
243  public int getBytesPerChecksum() {
244    return bytesPerChecksum;
245  }
246  public int getNumBytesInSum() {
247    return inSum;
248  }
249  
250  public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
251  static public int getChecksumHeaderSize() {
252    return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
253  }
254  //Checksum Interface. Just a wrapper around member summer.
255  @Override
256  public long getValue() {
257    return summer.getValue();
258  }
259  @Override
260  public void reset() {
261    summer.reset();
262    inSum = 0;
263  }
264  @Override
265  public void update( byte[] b, int off, int len ) {
266    if ( len > 0 ) {
267      summer.update( b, off, len );
268      inSum += len;
269    }
270  }
271  @Override
272  public void update( int b ) {
273    summer.update( b );
274    inSum += 1;
275  }
276  
277  /**
278   * Verify that the given checksums match the given data.
279   * 
280   * The 'mark' of the ByteBuffer parameters may be modified by this function,.
281   * but the position is maintained.
282   *  
283   * @param data the DirectByteBuffer pointing to the data to verify.
284   * @param checksums the DirectByteBuffer pointing to a series of stored
285   *                  checksums
286   * @param fileName the name of the file being read, for error-reporting
287   * @param basePos the file position to which the start of 'data' corresponds
288   * @throws ChecksumException if the checksums do not match
289   */
290  public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
291      String fileName, long basePos)
292  throws ChecksumException {
293    if (type.size == 0) return;
294    
295    if (data.hasArray() && checksums.hasArray()) {
296      verifyChunkedSums(
297          data.array(), data.arrayOffset() + data.position(), data.remaining(),
298          checksums.array(), checksums.arrayOffset() + checksums.position(),
299          fileName, basePos);
300      return;
301    }
302    if (NativeCrc32.isAvailable()) {
303      NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
304          fileName, basePos);
305      return;
306    }
307    
308    int startDataPos = data.position();
309    data.mark();
310    checksums.mark();
311    try {
312      byte[] buf = new byte[bytesPerChecksum];
313      byte[] sum = new byte[type.size];
314      while (data.remaining() > 0) {
315        int n = Math.min(data.remaining(), bytesPerChecksum);
316        checksums.get(sum);
317        data.get(buf, 0, n);
318        summer.reset();
319        summer.update(buf, 0, n);
320        int calculated = (int)summer.getValue();
321        int stored = (sum[0] << 24 & 0xff000000) |
322          (sum[1] << 16 & 0xff0000) |
323          (sum[2] << 8 & 0xff00) |
324          sum[3] & 0xff;
325        if (calculated != stored) {
326          long errPos = basePos + data.position() - startDataPos - n;
327          throw new ChecksumException(
328              "Checksum error: "+ fileName + " at "+ errPos +
329              " exp: " + stored + " got: " + calculated, errPos);
330        }
331      }
332    } finally {
333      data.reset();
334      checksums.reset();
335    }
336  }
337  
338  /**
339   * Implementation of chunked verification specifically on byte arrays. This
340   * is to avoid the copy when dealing with ByteBuffers that have array backing.
341   */
342  private void verifyChunkedSums(
343      byte[] data, int dataOff, int dataLen,
344      byte[] checksums, int checksumsOff, String fileName,
345      long basePos) throws ChecksumException {
346    if (type.size == 0) return;
347
348    if (NativeCrc32.isAvailable()) {
349      NativeCrc32.verifyChunkedSumsByteArray(bytesPerChecksum, type.id,
350          checksums, checksumsOff, data, dataOff, dataLen, fileName, basePos);
351      return;
352    }
353    
354    int remaining = dataLen;
355    int dataPos = 0;
356    while (remaining > 0) {
357      int n = Math.min(remaining, bytesPerChecksum);
358      
359      summer.reset();
360      summer.update(data, dataOff + dataPos, n);
361      dataPos += n;
362      remaining -= n;
363      
364      int calculated = (int)summer.getValue();
365      int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
366        (checksums[checksumsOff + 1] << 16 & 0xff0000) |
367        (checksums[checksumsOff + 2] << 8 & 0xff00) |
368        checksums[checksumsOff + 3] & 0xff;
369      checksumsOff += 4;
370      if (calculated != stored) {
371        long errPos = basePos + dataPos - n;
372        throw new ChecksumException(
373            "Checksum error: "+ fileName + " at "+ errPos +
374            " exp: " + stored + " got: " + calculated, errPos);
375      }
376    }
377  }
378
379  /**
380   * Calculate checksums for the given data.
381   * 
382   * The 'mark' of the ByteBuffer parameters may be modified by this function,
383   * but the position is maintained.
384   * 
385   * @param data the DirectByteBuffer pointing to the data to checksum.
386   * @param checksums the DirectByteBuffer into which checksums will be
387   *                  stored. Enough space must be available in this
388   *                  buffer to put the checksums.
389   */
390  public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
391    if (type.size == 0) return;
392    
393    if (data.hasArray() && checksums.hasArray()) {
394      calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
395          checksums.array(), checksums.arrayOffset() + checksums.position());
396      return;
397    }
398
399    if (NativeCrc32.isAvailable()) {
400      NativeCrc32.calculateChunkedSums(bytesPerChecksum, type.id,
401          checksums, data);
402      return;
403    }
404    
405    data.mark();
406    checksums.mark();
407    try {
408      byte[] buf = new byte[bytesPerChecksum];
409      while (data.remaining() > 0) {
410        int n = Math.min(data.remaining(), bytesPerChecksum);
411        data.get(buf, 0, n);
412        summer.reset();
413        summer.update(buf, 0, n);
414        checksums.putInt((int)summer.getValue());
415      }
416    } finally {
417      data.reset();
418      checksums.reset();
419    }
420  }
421
422  /**
423   * Implementation of chunked calculation specifically on byte arrays. This
424   * is to avoid the copy when dealing with ByteBuffers that have array backing.
425   */
426  public void calculateChunkedSums(
427      byte[] data, int dataOffset, int dataLength,
428      byte[] sums, int sumsOffset) {
429    if (type.size == 0) return;
430
431    if (NativeCrc32.isAvailable()) {
432      NativeCrc32.calculateChunkedSumsByteArray(bytesPerChecksum, type.id,
433          sums, sumsOffset, data, dataOffset, dataLength);
434      return;
435    }
436
437    int remaining = dataLength;
438    while (remaining > 0) {
439      int n = Math.min(remaining, bytesPerChecksum);
440      summer.reset();
441      summer.update(data, dataOffset, n);
442      dataOffset += n;
443      remaining -= n;
444      long calculated = summer.getValue();
445      sums[sumsOffset++] = (byte) (calculated >> 24);
446      sums[sumsOffset++] = (byte) (calculated >> 16);
447      sums[sumsOffset++] = (byte) (calculated >> 8);
448      sums[sumsOffset++] = (byte) (calculated);
449    }
450  }
451
452  @Override
453  public boolean equals(Object other) {
454    if (!(other instanceof DataChecksum)) {
455      return false;
456    }
457    DataChecksum o = (DataChecksum)other;
458    return o.bytesPerChecksum == this.bytesPerChecksum &&
459      o.type == this.type;
460  }
461  
462  @Override
463  public int hashCode() {
464    return (this.type.id + 31) * this.bytesPerChecksum;
465  }
466  
467  @Override
468  public String toString() {
469    return "DataChecksum(type=" + type +
470      ", chunkSize=" + bytesPerChecksum + ")";
471  }
472  
473  /**
474   * This just provides a dummy implimentation for Checksum class
475   * This is used when there is no checksum available or required for 
476   * data
477   */
478  static class ChecksumNull implements Checksum {
479    
480    public ChecksumNull() {}
481    
482    //Dummy interface
483    @Override
484    public long getValue() { return 0; }
485    @Override
486    public void reset() {}
487    @Override
488    public void update(byte[] b, int off, int len) {}
489    @Override
490    public void update(int b) {}
491  };
492}