001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.util;
020
021import java.io.DataInputStream;
022import java.io.DataOutputStream;
023import java.io.IOException;
024import java.nio.ByteBuffer;
025import java.util.zip.CRC32;
026import java.util.zip.Checksum;
027
028import org.apache.hadoop.classification.InterfaceAudience;
029import org.apache.hadoop.classification.InterfaceStability;
030import org.apache.hadoop.fs.ChecksumException;
031
032/**
033 * This class provides inteface and utilities for processing checksums for
034 * DFS data transfers.
035 */
036@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
037@InterfaceStability.Evolving
038public class DataChecksum implements Checksum {
039  
040  // Misc constants
041  public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len
042  
043  // checksum types
044  public static final int CHECKSUM_NULL    = 0;
045  public static final int CHECKSUM_CRC32   = 1;
046  public static final int CHECKSUM_CRC32C  = 2;
047  public static final int CHECKSUM_DEFAULT = 3; 
048  public static final int CHECKSUM_MIXED   = 4;
049 
050  /** The checksum types */
051  public static enum Type {
052    NULL  (CHECKSUM_NULL, 0),
053    CRC32 (CHECKSUM_CRC32, 4),
054    CRC32C(CHECKSUM_CRC32C, 4),
055    DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
056    MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
057
058    public final int id;
059    public final int size;
060    
061    private Type(int id, int size) {
062      this.id = id;
063      this.size = size;
064    }
065
066    /** @return the type corresponding to the id. */
067    public static Type valueOf(int id) {
068      if (id < 0 || id >= values().length) {
069        throw new IllegalArgumentException("id=" + id
070            + " out of range [0, " + values().length + ")");
071      }
072      return values()[id];
073    }
074  }
075
076  /**
077   * Create a Crc32 Checksum object. The implementation of the Crc32 algorithm
078   * is chosen depending on the platform.
079   */
080  public static Checksum newCrc32() {
081    //return Shell.isJava7OrAbove()? new CRC32(): new PureJavaCrc32();
082    // Reverting to usage of PureJavaCrc32 (new version as of 2.5.1). No use for native CRC32
083    return new PureJavaCrc32();
084  }
085
086  public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
087    if ( bytesPerChecksum <= 0 ) {
088      return null;
089    }
090    
091    switch ( type ) {
092    case NULL :
093      return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
094    case CRC32 :
095      return new DataChecksum(type, newCrc32(), bytesPerChecksum );
096    case CRC32C:
097      return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
098    default:
099      return null;  
100    }
101  }
102  
103  /**
104   * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
105   * @return DataChecksum of the type in the array or null in case of an error.
106   */
107  public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
108    if ( offset < 0 || bytes.length < offset + HEADER_LEN ) {
109      return null;
110    }
111    
112    // like readInt():
113    int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 
114                           ( (bytes[offset+2] & 0xff) << 16 ) |
115                           ( (bytes[offset+3] & 0xff) << 8 )  |
116                           ( (bytes[offset+4] & 0xff) );
117    return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
118  }
119  
120  /**
121   * This constructucts a DataChecksum by reading HEADER_LEN bytes from
122   * input stream <i>in</i>
123   */
124  public static DataChecksum newDataChecksum( DataInputStream in )
125                                 throws IOException {
126    int type = in.readByte();
127    int bpc = in.readInt();
128    DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
129    if ( summer == null ) {
130      throw new IOException( "Could not create DataChecksum of type " +
131                             type + " with bytesPerChecksum " + bpc );
132    }
133    return summer;
134  }
135  
136  /**
137   * Writes the checksum header to the output stream <i>out</i>.
138   */
139  public void writeHeader( DataOutputStream out ) 
140                           throws IOException { 
141    out.writeByte( type.id );
142    out.writeInt( bytesPerChecksum );
143  }
144
145  public byte[] getHeader() {
146    byte[] header = new byte[DataChecksum.HEADER_LEN];
147    header[0] = (byte) (type.id & 0xff);
148    // Writing in buffer just like DataOutput.WriteInt()
149    header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff);
150    header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff);
151    header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff);
152    header[1+3] = (byte) (bytesPerChecksum & 0xff);
153    return header;
154  }
155  
156  /**
157   * Writes the current checksum to the stream.
158   * If <i>reset</i> is true, then resets the checksum.
159   * @return number of bytes written. Will be equal to getChecksumSize();
160   */
161   public int writeValue( DataOutputStream out, boolean reset )
162                          throws IOException {
163     if ( type.size <= 0 ) {
164       return 0;
165     }
166
167     if ( type.size == 4 ) {
168       out.writeInt( (int) summer.getValue() );
169     } else {
170       throw new IOException( "Unknown Checksum " + type );
171     }
172     
173     if ( reset ) {
174       reset();
175     }
176     
177     return type.size;
178   }
179   
180   /**
181    * Writes the current checksum to a buffer.
182    * If <i>reset</i> is true, then resets the checksum.
183    * @return number of bytes written. Will be equal to getChecksumSize();
184    */
185    public int writeValue( byte[] buf, int offset, boolean reset )
186                           throws IOException {
187      if ( type.size <= 0 ) {
188        return 0;
189      }
190
191      if ( type.size == 4 ) {
192        int checksum = (int) summer.getValue();
193        buf[offset+0] = (byte) ((checksum >>> 24) & 0xff);
194        buf[offset+1] = (byte) ((checksum >>> 16) & 0xff);
195        buf[offset+2] = (byte) ((checksum >>> 8) & 0xff);
196        buf[offset+3] = (byte) (checksum & 0xff);
197      } else {
198        throw new IOException( "Unknown Checksum " + type );
199      }
200      
201      if ( reset ) {
202        reset();
203      }
204      
205      return type.size;
206    }
207   
208   /**
209    * Compares the checksum located at buf[offset] with the current checksum.
210    * @return true if the checksum matches and false otherwise.
211    */
212   public boolean compare( byte buf[], int offset ) {
213     if ( type.size == 4 ) {
214       int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 
215                      ( (buf[offset+1] & 0xff) << 16 ) |
216                      ( (buf[offset+2] & 0xff) << 8 )  |
217                      ( (buf[offset+3] & 0xff) );
218       return checksum == (int) summer.getValue();
219     }
220     return type.size == 0;
221   }
222   
223  private final Type type;
224  private final Checksum summer;
225  private final int bytesPerChecksum;
226  private int inSum = 0;
227  
228  private DataChecksum( Type type, Checksum checksum, int chunkSize ) {
229    this.type = type;
230    summer = checksum;
231    bytesPerChecksum = chunkSize;
232  }
233  
234  // Accessors
235  public Type getChecksumType() {
236    return type;
237  }
238  public int getChecksumSize() {
239    return type.size;
240  }
241  public int getBytesPerChecksum() {
242    return bytesPerChecksum;
243  }
244  public int getNumBytesInSum() {
245    return inSum;
246  }
247  
248  public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
249  static public int getChecksumHeaderSize() {
250    return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
251  }
252  //Checksum Interface. Just a wrapper around member summer.
253  @Override
254  public long getValue() {
255    return summer.getValue();
256  }
257  @Override
258  public void reset() {
259    summer.reset();
260    inSum = 0;
261  }
262  @Override
263  public void update( byte[] b, int off, int len ) {
264    if ( len > 0 ) {
265      summer.update( b, off, len );
266      inSum += len;
267    }
268  }
269  @Override
270  public void update( int b ) {
271    summer.update( b );
272    inSum += 1;
273  }
274  
275  /**
276   * Verify that the given checksums match the given data.
277   * 
278   * The 'mark' of the ByteBuffer parameters may be modified by this function,.
279   * but the position is maintained.
280   *  
281   * @param data the DirectByteBuffer pointing to the data to verify.
282   * @param checksums the DirectByteBuffer pointing to a series of stored
283   *                  checksums
284   * @param fileName the name of the file being read, for error-reporting
285   * @param basePos the file position to which the start of 'data' corresponds
286   * @throws ChecksumException if the checksums do not match
287   */
288  public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
289      String fileName, long basePos)
290  throws ChecksumException {
291    if (type.size == 0) return;
292    
293    if (data.hasArray() && checksums.hasArray()) {
294      verifyChunkedSums(
295          data.array(), data.arrayOffset() + data.position(), data.remaining(),
296          checksums.array(), checksums.arrayOffset() + checksums.position(),
297          fileName, basePos);
298      return;
299    }
300    if (NativeCrc32.isAvailable()) {
301      NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
302          fileName, basePos);
303      return;
304    }
305    
306    int startDataPos = data.position();
307    data.mark();
308    checksums.mark();
309    try {
310      byte[] buf = new byte[bytesPerChecksum];
311      byte[] sum = new byte[type.size];
312      while (data.remaining() > 0) {
313        int n = Math.min(data.remaining(), bytesPerChecksum);
314        checksums.get(sum);
315        data.get(buf, 0, n);
316        summer.reset();
317        summer.update(buf, 0, n);
318        int calculated = (int)summer.getValue();
319        int stored = (sum[0] << 24 & 0xff000000) |
320          (sum[1] << 16 & 0xff0000) |
321          (sum[2] << 8 & 0xff00) |
322          sum[3] & 0xff;
323        if (calculated != stored) {
324          long errPos = basePos + data.position() - startDataPos - n;
325          throw new ChecksumException(
326              "Checksum error: "+ fileName + " at "+ errPos +
327              " exp: " + stored + " got: " + calculated, errPos);
328        }
329      }
330    } finally {
331      data.reset();
332      checksums.reset();
333    }
334  }
335  
336  /**
337   * Implementation of chunked verification specifically on byte arrays. This
338   * is to avoid the copy when dealing with ByteBuffers that have array backing.
339   */
340  private void verifyChunkedSums(
341      byte[] data, int dataOff, int dataLen,
342      byte[] checksums, int checksumsOff, String fileName,
343      long basePos) throws ChecksumException {
344    
345    int remaining = dataLen;
346    int dataPos = 0;
347    while (remaining > 0) {
348      int n = Math.min(remaining, bytesPerChecksum);
349      
350      summer.reset();
351      summer.update(data, dataOff + dataPos, n);
352      dataPos += n;
353      remaining -= n;
354      
355      int calculated = (int)summer.getValue();
356      int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
357        (checksums[checksumsOff + 1] << 16 & 0xff0000) |
358        (checksums[checksumsOff + 2] << 8 & 0xff00) |
359        checksums[checksumsOff + 3] & 0xff;
360      checksumsOff += 4;
361      if (calculated != stored) {
362        long errPos = basePos + dataPos - n;
363        throw new ChecksumException(
364            "Checksum error: "+ fileName + " at "+ errPos +
365            " exp: " + stored + " got: " + calculated, errPos);
366      }
367    }
368  }
369
370  /**
371   * Calculate checksums for the given data.
372   * 
373   * The 'mark' of the ByteBuffer parameters may be modified by this function,
374   * but the position is maintained.
375   * 
376   * @param data the DirectByteBuffer pointing to the data to checksum.
377   * @param checksums the DirectByteBuffer into which checksums will be
378   *                  stored. Enough space must be available in this
379   *                  buffer to put the checksums.
380   */
381  public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
382    if (type.size == 0) return;
383    
384    if (data.hasArray() && checksums.hasArray()) {
385      calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
386          checksums.array(), checksums.arrayOffset() + checksums.position());
387      return;
388    }
389    
390    data.mark();
391    checksums.mark();
392    try {
393      byte[] buf = new byte[bytesPerChecksum];
394      while (data.remaining() > 0) {
395        int n = Math.min(data.remaining(), bytesPerChecksum);
396        data.get(buf, 0, n);
397        summer.reset();
398        summer.update(buf, 0, n);
399        checksums.putInt((int)summer.getValue());
400      }
401    } finally {
402      data.reset();
403      checksums.reset();
404    }
405  }
406
407  /**
408   * Implementation of chunked calculation specifically on byte arrays. This
409   * is to avoid the copy when dealing with ByteBuffers that have array backing.
410   */
411  private void calculateChunkedSums(
412      byte[] data, int dataOffset, int dataLength,
413      byte[] sums, int sumsOffset) {
414
415    int remaining = dataLength;
416    while (remaining > 0) {
417      int n = Math.min(remaining, bytesPerChecksum);
418      summer.reset();
419      summer.update(data, dataOffset, n);
420      dataOffset += n;
421      remaining -= n;
422      long calculated = summer.getValue();
423      sums[sumsOffset++] = (byte) (calculated >> 24);
424      sums[sumsOffset++] = (byte) (calculated >> 16);
425      sums[sumsOffset++] = (byte) (calculated >> 8);
426      sums[sumsOffset++] = (byte) (calculated);
427    }
428  }
429
430  @Override
431  public boolean equals(Object other) {
432    if (!(other instanceof DataChecksum)) {
433      return false;
434    }
435    DataChecksum o = (DataChecksum)other;
436    return o.bytesPerChecksum == this.bytesPerChecksum &&
437      o.type == this.type;
438  }
439  
440  @Override
441  public int hashCode() {
442    return (this.type.id + 31) * this.bytesPerChecksum;
443  }
444  
445  @Override
446  public String toString() {
447    return "DataChecksum(type=" + type +
448      ", chunkSize=" + bytesPerChecksum + ")";
449  }
450  
451  /**
452   * This just provides a dummy implimentation for Checksum class
453   * This is used when there is no checksum available or required for 
454   * data
455   */
456  static class ChecksumNull implements Checksum {
457    
458    public ChecksumNull() {}
459    
460    //Dummy interface
461    @Override
462    public long getValue() { return 0; }
463    @Override
464    public void reset() {}
465    @Override
466    public void update(byte[] b, int off, int len) {}
467    @Override
468    public void update(int b) {}
469  };
470}