001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.util;
020
021import java.io.DataInputStream;
022import java.io.DataOutputStream;
023import java.io.IOException;
024import java.nio.ByteBuffer;
025import java.util.zip.Checksum;
026
027import org.apache.hadoop.classification.InterfaceAudience;
028import org.apache.hadoop.classification.InterfaceStability;
029import org.apache.hadoop.fs.ChecksumException;
030
031/**
032 * This class provides inteface and utilities for processing checksums for
033 * DFS data transfers.
034 */
035@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
036@InterfaceStability.Evolving
037public class DataChecksum implements Checksum {
038  
039  // Misc constants
040  public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len
041  
042  // checksum types
043  public static final int CHECKSUM_NULL    = 0;
044  public static final int CHECKSUM_CRC32   = 1;
045  public static final int CHECKSUM_CRC32C  = 2;
046  public static final int CHECKSUM_DEFAULT = 3; 
047  public static final int CHECKSUM_MIXED   = 4;
048 
049  /** The checksum types */
050  public static enum Type {
051    NULL  (CHECKSUM_NULL, 0),
052    CRC32 (CHECKSUM_CRC32, 4),
053    CRC32C(CHECKSUM_CRC32C, 4),
054    DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
055    MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
056
057    public final int id;
058    public final int size;
059    
060    private Type(int id, int size) {
061      this.id = id;
062      this.size = size;
063    }
064
065    /** @return the type corresponding to the id. */
066    public static Type valueOf(int id) {
067      if (id < 0 || id >= values().length) {
068        throw new IllegalArgumentException("id=" + id
069            + " out of range [0, " + values().length + ")");
070      }
071      return values()[id];
072    }
073  }
074
075
076  public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
077    if ( bytesPerChecksum <= 0 ) {
078      return null;
079    }
080    
081    switch ( type ) {
082    case NULL :
083      return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
084    case CRC32 :
085      return new DataChecksum(type, new PureJavaCrc32(), bytesPerChecksum );
086    case CRC32C:
087      return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
088    default:
089      return null;  
090    }
091  }
092  
093  /**
094   * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
095   * @return DataChecksum of the type in the array or null in case of an error.
096   */
097  public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
098    if ( offset < 0 || bytes.length < offset + HEADER_LEN ) {
099      return null;
100    }
101    
102    // like readInt():
103    int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 
104                           ( (bytes[offset+2] & 0xff) << 16 ) |
105                           ( (bytes[offset+3] & 0xff) << 8 )  |
106                           ( (bytes[offset+4] & 0xff) );
107    return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
108  }
109  
110  /**
111   * This constructucts a DataChecksum by reading HEADER_LEN bytes from
112   * input stream <i>in</i>
113   */
114  public static DataChecksum newDataChecksum( DataInputStream in )
115                                 throws IOException {
116    int type = in.readByte();
117    int bpc = in.readInt();
118    DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
119    if ( summer == null ) {
120      throw new IOException( "Could not create DataChecksum of type " +
121                             type + " with bytesPerChecksum " + bpc );
122    }
123    return summer;
124  }
125  
126  /**
127   * Writes the checksum header to the output stream <i>out</i>.
128   */
129  public void writeHeader( DataOutputStream out ) 
130                           throws IOException { 
131    out.writeByte( type.id );
132    out.writeInt( bytesPerChecksum );
133  }
134
135  public byte[] getHeader() {
136    byte[] header = new byte[DataChecksum.HEADER_LEN];
137    header[0] = (byte) (type.id & 0xff);
138    // Writing in buffer just like DataOutput.WriteInt()
139    header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff);
140    header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff);
141    header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff);
142    header[1+3] = (byte) (bytesPerChecksum & 0xff);
143    return header;
144  }
145  
146  /**
147   * Writes the current checksum to the stream.
148   * If <i>reset</i> is true, then resets the checksum.
149   * @return number of bytes written. Will be equal to getChecksumSize();
150   */
151   public int writeValue( DataOutputStream out, boolean reset )
152                          throws IOException {
153     if ( type.size <= 0 ) {
154       return 0;
155     }
156
157     if ( type.size == 4 ) {
158       out.writeInt( (int) summer.getValue() );
159     } else {
160       throw new IOException( "Unknown Checksum " + type );
161     }
162     
163     if ( reset ) {
164       reset();
165     }
166     
167     return type.size;
168   }
169   
170   /**
171    * Writes the current checksum to a buffer.
172    * If <i>reset</i> is true, then resets the checksum.
173    * @return number of bytes written. Will be equal to getChecksumSize();
174    */
175    public int writeValue( byte[] buf, int offset, boolean reset )
176                           throws IOException {
177      if ( type.size <= 0 ) {
178        return 0;
179      }
180
181      if ( type.size == 4 ) {
182        int checksum = (int) summer.getValue();
183        buf[offset+0] = (byte) ((checksum >>> 24) & 0xff);
184        buf[offset+1] = (byte) ((checksum >>> 16) & 0xff);
185        buf[offset+2] = (byte) ((checksum >>> 8) & 0xff);
186        buf[offset+3] = (byte) (checksum & 0xff);
187      } else {
188        throw new IOException( "Unknown Checksum " + type );
189      }
190      
191      if ( reset ) {
192        reset();
193      }
194      
195      return type.size;
196    }
197   
198   /**
199    * Compares the checksum located at buf[offset] with the current checksum.
200    * @return true if the checksum matches and false otherwise.
201    */
202   public boolean compare( byte buf[], int offset ) {
203     if ( type.size == 4 ) {
204       int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 
205                      ( (buf[offset+1] & 0xff) << 16 ) |
206                      ( (buf[offset+2] & 0xff) << 8 )  |
207                      ( (buf[offset+3] & 0xff) );
208       return checksum == (int) summer.getValue();
209     }
210     return type.size == 0;
211   }
212   
213  private final Type type;
214  private final Checksum summer;
215  private final int bytesPerChecksum;
216  private int inSum = 0;
217  
218  private DataChecksum( Type type, Checksum checksum, int chunkSize ) {
219    this.type = type;
220    summer = checksum;
221    bytesPerChecksum = chunkSize;
222  }
223  
224  // Accessors
225  public Type getChecksumType() {
226    return type;
227  }
228  public int getChecksumSize() {
229    return type.size;
230  }
231  public int getBytesPerChecksum() {
232    return bytesPerChecksum;
233  }
234  public int getNumBytesInSum() {
235    return inSum;
236  }
237  
238  public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
239  static public int getChecksumHeaderSize() {
240    return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
241  }
242  //Checksum Interface. Just a wrapper around member summer.
243  @Override
244  public long getValue() {
245    return summer.getValue();
246  }
247  @Override
248  public void reset() {
249    summer.reset();
250    inSum = 0;
251  }
252  @Override
253  public void update( byte[] b, int off, int len ) {
254    if ( len > 0 ) {
255      summer.update( b, off, len );
256      inSum += len;
257    }
258  }
259  @Override
260  public void update( int b ) {
261    summer.update( b );
262    inSum += 1;
263  }
264  
265  /**
266   * Verify that the given checksums match the given data.
267   * 
268   * The 'mark' of the ByteBuffer parameters may be modified by this function,.
269   * but the position is maintained.
270   *  
271   * @param data the DirectByteBuffer pointing to the data to verify.
272   * @param checksums the DirectByteBuffer pointing to a series of stored
273   *                  checksums
274   * @param fileName the name of the file being read, for error-reporting
275   * @param basePos the file position to which the start of 'data' corresponds
276   * @throws ChecksumException if the checksums do not match
277   */
278  public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
279      String fileName, long basePos)
280  throws ChecksumException {
281    if (type.size == 0) return;
282    
283    if (data.hasArray() && checksums.hasArray()) {
284      verifyChunkedSums(
285          data.array(), data.arrayOffset() + data.position(), data.remaining(),
286          checksums.array(), checksums.arrayOffset() + checksums.position(),
287          fileName, basePos);
288      return;
289    }
290    if (NativeCrc32.isAvailable()) {
291      NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
292          fileName, basePos);
293      return;
294    }
295    
296    int startDataPos = data.position();
297    data.mark();
298    checksums.mark();
299    try {
300      byte[] buf = new byte[bytesPerChecksum];
301      byte[] sum = new byte[type.size];
302      while (data.remaining() > 0) {
303        int n = Math.min(data.remaining(), bytesPerChecksum);
304        checksums.get(sum);
305        data.get(buf, 0, n);
306        summer.reset();
307        summer.update(buf, 0, n);
308        int calculated = (int)summer.getValue();
309        int stored = (sum[0] << 24 & 0xff000000) |
310          (sum[1] << 16 & 0xff0000) |
311          (sum[2] << 8 & 0xff00) |
312          sum[3] & 0xff;
313        if (calculated != stored) {
314          long errPos = basePos + data.position() - startDataPos - n;
315          throw new ChecksumException(
316              "Checksum error: "+ fileName + " at "+ errPos +
317              " exp: " + stored + " got: " + calculated, errPos);
318        }
319      }
320    } finally {
321      data.reset();
322      checksums.reset();
323    }
324  }
325  
326  /**
327   * Implementation of chunked verification specifically on byte arrays. This
328   * is to avoid the copy when dealing with ByteBuffers that have array backing.
329   */
330  private void verifyChunkedSums(
331      byte[] data, int dataOff, int dataLen,
332      byte[] checksums, int checksumsOff, String fileName,
333      long basePos) throws ChecksumException {
334    
335    int remaining = dataLen;
336    int dataPos = 0;
337    while (remaining > 0) {
338      int n = Math.min(remaining, bytesPerChecksum);
339      
340      summer.reset();
341      summer.update(data, dataOff + dataPos, n);
342      dataPos += n;
343      remaining -= n;
344      
345      int calculated = (int)summer.getValue();
346      int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
347        (checksums[checksumsOff + 1] << 16 & 0xff0000) |
348        (checksums[checksumsOff + 2] << 8 & 0xff00) |
349        checksums[checksumsOff + 3] & 0xff;
350      checksumsOff += 4;
351      if (calculated != stored) {
352        long errPos = basePos + dataPos - n;
353        throw new ChecksumException(
354            "Checksum error: "+ fileName + " at "+ errPos +
355            " exp: " + stored + " got: " + calculated, errPos);
356      }
357    }
358  }
359
360  /**
361   * Calculate checksums for the given data.
362   * 
363   * The 'mark' of the ByteBuffer parameters may be modified by this function,
364   * but the position is maintained.
365   * 
366   * @param data the DirectByteBuffer pointing to the data to checksum.
367   * @param checksums the DirectByteBuffer into which checksums will be
368   *                  stored. Enough space must be available in this
369   *                  buffer to put the checksums.
370   */
371  public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
372    if (type.size == 0) return;
373    
374    if (data.hasArray() && checksums.hasArray()) {
375      calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
376          checksums.array(), checksums.arrayOffset() + checksums.position());
377      return;
378    }
379    
380    data.mark();
381    checksums.mark();
382    try {
383      byte[] buf = new byte[bytesPerChecksum];
384      while (data.remaining() > 0) {
385        int n = Math.min(data.remaining(), bytesPerChecksum);
386        data.get(buf, 0, n);
387        summer.reset();
388        summer.update(buf, 0, n);
389        checksums.putInt((int)summer.getValue());
390      }
391    } finally {
392      data.reset();
393      checksums.reset();
394    }
395  }
396
397  /**
398   * Implementation of chunked calculation specifically on byte arrays. This
399   * is to avoid the copy when dealing with ByteBuffers that have array backing.
400   */
401  private void calculateChunkedSums(
402      byte[] data, int dataOffset, int dataLength,
403      byte[] sums, int sumsOffset) {
404
405    int remaining = dataLength;
406    while (remaining > 0) {
407      int n = Math.min(remaining, bytesPerChecksum);
408      summer.reset();
409      summer.update(data, dataOffset, n);
410      dataOffset += n;
411      remaining -= n;
412      long calculated = summer.getValue();
413      sums[sumsOffset++] = (byte) (calculated >> 24);
414      sums[sumsOffset++] = (byte) (calculated >> 16);
415      sums[sumsOffset++] = (byte) (calculated >> 8);
416      sums[sumsOffset++] = (byte) (calculated);
417    }
418  }
419
420  @Override
421  public boolean equals(Object other) {
422    if (!(other instanceof DataChecksum)) {
423      return false;
424    }
425    DataChecksum o = (DataChecksum)other;
426    return o.bytesPerChecksum == this.bytesPerChecksum &&
427      o.type == this.type;
428  }
429  
430  @Override
431  public int hashCode() {
432    return (this.type.id + 31) * this.bytesPerChecksum;
433  }
434  
435  @Override
436  public String toString() {
437    return "DataChecksum(type=" + type +
438      ", chunkSize=" + bytesPerChecksum + ")";
439  }
440  
441  /**
442   * This just provides a dummy implimentation for Checksum class
443   * This is used when there is no checksum available or required for 
444   * data
445   */
446  static class ChecksumNull implements Checksum {
447    
448    public ChecksumNull() {}
449    
450    //Dummy interface
451    @Override
452    public long getValue() { return 0; }
453    @Override
454    public void reset() {}
455    @Override
456    public void update(byte[] b, int off, int len) {}
457    @Override
458    public void update(int b) {}
459  };
460}