001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.util; 020 021import java.io.DataInputStream; 022import java.io.DataOutputStream; 023import java.io.IOException; 024import java.nio.ByteBuffer; 025import java.util.zip.Checksum; 026 027import org.apache.hadoop.classification.InterfaceAudience; 028import org.apache.hadoop.classification.InterfaceStability; 029import org.apache.hadoop.fs.ChecksumException; 030 031/** 032 * This class provides inteface and utilities for processing checksums for 033 * DFS data transfers. 034 */ 035@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 036@InterfaceStability.Evolving 037public class DataChecksum implements Checksum { 038 039 // Misc constants 040 public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len 041 042 // checksum types 043 public static final int CHECKSUM_NULL = 0; 044 public static final int CHECKSUM_CRC32 = 1; 045 public static final int CHECKSUM_CRC32C = 2; 046 public static final int CHECKSUM_DEFAULT = 3; 047 public static final int CHECKSUM_MIXED = 4; 048 049 /** The checksum types */ 050 public static enum Type { 051 NULL (CHECKSUM_NULL, 0), 052 CRC32 (CHECKSUM_CRC32, 4), 053 CRC32C(CHECKSUM_CRC32C, 4), 054 DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum 055 MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum 056 057 public final int id; 058 public final int size; 059 060 private Type(int id, int size) { 061 this.id = id; 062 this.size = size; 063 } 064 065 /** @return the type corresponding to the id. */ 066 public static Type valueOf(int id) { 067 if (id < 0 || id >= values().length) { 068 throw new IllegalArgumentException("id=" + id 069 + " out of range [0, " + values().length + ")"); 070 } 071 return values()[id]; 072 } 073 } 074 075 076 public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) { 077 if ( bytesPerChecksum <= 0 ) { 078 return null; 079 } 080 081 switch ( type ) { 082 case NULL : 083 return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum ); 084 case CRC32 : 085 return new DataChecksum(type, new PureJavaCrc32(), bytesPerChecksum ); 086 case CRC32C: 087 return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum); 088 default: 089 return null; 090 } 091 } 092 093 /** 094 * Creates a DataChecksum from HEADER_LEN bytes from arr[offset]. 095 * @return DataChecksum of the type in the array or null in case of an error. 096 */ 097 public static DataChecksum newDataChecksum( byte bytes[], int offset ) { 098 if ( offset < 0 || bytes.length < offset + HEADER_LEN ) { 099 return null; 100 } 101 102 // like readInt(): 103 int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 104 ( (bytes[offset+2] & 0xff) << 16 ) | 105 ( (bytes[offset+3] & 0xff) << 8 ) | 106 ( (bytes[offset+4] & 0xff) ); 107 return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum ); 108 } 109 110 /** 111 * This constructucts a DataChecksum by reading HEADER_LEN bytes from 112 * input stream <i>in</i> 113 */ 114 public static DataChecksum newDataChecksum( DataInputStream in ) 115 throws IOException { 116 int type = in.readByte(); 117 int bpc = in.readInt(); 118 DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc ); 119 if ( summer == null ) { 120 throw new IOException( "Could not create DataChecksum of type " + 121 type + " with bytesPerChecksum " + bpc ); 122 } 123 return summer; 124 } 125 126 /** 127 * Writes the checksum header to the output stream <i>out</i>. 128 */ 129 public void writeHeader( DataOutputStream out ) 130 throws IOException { 131 out.writeByte( type.id ); 132 out.writeInt( bytesPerChecksum ); 133 } 134 135 public byte[] getHeader() { 136 byte[] header = new byte[DataChecksum.HEADER_LEN]; 137 header[0] = (byte) (type.id & 0xff); 138 // Writing in buffer just like DataOutput.WriteInt() 139 header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff); 140 header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff); 141 header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff); 142 header[1+3] = (byte) (bytesPerChecksum & 0xff); 143 return header; 144 } 145 146 /** 147 * Writes the current checksum to the stream. 148 * If <i>reset</i> is true, then resets the checksum. 149 * @return number of bytes written. Will be equal to getChecksumSize(); 150 */ 151 public int writeValue( DataOutputStream out, boolean reset ) 152 throws IOException { 153 if ( type.size <= 0 ) { 154 return 0; 155 } 156 157 if ( type.size == 4 ) { 158 out.writeInt( (int) summer.getValue() ); 159 } else { 160 throw new IOException( "Unknown Checksum " + type ); 161 } 162 163 if ( reset ) { 164 reset(); 165 } 166 167 return type.size; 168 } 169 170 /** 171 * Writes the current checksum to a buffer. 172 * If <i>reset</i> is true, then resets the checksum. 173 * @return number of bytes written. Will be equal to getChecksumSize(); 174 */ 175 public int writeValue( byte[] buf, int offset, boolean reset ) 176 throws IOException { 177 if ( type.size <= 0 ) { 178 return 0; 179 } 180 181 if ( type.size == 4 ) { 182 int checksum = (int) summer.getValue(); 183 buf[offset+0] = (byte) ((checksum >>> 24) & 0xff); 184 buf[offset+1] = (byte) ((checksum >>> 16) & 0xff); 185 buf[offset+2] = (byte) ((checksum >>> 8) & 0xff); 186 buf[offset+3] = (byte) (checksum & 0xff); 187 } else { 188 throw new IOException( "Unknown Checksum " + type ); 189 } 190 191 if ( reset ) { 192 reset(); 193 } 194 195 return type.size; 196 } 197 198 /** 199 * Compares the checksum located at buf[offset] with the current checksum. 200 * @return true if the checksum matches and false otherwise. 201 */ 202 public boolean compare( byte buf[], int offset ) { 203 if ( type.size == 4 ) { 204 int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 205 ( (buf[offset+1] & 0xff) << 16 ) | 206 ( (buf[offset+2] & 0xff) << 8 ) | 207 ( (buf[offset+3] & 0xff) ); 208 return checksum == (int) summer.getValue(); 209 } 210 return type.size == 0; 211 } 212 213 private final Type type; 214 private final Checksum summer; 215 private final int bytesPerChecksum; 216 private int inSum = 0; 217 218 private DataChecksum( Type type, Checksum checksum, int chunkSize ) { 219 this.type = type; 220 summer = checksum; 221 bytesPerChecksum = chunkSize; 222 } 223 224 // Accessors 225 public Type getChecksumType() { 226 return type; 227 } 228 public int getChecksumSize() { 229 return type.size; 230 } 231 public int getBytesPerChecksum() { 232 return bytesPerChecksum; 233 } 234 public int getNumBytesInSum() { 235 return inSum; 236 } 237 238 public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE; 239 static public int getChecksumHeaderSize() { 240 return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int 241 } 242 //Checksum Interface. Just a wrapper around member summer. 243 @Override 244 public long getValue() { 245 return summer.getValue(); 246 } 247 @Override 248 public void reset() { 249 summer.reset(); 250 inSum = 0; 251 } 252 @Override 253 public void update( byte[] b, int off, int len ) { 254 if ( len > 0 ) { 255 summer.update( b, off, len ); 256 inSum += len; 257 } 258 } 259 @Override 260 public void update( int b ) { 261 summer.update( b ); 262 inSum += 1; 263 } 264 265 /** 266 * Verify that the given checksums match the given data. 267 * 268 * The 'mark' of the ByteBuffer parameters may be modified by this function,. 269 * but the position is maintained. 270 * 271 * @param data the DirectByteBuffer pointing to the data to verify. 272 * @param checksums the DirectByteBuffer pointing to a series of stored 273 * checksums 274 * @param fileName the name of the file being read, for error-reporting 275 * @param basePos the file position to which the start of 'data' corresponds 276 * @throws ChecksumException if the checksums do not match 277 */ 278 public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums, 279 String fileName, long basePos) 280 throws ChecksumException { 281 if (type.size == 0) return; 282 283 if (data.hasArray() && checksums.hasArray()) { 284 verifyChunkedSums( 285 data.array(), data.arrayOffset() + data.position(), data.remaining(), 286 checksums.array(), checksums.arrayOffset() + checksums.position(), 287 fileName, basePos); 288 return; 289 } 290 if (NativeCrc32.isAvailable()) { 291 NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data, 292 fileName, basePos); 293 return; 294 } 295 296 int startDataPos = data.position(); 297 data.mark(); 298 checksums.mark(); 299 try { 300 byte[] buf = new byte[bytesPerChecksum]; 301 byte[] sum = new byte[type.size]; 302 while (data.remaining() > 0) { 303 int n = Math.min(data.remaining(), bytesPerChecksum); 304 checksums.get(sum); 305 data.get(buf, 0, n); 306 summer.reset(); 307 summer.update(buf, 0, n); 308 int calculated = (int)summer.getValue(); 309 int stored = (sum[0] << 24 & 0xff000000) | 310 (sum[1] << 16 & 0xff0000) | 311 (sum[2] << 8 & 0xff00) | 312 sum[3] & 0xff; 313 if (calculated != stored) { 314 long errPos = basePos + data.position() - startDataPos - n; 315 throw new ChecksumException( 316 "Checksum error: "+ fileName + " at "+ errPos + 317 " exp: " + stored + " got: " + calculated, errPos); 318 } 319 } 320 } finally { 321 data.reset(); 322 checksums.reset(); 323 } 324 } 325 326 /** 327 * Implementation of chunked verification specifically on byte arrays. This 328 * is to avoid the copy when dealing with ByteBuffers that have array backing. 329 */ 330 private void verifyChunkedSums( 331 byte[] data, int dataOff, int dataLen, 332 byte[] checksums, int checksumsOff, String fileName, 333 long basePos) throws ChecksumException { 334 335 int remaining = dataLen; 336 int dataPos = 0; 337 while (remaining > 0) { 338 int n = Math.min(remaining, bytesPerChecksum); 339 340 summer.reset(); 341 summer.update(data, dataOff + dataPos, n); 342 dataPos += n; 343 remaining -= n; 344 345 int calculated = (int)summer.getValue(); 346 int stored = (checksums[checksumsOff] << 24 & 0xff000000) | 347 (checksums[checksumsOff + 1] << 16 & 0xff0000) | 348 (checksums[checksumsOff + 2] << 8 & 0xff00) | 349 checksums[checksumsOff + 3] & 0xff; 350 checksumsOff += 4; 351 if (calculated != stored) { 352 long errPos = basePos + dataPos - n; 353 throw new ChecksumException( 354 "Checksum error: "+ fileName + " at "+ errPos + 355 " exp: " + stored + " got: " + calculated, errPos); 356 } 357 } 358 } 359 360 /** 361 * Calculate checksums for the given data. 362 * 363 * The 'mark' of the ByteBuffer parameters may be modified by this function, 364 * but the position is maintained. 365 * 366 * @param data the DirectByteBuffer pointing to the data to checksum. 367 * @param checksums the DirectByteBuffer into which checksums will be 368 * stored. Enough space must be available in this 369 * buffer to put the checksums. 370 */ 371 public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) { 372 if (type.size == 0) return; 373 374 if (data.hasArray() && checksums.hasArray()) { 375 calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(), 376 checksums.array(), checksums.arrayOffset() + checksums.position()); 377 return; 378 } 379 380 data.mark(); 381 checksums.mark(); 382 try { 383 byte[] buf = new byte[bytesPerChecksum]; 384 while (data.remaining() > 0) { 385 int n = Math.min(data.remaining(), bytesPerChecksum); 386 data.get(buf, 0, n); 387 summer.reset(); 388 summer.update(buf, 0, n); 389 checksums.putInt((int)summer.getValue()); 390 } 391 } finally { 392 data.reset(); 393 checksums.reset(); 394 } 395 } 396 397 /** 398 * Implementation of chunked calculation specifically on byte arrays. This 399 * is to avoid the copy when dealing with ByteBuffers that have array backing. 400 */ 401 private void calculateChunkedSums( 402 byte[] data, int dataOffset, int dataLength, 403 byte[] sums, int sumsOffset) { 404 405 int remaining = dataLength; 406 while (remaining > 0) { 407 int n = Math.min(remaining, bytesPerChecksum); 408 summer.reset(); 409 summer.update(data, dataOffset, n); 410 dataOffset += n; 411 remaining -= n; 412 long calculated = summer.getValue(); 413 sums[sumsOffset++] = (byte) (calculated >> 24); 414 sums[sumsOffset++] = (byte) (calculated >> 16); 415 sums[sumsOffset++] = (byte) (calculated >> 8); 416 sums[sumsOffset++] = (byte) (calculated); 417 } 418 } 419 420 @Override 421 public boolean equals(Object other) { 422 if (!(other instanceof DataChecksum)) { 423 return false; 424 } 425 DataChecksum o = (DataChecksum)other; 426 return o.bytesPerChecksum == this.bytesPerChecksum && 427 o.type == this.type; 428 } 429 430 @Override 431 public int hashCode() { 432 return (this.type.id + 31) * this.bytesPerChecksum; 433 } 434 435 @Override 436 public String toString() { 437 return "DataChecksum(type=" + type + 438 ", chunkSize=" + bytesPerChecksum + ")"; 439 } 440 441 /** 442 * This just provides a dummy implimentation for Checksum class 443 * This is used when there is no checksum available or required for 444 * data 445 */ 446 static class ChecksumNull implements Checksum { 447 448 public ChecksumNull() {} 449 450 //Dummy interface 451 @Override 452 public long getValue() { return 0; } 453 @Override 454 public void reset() {} 455 @Override 456 public void update(byte[] b, int off, int len) {} 457 @Override 458 public void update(int b) {} 459 }; 460}