001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.util; 020 021import java.io.DataInputStream; 022import java.io.DataOutputStream; 023import java.io.IOException; 024import java.nio.ByteBuffer; 025import java.util.zip.CRC32; 026import java.util.zip.Checksum; 027 028import org.apache.hadoop.classification.InterfaceAudience; 029import org.apache.hadoop.classification.InterfaceStability; 030import org.apache.hadoop.fs.ChecksumException; 031 032/** 033 * This class provides interface and utilities for processing checksums for 034 * DFS data transfers. 035 */ 036@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 037@InterfaceStability.Evolving 038public class DataChecksum implements Checksum { 039 040 // checksum types 041 public static final int CHECKSUM_NULL = 0; 042 public static final int CHECKSUM_CRC32 = 1; 043 public static final int CHECKSUM_CRC32C = 2; 044 public static final int CHECKSUM_DEFAULT = 3; 045 public static final int CHECKSUM_MIXED = 4; 046 047 /** The checksum types */ 048 public static enum Type { 049 NULL (CHECKSUM_NULL, 0), 050 CRC32 (CHECKSUM_CRC32, 4), 051 CRC32C(CHECKSUM_CRC32C, 4), 052 DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum 053 MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum 054 055 public final int id; 056 public final int size; 057 058 private Type(int id, int size) { 059 this.id = id; 060 this.size = size; 061 } 062 063 /** @return the type corresponding to the id. */ 064 public static Type valueOf(int id) { 065 if (id < 0 || id >= values().length) { 066 throw new IllegalArgumentException("id=" + id 067 + " out of range [0, " + values().length + ")"); 068 } 069 return values()[id]; 070 } 071 } 072 073 /** 074 * Create a Crc32 Checksum object. The implementation of the Crc32 algorithm 075 * is chosen depending on the platform. 076 */ 077 public static Checksum newCrc32() { 078 //return Shell.isJava7OrAbove()? new CRC32(): new PureJavaCrc32(); 079 // Reverting to usage of PureJavaCrc32 (new version as of 2.5.1). No use for native CRC32 080 return new PureJavaCrc32(); 081 } 082 083 public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) { 084 if ( bytesPerChecksum <= 0 ) { 085 return null; 086 } 087 088 switch ( type ) { 089 case NULL : 090 return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum ); 091 case CRC32 : 092 return new DataChecksum(type, newCrc32(), bytesPerChecksum ); 093 case CRC32C: 094 return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum); 095 default: 096 return null; 097 } 098 } 099 100 /** 101 * Creates a DataChecksum from HEADER_LEN bytes from arr[offset]. 102 * @return DataChecksum of the type in the array or null in case of an error. 103 */ 104 public static DataChecksum newDataChecksum( byte bytes[], int offset ) { 105 if (offset < 0 || bytes.length < offset + getChecksumHeaderSize()) { 106 return null; 107 } 108 109 // like readInt(): 110 int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 111 ( (bytes[offset+2] & 0xff) << 16 ) | 112 ( (bytes[offset+3] & 0xff) << 8 ) | 113 ( (bytes[offset+4] & 0xff) ); 114 return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum ); 115 } 116 117 /** 118 * This constructs a DataChecksum by reading HEADER_LEN bytes from input 119 * stream <i>in</i> 120 */ 121 public static DataChecksum newDataChecksum( DataInputStream in ) 122 throws IOException { 123 int type = in.readByte(); 124 int bpc = in.readInt(); 125 DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc ); 126 if ( summer == null ) { 127 throw new IOException( "Could not create DataChecksum of type " + 128 type + " with bytesPerChecksum " + bpc ); 129 } 130 return summer; 131 } 132 133 /** 134 * Writes the checksum header to the output stream <i>out</i>. 135 */ 136 public void writeHeader( DataOutputStream out ) 137 throws IOException { 138 out.writeByte( type.id ); 139 out.writeInt( bytesPerChecksum ); 140 } 141 142 public byte[] getHeader() { 143 byte[] header = new byte[getChecksumHeaderSize()]; 144 header[0] = (byte) (type.id & 0xff); 145 // Writing in buffer just like DataOutput.WriteInt() 146 header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff); 147 header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff); 148 header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff); 149 header[1+3] = (byte) (bytesPerChecksum & 0xff); 150 return header; 151 } 152 153 /** 154 * Writes the current checksum to the stream. 155 * If <i>reset</i> is true, then resets the checksum. 156 * @return number of bytes written. Will be equal to getChecksumSize(); 157 */ 158 public int writeValue( DataOutputStream out, boolean reset ) 159 throws IOException { 160 if ( type.size <= 0 ) { 161 return 0; 162 } 163 164 if ( type.size == 4 ) { 165 out.writeInt( (int) summer.getValue() ); 166 } else { 167 throw new IOException( "Unknown Checksum " + type ); 168 } 169 170 if ( reset ) { 171 reset(); 172 } 173 174 return type.size; 175 } 176 177 /** 178 * Writes the current checksum to a buffer. 179 * If <i>reset</i> is true, then resets the checksum. 180 * @return number of bytes written. Will be equal to getChecksumSize(); 181 */ 182 public int writeValue( byte[] buf, int offset, boolean reset ) 183 throws IOException { 184 if ( type.size <= 0 ) { 185 return 0; 186 } 187 188 if ( type.size == 4 ) { 189 int checksum = (int) summer.getValue(); 190 buf[offset+0] = (byte) ((checksum >>> 24) & 0xff); 191 buf[offset+1] = (byte) ((checksum >>> 16) & 0xff); 192 buf[offset+2] = (byte) ((checksum >>> 8) & 0xff); 193 buf[offset+3] = (byte) (checksum & 0xff); 194 } else { 195 throw new IOException( "Unknown Checksum " + type ); 196 } 197 198 if ( reset ) { 199 reset(); 200 } 201 202 return type.size; 203 } 204 205 /** 206 * Compares the checksum located at buf[offset] with the current checksum. 207 * @return true if the checksum matches and false otherwise. 208 */ 209 public boolean compare( byte buf[], int offset ) { 210 if ( type.size == 4 ) { 211 int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 212 ( (buf[offset+1] & 0xff) << 16 ) | 213 ( (buf[offset+2] & 0xff) << 8 ) | 214 ( (buf[offset+3] & 0xff) ); 215 return checksum == (int) summer.getValue(); 216 } 217 return type.size == 0; 218 } 219 220 private final Type type; 221 private final Checksum summer; 222 private final int bytesPerChecksum; 223 private int inSum = 0; 224 225 private DataChecksum( Type type, Checksum checksum, int chunkSize ) { 226 this.type = type; 227 summer = checksum; 228 bytesPerChecksum = chunkSize; 229 } 230 231 /** @return the checksum algorithm type. */ 232 public Type getChecksumType() { 233 return type; 234 } 235 /** @return the size for a checksum. */ 236 public int getChecksumSize() { 237 return type.size; 238 } 239 /** @return the required checksum size given the data length. */ 240 public int getChecksumSize(int dataSize) { 241 return ((dataSize - 1)/getBytesPerChecksum() + 1) * getChecksumSize(); 242 } 243 public int getBytesPerChecksum() { 244 return bytesPerChecksum; 245 } 246 public int getNumBytesInSum() { 247 return inSum; 248 } 249 250 public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE; 251 static public int getChecksumHeaderSize() { 252 return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int 253 } 254 //Checksum Interface. Just a wrapper around member summer. 255 @Override 256 public long getValue() { 257 return summer.getValue(); 258 } 259 @Override 260 public void reset() { 261 summer.reset(); 262 inSum = 0; 263 } 264 @Override 265 public void update( byte[] b, int off, int len ) { 266 if ( len > 0 ) { 267 summer.update( b, off, len ); 268 inSum += len; 269 } 270 } 271 @Override 272 public void update( int b ) { 273 summer.update( b ); 274 inSum += 1; 275 } 276 277 /** 278 * Verify that the given checksums match the given data. 279 * 280 * The 'mark' of the ByteBuffer parameters may be modified by this function,. 281 * but the position is maintained. 282 * 283 * @param data the DirectByteBuffer pointing to the data to verify. 284 * @param checksums the DirectByteBuffer pointing to a series of stored 285 * checksums 286 * @param fileName the name of the file being read, for error-reporting 287 * @param basePos the file position to which the start of 'data' corresponds 288 * @throws ChecksumException if the checksums do not match 289 */ 290 public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums, 291 String fileName, long basePos) 292 throws ChecksumException { 293 if (type.size == 0) return; 294 295 if (data.hasArray() && checksums.hasArray()) { 296 verifyChunkedSums( 297 data.array(), data.arrayOffset() + data.position(), data.remaining(), 298 checksums.array(), checksums.arrayOffset() + checksums.position(), 299 fileName, basePos); 300 return; 301 } 302 if (NativeCrc32.isAvailable()) { 303 NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data, 304 fileName, basePos); 305 return; 306 } 307 308 int startDataPos = data.position(); 309 data.mark(); 310 checksums.mark(); 311 try { 312 byte[] buf = new byte[bytesPerChecksum]; 313 byte[] sum = new byte[type.size]; 314 while (data.remaining() > 0) { 315 int n = Math.min(data.remaining(), bytesPerChecksum); 316 checksums.get(sum); 317 data.get(buf, 0, n); 318 summer.reset(); 319 summer.update(buf, 0, n); 320 int calculated = (int)summer.getValue(); 321 int stored = (sum[0] << 24 & 0xff000000) | 322 (sum[1] << 16 & 0xff0000) | 323 (sum[2] << 8 & 0xff00) | 324 sum[3] & 0xff; 325 if (calculated != stored) { 326 long errPos = basePos + data.position() - startDataPos - n; 327 throw new ChecksumException( 328 "Checksum error: "+ fileName + " at "+ errPos + 329 " exp: " + stored + " got: " + calculated, errPos); 330 } 331 } 332 } finally { 333 data.reset(); 334 checksums.reset(); 335 } 336 } 337 338 /** 339 * Implementation of chunked verification specifically on byte arrays. This 340 * is to avoid the copy when dealing with ByteBuffers that have array backing. 341 */ 342 private void verifyChunkedSums( 343 byte[] data, int dataOff, int dataLen, 344 byte[] checksums, int checksumsOff, String fileName, 345 long basePos) throws ChecksumException { 346 if (type.size == 0) return; 347 348 if (NativeCrc32.isAvailable()) { 349 NativeCrc32.verifyChunkedSumsByteArray(bytesPerChecksum, type.id, 350 checksums, checksumsOff, data, dataOff, dataLen, fileName, basePos); 351 return; 352 } 353 354 int remaining = dataLen; 355 int dataPos = 0; 356 while (remaining > 0) { 357 int n = Math.min(remaining, bytesPerChecksum); 358 359 summer.reset(); 360 summer.update(data, dataOff + dataPos, n); 361 dataPos += n; 362 remaining -= n; 363 364 int calculated = (int)summer.getValue(); 365 int stored = (checksums[checksumsOff] << 24 & 0xff000000) | 366 (checksums[checksumsOff + 1] << 16 & 0xff0000) | 367 (checksums[checksumsOff + 2] << 8 & 0xff00) | 368 checksums[checksumsOff + 3] & 0xff; 369 checksumsOff += 4; 370 if (calculated != stored) { 371 long errPos = basePos + dataPos - n; 372 throw new ChecksumException( 373 "Checksum error: "+ fileName + " at "+ errPos + 374 " exp: " + stored + " got: " + calculated, errPos); 375 } 376 } 377 } 378 379 /** 380 * Calculate checksums for the given data. 381 * 382 * The 'mark' of the ByteBuffer parameters may be modified by this function, 383 * but the position is maintained. 384 * 385 * @param data the DirectByteBuffer pointing to the data to checksum. 386 * @param checksums the DirectByteBuffer into which checksums will be 387 * stored. Enough space must be available in this 388 * buffer to put the checksums. 389 */ 390 public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) { 391 if (type.size == 0) return; 392 393 if (data.hasArray() && checksums.hasArray()) { 394 calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(), 395 checksums.array(), checksums.arrayOffset() + checksums.position()); 396 return; 397 } 398 399 if (NativeCrc32.isAvailable()) { 400 NativeCrc32.calculateChunkedSums(bytesPerChecksum, type.id, 401 checksums, data); 402 return; 403 } 404 405 data.mark(); 406 checksums.mark(); 407 try { 408 byte[] buf = new byte[bytesPerChecksum]; 409 while (data.remaining() > 0) { 410 int n = Math.min(data.remaining(), bytesPerChecksum); 411 data.get(buf, 0, n); 412 summer.reset(); 413 summer.update(buf, 0, n); 414 checksums.putInt((int)summer.getValue()); 415 } 416 } finally { 417 data.reset(); 418 checksums.reset(); 419 } 420 } 421 422 /** 423 * Implementation of chunked calculation specifically on byte arrays. This 424 * is to avoid the copy when dealing with ByteBuffers that have array backing. 425 */ 426 public void calculateChunkedSums( 427 byte[] data, int dataOffset, int dataLength, 428 byte[] sums, int sumsOffset) { 429 if (type.size == 0) return; 430 431 if (NativeCrc32.isAvailable()) { 432 NativeCrc32.calculateChunkedSumsByteArray(bytesPerChecksum, type.id, 433 sums, sumsOffset, data, dataOffset, dataLength); 434 return; 435 } 436 437 int remaining = dataLength; 438 while (remaining > 0) { 439 int n = Math.min(remaining, bytesPerChecksum); 440 summer.reset(); 441 summer.update(data, dataOffset, n); 442 dataOffset += n; 443 remaining -= n; 444 long calculated = summer.getValue(); 445 sums[sumsOffset++] = (byte) (calculated >> 24); 446 sums[sumsOffset++] = (byte) (calculated >> 16); 447 sums[sumsOffset++] = (byte) (calculated >> 8); 448 sums[sumsOffset++] = (byte) (calculated); 449 } 450 } 451 452 @Override 453 public boolean equals(Object other) { 454 if (!(other instanceof DataChecksum)) { 455 return false; 456 } 457 DataChecksum o = (DataChecksum)other; 458 return o.bytesPerChecksum == this.bytesPerChecksum && 459 o.type == this.type; 460 } 461 462 @Override 463 public int hashCode() { 464 return (this.type.id + 31) * this.bytesPerChecksum; 465 } 466 467 @Override 468 public String toString() { 469 return "DataChecksum(type=" + type + 470 ", chunkSize=" + bytesPerChecksum + ")"; 471 } 472 473 /** 474 * This just provides a dummy implimentation for Checksum class 475 * This is used when there is no checksum available or required for 476 * data 477 */ 478 static class ChecksumNull implements Checksum { 479 480 public ChecksumNull() {} 481 482 //Dummy interface 483 @Override 484 public long getValue() { return 0; } 485 @Override 486 public void reset() {} 487 @Override 488 public void update(byte[] b, int off, int len) {} 489 @Override 490 public void update(int b) {} 491 }; 492}