001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.util; 020 021import java.io.DataInputStream; 022import java.io.DataOutputStream; 023import java.io.IOException; 024import java.nio.ByteBuffer; 025import java.util.zip.CRC32; 026import java.util.zip.Checksum; 027 028import org.apache.hadoop.classification.InterfaceAudience; 029import org.apache.hadoop.classification.InterfaceStability; 030import org.apache.hadoop.fs.ChecksumException; 031 032/** 033 * This class provides inteface and utilities for processing checksums for 034 * DFS data transfers. 035 */ 036@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 037@InterfaceStability.Evolving 038public class DataChecksum implements Checksum { 039 040 // Misc constants 041 public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len 042 043 // checksum types 044 public static final int CHECKSUM_NULL = 0; 045 public static final int CHECKSUM_CRC32 = 1; 046 public static final int CHECKSUM_CRC32C = 2; 047 public static final int CHECKSUM_DEFAULT = 3; 048 public static final int CHECKSUM_MIXED = 4; 049 050 /** The checksum types */ 051 public static enum Type { 052 NULL (CHECKSUM_NULL, 0), 053 CRC32 (CHECKSUM_CRC32, 4), 054 CRC32C(CHECKSUM_CRC32C, 4), 055 DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum 056 MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum 057 058 public final int id; 059 public final int size; 060 061 private Type(int id, int size) { 062 this.id = id; 063 this.size = size; 064 } 065 066 /** @return the type corresponding to the id. */ 067 public static Type valueOf(int id) { 068 if (id < 0 || id >= values().length) { 069 throw new IllegalArgumentException("id=" + id 070 + " out of range [0, " + values().length + ")"); 071 } 072 return values()[id]; 073 } 074 } 075 076 /** 077 * Create a Crc32 Checksum object. The implementation of the Crc32 algorithm 078 * is chosen depending on the platform. 079 */ 080 public static Checksum newCrc32() { 081 //return Shell.isJava7OrAbove()? new CRC32(): new PureJavaCrc32(); 082 // Reverting to usage of PureJavaCrc32 (new version as of 2.5.1). No use for native CRC32 083 return new PureJavaCrc32(); 084 } 085 086 public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) { 087 if ( bytesPerChecksum <= 0 ) { 088 return null; 089 } 090 091 switch ( type ) { 092 case NULL : 093 return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum ); 094 case CRC32 : 095 return new DataChecksum(type, newCrc32(), bytesPerChecksum ); 096 case CRC32C: 097 return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum); 098 default: 099 return null; 100 } 101 } 102 103 /** 104 * Creates a DataChecksum from HEADER_LEN bytes from arr[offset]. 105 * @return DataChecksum of the type in the array or null in case of an error. 106 */ 107 public static DataChecksum newDataChecksum( byte bytes[], int offset ) { 108 if ( offset < 0 || bytes.length < offset + HEADER_LEN ) { 109 return null; 110 } 111 112 // like readInt(): 113 int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 114 ( (bytes[offset+2] & 0xff) << 16 ) | 115 ( (bytes[offset+3] & 0xff) << 8 ) | 116 ( (bytes[offset+4] & 0xff) ); 117 return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum ); 118 } 119 120 /** 121 * This constructucts a DataChecksum by reading HEADER_LEN bytes from 122 * input stream <i>in</i> 123 */ 124 public static DataChecksum newDataChecksum( DataInputStream in ) 125 throws IOException { 126 int type = in.readByte(); 127 int bpc = in.readInt(); 128 DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc ); 129 if ( summer == null ) { 130 throw new IOException( "Could not create DataChecksum of type " + 131 type + " with bytesPerChecksum " + bpc ); 132 } 133 return summer; 134 } 135 136 /** 137 * Writes the checksum header to the output stream <i>out</i>. 138 */ 139 public void writeHeader( DataOutputStream out ) 140 throws IOException { 141 out.writeByte( type.id ); 142 out.writeInt( bytesPerChecksum ); 143 } 144 145 public byte[] getHeader() { 146 byte[] header = new byte[DataChecksum.HEADER_LEN]; 147 header[0] = (byte) (type.id & 0xff); 148 // Writing in buffer just like DataOutput.WriteInt() 149 header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff); 150 header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff); 151 header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff); 152 header[1+3] = (byte) (bytesPerChecksum & 0xff); 153 return header; 154 } 155 156 /** 157 * Writes the current checksum to the stream. 158 * If <i>reset</i> is true, then resets the checksum. 159 * @return number of bytes written. Will be equal to getChecksumSize(); 160 */ 161 public int writeValue( DataOutputStream out, boolean reset ) 162 throws IOException { 163 if ( type.size <= 0 ) { 164 return 0; 165 } 166 167 if ( type.size == 4 ) { 168 out.writeInt( (int) summer.getValue() ); 169 } else { 170 throw new IOException( "Unknown Checksum " + type ); 171 } 172 173 if ( reset ) { 174 reset(); 175 } 176 177 return type.size; 178 } 179 180 /** 181 * Writes the current checksum to a buffer. 182 * If <i>reset</i> is true, then resets the checksum. 183 * @return number of bytes written. Will be equal to getChecksumSize(); 184 */ 185 public int writeValue( byte[] buf, int offset, boolean reset ) 186 throws IOException { 187 if ( type.size <= 0 ) { 188 return 0; 189 } 190 191 if ( type.size == 4 ) { 192 int checksum = (int) summer.getValue(); 193 buf[offset+0] = (byte) ((checksum >>> 24) & 0xff); 194 buf[offset+1] = (byte) ((checksum >>> 16) & 0xff); 195 buf[offset+2] = (byte) ((checksum >>> 8) & 0xff); 196 buf[offset+3] = (byte) (checksum & 0xff); 197 } else { 198 throw new IOException( "Unknown Checksum " + type ); 199 } 200 201 if ( reset ) { 202 reset(); 203 } 204 205 return type.size; 206 } 207 208 /** 209 * Compares the checksum located at buf[offset] with the current checksum. 210 * @return true if the checksum matches and false otherwise. 211 */ 212 public boolean compare( byte buf[], int offset ) { 213 if ( type.size == 4 ) { 214 int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 215 ( (buf[offset+1] & 0xff) << 16 ) | 216 ( (buf[offset+2] & 0xff) << 8 ) | 217 ( (buf[offset+3] & 0xff) ); 218 return checksum == (int) summer.getValue(); 219 } 220 return type.size == 0; 221 } 222 223 private final Type type; 224 private final Checksum summer; 225 private final int bytesPerChecksum; 226 private int inSum = 0; 227 228 private DataChecksum( Type type, Checksum checksum, int chunkSize ) { 229 this.type = type; 230 summer = checksum; 231 bytesPerChecksum = chunkSize; 232 } 233 234 // Accessors 235 public Type getChecksumType() { 236 return type; 237 } 238 public int getChecksumSize() { 239 return type.size; 240 } 241 public int getBytesPerChecksum() { 242 return bytesPerChecksum; 243 } 244 public int getNumBytesInSum() { 245 return inSum; 246 } 247 248 public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE; 249 static public int getChecksumHeaderSize() { 250 return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int 251 } 252 //Checksum Interface. Just a wrapper around member summer. 253 @Override 254 public long getValue() { 255 return summer.getValue(); 256 } 257 @Override 258 public void reset() { 259 summer.reset(); 260 inSum = 0; 261 } 262 @Override 263 public void update( byte[] b, int off, int len ) { 264 if ( len > 0 ) { 265 summer.update( b, off, len ); 266 inSum += len; 267 } 268 } 269 @Override 270 public void update( int b ) { 271 summer.update( b ); 272 inSum += 1; 273 } 274 275 /** 276 * Verify that the given checksums match the given data. 277 * 278 * The 'mark' of the ByteBuffer parameters may be modified by this function,. 279 * but the position is maintained. 280 * 281 * @param data the DirectByteBuffer pointing to the data to verify. 282 * @param checksums the DirectByteBuffer pointing to a series of stored 283 * checksums 284 * @param fileName the name of the file being read, for error-reporting 285 * @param basePos the file position to which the start of 'data' corresponds 286 * @throws ChecksumException if the checksums do not match 287 */ 288 public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums, 289 String fileName, long basePos) 290 throws ChecksumException { 291 if (type.size == 0) return; 292 293 if (data.hasArray() && checksums.hasArray()) { 294 verifyChunkedSums( 295 data.array(), data.arrayOffset() + data.position(), data.remaining(), 296 checksums.array(), checksums.arrayOffset() + checksums.position(), 297 fileName, basePos); 298 return; 299 } 300 if (NativeCrc32.isAvailable()) { 301 NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data, 302 fileName, basePos); 303 return; 304 } 305 306 int startDataPos = data.position(); 307 data.mark(); 308 checksums.mark(); 309 try { 310 byte[] buf = new byte[bytesPerChecksum]; 311 byte[] sum = new byte[type.size]; 312 while (data.remaining() > 0) { 313 int n = Math.min(data.remaining(), bytesPerChecksum); 314 checksums.get(sum); 315 data.get(buf, 0, n); 316 summer.reset(); 317 summer.update(buf, 0, n); 318 int calculated = (int)summer.getValue(); 319 int stored = (sum[0] << 24 & 0xff000000) | 320 (sum[1] << 16 & 0xff0000) | 321 (sum[2] << 8 & 0xff00) | 322 sum[3] & 0xff; 323 if (calculated != stored) { 324 long errPos = basePos + data.position() - startDataPos - n; 325 throw new ChecksumException( 326 "Checksum error: "+ fileName + " at "+ errPos + 327 " exp: " + stored + " got: " + calculated, errPos); 328 } 329 } 330 } finally { 331 data.reset(); 332 checksums.reset(); 333 } 334 } 335 336 /** 337 * Implementation of chunked verification specifically on byte arrays. This 338 * is to avoid the copy when dealing with ByteBuffers that have array backing. 339 */ 340 private void verifyChunkedSums( 341 byte[] data, int dataOff, int dataLen, 342 byte[] checksums, int checksumsOff, String fileName, 343 long basePos) throws ChecksumException { 344 345 int remaining = dataLen; 346 int dataPos = 0; 347 while (remaining > 0) { 348 int n = Math.min(remaining, bytesPerChecksum); 349 350 summer.reset(); 351 summer.update(data, dataOff + dataPos, n); 352 dataPos += n; 353 remaining -= n; 354 355 int calculated = (int)summer.getValue(); 356 int stored = (checksums[checksumsOff] << 24 & 0xff000000) | 357 (checksums[checksumsOff + 1] << 16 & 0xff0000) | 358 (checksums[checksumsOff + 2] << 8 & 0xff00) | 359 checksums[checksumsOff + 3] & 0xff; 360 checksumsOff += 4; 361 if (calculated != stored) { 362 long errPos = basePos + dataPos - n; 363 throw new ChecksumException( 364 "Checksum error: "+ fileName + " at "+ errPos + 365 " exp: " + stored + " got: " + calculated, errPos); 366 } 367 } 368 } 369 370 /** 371 * Calculate checksums for the given data. 372 * 373 * The 'mark' of the ByteBuffer parameters may be modified by this function, 374 * but the position is maintained. 375 * 376 * @param data the DirectByteBuffer pointing to the data to checksum. 377 * @param checksums the DirectByteBuffer into which checksums will be 378 * stored. Enough space must be available in this 379 * buffer to put the checksums. 380 */ 381 public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) { 382 if (type.size == 0) return; 383 384 if (data.hasArray() && checksums.hasArray()) { 385 calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(), 386 checksums.array(), checksums.arrayOffset() + checksums.position()); 387 return; 388 } 389 390 data.mark(); 391 checksums.mark(); 392 try { 393 byte[] buf = new byte[bytesPerChecksum]; 394 while (data.remaining() > 0) { 395 int n = Math.min(data.remaining(), bytesPerChecksum); 396 data.get(buf, 0, n); 397 summer.reset(); 398 summer.update(buf, 0, n); 399 checksums.putInt((int)summer.getValue()); 400 } 401 } finally { 402 data.reset(); 403 checksums.reset(); 404 } 405 } 406 407 /** 408 * Implementation of chunked calculation specifically on byte arrays. This 409 * is to avoid the copy when dealing with ByteBuffers that have array backing. 410 */ 411 private void calculateChunkedSums( 412 byte[] data, int dataOffset, int dataLength, 413 byte[] sums, int sumsOffset) { 414 415 int remaining = dataLength; 416 while (remaining > 0) { 417 int n = Math.min(remaining, bytesPerChecksum); 418 summer.reset(); 419 summer.update(data, dataOffset, n); 420 dataOffset += n; 421 remaining -= n; 422 long calculated = summer.getValue(); 423 sums[sumsOffset++] = (byte) (calculated >> 24); 424 sums[sumsOffset++] = (byte) (calculated >> 16); 425 sums[sumsOffset++] = (byte) (calculated >> 8); 426 sums[sumsOffset++] = (byte) (calculated); 427 } 428 } 429 430 @Override 431 public boolean equals(Object other) { 432 if (!(other instanceof DataChecksum)) { 433 return false; 434 } 435 DataChecksum o = (DataChecksum)other; 436 return o.bytesPerChecksum == this.bytesPerChecksum && 437 o.type == this.type; 438 } 439 440 @Override 441 public int hashCode() { 442 return (this.type.id + 31) * this.bytesPerChecksum; 443 } 444 445 @Override 446 public String toString() { 447 return "DataChecksum(type=" + type + 448 ", chunkSize=" + bytesPerChecksum + ")"; 449 } 450 451 /** 452 * This just provides a dummy implimentation for Checksum class 453 * This is used when there is no checksum available or required for 454 * data 455 */ 456 static class ChecksumNull implements Checksum { 457 458 public ChecksumNull() {} 459 460 //Dummy interface 461 @Override 462 public long getValue() { return 0; } 463 @Override 464 public void reset() {} 465 @Override 466 public void update(byte[] b, int off, int len) {} 467 @Override 468 public void update(int b) {} 469 }; 470}