001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import static org.apache.hadoop.hdfs.server.namenode.FSImageFormat.renameReservedPathsOnUpgrade;
021import static org.apache.hadoop.util.Time.now;
022
023import java.io.FilterInputStream;
024import java.io.IOException;
025import java.io.InputStream;
026import java.util.Arrays;
027import java.util.EnumMap;
028import java.util.List;
029
030import org.apache.commons.logging.Log;
031import org.apache.commons.logging.LogFactory;
032import org.apache.hadoop.classification.InterfaceAudience;
033import org.apache.hadoop.classification.InterfaceStability;
034import org.apache.hadoop.fs.FileSystem;
035import org.apache.hadoop.hdfs.protocol.Block;
036import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
037import org.apache.hadoop.hdfs.protocol.HdfsConstants;
038import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
039import org.apache.hadoop.hdfs.protocol.LayoutVersion;
040import org.apache.hadoop.hdfs.protocol.LocatedBlock;
041import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
042import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
044import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
045import org.apache.hadoop.hdfs.server.common.Storage;
046import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp;
047import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCacheDirectiveInfoOp;
048import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCachePoolOp;
049import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp;
050import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllocateBlockIdOp;
051import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllowSnapshotOp;
052import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp;
053import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp;
054import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp;
055import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp;
056import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CreateSnapshotOp;
057import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
058import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteSnapshotOp;
059import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisallowSnapshotOp;
060import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp;
061import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
062import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCacheDirectiveInfoOp;
063import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCachePoolOp;
064import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp;
065import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCacheDirectiveInfoOp;
066import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCachePoolOp;
067import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp;
068import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp;
069import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp;
070import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp;
071import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetAclOp;
072import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp;
073import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV1Op;
074import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV2Op;
075import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp;
076import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp;
077import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp;
078import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp;
079import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp;
080import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp;
081import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp;
082import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
083import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
084import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
085import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
086import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
087import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
088import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
089import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
090import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
091import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
092import org.apache.hadoop.hdfs.util.ChunkedArrayList;
093import org.apache.hadoop.hdfs.util.Holder;
094
095import com.google.common.base.Joiner;
096import com.google.common.base.Preconditions;
097
098@InterfaceAudience.Private
099@InterfaceStability.Evolving
100public class FSEditLogLoader {
101  static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName());
102  static final long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec
103
104  private final FSNamesystem fsNamesys;
105  private long lastAppliedTxId;
/** Total number of edit log operations processed by this loader. */
107  private int totalEdits = 0;
108  
/**
 * Creates a loader that applies edit log operations to the given namesystem.
 *
 * @param fsNamesys namesystem that loaded operations are applied to
 * @param lastAppliedTxId starting value for this loader's record of the
 *        last applied transaction id
 */
public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) {
  this.lastAppliedTxId = lastAppliedTxId;
  this.fsNamesys = fsNamesys;
}
113  
114  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId)
115      throws IOException {
116    return loadFSEdits(edits, expectedStartingTxId, null, null);
117  }
118
119  /**
120   * Load an edit log, and apply the changes to the in-memory structure
121   * This is where we apply edits that we've been writing to disk all
122   * along.
123   */
124  long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId,
125      StartupOption startOpt, MetaRecoveryContext recovery) throws IOException {
126    StartupProgress prog = NameNode.getStartupProgress();
127    Step step = createStartupProgressStep(edits);
128    prog.beginStep(Phase.LOADING_EDITS, step);
129    fsNamesys.writeLock();
130    try {
131      long startTime = now();
132      FSImage.LOG.info("Start loading edits file " + edits.getName());
133      long numEdits = loadEditRecords(edits, false, expectedStartingTxId,
134          startOpt, recovery);
135      FSImage.LOG.info("Edits file " + edits.getName() 
136          + " of size " + edits.length() + " edits # " + numEdits 
137          + " loaded in " + (now()-startTime)/1000 + " seconds");
138      return numEdits;
139    } finally {
140      edits.close();
141      fsNamesys.writeUnlock();
142      prog.endStep(Phase.LOADING_EDITS, step);
143    }
144  }
145
146  long loadEditRecords(EditLogInputStream in, boolean closeOnExit,
147      long expectedStartingTxId, StartupOption startOpt,
148      MetaRecoveryContext recovery) throws IOException {
149    FSDirectory fsDir = fsNamesys.dir;
150
151    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
152      new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);
153
154    if (LOG.isTraceEnabled()) {
155      LOG.trace("Acquiring write lock to replay edit log");
156    }
157
158    fsNamesys.writeLock();
159    fsDir.writeLock();
160
161    long recentOpcodeOffsets[] = new long[4];
162    Arrays.fill(recentOpcodeOffsets, -1);
163    
164    long expectedTxId = expectedStartingTxId;
165    long numEdits = 0;
166    long lastTxId = in.getLastTxId();
167    long numTxns = (lastTxId - expectedStartingTxId) + 1;
168    StartupProgress prog = NameNode.getStartupProgress();
169    Step step = createStartupProgressStep(in);
170    prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
171    Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
172    long lastLogTime = now();
173    long lastInodeId = fsNamesys.getLastInodeId();
174    
175    try {
176      while (true) {
177        try {
178          FSEditLogOp op;
179          try {
180            op = in.readOp();
181            if (op == null) {
182              break;
183            }
184          } catch (Throwable e) {
185            // Handle a problem with our input
186            check203UpgradeFailure(in.getVersion(true), e);
187            String errorMessage =
188              formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
189            FSImage.LOG.error(errorMessage, e);
190            if (recovery == null) {
191               // We will only try to skip over problematic opcodes when in
192               // recovery mode.
193              throw new EditLogInputException(errorMessage, e, numEdits);
194            }
195            MetaRecoveryContext.editLogLoaderPrompt(
196                "We failed to read txId " + expectedTxId,
197                recovery, "skipping the bad section in the log");
198            in.resync();
199            continue;
200          }
201          recentOpcodeOffsets[(int)(numEdits % recentOpcodeOffsets.length)] =
202            in.getPosition();
203          if (op.hasTransactionId()) {
204            if (op.getTransactionId() > expectedTxId) { 
205              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
206                  "to be a gap in the edit log.  We expected txid " +
207                  expectedTxId + ", but got txid " +
208                  op.getTransactionId() + ".", recovery, "ignoring missing " +
209                  " transaction IDs");
210            } else if (op.getTransactionId() < expectedTxId) { 
211              MetaRecoveryContext.editLogLoaderPrompt("There appears " +
212                  "to be an out-of-order edit in the edit log.  We " +
213                  "expected txid " + expectedTxId + ", but got txid " +
214                  op.getTransactionId() + ".", recovery,
215                  "skipping the out-of-order edit");
216              continue;
217            }
218          }
219          try {
220            if (LOG.isTraceEnabled()) {
221              LOG.trace("op=" + op + ", startOpt=" + startOpt
222                  + ", numEdits=" + numEdits + ", totalEdits=" + totalEdits);
223            }
224            long inodeId = applyEditLogOp(op, fsDir, startOpt,
225                in.getVersion(true), lastInodeId);
226            if (lastInodeId < inodeId) {
227              lastInodeId = inodeId;
228            }
229          } catch (RollingUpgradeOp.RollbackException e) {
230            throw e;
231          } catch (Throwable e) {
232            LOG.error("Encountered exception on operation " + op, e);
233            if (recovery == null) {
234              throw e instanceof IOException? (IOException)e: new IOException(e);
235            }
236
237            MetaRecoveryContext.editLogLoaderPrompt("Failed to " +
238             "apply edit log operation " + op + ": error " +
239             e.getMessage(), recovery, "applying edits");
240          }
241          // Now that the operation has been successfully decoded and
242          // applied, update our bookkeeping.
243          incrOpCount(op.opCode, opCounts, step, counter);
244          if (op.hasTransactionId()) {
245            lastAppliedTxId = op.getTransactionId();
246            expectedTxId = lastAppliedTxId + 1;
247          } else {
248            expectedTxId = lastAppliedTxId = expectedStartingTxId;
249          }
250          // log progress
251          if (op.hasTransactionId()) {
252            long now = now();
253            if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
254              long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
255              int percent = Math.round((float) deltaTxId / numTxns * 100);
256              LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns
257                  + " transactions completed. (" + percent + "%)");
258              lastLogTime = now;
259            }
260          }
261          numEdits++;
262          totalEdits++;
263        } catch (RollingUpgradeOp.RollbackException e) {
264          LOG.info("Stopped at OP_START_ROLLING_UPGRADE for rollback.");
265          break;
266        } catch (MetaRecoveryContext.RequestStopException e) {
267          MetaRecoveryContext.LOG.warn("Stopped reading edit log at " +
268              in.getPosition() + "/"  + in.length());
269          break;
270        }
271      }
272    } finally {
273      fsNamesys.resetLastInodeId(lastInodeId);
274      if(closeOnExit) {
275        in.close();
276      }
277      fsDir.writeUnlock();
278      fsNamesys.writeUnlock();
279
280      if (LOG.isTraceEnabled()) {
281        LOG.trace("replaying edit log finished");
282      }
283
284      if (FSImage.LOG.isDebugEnabled()) {
285        dumpOpCounts(opCounts);
286      }
287    }
288    return numEdits;
289  }
290  
291  // allocate and update last allocated inode id
292  private long getAndUpdateLastInodeId(long inodeIdFromOp, int logVersion,
293      long lastInodeId) throws IOException {
294    long inodeId = inodeIdFromOp;
295
296    if (inodeId == INodeId.GRANDFATHER_INODE_ID) {
297      if (NameNodeLayoutVersion.supports(
298          LayoutVersion.Feature.ADD_INODE_ID, logVersion)) {
299        throw new IOException("The layout version " + logVersion
300            + " supports inodeId but gave bogus inodeId");
301      }
302      inodeId = fsNamesys.allocateNewInodeId();
303    } else {
304      // need to reset lastInodeId. fsnamesys gets lastInodeId firstly from
305      // fsimage but editlog captures more recent inodeId allocations
306      if (inodeId > lastInodeId) {
307        fsNamesys.resetLastInodeId(inodeId);
308      }
309    }
310    return inodeId;
311  }
312
313  @SuppressWarnings("deprecation")
314  private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
315      StartupOption startOpt, int logVersion, long lastInodeId) throws IOException {
316    long inodeId = INodeId.GRANDFATHER_INODE_ID;
317    if (LOG.isTraceEnabled()) {
318      LOG.trace("replaying edit log: " + op);
319    }
320    final boolean toAddRetryCache = fsNamesys.hasRetryCache() && op.hasRpcIds();
321    
322    switch (op.opCode) {
323    case OP_ADD: {
324      AddCloseOp addCloseOp = (AddCloseOp)op;
325      final String path =
326          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
327      if (FSNamesystem.LOG.isDebugEnabled()) {
328        FSNamesystem.LOG.debug(op.opCode + ": " + path +
329            " numblocks : " + addCloseOp.blocks.length +
330            " clientHolder " + addCloseOp.clientName +
331            " clientMachine " + addCloseOp.clientMachine);
332      }
333      // There three cases here:
334      // 1. OP_ADD to create a new file
335      // 2. OP_ADD to update file blocks
336      // 3. OP_ADD to open file for append
337
338      // See if the file already exists (persistBlocks call)
339      final INodesInPath iip = fsDir.getLastINodeInPath(path);
340      final INodeFile oldFile = INodeFile.valueOf(
341          iip.getINode(0), path, true);
342      INodeFile newFile = oldFile;
343      if (oldFile == null) { // this is OP_ADD on a new file (case 1)
344        // versions > 0 support per file replication
345        // get name and replication
346        final short replication = fsNamesys.getBlockManager()
347            .adjustReplication(addCloseOp.replication);
348        assert addCloseOp.blocks.length == 0;
349
350        // add to the file tree
351        inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion,
352            lastInodeId);
353        newFile = fsDir.unprotectedAddFile(inodeId,
354            path, addCloseOp.permissions, addCloseOp.aclEntries,
355            replication, addCloseOp.mtime, addCloseOp.atime,
356            addCloseOp.blockSize, true, addCloseOp.clientName,
357            addCloseOp.clientMachine);
358        fsNamesys.leaseManager.addLease(addCloseOp.clientName, path);
359
360        // add the op into retry cache if necessary
361        if (toAddRetryCache) {
362          HdfsFileStatus stat = fsNamesys.dir.createFileStatus(
363              HdfsFileStatus.EMPTY_NAME, newFile, Snapshot.CURRENT_STATE_ID);
364          fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
365              addCloseOp.rpcCallId, stat);
366        }
367      } else { // This is OP_ADD on an existing file
368        if (!oldFile.isUnderConstruction()) {
369          // This is case 3: a call to append() on an already-closed file.
370          if (FSNamesystem.LOG.isDebugEnabled()) {
371            FSNamesystem.LOG.debug("Reopening an already-closed file " +
372                "for append");
373          }
374          LocatedBlock lb = fsNamesys.prepareFileForWrite(path,
375              oldFile, addCloseOp.clientName, addCloseOp.clientMachine, null,
376              false, iip.getLatestSnapshotId(), false);
377          newFile = INodeFile.valueOf(fsDir.getINode(path),
378              path, true);
379          
380          // add the op into retry cache is necessary
381          if (toAddRetryCache) {
382            fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
383                addCloseOp.rpcCallId, lb);
384          }
385        }
386      }
387      // Fall-through for case 2.
388      // Regardless of whether it's a new file or an updated file,
389      // update the block list.
390      
391      // Update the salient file attributes.
392      newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
393      newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
394      updateBlocks(fsDir, addCloseOp, newFile);
395      break;
396    }
397    case OP_CLOSE: {
398      AddCloseOp addCloseOp = (AddCloseOp)op;
399      final String path =
400          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
401      if (FSNamesystem.LOG.isDebugEnabled()) {
402        FSNamesystem.LOG.debug(op.opCode + ": " + path +
403            " numblocks : " + addCloseOp.blocks.length +
404            " clientHolder " + addCloseOp.clientName +
405            " clientMachine " + addCloseOp.clientMachine);
406      }
407
408      final INodesInPath iip = fsDir.getLastINodeInPath(path);
409      final INodeFile file = INodeFile.valueOf(iip.getINode(0), path);
410
411      // Update the salient file attributes.
412      file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
413      file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
414      updateBlocks(fsDir, addCloseOp, file);
415
416      // Now close the file
417      if (!file.isUnderConstruction() &&
418          logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
419        // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
420        // could show up twice in a row. But after that version, this
421        // should be fixed, so we should treat it as an error.
422        throw new IOException(
423            "File is not under construction: " + path);
424      }
425      // One might expect that you could use removeLease(holder, path) here,
426      // but OP_CLOSE doesn't serialize the holder. So, remove by path.
427      if (file.isUnderConstruction()) {
428        fsNamesys.leaseManager.removeLeaseWithPrefixPath(path);
429        file.toCompleteFile(file.getModificationTime());
430      }
431      break;
432    }
433    case OP_UPDATE_BLOCKS: {
434      UpdateBlocksOp updateOp = (UpdateBlocksOp)op;
435      final String path =
436          renameReservedPathsOnUpgrade(updateOp.path, logVersion);
437      if (FSNamesystem.LOG.isDebugEnabled()) {
438        FSNamesystem.LOG.debug(op.opCode + ": " + path +
439            " numblocks : " + updateOp.blocks.length);
440      }
441      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path),
442          path);
443      // Update in-memory data structures
444      updateBlocks(fsDir, updateOp, oldFile);
445      
446      if (toAddRetryCache) {
447        fsNamesys.addCacheEntry(updateOp.rpcClientId, updateOp.rpcCallId);
448      }
449      break;
450    }
451    case OP_ADD_BLOCK: {
452      AddBlockOp addBlockOp = (AddBlockOp) op;
453      String path = renameReservedPathsOnUpgrade(addBlockOp.getPath(), logVersion);
454      if (FSNamesystem.LOG.isDebugEnabled()) {
455        FSNamesystem.LOG.debug(op.opCode + ": " + path +
456            " new block id : " + addBlockOp.getLastBlock().getBlockId());
457      }
458      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path), path);
459      // add the new block to the INodeFile
460      addNewBlock(fsDir, addBlockOp, oldFile);
461      break;
462    }
463    case OP_SET_REPLICATION: {
464      SetReplicationOp setReplicationOp = (SetReplicationOp)op;
465      short replication = fsNamesys.getBlockManager().adjustReplication(
466          setReplicationOp.replication);
467      fsDir.unprotectedSetReplication(
468          renameReservedPathsOnUpgrade(setReplicationOp.path, logVersion),
469                                      replication, null);
470      break;
471    }
472    case OP_CONCAT_DELETE: {
473      ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op;
474      String trg = renameReservedPathsOnUpgrade(concatDeleteOp.trg, logVersion);
475      String[] srcs = new String[concatDeleteOp.srcs.length];
476      for (int i=0; i<srcs.length; i++) {
477        srcs[i] =
478            renameReservedPathsOnUpgrade(concatDeleteOp.srcs[i], logVersion);
479      }
480      fsDir.unprotectedConcat(trg, srcs, concatDeleteOp.timestamp);
481      
482      if (toAddRetryCache) {
483        fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId,
484            concatDeleteOp.rpcCallId);
485      }
486      break;
487    }
488    case OP_RENAME_OLD: {
489      RenameOldOp renameOp = (RenameOldOp)op;
490      final String src = renameReservedPathsOnUpgrade(renameOp.src, logVersion);
491      final String dst = renameReservedPathsOnUpgrade(renameOp.dst, logVersion);
492      fsDir.unprotectedRenameTo(src, dst,
493                                renameOp.timestamp);
494      
495      if (toAddRetryCache) {
496        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
497      }
498      break;
499    }
500    case OP_DELETE: {
501      DeleteOp deleteOp = (DeleteOp)op;
502      fsDir.unprotectedDelete(
503          renameReservedPathsOnUpgrade(deleteOp.path, logVersion),
504          deleteOp.timestamp);
505      
506      if (toAddRetryCache) {
507        fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId);
508      }
509      break;
510    }
511    case OP_MKDIR: {
512      MkdirOp mkdirOp = (MkdirOp)op;
513      inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion,
514          lastInodeId);
515      fsDir.unprotectedMkdir(inodeId,
516          renameReservedPathsOnUpgrade(mkdirOp.path, logVersion),
517          mkdirOp.permissions, mkdirOp.aclEntries, mkdirOp.timestamp);
518      break;
519    }
520    case OP_SET_GENSTAMP_V1: {
521      SetGenstampV1Op setGenstampV1Op = (SetGenstampV1Op)op;
522      fsNamesys.setGenerationStampV1(setGenstampV1Op.genStampV1);
523      break;
524    }
525    case OP_SET_PERMISSIONS: {
526      SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op;
527      fsDir.unprotectedSetPermission(
528          renameReservedPathsOnUpgrade(setPermissionsOp.src, logVersion),
529          setPermissionsOp.permissions);
530      break;
531    }
532    case OP_SET_OWNER: {
533      SetOwnerOp setOwnerOp = (SetOwnerOp)op;
534      fsDir.unprotectedSetOwner(
535          renameReservedPathsOnUpgrade(setOwnerOp.src, logVersion),
536          setOwnerOp.username, setOwnerOp.groupname);
537      break;
538    }
539    case OP_SET_NS_QUOTA: {
540      SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op;
541      fsDir.unprotectedSetQuota(
542          renameReservedPathsOnUpgrade(setNSQuotaOp.src, logVersion),
543          setNSQuotaOp.nsQuota, HdfsConstants.QUOTA_DONT_SET);
544      break;
545    }
546    case OP_CLEAR_NS_QUOTA: {
547      ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op;
548      fsDir.unprotectedSetQuota(
549          renameReservedPathsOnUpgrade(clearNSQuotaOp.src, logVersion),
550          HdfsConstants.QUOTA_RESET, HdfsConstants.QUOTA_DONT_SET);
551      break;
552    }
553
554    case OP_SET_QUOTA:
555      SetQuotaOp setQuotaOp = (SetQuotaOp)op;
556      fsDir.unprotectedSetQuota(
557          renameReservedPathsOnUpgrade(setQuotaOp.src, logVersion),
558          setQuotaOp.nsQuota, setQuotaOp.dsQuota);
559      break;
560
561    case OP_TIMES: {
562      TimesOp timesOp = (TimesOp)op;
563
564      fsDir.unprotectedSetTimes(
565          renameReservedPathsOnUpgrade(timesOp.path, logVersion),
566          timesOp.mtime, timesOp.atime, true);
567      break;
568    }
569    case OP_SYMLINK: {
570      if (!FileSystem.areSymlinksEnabled()) {
571        throw new IOException("Symlinks not supported - please remove symlink before upgrading to this version of HDFS");
572      }
573      SymlinkOp symlinkOp = (SymlinkOp)op;
574      inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion,
575          lastInodeId);
576      fsDir.unprotectedAddSymlink(inodeId,
577          renameReservedPathsOnUpgrade(symlinkOp.path, logVersion),
578          symlinkOp.value, symlinkOp.mtime, symlinkOp.atime,
579          symlinkOp.permissionStatus);
580      
581      if (toAddRetryCache) {
582        fsNamesys.addCacheEntry(symlinkOp.rpcClientId, symlinkOp.rpcCallId);
583      }
584      break;
585    }
586    case OP_RENAME: {
587      RenameOp renameOp = (RenameOp)op;
588      fsDir.unprotectedRenameTo(
589          renameReservedPathsOnUpgrade(renameOp.src, logVersion),
590          renameReservedPathsOnUpgrade(renameOp.dst, logVersion),
591          renameOp.timestamp, renameOp.options);
592      
593      if (toAddRetryCache) {
594        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
595      }
596      break;
597    }
598    case OP_GET_DELEGATION_TOKEN: {
599      GetDelegationTokenOp getDelegationTokenOp
600        = (GetDelegationTokenOp)op;
601
602      fsNamesys.getDelegationTokenSecretManager()
603        .addPersistedDelegationToken(getDelegationTokenOp.token,
604                                     getDelegationTokenOp.expiryTime);
605      break;
606    }
607    case OP_RENEW_DELEGATION_TOKEN: {
608      RenewDelegationTokenOp renewDelegationTokenOp
609        = (RenewDelegationTokenOp)op;
610      fsNamesys.getDelegationTokenSecretManager()
611        .updatePersistedTokenRenewal(renewDelegationTokenOp.token,
612                                     renewDelegationTokenOp.expiryTime);
613      break;
614    }
615    case OP_CANCEL_DELEGATION_TOKEN: {
616      CancelDelegationTokenOp cancelDelegationTokenOp
617        = (CancelDelegationTokenOp)op;
618      fsNamesys.getDelegationTokenSecretManager()
619          .updatePersistedTokenCancellation(
620              cancelDelegationTokenOp.token);
621      break;
622    }
623    case OP_UPDATE_MASTER_KEY: {
624      UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op;
625      fsNamesys.getDelegationTokenSecretManager()
626        .updatePersistedMasterKey(updateMasterKeyOp.key);
627      break;
628    }
629    case OP_REASSIGN_LEASE: {
630      ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op;
631
632      Lease lease = fsNamesys.leaseManager.getLease(
633          reassignLeaseOp.leaseHolder);
634      final String path =
635          renameReservedPathsOnUpgrade(reassignLeaseOp.path, logVersion);
636      INodeFile pendingFile = fsDir.getINode(path).asFile();
637      Preconditions.checkState(pendingFile.isUnderConstruction());
638      fsNamesys.reassignLeaseInternal(lease,
639          path, reassignLeaseOp.newHolder, pendingFile);
640      break;
641    }
642    case OP_START_LOG_SEGMENT:
643    case OP_END_LOG_SEGMENT: {
644      // no data in here currently.
645      break;
646    }
647    case OP_CREATE_SNAPSHOT: {
648      CreateSnapshotOp createSnapshotOp = (CreateSnapshotOp) op;
649      final String snapshotRoot =
650          renameReservedPathsOnUpgrade(createSnapshotOp.snapshotRoot,
651              logVersion);
652      String path = fsNamesys.getSnapshotManager().createSnapshot(
653          snapshotRoot, createSnapshotOp.snapshotName);
654      if (toAddRetryCache) {
655        fsNamesys.addCacheEntryWithPayload(createSnapshotOp.rpcClientId,
656            createSnapshotOp.rpcCallId, path);
657      }
658      break;
659    }
660    case OP_DELETE_SNAPSHOT: {
661      DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op;
662      BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
663      List<INode> removedINodes = new ChunkedArrayList<INode>();
664      final String snapshotRoot =
665          renameReservedPathsOnUpgrade(deleteSnapshotOp.snapshotRoot,
666              logVersion);
667      fsNamesys.getSnapshotManager().deleteSnapshot(
668          snapshotRoot, deleteSnapshotOp.snapshotName,
669          collectedBlocks, removedINodes);
670      fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
671      collectedBlocks.clear();
672      fsNamesys.dir.removeFromInodeMap(removedINodes);
673      removedINodes.clear();
674      
675      if (toAddRetryCache) {
676        fsNamesys.addCacheEntry(deleteSnapshotOp.rpcClientId,
677            deleteSnapshotOp.rpcCallId);
678      }
679      break;
680    }
681    case OP_RENAME_SNAPSHOT: {
682      RenameSnapshotOp renameSnapshotOp = (RenameSnapshotOp) op;
683      final String snapshotRoot =
684          renameReservedPathsOnUpgrade(renameSnapshotOp.snapshotRoot,
685              logVersion);
686      fsNamesys.getSnapshotManager().renameSnapshot(
687          snapshotRoot, renameSnapshotOp.snapshotOldName,
688          renameSnapshotOp.snapshotNewName);
689      
690      if (toAddRetryCache) {
691        fsNamesys.addCacheEntry(renameSnapshotOp.rpcClientId,
692            renameSnapshotOp.rpcCallId);
693      }
694      break;
695    }
696    case OP_ALLOW_SNAPSHOT: {
697      AllowSnapshotOp allowSnapshotOp = (AllowSnapshotOp) op;
698      final String snapshotRoot =
699          renameReservedPathsOnUpgrade(allowSnapshotOp.snapshotRoot, logVersion);
700      fsNamesys.getSnapshotManager().setSnapshottable(
701          snapshotRoot, false);
702      break;
703    }
704    case OP_DISALLOW_SNAPSHOT: {
705      DisallowSnapshotOp disallowSnapshotOp = (DisallowSnapshotOp) op;
706      final String snapshotRoot =
707          renameReservedPathsOnUpgrade(disallowSnapshotOp.snapshotRoot,
708              logVersion);
709      fsNamesys.getSnapshotManager().resetSnapshottable(
710          snapshotRoot);
711      break;
712    }
713    case OP_SET_GENSTAMP_V2: {
714      SetGenstampV2Op setGenstampV2Op = (SetGenstampV2Op) op;
715      fsNamesys.setGenerationStampV2(setGenstampV2Op.genStampV2);
716      break;
717    }
718    case OP_ALLOCATE_BLOCK_ID: {
719      AllocateBlockIdOp allocateBlockIdOp = (AllocateBlockIdOp) op;
720      fsNamesys.setLastAllocatedBlockId(allocateBlockIdOp.blockId);
721      break;
722    }
723    case OP_ROLLING_UPGRADE_START: {
724      if (startOpt == StartupOption.ROLLINGUPGRADE) {
725        final RollingUpgradeStartupOption rollingUpgradeOpt
726            = startOpt.getRollingUpgradeStartupOption(); 
727        if (rollingUpgradeOpt == RollingUpgradeStartupOption.ROLLBACK) {
728          throw new RollingUpgradeOp.RollbackException();
729        } else if (rollingUpgradeOpt == RollingUpgradeStartupOption.DOWNGRADE) {
730          //ignore upgrade marker
731          break;
732        }
733      }
734      // start rolling upgrade
735      final long startTime = ((RollingUpgradeOp) op).getTime();
736      fsNamesys.startRollingUpgradeInternal(startTime);
737      fsNamesys.triggerRollbackCheckpoint();
738      break;
739    }
740    case OP_ROLLING_UPGRADE_FINALIZE: {
741      final long finalizeTime = ((RollingUpgradeOp) op).getTime();
742      fsNamesys.finalizeRollingUpgradeInternal(finalizeTime);
743      fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
744          NameNodeFile.IMAGE);
745      break;
746    }
747    case OP_ADD_CACHE_DIRECTIVE: {
748      AddCacheDirectiveInfoOp addOp = (AddCacheDirectiveInfoOp) op;
749      CacheDirectiveInfo result = fsNamesys.
750          getCacheManager().addDirectiveFromEditLog(addOp.directive);
751      if (toAddRetryCache) {
752        Long id = result.getId();
753        fsNamesys.addCacheEntryWithPayload(op.rpcClientId, op.rpcCallId, id);
754      }
755      break;
756    }
757    case OP_MODIFY_CACHE_DIRECTIVE: {
758      ModifyCacheDirectiveInfoOp modifyOp =
759          (ModifyCacheDirectiveInfoOp) op;
760      fsNamesys.getCacheManager().modifyDirectiveFromEditLog(
761          modifyOp.directive);
762      if (toAddRetryCache) {
763        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
764      }
765      break;
766    }
767    case OP_REMOVE_CACHE_DIRECTIVE: {
768      RemoveCacheDirectiveInfoOp removeOp =
769          (RemoveCacheDirectiveInfoOp) op;
770      fsNamesys.getCacheManager().removeDirective(removeOp.id, null);
771      if (toAddRetryCache) {
772        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
773      }
774      break;
775    }
776    case OP_ADD_CACHE_POOL: {
777      AddCachePoolOp addOp = (AddCachePoolOp) op;
778      fsNamesys.getCacheManager().addCachePool(addOp.info);
779      if (toAddRetryCache) {
780        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
781      }
782      break;
783    }
784    case OP_MODIFY_CACHE_POOL: {
785      ModifyCachePoolOp modifyOp = (ModifyCachePoolOp) op;
786      fsNamesys.getCacheManager().modifyCachePool(modifyOp.info);
787      if (toAddRetryCache) {
788        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
789      }
790      break;
791    }
792    case OP_REMOVE_CACHE_POOL: {
793      RemoveCachePoolOp removeOp = (RemoveCachePoolOp) op;
794      fsNamesys.getCacheManager().removeCachePool(removeOp.poolName);
795      if (toAddRetryCache) {
796        fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
797      }
798      break;
799    }
800    case OP_SET_ACL: {
801      SetAclOp setAclOp = (SetAclOp) op;
802      fsDir.unprotectedSetAcl(setAclOp.src, setAclOp.aclEntries);
803      break;
804    }
805    default:
806      throw new IOException("Invalid operation read " + op.opCode);
807    }
808    return inodeId;
809  }
810  
811  private static String formatEditLogReplayError(EditLogInputStream in,
812      long recentOpcodeOffsets[], long txid) {
813    StringBuilder sb = new StringBuilder();
814    sb.append("Error replaying edit log at offset " + in.getPosition());
815    sb.append(".  Expected transaction ID was ").append(txid);
816    if (recentOpcodeOffsets[0] != -1) {
817      Arrays.sort(recentOpcodeOffsets);
818      sb.append("\nRecent opcode offsets:");
819      for (long offset : recentOpcodeOffsets) {
820        if (offset != -1) {
821          sb.append(' ').append(offset);
822        }
823      }
824    }
825    return sb.toString();
826  }
827
828  /**
829   * Add a new block into the given INodeFile
830   */
831  private void addNewBlock(FSDirectory fsDir, AddBlockOp op, INodeFile file)
832      throws IOException {
833    BlockInfo[] oldBlocks = file.getBlocks();
834    Block pBlock = op.getPenultimateBlock();
835    Block newBlock= op.getLastBlock();
836    
837    if (pBlock != null) { // the penultimate block is not null
838      Preconditions.checkState(oldBlocks != null && oldBlocks.length > 0);
839      // compare pBlock with the last block of oldBlocks
840      Block oldLastBlock = oldBlocks[oldBlocks.length - 1];
841      if (oldLastBlock.getBlockId() != pBlock.getBlockId()
842          || oldLastBlock.getGenerationStamp() != pBlock.getGenerationStamp()) {
843        throw new IOException(
844            "Mismatched block IDs or generation stamps for the old last block of file "
845                + op.getPath() + ", the old last block is " + oldLastBlock
846                + ", and the block read from editlog is " + pBlock);
847      }
848      
849      oldLastBlock.setNumBytes(pBlock.getNumBytes());
850      if (oldLastBlock instanceof BlockInfoUnderConstruction) {
851        fsNamesys.getBlockManager().forceCompleteBlock(file,
852            (BlockInfoUnderConstruction) oldLastBlock);
853        fsNamesys.getBlockManager().processQueuedMessagesForBlock(pBlock);
854      }
855    } else { // the penultimate block is null
856      Preconditions.checkState(oldBlocks == null || oldBlocks.length == 0);
857    }
858    // add the new block
859    BlockInfo newBI = new BlockInfoUnderConstruction(
860          newBlock, file.getBlockReplication());
861    fsNamesys.getBlockManager().addBlockCollection(newBI, file);
862    file.addBlock(newBI);
863    fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
864  }
865  
866  /**
867   * Update in-memory data structures with new block information.
868   * @throws IOException
869   */
870  private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op,
871      INodeFile file) throws IOException {
872    // Update its block list
873    BlockInfo[] oldBlocks = file.getBlocks();
874    Block[] newBlocks = op.getBlocks();
875    String path = op.getPath();
876    
877    // Are we only updating the last block's gen stamp.
878    boolean isGenStampUpdate = oldBlocks.length == newBlocks.length;
879    
880    // First, update blocks in common
881    for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) {
882      BlockInfo oldBlock = oldBlocks[i];
883      Block newBlock = newBlocks[i];
884      
885      boolean isLastBlock = i == newBlocks.length - 1;
886      if (oldBlock.getBlockId() != newBlock.getBlockId() ||
887          (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() && 
888              !(isGenStampUpdate && isLastBlock))) {
889        throw new IOException("Mismatched block IDs or generation stamps, " +
890            "attempting to replace block " + oldBlock + " with " + newBlock +
891            " as block # " + i + "/" + newBlocks.length + " of " +
892            path);
893      }
894      
895      oldBlock.setNumBytes(newBlock.getNumBytes());
896      boolean changeMade =
897        oldBlock.getGenerationStamp() != newBlock.getGenerationStamp();
898      oldBlock.setGenerationStamp(newBlock.getGenerationStamp());
899      
900      if (oldBlock instanceof BlockInfoUnderConstruction &&
901          (!isLastBlock || op.shouldCompleteLastBlock())) {
902        changeMade = true;
903        fsNamesys.getBlockManager().forceCompleteBlock(file,
904            (BlockInfoUnderConstruction) oldBlock);
905      }
906      if (changeMade) {
907        // The state or gen-stamp of the block has changed. So, we may be
908        // able to process some messages from datanodes that we previously
909        // were unable to process.
910        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
911      }
912    }
913    
914    if (newBlocks.length < oldBlocks.length) {
915      // We're removing a block from the file, e.g. abandonBlock(...)
916      if (!file.isUnderConstruction()) {
917        throw new IOException("Trying to remove a block from file " +
918            path + " which is not under construction.");
919      }
920      if (newBlocks.length != oldBlocks.length - 1) {
921        throw new IOException("Trying to remove more than one block from file "
922            + path);
923      }
924      Block oldBlock = oldBlocks[oldBlocks.length - 1];
925      boolean removed = fsDir.unprotectedRemoveBlock(path, file, oldBlock);
926      if (!removed && !(op instanceof UpdateBlocksOp)) {
927        throw new IOException("Trying to delete non-existant block " + oldBlock);
928      }
929    } else if (newBlocks.length > oldBlocks.length) {
930      // We're adding blocks
931      for (int i = oldBlocks.length; i < newBlocks.length; i++) {
932        Block newBlock = newBlocks[i];
933        BlockInfo newBI;
934        if (!op.shouldCompleteLastBlock()) {
935          // TODO: shouldn't this only be true for the last block?
936          // what about an old-version fsync() where fsync isn't called
937          // until several blocks in?
938          newBI = new BlockInfoUnderConstruction(
939              newBlock, file.getBlockReplication());
940        } else {
941          // OP_CLOSE should add finalized blocks. This code path
942          // is only executed when loading edits written by prior
943          // versions of Hadoop. Current versions always log
944          // OP_ADD operations as each block is allocated.
945          newBI = new BlockInfo(newBlock, file.getBlockReplication());
946        }
947        fsNamesys.getBlockManager().addBlockCollection(newBI, file);
948        file.addBlock(newBI);
949        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
950      }
951    }
952  }
953
954  private static void dumpOpCounts(
955      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
956    StringBuilder sb = new StringBuilder();
957    sb.append("Summary of operations loaded from edit log:\n  ");
958    Joiner.on("\n  ").withKeyValueSeparator("=").appendTo(sb, opCounts);
959    FSImage.LOG.debug(sb.toString());
960  }
961
962  private void incrOpCount(FSEditLogOpCodes opCode,
963      EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts, Step step,
964      Counter counter) {
965    Holder<Integer> holder = opCounts.get(opCode);
966    if (holder == null) {
967      holder = new Holder<Integer>(1);
968      opCounts.put(opCode, holder);
969    } else {
970      holder.held++;
971    }
972    counter.increment();
973  }
974
975  /**
976   * Throw appropriate exception during upgrade from 203, when editlog loading
977   * could fail due to opcode conflicts.
978   */
979  private void check203UpgradeFailure(int logVersion, Throwable e)
980      throws IOException {
981    // 0.20.203 version version has conflicting opcodes with the later releases.
982    // The editlog must be emptied by restarting the namenode, before proceeding
983    // with the upgrade.
984    if (Storage.is203LayoutVersion(logVersion)
985        && logVersion != HdfsConstants.NAMENODE_LAYOUT_VERSION) {
986      String msg = "During upgrade failed to load the editlog version "
987          + logVersion + " from release 0.20.203. Please go back to the old "
988          + " release and restart the namenode. This empties the editlog "
989          + " and saves the namespace. Resume the upgrade after this step.";
990      throw new IOException(msg, e);
991    }
992  }
993  
994  /**
995   * Find the last valid transaction ID in the stream.
996   * If there are invalid or corrupt transactions in the middle of the stream,
997   * validateEditLog will skip over them.
998   * This reads through the stream but does not close it.
999   *
1000   * @throws IOException if the stream cannot be read due to an IO error (eg
1001   *                     if the log does not exist)
1002   */
1003  static EditLogValidation validateEditLog(EditLogInputStream in) {
1004    long lastPos = 0;
1005    long lastTxId = HdfsConstants.INVALID_TXID;
1006    long numValid = 0;
1007    FSEditLogOp op = null;
1008    while (true) {
1009      lastPos = in.getPosition();
1010      try {
1011        if ((op = in.readOp()) == null) {
1012          break;
1013        }
1014      } catch (Throwable t) {
1015        FSImage.LOG.warn("Caught exception after reading " + numValid +
1016            " ops from " + in + " while determining its valid length." +
1017            "Position was " + lastPos, t);
1018        in.resync();
1019        FSImage.LOG.warn("After resync, position is " + in.getPosition());
1020        continue;
1021      }
1022      if (lastTxId == HdfsConstants.INVALID_TXID
1023          || op.getTransactionId() > lastTxId) {
1024        lastTxId = op.getTransactionId();
1025      }
1026      numValid++;
1027    }
1028    return new EditLogValidation(lastPos, lastTxId, false);
1029  }
1030
1031  static EditLogValidation scanEditLog(EditLogInputStream in) {
1032    long lastPos = 0;
1033    long lastTxId = HdfsConstants.INVALID_TXID;
1034    long numValid = 0;
1035    FSEditLogOp op = null;
1036    while (true) {
1037      lastPos = in.getPosition();
1038      try {
1039        if ((op = in.readOp()) == null) { // TODO
1040          break;
1041        }
1042      } catch (Throwable t) {
1043        FSImage.LOG.warn("Caught exception after reading " + numValid +
1044            " ops from " + in + " while determining its valid length." +
1045            "Position was " + lastPos, t);
1046        in.resync();
1047        FSImage.LOG.warn("After resync, position is " + in.getPosition());
1048        continue;
1049      }
1050      if (lastTxId == HdfsConstants.INVALID_TXID
1051          || op.getTransactionId() > lastTxId) {
1052        lastTxId = op.getTransactionId();
1053      }
1054      numValid++;
1055    }
1056    return new EditLogValidation(lastPos, lastTxId, false);
1057  }
1058
1059  static class EditLogValidation {
1060    private final long validLength;
1061    private final long endTxId;
1062    private final boolean hasCorruptHeader;
1063
1064    EditLogValidation(long validLength, long endTxId,
1065        boolean hasCorruptHeader) {
1066      this.validLength = validLength;
1067      this.endTxId = endTxId;
1068      this.hasCorruptHeader = hasCorruptHeader;
1069    }
1070
1071    long getValidLength() { return validLength; }
1072
1073    long getEndTxId() { return endTxId; }
1074
1075    boolean hasCorruptHeader() { return hasCorruptHeader; }
1076  }
1077
1078  /**
1079   * Stream wrapper that keeps track of the current stream position.
1080   * 
1081   * This stream also allows us to set a limit on how many bytes we can read
1082   * without getting an exception.
1083   */
1084  public static class PositionTrackingInputStream extends FilterInputStream
1085      implements StreamLimiter {
1086    private long curPos = 0;
1087    private long markPos = -1;
1088    private long limitPos = Long.MAX_VALUE;
1089
1090    public PositionTrackingInputStream(InputStream is) {
1091      super(is);
1092    }
1093
1094    private void checkLimit(long amt) throws IOException {
1095      long extra = (curPos + amt) - limitPos;
1096      if (extra > 0) {
1097        throw new IOException("Tried to read " + amt + " byte(s) past " +
1098            "the limit at offset " + limitPos);
1099      }
1100    }
1101    
1102    @Override
1103    public int read() throws IOException {
1104      checkLimit(1);
1105      int ret = super.read();
1106      if (ret != -1) curPos++;
1107      return ret;
1108    }
1109
1110    @Override
1111    public int read(byte[] data) throws IOException {
1112      checkLimit(data.length);
1113      int ret = super.read(data);
1114      if (ret > 0) curPos += ret;
1115      return ret;
1116    }
1117
1118    @Override
1119    public int read(byte[] data, int offset, int length) throws IOException {
1120      checkLimit(length);
1121      int ret = super.read(data, offset, length);
1122      if (ret > 0) curPos += ret;
1123      return ret;
1124    }
1125
1126    @Override
1127    public void setLimit(long limit) {
1128      limitPos = curPos + limit;
1129    }
1130
1131    @Override
1132    public void clearLimit() {
1133      limitPos = Long.MAX_VALUE;
1134    }
1135
1136    @Override
1137    public void mark(int limit) {
1138      super.mark(limit);
1139      markPos = curPos;
1140    }
1141
1142    @Override
1143    public void reset() throws IOException {
1144      if (markPos == -1) {
1145        throw new IOException("Not marked!");
1146      }
1147      super.reset();
1148      curPos = markPos;
1149      markPos = -1;
1150    }
1151
1152    public long getPos() {
1153      return curPos;
1154    }
1155    
1156    @Override
1157    public long skip(long amt) throws IOException {
1158      long extra = (curPos + amt) - limitPos;
1159      if (extra > 0) {
1160        throw new IOException("Tried to skip " + extra + " bytes past " +
1161            "the limit at offset " + limitPos);
1162      }
1163      long ret = super.skip(amt);
1164      curPos += ret;
1165      return ret;
1166    }
1167  }
1168
1169  public long getLastAppliedTxId() {
1170    return lastAppliedTxId;
1171  }
1172
1173  /**
1174   * Creates a Step used for updating startup progress, populated with
1175   * information from the given edits.  The step always includes the log's name.
1176   * If the log has a known length, then the length is included in the step too.
1177   * 
1178   * @param edits EditLogInputStream to use for populating step
1179   * @return Step populated with information from edits
1180   * @throws IOException thrown if there is an I/O error
1181   */
1182  private static Step createStartupProgressStep(EditLogInputStream edits)
1183      throws IOException {
1184    long length = edits.length();
1185    String name = edits.getCurrentStreamName();
1186    return length != -1 ? new Step(name, length) : new Step(name);
1187  }
1188}