001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import java.io.File;
021import java.io.IOException;
022import java.util.ArrayList;
023import java.util.Collections;
024import java.util.Comparator;
025import java.util.EnumSet;
026import java.util.List;
027import java.util.TreeSet;
028
029import org.apache.commons.logging.Log;
030import org.apache.commons.logging.LogFactory;
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.hdfs.DFSConfigKeys;
033import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
034import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
035import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
036import org.apache.hadoop.hdfs.util.MD5FileUtils;
037
038import com.google.common.base.Preconditions;
039import com.google.common.collect.ComparisonChain;
040import com.google.common.collect.Lists;
041import com.google.common.collect.Sets;
042
043/**
044 * The NNStorageRetentionManager is responsible for inspecting the storage
045 * directories of the NN and enforcing a retention policy on checkpoints
046 * and edit logs.
047 * 
048 * It delegates the actual removal of files to a StoragePurger
049 * implementation, which might delete the files or instead copy them to
050 * a filer or HDFS for later analysis.
051 */
052public class NNStorageRetentionManager {
053  
054  private final int numCheckpointsToRetain;
055  private final long numExtraEditsToRetain;
056  private final int maxExtraEditsSegmentsToRetain;
057  private static final Log LOG = LogFactory.getLog(
058      NNStorageRetentionManager.class);
059  private final NNStorage storage;
060  private final StoragePurger purger;
061  private final LogsPurgeable purgeableLogs;
062  
063  public NNStorageRetentionManager(
064      Configuration conf,
065      NNStorage storage,
066      LogsPurgeable purgeableLogs,
067      StoragePurger purger) {
068    this.numCheckpointsToRetain = conf.getInt(
069        DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
070        DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT);
071    this.numExtraEditsToRetain = conf.getLong(
072        DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY,
073        DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_DEFAULT);
074    this.maxExtraEditsSegmentsToRetain = conf.getInt(
075        DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_KEY,
076        DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_DEFAULT);
077    Preconditions.checkArgument(numCheckpointsToRetain > 0,
078        "Must retain at least one checkpoint");
079    Preconditions.checkArgument(numExtraEditsToRetain >= 0,
080        DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY +
081        " must not be negative");
082    
083    this.storage = storage;
084    this.purgeableLogs = purgeableLogs;
085    this.purger = purger;
086  }
087  
088  public NNStorageRetentionManager(Configuration conf, NNStorage storage,
089      LogsPurgeable purgeableLogs) {
090    this(conf, storage, purgeableLogs, new DeletionStoragePurger());
091  }
092
093  void purgeCheckpoints(NameNodeFile nnf) throws IOException {
094    purgeCheckpoinsAfter(nnf, -1);
095  }
096
097  void purgeCheckpoinsAfter(NameNodeFile nnf, long fromTxId)
098      throws IOException {
099    FSImageTransactionalStorageInspector inspector =
100        new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
101    storage.inspectStorageDirs(inspector);
102    for (FSImageFile image : inspector.getFoundImages()) {
103      if (image.getCheckpointTxId() > fromTxId) {
104        purger.purgeImage(image);
105      }
106    }
107  }
108
109  void purgeOldStorage(NameNodeFile nnf) throws IOException {
110    FSImageTransactionalStorageInspector inspector =
111        new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
112    storage.inspectStorageDirs(inspector);
113
114    long minImageTxId = getImageTxIdToRetain(inspector);
115    purgeCheckpointsOlderThan(inspector, minImageTxId);
116    
117    if (nnf == NameNodeFile.IMAGE_ROLLBACK) {
118      // do not purge edits for IMAGE_ROLLBACK.
119      return;
120    }
121
122    // If fsimage_N is the image we want to keep, then we need to keep
123    // all txns > N. We can remove anything < N+1, since fsimage_N
124    // reflects the state up to and including N. However, we also
125    // provide a "cushion" of older txns that we keep, which is
126    // handy for HA, where a remote node may not have as many
127    // new images.
128    //
129    // First, determine the target number of extra transactions to retain based
130    // on the configured amount.
131    long minimumRequiredTxId = minImageTxId + 1;
132    long purgeLogsFrom = Math.max(0, minimumRequiredTxId - numExtraEditsToRetain);
133    
134    ArrayList<EditLogInputStream> editLogs = new ArrayList<EditLogInputStream>();
135    purgeableLogs.selectInputStreams(editLogs, purgeLogsFrom, false);
136    Collections.sort(editLogs, new Comparator<EditLogInputStream>() {
137      @Override
138      public int compare(EditLogInputStream a, EditLogInputStream b) {
139        return ComparisonChain.start()
140            .compare(a.getFirstTxId(), b.getFirstTxId())
141            .compare(a.getLastTxId(), b.getLastTxId())
142            .result();
143      }
144    });
145
146    // Remove from consideration any edit logs that are in fact required.
147    while (editLogs.size() > 0 &&
148        editLogs.get(editLogs.size() - 1).getFirstTxId() >= minimumRequiredTxId) {
149      editLogs.remove(editLogs.size() - 1);
150    }
151    
152    // Next, adjust the number of transactions to retain if doing so would mean
153    // keeping too many segments around.
154    while (editLogs.size() > maxExtraEditsSegmentsToRetain) {
155      purgeLogsFrom = editLogs.get(0).getLastTxId() + 1;
156      editLogs.remove(0);
157    }
158    
159    // Finally, ensure that we're not trying to purge any transactions that we
160    // actually need.
161    if (purgeLogsFrom > minimumRequiredTxId) {
162      throw new AssertionError("Should not purge more edits than required to "
163          + "restore: " + purgeLogsFrom + " should be <= "
164          + minimumRequiredTxId);
165    }
166    
167    purgeableLogs.purgeLogsOlderThan(purgeLogsFrom);
168  }
169  
170  private void purgeCheckpointsOlderThan(
171      FSImageTransactionalStorageInspector inspector,
172      long minTxId) {
173    for (FSImageFile image : inspector.getFoundImages()) {
174      if (image.getCheckpointTxId() < minTxId) {
175        purger.purgeImage(image);
176      }
177    }
178  }
179
180  /**
181   * @param inspector inspector that has already inspected all storage dirs
182   * @return the transaction ID corresponding to the oldest checkpoint
183   * that should be retained. 
184   */
185  private long getImageTxIdToRetain(FSImageTransactionalStorageInspector inspector) {
186      
187    List<FSImageFile> images = inspector.getFoundImages();
188    TreeSet<Long> imageTxIds = Sets.newTreeSet();
189    for (FSImageFile image : images) {
190      imageTxIds.add(image.getCheckpointTxId());
191    }
192    
193    List<Long> imageTxIdsList = Lists.newArrayList(imageTxIds);
194    if (imageTxIdsList.isEmpty()) {
195      return 0;
196    }
197    
198    Collections.reverse(imageTxIdsList);
199    int toRetain = Math.min(numCheckpointsToRetain, imageTxIdsList.size());    
200    long minTxId = imageTxIdsList.get(toRetain - 1);
201    LOG.info("Going to retain " + toRetain + " images with txid >= " +
202        minTxId);
203    return minTxId;
204  }
205  
206  /**
207   * Interface responsible for disposing of old checkpoints and edit logs.
208   */
209  static interface StoragePurger {
210    void purgeLog(EditLogFile log);
211    void purgeImage(FSImageFile image);
212  }
213  
214  static class DeletionStoragePurger implements StoragePurger {
215    @Override
216    public void purgeLog(EditLogFile log) {
217      LOG.info("Purging old edit log " + log);
218      deleteOrWarn(log.getFile());
219    }
220
221    @Override
222    public void purgeImage(FSImageFile image) {
223      LOG.info("Purging old image " + image);
224      deleteOrWarn(image.getFile());
225      deleteOrWarn(MD5FileUtils.getDigestFileForFile(image.getFile()));
226    }
227
228    private static void deleteOrWarn(File file) {
229      if (!file.delete()) {
230        // It's OK if we fail to delete something -- we'll catch it
231        // next time we swing through this directory.
232        LOG.warn("Could not delete " + file);
233      }      
234    }
235  }
236}