001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT;
021import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT;
022import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES;
023import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES_DEFAULT;
024import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES;
025import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES_DEFAULT;
026import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
027import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT;
028
029import java.io.DataInput;
030import java.io.IOException;
031import java.util.ArrayList;
032import java.util.Collection;
033import java.util.Collections;
034import java.util.Date;
035import java.util.EnumSet;
036import java.util.Iterator;
037import java.util.LinkedList;
038import java.util.List;
039import java.util.Map.Entry;
040import java.util.SortedMap;
041import java.util.TreeMap;
042import java.util.concurrent.locks.ReentrantLock;
043
044import org.apache.commons.io.IOUtils;
045import org.apache.commons.logging.Log;
046import org.apache.commons.logging.LogFactory;
047import org.apache.hadoop.classification.InterfaceAudience;
048import org.apache.hadoop.conf.Configuration;
049import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries;
050import org.apache.hadoop.fs.CacheFlag;
051import org.apache.hadoop.fs.InvalidRequestException;
052import org.apache.hadoop.fs.Path;
053import org.apache.hadoop.fs.UnresolvedLinkException;
054import org.apache.hadoop.fs.permission.FsAction;
055import org.apache.hadoop.fs.permission.FsPermission;
056import org.apache.hadoop.hdfs.DFSUtil;
057import org.apache.hadoop.hdfs.protocol.CacheDirective;
058import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
059import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
060import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
061import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
062import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
063import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
064import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
065import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
066import org.apache.hadoop.hdfs.protocol.DatanodeID;
067import org.apache.hadoop.hdfs.protocol.LocatedBlock;
068import org.apache.hadoop.hdfs.protocolPB.PBHelper;
069import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
070import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
071import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
072import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList;
073import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
074import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
075import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
076import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
077import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
078import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
079import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
080import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
081import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
082import org.apache.hadoop.hdfs.util.ReadOnlyList;
083import org.apache.hadoop.security.AccessControlException;
084import org.apache.hadoop.util.GSet;
085import org.apache.hadoop.util.LightWeightGSet;
086import org.apache.hadoop.util.Time;
087
088import com.google.common.annotations.VisibleForTesting;
089import com.google.common.collect.Lists;
090
091/**
092 * The Cache Manager handles caching on DataNodes.
093 *
094 * This class is instantiated by the FSNamesystem.
095 * It maintains the mapping of cached blocks to datanodes via processing
096 * datanode cache reports. Based on these reports and addition and removal of
097 * caching directives, we will schedule caching and uncaching work.
098 */
099@InterfaceAudience.LimitedPrivate({"HDFS"})
100public final class CacheManager {
  /** Logger shared by all CacheManager instances. */
  public static final Log LOG = LogFactory.getLog(CacheManager.class);

  /**
   * Lower bound for the configured cached-block map allocation percent. The
   * constructor substitutes this value when the configured one is smaller.
   */
  private static final float MIN_CACHED_BLOCKS_PERCENT = 0.001f;

  // TODO: add pending / underCached / schedule cached blocks stats.

  /**
   * The FSNamesystem that contains this CacheManager.
   */
  private final FSNamesystem namesystem;

  /**
   * The BlockManager associated with the FSN that owns this CacheManager.
   */
  private final BlockManager blockManager;

  /**
   * Cache directives, sorted by ID.
   *
   * listCacheDirectives relies on the ordering of elements in this map
   * to track what has already been listed by the client.
   */
  private final TreeMap<Long, CacheDirective> directivesById =
      new TreeMap<Long, CacheDirective>();

  /**
   * The directive ID to use for a new directive.  IDs always increase, and are
   * never reused.
   */
  private long nextDirectiveId;

  /**
   * Cache directives, sorted by path. Each path maps to the list of all
   * directives registered for exactly that path.
   */
  private final TreeMap<String, List<CacheDirective>> directivesByPath =
      new TreeMap<String, List<CacheDirective>>();

  /**
   * Cache pools, sorted by name.
   */
  private final TreeMap<String, CachePool> cachePools =
      new TreeMap<String, CachePool>();

  /**
   * Maximum number of cache pools to list in one operation.
   */
  private final int maxListCachePoolsResponses;

  /**
   * Maximum number of cache directives to list in one operation.
   */
  private final int maxListCacheDirectivesNumResponses;

  /**
   * Interval between scans in milliseconds.
   */
  private final long scanIntervalMs;

  /**
   * All cached blocks. Sized in the constructor from the configured
   * block map allocation percent.
   */
  private final GSet<CachedBlock, CachedBlock> cachedBlocks;

  /**
   * Lock which protects the CacheReplicationMonitor. It is held while the
   * monitor is started or stopped and is also handed to the monitor itself.
   */
  private final ReentrantLock crmLock = new ReentrantLock();

  // NOTE(review): SerializerCompat is declared outside the visible part of
  // this file; its exact role cannot be confirmed from here.
  private final SerializerCompat serializerCompat = new SerializerCompat();

  /**
   * The CacheReplicationMonitor. Null whenever no monitor thread is running.
   */
  private CacheReplicationMonitor monitor;
175
176  public static final class PersistState {
177    public final CacheManagerSection section;
178    public final List<CachePoolInfoProto> pools;
179    public final List<CacheDirectiveInfoProto> directives;
180
181    public PersistState(CacheManagerSection section,
182        List<CachePoolInfoProto> pools, List<CacheDirectiveInfoProto> directives) {
183      this.section = section;
184      this.pools = pools;
185      this.directives = directives;
186    }
187  }
188
189  CacheManager(FSNamesystem namesystem, Configuration conf,
190      BlockManager blockManager) {
191    this.namesystem = namesystem;
192    this.blockManager = blockManager;
193    this.nextDirectiveId = 1;
194    this.maxListCachePoolsResponses = conf.getInt(
195        DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES,
196        DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES_DEFAULT);
197    this.maxListCacheDirectivesNumResponses = conf.getInt(
198        DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES,
199        DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES_DEFAULT);
200    scanIntervalMs = conf.getLong(
201        DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS,
202        DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT);
203    float cachedBlocksPercent = conf.getFloat(
204          DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT,
205          DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT);
206    if (cachedBlocksPercent < MIN_CACHED_BLOCKS_PERCENT) {
207      LOG.info("Using minimum value " + MIN_CACHED_BLOCKS_PERCENT +
208        " for " + DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT);
209      cachedBlocksPercent = MIN_CACHED_BLOCKS_PERCENT;
210    }
211    this.cachedBlocks = new LightWeightGSet<CachedBlock, CachedBlock>(
212          LightWeightGSet.computeCapacity(cachedBlocksPercent,
213              "cachedBlocks"));
214
215  }
216
217  /**
218   * Resets all tracked directives and pools. Called during 2NN checkpointing to
219   * reset FSNamesystem state. See {FSNamesystem{@link #clear()}.
220   */
221  void clear() {
222    directivesById.clear();
223    directivesByPath.clear();
224    cachePools.clear();
225    nextDirectiveId = 1;
226  }
227
228  public void startMonitorThread() {
229    crmLock.lock();
230    try {
231      if (this.monitor == null) {
232        this.monitor = new CacheReplicationMonitor(namesystem, this,
233            scanIntervalMs, crmLock);
234        this.monitor.start();
235      }
236    } finally {
237      crmLock.unlock();
238    }
239  }
240
241  public void stopMonitorThread() {
242    crmLock.lock();
243    try {
244      if (this.monitor != null) {
245        CacheReplicationMonitor prevMonitor = this.monitor;
246        this.monitor = null;
247        IOUtils.closeQuietly(prevMonitor);
248      }
249    } finally {
250      crmLock.unlock();
251    }
252  }
253
254  public void clearDirectiveStats() {
255    assert namesystem.hasWriteLock();
256    for (CacheDirective directive : directivesById.values()) {
257      directive.resetStatistics();
258    }
259  }
260
261  /**
262   * @return Unmodifiable view of the collection of CachePools.
263   */
264  public Collection<CachePool> getCachePools() {
265    assert namesystem.hasReadLock();
266    return Collections.unmodifiableCollection(cachePools.values());
267  }
268
269  /**
270   * @return Unmodifiable view of the collection of CacheDirectives.
271   */
272  public Collection<CacheDirective> getCacheDirectives() {
273    assert namesystem.hasReadLock();
274    return Collections.unmodifiableCollection(directivesById.values());
275  }
276  
277  @VisibleForTesting
278  public GSet<CachedBlock, CachedBlock> getCachedBlocks() {
279    assert namesystem.hasReadLock();
280    return cachedBlocks;
281  }
282
283  private long getNextDirectiveId() throws IOException {
284    assert namesystem.hasWriteLock();
285    if (nextDirectiveId >= Long.MAX_VALUE - 1) {
286      throw new IOException("No more available IDs.");
287    }
288    return nextDirectiveId++;
289  }
290
291  // Helper getter / validation methods
292
293  private static void checkWritePermission(FSPermissionChecker pc,
294      CachePool pool) throws AccessControlException {
295    if ((pc != null)) {
296      pc.checkPermission(pool, FsAction.WRITE);
297    }
298  }
299
300  private static String validatePoolName(CacheDirectiveInfo directive)
301      throws InvalidRequestException {
302    String pool = directive.getPool();
303    if (pool == null) {
304      throw new InvalidRequestException("No pool specified.");
305    }
306    if (pool.isEmpty()) {
307      throw new InvalidRequestException("Invalid empty pool name.");
308    }
309    return pool;
310  }
311
312  private static String validatePath(CacheDirectiveInfo directive)
313      throws InvalidRequestException {
314    if (directive.getPath() == null) {
315      throw new InvalidRequestException("No path specified.");
316    }
317    String path = directive.getPath().toUri().getPath();
318    if (!DFSUtil.isValidName(path)) {
319      throw new InvalidRequestException("Invalid path '" + path + "'.");
320    }
321    return path;
322  }
323
324  private static short validateReplication(CacheDirectiveInfo directive,
325      short defaultValue) throws InvalidRequestException {
326    short repl = (directive.getReplication() != null)
327        ? directive.getReplication() : defaultValue;
328    if (repl <= 0) {
329      throw new InvalidRequestException("Invalid replication factor " + repl
330          + " <= 0");
331    }
332    return repl;
333  }
334
335  /**
336   * Calculates the absolute expiry time of the directive from the
337   * {@link CacheDirectiveInfo.Expiration}. This converts a relative Expiration
338   * into an absolute time based on the local clock.
339   * 
340   * @param info to validate.
341   * @param maxRelativeExpiryTime of the info's pool.
342   * @return the expiration time, or the pool's max absolute expiration if the
343   *         info's expiration was not set.
344   * @throws InvalidRequestException if the info's Expiration is invalid.
345   */
346  private static long validateExpiryTime(CacheDirectiveInfo info,
347      long maxRelativeExpiryTime) throws InvalidRequestException {
348    if (LOG.isTraceEnabled()) {
349      LOG.trace("Validating directive " + info
350          + " pool maxRelativeExpiryTime " + maxRelativeExpiryTime);
351    }
352    final long now = new Date().getTime();
353    final long maxAbsoluteExpiryTime = now + maxRelativeExpiryTime;
354    if (info == null || info.getExpiration() == null) {
355      return maxAbsoluteExpiryTime;
356    }
357    Expiration expiry = info.getExpiration();
358    if (expiry.getMillis() < 0l) {
359      throw new InvalidRequestException("Cannot set a negative expiration: "
360          + expiry.getMillis());
361    }
362    long relExpiryTime, absExpiryTime;
363    if (expiry.isRelative()) {
364      relExpiryTime = expiry.getMillis();
365      absExpiryTime = now + relExpiryTime;
366    } else {
367      absExpiryTime = expiry.getMillis();
368      relExpiryTime = absExpiryTime - now;
369    }
370    // Need to cap the expiry so we don't overflow a long when doing math
371    if (relExpiryTime > Expiration.MAX_RELATIVE_EXPIRY_MS) {
372      throw new InvalidRequestException("Expiration "
373          + expiry.toString() + " is too far in the future!");
374    }
375    // Fail if the requested expiry is greater than the max
376    if (relExpiryTime > maxRelativeExpiryTime) {
377      throw new InvalidRequestException("Expiration " + expiry.toString()
378          + " exceeds the max relative expiration time of "
379          + maxRelativeExpiryTime + " ms.");
380    }
381    return absExpiryTime;
382  }
383
384  /**
385   * Throws an exception if the CachePool does not have enough capacity to
386   * cache the given path at the replication factor.
387   *
388   * @param pool CachePool where the path is being cached
389   * @param path Path that is being cached
390   * @param replication Replication factor of the path
391   * @throws InvalidRequestException if the pool does not have enough capacity
392   */
393  private void checkLimit(CachePool pool, String path,
394      short replication) throws InvalidRequestException {
395    CacheDirectiveStats stats = computeNeeded(path, replication);
396    if (pool.getLimit() == CachePoolInfo.LIMIT_UNLIMITED) {
397      return;
398    }
399    if (pool.getBytesNeeded() + (stats.getBytesNeeded() * replication) > pool
400        .getLimit()) {
401      throw new InvalidRequestException("Caching path " + path + " of size "
402          + stats.getBytesNeeded() / replication + " bytes at replication "
403          + replication + " would exceed pool " + pool.getPoolName()
404          + "'s remaining capacity of "
405          + (pool.getLimit() - pool.getBytesNeeded()) + " bytes.");
406    }
407  }
408
409  /**
410   * Computes the needed number of bytes and files for a path.
411   * @return CacheDirectiveStats describing the needed stats for this path
412   */
413  private CacheDirectiveStats computeNeeded(String path, short replication) {
414    FSDirectory fsDir = namesystem.getFSDirectory();
415    INode node;
416    long requestedBytes = 0;
417    long requestedFiles = 0;
418    CacheDirectiveStats.Builder builder = new CacheDirectiveStats.Builder();
419    try {
420      node = fsDir.getINode(path);
421    } catch (UnresolvedLinkException e) {
422      // We don't cache through symlinks
423      return builder.build();
424    }
425    if (node == null) {
426      return builder.build();
427    }
428    if (node.isFile()) {
429      requestedFiles = 1;
430      INodeFile file = node.asFile();
431      requestedBytes = file.computeFileSize();
432    } else if (node.isDirectory()) {
433      INodeDirectory dir = node.asDirectory();
434      ReadOnlyList<INode> children = dir
435          .getChildrenList(Snapshot.CURRENT_STATE_ID);
436      requestedFiles = children.size();
437      for (INode child : children) {
438        if (child.isFile()) {
439          requestedBytes += child.asFile().computeFileSize();
440        }
441      }
442    }
443    return new CacheDirectiveStats.Builder()
444        .setBytesNeeded(requestedBytes)
445        .setFilesCached(requestedFiles)
446        .build();
447  }
448
449  /**
450   * Get a CacheDirective by ID, validating the ID and that the directive
451   * exists.
452   */
453  private CacheDirective getById(long id) throws InvalidRequestException {
454    // Check for invalid IDs.
455    if (id <= 0) {
456      throw new InvalidRequestException("Invalid negative ID.");
457    }
458    // Find the directive.
459    CacheDirective directive = directivesById.get(id);
460    if (directive == null) {
461      throw new InvalidRequestException("No directive with ID " + id
462          + " found.");
463    }
464    return directive;
465  }
466
467  /**
468   * Get a CachePool by name, validating that it exists.
469   */
470  private CachePool getCachePool(String poolName)
471      throws InvalidRequestException {
472    CachePool pool = cachePools.get(poolName);
473    if (pool == null) {
474      throw new InvalidRequestException("Unknown pool " + poolName);
475    }
476    return pool;
477  }
478
479  // RPC handlers
480
481  private void addInternal(CacheDirective directive, CachePool pool) {
482    boolean addedDirective = pool.getDirectiveList().add(directive);
483    assert addedDirective;
484    directivesById.put(directive.getId(), directive);
485    String path = directive.getPath();
486    List<CacheDirective> directives = directivesByPath.get(path);
487    if (directives == null) {
488      directives = new ArrayList<CacheDirective>(1);
489      directivesByPath.put(path, directives);
490    }
491    directives.add(directive);
492    // Fix up pool stats
493    CacheDirectiveStats stats =
494        computeNeeded(directive.getPath(), directive.getReplication());
495    directive.addBytesNeeded(stats.getBytesNeeded());
496    directive.addFilesNeeded(directive.getFilesNeeded());
497
498    setNeedsRescan();
499  }
500
501  /**
502   * Adds a directive, skipping most error checking. This should only be called
503   * internally in special scenarios like edit log replay.
504   */
505  CacheDirectiveInfo addDirectiveFromEditLog(CacheDirectiveInfo directive)
506      throws InvalidRequestException {
507    long id = directive.getId();
508    CacheDirective entry = new CacheDirective(directive);
509    CachePool pool = cachePools.get(directive.getPool());
510    addInternal(entry, pool);
511    if (nextDirectiveId <= id) {
512      nextDirectiveId = id + 1;
513    }
514    return entry.toInfo();
515  }
516
517  public CacheDirectiveInfo addDirective(
518      CacheDirectiveInfo info, FSPermissionChecker pc, EnumSet<CacheFlag> flags)
519      throws IOException {
520    assert namesystem.hasWriteLock();
521    CacheDirective directive;
522    try {
523      CachePool pool = getCachePool(validatePoolName(info));
524      checkWritePermission(pc, pool);
525      String path = validatePath(info);
526      short replication = validateReplication(info, (short)1);
527      long expiryTime = validateExpiryTime(info, pool.getMaxRelativeExpiryMs());
528      // Do quota validation if required
529      if (!flags.contains(CacheFlag.FORCE)) {
530        checkLimit(pool, path, replication);
531      }
532      // All validation passed
533      // Add a new entry with the next available ID.
534      long id = getNextDirectiveId();
535      directive = new CacheDirective(id, path, replication, expiryTime);
536      addInternal(directive, pool);
537    } catch (IOException e) {
538      LOG.warn("addDirective of " + info + " failed: ", e);
539      throw e;
540    }
541    LOG.info("addDirective of " + info + " successful.");
542    return directive.toInfo();
543  }
544
545  /**
546   * Factory method that makes a new CacheDirectiveInfo by applying fields in a
547   * CacheDirectiveInfo to an existing CacheDirective.
548   * 
549   * @param info with some or all fields set.
550   * @param defaults directive providing default values for unset fields in
551   *          info.
552   * 
553   * @return new CacheDirectiveInfo of the info applied to the defaults.
554   */
555  private static CacheDirectiveInfo createFromInfoAndDefaults(
556      CacheDirectiveInfo info, CacheDirective defaults) {
557    // Initialize the builder with the default values
558    CacheDirectiveInfo.Builder builder =
559        new CacheDirectiveInfo.Builder(defaults.toInfo());
560    // Replace default with new value if present
561    if (info.getPath() != null) {
562      builder.setPath(info.getPath());
563    }
564    if (info.getReplication() != null) {
565      builder.setReplication(info.getReplication());
566    }
567    if (info.getPool() != null) {
568      builder.setPool(info.getPool());
569    }
570    if (info.getExpiration() != null) {
571      builder.setExpiration(info.getExpiration());
572    }
573    return builder.build();
574  }
575
576  /**
577   * Modifies a directive, skipping most error checking. This is for careful
578   * internal use only. modifyDirective can be non-deterministic since its error
579   * checking depends on current system time, which poses a problem for edit log
580   * replay.
581   */
582  void modifyDirectiveFromEditLog(CacheDirectiveInfo info)
583      throws InvalidRequestException {
584    // Check for invalid IDs.
585    Long id = info.getId();
586    if (id == null) {
587      throw new InvalidRequestException("Must supply an ID.");
588    }
589    CacheDirective prevEntry = getById(id);
590    CacheDirectiveInfo newInfo = createFromInfoAndDefaults(info, prevEntry);
591    removeInternal(prevEntry);
592    addInternal(new CacheDirective(newInfo), getCachePool(newInfo.getPool()));
593  }
594
595  public void modifyDirective(CacheDirectiveInfo info,
596      FSPermissionChecker pc, EnumSet<CacheFlag> flags) throws IOException {
597    assert namesystem.hasWriteLock();
598    String idString =
599        (info.getId() == null) ?
600            "(null)" : info.getId().toString();
601    try {
602      // Check for invalid IDs.
603      Long id = info.getId();
604      if (id == null) {
605        throw new InvalidRequestException("Must supply an ID.");
606      }
607      CacheDirective prevEntry = getById(id);
608      checkWritePermission(pc, prevEntry.getPool());
609
610      // Fill in defaults
611      CacheDirectiveInfo infoWithDefaults =
612          createFromInfoAndDefaults(info, prevEntry);
613      CacheDirectiveInfo.Builder builder =
614          new CacheDirectiveInfo.Builder(infoWithDefaults);
615
616      // Do validation
617      validatePath(infoWithDefaults);
618      validateReplication(infoWithDefaults, (short)-1);
619      // Need to test the pool being set here to avoid rejecting a modify for a
620      // directive that's already been forced into a pool
621      CachePool srcPool = prevEntry.getPool();
622      CachePool destPool = getCachePool(validatePoolName(infoWithDefaults));
623      if (!srcPool.getPoolName().equals(destPool.getPoolName())) {
624        checkWritePermission(pc, destPool);
625        if (!flags.contains(CacheFlag.FORCE)) {
626          checkLimit(destPool, infoWithDefaults.getPath().toUri().getPath(),
627              infoWithDefaults.getReplication());
628        }
629      }
630      // Verify the expiration against the destination pool
631      validateExpiryTime(infoWithDefaults, destPool.getMaxRelativeExpiryMs());
632
633      // Indicate changes to the CRM
634      setNeedsRescan();
635
636      // Validation passed
637      removeInternal(prevEntry);
638      addInternal(new CacheDirective(builder.build()), destPool);
639    } catch (IOException e) {
640      LOG.warn("modifyDirective of " + idString + " failed: ", e);
641      throw e;
642    }
643    LOG.info("modifyDirective of " + idString + " successfully applied " +
644        info+ ".");
645  }
646
647  private void removeInternal(CacheDirective directive)
648      throws InvalidRequestException {
649    assert namesystem.hasWriteLock();
650    // Remove the corresponding entry in directivesByPath.
651    String path = directive.getPath();
652    List<CacheDirective> directives = directivesByPath.get(path);
653    if (directives == null || !directives.remove(directive)) {
654      throw new InvalidRequestException("Failed to locate entry " +
655          directive.getId() + " by path " + directive.getPath());
656    }
657    if (directives.size() == 0) {
658      directivesByPath.remove(path);
659    }
660    // Fix up the stats from removing the pool
661    final CachePool pool = directive.getPool();
662    directive.addBytesNeeded(-directive.getBytesNeeded());
663    directive.addFilesNeeded(-directive.getFilesNeeded());
664
665    directivesById.remove(directive.getId());
666    pool.getDirectiveList().remove(directive);
667    assert directive.getPool() == null;
668
669    setNeedsRescan();
670  }
671
672  public void removeDirective(long id, FSPermissionChecker pc)
673      throws IOException {
674    assert namesystem.hasWriteLock();
675    try {
676      CacheDirective directive = getById(id);
677      checkWritePermission(pc, directive.getPool());
678      removeInternal(directive);
679    } catch (IOException e) {
680      LOG.warn("removeDirective of " + id + " failed: ", e);
681      throw e;
682    }
683    LOG.info("removeDirective of " + id + " successful.");
684  }
685
686  public BatchedListEntries<CacheDirectiveEntry> 
687        listCacheDirectives(long prevId,
688            CacheDirectiveInfo filter,
689            FSPermissionChecker pc) throws IOException {
690    assert namesystem.hasReadLock();
691    final int NUM_PRE_ALLOCATED_ENTRIES = 16;
692    String filterPath = null;
693    if (filter.getId() != null) {
694      throw new IOException("Filtering by ID is unsupported.");
695    }
696    if (filter.getPath() != null) {
697      filterPath = validatePath(filter);
698    }
699    if (filter.getReplication() != null) {
700      throw new IOException("Filtering by replication is unsupported.");
701    }
702    ArrayList<CacheDirectiveEntry> replies =
703        new ArrayList<CacheDirectiveEntry>(NUM_PRE_ALLOCATED_ENTRIES);
704    int numReplies = 0;
705    SortedMap<Long, CacheDirective> tailMap =
706      directivesById.tailMap(prevId + 1);
707    for (Entry<Long, CacheDirective> cur : tailMap.entrySet()) {
708      if (numReplies >= maxListCacheDirectivesNumResponses) {
709        return new BatchedListEntries<CacheDirectiveEntry>(replies, true);
710      }
711      CacheDirective curDirective = cur.getValue();
712      CacheDirectiveInfo info = cur.getValue().toInfo();
713      if (filter.getPool() != null && 
714          !info.getPool().equals(filter.getPool())) {
715        continue;
716      }
717      if (filterPath != null &&
718          !info.getPath().toUri().getPath().equals(filterPath)) {
719        continue;
720      }
721      boolean hasPermission = true;
722      if (pc != null) {
723        try {
724          pc.checkPermission(curDirective.getPool(), FsAction.READ);
725        } catch (AccessControlException e) {
726          hasPermission = false;
727        }
728      }
729      if (hasPermission) {
730        replies.add(new CacheDirectiveEntry(info, cur.getValue().toStats()));
731        numReplies++;
732      }
733    }
734    return new BatchedListEntries<CacheDirectiveEntry>(replies, false);
735  }
736
737  /**
738   * Create a cache pool.
739   * 
740   * Only the superuser should be able to call this function.
741   *
742   * @param info    The info for the cache pool to create.
743   * @return        Information about the cache pool we created.
744   */
745  public CachePoolInfo addCachePool(CachePoolInfo info)
746      throws IOException {
747    assert namesystem.hasWriteLock();
748    CachePool pool;
749    try {
750      CachePoolInfo.validate(info);
751      String poolName = info.getPoolName();
752      pool = cachePools.get(poolName);
753      if (pool != null) {
754        throw new InvalidRequestException("Cache pool " + poolName
755            + " already exists.");
756      }
757      pool = CachePool.createFromInfoAndDefaults(info);
758      cachePools.put(pool.getPoolName(), pool);
759    } catch (IOException e) {
760      LOG.info("addCachePool of " + info + " failed: ", e);
761      throw e;
762    }
763    LOG.info("addCachePool of " + info + " successful.");
764    return pool.getInfo(true);
765  }
766
767  /**
768   * Modify a cache pool.
769   * 
770   * Only the superuser should be able to call this function.
771   *
772   * @param info
773   *          The info for the cache pool to modify.
774   */
775  public void modifyCachePool(CachePoolInfo info)
776      throws IOException {
777    assert namesystem.hasWriteLock();
778    StringBuilder bld = new StringBuilder();
779    try {
780      CachePoolInfo.validate(info);
781      String poolName = info.getPoolName();
782      CachePool pool = cachePools.get(poolName);
783      if (pool == null) {
784        throw new InvalidRequestException("Cache pool " + poolName
785            + " does not exist.");
786      }
787      String prefix = "";
788      if (info.getOwnerName() != null) {
789        pool.setOwnerName(info.getOwnerName());
790        bld.append(prefix).
791          append("set owner to ").append(info.getOwnerName());
792        prefix = "; ";
793      }
794      if (info.getGroupName() != null) {
795        pool.setGroupName(info.getGroupName());
796        bld.append(prefix).
797          append("set group to ").append(info.getGroupName());
798        prefix = "; ";
799      }
800      if (info.getMode() != null) {
801        pool.setMode(info.getMode());
802        bld.append(prefix).append("set mode to " + info.getMode());
803        prefix = "; ";
804      }
805      if (info.getLimit() != null) {
806        pool.setLimit(info.getLimit());
807        bld.append(prefix).append("set limit to " + info.getLimit());
808        prefix = "; ";
809        // New limit changes stats, need to set needs refresh
810        setNeedsRescan();
811      }
812      if (info.getMaxRelativeExpiryMs() != null) {
813        final Long maxRelativeExpiry = info.getMaxRelativeExpiryMs();
814        pool.setMaxRelativeExpiryMs(maxRelativeExpiry);
815        bld.append(prefix).append("set maxRelativeExpiry to "
816            + maxRelativeExpiry);
817        prefix = "; ";
818      }
819      if (prefix.isEmpty()) {
820        bld.append("no changes.");
821      }
822    } catch (IOException e) {
823      LOG.info("modifyCachePool of " + info + " failed: ", e);
824      throw e;
825    }
826    LOG.info("modifyCachePool of " + info.getPoolName() + " successful; "
827        + bld.toString());
828  }
829
830  /**
831   * Remove a cache pool.
832   * 
833   * Only the superuser should be able to call this function.
834   *
835   * @param poolName
836   *          The name for the cache pool to remove.
837   */
838  public void removeCachePool(String poolName)
839      throws IOException {
840    assert namesystem.hasWriteLock();
841    try {
842      CachePoolInfo.validateName(poolName);
843      CachePool pool = cachePools.remove(poolName);
844      if (pool == null) {
845        throw new InvalidRequestException(
846            "Cannot remove non-existent cache pool " + poolName);
847      }
848      // Remove all directives in this pool.
849      Iterator<CacheDirective> iter = pool.getDirectiveList().iterator();
850      while (iter.hasNext()) {
851        CacheDirective directive = iter.next();
852        directivesByPath.remove(directive.getPath());
853        directivesById.remove(directive.getId());
854        iter.remove();
855      }
856      setNeedsRescan();
857    } catch (IOException e) {
858      LOG.info("removeCachePool of " + poolName + " failed: ", e);
859      throw e;
860    }
861    LOG.info("removeCachePool of " + poolName + " successful.");
862  }
863
864  public BatchedListEntries<CachePoolEntry>
865      listCachePools(FSPermissionChecker pc, String prevKey) {
866    assert namesystem.hasReadLock();
867    final int NUM_PRE_ALLOCATED_ENTRIES = 16;
868    ArrayList<CachePoolEntry> results = 
869        new ArrayList<CachePoolEntry>(NUM_PRE_ALLOCATED_ENTRIES);
870    SortedMap<String, CachePool> tailMap = cachePools.tailMap(prevKey, false);
871    int numListed = 0;
872    for (Entry<String, CachePool> cur : tailMap.entrySet()) {
873      if (numListed++ >= maxListCachePoolsResponses) {
874        return new BatchedListEntries<CachePoolEntry>(results, true);
875      }
876      results.add(cur.getValue().getEntry(pc));
877    }
878    return new BatchedListEntries<CachePoolEntry>(results, false);
879  }
880
881  public void setCachedLocations(LocatedBlock block) {
882    CachedBlock cachedBlock =
883        new CachedBlock(block.getBlock().getBlockId(),
884            (short)0, false);
885    cachedBlock = cachedBlocks.get(cachedBlock);
886    if (cachedBlock == null) {
887      return;
888    }
889    List<DatanodeDescriptor> datanodes = cachedBlock.getDatanodes(Type.CACHED);
890    for (DatanodeDescriptor datanode : datanodes) {
891      block.addCachedLoc(datanode);
892    }
893  }
894
895  public final void processCacheReport(final DatanodeID datanodeID,
896      final List<Long> blockIds) throws IOException {
897    namesystem.writeLock();
898    final long startTime = Time.monotonicNow();
899    final long endTime;
900    try {
901      final DatanodeDescriptor datanode = 
902          blockManager.getDatanodeManager().getDatanode(datanodeID);
903      if (datanode == null || !datanode.isAlive) {
904        throw new IOException(
905            "processCacheReport from dead or unregistered datanode: " +
906            datanode);
907      }
908      processCacheReportImpl(datanode, blockIds);
909    } finally {
910      endTime = Time.monotonicNow();
911      namesystem.writeUnlock();
912    }
913
914    // Log the block report processing stats from Namenode perspective
915    final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
916    if (metrics != null) {
917      metrics.addCacheBlockReport((int) (endTime - startTime));
918    }
919    if (LOG.isDebugEnabled()) {
920      LOG.debug("Processed cache report from "
921          + datanodeID + ", blocks: " + blockIds.size()
922          + ", processing time: " + (endTime - startTime) + " msecs");
923    }
924  }
925
926  private void processCacheReportImpl(final DatanodeDescriptor datanode,
927      final List<Long> blockIds) {
928    CachedBlocksList cached = datanode.getCached();
929    cached.clear();
930    CachedBlocksList cachedList = datanode.getCached();
931    CachedBlocksList pendingCachedList = datanode.getPendingCached();
932    for (Iterator<Long> iter = blockIds.iterator(); iter.hasNext(); ) {
933      long blockId = iter.next();
934      CachedBlock cachedBlock =
935          new CachedBlock(blockId, (short)0, false);
936      CachedBlock prevCachedBlock = cachedBlocks.get(cachedBlock);
937      // Add the block ID from the cache report to the cachedBlocks map
938      // if it's not already there.
939      if (prevCachedBlock != null) {
940        cachedBlock = prevCachedBlock;
941      } else {
942        cachedBlocks.put(cachedBlock);
943      }
944      // Add the block to the datanode's implicit cached block list
945      // if it's not already there.  Similarly, remove it from the pending
946      // cached block list if it exists there.
947      if (!cachedBlock.isPresent(cachedList)) {
948        cachedList.add(cachedBlock);
949      }
950      if (cachedBlock.isPresent(pendingCachedList)) {
951        pendingCachedList.remove(cachedBlock);
952      }
953    }
954  }
955
956  public PersistState saveState() throws IOException {
957    ArrayList<CachePoolInfoProto> pools = Lists
958        .newArrayListWithCapacity(cachePools.size());
959    ArrayList<CacheDirectiveInfoProto> directives = Lists
960        .newArrayListWithCapacity(directivesById.size());
961
962    for (CachePool pool : cachePools.values()) {
963      CachePoolInfo p = pool.getInfo(true);
964      CachePoolInfoProto.Builder b = CachePoolInfoProto.newBuilder()
965          .setPoolName(p.getPoolName());
966
967      if (p.getOwnerName() != null)
968        b.setOwnerName(p.getOwnerName());
969
970      if (p.getGroupName() != null)
971        b.setGroupName(p.getGroupName());
972
973      if (p.getMode() != null)
974        b.setMode(p.getMode().toShort());
975
976      if (p.getLimit() != null)
977        b.setLimit(p.getLimit());
978
979      pools.add(b.build());
980    }
981
982    for (CacheDirective directive : directivesById.values()) {
983      CacheDirectiveInfo info = directive.toInfo();
984      CacheDirectiveInfoProto.Builder b = CacheDirectiveInfoProto.newBuilder()
985          .setId(info.getId());
986
987      if (info.getPath() != null) {
988        b.setPath(info.getPath().toUri().getPath());
989      }
990
991      if (info.getReplication() != null) {
992        b.setReplication(info.getReplication());
993      }
994
995      if (info.getPool() != null) {
996        b.setPool(info.getPool());
997      }
998
999      Expiration expiry = info.getExpiration();
1000      if (expiry != null) {
1001        assert (!expiry.isRelative());
1002        b.setExpiration(PBHelper.convert(expiry));
1003      }
1004
1005      directives.add(b.build());
1006    }
1007    CacheManagerSection s = CacheManagerSection.newBuilder()
1008        .setNextDirectiveId(nextDirectiveId).setNumPools(pools.size())
1009        .setNumDirectives(directives.size()).build();
1010
1011    return new PersistState(s, pools, directives);
1012  }
1013
1014  /**
1015   * Reloads CacheManager state from the passed DataInput. Used during namenode
1016   * startup to restore CacheManager state from an FSImage.
1017   * @param in DataInput from which to restore state
1018   * @throws IOException
1019   */
1020  public void loadStateCompat(DataInput in) throws IOException {
1021    serializerCompat.load(in);
1022  }
1023
1024  public void loadState(PersistState s) throws IOException {
1025    nextDirectiveId = s.section.getNextDirectiveId();
1026    for (CachePoolInfoProto p : s.pools) {
1027      CachePoolInfo info = new CachePoolInfo(p.getPoolName());
1028      if (p.hasOwnerName())
1029        info.setOwnerName(p.getOwnerName());
1030
1031      if (p.hasGroupName())
1032        info.setGroupName(p.getGroupName());
1033
1034      if (p.hasMode())
1035        info.setMode(new FsPermission((short) p.getMode()));
1036
1037      if (p.hasLimit())
1038        info.setLimit(p.getLimit());
1039
1040      addCachePool(info);
1041    }
1042
1043    for (CacheDirectiveInfoProto p : s.directives) {
1044      // Get pool reference by looking it up in the map
1045      final String poolName = p.getPool();
1046      CacheDirective directive = new CacheDirective(p.getId(), new Path(
1047          p.getPath()).toUri().getPath(), (short) p.getReplication(), p
1048          .getExpiration().getMillis());
1049      addCacheDirective(poolName, directive);
1050    }
1051  }
1052
1053  private void addCacheDirective(final String poolName,
1054      final CacheDirective directive) throws IOException {
1055    CachePool pool = cachePools.get(poolName);
1056    if (pool == null) {
1057      throw new IOException("Directive refers to pool " + poolName
1058          + ", which does not exist.");
1059    }
1060    boolean addedDirective = pool.getDirectiveList().add(directive);
1061    assert addedDirective;
1062    if (directivesById.put(directive.getId(), directive) != null) {
1063      throw new IOException("A directive with ID " + directive.getId()
1064          + " already exists");
1065    }
1066    List<CacheDirective> directives = directivesByPath.get(directive.getPath());
1067    if (directives == null) {
1068      directives = new LinkedList<CacheDirective>();
1069      directivesByPath.put(directive.getPath(), directives);
1070    }
1071    directives.add(directive);
1072  }
1073
1074  private final class SerializerCompat {
1075    private void load(DataInput in) throws IOException {
1076      nextDirectiveId = in.readLong();
1077      // pools need to be loaded first since directives point to their parent pool
1078      loadPools(in);
1079      loadDirectives(in);
1080    }
1081
1082    /**
1083     * Load cache pools from fsimage
1084     */
1085    private void loadPools(DataInput in)
1086        throws IOException {
1087      StartupProgress prog = NameNode.getStartupProgress();
1088      Step step = new Step(StepType.CACHE_POOLS);
1089      prog.beginStep(Phase.LOADING_FSIMAGE, step);
1090      int numberOfPools = in.readInt();
1091      prog.setTotal(Phase.LOADING_FSIMAGE, step, numberOfPools);
1092      Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step);
1093      for (int i = 0; i < numberOfPools; i++) {
1094        addCachePool(FSImageSerialization.readCachePoolInfo(in));
1095        counter.increment();
1096      }
1097      prog.endStep(Phase.LOADING_FSIMAGE, step);
1098    }
1099
1100    /**
1101     * Load cache directives from the fsimage
1102     */
1103    private void loadDirectives(DataInput in) throws IOException {
1104      StartupProgress prog = NameNode.getStartupProgress();
1105      Step step = new Step(StepType.CACHE_ENTRIES);
1106      prog.beginStep(Phase.LOADING_FSIMAGE, step);
1107      int numDirectives = in.readInt();
1108      prog.setTotal(Phase.LOADING_FSIMAGE, step, numDirectives);
1109      Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step);
1110      for (int i = 0; i < numDirectives; i++) {
1111        CacheDirectiveInfo info = FSImageSerialization.readCacheDirectiveInfo(in);
1112        // Get pool reference by looking it up in the map
1113        final String poolName = info.getPool();
1114        CacheDirective directive =
1115            new CacheDirective(info.getId(), info.getPath().toUri().getPath(),
1116                info.getReplication(), info.getExpiration().getAbsoluteMillis());
1117        addCacheDirective(poolName, directive);
1118        counter.increment();
1119      }
1120      prog.endStep(Phase.LOADING_FSIMAGE, step);
1121    }
1122  }
1123
1124  public void waitForRescanIfNeeded() {
1125    crmLock.lock();
1126    try {
1127      if (monitor != null) {
1128        monitor.waitForRescanIfNeeded();
1129      }
1130    } finally {
1131      crmLock.unlock();
1132    }
1133  }
1134
1135  private void setNeedsRescan() {
1136    crmLock.lock();
1137    try {
1138      if (monitor != null) {
1139        monitor.setNeedsRescan();
1140      }
1141    } finally {
1142      crmLock.unlock();
1143    }
1144  }
1145
  /**
   * Return the current value of {@code monitor}, read while holding
   * {@code crmLock}.
   *
   * @return The monitor as a Thread; possibly null, since other methods of
   *         this class null-check {@code monitor} before using it.
   */
  @VisibleForTesting
  public Thread getCacheReplicationMonitor() {
    // Take the lock so the field is read under the same lock the other
    // monitor accessors use.
    crmLock.lock();
    try {
      return monitor;
    } finally {
      crmLock.unlock();
    }
  }
1155}