001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.fs;
019    
020    import org.apache.commons.logging.Log;
021    import org.apache.commons.logging.LogFactory;
022    import org.apache.hadoop.conf.Configuration;
023    import org.apache.hadoop.fs.permission.FsPermission;
024    import org.apache.hadoop.io.IOUtils;
025    import org.apache.hadoop.io.Text;
026    import org.apache.hadoop.util.LineReader;
027    import org.apache.hadoop.util.Progressable;
028    
029    import java.io.FileNotFoundException;
030    import java.io.IOException;
031    import java.io.UnsupportedEncodingException;
032    import java.net.URI;
033    import java.net.URISyntaxException;
034    import java.net.URLDecoder;
035    import java.util.*;
036    
037    /**
038     * This is an implementation of the Hadoop Archive 
039     * Filesystem. This archive Filesystem has index files
040     * of the form _index* and has contents of the form
041     * part-*. The index files store the indexes of the 
042     * real files. The index files are of the form _masterindex
043     * and _index. The master index is a level of indirection 
044     * in to the index file to make the look ups faster. the index
045     * file is sorted with hash code of the paths that it contains 
046     * and the master index contains pointers to the positions in 
047     * index for ranges of hashcodes.
048     */
049    
050    public class HarFileSystem extends FileSystem {
051    
  private static final Log LOG = LogFactory.getLog(HarFileSystem.class);

  /** Configuration key controlling the size of the static metadata cache. */
  public static final String METADATA_CACHE_ENTRIES_KEY = "fs.har.metadatacache.entries";
  /** Default number of parsed archives kept in the metadata cache. */
  public static final int METADATA_CACHE_ENTRIES_DEFAULT = 10;

  /** Highest HAR layout version understood by this implementation. */
  public static final int VERSION = 3;

  // JVM-wide cache of parsed archive metadata, keyed by archive URI.
  // Lazily created by initializeMetadataCache() and shared by all instances.
  private static Map<URI, HarMetaData> harMetaCache;

  // uri representation of this Har filesystem
  private URI uri;
  // the top level path of the archive
  // in the underlying file system
  private Path archivePath;
  // the har auth
  private String harAuth;

  // pointer into the static metadata cache
  private HarMetaData metadata;

  // the underlying file system that physically stores the archive files
  private FileSystem fs;
073    
074      /**
075       * public construction of harfilesystem
076       */
077      public HarFileSystem() {
078        // Must call #initialize() method to set the underlying file system
079      }
080    
081      /**
082       * Return the protocol scheme for the FileSystem.
083       * <p/>
084       *
085       * @return <code>har</code>
086       */
087      @Override
088      public String getScheme() {
089        return "har";
090      }
091    
092      /**
093       * Constructor to create a HarFileSystem with an
094       * underlying filesystem.
095       * @param fs underlying file system
096       */
097      public HarFileSystem(FileSystem fs) {
098        this.fs = fs;
099        this.statistics = fs.statistics;
100      }
101     
102      private synchronized void initializeMetadataCache(Configuration conf) {
103        if (harMetaCache == null) {
104          int cacheSize = conf.getInt(METADATA_CACHE_ENTRIES_KEY, METADATA_CACHE_ENTRIES_DEFAULT);
105          harMetaCache = Collections.synchronizedMap(new LruCache<URI, HarMetaData>(cacheSize));
106        }
107      }
108     
109      /**
110       * Initialize a Har filesystem per har archive. The 
111       * archive home directory is the top level directory
112       * in the filesystem that contains the HAR archive.
113       * Be careful with this method, you do not want to go 
114       * on creating new Filesystem instances per call to 
115       * path.getFileSystem().
116       * the uri of Har is 
117       * har://underlyingfsscheme-host:port/archivepath.
118       * or 
119       * har:///archivepath. This assumes the underlying filesystem
120       * to be used in case not specified.
121       */
122      @Override
123      public void initialize(URI name, Configuration conf) throws IOException {
124        // initialize the metadata cache, if needed
125        initializeMetadataCache(conf);
126    
127        // decode the name
128        URI underLyingURI = decodeHarURI(name, conf);
129        // we got the right har Path- now check if this is 
130        // truly a har filesystem
131        Path harPath = archivePath(
132          new Path(name.getScheme(), name.getAuthority(), name.getPath()));
133        if (harPath == null) { 
134          throw new IOException("Invalid path for the Har Filesystem. " + 
135                               name.toString());
136        }
137        if (fs == null) {
138          fs = FileSystem.get(underLyingURI, conf);
139        }
140        uri = harPath.toUri();
141        archivePath = new Path(uri.getPath());
142        harAuth = getHarAuth(underLyingURI);
143        //check for the underlying fs containing
144        // the index file
145        Path masterIndexPath = new Path(archivePath, "_masterindex");
146        Path archiveIndexPath = new Path(archivePath, "_index");
147        if (!fs.exists(masterIndexPath) || !fs.exists(archiveIndexPath)) {
148          throw new IOException("Invalid path for the Har Filesystem. " +
149              "No index file in " + harPath);
150        }
151    
152        metadata = harMetaCache.get(uri);
153        if (metadata != null) {
154          FileStatus mStat = fs.getFileStatus(masterIndexPath);
155          FileStatus aStat = fs.getFileStatus(archiveIndexPath);
156          if (mStat.getModificationTime() != metadata.getMasterIndexTimestamp() ||
157              aStat.getModificationTime() != metadata.getArchiveIndexTimestamp()) {
158            // the archive has been overwritten since we last read it
159            // remove the entry from the meta data cache
160            metadata = null;
161            harMetaCache.remove(uri);
162          }
163        }
164        if (metadata == null) {
165          metadata = new HarMetaData(fs, masterIndexPath, archiveIndexPath);
166          metadata.parseMetaData();
167          harMetaCache.put(uri, metadata);
168        }
169      }
170    
  /** @return the configuration of the underlying filesystem. */
  @Override
  public Configuration getConf() {
    return fs.getConf();
  }
175    
176      // get the version of the filesystem from the masterindex file
177      // the version is currently not useful since its the first version
178      // of archives
179      public int getHarVersion() throws IOException {
180        if (metadata != null) {
181          return metadata.getVersion();
182        }
183        else {
184          throw new IOException("Invalid meta data for the Har Filesystem");
185        }
186      }
187    
188      /*
189       * find the parent path that is the 
190       * archive path in the path. The last
191       * path segment that ends with .har is 
192       * the path that will be returned.
193       */
194      private Path archivePath(Path p) {
195        Path retPath = null;
196        Path tmp = p;
197        for (int i=0; i< p.depth(); i++) {
198          if (tmp.toString().endsWith(".har")) {
199            retPath = tmp;
200            break;
201          }
202          tmp = tmp.getParent();
203        }
204        return retPath;
205      }
206    
207      /**
208       * decode the raw URI to get the underlying URI
209       * @param rawURI raw Har URI
210       * @return filtered URI of the underlying fileSystem
211       */
212      private URI decodeHarURI(URI rawURI, Configuration conf) throws IOException {
213        String tmpAuth = rawURI.getAuthority();
214        //we are using the default file
215        //system in the config 
216        //so create a underlying uri and 
217        //return it
218        if (tmpAuth == null) {
219          //create a path 
220          return FileSystem.getDefaultUri(conf);
221        }
222        String authority = rawURI.getAuthority();
223        if (authority == null) {
224          throw new IOException("URI: " + rawURI
225              + " is an invalid Har URI since authority==null."
226              + "  Expecting har://<scheme>-<host>/<path>.");
227        }
228     
229        int i = authority.indexOf('-');
230        if (i < 0) {
231          throw new IOException("URI: " + rawURI
232              + " is an invalid Har URI since '-' not found."
233              + "  Expecting har://<scheme>-<host>/<path>.");
234        }
235     
236        if (rawURI.getQuery() != null) {
237          // query component not allowed
238          throw new IOException("query component in Path not supported  " + rawURI);
239        }
240     
241        URI tmp;
242        try {
243          // convert <scheme>-<host> to <scheme>://<host>
244          URI baseUri = new URI(authority.replaceFirst("-", "://"));
245     
246          tmp = new URI(baseUri.getScheme(), baseUri.getAuthority(),
247                rawURI.getPath(), rawURI.getQuery(), rawURI.getFragment());
248        } catch (URISyntaxException e) {
249          throw new IOException("URI: " + rawURI
250              + " is an invalid Har URI. Expecting har://<scheme>-<host>/<path>.");
251        }
252        return tmp;
253      }
254    
255      private static String decodeString(String str)
256        throws UnsupportedEncodingException {
257        return URLDecoder.decode(str, "UTF-8");
258      }
259    
260      private String decodeFileName(String fname)
261        throws UnsupportedEncodingException {
262        int version = metadata.getVersion();
263        if (version == 2 || version == 3){
264          return decodeString(fname);
265        }
266        return fname;
267      }
268    
269      /**
270       * return the top level archive.
271       */
272      @Override
273      public Path getWorkingDirectory() {
274        return new Path(uri.toString());
275      }
276    
  /** The initial working directory is the same fixed archive root. */
  @Override
  public Path getInitialWorkingDirectory() {
    return getWorkingDirectory();
  }
281    
  /** Delegate filesystem status (capacity/usage) to the underlying fs. */
  @Override
  public FsStatus getStatus(Path p) throws IOException {
    return fs.getStatus(p);
  }
286    
287      /**
288       * Create a har specific auth 
289       * har-underlyingfs:port
290       * @param underLyingUri the uri of underlying
291       * filesystem
292       * @return har specific auth
293       */
294      private String getHarAuth(URI underLyingUri) {
295        String auth = underLyingUri.getScheme() + "-";
296        if (underLyingUri.getHost() != null) {
297          if (underLyingUri.getUserInfo() != null) {
298            auth += underLyingUri.getUserInfo();
299            auth += "@";
300          }
301          auth += underLyingUri.getHost();
302          if (underLyingUri.getPort() != -1) {
303            auth += ":";
304            auth +=  underLyingUri.getPort();
305          }
306        }
307        else {
308          auth += ":";
309        }
310        return auth;
311      }
312    
313      /**
314       * Used for delegation token related functionality. Must delegate to
315       * underlying file system.
316       */
317      @Override
318      protected URI getCanonicalUri() {
319        return fs.getCanonicalUri();
320      }
321    
  /** URI canonicalization is defined by the underlying filesystem. */
  @Override
  protected URI canonicalizeUri(URI uri) {
    return fs.canonicalizeUri(uri);
  }
326    
327      /**
328       * Returns the uri of this filesystem.
329       * The uri is of the form 
330       * har://underlyingfsschema-host:port/pathintheunderlyingfs
331       */
332      @Override
333      public URI getUri() {
334        return this.uri;
335      }
336      
  /** Path validity is delegated to the underlying filesystem. */
  @Override
  protected void checkPath(Path path) {
    fs.checkPath(path);
  }
341    
  /** Path resolution is delegated to the underlying filesystem. */
  @Override
  public Path resolvePath(Path p) throws IOException {
    return fs.resolvePath(p);
  }
346    
347      /**
348       * this method returns the path 
349       * inside the har filesystem.
350       * this is relative path inside 
351       * the har filesystem.
352       * @param path the fully qualified path in the har filesystem.
353       * @return relative path in the filesystem.
354       */
355      private Path getPathInHar(Path path) {
356        Path harPath = new Path(path.toUri().getPath());
357        if (archivePath.compareTo(harPath) == 0)
358          return new Path(Path.SEPARATOR);
359        Path tmp = new Path(harPath.getName());
360        Path parent = harPath.getParent();
361        while (!(parent.compareTo(archivePath) == 0)) {
362          if (parent.toString().equals(Path.SEPARATOR)) {
363            tmp = null;
364            break;
365          }
366          tmp = new Path(parent.getName(), tmp);
367          parent = parent.getParent();
368        }
369        if (tmp != null) 
370          tmp = new Path(Path.SEPARATOR, tmp);
371        return tmp;
372      }
373      
  //the relative path of p. basically 
  // getting rid of /. Parsing and doing 
  // string manipulation is not good - so
  // just use the path api to do it.
  // Rebuilds the absolute in-archive path p on top of `initial`, producing
  // a fully qualified path under this filesystem's scheme/authority.
  private Path makeRelative(String initial, Path p) {
    String scheme = this.uri.getScheme();
    String authority = this.uri.getAuthority();
    Path root = new Path(Path.SEPARATOR);
    // "/" maps directly onto the initial path.
    if (root.compareTo(p) == 0)
      return new Path(scheme, authority, initial);
    // Reassemble p's components relative to its root (depth-1 joins)...
    Path retPath = new Path(p.getName());
    Path parent = p.getParent();
    for (int i=0; i < p.depth()-1; i++) {
      retPath = new Path(parent.getName(), retPath);
      parent = parent.getParent();
    }
    // ...then attach the result under the initial path.
    return new Path(new Path(scheme, authority, initial),
      retPath.toString());
  }
393      
394      /* this makes a path qualified in the har filesystem
395       * (non-Javadoc)
396       * @see org.apache.hadoop.fs.FilterFileSystem#makeQualified(
397       * org.apache.hadoop.fs.Path)
398       */
399      @Override
400      public Path makeQualified(Path path) {
401        // make sure that we just get the 
402        // path component 
403        Path fsPath = path;
404        if (!path.isAbsolute()) {
405          fsPath = new Path(archivePath, path);
406        }
407    
408        URI tmpURI = fsPath.toUri();
409        //change this to Har uri 
410        return new Path(uri.getScheme(), harAuth, tmpURI.getPath());
411      }
412    
413      /**
414       * Fix offset and length of block locations.
415       * Note that this method modifies the original array.
416       * @param locations block locations of har part file
417       * @param start the start of the desired range in the contained file
418       * @param len the length of the desired range
419       * @param fileOffsetInHar the offset of the desired file in the har part file
420       * @return block locations with fixed offset and length
421       */  
422      static BlockLocation[] fixBlockLocations(BlockLocation[] locations,
423                                              long start,
424                                              long len,
425                                              long fileOffsetInHar) {
426        // offset 1 past last byte of desired range
427        long end = start + len;
428    
429        for (BlockLocation location : locations) {
430          // offset of part block relative to beginning of desired file
431          // (may be negative if file starts in this part block)
432          long harBlockStart = location.getOffset() - fileOffsetInHar;
433          // offset 1 past last byte of har block relative to beginning of
434          // desired file
435          long harBlockEnd = harBlockStart + location.getLength();
436          
437          if (start > harBlockStart) {
438            // desired range starts after beginning of this har block
439            // fix offset to beginning of relevant range (relative to desired file)
440            location.setOffset(start);
441            // fix length to relevant portion of har block
442            location.setLength(location.getLength() - (start - harBlockStart));
443          } else {
444            // desired range includes beginning of this har block
445            location.setOffset(harBlockStart);
446          }
447          
448          if (harBlockEnd > end) {
449            // range ends before end of this har block
450            // fix length to remove irrelevant portion at the end
451            location.setLength(location.getLength() - (harBlockEnd - end));
452          }
453        }
454        
455        return locations;
456      }
457      
458      /**
459       * Get block locations from the underlying fs and fix their
460       * offsets and lengths.
461       * @param file the input file status to get block locations
462       * @param start the start of the desired range in the contained file
463       * @param len the length of the desired range
464       * @return block locations for this segment of file
465       * @throws IOException
466       */
467      @Override
468      public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
469                                                   long len) throws IOException {
470        HarStatus hstatus = getFileHarStatus(file.getPath());
471        Path partPath = new Path(archivePath, hstatus.getPartName());
472        FileStatus partStatus = metadata.getPartFileStatus(partPath);
473    
474        // get all part blocks that overlap with the desired file blocks
475        BlockLocation[] locations = 
476          fs.getFileBlockLocations(partStatus,
477                                   hstatus.getStartIndex() + start, len);
478    
479        return fixBlockLocations(locations, start, len, hstatus.getStartIndex());
480      }
481      
482      /**
483       * the hash of the path p inside  the filesystem
484       * @param p the path in the harfilesystem
485       * @return the hash code of the path.
486       */
487      public static int getHarHash(Path p) {
488        return (p.toString().hashCode() & 0x7fffffff);
489      }
490      
  /**
   * One record from the master index: a byte range [begin, end) inside the
   * _index file, together with the range [startHash, endHash] of path hash
   * codes whose entries live in that span (see the class javadoc on how the
   * master index narrows lookups).
   */
  static class Store {
    public Store() {
      begin = end = startHash = endHash = 0;
    }
    public Store(long begin, long end, int startHash, int endHash) {
      this.begin = begin;
      this.end = end;
      this.startHash = startHash;
      this.endHash = endHash;
    }
    // byte offsets into the _index file
    public long begin;
    public long end;
    // hash-code range covered by that byte span
    public int startHash;
    public int endHash;
  }
506      
507      /**
508       * Get filestatuses of all the children of a given directory. This just reads
509       * through index file and reads line by line to get all statuses for children
510       * of a directory. Its a brute force way of getting all such filestatuses
511       * 
512       * @param parent
513       *          the parent path directory
514       * @param statuses
515       *          the list to add the children filestatuses to
516       */
517      private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses)
518              throws IOException {
519        String parentString = parent.getName();
520        if (!parentString.endsWith(Path.SEPARATOR)){
521            parentString += Path.SEPARATOR;
522        }
523        Path harPath = new Path(parentString);
524        int harlen = harPath.depth();
525        final Map<String, FileStatus> cache = new TreeMap<String, FileStatus>();
526    
527        for (HarStatus hstatus : metadata.archive.values()) {
528          String child = hstatus.getName();
529          if ((child.startsWith(parentString))) {
530            Path thisPath = new Path(child);
531            if (thisPath.depth() == harlen + 1) {
532              statuses.add(toFileStatus(hstatus, cache));
533            }
534          }
535        }
536      }
537    
538      /**
539       * Combine the status stored in the index and the underlying status. 
540       * @param h status stored in the index
541       * @param cache caching the underlying file statuses
542       * @return the combined file status
543       * @throws IOException
544       */
545      private FileStatus toFileStatus(HarStatus h,
546          Map<String, FileStatus> cache) throws IOException {
547        FileStatus underlying = null;
548        if (cache != null) {
549          underlying = cache.get(h.partName);
550        }
551        if (underlying == null) {
552          final Path p = h.isDir? archivePath: new Path(archivePath, h.partName);
553          underlying = fs.getFileStatus(p);
554          if (cache != null) {
555            cache.put(h.partName, underlying);
556          }
557        }
558    
559        long modTime = 0;
560        int version = metadata.getVersion();
561        if (version < 3) {
562          modTime = underlying.getModificationTime();
563        } else if (version == 3) {
564          modTime = h.getModificationTime();
565        }
566    
567        return new FileStatus(
568            h.isDir()? 0L: h.getLength(),
569            h.isDir(),
570            underlying.getReplication(),
571            underlying.getBlockSize(),
572            modTime,
573            underlying.getAccessTime(),
574            underlying.getPermission(),
575            underlying.getOwner(),
576            underlying.getGroup(),
577            makeRelative(this.uri.getPath(), new Path(h.name)));
578      }
579    
580      // a single line parser for hadoop archives status 
581      // stored in a single line in the index files 
582      // the format is of the form 
583      // filename "dir"/"file" partFileName startIndex length 
584      // <space separated children>
585      private class HarStatus {
586        boolean isDir;
587        String name;
588        List<String> children;
589        String partName;
590        long startIndex;
591        long length;
592        long modificationTime = 0;
593    
594        public HarStatus(String harString) throws UnsupportedEncodingException {
595          String[] splits = harString.split(" ");
596          this.name = decodeFileName(splits[0]);
597          this.isDir = "dir".equals(splits[1]) ? true: false;
598          // this is equal to "none" if its a directory
599          this.partName = splits[2];
600          this.startIndex = Long.parseLong(splits[3]);
601          this.length = Long.parseLong(splits[4]);
602    
603          int version = metadata.getVersion();
604          String[] propSplits = null;
605          // propSplits is used to retrieve the metainformation that Har versions
606          // 1 & 2 missed (modification time, permission, owner group).
607          // These fields are stored in an encoded string placed in different
608          // locations depending on whether it's a file or directory entry.
609          // If it's a directory, the string will be placed at the partName
610          // location (directories have no partName because they don't have data
611          // to be stored). This is done because the number of fields in a
612          // directory entry is unbounded (all children are listed at the end)
613          // If it's a file, the string will be the last field.
614          if (isDir) {
615            if (version == 3){
616              propSplits = decodeString(this.partName).split(" ");
617            }
618            children = new ArrayList<String>();
619            for (int i = 5; i < splits.length; i++) {
620              children.add(decodeFileName(splits[i]));
621            }
622          } else if (version == 3) {
623            propSplits = decodeString(splits[5]).split(" ");
624          }
625    
626          if (propSplits != null && propSplits.length >= 4) {
627            modificationTime = Long.parseLong(propSplits[0]);
628            // the fields below are stored in the file but are currently not used
629            // by HarFileSystem
630            // permission = new FsPermission(Short.parseShort(propSplits[1]));
631            // owner = decodeString(propSplits[2]);
632            // group = decodeString(propSplits[3]);
633          }
634        }
635        public boolean isDir() {
636          return isDir;
637        }
638        
639        public String getName() {
640          return name;
641        }
642        public String getPartName() {
643          return partName;
644        }
645        public long getStartIndex() {
646          return startIndex;
647        }
648        public long getLength() {
649          return length;
650        }
651        public long getModificationTime() {
652          return modificationTime;
653        }
654      }
655      
656      /**
657       * return the filestatus of files in har archive.
658       * The permission returned are that of the archive
659       * index files. The permissions are not persisted 
660       * while creating a hadoop archive.
661       * @param f the path in har filesystem
662       * @return filestatus.
663       * @throws IOException
664       */
665      @Override
666      public FileStatus getFileStatus(Path f) throws IOException {
667        HarStatus hstatus = getFileHarStatus(f);
668        return toFileStatus(hstatus, null);
669      }
670    
671      private HarStatus getFileHarStatus(Path f) throws IOException {
672        // get the fs DataInputStream for the underlying file
673        // look up the index.
674        Path p = makeQualified(f);
675        Path harPath = getPathInHar(p);
676        if (harPath == null) {
677          throw new IOException("Invalid file name: " + f + " in " + uri);
678        }
679        HarStatus hstatus = metadata.archive.get(harPath);
680        if (hstatus == null) {
681          throw new FileNotFoundException("File: " +  f + " does not exist in " + uri);
682        }
683        return hstatus;
684      }
685    
686      /**
687       * @return null since no checksum algorithm is implemented.
688       */
689      @Override
690      public FileChecksum getFileChecksum(Path f) {
691        return null;
692      }
693    
694      /**
695       * Returns a har input stream which fakes end of 
696       * file. It reads the index files to get the part 
697       * file name and the size and start of the file.
698       */
699      @Override
700      public FSDataInputStream open(Path f, int bufferSize) throws IOException {
701        // get the fs DataInputStream for the underlying file
702        HarStatus hstatus = getFileHarStatus(f);
703        if (hstatus.isDir()) {
704          throw new FileNotFoundException(f + " : not a file in " +
705                    archivePath);
706        }
707        return new HarFSDataInputStream(fs, new Path(archivePath, 
708            hstatus.getPartName()),
709            hstatus.getStartIndex(), hstatus.getLength(), bufferSize);
710      }
711    
712      /**
713       * Used for delegation token related functionality. Must delegate to
714       * underlying file system.
715       */
716      @Override
717      public FileSystem[] getChildFileSystems() {
718        return new FileSystem[]{fs};
719      }
720    
  /**
   * Not supported: har archives are read-only.
   * @throws IOException always
   */
  @Override
  public FSDataOutputStream create(Path f, FsPermission permission,
      boolean overwrite, int bufferSize, short replication, long blockSize,
      Progressable progress) throws IOException {
    throw new IOException("Har: create not allowed.");
  }
727    
  /**
   * Not supported: har archives are read-only.
   * @throws IOException always
   */
  @SuppressWarnings("deprecation")
  @Override
  public FSDataOutputStream createNonRecursive(Path f, boolean overwrite,
      int bufferSize, short replication, long blockSize, Progressable progress)
      throws IOException {
    throw new IOException("Har: create not allowed.");
  }
735    
  /**
   * Not supported: har archives are read-only.
   * @throws IOException always
   */
  @Override
  public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException {
    throw new IOException("Har: append not allowed.");
  }
740    
741      @Override
742      public void close() throws IOException {
743        super.close();
744        if (fs != null) {
745          try {
746            fs.close();
747          } catch(IOException ie) {
748            //this might already be closed
749            // ignore
750          }
751        }
752      }
753      
754      /**
755       * Not implemented.
756       */
757      @Override
758      public boolean setReplication(Path src, short replication) throws IOException{
759        throw new IOException("Har: setReplication not allowed");
760      }
761    
  /**
   * Not supported: har archives are read-only.
   * @throws IOException always
   */
  @Override
  public boolean rename(Path src, Path dst) throws IOException {
    throw new IOException("Har: rename not allowed");
  }
766    
  /**
   * Not supported: har archives are read-only.
   * @throws IOException always
   */
  @Override
  public FSDataOutputStream append(Path f) throws IOException {
    throw new IOException("Har: append not allowed");
  }
771    
772      /**
773       * Not implemented.
774       */
775      @Override
776      public boolean delete(Path f, boolean recursive) throws IOException { 
777        throw new IOException("Har: delete not allowed");
778      }
779    
780      /**
781       * liststatus returns the children of a directory 
782       * after looking up the index files.
783       */
784      @Override
785      public FileStatus[] listStatus(Path f) throws IOException {
786        //need to see if the file is an index in file
787        //get the filestatus of the archive directory
788        // we will create fake filestatuses to return
789        // to the client
790        List<FileStatus> statuses = new ArrayList<FileStatus>();
791        Path tmpPath = makeQualified(f);
792        Path harPath = getPathInHar(tmpPath);
793        HarStatus hstatus = metadata.archive.get(harPath);
794        if (hstatus == null) {
795          throw new FileNotFoundException("File " + f + " not found in " + archivePath);
796        }
797        if (hstatus.isDir()) {
798          fileStatusesInIndex(hstatus, statuses);
799        } else {
800          statuses.add(toFileStatus(hstatus, null));
801        }
802        
803        return statuses.toArray(new FileStatus[statuses.size()]);
804      }
805      
806      /**
807       * return the top level archive path.
808       */
809      @Override
810      public Path getHomeDirectory() {
811        return new Path(uri.toString());
812      }
813    
  /**
   * The working directory of an archive is fixed at its root, so this is
   * deliberately a no-op.
   */
  @Override
  public void setWorkingDirectory(Path newDir) {
    //does nothing.
  }
818      
819      /**
820       * not implemented.
821       */
822      @Override
823      public boolean mkdirs(Path f, FsPermission permission) throws IOException {
824        throw new IOException("Har: mkdirs not allowed");
825      }
826      
827      /**
828       * not implemented.
829       */
830      @Override
831      public void copyFromLocalFile(boolean delSrc, boolean overwrite,
832          Path src, Path dst) throws IOException {
833        throw new IOException("Har: copyfromlocalfile not allowed");
834      }
835    
836      @Override
837      public void copyFromLocalFile(boolean delSrc, boolean overwrite,
838          Path[] srcs, Path dst) throws IOException {
839        throw new IOException("Har: copyfromlocalfile not allowed");
840      }
841    
842      /**
843       * copies the file in the har filesystem to a local file.
844       */
845      @Override
846      public void copyToLocalFile(boolean delSrc, Path src, Path dst) 
847        throws IOException {
848        FileUtil.copy(this, src, getLocal(getConf()), dst, false, getConf());
849      }
850      
851      /**
852       * not implemented.
853       */
854      @Override
855      public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) 
856        throws IOException {
857        throw new IOException("Har: startLocalOutput not allowed");
858      }
859      
860      /**
861       * not implemented.
862       */
863      @Override
864      public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) 
865        throws IOException {
866        throw new IOException("Har: completeLocalOutput not allowed");
867      }
868      
869      /**
870       * not implemented.
871       */
872      @Override
873      public void setOwner(Path p, String username, String groupname)
874        throws IOException {
875        throw new IOException("Har: setowner not allowed");
876      }
877    
878      @Override
879      public void setTimes(Path p, long mtime, long atime) throws IOException {
880        throw new IOException("Har: setTimes not allowed");
881      }
882    
883      /**
884       * Not implemented.
885       */
886      @Override
887      public void setPermission(Path p, FsPermission permission)
888        throws IOException {
889        throw new IOException("Har: setPermission not allowed");
890      }
891      
892      /**
893       * Hadoop archives input stream. This input stream fakes EOF 
894       * since archive files are part of bigger part files.
895       */
896      private static class HarFSDataInputStream extends FSDataInputStream {
897        /**
898         * Create an input stream that fakes all the reads/positions/seeking.
899         */
900        private static class HarFsInputStream extends FSInputStream
901            implements CanSetDropBehind, CanSetReadahead {
902          private long position, start, end;
903          //The underlying data input stream that the
904          // underlying filesystem will return.
905          private FSDataInputStream underLyingStream;
906          //one byte buffer
907          private byte[] oneBytebuff = new byte[1];
908          HarFsInputStream(FileSystem fs, Path path, long start,
909              long length, int bufferSize) throws IOException {
910            underLyingStream = fs.open(path, bufferSize);
911            underLyingStream.seek(start);
912            // the start of this file in the part file
913            this.start = start;
914            // the position pointer in the part file
915            this.position = start;
916            // the end pointer in the part file
917            this.end = start + length;
918          }
919          
920          @Override
921          public synchronized int available() throws IOException {
922            long remaining = end - underLyingStream.getPos();
923            if (remaining > (long)Integer.MAX_VALUE) {
924              return Integer.MAX_VALUE;
925            }
926            return (int) remaining;
927          }
928          
929          @Override
930          public synchronized  void close() throws IOException {
931            underLyingStream.close();
932            super.close();
933          }
934          
935          //not implemented
936          @Override
937          public void mark(int readLimit) {
938            // do nothing 
939          }
940          
941          /**
942           * reset is not implemented
943           */
944          @Override
945          public void reset() throws IOException {
946            throw new IOException("reset not implemented.");
947          }
948          
949          @Override
950          public synchronized int read() throws IOException {
951            int ret = read(oneBytebuff, 0, 1);
952            return (ret <= 0) ? -1: (oneBytebuff[0] & 0xff);
953          }
954          
955          @Override
956          public synchronized int read(byte[] b) throws IOException {
957            int ret = read(b, 0, b.length);
958            if (ret != -1) {
959              position += ret;
960            }
961            return ret;
962          }
963          
964          /**
965           * 
966           */
967          @Override
968          public synchronized int read(byte[] b, int offset, int len) 
969            throws IOException {
970            int newlen = len;
971            int ret = -1;
972            if (position + len > end) {
973              newlen = (int) (end - position);
974            }
975            // end case
976            if (newlen == 0)
977              return ret;
978            ret = underLyingStream.read(b, offset, newlen);
979            position += ret;
980            return ret;
981          }
982          
983          @Override
984          public synchronized long skip(long n) throws IOException {
985            long tmpN = n;
986            if (tmpN > 0) {
987              if (position + tmpN > end) {
988                tmpN = end - position;
989              }
990              underLyingStream.seek(tmpN + position);
991              position += tmpN;
992              return tmpN;
993            }
994            return (tmpN < 0)? -1 : 0;
995          }
996          
997          @Override
998          public synchronized long getPos() throws IOException {
999            return (position - start);
1000          }
1001          
1002          @Override
1003          public synchronized void seek(long pos) throws IOException {
1004            if (pos < 0 || (start + pos > end)) {
1005              throw new IOException("Failed to seek: EOF");
1006            }
1007            position = start + pos;
1008            underLyingStream.seek(position);
1009          }
1010    
1011          @Override
1012          public boolean seekToNewSource(long targetPos) throws IOException {
1013            // do not need to implement this
1014            // hdfs in itself does seektonewsource
1015            // while reading.
1016            return false;
1017          }
1018          
1019          /**
1020           * implementing position readable. 
1021           */
1022          @Override
1023          public int read(long pos, byte[] b, int offset, int length) 
1024          throws IOException {
1025            int nlength = length;
1026            if (start + nlength + pos > end) {
1027              nlength = (int) (end - (start + pos));
1028            }
1029            return underLyingStream.read(pos + start , b, offset, nlength);
1030          }
1031          
1032          /**
1033           * position readable again.
1034           */
1035          @Override
1036          public void readFully(long pos, byte[] b, int offset, int length) 
1037          throws IOException {
1038            if (start + length + pos > end) {
1039              throw new IOException("Not enough bytes to read.");
1040            }
1041            underLyingStream.readFully(pos + start, b, offset, length);
1042          }
1043          
1044          @Override
1045          public void readFully(long pos, byte[] b) throws IOException {
1046              readFully(pos, b, 0, b.length);
1047          }
1048    
1049          @Override
1050          public void setReadahead(Long readahead) throws IOException {
1051            underLyingStream.setReadahead(readahead);
1052          }
1053    
1054          @Override
1055          public void setDropBehind(Boolean dropBehind) throws IOException {
1056            underLyingStream.setDropBehind(dropBehind);
1057          }
1058        }
1059      
1060        /**
1061         * constructors for har input stream.
1062         * @param fs the underlying filesystem
1063         * @param p The path in the underlying filesystem
1064         * @param start the start position in the part file
1065         * @param length the length of valid data in the part file
1066         * @param bufsize the buffer size
1067         * @throws IOException
1068         */
1069        public HarFSDataInputStream(FileSystem fs, Path  p, long start, 
1070            long length, int bufsize) throws IOException {
1071            super(new HarFsInputStream(fs, p, start, length, bufsize));
1072        }
1073      }
1074    
1075      private class HarMetaData {
1076        private FileSystem fs;
1077        private int version;
1078        // the masterIndex of the archive
1079        private Path masterIndexPath;
1080        // the index file 
1081        private Path archiveIndexPath;
1082    
1083        private long masterIndexTimestamp;
1084        private long archiveIndexTimestamp;
1085    
1086        List<Store> stores = new ArrayList<Store>();
1087        Map<Path, HarStatus> archive = new HashMap<Path, HarStatus>();
1088        private Map<Path, FileStatus> partFileStatuses = new HashMap<Path, FileStatus>();
1089    
1090        public HarMetaData(FileSystem fs, Path masterIndexPath, Path archiveIndexPath) {
1091          this.fs = fs;
1092          this.masterIndexPath = masterIndexPath;
1093          this.archiveIndexPath = archiveIndexPath;
1094        }
1095    
1096        public FileStatus getPartFileStatus(Path partPath) throws IOException {
1097          FileStatus status;
1098          status = partFileStatuses.get(partPath);
1099          if (status == null) {
1100            status = fs.getFileStatus(partPath);
1101            partFileStatuses.put(partPath, status);
1102          }
1103          return status;
1104        }
1105    
1106        public long getMasterIndexTimestamp() {
1107          return masterIndexTimestamp;
1108        }
1109    
1110        public long getArchiveIndexTimestamp() {
1111          return archiveIndexTimestamp;
1112        }
1113    
1114        private int getVersion() {
1115          return version;
1116        }
1117    
1118        private void parseMetaData() throws IOException {
1119          Text line = new Text();
1120          long read;
1121          FSDataInputStream in = null;
1122          LineReader lin = null;
1123    
1124          try {
1125            in = fs.open(masterIndexPath);
1126            FileStatus masterStat = fs.getFileStatus(masterIndexPath);
1127            masterIndexTimestamp = masterStat.getModificationTime();
1128            lin = new LineReader(in, getConf());
1129            read = lin.readLine(line);
1130    
1131            // the first line contains the version of the index file
1132            String versionLine = line.toString();
1133            String[] arr = versionLine.split(" ");
1134            version = Integer.parseInt(arr[0]);
1135            // make it always backwards-compatible
1136            if (this.version > HarFileSystem.VERSION) {
1137              throw new IOException("Invalid version " + 
1138                  this.version + " expected " + HarFileSystem.VERSION);
1139            }
1140    
1141            // each line contains a hashcode range and the index file name
1142            String[] readStr;
1143            while(read < masterStat.getLen()) {
1144              int b = lin.readLine(line);
1145              read += b;
1146              readStr = line.toString().split(" ");
1147              int startHash = Integer.parseInt(readStr[0]);
1148              int endHash  = Integer.parseInt(readStr[1]);
1149              stores.add(new Store(Long.parseLong(readStr[2]), 
1150                  Long.parseLong(readStr[3]), startHash,
1151                  endHash));
1152              line.clear();
1153            }
1154          } catch (IOException ioe) {
1155            LOG.warn("Encountered exception ", ioe);
1156            throw ioe;
1157          } finally {
1158            IOUtils.cleanup(LOG, lin, in);
1159          }
1160    
1161          FSDataInputStream aIn = fs.open(archiveIndexPath);
1162          try {
1163            FileStatus archiveStat = fs.getFileStatus(archiveIndexPath);
1164            archiveIndexTimestamp = archiveStat.getModificationTime();
1165            LineReader aLin;
1166    
1167            // now start reading the real index file
1168            for (Store s: stores) {
1169              read = 0;
1170              aIn.seek(s.begin);
1171              aLin = new LineReader(aIn, getConf());
1172              while (read + s.begin < s.end) {
1173                int tmp = aLin.readLine(line);
1174                read += tmp;
1175                String lineFeed = line.toString();
1176                String[] parsed = lineFeed.split(" ");
1177                parsed[0] = decodeFileName(parsed[0]);
1178                archive.put(new Path(parsed[0]), new HarStatus(lineFeed));
1179                line.clear();
1180              }
1181            }
1182          } finally {
1183            IOUtils.cleanup(LOG, aIn);
1184          }
1185        }
1186      }
1187      
1188      /*
1189       * testing purposes only:
1190       */
1191      HarMetaData getMetadata() {
1192        return metadata;
1193      }
1194    
1195      private static class LruCache<K, V> extends LinkedHashMap<K, V> {
1196        private final int MAX_ENTRIES;
1197    
1198        public LruCache(int maxEntries) {
1199            super(maxEntries + 1, 1.0f, true);
1200            MAX_ENTRIES = maxEntries;
1201        }
1202    
1203        @Override
1204        protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
1205            return size() > MAX_ENTRIES;
1206        }
1207      }
1208    
1209      @SuppressWarnings("deprecation")
1210      @Override
1211      public FsServerDefaults getServerDefaults() throws IOException {
1212        return fs.getServerDefaults();
1213      }
1214    
1215      @Override
1216      public FsServerDefaults getServerDefaults(Path f) throws IOException {
1217        return fs.getServerDefaults(f);
1218      }
1219    
1220      @Override
1221      public long getUsed() throws IOException{
1222        return fs.getUsed();
1223      }
1224    
1225      @SuppressWarnings("deprecation")
1226      @Override
1227      public long getDefaultBlockSize() {
1228        return fs.getDefaultBlockSize();
1229      }
1230    
1231      @SuppressWarnings("deprecation")
1232      @Override
1233      public long getDefaultBlockSize(Path f) {
1234        return fs.getDefaultBlockSize(f);
1235      }
1236    
1237      @SuppressWarnings("deprecation")
1238      @Override
1239      public short getDefaultReplication() {
1240        return fs.getDefaultReplication();
1241      }
1242    
1243      @Override
1244      public short getDefaultReplication(Path f) {
1245        return fs.getDefaultReplication(f);
1246      }
1247    }