001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.fs;
020
021import java.io.BufferedReader;
022import java.io.File;
023import java.io.FileNotFoundException;
024import java.io.IOException;
025import java.io.InputStreamReader;
026import java.util.Arrays;
027
028import org.apache.hadoop.util.Shell;
029
030/**
031 * Class for creating hardlinks.
 * Supports Unix/Linux, Windows via winutils, and Mac OS X.
033 * 
034 * The HardLink class was formerly a static inner class of FSUtil,
035 * and the methods provided were blatantly non-thread-safe.
036 * To enable volume-parallel Update snapshots, we now provide static 
037 * threadsafe methods that allocate new buffer string arrays
038 * upon each call.  We also provide an API to hardlink all files in a
039 * directory with a single command, which is up to 128 times more 
040 * efficient - and minimizes the impact of the extra buffer creations.
041 */
042public class HardLink { 
043
044  private static HardLinkCommandGetter getHardLinkCommand;
045  
046  public final LinkStats linkStats; //not static
047  
048  //initialize the command "getters" statically, so can use their 
049  //methods without instantiating the HardLink object
050  static { 
051    if (Shell.WINDOWS) {
052      // Windows
053      getHardLinkCommand = new HardLinkCGWin();
054    } else {
055      // Unix or Linux
056      getHardLinkCommand = new HardLinkCGUnix();
057      //override getLinkCountCommand for the particular Unix variant
058      //Linux is already set as the default - {"stat","-c%h", null}
059      if (Shell.MAC || Shell.FREEBSD) {
060        String[] linkCountCmdTemplate = {"/usr/bin/stat","-f%l", null};
061        HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);
062      } else if (Shell.SOLARIS) {
063        String[] linkCountCmdTemplate = {"ls","-l", null};
064        HardLinkCGUnix.setLinkCountCmdTemplate(linkCountCmdTemplate);        
065      }
066    }
067  }
068
069  public HardLink() {
070    linkStats = new LinkStats();
071  }
072  
073  /**
074   * This abstract class bridges the OS-dependent implementations of the 
075   * needed functionality for creating hardlinks and querying link counts.
076   * The particular implementation class is chosen during 
077   * static initialization phase of the HardLink class.
078   * The "getter" methods construct shell command strings for various purposes.
079   */
080  private static abstract class HardLinkCommandGetter {
081
082    /**
083     * Get the command string needed to hardlink a bunch of files from
084     * a single source directory into a target directory.  The source directory
085     * is not specified here, but the command will be executed using the source
086     * directory as the "current working directory" of the shell invocation.
087     * 
088     * @param fileBaseNames - array of path-less file names, relative
089     *            to the source directory
090     * @param linkDir - target directory where the hardlinks will be put
091     * @return - an array of Strings suitable for use as a single shell command
092     *            with {@link Runtime.exec()}
093     * @throws IOException - if any of the file or path names misbehave
094     */
095    abstract String[] linkMult(String[] fileBaseNames, File linkDir) 
096                          throws IOException;
097    
098    /**
099     * Get the command string needed to hardlink a single file
100     */
101    abstract String[] linkOne(File file, File linkName) throws IOException;
102    
103    /**
104     * Get the command string to query the hardlink count of a file
105     */
106    abstract String[] linkCount(File file) throws IOException;
107    
108    /**
109     * Calculate the total string length of the shell command
110     * resulting from execution of linkMult, plus the length of the
111     * source directory name (which will also be provided to the shell)
112     * 
113     * @param fileDir - source directory, parent of fileBaseNames
114     * @param fileBaseNames - array of path-less file names, relative
115     *            to the source directory
116     * @param linkDir - target directory where the hardlinks will be put
117     * @return - total data length (must not exceed maxAllowedCmdArgLength)
118     * @throws IOException
119     */
120    abstract int getLinkMultArgLength(
121                     File fileDir, String[] fileBaseNames, File linkDir) 
122                     throws IOException;
123    
124    /**
125     * Get the maximum allowed string length of a shell command on this OS,
126     * which is just the documented minimum guaranteed supported command
127     * length - aprx. 32KB for Unix, and 8KB for Windows.
128     */
129    abstract int getMaxAllowedCmdArgLength(); 
130  }
131  
132  /**
133   * Implementation of HardLinkCommandGetter class for Unix
134   */
135  static class HardLinkCGUnix extends HardLinkCommandGetter {
136    private static String[] hardLinkCommand = {"ln", null, null};
137    private static String[] hardLinkMultPrefix = {"ln"};
138    private static String[] hardLinkMultSuffix = {null};
139    private static String[] getLinkCountCommand = {"stat","-c%h", null};
140    //Unix guarantees at least 32K bytes cmd length.
141    //Subtract another 64b to allow for Java 'exec' overhead
142    private static final int maxAllowedCmdArgLength = 32*1024 - 65;
143    
144    private static synchronized 
145    void setLinkCountCmdTemplate(String[] template) {
146      //May update this for specific unix variants, 
147      //after static initialization phase
148      getLinkCountCommand = template;
149    }
150    
151    /*
152     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
153     */
154    @Override
155    String[] linkOne(File file, File linkName) 
156    throws IOException {
157      String[] buf = new String[hardLinkCommand.length];
158      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
159      //unix wants argument order: "ln <existing> <new>"
160      buf[1] = FileUtil.makeShellPath(file, true); 
161      buf[2] = FileUtil.makeShellPath(linkName, true);
162      return buf;
163    }
164    
165    /*
166     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
167     */
168    @Override
169    String[] linkMult(String[] fileBaseNames, File linkDir) 
170    throws IOException {
171      String[] buf = new String[fileBaseNames.length 
172                                + hardLinkMultPrefix.length 
173                                + hardLinkMultSuffix.length];
174      int mark=0;
175      System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
176                       hardLinkMultPrefix.length);
177      mark += hardLinkMultPrefix.length;
178      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
179      mark += fileBaseNames.length;
180      buf[mark] = FileUtil.makeShellPath(linkDir, true);
181      return buf;
182    }
183    
184    /*
185     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
186     */
187    @Override
188    String[] linkCount(File file) 
189    throws IOException {
190      String[] buf = new String[getLinkCountCommand.length];
191      System.arraycopy(getLinkCountCommand, 0, buf, 0, 
192                       getLinkCountCommand.length);
193      buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
194      return buf;
195    }
196    
197    /*
198     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
199     */
200    @Override
201    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
202    throws IOException{
203      int sum = 0;
204      for (String x : fileBaseNames) {
205        // add 1 to account for terminal null or delimiter space
206        sum += 1 + ((x == null) ? 0 : x.length());
207      }
208      sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
209             + FileUtil.makeShellPath(linkDir, true).length();
210      //add the fixed overhead of the hardLinkMult prefix and suffix
211      sum += 3; //length("ln") + 1
212      return sum;
213    }
214    
215    /*
216     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
217     */
218    @Override
219    int getMaxAllowedCmdArgLength() {
220      return maxAllowedCmdArgLength;
221    }
222  }
223  
224  
225  /**
226   * Implementation of HardLinkCommandGetter class for Windows
227   */
228  static class HardLinkCGWin extends HardLinkCommandGetter {
229    //The Windows command getter impl class and its member fields are
230    //package-private ("default") access instead of "private" to assist 
231    //unit testing (sort of) on non-Win servers
232
233    static String[] hardLinkCommand = {
234                        Shell.WINUTILS,"hardlink","create", null, null};
235    static String[] hardLinkMultPrefix = {
236                        "cmd","/q","/c","for", "%f", "in", "("};
237    static String   hardLinkMultDir = "\\%f";
238    static String[] hardLinkMultSuffix = {
239                        ")", "do", Shell.WINUTILS, "hardlink", "create", null,
240                        "%f", "1>NUL"};
241    static String[] getLinkCountCommand = {
242                        Shell.WINUTILS, "hardlink",
243                        "stat", null};
244    //Windows guarantees only 8K - 1 bytes cmd length.
245    //Subtract another 64b to allow for Java 'exec' overhead
246    static final int maxAllowedCmdArgLength = 8*1024 - 65;
247
248    /*
249     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
250     */
251    @Override
252    String[] linkOne(File file, File linkName) 
253    throws IOException {
254      String[] buf = new String[hardLinkCommand.length];
255      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
256      //windows wants argument order: "create <new> <existing>"
257      buf[4] = file.getCanonicalPath(); 
258      buf[3] = linkName.getCanonicalPath();
259      return buf;
260    }
261    
262    /*
263     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
264     */
265    @Override
266    String[] linkMult(String[] fileBaseNames, File linkDir) 
267    throws IOException {
268      String[] buf = new String[fileBaseNames.length 
269                                + hardLinkMultPrefix.length 
270                                + hardLinkMultSuffix.length];
271      String td = linkDir.getCanonicalPath() + hardLinkMultDir;
272      int mark=0;
273      System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
274                       hardLinkMultPrefix.length);
275      mark += hardLinkMultPrefix.length;
276      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
277      mark += fileBaseNames.length;
278      System.arraycopy(hardLinkMultSuffix, 0, buf, mark, 
279                       hardLinkMultSuffix.length);
280      mark += hardLinkMultSuffix.length;
281      buf[mark - 3] = td;
282      return buf;
283    }
284    
285    /*
286     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
287     */
288    @Override
289    String[] linkCount(File file) 
290    throws IOException {
291      String[] buf = new String[getLinkCountCommand.length];
292      System.arraycopy(getLinkCountCommand, 0, buf, 0, 
293                       getLinkCountCommand.length);
294      buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
295      return buf;
296    }
297    
298    /*
299     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
300     */
301    @Override
302    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
303    throws IOException {
304      int sum = 0;
305      for (String x : fileBaseNames) {
306        // add 1 to account for terminal null or delimiter space
307        sum += 1 + ((x == null) ? 0 : x.length());
308      }
309      sum += 2 + fileDir.getCanonicalPath().length() +
310               linkDir.getCanonicalPath().length();
311      //add the fixed overhead of the hardLinkMult command 
312      //(prefix, suffix, and Dir suffix)
313      sum += ("cmd.exe /q /c for %f in ( ) do "
314              + Shell.WINUTILS + " hardlink create \\%f %f 1>NUL ").length();
315      return sum;
316    }
317    
318    /*
319     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
320     */
321    @Override
322    int getMaxAllowedCmdArgLength() {
323      return maxAllowedCmdArgLength;
324    }
325  }
326  
327  
328  /**
329   * Calculate the nominal length of all contributors to the total 
330   * commandstring length, including fixed overhead of the OS-dependent 
331   * command.  It's protected rather than private, to assist unit testing,
332   * but real clients are not expected to need it -- see the way 
333   * createHardLinkMult() uses it internally so the user doesn't need to worry
334   * about it.
335   * 
336   * @param fileDir - source directory, parent of fileBaseNames
337   * @param fileBaseNames - array of path-less file names, relative
338   *            to the source directory
339   * @param linkDir - target directory where the hardlinks will be put
340   * @return - total data length (must not exceed maxAllowedCmdArgLength)
341   * @throws IOException
342   */
343  protected static int getLinkMultArgLength(
344          File fileDir, String[] fileBaseNames, File linkDir) 
345  throws IOException {
346    return getHardLinkCommand.getLinkMultArgLength(fileDir, 
347          fileBaseNames, linkDir);
348  }
349  
350  /**
351   * Return this private value for use by unit tests.
352   * Shell commands are not allowed to have a total string length
353   * exceeding this size.
354   */
355  protected static int getMaxAllowedCmdArgLength() {
356    return getHardLinkCommand.getMaxAllowedCmdArgLength();
357  }
358  
359  /*
360   * ****************************************************
361   * Complexity is above.  User-visible functionality is below
362   * ****************************************************
363   */
364
365  /**
366   * Creates a hardlink 
367   * @param file - existing source file
368   * @param linkName - desired target link file
369   */
370  public static void createHardLink(File file, File linkName) 
371  throws IOException {
372    if (file == null) {
373      throw new IOException(
374          "invalid arguments to createHardLink: source file is null");
375    }
376    if (linkName == null) {
377      throw new IOException(
378          "invalid arguments to createHardLink: link name is null");
379    }
380          // construct and execute shell command
381    String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
382    Process process = Runtime.getRuntime().exec(hardLinkCommand);
383    try {
384      if (process.waitFor() != 0) {
385        String errMsg = new BufferedReader(new InputStreamReader(
386            process.getInputStream())).readLine();
387        if (errMsg == null)  errMsg = "";
388        String inpMsg = new BufferedReader(new InputStreamReader(
389            process.getErrorStream())).readLine();
390        if (inpMsg == null)  inpMsg = "";
391        throw new IOException(errMsg + inpMsg);
392      }
393    } catch (InterruptedException e) {
394      throw new IOException(e);
395    } finally {
396      process.destroy();
397    }
398  }
399
400  /**
401   * Creates hardlinks from multiple existing files within one parent
402   * directory, into one target directory.
403   * @param parentDir - directory containing source files
404   * @param fileBaseNames - list of path-less file names, as returned by 
405   *                        parentDir.list()
406   * @param linkDir - where the hardlinks should be put.  It must already exist.
407   * 
408   * If the list of files is too long (overflows maxAllowedCmdArgLength),
409   * we will automatically split it into multiple invocations of the
410   * underlying method.
411   */
412  public static void createHardLinkMult(File parentDir, String[] fileBaseNames, 
413      File linkDir) throws IOException {
414    //This is the public method all non-test clients are expected to use.
415    //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
416    createHardLinkMult(parentDir, fileBaseNames, linkDir, 
417                       getHardLinkCommand.getMaxAllowedCmdArgLength());
418  }
419
420  /*
421   * Implements {@link createHardLinkMult} with added variable  "maxLength",
422   * to ease unit testing of the auto-splitting feature for long lists.
423   * Likewise why it returns "callCount", the number of sub-arrays that
424   * the file list had to be split into.
425   * Non-test clients are expected to call the public method instead.
426   */
427  protected static int createHardLinkMult(File parentDir, 
428      String[] fileBaseNames, File linkDir, int maxLength) 
429  throws IOException {
430    if (parentDir == null) {
431      throw new IOException(
432          "invalid arguments to createHardLinkMult: parent directory is null");
433    }
434    if (linkDir == null) {
435      throw new IOException(
436          "invalid arguments to createHardLinkMult: link directory is null");
437    }
438    if (fileBaseNames == null) {
439      throw new IOException(
440          "invalid arguments to createHardLinkMult: "
441          + "filename list can be empty but not null");
442    }
443    if (fileBaseNames.length == 0) {
444      //the OS cmds can't handle empty list of filenames, 
445      //but it's legal, so just return.
446      return 0; 
447    }
448    if (!linkDir.exists()) {
449      throw new FileNotFoundException(linkDir + " not found.");
450    }
451
452    //if the list is too long, split into multiple invocations
453    int callCount = 0;
454    if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
455          && fileBaseNames.length > 1) {
456      String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
457      callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
458      String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
459          fileBaseNames.length);
460      callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);  
461      return callCount;
462    } else {
463      callCount = 1;
464    }
465    
466    // construct and execute shell command
467    String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames, 
468        linkDir);
469    Process process = Runtime.getRuntime().exec(hardLinkCommand, null, 
470        parentDir);
471    try {
472      if (process.waitFor() != 0) {
473        String errMsg = new BufferedReader(new InputStreamReader(
474            process.getInputStream())).readLine();
475        if (errMsg == null)  errMsg = "";
476        String inpMsg = new BufferedReader(new InputStreamReader(
477            process.getErrorStream())).readLine();
478        if (inpMsg == null)  inpMsg = "";
479        throw new IOException(errMsg + inpMsg);
480      }
481    } catch (InterruptedException e) {
482      throw new IOException(e);
483    } finally {
484      process.destroy();
485    }
486    return callCount;
487  }
488
489   /**
490   * Retrieves the number of links to the specified file.
491   */
492  public static int getLinkCount(File fileName) throws IOException {
493    if (fileName == null) {
494      throw new IOException(
495          "invalid argument to getLinkCount: file name is null");
496    }
497    if (!fileName.exists()) {
498      throw new FileNotFoundException(fileName + " not found.");
499    }
500
501    // construct and execute shell command
502    String[] cmd = getHardLinkCommand.linkCount(fileName);
503    String inpMsg = null;
504    String errMsg = null;
505    int exitValue = -1;
506    BufferedReader in = null;
507    BufferedReader err = null;
508
509    Process process = Runtime.getRuntime().exec(cmd);
510    try {
511      exitValue = process.waitFor();
512      in = new BufferedReader(new InputStreamReader(
513                                  process.getInputStream()));
514      inpMsg = in.readLine();
515      err = new BufferedReader(new InputStreamReader(
516                                   process.getErrorStream()));
517      errMsg = err.readLine();
518      if (inpMsg == null || exitValue != 0) {
519        throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
520      }
521      if (Shell.SOLARIS) {
522        String[] result = inpMsg.split("\\s+");
523        return Integer.parseInt(result[1]);
524      } else {
525        return Integer.parseInt(inpMsg);
526      }
527    } catch (NumberFormatException e) {
528      throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
529    } catch (InterruptedException e) {
530      throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
531    } finally {
532      process.destroy();
533      if (in != null) in.close();
534      if (err != null) err.close();
535    }
536  }
537  
538  /* Create an IOException for failing to get link count. */
539  private static IOException createIOException(File f, String message,
540      String error, int exitvalue, Exception cause) {
541
542    final String s = "Failed to get link count on file " + f
543        + ": message=" + message
544        + "; error=" + error
545        + "; exit value=" + exitvalue;
546    return (cause == null) ? new IOException(s) : new IOException(s, cause);
547  }
548  
549  
550  /**
551   * HardLink statistics counters and methods.
552   * Not multi-thread safe, obviously.
553   * Init is called during HardLink instantiation, above.
554   * 
555   * These are intended for use by knowledgeable clients, not internally, 
556   * because many of the internal methods are static and can't update these
557   * per-instance counters.
558   */
559  public static class LinkStats {
560    public int countDirs = 0; 
561    public int countSingleLinks = 0; 
562    public int countMultLinks = 0; 
563    public int countFilesMultLinks = 0; 
564    public int countEmptyDirs = 0; 
565    public int countPhysicalFileCopies = 0;
566  
567    public void clear() {
568      countDirs = 0; 
569      countSingleLinks = 0; 
570      countMultLinks = 0; 
571      countFilesMultLinks = 0; 
572      countEmptyDirs = 0; 
573      countPhysicalFileCopies = 0;
574    }
575    
576    public String report() {
577      return "HardLinkStats: " + countDirs + " Directories, including " 
578      + countEmptyDirs + " Empty Directories, " 
579      + countSingleLinks 
580      + " single Link operations, " + countMultLinks 
581      + " multi-Link operations, linking " + countFilesMultLinks 
582      + " files, total " + (countSingleLinks + countFilesMultLinks) 
583      + " linkable files.  Also physically copied " 
584      + countPhysicalFileCopies + " other files.";
585    }
586  }
587}
588