public class DistCpUtils
extends java.lang.Object
| Constructor | Description |
|---|---|
DistCpUtils() |
| Modifier and Type | Method | Description |
|---|---|---|
static void |
checkFileSystemAclSupport(org.apache.hadoop.fs.FileSystem fs) |
Determines if a file system supports ACLs by running a canary getAclStatus
request on the file system root.
|
static void |
checkFileSystemXAttrSupport(org.apache.hadoop.fs.FileSystem fs) |
Determines if a file system supports XAttrs by running a getXAttrs request
on the file system root.
|
static CopyMapper.ChecksumComparison |
checksumsAreEqual(org.apache.hadoop.fs.FileSystem sourceFS,
org.apache.hadoop.fs.Path source,
org.apache.hadoop.fs.FileChecksum sourceChecksum,
org.apache.hadoop.fs.FileSystem targetFS,
org.apache.hadoop.fs.Path target,
long sourceLen) |
Utility to compare checksums for the paths specified.
|
static void |
compareFileLengthsAndChecksums(long srcLen,
org.apache.hadoop.fs.FileSystem sourceFS,
org.apache.hadoop.fs.Path source,
org.apache.hadoop.fs.FileChecksum sourceChecksum,
org.apache.hadoop.fs.FileSystem targetFS,
org.apache.hadoop.fs.Path target,
boolean skipCrc,
long targetLen) |
Utility to compare file lengths and checksums for source and target.
|
static org.apache.hadoop.fs.Path |
discloseRelativeSymlink(org.apache.hadoop.fs.FileStatus stat) |
|
static java.util.List<org.apache.hadoop.fs.permission.AclEntry> |
getAcl(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.FileStatus fileStatus) |
Returns a file's full logical ACL.
|
static long |
getFileSize(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration configuration) |
Retrieves size of the file at the specified path.
|
static java.text.DecimalFormat |
getFormatter() |
|
static int |
getInt(org.apache.hadoop.conf.Configuration configuration,
java.lang.String label) |
Utility to retrieve a specified key from a Configuration.
|
static long |
getLong(org.apache.hadoop.conf.Configuration configuration,
java.lang.String label) |
Utility to retrieve a specified key from a Configuration.
|
static FileListingEntry |
getOriginalFileStatus(org.apache.hadoop.fs.FileStatus sourceStatus,
org.apache.hadoop.conf.Configuration conf,
boolean keepLinks,
java.util.Map<java.lang.String,java.util.Set<java.lang.String>> loopLocator) |
|
static java.lang.String |
getRelativePath(org.apache.hadoop.fs.Path sourceRootPath,
org.apache.hadoop.fs.Path childPath) |
|
static java.lang.String |
getRelativePath(FileListingEntry listingEntry) |
Gets relative path of child path with respect to a root path
For ex.
|
static java.lang.String |
getRelativePath(FileListingEntry root,
FileListingEntry child) |
|
static org.apache.hadoop.fs.Path |
getSplitChunkPath(org.apache.hadoop.fs.Path targetFile,
CopyListingFileStatus srcFileStatus) |
|
static java.lang.Class<? extends org.apache.hadoop.mapreduce.InputFormat> |
getStrategy(org.apache.hadoop.conf.Configuration conf,
DistCpContext context) |
Returns the class that implements a copy strategy.
|
static java.lang.String |
getStringDescriptionFor(long nBytes) |
|
static org.apache.hadoop.fs.Path |
getTmpFile(org.apache.hadoop.fs.Path target,
org.apache.hadoop.mapreduce.Mapper.Context context,
boolean splitSource) |
|
static java.util.Map<java.lang.String,byte[]> |
getXAttrs(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.Path path) |
Returns all of a file's XAttrs.
|
static boolean |
isLoop(org.apache.hadoop.fs.FileStatus src,
java.util.Map<java.lang.String,java.util.Set<java.lang.String>> loopLocator) |
|
static boolean |
isParentLoop(org.apache.hadoop.fs.FileStatus sourceStatus,
org.apache.hadoop.conf.Configuration conf) |
|
static java.lang.String |
packAttributes(java.util.EnumSet<DistCpOptions.FileAttribute> attributes) |
Pack file preservation attributes into a string, containing
just the first character of each preservation attribute
|
static FileListingEntry |
pathToFileListingEntry(org.apache.hadoop.fs.Path path,
org.apache.hadoop.fs.FileSystem fs) |
|
static void |
preserve(org.apache.hadoop.fs.FileSystem targetFS,
org.apache.hadoop.fs.Path path,
CopyListingFileStatus srcFileStatus,
java.util.EnumSet<DistCpOptions.FileAttribute> attributes,
boolean preserveRawXattrs) |
Preserve attribute on file matching that of the file status being sent
as argument.
|
static <T> void |
publish(org.apache.hadoop.conf.Configuration configuration,
java.lang.String label,
T value) |
Utility to publish a value to a configuration.
|
static org.apache.hadoop.fs.Path |
sortListing(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path sourceListing) |
Sort sequence file containing FileStatus and Text as key and value
respectively.
|
static void |
sortListing(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path sourceListing,
org.apache.hadoop.fs.Path output) |
Sort sequence file containing FileStatus and Text as key and value
respectively, saving the result to the output path, which
will be deleted first.
|
static void |
toCopyListingFileStatus(org.apache.hadoop.fs.FileSystem fileSystem,
FileListingEntry listingEntry,
boolean preserveAcls,
boolean preserveXAttrs,
boolean preserveRawXAttrs,
int blocksPerChunk) |
Converts FileStatus to a list of CopyListingFileStatus.
|
static CopyListingFileStatus |
toCopyListingFileStatusHelper(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.FileStatus fileStatus,
boolean preserveAcls,
boolean preserveXAttrs,
boolean preserveRawXAttrs,
long chunkOffset,
long chunkLength) |
Converts a FileStatus to a CopyListingFileStatus.
|
static java.util.EnumSet<DistCpOptions.FileAttribute> |
unpackAttributes(java.lang.String attributes) |
Unpacks preservation attribute string containing the first character of
each preservation attribute back to a set of attributes to preserve
|
public static long getFileSize(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration configuration)
throws java.io.IOException
path - The path of the file whose size is sought.
configuration - Configuration, to retrieve the appropriate FileSystem.
java.io.IOException
public static <T> void publish(org.apache.hadoop.conf.Configuration configuration,
java.lang.String label,
T value)
T - The type of the value.
configuration - The Configuration to which the value must be written.
label - The label for the value being published.
value - The value being published.
public static int getInt(org.apache.hadoop.conf.Configuration configuration,
java.lang.String label)
configuration - The Configuration in which the key is sought.
label - The key being sought.
public static long getLong(org.apache.hadoop.conf.Configuration configuration,
java.lang.String label)
configuration - The Configuration in which the key is sought.
label - The key being sought.
public static java.lang.Class<? extends org.apache.hadoop.mapreduce.InputFormat> getStrategy(org.apache.hadoop.conf.Configuration conf,
DistCpContext context)
conf - Configuration object
context - Distcp context with associated input options
public static java.lang.String getRelativePath(FileListingEntry listingEntry)
listingEntry - listingEntry
public static java.lang.String getRelativePath(org.apache.hadoop.fs.Path sourceRootPath,
org.apache.hadoop.fs.Path childPath)
public static java.lang.String getRelativePath(FileListingEntry root, FileListingEntry child)
public static FileListingEntry pathToFileListingEntry(org.apache.hadoop.fs.Path path, org.apache.hadoop.fs.FileSystem fs) throws java.io.IOException
java.io.IOException
public static java.lang.String packAttributes(java.util.EnumSet<DistCpOptions.FileAttribute> attributes)
attributes - Attribute set to preserve
public static java.util.EnumSet<DistCpOptions.FileAttribute> unpackAttributes(java.lang.String attributes)
attributes - Attribute string
public static void preserve(org.apache.hadoop.fs.FileSystem targetFS,
org.apache.hadoop.fs.Path path,
CopyListingFileStatus srcFileStatus,
java.util.EnumSet<DistCpOptions.FileAttribute> attributes,
boolean preserveRawXattrs)
throws java.io.IOException
targetFS - File system
path - Path that needs to preserve original file status
srcFileStatus - Original file status
attributes - Attribute set that needs to be preserved
preserveRawXattrs - if true, raw.* xattrs should be preserved
java.io.IOException - Exception if any (particularly relating to group/owner
change or any transient error)
public static java.util.List<org.apache.hadoop.fs.permission.AclEntry> getAcl(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.FileStatus fileStatus)
throws java.io.IOException
fileSystem - FileSystem containing the file
fileStatus - FileStatus of file
java.io.IOException - if there is an I/O error
public static java.util.Map<java.lang.String,byte[]> getXAttrs(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.Path path)
throws java.io.IOException
fileSystem - FileSystem containing the file
path - file path
java.io.IOException - if there is an I/O error
public static void toCopyListingFileStatus(org.apache.hadoop.fs.FileSystem fileSystem,
FileListingEntry listingEntry,
boolean preserveAcls,
boolean preserveXAttrs,
boolean preserveRawXAttrs,
int blocksPerChunk)
throws java.io.IOException
fileSystem - FileSystem containing the file
listingEntry - listingEntry
preserveAcls - boolean true if preserving ACLs
preserveXAttrs - boolean true if preserving XAttrs
preserveRawXAttrs - boolean true if preserving raw.* XAttrs
blocksPerChunk - size of chunks when copying chunks in parallel
java.io.IOException - if there is an I/O error
public static FileListingEntry getOriginalFileStatus(org.apache.hadoop.fs.FileStatus sourceStatus, org.apache.hadoop.conf.Configuration conf, boolean keepLinks, java.util.Map<java.lang.String,java.util.Set<java.lang.String>> loopLocator)
public static boolean isParentLoop(org.apache.hadoop.fs.FileStatus sourceStatus,
org.apache.hadoop.conf.Configuration conf)
throws java.io.IOException
java.io.IOException
public static boolean isLoop(org.apache.hadoop.fs.FileStatus src,
java.util.Map<java.lang.String,java.util.Set<java.lang.String>> loopLocator)
throws java.io.IOException
java.io.IOException
public static org.apache.hadoop.fs.Path discloseRelativeSymlink(org.apache.hadoop.fs.FileStatus stat)
throws java.io.IOException
java.io.IOException
public static CopyListingFileStatus toCopyListingFileStatusHelper(org.apache.hadoop.fs.FileSystem fileSystem, org.apache.hadoop.fs.FileStatus fileStatus, boolean preserveAcls, boolean preserveXAttrs, boolean preserveRawXAttrs, long chunkOffset, long chunkLength) throws java.io.IOException
fileSystem - FileSystem containing the file
fileStatus - FileStatus of file
preserveAcls - boolean true if preserving ACLs
preserveXAttrs - boolean true if preserving XAttrs
preserveRawXAttrs - boolean true if preserving raw.* XAttrs
chunkOffset - chunk offset in bytes
chunkLength - chunk length in bytes
java.io.IOException - if there is an I/O error
public static org.apache.hadoop.fs.Path sortListing(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path sourceListing)
throws java.io.IOException
conf - Configuration
sourceListing - Source listing file
java.io.IOException - Any exception during sort.
public static void sortListing(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path sourceListing,
org.apache.hadoop.fs.Path output)
throws java.io.IOException
Sort sequence file containing FileStatus and Text as key and value
respectively, saving the result to the output path, which
will be deleted first.
conf - Configuration
sourceListing - Source listing file
output - output path
java.io.IOException - Any exception during sort.
public static void checkFileSystemAclSupport(org.apache.hadoop.fs.FileSystem fs)
throws CopyListing.AclsNotSupportedException
fs - FileSystem to check
CopyListing.AclsNotSupportedException - if fs does not support ACLs
public static void checkFileSystemXAttrSupport(org.apache.hadoop.fs.FileSystem fs)
throws CopyListing.XAttrsNotSupportedException
fs - FileSystem to check
CopyListing.XAttrsNotSupportedException - if fs does not support XAttrs
public static java.text.DecimalFormat getFormatter()
public static java.lang.String getStringDescriptionFor(long nBytes)
public static CopyMapper.ChecksumComparison checksumsAreEqual(org.apache.hadoop.fs.FileSystem sourceFS, org.apache.hadoop.fs.Path source, org.apache.hadoop.fs.FileChecksum sourceChecksum, org.apache.hadoop.fs.FileSystem targetFS, org.apache.hadoop.fs.Path target, long sourceLen) throws java.io.IOException
sourceFS - FileSystem for the source path.
source - The source path.
sourceChecksum - The checksum of the source file. If it is null we
still need to retrieve it through sourceFS.
targetFS - FileSystem for the target path.
target - The target path.
java.io.IOException - if there's an exception while retrieving checksums.
public static void compareFileLengthsAndChecksums(long srcLen,
org.apache.hadoop.fs.FileSystem sourceFS,
org.apache.hadoop.fs.Path source,
org.apache.hadoop.fs.FileChecksum sourceChecksum,
org.apache.hadoop.fs.FileSystem targetFS,
org.apache.hadoop.fs.Path target,
boolean skipCrc,
long targetLen)
throws java.io.IOException
sourceFS - FileSystem for the source path.
source - The source path.
sourceChecksum - The checksum of the source file. If it is null we
still need to retrieve it through sourceFS.
targetFS - FileSystem for the target path.
target - The target path.
skipCrc - The flag to indicate whether to skip checksums.
java.io.IOException - if there's a mismatch in file lengths or checksums.
public static org.apache.hadoop.fs.Path getSplitChunkPath(org.apache.hadoop.fs.Path targetFile,
CopyListingFileStatus srcFileStatus)
public static org.apache.hadoop.fs.Path getTmpFile(org.apache.hadoop.fs.Path target,
org.apache.hadoop.mapreduce.Mapper.Context context,
boolean splitSource)
Copyright © 2008–2025 Apache Software Foundation. All rights reserved.