package org.apache.hadoop.hive.ql.io;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.parse.SplitSample;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.HadoopShimsSecure;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.CombineFileSplit;

/* loaded from: input_file:org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.class */
public class CombineHiveInputFormat<K extends WritableComparable, V extends Writable> extends HiveInputFormat<K, V> {
    private static final String CLASS_NAME = CombineHiveInputFormat.class.getName();
    public static final Log LOG = LogFactory.getLog(CLASS_NAME);
    private static final int MAX_CHECK_NONCOMBINABLE_THREAD_NUM = 50;
    private static final int DEFAULT_NUM_PATH_PER_THREAD = 100;

    /* loaded from: input_file:org/apache/hadoop/hive/ql/io/CombineHiveInputFormat$AvoidSplitCombination.class */
    /**
     * Optional capability of an input format: lets it decide, path by path,
     * whether that path must be kept out of split combination.
     *
     * <p>Within this file the check is applied to the cached input-format
     * instance of each input path; paths for which it answers {@code true}
     * are handed to the parent {@code HiveInputFormat.getSplits} instead of
     * being combined.
     */
    public interface AvoidSplitCombination {
        /**
         * @param path          the input path being examined
         * @param configuration the configuration of the running job
         * @return {@code true} if {@code path} should not be combined
         * @throws IOException if the decision cannot be made
         */
        boolean shouldSkipCombine(Path path, Configuration configuration) throws IOException;
    }

    /* loaded from: input_file:org/apache/hadoop/hive/ql/io/CombineHiveInputFormat$CheckNonCombinablePathCallable.class */
    /**
     * Worker that inspects one contiguous slice of the input path array and
     * reports which of those paths must not be combined.
     *
     * <p>The returned indices are positions in the <em>whole</em> {@code paths}
     * array (not offsets within the slice), because the caller merges the
     * results of all workers and looks them up against the full array.
     */
    private class CheckNonCombinablePathCallable implements Callable<Set<Integer>> {
        private final Path[] paths;
        private final int start;
        private final int length;
        private final JobConf conf;

        /**
         * @param pathArr the complete array of input paths
         * @param i       index of the first path this worker examines
         * @param i2      number of paths this worker examines
         * @param jobConf job configuration used to resolve input formats
         */
        public CheckNonCombinablePathCallable(Path[] pathArr, int i, int i2, JobConf jobConf) {
            this.paths = pathArr;
            this.start = i;
            this.length = i2;
            this.conf = jobConf;
        }

        /**
         * @return indices into {@code paths} whose input format implements
         *         {@link AvoidSplitCombination} and asked to skip combination
         * @throws Exception if a partition descriptor or input format cannot be resolved
         */
        @Override // java.util.concurrent.Callable
        public Set<Integer> call() throws Exception {
            Set<Integer> nonCombinablePathIndices = new HashSet<Integer>();
            for (int i = 0; i < this.length; i++) {
                int pathIndex = this.start + i;
                Path path = this.paths[pathIndex];
                PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(CombineHiveInputFormat.this.pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
                InputFormat<WritableComparable, Writable> inputFormat = HiveInputFormat.getInputFormatFromCache(part.getInputFileFormatClass(), this.conf);
                if ((inputFormat instanceof AvoidSplitCombination) && ((AvoidSplitCombination) inputFormat).shouldSkipCombine(path, this.conf)) {
                    if (CombineHiveInputFormat.LOG.isDebugEnabled()) {
                        CombineHiveInputFormat.LOG.debug("The path [" + path + "] is being parked for HiveInputFormat.getSplits");
                    }
                    // Record the global index: the caller tests membership with
                    // indices into the full path array, so a slice-local offset
                    // would flag the wrong path for every slice but the first.
                    nonCombinablePathIndices.add(Integer.valueOf(pathIndex));
                }
            }
            return nonCombinablePathIndices;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/apache/hadoop/hive/ql/io/CombineHiveInputFormat$CombineFilter.class */
    /**
     * Path filter backing one combine pool. It remembers the URI path
     * component of every registered path and accepts any path that is one of
     * them or lies underneath one of them.
     */
    public static class CombineFilter implements PathFilter {
        private final Set<String> pStrings = new HashSet<String>();

        /** Creates a filter that initially contains {@code path}. */
        public CombineFilter(Path path) {
            addPath(path);
        }

        /** Registers the URI path component (scheme/authority dropped) of {@code path}. */
        public void addPath(Path path) {
            String location = path.toUri().getPath();
            this.pStrings.add(location);
        }

        /**
         * Walks from {@code path} up through its ancestors and accepts as soon
         * as one of them is a registered location.
         *
         * @return {@code true} if {@code path} or any ancestor was registered;
         *         {@code false} otherwise, including for a {@code null} path
         */
        public boolean accept(Path path) {
            for (Path candidate = path; candidate != null; candidate = candidate.getParent()) {
                if (this.pStrings.contains(candidate.toUri().getPath())) {
                    return true;
                }
            }
            return false;
        }

        /** Renders the registered locations, each followed by a single space. */
        public String toString() {
            StringBuilder text = new StringBuilder("PathFilter: ");
            for (String location : this.pStrings) {
                text.append(location).append(" ");
            }
            return text.toString();
        }
    }

    /* loaded from: input_file:org/apache/hadoop/hive/ql/io/CombineHiveInputFormat$CombineHiveInputSplit.class */
    /**
     * Split wrapper that pairs a {@link CombineFileSplit} with the class name
     * of the input format that should read it. All size/offset/location
     * queries are forwarded to the wrapped split; serialization appends the
     * input format class name after the wrapped split's own fields.
     */
    public static class CombineHiveInputSplit extends HadoopShimsSecure.InputSplitShim {
        private String inputFormatClassName;
        private CombineFileSplit inputSplitShim;
        private Map<String, PartitionDesc> pathToPartitionInfo;

        /** Wraps a fresh split obtained from the Hadoop shim's combine input format. */
        public CombineHiveInputSplit() throws IOException {
            this(ShimLoader.getHadoopShims().getCombineFileInputFormat().getInputSplitShim());
        }

        /** Wraps {@code combineFileSplit}, using the job it reports via {@code getJob()}. */
        public CombineHiveInputSplit(CombineFileSplit combineFileSplit) throws IOException {
            this(combineFileSplit.getJob(), combineFileSplit);
        }

        /** Wraps {@code combineFileSplit}; partition info is read from the job's map work. */
        public CombineHiveInputSplit(JobConf jobConf, CombineFileSplit combineFileSplit) throws IOException {
            this(jobConf, combineFileSplit, null);
        }

        /**
         * Wraps {@code combineFileSplit} and, when a job is available and the
         * split has at least one path, resolves the input format class name
         * from the partition descriptor of the first path.
         *
         * @param jobConf          job configuration; when {@code null} nothing is resolved
         * @param combineFileSplit the split to wrap
         * @param map              path-to-partition mapping, or {@code null} to load
         *                         it from the job's map work
         * @throws IOException if the partition descriptor lookup fails
         */
        public CombineHiveInputSplit(JobConf jobConf, CombineFileSplit combineFileSplit, Map<String, PartitionDesc> map) throws IOException {
            this.inputSplitShim = combineFileSplit;
            this.pathToPartitionInfo = map;
            if (jobConf == null) {
                return;
            }
            if (this.pathToPartitionInfo == null) {
                this.pathToPartitionInfo = Utilities.getMapWork(jobConf).getPathToPartitionInfo();
            }
            Path[] splitPaths = combineFileSplit.getPaths();
            if (splitPaths.length == 0) {
                return;
            }
            this.inputFormatClassName = lookupInputFormatClassName(splitPaths[0]);
        }

        /** Resolves the input format class name of the partition that owns {@code path}. */
        private String lookupInputFormatClassName(Path path) throws IOException {
            PartitionDesc partition = HiveFileFormatUtils.getPartitionDescFromPathRecursively(this.pathToPartitionInfo, path, IOPrepareCache.get().getPartitionDescMap());
            return partition.getInputFileFormatClass().getName();
        }

        /** @return the wrapped split */
        public CombineFileSplit getInputSplitShim() {
            return this.inputSplitShim;
        }

        /** @return the input format class name, or {@code null} if not yet resolved */
        public String inputFormatClassName() {
            return this.inputFormatClassName;
        }

        public void setInputFormatClassName(String str) {
            this.inputFormatClassName = str;
        }

        // ---- plain delegation to the wrapped split ----

        public JobConf getJob() {
            return this.inputSplitShim.getJob();
        }

        public long getLength() {
            return this.inputSplitShim.getLength();
        }

        public long[] getStartOffsets() {
            return this.inputSplitShim.getStartOffsets();
        }

        public long[] getLengths() {
            return this.inputSplitShim.getLengths();
        }

        public long getOffset(int i) {
            return this.inputSplitShim.getOffset(i);
        }

        public long getLength(int i) {
            return this.inputSplitShim.getLength(i);
        }

        public int getNumPaths() {
            return this.inputSplitShim.getNumPaths();
        }

        public Path getPath(int i) {
            return this.inputSplitShim.getPath(i);
        }

        public Path[] getPaths() {
            return this.inputSplitShim.getPaths();
        }

        public String[] getLocations() throws IOException {
            return this.inputSplitShim.getLocations();
        }

        /** Wrapped split's text followed by the input format class name and a newline. */
        public String toString() {
            return this.inputSplitShim.toString() + "InputFormatClass: " + this.inputFormatClassName + "\n";
        }

        /** Reads the wrapped split's fields, then the input format class name. */
        @Override // org.apache.hadoop.hive.shims.HadoopShimsSecure.InputSplitShim
        public void readFields(DataInput dataInput) throws IOException {
            this.inputSplitShim.readFields(dataInput);
            this.inputFormatClassName = dataInput.readUTF();
        }

        /**
         * Writes the wrapped split's fields, then the input format class name.
         * If the name has not been resolved yet it is looked up now from the
         * partition descriptor of the first path.
         */
        @Override // org.apache.hadoop.hive.shims.HadoopShimsSecure.InputSplitShim
        public void write(DataOutput dataOutput) throws IOException {
            this.inputSplitShim.write(dataOutput);
            if (this.inputFormatClassName == null) {
                if (this.pathToPartitionInfo == null) {
                    this.pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
                }
                this.inputFormatClassName = lookupInputFormatClassName(this.inputSplitShim.getPath(0));
            }
            dataOutput.writeUTF(this.inputFormatClassName);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/hadoop/hive/ql/io/CombineHiveInputFormat$CombinePathInputFormat.class */
    /**
     * Key used to group input paths into combine pools: two paths may share a
     * pool only if they feed the same operators and use the same input format
     * and deserializer classes.
     */
    public static class CombinePathInputFormat {
        private final List<Operator<? extends OperatorDesc>> opList;
        private final String inputFormatClassName;
        private final String deserializerClassName;

        /**
         * @param list operators fed by the path
         * @param str  input format class name
         * @param str2 deserializer class name; may be {@code null}
         */
        public CombinePathInputFormat(List<Operator<? extends OperatorDesc>> list, String str, String str2) {
            this.opList = list;
            this.inputFormatClassName = str;
            this.deserializerClassName = str2;
        }

        /**
         * Field-wise equality. Every field is compared null-safely so that
         * {@code equals} accepts the same instances {@link #hashCode()} does
         * (which already tolerates a {@code null} operator list).
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof CombinePathInputFormat)) {
                return false;
            }
            CombinePathInputFormat other = (CombinePathInputFormat) obj;
            return nullSafeEquals(this.opList, other.opList)
                    && nullSafeEquals(this.inputFormatClassName, other.inputFormatClassName)
                    && nullSafeEquals(this.deserializerClassName, other.deserializerClassName);
        }

        /** Hash of the operator list only; {@code 0} when the list is {@code null}. */
        @Override
        public int hashCode() {
            if (this.opList == null) {
                return 0;
            }
            return this.opList.hashCode();
        }

        /** {@code true} if both are {@code null} or {@code a.equals(b)}. */
        private static boolean nullSafeEquals(Object a, Object b) {
            return a == null ? b == null : a.equals(b);
        }
    }

    /**
     * Builds combined splits for the input paths currently configured on
     * {@code jobConf}.
     *
     * <p>The method falls back to the parent {@code getSplits(jobConf, i)} as
     * soon as it meets a situation it does not combine: no combine shim is
     * available, a path belongs to a non-native table, a path uses
     * {@code SymlinkTextInputFormat}, or (when the map work reports that
     * Hadoop does not support splittable and the format is
     * {@code TextInputFormat}) a file with a compression codec is found.
     *
     * @param jobConf job configuration holding the input paths
     * @param i       requested split count; only forwarded to the parent
     *                {@code getSplits} on fallback
     * @param map     path-to-partition mapping used to resolve each path and
     *                passed on to every produced {@link CombineHiveInputSplit}
     * @return the produced splits (or the parent's splits on fallback)
     * @throws IOException if no input path is configured, or on file-system /
     *                     partition lookup failure
     */
    private InputSplit[] getCombineSplits(JobConf jobConf, int i, Map<String, PartitionDesc> map) throws IOException {
        init(jobConf);
        LinkedHashMap<String, ArrayList<String>> pathToAliases = this.mrwork.getPathToAliases();
        LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork = this.mrwork.getAliasToWork();
        HadoopShims.CombineFileInputFormatShim combineFileInputFormat = ShimLoader.getHadoopShims().getCombineFileInputFormat();
        // No combine support in this shim: behave like the parent input format.
        if (combineFileInputFormat == null) {
            return super.getSplits(jobConf, i);
        }
        if (combineFileInputFormat.getInputPathsShim(jobConf).length == 0) {
            throw new IOException("No input paths specified in job");
        }
        // arrayList: final CombineHiveInputSplit results.
        ArrayList arrayList = new ArrayList();
        Path[] inputPathsShim = combineFileInputFormat.getInputPathsShim(jobConf);
        // Only filled when a mapper cannot span partitions:
        // arrayList2 = input paths that are directories, arrayList3 = input paths
        // that are plain files, hashSet = parent directories of those files.
        ArrayList arrayList2 = new ArrayList();
        ArrayList arrayList3 = new ArrayList();
        // hashMap: pool key (operators + input format + deserializer) -> filter of that pool.
        HashMap hashMap = new HashMap();
        HashSet hashSet = new HashSet();
        for (Path path : inputPathsShim) {
            PartitionDesc partitionDescFromPathRecursively = HiveFileFormatUtils.getPartitionDescFromPathRecursively(map, path, IOPrepareCache.get().allocatePartitionDescMap());
            TableDesc tableDesc = partitionDescFromPathRecursively.getTableDesc();
            // Non-native tables are never combined.
            if (tableDesc != null && tableDesc.isNonNative()) {
                return super.getSplits(jobConf, i);
            }
            Class<? extends InputFormat> inputFileFormatClass = partitionDescFromPathRecursively.getInputFileFormatClass();
            String name = inputFileFormatClass.getName();
            InputFormat<WritableComparable, Writable> inputFormatFromCache = getInputFormatFromCache(inputFileFormatClass, jobConf);
            String str = null;
            try {
                str = partitionDescFromPathRecursively.getDeserializer(jobConf).getClass().getName();
            } catch (Exception e) {
                // Deliberately ignored: the deserializer name stays null and is
                // still usable as part of the pool key below.
            }
            FileSystem fileSystem = path.getFileSystem(jobConf);
            if (this.mrwork != null && !this.mrwork.getHadoopSupportsSplittable() && (inputFormatFromCache instanceof TextInputFormat)) {
                // Queue-driven walk of the directory tree under this path; any
                // file for which a compression codec is found aborts combining.
                LinkedList linkedList = new LinkedList();
                if (fileSystem.getFileStatus(path).isDir()) {
                    linkedList.offer(path);
                } else if (new CompressionCodecFactory(jobConf).getCodec(path) != null) {
                    return super.getSplits(jobConf, i);
                }
                while (linkedList.peek() != null) {
                    FileStatus[] listStatus = fileSystem.listStatus((Path) linkedList.remove(), FileUtils.HIDDEN_FILES_PATH_FILTER);
                    for (int i2 = 0; i2 < listStatus.length; i2++) {
                        if (listStatus[i2].isDir()) {
                            linkedList.offer(listStatus[i2].getPath());
                        } else if (new CompressionCodecFactory(jobConf).getCodec(listStatus[i2].getPath()) != null) {
                            return super.getSplits(jobConf, i);
                        }
                    }
                }
            }
            // Symlink text inputs are never combined.
            if (inputFormatFromCache instanceof SymlinkTextInputFormat) {
                return super.getSplits(jobConf, i);
            }
            if (!this.mrwork.isMapperCannotSpanPartns()) {
                // Mappers may span partitions: paths with the same operators,
                // input format and deserializer share one pool (one filter).
                CombinePathInputFormat combinePathInputFormat = new CombinePathInputFormat(HiveFileFormatUtils.doGetWorksFromPath(pathToAliases, aliasToWork, path), name, str);
                CombineFilter combineFilter = (CombineFilter) hashMap.get(combinePathInputFormat);
                if (combineFilter == null) {
                    CombineFilter combineFilter2 = new CombineFilter(path);
                    LOG.info("CombineHiveInputSplit creating pool for " + path + "; using filter path " + path);
                    combineFileInputFormat.createPool(jobConf, combineFilter2);
                    hashMap.put(combinePathInputFormat, combineFilter2);
                } else {
                    LOG.info("CombineHiveInputSplit: pool is already created for " + path + "; using filter path " + path);
                    combineFilter.addPath(path);
                }
            } else if (path.getFileSystem(jobConf).getFileStatus(path).isDir()) {
                arrayList2.add(path);
            } else {
                Path parent = path.getParent();
                arrayList3.add(path);
                hashSet.add(parent);
            }
        }
        List<CombineFileSplit> arrayList4 = new ArrayList();
        if (this.mrwork.isMapperCannotSpanPartns()) {
            // One split computation per directory, so no split mixes directories.
            Iterator it = arrayList2.iterator();
            while (it.hasNext()) {
                processPaths(jobConf, combineFileInputFormat, arrayList4, (Path) it.next());
            }
            if (arrayList3.size() > 0) {
                // For plain files: one pool per parent directory, then a single
                // split computation over all the files.
                Iterator it2 = hashSet.iterator();
                while (it2.hasNext()) {
                    combineFileInputFormat.createPool(jobConf, new CombineFilter((Path) it2.next()));
                }
                processPaths(jobConf, combineFileInputFormat, arrayList4, (Path[]) arrayList3.toArray(new Path[0]));
            }
        } else {
            // NOTE(review): "m2941getSplits" looks like a decompiler-generated
            // name for the shim's getSplits — confirm against the shim API.
            arrayList4 = Arrays.asList(combineFileInputFormat.m2941getSplits(jobConf, 1));
        }
        // Apply split sampling only when the map work defines samples.
        if (this.mrwork.getNameToSplitSample() != null && !this.mrwork.getNameToSplitSample().isEmpty()) {
            arrayList4 = sampleSplits(arrayList4);
        }
        Iterator<CombineFileSplit> it3 = arrayList4.iterator();
        while (it3.hasNext()) {
            arrayList.add(new CombineHiveInputSplit(jobConf, it3.next(), map));
        }
        LOG.info("number of splits " + arrayList.size());
        return (InputSplit[]) arrayList.toArray(new CombineHiveInputSplit[arrayList.size()]);
    }

    /**
     * Computes the splits of the job.
     *
     * <p>Input paths are first classified in parallel: paths whose input
     * format asks to skip combination (see {@link AvoidSplitCombination}) are
     * split by the parent {@code HiveInputFormat}; all remaining paths go
     * through {@code getCombineSplits}. The job's input-directory property is
     * temporarily overwritten for each of the two passes and restored before
     * returning.
     *
     * @param jobConf job configuration
     * @param i       requested number of splits, forwarded to the delegates
     * @return non-combined splits followed by combined splits
     * @throws IOException if classification fails or is interrupted, or if
     *                     split computation fails
     */
    @Override // org.apache.hadoop.hive.ql.io.HiveInputFormat
    public InputSplit[] getSplits(JobConf jobConf, int i) throws IOException {
        PerfLogger perfLogger = PerfLogger.getPerfLogger();
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
        init(jobConf);
        List<InputSplit> result = new ArrayList<InputSplit>();
        Path[] inputPaths = getInputPaths(jobConf);
        List<Path> nonCombinablePaths = new ArrayList<Path>(inputPaths.length / 2);
        List<Path> combinablePaths = new ArrayList<Path>(inputPaths.length / 2);

        // At least one worker so that an empty path array neither divides by
        // zero nor asks for an (illegal) zero-sized thread pool.
        int numThreads = Math.max(1, Math.min(MAX_CHECK_NONCOMBINABLE_THREAD_NUM,
                (int) Math.ceil((double) inputPaths.length / DEFAULT_NUM_PATH_PER_THREAD)));
        // Floating-point division: with integer division the ceil is a no-op
        // and the last worker silently absorbs the whole remainder.
        int numPathPerThread = (int) Math.ceil((double) inputPaths.length / numThreads);
        LOG.info("Total number of paths: " + inputPaths.length + ", launching " + numThreads + " threads to check non-combinable ones.");

        ExecutorService executor = Executors.newFixedThreadPool(numThreads);
        List<Future<Set<Integer>>> futureList = new ArrayList<Future<Set<Integer>>>(numThreads);
        Set<Integer> nonCombinablePathIndices = new HashSet<Integer>();
        try {
            for (int t = 0; t < numThreads; t++) {
                int start = t * numPathPerThread;
                // The last worker takes whatever is left.
                int length = t != numThreads - 1 ? numPathPerThread : inputPaths.length - start;
                futureList.add(executor.submit(new CheckNonCombinablePathCallable(inputPaths, start, length, jobConf)));
            }
            // Future.get() throws checked exceptions, so it must sit inside
            // this try to be reported as an IOException like submit failures.
            for (Future<Set<Integer>> future : futureList) {
                nonCombinablePathIndices.addAll(future.get());
            }
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Keep the interrupt visible to callers further up the stack.
                Thread.currentThread().interrupt();
            }
            LOG.error("Error checking non-combinable path", e);
            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
            throw new IOException(e);
        } finally {
            executor.shutdownNow();
        }

        for (int idx = 0; idx < inputPaths.length; idx++) {
            if (nonCombinablePathIndices.contains(Integer.valueOf(idx))) {
                nonCombinablePaths.add(inputPaths[idx]);
            } else {
                combinablePaths.add(inputPaths[idx]);
            }
        }

        // Remember the original input dirs; both passes below overwrite them.
        String oldPaths = jobConf.get(HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname);
        if (LOG.isDebugEnabled()) {
            LOG.debug("The received input paths are: [" + oldPaths + "] against the property " + HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname);
        }
        if (nonCombinablePaths.size() > 0) {
            FileInputFormat.setInputPaths(jobConf, nonCombinablePaths.toArray(new Path[nonCombinablePaths.size()]));
            for (InputSplit split : super.getSplits(jobConf, i)) {
                result.add(split);
            }
        }
        if (combinablePaths.size() > 0) {
            FileInputFormat.setInputPaths(jobConf, combinablePaths.toArray(new Path[combinablePaths.size()]));
            Map<String, PartitionDesc> pathToPartitions = this.pathToPartitionInfo != null
                    ? this.pathToPartitionInfo
                    : Utilities.getMapWork(jobConf).getPathToPartitionInfo();
            for (InputSplit split : getCombineSplits(jobConf, i, pathToPartitions)) {
                result.add(split);
            }
        }
        if (oldPaths != null) {
            jobConf.set(HiveConf.ConfVars.HADOOPMAPREDINPUTDIR.varname, oldPaths);
        }
        Utilities.clearWorkMapForConf(jobConf);
        LOG.info("Number of all splits " + result.size());
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
        return result.toArray(new InputSplit[result.size()]);
    }

    /**
     * Computes combined splits for {@code pathArr} on a private copy of the
     * job configuration (so the caller's input paths are left alone) and
     * appends them to {@code list}.
     *
     * @param jobConf                    configuration to copy
     * @param combineFileInputFormatShim shim that produces the splits
     * @param list                       receives the produced splits
     * @param pathArr                    input paths for this computation
     * @throws IOException if split computation fails
     */
    private void processPaths(JobConf jobConf, HadoopShims.CombineFileInputFormatShim combineFileInputFormatShim, List<CombineFileSplit> list, Path... pathArr) throws IOException {
        JobConf scopedConf = new JobConf(jobConf);
        FileInputFormat.setInputPaths(scopedConf, pathArr);
        for (CombineFileSplit produced : combineFileInputFormatShim.m2941getSplits(scopedConf, 1)) {
            list.add(produced);
        }
    }

    /**
     * Applies split sampling to {@code list}.
     *
     * <p>A split is a sampling candidate only if every one of its paths maps
     * to exactly one alias, that alias has a {@link SplitSample}, and all
     * paths agree on the alias. Non-candidates are passed through unchanged.
     * For each sampled alias, splits are taken starting at an index derived
     * from the sample's seed until the sample's target size is reached; the
     * split that crosses the target is shrunk to fit exactly.
     *
     * @param list splits to sample
     * @return pass-through splits followed by the selected sampled splits
     */
    private List<CombineFileSplit> sampleSplits(List<CombineFileSplit> list) {
        HashMap<String, SplitSample> nameToSplitSample = this.mrwork.getNameToSplitSample();
        List<CombineFileSplit> retLists = new ArrayList<CombineFileSplit>();
        Map<String, ArrayList<CombineFileSplit>> aliasToSplitList = new HashMap<String, ArrayList<CombineFileSplit>>();
        Map<String, ArrayList<String>> pathToAliases = this.mrwork.getPathToAliases();
        Map<String, ArrayList<String>> pathToAliasesNoScheme = removeScheme(pathToAliases);

        // Phase 1: bucket candidate splits by alias, pass the rest through.
        for (CombineFileSplit split : list) {
            String alias = null;
            for (Path path : split.getPaths()) {
                // Scheme-less paths must be matched against scheme-less keys.
                boolean schemeless = path.toUri().getScheme() == null;
                List<String> aliases = HiveFileFormatUtils.doGetAliasesFromPath(schemeless ? pathToAliasesNoScheme : pathToAliases, path);
                // Compare alias names by value; a reference comparison would
                // reject equal names held in distinct String instances.
                if (aliases.size() != 1 || !nameToSplitSample.containsKey(aliases.get(0)) || (alias != null && !alias.equals(aliases.get(0)))) {
                    alias = null;
                    break;
                }
                alias = aliases.get(0);
            }
            if (alias != null) {
                ArrayList<CombineFileSplit> bucket = aliasToSplitList.get(alias);
                if (bucket == null) {
                    bucket = new ArrayList<CombineFileSplit>();
                    aliasToSplitList.put(alias, bucket);
                }
                bucket.add(split);
            } else {
                retLists.add(split);
            }
        }

        // Phase 2: for each alias keep just enough splits to reach the target size.
        for (Map.Entry<String, ArrayList<CombineFileSplit>> entry : aliasToSplitList.entrySet()) {
            ArrayList<CombineFileSplit> splitList = entry.getValue();
            long totalSize = 0;
            for (CombineFileSplit split : splitList) {
                totalSize += split.getLength();
            }
            SplitSample splitSample = nameToSplitSample.get(entry.getKey());
            long targetSize = splitSample.getTargetSize(totalSize);
            int startIndex = splitSample.getSeedNum() % splitList.size();
            long size = 0;
            // Bounded loop: stops when the target is reached or when every
            // split of this alias has been taken.
            for (int i = 0; i < splitList.size(); i++) {
                CombineFileSplit split = splitList.get((startIndex + i) % splitList.size());
                retLists.add(split);
                long splitLength = split.getLength();
                if (size + splitLength >= targetSize) {
                    LOG.info("Sample alias " + entry.getKey() + " using " + (i + 1) + "splits");
                    if (size + splitLength > targetSize) {
                        // Trim the last split so the sample hits the target exactly.
                        ((HadoopShimsSecure.InputSplitShim) split).shrinkSplit(targetSize - size);
                    }
                    break;
                }
                size += splitLength;
            }
        }
        return retLists;
    }

    /**
     * Returns a copy of {@code map} whose keys are reduced to the path
     * component of their URI (scheme and authority dropped). Values are
     * shared with the input map, not copied.
     *
     * @param map mapping keyed by path strings
     * @return a new map keyed by the scheme-less paths
     */
    Map<String, ArrayList<String>> removeScheme(Map<String, ArrayList<String>> map) {
        Map<String, ArrayList<String>> schemeless = new HashMap<String, ArrayList<String>>();
        for (Map.Entry<String, ArrayList<String>> mapping : map.entrySet()) {
            String bareLocation = new Path(mapping.getKey()).toUri().getPath();
            schemeless.put(bareLocation, mapping.getValue());
        }
        return schemeless;
    }

    /**
     * Creates the record reader for {@code inputSplit}.
     *
     * <p>Splits that are not {@link CombineHiveInputSplit} are handled by the
     * parent class. For combined splits the split's input format class is
     * loaded, projections and filters are pushed for the split's first path,
     * and the shim's combine record reader is returned.
     *
     * @param inputSplit split to read
     * @param jobConf    job configuration
     * @param reporter   progress reporter handed to the reader
     * @return the record reader for the split
     * @throws IOException if the input format class cannot be loaded (the
     *                     original failure is attached as the cause), or if
     *                     reader creation fails
     */
    @Override // org.apache.hadoop.hive.ql.io.HiveInputFormat
    public RecordReader getRecordReader(InputSplit inputSplit, JobConf jobConf, Reporter reporter) throws IOException {
        if (!(inputSplit instanceof CombineHiveInputSplit)) {
            return super.getRecordReader(inputSplit, jobConf, reporter);
        }
        CombineHiveInputSplit combineSplit = (CombineHiveInputSplit) inputSplit;

        // Only the class lookup is guarded: failures further down must keep
        // their own message instead of being reported as a missing class.
        String inputFormatClassName = null;
        Class<?> inputFormatClass;
        try {
            inputFormatClassName = combineSplit.inputFormatClassName();
            inputFormatClass = jobConf.getClassByName(inputFormatClassName);
        } catch (Exception e) {
            throw new IOException("cannot find class " + inputFormatClassName, e);
        }

        Path firstPath = combineSplit.getPath(0);
        pushProjectionsAndFilters(jobConf, inputFormatClass, firstPath.toString(), firstPath.toUri().getPath());
        return ShimLoader.getHadoopShims().getCombineFileInputFormat().getRecordReader(jobConf, (CombineFileSplit) inputSplit, reporter, CombineHiveRecordReader.class);
    }
}
