package parquet.hadoop;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import parquet.Log;
import parquet.filter.UnboundRecordFilter;
import parquet.hadoop.api.InitContext;
import parquet.hadoop.api.ReadSupport;
import parquet.hadoop.metadata.BlockMetaData;
import parquet.hadoop.metadata.ColumnChunkMetaData;
import parquet.hadoop.metadata.FileMetaData;
import parquet.hadoop.metadata.GlobalMetaData;
import parquet.hadoop.metadata.ParquetMetadata;
import parquet.hadoop.util.ConfigurationUtil;
import parquet.hadoop.util.ContextUtil;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

/* JADX WARN: Classes with same name are omitted:
  input_file:WEB-INF/lib/parquet-hive-bundle-1.4.1.jar:parquet/hadoop/ParquetInputFormat.class
 */
/* loaded from: input_file:WEB-INF/lib/parquet-hadoop-1.4.1.jar:parquet/hadoop/ParquetInputFormat.class */
/**
 * Hadoop {@link FileInputFormat} that reads Parquet files and produces values of type {@code T}
 * (keys are always {@code Void}).
 *
 * <p>Records are materialized by a {@link ReadSupport} implementation that is either handed to the
 * constructor or looked up in the job configuration under {@link #READ_SUPPORT_CLASS}. An optional
 * {@link UnboundRecordFilter} class can be configured under {@link #UNBOUND_RECORD_FILTER}.
 *
 * <p>Instances cache the resolved read support class and the footers read by
 * {@link #getFooters(JobContext)}; no synchronization is performed on those caches.
 *
 * @param <T> the type of the materialized records
 */
public class ParquetInputFormat<T> extends FileInputFormat<Void, T> {
    /** Configuration key holding the name of the {@link ReadSupport} class. */
    public static final String READ_SUPPORT_CLASS = "parquet.read.support.class";
    /** Configuration key holding the name of the {@link UnboundRecordFilter} class. */
    public static final String UNBOUND_RECORD_FILTER = "parquet.read.filter";

    private static final Log LOG = Log.getLog(ParquetInputFormat.class);

    /** Rejects paths whose last component starts with "_" or ".". */
    private static final PathFilter hiddenFileFilter = new PathFilter() {
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    };

    /** Read support class; resolved from the configuration on first use when not set explicitly. */
    private Class<?> readSupportClass;
    /** Footers cached by {@link #getFooters(JobContext)}. */
    private List<Footer> footers;

    /**
     * Stores the read support class name in the configuration of the given job.
     *
     * @param job the job to configure
     * @param cls the read support class
     */
    public static void setReadSupportClass(Job job, Class<?> cls) {
        ContextUtil.getConfiguration(job).set(READ_SUPPORT_CLASS, cls.getName());
    }

    /**
     * Stores the record filter class name in the configuration of the given job.
     *
     * @param job the job to configure
     * @param cls the record filter class
     */
    public static void setUnboundRecordFilter(Job job, Class<? extends UnboundRecordFilter> cls) {
        ContextUtil.getConfiguration(job).set(UNBOUND_RECORD_FILTER, cls.getName());
    }

    /**
     * Looks up the record filter class configured under {@link #UNBOUND_RECORD_FILTER}.
     *
     * @param configuration the configuration to read
     * @return the configured class; callers in this class treat {@code null} as "no filter"
     */
    public static Class<?> getUnboundRecordFilter(Configuration configuration) {
        return ConfigurationUtil.getClassFromConfig(configuration, UNBOUND_RECORD_FILTER, UnboundRecordFilter.class);
    }

    /**
     * Stores the read support class name in the given job configuration.
     *
     * @param jobConf the job configuration to update
     * @param cls the read support class
     */
    public static void setReadSupportClass(JobConf jobConf, Class<?> cls) {
        jobConf.set(READ_SUPPORT_CLASS, cls.getName());
    }

    /**
     * Looks up the read support class configured under {@link #READ_SUPPORT_CLASS}.
     *
     * @param configuration the configuration to read
     * @return the configured class, as resolved by {@code ConfigurationUtil.getClassFromConfig}
     */
    public static Class<?> getReadSupportClass(Configuration configuration) {
        return ConfigurationUtil.getClassFromConfig(configuration, READ_SUPPORT_CLASS, ReadSupport.class);
    }

    /** Creates an input format whose read support class is resolved from the configuration. */
    public ParquetInputFormat() {
    }

    /**
     * Creates an input format that uses the given read support class.
     *
     * @param cls the read support class to instantiate for each reader
     * @param <S> the concrete read support type
     */
    public <S extends ReadSupport<T>> ParquetInputFormat(Class<S> cls) {
        this.readSupportClass = cls;
    }

    /**
     * Creates a {@link ParquetRecordReader}, attaching a record filter when one is configured.
     *
     * @param inputSplit the split to read (not used when constructing the reader)
     * @param taskAttemptContext the task context providing the configuration
     * @return a new record reader
     * @throws BadConfigurationException if the read support or filter class cannot be instantiated
     */
    @Override
    public RecordReader<Void, T> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        Configuration configuration = ContextUtil.getConfiguration(taskAttemptContext);
        ReadSupport<T> readSupport = getReadSupport(configuration);
        Class<?> filterClass = getUnboundRecordFilter(configuration);
        if (filterClass == null) {
            return new ParquetRecordReader<T>(readSupport);
        }
        try {
            return new ParquetRecordReader<T>(readSupport, (UnboundRecordFilter) filterClass.newInstance());
        } catch (IllegalAccessException e) {
            throw new BadConfigurationException("could not instantiate unbound record filter class", e);
        } catch (InstantiationException e) {
            throw new BadConfigurationException("could not instantiate unbound record filter class", e);
        }
    }

    /**
     * Instantiates the read support, resolving its class from the configuration on first use.
     * The resolved class is cached on this instance regardless of which configuration is passed
     * on later calls.
     *
     * @param configuration the configuration consulted when no class has been resolved yet
     * @return a new read support instance
     * @throws BadConfigurationException if no class is configured or it cannot be instantiated
     */
    @SuppressWarnings("unchecked") // the class is only ever set from Class<? extends ReadSupport<T>> or the config lookup
    public ReadSupport<T> getReadSupport(Configuration configuration) {
        if (this.readSupportClass == null) {
            this.readSupportClass = getReadSupportClass(configuration);
        }
        if (this.readSupportClass == null) {
            // Fail with an actionable message instead of a NullPointerException on newInstance().
            throw new BadConfigurationException("no read support class configured: pass one to the constructor or set " + READ_SUPPORT_CLASS);
        }
        try {
            return (ReadSupport<T>) this.readSupportClass.newInstance();
        } catch (IllegalAccessException e) {
            throw new BadConfigurationException("could not instantiate read support class", e);
        } catch (InstantiationException e) {
            throw new BadConfigurationException("could not instantiate read support class", e);
        }
    }

    /**
     * Groups the row groups of one file by the block location that contains the first data page of
     * their first column, and builds one split per block location that received at least one row group.
     *
     * <p>{@code blockLocationArr} is sorted in place by offset. The length of each split is the sum of
     * the total sizes of the column chunks whose path is contained in the requested schema.
     *
     * @param list the row groups of the file
     * @param blockLocationArr the block locations of the file; sorted in place
     * @param fileStatus the status of the file
     * @param fileMetaData the file-level metadata (schema and key/value metadata)
     * @param cls the read support class (not used by this method)
     * @param str the requested schema, in the textual form parsed by {@link MessageTypeParser}
     * @param map the read support metadata passed through to every split
     * @return the generated splits, in block offset order
     * @throws IOException if there are row groups but no block location, or hosts cannot be read
     */
    static <T> List<ParquetInputSplit> generateSplits(List<BlockMetaData> list, BlockLocation[] blockLocationArr, FileStatus fileStatus, FileMetaData fileMetaData, Class<?> cls, String str, Map<String, String> map) throws IOException {
        String fileSchema = fileMetaData.getSchema().toString().intern();
        Comparator<BlockLocation> byOffset = new Comparator<BlockLocation>() {
            @Override
            public int compare(BlockLocation left, BlockLocation right) {
                long leftOffset = left.getOffset();
                long rightOffset = right.getOffset();
                // Explicit comparison: never overflows, unlike signum of a difference.
                return leftOffset < rightOffset ? -1 : (leftOffset > rightOffset ? 1 : 0);
            }
        };
        Arrays.sort(blockLocationArr, byOffset);

        if (blockLocationArr.length == 0 && !list.isEmpty()) {
            // Without this guard the assignment below fails with an opaque IndexOutOfBoundsException.
            throw new IOException("no block location available for " + fileStatus.getPath() + " but it contains " + list.size() + " row group(s)");
        }

        List<List<BlockMetaData>> rowGroupsPerBlock = new ArrayList<List<BlockMetaData>>(blockLocationArr.length);
        for (int i = 0; i < blockLocationArr.length; i++) {
            rowGroupsPerBlock.add(new ArrayList<BlockMetaData>());
        }

        for (BlockMetaData rowGroup : list) {
            final long firstDataPageOffset = rowGroup.getColumns().get(0).getFirstDataPageOffset();
            // Probe object that only carries the offset we are looking for.
            BlockLocation probe = new BlockLocation() {
                @Override
                public long getOffset() {
                    return firstDataPageOffset;
                }
            };
            int found = Arrays.binarySearch(blockLocationArr, probe, byOffset);
            int target;
            if (found >= 0) {
                target = found;
            } else {
                // binarySearch returns (-(insertion point) - 1); the containing block precedes the insertion point.
                int insertionPoint = -found - 1;
                if (insertionPoint == 0) {
                    LOG.warn("row group before the first HDFS block:  " + rowGroup);
                    target = 0;
                } else {
                    target = insertionPoint - 1;
                }
            }
            rowGroupsPerBlock.get(target).add(rowGroup);
        }

        List<ParquetInputSplit> splits = new ArrayList<ParquetInputSplit>();
        // Parsed at most once, and only if some block actually holds a row group.
        MessageType requestedSchema = null;
        for (int i = 0; i < blockLocationArr.length; i++) {
            BlockLocation blockLocation = blockLocationArr[i];
            List<BlockMetaData> rowGroups = rowGroupsPerBlock.get(i);
            if (rowGroups.isEmpty()) {
                LOG.debug("HDFS block without row group: " + blockLocation);
                continue;
            }
            if (requestedSchema == null) {
                requestedSchema = MessageTypeParser.parseMessageType(str);
            }
            long length = 0;
            for (BlockMetaData rowGroup : rowGroups) {
                for (ColumnChunkMetaData column : rowGroup.getColumns()) {
                    if (requestedSchema.containsPath(column.getPath().toArray())) {
                        length += column.getTotalSize();
                    }
                }
            }
            splits.add(new ParquetInputSplit(fileStatus.getPath(), blockLocation.getOffset(), length, blockLocation.getHosts(), rowGroups, str, fileSchema, fileMetaData.getKeyValueMetaData(), map));
        }
        return splits;
    }

    /**
     * Computes the splits for all input files of the job.
     *
     * @param jobContext the job context
     * @return the splits, as a new mutable list
     * @throws IOException if footers or file metadata cannot be read
     */
    @Override
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
        List<InputSplit> splits = new ArrayList<InputSplit>();
        splits.addAll(getSplits(ContextUtil.getConfiguration(jobContext), getFooters(jobContext)));
        return splits;
    }

    /**
     * Computes the splits for the given footers. The read support is initialized once with the
     * global metadata merged from all footers, and the resulting requested schema and read support
     * metadata are applied to every file.
     *
     * @param configuration the configuration used to resolve file systems and the read support
     * @param list the footers of the files to split
     * @return the splits of all files, in footer order
     * @throws IOException if file status or block locations cannot be read
     */
    public List<ParquetInputSplit> getSplits(Configuration configuration, List<Footer> list) throws IOException {
        List<ParquetInputSplit> splits = new ArrayList<ParquetInputSplit>();
        GlobalMetaData globalMetaData = ParquetFileWriter.getGlobalMetaData(list);
        ReadSupport.ReadContext readContext = getReadSupport(configuration).init(new InitContext(configuration, globalMetaData.getKeyValueMetaData(), globalMetaData.getSchema()));
        String requestedSchema = readContext.getRequestedSchema().toString();
        Map<String, String> readSupportMetadata = readContext.getReadSupportMetadata();
        for (Footer footer : list) {
            Path file = footer.getFile();
            LOG.debug(file);
            FileSystem fileSystem = file.getFileSystem(configuration);
            FileStatus fileStatus = fileSystem.getFileStatus(file);
            ParquetMetadata parquetMetadata = footer.getParquetMetadata();
            BlockLocation[] blockLocations = fileSystem.getFileBlockLocations(fileStatus, 0L, fileStatus.getLen());
            splits.addAll(generateSplits(parquetMetadata.getBlocks(), blockLocations, fileStatus, parquetMetadata.getFileMetaData(), this.readSupportClass, requestedSchema, readSupportMetadata));
        }
        return splits;
    }

    /**
     * Lists the input files, expanding every directory returned by the parent implementation
     * into the non-hidden files found beneath it.
     *
     * @param jobContext the job context
     * @return the flattened list of input files
     * @throws IOException if a directory cannot be listed
     */
    @Override
    protected List<FileStatus> listStatus(JobContext jobContext) throws IOException {
        return getAllFileRecursively(super.listStatus(jobContext), ContextUtil.getConfiguration(jobContext));
    }

    /**
     * Replaces each directory in {@code list} by the files found recursively beneath it (skipping
     * names rejected by the hidden-file filter); plain files are kept as they are.
     */
    private static List<FileStatus> getAllFileRecursively(List<FileStatus> list, Configuration configuration) throws IOException {
        List<FileStatus> files = new ArrayList<FileStatus>();
        for (FileStatus status : list) {
            if (status.isDir()) {
                Path directory = status.getPath();
                staticAddInputPathRecursively(files, directory.getFileSystem(configuration), directory, hiddenFileFilter);
            } else {
                files.add(status);
            }
        }
        LOG.info("Total input paths to process : " + files.size());
        return files;
    }

    /** Appends to {@code list} every file under {@code path} accepted by {@code pathFilter}, depth first. */
    private static void staticAddInputPathRecursively(List<FileStatus> list, FileSystem fileSystem, Path path, PathFilter pathFilter) throws IOException {
        for (FileStatus child : fileSystem.listStatus(path, pathFilter)) {
            if (child.isDir()) {
                staticAddInputPathRecursively(list, fileSystem, child.getPath(), pathFilter);
            } else {
                list.add(child);
            }
        }
    }

    /**
     * Returns the footers of the job's input files, reading them on the first call and returning
     * the cached list afterwards.
     *
     * @param jobContext the job context
     * @return the footers of all input files
     * @throws IOException if the files cannot be listed or the footers cannot be read
     */
    public List<Footer> getFooters(JobContext jobContext) throws IOException {
        if (this.footers == null) {
            this.footers = getFooters(ContextUtil.getConfiguration(jobContext), listStatus(jobContext));
        }
        return this.footers;
    }

    /**
     * Reads the footers of the given files via
     * {@code ParquetFileReader.readAllFootersInParallelUsingSummaryFiles}.
     *
     * @param configuration the configuration used to access the files
     * @param list the files whose footers are read
     * @return the footers
     * @throws IOException if a footer cannot be read
     */
    public List<Footer> getFooters(Configuration configuration, List<FileStatus> list) throws IOException {
        LOG.debug("reading " + list.size() + " files");
        return ParquetFileReader.readAllFootersInParallelUsingSummaryFiles(configuration, list);
    }

    /**
     * Returns the global metadata merged from the footers of all input files of the job.
     *
     * @param jobContext the job context
     * @return the merged metadata
     * @throws IOException if the footers cannot be read
     */
    public GlobalMetaData getGlobalMetaData(JobContext jobContext) throws IOException {
        return ParquetFileWriter.getGlobalMetaData(getFooters(jobContext));
    }
}
