package parquet.hadoop;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import parquet.Log;
import parquet.bytes.BytesInput;
import parquet.bytes.BytesUtils;
import parquet.column.ColumnDescriptor;
import parquet.column.page.DataPageV1;
import parquet.column.page.DataPageV2;
import parquet.column.page.DictionaryPage;
import parquet.column.page.PageReadStore;
import parquet.format.DataPageHeader;
import parquet.format.DataPageHeaderV2;
import parquet.format.DictionaryPageHeader;
import parquet.format.PageHeader;
import parquet.format.Util;
import parquet.format.converter.ParquetMetadataConverter;
import parquet.hadoop.ColumnChunkPageReadStore;
import parquet.hadoop.metadata.BlockMetaData;
import parquet.hadoop.metadata.ColumnChunkMetaData;
import parquet.hadoop.metadata.ColumnPath;
import parquet.hadoop.metadata.ParquetMetadata;
import parquet.hadoop.util.HiddenFileFilter;
import parquet.hadoop.util.counters.BenchmarkCounter;
import parquet.io.ParquetDecodingException;

/* loaded from: input_file:parquet/hadoop/ParquetFileReader.class */
public class ParquetFileReader implements Closeable {
    /** Logger shared by the reader and its nested chunk classes. */
    private static final Log LOG = Log.getLog(ParquetFileReader.class);
    /** Configuration key giving the thread count for the parallel footer/summary reads; callers in this class default it to 5. */
    public static String PARQUET_READ_PARALLELISM = "parquet.metadata.read.parallelism";
    /** Shared converter used to decode footers and page-level encodings. */
    private static ParquetMetadataConverter converter = new ParquetMetadataConverter();
    /** Source of the decompressors handed to page readers; released in {@link #close()}. */
    private final CodecFactory codecFactory;
    /** Row groups to read, consumed in list order by {@link #readNextRowGroup()}. */
    private final List<BlockMetaData> blocks;
    /** Open stream on the file being read; closed in {@link #close()}. */
    private final FSDataInputStream f;
    /** Path of the file being read; used in error messages. */
    private final Path filePath;
    /** Index into {@link #blocks} of the next row group to read. */
    private int currentBlock = 0;
    /** Requested columns keyed by column path; chunks of columns not present here are not read. */
    private final Map<ColumnPath, ColumnDescriptor> paths = new HashMap();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:parquet/hadoop/ParquetFileReader$Chunk.class */
    /**
     * The bytes of one column chunk, exposed as an input stream.
     *
     * <p>The backing array is handed in by the caller together with the index at which this
     * chunk starts; reading begins at that index.
     */
    public class Chunk extends ByteArrayInputStream {
        private final ChunkDescriptor descriptor;

        /**
         * @param chunkDescriptor describes the column chunk held in the buffer
         * @param bArr buffer containing the chunk bytes
         * @param i index in {@code bArr} of the first byte of this chunk
         */
        public Chunk(ChunkDescriptor chunkDescriptor, byte[] bArr, int i) {
            super(bArr);
            this.descriptor = chunkDescriptor;
            this.pos = i;
        }

        /**
         * Reads the next page header from the current position.
         *
         * @return the decoded page header
         * @throws IOException if the header cannot be read
         */
        protected PageHeader readPageHeader() throws IOException {
            return Util.readPageHeader(this);
        }

        /**
         * Reads every page of this chunk until the value count announced by the chunk metadata
         * has been reached.
         *
         * @return a page reader over the (still compressed) pages and the optional dictionary page
         * @throws IOException if the pages hold a different number of values than the metadata says
         * @throws ParquetDecodingException if a second dictionary page is encountered
         */
        public ColumnChunkPageReadStore.ColumnChunkPageReader readAllPages() throws IOException {
            final long expectedValues = this.descriptor.metadata.getValueCount();
            // Kept raw: the element type expected by ColumnChunkPageReader is not imported in this file.
            ArrayList dataPages = new ArrayList();
            DictionaryPage dictionary = null;
            long valuesSeen = 0;
            while (valuesSeen < expectedValues) {
                PageHeader header = readPageHeader();
                int uncompressedSize = header.getUncompressed_page_size();
                int compressedSize = header.getCompressed_page_size();
                switch (header.type) {
                    case DICTIONARY_PAGE: {
                        if (dictionary != null) {
                            throw new ParquetDecodingException("more than one dictionary page in column " + this.descriptor.col);
                        }
                        DictionaryPageHeader dictHeader = header.getDictionary_page_header();
                        BytesInput dictBytes = readAsBytesInput(compressedSize);
                        dictionary = new DictionaryPage(
                                dictBytes,
                                uncompressedSize,
                                dictHeader.getNum_values(),
                                ParquetFileReader.converter.getEncoding(dictHeader.getEncoding()));
                        break;
                    }
                    case DATA_PAGE: {
                        DataPageHeader v1 = header.getData_page_header();
                        BytesInput pageBytes = readAsBytesInput(compressedSize);
                        dataPages.add(new DataPageV1(
                                pageBytes,
                                v1.getNum_values(),
                                uncompressedSize,
                                ParquetMetadataConverter.fromParquetStatistics(v1.getStatistics(), this.descriptor.col.getType()),
                                ParquetFileReader.converter.getEncoding(v1.getRepetition_level_encoding()),
                                ParquetFileReader.converter.getEncoding(v1.getDefinition_level_encoding()),
                                ParquetFileReader.converter.getEncoding(v1.getEncoding())));
                        valuesSeen += v1.getNum_values();
                        break;
                    }
                    case DATA_PAGE_V2: {
                        DataPageHeaderV2 v2 = header.getData_page_header_v2();
                        int repetitionLength = v2.getRepetition_levels_byte_length();
                        int definitionLength = v2.getDefinition_levels_byte_length();
                        // The three sections are laid out back to back: repetition levels,
                        // definition levels, then the data, so they must be consumed in that order.
                        BytesInput repetitionLevels = readAsBytesInput(repetitionLength);
                        BytesInput definitionLevels = readAsBytesInput(definitionLength);
                        dataPages.add(new DataPageV2(
                                v2.getNum_rows(),
                                v2.getNum_nulls(),
                                v2.getNum_values(),
                                repetitionLevels,
                                definitionLevels,
                                ParquetFileReader.converter.getEncoding(v2.getEncoding()),
                                readAsBytesInput((compressedSize - repetitionLength) - definitionLength),
                                uncompressedSize,
                                ParquetMetadataConverter.fromParquetStatistics(v2.getStatistics(), this.descriptor.col.getType()),
                                v2.isIs_compressed()));
                        valuesSeen += v2.getNum_values();
                        break;
                    }
                    default: {
                        if (Log.DEBUG) {
                            ParquetFileReader.LOG.debug("skipping page of type " + header.getType() + " of size " + compressedSize);
                        }
                        skip(compressedSize);
                        break;
                    }
                }
            }
            if (valuesSeen != expectedValues) {
                throw new IOException("Expected " + expectedValues + " values in column chunk at " + ParquetFileReader.this.filePath + " offset " + this.descriptor.metadata.getFirstDataPageOffset() + " but got " + valuesSeen + " values instead over " + dataPages.size() + " pages ending at file offset " + (this.descriptor.fileOffset + pos()));
            }
            return new ColumnChunkPageReadStore.ColumnChunkPageReader(
                    ParquetFileReader.this.codecFactory.getDecompressor(this.descriptor.metadata.getCodec()),
                    dataPages,
                    dictionary);
        }

        /** @return the current read index in the backing buffer */
        public int pos() {
            return this.pos;
        }

        /**
         * Wraps the next {@code i} bytes of the buffer as a {@link BytesInput} and advances the
         * read position past them.
         *
         * @param i number of bytes to take
         * @return the bytes as a {@link BytesInput}
         * @throws IOException declared for overriding subclasses; this implementation performs no I/O
         */
        public BytesInput readAsBytesInput(int i) throws IOException {
            final int start = this.pos;
            BytesInput slice = BytesInput.from(this.buf, start, i);
            this.pos = start + i;
            return slice;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:parquet/hadoop/ParquetFileReader$ChunkDescriptor.class */
    /** Immutable description of where one column chunk lives in the file and which column it belongs to. */
    public static class ChunkDescriptor {
        /** Column the chunk belongs to. */
        private final ColumnDescriptor col;
        /** Footer metadata of the chunk. */
        private final ColumnChunkMetaData metadata;
        /** Offset of the chunk in the file. */
        private final long fileOffset;
        /** Number of bytes the chunk occupies. */
        private final int size;

        /**
         * @param column column the chunk belongs to
         * @param chunkMetadata footer metadata of the chunk
         * @param offsetInFile offset of the chunk in the file
         * @param byteCount number of bytes the chunk occupies
         */
        private ChunkDescriptor(ColumnDescriptor column, ColumnChunkMetaData chunkMetadata, long offsetInFile, int byteCount) {
            this.col = column;
            this.metadata = chunkMetadata;
            this.fileOffset = offsetInFile;
            this.size = byteCount;
        }
    }

    /* loaded from: input_file:parquet/hadoop/ParquetFileReader$ConsecutiveChunkList.class */
    /** A run of column chunks that sit back to back in the file and can be fetched with a single read. */
    private class ConsecutiveChunkList {
        /** File offset of the first chunk in the run. */
        private final long offset;
        /** Sum of the sizes of all chunks added so far. */
        private int length;
        private final List<ChunkDescriptor> chunks = new ArrayList<ChunkDescriptor>();

        /** @param j file offset at which the run starts */
        ConsecutiveChunkList(long j) {
            this.offset = j;
        }

        /**
         * Appends a chunk to the run and grows the run length by the chunk size.
         *
         * @param chunkDescriptor the chunk to append
         */
        public void addChunk(ChunkDescriptor chunkDescriptor) {
            this.chunks.add(chunkDescriptor);
            this.length += chunkDescriptor.size;
        }

        /**
         * Reads the whole run into one buffer and slices it into per-chunk streams.
         * The last chunk of the run is wrapped in a {@link WorkaroundChunk}, which also keeps a
         * handle on the stream.
         *
         * @param fSDataInputStream stream on the file; it is repositioned to the start of the run
         * @return one {@link Chunk} per descriptor, in insertion order
         * @throws IOException if seeking or reading fails
         */
        public List<Chunk> readAll(FSDataInputStream fSDataInputStream) throws IOException {
            List<Chunk> result = new ArrayList<Chunk>(this.chunks.size());
            fSDataInputStream.seek(this.offset);
            byte[] buffer = new byte[this.length];
            fSDataInputStream.readFully(buffer);
            BenchmarkCounter.incrementBytesRead(this.length);

            final int lastIndex = this.chunks.size() - 1;
            int chunkStart = 0;
            int index = 0;
            for (ChunkDescriptor current : this.chunks) {
                Chunk chunk = (index == lastIndex)
                        ? new WorkaroundChunk(current, buffer, chunkStart, fSDataInputStream)
                        : new Chunk(current, buffer, chunkStart);
                result.add(chunk);
                chunkStart += current.size;
                index++;
            }
            return result;
        }

        /** @return the file offset just past the last byte of the run */
        public long endPos() {
            return this.offset + this.length;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:parquet/hadoop/ParquetFileReader$WorkaroundChunk.class */
    /**
     * Chunk variant that can continue reading from the file stream when the buffered bytes run
     * out before a page header or a page body is complete.
     */
    public class WorkaroundChunk extends Chunk {
        /** File stream used to fetch the bytes missing from the buffer. */
        private final FSDataInputStream f;

        /**
         * @param chunkDescriptor describes the column chunk held in the buffer
         * @param bArr buffer containing the chunk bytes
         * @param i index in {@code bArr} of the first byte of this chunk
         * @param fSDataInputStream stream on the file the buffer was read from
         */
        private WorkaroundChunk(ChunkDescriptor chunkDescriptor, byte[] bArr, int i, FSDataInputStream fSDataInputStream) {
            super(chunkDescriptor, bArr, i);
            this.f = fSDataInputStream;
        }

        /**
         * Reads the next page header; if reading from the buffer alone fails, rewinds and retries
         * on the buffer followed by the file stream.
         */
        @Override // parquet.hadoop.ParquetFileReader.Chunk
        protected PageHeader readPageHeader() throws IOException {
            final int headerStart = this.pos;
            try {
                return Util.readPageHeader(this);
            } catch (IOException bufferExhausted) {
                // Rewind so the retry re-reads the partial header from its first byte.
                this.pos = headerStart;
                ParquetFileReader.LOG.info("completing the column chunk to read the page header");
                return Util.readPageHeader(new SequenceInputStream(this, this.f));
            }
        }

        /**
         * Takes the next {@code i} bytes; whatever the buffer cannot supply is copied from the
         * file stream and appended.
         */
        @Override // parquet.hadoop.ParquetFileReader.Chunk
        public BytesInput readAsBytesInput(int i) throws IOException {
            if (this.pos + i > this.count) {
                int buffered = this.count - this.pos;
                int missing = i - buffered;
                ParquetFileReader.LOG.info("completed the column chunk with " + missing + " bytes");
                BytesInput head = super.readAsBytesInput(buffered);
                BytesInput tail = BytesInput.copy(BytesInput.from(this.f, missing));
                return BytesInput.concat(head, tail);
            }
            return super.readAsBytesInput(i);
        }
    }

    @Deprecated
    /**
     * Reads the footers of the given files, using summary files where available, without
     * skipping row groups.
     *
     * @param configuration the Hadoop configuration
     * @param list the files whose footers are wanted
     * @return one footer per file
     * @throws IOException if a footer or summary cannot be read
     * @deprecated delegates to the overload that takes an explicit skip-row-groups flag
     */
    public static List<Footer> readAllFootersInParallelUsingSummaryFiles(Configuration configuration, List<FileStatus> list) throws IOException {
        final boolean skipRowGroups = false;
        return readAllFootersInParallelUsingSummaryFiles(configuration, list, skipRowGroups);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Translates the skip-row-groups flag into a footer metadata filter.
     *
     * @param z {@code true} to skip row groups when reading a footer
     * @return {@code SKIP_ROW_GROUPS} when {@code z} is set, {@code NO_FILTER} otherwise
     */
    public static ParquetMetadataConverter.MetadataFilter filter(boolean z) {
        if (z) {
            return ParquetMetadataConverter.SKIP_ROW_GROUPS;
        }
        return ParquetMetadataConverter.NO_FILTER;
    }

    /**
     * Reads the footers of the given files, preferring the summary files found in their parent
     * directories and falling back to reading individual footers for files no summary covers.
     *
     * @param configuration the Hadoop configuration; also supplies the read parallelism
     * @param collection the files whose footers are wanted
     * @param z {@code true} to skip row groups when reading footers
     * @return the footers found through summaries (in the iteration order of {@code collection}),
     *         followed by those that had to be read individually
     * @throws IOException if a summary or footer cannot be read
     */
    public static List<Footer> readAllFootersInParallelUsingSummaryFiles(final Configuration configuration, final Collection<FileStatus> collection, final boolean z) throws IOException {
        // One summary lookup per distinct parent directory.
        HashSet<Path> parentDirs = new HashSet<Path>();
        for (FileStatus part : collection) {
            parentDirs.add(part.getPath().getParent());
        }

        List<Callable<Map<Path, Footer>>> summaryTasks = new ArrayList<Callable<Map<Path, Footer>>>();
        for (final Path parentDir : parentDirs) {
            summaryTasks.add(new Callable<Map<Path, Footer>>() { // from class: parquet.hadoop.ParquetFileReader.1
                @Override // java.util.concurrent.Callable
                public Map<Path, Footer> call() throws Exception {
                    ParquetMetadata summary = ParquetFileReader.readSummaryMetadata(configuration, parentDir, z);
                    if (summary == null) {
                        return Collections.emptyMap();
                    }
                    List<Footer> summaryFooters;
                    if (!z) {
                        summaryFooters = ParquetFileReader.footersFromSummaryFile(parentDir, summary);
                    } else {
                        // Without row groups the summary metadata is attached to every requested file as is.
                        summaryFooters = new ArrayList<Footer>();
                        for (FileStatus requested : collection) {
                            summaryFooters.add(new Footer(requested.getPath(), summary));
                        }
                    }
                    // Re-anchor every footer under this directory, keeping only the file name.
                    Map<Path, Footer> footersByPath = new HashMap<Path, Footer>();
                    for (Footer summaryFooter : summaryFooters) {
                        Path rebasedPath = new Path(parentDir, summaryFooter.getFile().getName());
                        Footer rebased = new Footer(rebasedPath, summaryFooter.getParquetMetadata());
                        footersByPath.put(rebased.getFile(), rebased);
                    }
                    return footersByPath;
                }
            });
        }

        Map<Path, Footer> fromSummaries = new HashMap<Path, Footer>();
        try {
            int parallelism = configuration.getInt(PARQUET_READ_PARALLELISM, 5);
            for (Map<Path, Footer> partial : runAllInParallel(parallelism, summaryTasks)) {
                fromSummaries.putAll(partial);
            }
        } catch (ExecutionException e) {
            throw new IOException("Error reading summaries", e);
        }

        List<Footer> footers = new ArrayList<Footer>(collection.size());
        List<FileStatus> uncovered = new ArrayList<FileStatus>();
        for (FileStatus part : collection) {
            Footer known = fromSummaries.get(part.getPath());
            if (known == null) {
                uncovered.add(part);
            } else {
                footers.add(known);
            }
        }
        if (uncovered.size() > 0) {
            if (Log.INFO) {
                LOG.info("reading another " + uncovered.size() + " footers");
            }
            footers.addAll(readAllFootersInParallel(configuration, uncovered, z));
        }
        return footers;
    }

    /**
     * Runs all tasks on a fixed-size thread pool and collects their results in task order.
     *
     * @param i number of worker threads
     * @param list the tasks to run
     * @param <T> the result type of the tasks
     * @return the task results, in the same order as {@code list}
     * @throws ExecutionException if any task fails; remaining tasks are cancelled by the pool shutdown
     * @throws RuntimeException if the calling thread is interrupted while waiting; the interrupt
     *         flag is restored before throwing
     */
    private static <T> List<T> runAllInParallel(int i, List<Callable<T>> list) throws ExecutionException {
        LOG.info("Initiating action with parallelism: " + i);
        ExecutorService pool = Executors.newFixedThreadPool(i);
        try {
            List<Future<T>> pending = new ArrayList<Future<T>>(list.size());
            for (Callable<T> task : list) {
                pending.add(pool.submit(task));
            }
            List<T> results = new ArrayList<T>(list.size());
            for (Future<T> future : pending) {
                try {
                    results.add(future.get());
                } catch (InterruptedException e) {
                    // Future.get() clears the interrupt status when it throws; restore it so
                    // callers further up the stack can still observe the interruption.
                    Thread.currentThread().interrupt();
                    throw new RuntimeException("The thread was interrupted", e);
                }
            }
            return results;
        } finally {
            // Also cancels tasks still running when we leave early because of a failure.
            pool.shutdownNow();
        }
    }

    @Deprecated
    /**
     * Reads the footers of the given files in parallel without skipping row groups.
     *
     * @param configuration the Hadoop configuration
     * @param list the files whose footers are wanted
     * @return one footer per file, in the order of {@code list}
     * @throws IOException if a footer cannot be read
     * @deprecated delegates to the overload that takes an explicit skip-row-groups flag
     */
    public static List<Footer> readAllFootersInParallel(Configuration configuration, List<FileStatus> list) throws IOException {
        final boolean skipRowGroups = false;
        return readAllFootersInParallel(configuration, list, skipRowGroups);
    }

    /**
     * Reads the footer of every given file, one task per file, on a thread pool.
     *
     * @param configuration the Hadoop configuration; also supplies the read parallelism
     * @param list the files whose footers are wanted
     * @param z {@code true} to skip row groups when reading footers
     * @return one footer per file, in the order of {@code list}
     * @throws IOException if any footer cannot be read
     */
    public static List<Footer> readAllFootersInParallel(final Configuration configuration, List<FileStatus> list, final boolean z) throws IOException {
        List<Callable<Footer>> footerTasks = new ArrayList<Callable<Footer>>();
        for (final FileStatus file : list) {
            footerTasks.add(new Callable<Footer>() { // from class: parquet.hadoop.ParquetFileReader.2
                @Override // java.util.concurrent.Callable
                public Footer call() throws Exception {
                    try {
                        ParquetMetadata metadata = ParquetFileReader.readFooter(configuration, file, ParquetFileReader.filter(z));
                        return new Footer(file.getPath(), metadata);
                    } catch (IOException e) {
                        // Add the file to the message so the failing input can be identified.
                        throw new IOException("Could not read footer for file " + file, e);
                    }
                }
            });
        }
        int parallelism = configuration.getInt(PARQUET_READ_PARALLELISM, 5);
        try {
            return runAllInParallel(parallelism, footerTasks);
        } catch (ExecutionException e) {
            throw new IOException("Could not read footer: " + e.getMessage(), e.getCause());
        }
    }

    /**
     * Reads the footers of a file, or of every non-hidden file found recursively under a
     * directory, without skipping row groups.
     *
     * @param configuration the Hadoop configuration
     * @param fileStatus a file or a directory
     * @return one footer per file found
     * @throws IOException if listing or reading fails
     */
    public static List<Footer> readAllFootersInParallel(Configuration configuration, FileStatus fileStatus) throws IOException {
        List<FileStatus> files = listFiles(configuration, fileStatus);
        return readAllFootersInParallel(configuration, files, false);
    }

    @Deprecated
    /**
     * Reads the footers found at the given path.
     *
     * @param configuration the Hadoop configuration
     * @param path a file or a directory
     * @return the footers found
     * @throws IOException if the path cannot be resolved or a footer cannot be read
     * @deprecated delegates to the {@link FileStatus}-based overload
     */
    public static List<Footer> readFooters(Configuration configuration, Path path) throws IOException {
        FileStatus resolved = status(configuration, path);
        return readFooters(configuration, resolved);
    }

    /**
     * Looks up the file status of a path on the file system that owns it.
     *
     * @param configuration the Hadoop configuration used to resolve the file system
     * @param path the path to look up
     * @return the status of {@code path}
     * @throws IOException if the file system or the status cannot be obtained
     */
    private static FileStatus status(Configuration configuration, Path path) throws IOException {
        FileSystem fileSystem = path.getFileSystem(configuration);
        return fileSystem.getFileStatus(path);
    }

    @Deprecated
    /**
     * Reads the footers found at the given file or directory without skipping row groups.
     *
     * @param configuration the Hadoop configuration
     * @param fileStatus a file or a directory
     * @return the footers found
     * @throws IOException if listing or reading fails
     * @deprecated delegates to the overload that takes an explicit skip-row-groups flag
     */
    public static List<Footer> readFooters(Configuration configuration, FileStatus fileStatus) throws IOException {
        final boolean skipRowGroups = false;
        return readFooters(configuration, fileStatus, skipRowGroups);
    }

    /**
     * Reads the footers of a file, or of every non-hidden file found recursively under a
     * directory, using summary files where available.
     *
     * @param configuration the Hadoop configuration
     * @param fileStatus a file or a directory
     * @param z {@code true} to skip row groups when reading footers
     * @return the footers found
     * @throws IOException if listing or reading fails
     */
    public static List<Footer> readFooters(Configuration configuration, FileStatus fileStatus, boolean z) throws IOException {
        List<FileStatus> files = listFiles(configuration, fileStatus);
        return readAllFootersInParallelUsingSummaryFiles(configuration, files, z);
    }

    /**
     * Collects the files reachable from the given status: the status itself when it is a file,
     * otherwise all files found by recursively listing the directory through
     * {@link HiddenFileFilter#INSTANCE}.
     *
     * @param configuration the Hadoop configuration used to resolve the file system
     * @param fileStatus a file or a directory
     * @return the files found
     * @throws IOException if a directory cannot be listed
     */
    private static List<FileStatus> listFiles(Configuration configuration, FileStatus fileStatus) throws IOException {
        if (fileStatus.isDir()) {
            Path directory = fileStatus.getPath();
            FileStatus[] children = directory.getFileSystem(configuration).listStatus(directory, HiddenFileFilter.INSTANCE);
            List<FileStatus> files = new ArrayList<FileStatus>();
            for (FileStatus child : children) {
                files.addAll(listFiles(configuration, child));
            }
            return files;
        }
        return Arrays.asList(fileStatus);
    }

    /**
     * Reads a summary file in full and splits it into one footer per file it references,
     * resolved against the summary file's parent directory.
     *
     * @param configuration the Hadoop configuration
     * @param fileStatus the summary file
     * @return one footer per referenced file
     * @throws IOException if the summary cannot be read
     */
    public static List<Footer> readSummaryFile(Configuration configuration, FileStatus fileStatus) throws IOException {
        Path parentDir = fileStatus.getPath().getParent();
        ParquetMetadata summary = readFooter(configuration, fileStatus, filter(false));
        return footersFromSummaryFile(parentDir, summary);
    }

    /**
     * Reads the summary metadata of a directory, if it has any.
     *
     * <p>When row groups are skipped, the common-metadata file is preferred if it exists;
     * otherwise the {@code _metadata} file is used.
     *
     * @param configuration the Hadoop configuration
     * @param path the directory to look in
     * @param z {@code true} to skip row groups
     * @return the summary metadata, or {@code null} when no usable summary file exists
     * @throws IOException if the file system cannot be queried or the summary cannot be read
     */
    static ParquetMetadata readSummaryMetadata(Configuration configuration, Path path, boolean z) throws IOException {
        Path metadataFile = new Path(path, "_metadata");
        Path commonMetadataFile = new Path(path, ParquetFileWriter.PARQUET_COMMON_METADATA_FILE);
        FileSystem fileSystem = path.getFileSystem(configuration);

        Path summaryFile;
        if (z && fileSystem.exists(commonMetadataFile)) {
            summaryFile = commonMetadataFile;
        } else if (fileSystem.exists(metadataFile)) {
            summaryFile = metadataFile;
        } else {
            return null;
        }
        if (Log.INFO) {
            LOG.info("reading summary file: " + summaryFile);
        }
        return readFooter(configuration, summaryFile, filter(z));
    }

    /**
     * Splits summary metadata into one footer per data file by grouping its blocks on the path
     * each block records, resolved against the given directory.
     *
     * @param path the directory the block paths are relative to
     * @param parquetMetadata the summary metadata
     * @return one footer per distinct file, each sharing the summary's file metadata
     */
    static List<Footer> footersFromSummaryFile(Path path, ParquetMetadata parquetMetadata) {
        Map<Path, ParquetMetadata> metadataByFile = new HashMap<Path, ParquetMetadata>();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            Path file = new Path(path, block.getPath());
            ParquetMetadata fileMetadata = metadataByFile.get(file);
            if (fileMetadata == null) {
                // First block seen for this file: start it off with an empty block list.
                fileMetadata = new ParquetMetadata(parquetMetadata.getFileMetaData(), new ArrayList<BlockMetaData>());
                metadataByFile.put(file, fileMetadata);
            }
            fileMetadata.getBlocks().add(block);
        }

        List<Footer> footers = new ArrayList<Footer>();
        for (Map.Entry<Path, ParquetMetadata> perFile : metadataByFile.entrySet()) {
            footers.add(new Footer(perFile.getKey(), perFile.getValue()));
        }
        return footers;
    }

    @Deprecated
    /**
     * Reads the complete footer of the file at the given path.
     *
     * @param configuration the Hadoop configuration
     * @param path the file to read
     * @return the footer metadata
     * @throws IOException if the file cannot be read
     * @deprecated delegates to the overload that takes an explicit metadata filter
     */
    public static final ParquetMetadata readFooter(Configuration configuration, Path path) throws IOException {
        ParquetMetadataConverter.MetadataFilter everything = ParquetMetadataConverter.NO_FILTER;
        return readFooter(configuration, path, everything);
    }

    /**
     * Reads the footer of the file at the given path, applying the given metadata filter.
     *
     * @param configuration the Hadoop configuration
     * @param path the file to read
     * @param metadataFilter selects which parts of the footer are materialized
     * @return the footer metadata
     * @throws IOException if the path cannot be resolved or the file cannot be read
     */
    public static ParquetMetadata readFooter(Configuration configuration, Path path, ParquetMetadataConverter.MetadataFilter metadataFilter) throws IOException {
        FileSystem fileSystem = path.getFileSystem(configuration);
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        return readFooter(configuration, fileStatus, metadataFilter);
    }

    @Deprecated
    /**
     * Reads the complete footer of the given file.
     *
     * @param configuration the Hadoop configuration
     * @param fileStatus the file to read
     * @return the footer metadata
     * @throws IOException if the file cannot be read
     * @deprecated delegates to the overload that takes an explicit metadata filter
     */
    public static final ParquetMetadata readFooter(Configuration configuration, FileStatus fileStatus) throws IOException {
        ParquetMetadataConverter.MetadataFilter everything = ParquetMetadataConverter.NO_FILTER;
        return readFooter(configuration, fileStatus, everything);
    }

    /**
     * Reads the footer of a Parquet file.
     *
     * <p>The tail of the file is expected to be: footer bytes, a 4-byte little-endian footer
     * length, then the magic number. The method validates the magic number and the footer
     * position before decoding the footer.
     *
     * @param configuration the Hadoop configuration used to resolve the file system
     * @param fileStatus the file to read; its length is used to locate the footer
     * @param metadataFilter selects which parts of the footer are materialized
     * @return the footer metadata
     * @throws IOException if the file cannot be opened or read
     * @throws RuntimeException if the file is too small, lacks the trailing magic number, or
     *         records a footer position outside the file
     */
    public static final ParquetMetadata readFooter(Configuration configuration, FileStatus fileStatus, ParquetMetadataConverter.MetadataFilter metadataFilter) throws IOException {
        // Must be an FSDataInputStream: seek() and readFully() are not available on java.io.InputStream.
        FSDataInputStream open = fileStatus.getPath().getFileSystem(configuration).open(fileStatus.getPath());
        try {
            long len = fileStatus.getLen();
            if (Log.DEBUG) {
                LOG.debug("File length " + len);
            }
            // Smallest possible file: leading magic + footer length + trailing magic.
            if (len < ParquetFileWriter.MAGIC.length + 4 + ParquetFileWriter.MAGIC.length) {
                throw new RuntimeException(fileStatus.getPath() + " is not a Parquet file (too small)");
            }
            long length = (len - 4) - ParquetFileWriter.MAGIC.length;
            if (Log.DEBUG) {
                LOG.debug("reading footer index at " + length);
            }
            open.seek(length);
            int readIntLittleEndian = BytesUtils.readIntLittleEndian(open);
            byte[] bArr = new byte[ParquetFileWriter.MAGIC.length];
            open.readFully(bArr);
            if (!Arrays.equals(ParquetFileWriter.MAGIC, bArr)) {
                throw new RuntimeException(fileStatus.getPath() + " is not a Parquet file. expected magic number at tail " + Arrays.toString(ParquetFileWriter.MAGIC) + " but found " + Arrays.toString(bArr));
            }
            long j = length - readIntLittleEndian;
            if (Log.DEBUG) {
                LOG.debug("read footer length: " + readIntLittleEndian + ", footer index: " + j);
            }
            // The footer must start after the leading magic and before its own length field.
            if (j < ParquetFileWriter.MAGIC.length || j >= length) {
                throw new RuntimeException("corrupted file: the footer index is not within the file");
            }
            open.seek(j);
            return converter.readParquetMetadata(open, metadataFilter);
        } finally {
            // Single close on every path, success or failure.
            open.close();
        }
    }

    public ParquetFileReader(Configuration configuration, Path path, List<BlockMetaData> list, List<ColumnDescriptor> list2) throws IOException {
        this.filePath = path;
        this.f = path.getFileSystem(configuration).open(path);
        this.blocks = list;
        for (ColumnDescriptor columnDescriptor : list2) {
            this.paths.put(ColumnPath.get(columnDescriptor.getPath()), columnDescriptor);
        }
        this.codecFactory = new CodecFactory(configuration);
    }

    /**
     * Reads all requested column chunks of the next row group.
     *
     * <p>Chunks that are adjacent in the file are grouped so each group is fetched with one read.
     *
     * @return the pages of the next row group, or {@code null} when every row group has been read
     * @throws IOException if reading the file fails
     * @throws RuntimeException if the row group declares zero rows
     */
    public PageReadStore readNextRowGroup() throws IOException {
        if (this.currentBlock == this.blocks.size()) {
            return null;
        }
        BlockMetaData block = this.blocks.get(this.currentBlock);
        if (block.getRowCount() == 0) {
            throw new RuntimeException("Illegal row group of 0 rows");
        }
        ColumnChunkPageReadStore rowGroup = new ColumnChunkPageReadStore(block.getRowCount());

        // Pass 1: group the requested chunks into runs that are contiguous in the file.
        List<ConsecutiveChunkList> runs = new ArrayList<ConsecutiveChunkList>();
        ConsecutiveChunkList currentRun = null;
        for (ColumnChunkMetaData chunkMetadata : block.getColumns()) {
            ColumnPath columnPath = chunkMetadata.getPath();
            BenchmarkCounter.incrementTotalBytes(chunkMetadata.getTotalSize());
            ColumnDescriptor requested = this.paths.get(columnPath);
            if (requested == null) {
                continue;
            }
            long chunkStart = chunkMetadata.getStartingPos();
            boolean extendsCurrentRun = currentRun != null && currentRun.endPos() == chunkStart;
            if (!extendsCurrentRun) {
                currentRun = new ConsecutiveChunkList(chunkStart);
                runs.add(currentRun);
            }
            currentRun.addChunk(new ChunkDescriptor(requested, chunkMetadata, chunkStart, (int) chunkMetadata.getTotalSize()));
        }

        // Pass 2: fetch each run and decode the pages of every chunk in it.
        for (ConsecutiveChunkList run : runs) {
            for (Chunk chunk : run.readAll(this.f)) {
                rowGroup.addColumn(chunk.descriptor.col, chunk.readAllPages());
            }
        }
        this.currentBlock++;
        return rowGroup;
    }

    @Override // java.io.Closeable, java.lang.AutoCloseable
    /**
     * Closes the underlying file stream and releases the codec factory.
     *
     * @throws IOException if closing the stream fails; the codec factory is released regardless
     */
    public void close() throws IOException {
        try {
            this.f.close();
        } finally {
            // Release the codecs even when closing the stream throws.
            this.codecFactory.release();
        }
    }
}
