package org.apache.drill.exec.store.parquet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.expression.ValueExpressions;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.expr.FilterPredicate;
import org.apache.drill.exec.expr.stat.RowsMatch;
import org.apache.drill.exec.ops.ExecutorFragmentContext;
import org.apache.drill.exec.ops.OperatorContext;
import org.apache.drill.exec.physical.base.AbstractGroupScanWithMetadata;
import org.apache.drill.exec.physical.impl.ScanBatch;
import org.apache.drill.exec.proto.helper.QueryIdHelper;
import org.apache.drill.exec.record.metadata.TupleSchema;
import org.apache.drill.exec.server.options.OptionManager;
import org.apache.drill.exec.store.ColumnExplorer;
import org.apache.drill.exec.store.CommonParquetRecordReader;
import org.apache.drill.exec.store.dfs.DrillFileSystem;
import org.apache.drill.exec.store.ischema.InfoSchemaConstants;
import org.apache.drill.exec.store.parquet.FilterEvaluatorUtils;
import org.apache.drill.exec.store.parquet.ParquetReaderUtility;
import org.apache.drill.exec.store.parquet.columnreaders.ParquetRecordReader;
import org.apache.drill.exec.store.parquet.compression.DrillCompressionCodecFactory;
import org.apache.drill.exec.store.parquet.metadata.Metadata;
import org.apache.drill.exec.store.parquet.metadata.MetadataBase;
import org.apache.drill.exec.store.parquet.metadata.Metadata_V4;
import org.apache.drill.exec.store.parquet2.DrillParquetReader;
import org.apache.drill.metastore.statistics.ColumnStatistics;
import org.apache.drill.metastore.util.SchemaPathUtils;
import org.apache.drill.shaded.guava.com.google.common.base.Functions;
import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;
import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/drill/exec/store/parquet/AbstractParquetScanBatchCreator.class */
public abstract class AbstractParquetScanBatchCreator {
    private static final Logger logger = LoggerFactory.getLogger(AbstractParquetScanBatchCreator.class);

    /**
     * Base type for objects that hand out a {@link DrillFileSystem} for a given Hadoop
     * configuration and file path. The operator context passed at construction is kept
     * in a protected field so that subclasses can use it when producing the file system.
     */
    protected static abstract class AbstractDrillFileSystemManager {
        /** Operator context supplied at construction time; accessible to subclasses. */
        protected final OperatorContext operatorContext;

        /**
         * Stores the given operator context.
         *
         * <p>NOTE(review): the decompiler reported that this constructor's access modifier
         * was changed from {@code protected}; confirm the intended visibility against the
         * original sources.
         *
         * @param operatorContext operator context made available to subclasses
         */
        public AbstractDrillFileSystemManager(OperatorContext operatorContext) {
            this.operatorContext = operatorContext;
        }

        /**
         * Returns the file system to use for the given configuration and path.
         *
         * @param configuration Hadoop configuration for the file
         * @param path          path of the file that is going to be read
         * @return the file system to read {@code path} with
         * @throws ExecutionSetupException if the file system cannot be provided
         */
        protected abstract DrillFileSystem get(Configuration configuration, Path path) throws ExecutionSetupException;
    }

    /**
     * Builds the {@link ScanBatch} for a Parquet row-group scan.
     *
     * <p>For every row group read entry the file footer is read (once per file path) and,
     * when the scan carries a filter that is not the constant {@code true}, the row group's
     * column statistics are evaluated against that filter. Row groups that are empty or for
     * which the filter evaluates to {@link RowsMatch#NONE} are pruned: they are counted but
     * no reader is created for them. If every row group was pruned, a single reader over the
     * first pruned row group is created with zero records to read, so that the batch still
     * has a reader to work with.
     *
     * <p>After all readers are created, each per-reader implicit-column map is padded with
     * {@code null} values for the keys that only appear in the largest implicit-column map.
     *
     * @param executorFragmentContext     fragment context (options, function registry)
     * @param abstractParquetRowGroupScan scan definition: row groups, columns, filter, schema
     * @param operatorContext             operator context handed to the readers and the batch
     * @return scan batch over the readers of all row groups that were not pruned
     * @throws ExecutionSetupException if reading footers or metadata fails with an
     *                                 {@link IOException} or {@link InterruptedException}
     */
    public ScanBatch getBatch(ExecutorFragmentContext executorFragmentContext, AbstractParquetRowGroupScan abstractParquetRowGroupScan, OperatorContext operatorContext) throws ExecutionSetupException {
        ColumnExplorer columnExplorer = new ColumnExplorer(executorFragmentContext.getOptions(), abstractParquetRowGroupScan.getColumns());
        if (!columnExplorer.isStarQuery()) {
            // Narrow the scan to the table columns. The operator id has to be taken from the
            // ORIGINAL scan; reading it back from the fresh copy would be a no-op.
            AbstractParquetRowGroupScan projectedScan = abstractParquetRowGroupScan.copy(columnExplorer.getTableColumns());
            projectedScan.setOperatorId(abstractParquetRowGroupScan.getOperatorId());
            abstractParquetRowGroupScan = projectedScan;
        }
        AbstractDrillFileSystemManager fsManager = getDrillFileSystemCreator(operatorContext, executorFragmentContext.getOptions());
        // Footers are cached per file so that a file with several row groups is opened once.
        Map<Path, ParquetMetadata> footers = new HashMap<>();
        List<CommonParquetRecordReader> readers = new LinkedList<>();
        List<Map<String, String>> implicitColumns = new ArrayList<>();
        Map<String, String> mapWithMaxColumns = new LinkedHashMap<>();
        ParquetReaderConfig readerConfig = abstractParquetRowGroupScan.getReaderConfig();
        // First pruned row group and its footer, kept for the "everything was pruned" fallback.
        RowGroupReadEntry firstPrunedRowGroup = null;
        ParquetMetadata firstPrunedFooter = null;
        long rowGroupsPruned = 0;
        try {
            ValueExpressions.BooleanExpression filter = abstractParquetRowGroupScan.getFilter();
            // Run-time pruning is pointless without a filter or with a constant TRUE filter.
            boolean doRuntimePruning = filter != null && !((filter instanceof ValueExpressions.BooleanExpression) && filter.getBoolean());
            Path prevRowGroupPath = null;
            Metadata_V4.ParquetTableMetadata_v4 tableMetadata = null;
            Metadata_V4.ParquetFileAndRowCountMetadata fileMetadata = null;
            FilterPredicate<?> filterPredicate = null;
            Set<SchemaPath> schemaPathsInExpr = null;
            Set<SchemaPath> columnsInExpr = null;
            long totalPruneTime = 0;
            long totalRowGroups = abstractParquetRowGroupScan.getRowGroupReadEntries().size();
            Stopwatch pruneTimer = Stopwatch.createUnstarted();
            if (doRuntimePruning) {
                filterPredicate = AbstractGroupScanWithMetadata.getFilterPredicate(filter, executorFragmentContext, executorFragmentContext.getFunctionRegistry(), executorFragmentContext.getOptions(), true, true, abstractParquetRowGroupScan.getSchema());
                schemaPathsInExpr = (Set) filter.accept(FilterEvaluatorUtils.FieldReferenceFinder.INSTANCE, (Object) null);
                columnsInExpr = new HashSet<>();
                String partitionColumnLabel = executorFragmentContext.getOptions().getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL).string_val;
                for (SchemaPath schemaPath : schemaPathsInExpr) {
                    // When the scan supports file implicit columns, references that look like
                    // "<partition label><digits>" are not table columns and are left out.
                    if (!abstractParquetRowGroupScan.supportsFileImplicitColumns() || !schemaPath.toString().matches(partitionColumnLabel + "\\d+")) {
                        columnsInExpr.add(SchemaPath.getSimplePath(schemaPath.getRootSegmentPath()));
                    }
                }
                // Nothing to evaluate statistics on -> no pruning.
                doRuntimePruning = !columnsInExpr.isEmpty();
            }
            for (RowGroupReadEntry rowGroup : abstractParquetRowGroupScan.getRowGroupReadEntries()) {
                Stopwatch footerTimer = logger.isTraceEnabled() ? Stopwatch.createUnstarted() : null;
                DrillFileSystem fs = fsManager.get(abstractParquetRowGroupScan.getFsConf(rowGroup), rowGroup.getPath());
                if (!footers.containsKey(rowGroup.getPath())) {
                    if (footerTimer != null) {
                        footerTimer.start();
                    }
                    ParquetMetadata newFooter = readFooter(fs.getConf(), rowGroup.getPath(), readerConfig);
                    if (footerTimer != null) {
                        logger.trace("ParquetTrace,Read Footer,{},{},{},{},{},{},{}", InfoSchemaConstants.IS_CATALOG_CONNECT, rowGroup.getPath(), InfoSchemaConstants.IS_CATALOG_CONNECT, 0, 0, 0, footerTimer.elapsed(TimeUnit.MICROSECONDS));
                    }
                    footers.put(rowGroup.getPath(), newFooter);
                }
                ParquetMetadata footer = footers.get(rowGroup.getPath());
                if (doRuntimePruning) {
                    pruneTimer.start();
                    // Default: keep the row group (also the outcome when evaluation fails).
                    RowsMatch matchResult = RowsMatch.ALL;
                    if (rowGroup.isEmpty()) {
                        matchResult = RowsMatch.NONE;
                    } else {
                        int rowGroupIndex = rowGroup.getRowGroupIndex();
                        long footerRowCount = footer.getBlocks().get(rowGroupIndex).getRowCount();
                        // File-level metadata is rebuilt only when the file path changes.
                        if (!rowGroup.getPath().equals(prevRowGroupPath)) {
                            tableMetadata = new Metadata_V4.ParquetTableMetadata_v4();
                            fileMetadata = Metadata.getParquetFileMetadata_v4(tableMetadata, footer, fs.getFileStatus(rowGroup.getPath()), fs, false, true, columnsInExpr, readerConfig);
                            prevRowGroupPath = rowGroup.getPath();
                        }
                        MetadataBase.RowGroupMetadata rowGroupMetadata = fileMetadata.getFileMetadata().getRowGroups().get(rowGroup.getRowGroupIndex());
                        Map<SchemaPath, ColumnStatistics<?>> columnsStatistics = ParquetTableMetadataUtils.getRowGroupColumnStatistics(tableMetadata, rowGroupMetadata);
                        try {
                            Map<SchemaPath, TypeProtos.MajorType> intermediateFields = ParquetTableMetadataUtils.getIntermediateFields(tableMetadata, rowGroupMetadata);
                            Map<SchemaPath, TypeProtos.MajorType> rowGroupFields = ParquetTableMetadataUtils.getRowGroupFields(tableMetadata, rowGroupMetadata);
                            TupleSchema rowGroupSchema = new TupleSchema();
                            rowGroupFields.forEach((fieldPath, fieldType) ->
                                    SchemaPathUtils.addColumnMetadata(rowGroupSchema, fieldPath, fieldType, intermediateFields));
                            // The predicate is rebuilt when this row group's schema differs from the scan schema.
                            if (!rowGroupSchema.isEquivalent(abstractParquetRowGroupScan.getSchema())) {
                                filterPredicate = AbstractGroupScanWithMetadata.getFilterPredicate(filter, executorFragmentContext, executorFragmentContext.getFunctionRegistry(), executorFragmentContext.getOptions(), true, true, rowGroupSchema);
                            }
                            matchResult = FilterEvaluatorUtils.matches(filterPredicate, columnsStatistics, footerRowCount, rowGroupSchema, schemaPathsInExpr);
                            long pruneTime = pruneTimer.elapsed(TimeUnit.MICROSECONDS);
                            totalPruneTime += pruneTime;
                            logger.trace("Run-time pruning: {} row-group {} (RG index: {} row count: {}), took {} usec", matchResult == RowsMatch.NONE ? "Excluded" : "Included", rowGroup.getPath(), rowGroupIndex, footerRowCount, pruneTime);
                        } catch (Exception e) {
                            // Deliberate best effort: a failed check only means this row group is not pruned.
                            logger.warn("Run-time pruning check failed - {}. Skip pruning rowgroup - {}", e.getMessage(), rowGroup.getPath());
                            logger.debug("Failure during run-time pruning: {}", e.getMessage(), e);
                        }
                    }
                    pruneTimer.stop();
                    pruneTimer.reset();
                    if (matchResult == RowsMatch.NONE) {
                        rowGroupsPruned++;
                        if (firstPrunedRowGroup == null) {
                            firstPrunedRowGroup = rowGroup;
                            firstPrunedFooter = footer;
                        }
                        // Pruned: do not create a reader for this row group.
                        continue;
                    }
                }
                mapWithMaxColumns = createReaderAndImplicitColumns(executorFragmentContext, abstractParquetRowGroupScan, operatorContext, columnExplorer, readers, implicitColumns, mapWithMaxColumns, rowGroup, fs, footer, false);
            }
            // Everything was pruned: create one reader over the first pruned row group that reads no records.
            if (readers.isEmpty() && firstPrunedRowGroup != null) {
                DrillFileSystem fs = fsManager.get(abstractParquetRowGroupScan.getFsConf(firstPrunedRowGroup), firstPrunedRowGroup.getPath());
                mapWithMaxColumns = createReaderAndImplicitColumns(executorFragmentContext, abstractParquetRowGroupScan, operatorContext, columnExplorer, readers, implicitColumns, mapWithMaxColumns, firstPrunedRowGroup, fs, firstPrunedFooter, true);
            }
            if (totalPruneTime > 0) {
                logger.info("Finished parquet_runtime_pruning in {} usec. Out of given {} rowgroups, {} were pruned. {}", totalPruneTime, totalRowGroups, rowGroupsPruned, totalRowGroups == rowGroupsPruned ? "ALL_PRUNED !!" : InfoSchemaConstants.IS_CATALOG_CONNECT);
            }
            for (CommonParquetRecordReader reader : readers) {
                reader.updateRowGroupsStats(totalRowGroups, rowGroupsPruned);
            }
            // View of the widest implicit-column map with every value replaced by null; used to
            // add the missing keys (with null values) to each per-reader map.
            Map<String, String> nullDefaults = Maps.transformValues(mapWithMaxColumns, Functions.<String>constant(null));
            for (Map<String, String> readerImplicitColumns : implicitColumns) {
                readerImplicitColumns.putAll(Maps.difference(readerImplicitColumns, nullDefaults).entriesOnlyOnRight());
            }
            return new ScanBatch(executorFragmentContext, operatorContext, readers, implicitColumns);
        } catch (IOException | InterruptedException e) {
            throw new ExecutionSetupException(e);
        }
    }

    /**
     * Creates the record reader for a single row group, appends it to {@code list}, and
     * appends the row group's implicit-column values to {@code list2}.
     *
     * <p>A {@link DrillParquetReader} is used when the {@code PARQUET_NEW_RECORD_READER}
     * option is on or the selected columns include a complex column; otherwise a
     * {@link ParquetRecordReader} is used.
     *
     * @param executorFragmentContext     fragment context providing the options
     * @param abstractParquetRowGroupScan scan definition (columns, reader config, partition values)
     * @param operatorContext             operator context (allocator, query id for logging)
     * @param columnExplorer              resolves table columns and implicit columns
     * @param list                        readers created so far; the new reader is added to it
     * @param list2                       per-reader implicit-column maps; the new map is added to it
     * @param map                         largest implicit-column map seen so far
     * @param rowGroupReadEntry           row group to read
     * @param drillFileSystem             file system used to read the row group
     * @param parquetMetadata             footer of the file containing the row group
     * @param z                           when {@code true} the reader is created with zero records to read
     * @return the new implicit-column map if it has more entries than {@code map}, otherwise {@code map}
     */
    private Map<String, String> createReaderAndImplicitColumns(ExecutorFragmentContext executorFragmentContext, AbstractParquetRowGroupScan abstractParquetRowGroupScan, OperatorContext operatorContext, ColumnExplorer columnExplorer, List<CommonParquetRecordReader> list, List<Map<String, String>> list2, Map<String, String> map, RowGroupReadEntry rowGroupReadEntry, DrillFileSystem drillFileSystem, ParquetMetadata parquetMetadata, boolean z) {
        ParquetReaderUtility.DateCorruptionStatus corruptionStatus = ParquetReaderUtility.detectCorruptDates(
                parquetMetadata,
                abstractParquetRowGroupScan.getColumns(),
                abstractParquetRowGroupScan.getReaderConfig().autoCorrectCorruptedDates());
        logger.debug("Contains corrupt dates: {}.", corruptionStatus);

        boolean newReaderEnabled = executorFragmentContext.getOptions().getBoolean(ExecConstants.PARQUET_NEW_RECORD_READER);
        boolean hasComplexColumn = ParquetReaderUtility.containsComplexColumn(parquetMetadata, abstractParquetRowGroupScan.getColumns());
        String readerOptionState = newReaderEnabled ? "enabled" : "disabled";
        String complexColumnState = hasComplexColumn ? "found." : "not found.";
        logger.debug("PARQUET_NEW_RECORD_READER is {}. Complex columns {}.", readerOptionState, complexColumnState);

        long recordsToRead;
        if (z) {
            recordsToRead = 0L;
        } else {
            recordsToRead = rowGroupReadEntry.getNumRecordsToRead();
        }

        CommonParquetRecordReader reader;
        if (!newReaderEnabled && !hasComplexColumn) {
            reader = new ParquetRecordReader(executorFragmentContext, rowGroupReadEntry.getPath(), rowGroupReadEntry.getRowGroupIndex(), recordsToRead, drillFileSystem, DrillCompressionCodecFactory.createDirectCodecFactory(drillFileSystem.getConf(), new ParquetDirectByteBufferAllocator(operatorContext.getAllocator()), 0), parquetMetadata, abstractParquetRowGroupScan.getColumns(), corruptionStatus);
        } else {
            reader = new DrillParquetReader(executorFragmentContext, parquetMetadata, rowGroupReadEntry, columnExplorer.getTableColumns(), drillFileSystem, corruptionStatus, recordsToRead);
        }
        logger.debug("Query {} uses {}", QueryIdHelper.getQueryId(operatorContext.getFragmentContext().getHandle().getQueryId()), reader.getClass().getSimpleName());
        list.add(reader);

        Map<String, String> implicitValues = columnExplorer.populateColumns(
                rowGroupReadEntry.getPath(),
                abstractParquetRowGroupScan.getPartitionValues(rowGroupReadEntry),
                abstractParquetRowGroupScan.supportsFileImplicitColumns(),
                drillFileSystem,
                rowGroupReadEntry.getRowGroupIndex(),
                rowGroupReadEntry.getStart(),
                rowGroupReadEntry.getLength());
        list2.add(implicitValues);

        // Keep whichever map carries more implicit columns.
        return implicitValues.size() > map.size() ? implicitValues : map;
    }

    /**
     * Supplies the file-system manager that {@code getBatch} uses to obtain a
     * {@link DrillFileSystem} for each row group it reads.
     *
     * @param operatorContext operator context of the scan being set up
     * @param optionManager   options of the executing fragment
     * @return manager that produces the file system for a given configuration and path
     */
    protected abstract AbstractDrillFileSystemManager getDrillFileSystemCreator(OperatorContext operatorContext, OptionManager optionManager);

    /**
     * Opens the Parquet file at {@code path} and returns its footer.
     *
     * <p>The file reader is closed exactly once before returning, whether or not reading
     * the footer succeeds; a failure while closing after an earlier failure is attached
     * to the original exception as suppressed.
     *
     * @param configuration       Hadoop configuration; passed through
     *                            {@link ParquetReaderConfig#addCountersToConf} before opening the file
     * @param path                path of the Parquet file
     * @param parquetReaderConfig reader configuration that supplies the read options
     * @return footer metadata of the file
     * @throws IOException if the file cannot be opened, read, or closed
     */
    private ParquetMetadata readFooter(Configuration configuration, Path path, ParquetReaderConfig parquetReaderConfig) throws IOException {
        try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(path, parquetReaderConfig.addCountersToConf(configuration)), parquetReaderConfig.toReadOptions())) {
            return reader.getFooter();
        }
    }
}
