package org.apache.drill.exec.store.parquet;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.drill.common.expression.ErrorCollectorImpl;
import org.apache.drill.common.expression.ExpressionStringBuilder;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.expression.ValueExpressions;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.compile.sig.ConstantExpressionIdentifier;
import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
import org.apache.drill.exec.expr.fn.FunctionImplementationRegistry;
import org.apache.drill.exec.expr.stat.ParquetFilterPredicate;
import org.apache.drill.exec.ops.UdfUtilities;
import org.apache.drill.exec.physical.EndpointAffinity;
import org.apache.drill.exec.physical.base.AbstractFileGroupScan;
import org.apache.drill.exec.physical.base.GroupScan;
import org.apache.drill.exec.physical.base.ScanStats;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.proto.CoordinationProtos;
import org.apache.drill.exec.server.options.OptionManager;
import org.apache.drill.exec.store.ColumnExplorer;
import org.apache.drill.exec.store.dfs.FileSelection;
import org.apache.drill.exec.store.dfs.ReadEntryWithPath;
import org.apache.drill.exec.store.ischema.InfoSchemaConstants;
import org.apache.drill.exec.store.parquet.ParquetRGFilterEvaluator;
import org.apache.drill.exec.store.parquet.metadata.MetadataBase;
import org.apache.drill.exec.store.parquet.stat.ColumnStatistics;
import org.apache.drill.exec.store.parquet.stat.ParquetMetaStatCollector;
import org.apache.drill.exec.store.schedule.AffinityCreator;
import org.apache.drill.exec.store.schedule.AssignmentCreator;
import org.apache.drill.exec.store.schedule.EndpointByteMapImpl;
import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
import org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap;
import org.apache.drill.shaded.guava.com.google.common.collect.ListMultimap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.class */
/**
 * Base class for group scans over Parquet data.
 *
 * <p>Holds the projected columns, the file read entries, an optional filter expression and the
 * per-row-group bookkeeping ({@link RowGroupInfo}) built by {@link #init()}. On top of that state it
 * implements row-group pruning driven by a filter ({@link #applyFilter}) and by a row limit
 * ({@link #applyLimit}), and the assignment of row groups to minor fragments.
 *
 * <p>Subclasses supply the table metadata ({@link #initInternal()}), the available drillbits, the
 * partition values of a row group and a way to clone the scan for a reduced file selection.
 */
public abstract class AbstractParquetGroupScan extends AbstractFileGroupScan {
    private static final Logger logger = LoggerFactory.getLogger(AbstractParquetGroupScan.class);

    protected List<SchemaPath> columns;
    protected List<ReadEntryWithPath> entries;
    protected LogicalExpression filter;
    protected MetadataBase.ParquetTableMetadataBase parquetTableMetadata;
    protected List<RowGroupInfo> rowGroupInfos;
    protected ListMultimap<Integer, RowGroupInfo> mappings;
    protected Set<String> fileSet;
    protected ParquetReaderConfig readerConfig;
    private List<EndpointAffinity> endpointAffinities;
    private ParquetGroupScanStatistics parquetGroupScanStatistics;
    // Set by applyFilter(): true only when every retained row group was reported as RowsMatch.ALL.
    private boolean matchAllRowGroups;

    /**
     * Creates a scan from its serializable parts. Row-group state is not built here; see
     * {@link #init()}.
     *
     * @param userName forwarded verbatim to the superclass constructor
     *                 (NOTE(review): presumably the name of the user running the scan - confirm)
     * @param columns projected columns, may be {@code null}
     * @param entries file read entries
     * @param readerConfig reader configuration; {@code null} selects
     *                     {@link ParquetReaderConfig#getDefaultInstance()}
     * @param filter filter expression, may be {@code null}
     */
    // NOTE: the decompiler reported that this constructor's access modifier was changed from protected.
    public AbstractParquetGroupScan(String userName, List<SchemaPath> columns, List<ReadEntryWithPath> entries, ParquetReaderConfig readerConfig, LogicalExpression filter) {
        super(userName);
        this.columns = columns;
        this.entries = entries;
        this.readerConfig = readerConfig == null ? ParquetReaderConfig.getDefaultInstance() : readerConfig;
        this.filter = filter;
    }

    /**
     * Copy constructor. Collections are copied into new containers (the elements themselves are
     * shared); table metadata, filter and reader config are shared by reference.
     *
     * @param that scan to copy
     */
    // NOTE: the decompiler reported that this constructor's access modifier was changed from protected.
    public AbstractParquetGroupScan(AbstractParquetGroupScan that) {
        super(that);
        this.columns = that.columns == null ? null : new ArrayList<>(that.columns);
        this.parquetTableMetadata = that.parquetTableMetadata;
        this.rowGroupInfos = that.rowGroupInfos == null ? null : new ArrayList<>(that.rowGroupInfos);
        this.filter = that.filter;
        this.endpointAffinities = that.endpointAffinities == null ? null : new ArrayList<>(that.endpointAffinities);
        this.mappings = that.mappings == null ? null : ArrayListMultimap.create(that.mappings);
        this.parquetGroupScanStatistics = that.parquetGroupScanStatistics == null ? null : new ParquetGroupScanStatistics(that.parquetGroupScanStatistics);
        this.fileSet = that.fileSet == null ? null : new HashSet<>(that.fileSet);
        this.entries = that.entries == null ? null : new ArrayList<>(that.entries);
        this.readerConfig = that.readerConfig;
        this.matchAllRowGroups = that.matchAllRowGroups;
    }

    /** @return the projected columns, possibly {@code null} */
    @JsonProperty
    public List<SchemaPath> getColumns() {
        return this.columns;
    }

    /** @return the file read entries of this scan */
    @JsonProperty
    public List<ReadEntryWithPath> getEntries() {
        return this.entries;
    }

    /**
     * Serialization view of the reader config.
     *
     * @return {@code null} when the config equals the default instance, so that the
     *         {@code NON_NULL} inclusion rule drops the property; the config otherwise
     */
    @JsonProperty("readerConfig")
    @JsonInclude(JsonInclude.Include.NON_NULL)
    public ParquetReaderConfig getReaderConfigForSerialization() {
        if (ParquetReaderConfig.getDefaultInstance().equals(this.readerConfig)) {
            return null;
        }
        return this.readerConfig;
    }

    /** @return the reader config actually in use (never replaced by {@code null} for defaults) */
    @JsonIgnore
    public ParquetReaderConfig getReaderConfig() {
        return this.readerConfig;
    }

    /** @return the flag recorded by the last {@link #applyFilter} evaluation */
    @JsonIgnore
    public boolean isMatchAllRowGroups() {
        return this.matchAllRowGroups;
    }

    /** @return the internal file set itself (not a copy) */
    @Override
    @JsonIgnore
    public Collection<String> getFiles() {
        return this.fileSet;
    }

    /** @return always {@code true} */
    @Override
    public boolean hasFiles() {
        return true;
    }

    /** @return always {@code true}, regardless of the requested columns */
    @Override
    public boolean canPushdownProjects(List<SchemaPath> columns) {
        return true;
    }

    /** Delegates to the scan statistics collected for the current row groups. */
    @Override
    public long getColumnValueCount(SchemaPath column) {
        return this.parquetGroupScanStatistics.getColumnValueCount(column);
    }

    /** @return the endpoint affinities computed from the current row groups */
    @Override
    public List<EndpointAffinity> getOperatorAffinity() {
        return this.endpointAffinities;
    }

    /** Distributes the current row groups over the given endpoints and stores the result in {@link #mappings}. */
    @Override
    public void applyAssignments(List<CoordinationProtos.DrillbitEndpoint> endpoints) {
        this.mappings = AssignmentCreator.getMappings(endpoints, this.rowGroupInfos);
    }

    /** @return the number of row groups */
    @Override
    public int getMaxParallelizationWidth() {
        return this.rowGroupInfos.size();
    }

    /** @return {@link #toString()} */
    @Override
    public String getDigest() {
        return toString();
    }

    /**
     * Builds scan statistics with an exact row count, a CPU cost of 1 and a disk cost of
     * {@code rowCount * columnCount}. When no column list is set, a column count of 20 is used.
     */
    @Override
    public ScanStats getScanStats() {
        int columnCount = this.columns == null ? 20 : this.columns.size();
        long rowCount = this.parquetGroupScanStatistics.getRowCount();
        ScanStats scanStats = new ScanStats(ScanStats.GroupScanProperty.EXACT_ROW_COUNT, rowCount, 1, rowCount * columnCount);
        logger.trace("Drill parquet scan statistics: {}", scanStats);
        return scanStats;
    }

    /**
     * Converts the row groups assigned to a minor fragment into read entries.
     *
     * @param minorFragmentId key into {@link #mappings}
     * @return one {@link RowGroupReadEntry} per assigned row group, in assignment order
     * @throws IllegalArgumentException if nothing is assigned to {@code minorFragmentId}
     */
    // NOTE: the decompiler reported that this method's access modifier was changed from protected.
    public List<RowGroupReadEntry> getReadEntries(int minorFragmentId) {
        assert minorFragmentId < this.mappings.size()
            : String.format("Mappings length [%d] should be longer than minor fragment id [%d] but it isn't.", this.mappings.size(), minorFragmentId);
        List<RowGroupInfo> assignedRowGroups = this.mappings.get(minorFragmentId);
        Preconditions.checkArgument(!assignedRowGroups.isEmpty(), String.format("MinorFragmentId %d has no read entries assigned", minorFragmentId));
        List<RowGroupReadEntry> readEntries = new ArrayList<>();
        for (RowGroupInfo rowGroup : assignedRowGroups) {
            readEntries.add(new RowGroupReadEntry(rowGroup.getPath(), rowGroup.getStart(), rowGroup.getLength(), rowGroup.getRowGroupIndex(), rowGroup.getNumRecordsToRead()));
        }
        return readEntries;
    }

    /** @return the filter expression, possibly {@code null} */
    @Override
    @JsonProperty
    public LogicalExpression getFilter() {
        return this.filter;
    }

    /** Replaces the filter expression. */
    public void setFilter(LogicalExpression filter) {
        this.filter = filter;
    }

    /**
     * Prunes row groups whose statistics show that {@code filterExpr} cannot match.
     *
     * <p>Side effect: when no row group could be pruned, {@link #isMatchAllRowGroups()} of
     * <em>this</em> scan is updated before {@code null} is returned.
     *
     * @return a clone restricted to the surviving row groups, or {@code null} when pruning is not
     *         applicable: the metadata is not prunable, the row-group count exceeds the planning
     *         threshold option, the predicate could not be built, nothing was pruned, the only row
     *         group was filtered out, or cloning failed with an {@link IOException}
     */
    @Override
    public AbstractParquetGroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtilities, FunctionImplementationRegistry functionImplementationRegistry, OptionManager optionManager) {
        if (!this.parquetTableMetadata.isRowGroupPrunable() || this.rowGroupInfos.size() > optionManager.getOption(PlannerSettings.PARQUET_ROWGROUP_FILTER_PUSHDOWN_PLANNING_THRESHOLD)) {
            return null;
        }
        Set<SchemaPath> schemaPathsInExpr = filterExpr.accept(new ParquetRGFilterEvaluator.FieldReferenceFinder(), null);
        List<RowGroupInfo> qualifiedRowGroups = new ArrayList<>(this.rowGroupInfos.size());
        ParquetFilterPredicate filterPredicate = getParquetFilterPredicate(filterExpr, udfUtilities, functionImplementationRegistry, optionManager, true);
        if (filterPredicate == null) {
            return null;
        }

        boolean matchAll = true;
        for (RowGroupInfo rowGroup : this.rowGroupInfos) {
            Map<SchemaPath, ColumnStatistics> columnStats = newStatCollector(rowGroup, optionManager).collectColStat(schemaPathsInExpr);
            ParquetFilterPredicate.RowsMatch match = ParquetRGFilterEvaluator.matches(filterPredicate, columnStats, rowGroup.getRowCount(), this.parquetTableMetadata, rowGroup.getColumns(), schemaPathsInExpr);
            if (match == ParquetFilterPredicate.RowsMatch.NONE) {
                continue;
            }
            // Stays true only while every retained row group matches on all of its rows.
            matchAll = matchAll && match == ParquetFilterPredicate.RowsMatch.ALL;
            qualifiedRowGroups.add(rowGroup);
        }

        if (qualifiedRowGroups.size() == this.rowGroupInfos.size()) {
            logger.debug("applyFilter() does not have any pruning!");
            this.matchAllRowGroups = matchAll;
            return null;
        }
        if (qualifiedRowGroups.isEmpty()) {
            if (this.rowGroupInfos.size() == 1) {
                // The single row group has to stay anyway, so there is nothing to gain from a clone.
                return null;
            }
            matchAll = false;
            logger.debug("All row groups have been filtered out. Add back one to get schema from scanner.");
            qualifiedRowGroups.add(this.rowGroupInfos.iterator().next());
        }
        logger.debug("applyFilter {} reduce parquet rowgroup # from {} to {}", ExpressionStringBuilder.toString(filterExpr), this.rowGroupInfos.size(), qualifiedRowGroups.size());
        try {
            AbstractParquetGroupScan prunedScan = cloneWithRowGroupInfos(qualifiedRowGroups);
            prunedScan.matchAllRowGroups = matchAll;
            return prunedScan;
        } catch (IOException e) {
            // Pass the message for the placeholder and the exception itself for the stack trace.
            logger.warn("Could not apply filter prune due to Exception : {}", e.getMessage(), e);
            return null;
        }
    }

    /**
     * Materializes {@code filterExpr} against the column statistics of the first row group and
     * turns it into a Parquet filter predicate.
     *
     * @param omitUnsupportedExprs passed through unchanged to
     *                             {@code ParquetFilterBuilder.buildParquetFilterPredicate}
     * @return the predicate, or {@code null} when materialization reported errors
     */
    public ParquetFilterPredicate getParquetFilterPredicate(LogicalExpression filterExpr, UdfUtilities udfUtilities, FunctionImplementationRegistry functionImplementationRegistry, OptionManager optionManager, boolean omitUnsupportedExprs) {
        assert !this.rowGroupInfos.isEmpty() : "row groups count cannot be 0";
        RowGroupInfo firstRowGroup = this.rowGroupInfos.iterator().next();
        Set<SchemaPath> schemaPathsInExpr = filterExpr.accept(new ParquetRGFilterEvaluator.FieldReferenceFinder(), null);
        Map<SchemaPath, ColumnStatistics> columnStats = newStatCollector(firstRowGroup, optionManager).collectColStat(schemaPathsInExpr);

        ErrorCollectorImpl errorCollector = new ErrorCollectorImpl();
        LogicalExpression materializedFilter = ExpressionTreeMaterializer.materializeFilterExpr(filterExpr, columnStats, errorCollector, functionImplementationRegistry);
        if (errorCollector.hasErrors()) {
            logger.error("{} error(s) encountered when materialize filter expression : {}", errorCollector.getErrorCount(), errorCollector.toErrorString());
            return null;
        }
        logger.debug("materializedFilter : {}", ExpressionStringBuilder.toString(materializedFilter));
        return ParquetFilterBuilder.buildParquetFilterPredicate(materializedFilter, ConstantExpressionIdentifier.getConstantExpressionSet(materializedFilter), udfUtilities, omitUnsupportedExprs);
    }

    /** @return always {@code true} */
    @Override
    public boolean supportsLimitPushdown() {
        return true;
    }

    /**
     * Keeps only the leading row groups needed to produce {@code maxRecords} rows (at least one row
     * is always requested) and records on each retained row group how many records to read.
     *
     * <p>Note that {@code setNumRecordsToRead} is invoked on the row-group objects of this scan,
     * which the returned clone shares.
     *
     * @return a clone restricted to the selected row groups, or {@code null} when the total row
     *         count does not exceed the limit, no row group could be dropped, or cloning failed
     *         with an {@link IOException}
     */
    @Override
    public GroupScan applyLimit(int maxRecords) {
        int limit = Math.max(maxRecords, 1);
        if (this.parquetGroupScanStatistics.getRowCount() <= limit) {
            logger.debug("limit push down does not apply, since total number of rows [{}] is less or equal to the required [{}].", this.parquetGroupScanStatistics.getRowCount(), limit);
            return null;
        }

        List<RowGroupInfo> qualifiedRowGroups = new ArrayList<>(this.rowGroupInfos.size());
        long selectedRowCount = 0;
        for (RowGroupInfo rowGroup : this.rowGroupInfos) {
            long rowCount = rowGroup.getRowCount();
            if (selectedRowCount + rowCount <= limit) {
                // The whole row group fits under the limit.
                selectedRowCount += rowCount;
                rowGroup.setNumRecordsToRead(rowCount);
                qualifiedRowGroups.add(rowGroup);
                continue;
            }
            if (selectedRowCount < limit) {
                // Read only the remainder from the first row group that does not fit entirely.
                rowGroup.setNumRecordsToRead(limit - selectedRowCount);
                qualifiedRowGroups.add(rowGroup);
            }
            // The limit is covered: stop here, otherwise every later row group would be retained
            // too and the pushdown would never reduce the scan.
            break;
        }

        if (this.rowGroupInfos.size() == qualifiedRowGroups.size()) {
            logger.debug("limit push down does not apply, since number of row groups was not reduced.");
            return null;
        }
        logger.debug("applyLimit() reduce parquet row groups # from {} to {}.", this.rowGroupInfos.size(), qualifiedRowGroups.size());
        try {
            return cloneWithRowGroupInfos(qualifiedRowGroups);
        } catch (IOException e) {
            // Pass the message for the placeholder and the exception itself for the stack trace.
            logger.warn("Could not apply row count based prune due to Exception: {}", e.getMessage(), e);
            return null;
        }
    }

    /** Delegates to the scan statistics collected for the current row groups. */
    @Override
    public List<SchemaPath> getPartitionColumns() {
        return this.parquetGroupScanStatistics.getPartitionColumns();
    }

    /** Delegates to the scan statistics collected for the current row groups. */
    @JsonIgnore
    public TypeProtos.MajorType getTypeForColumn(SchemaPath schemaPath) {
        return this.parquetGroupScanStatistics.getTypeForColumn(schemaPath);
    }

    /**
     * Looks up a partition value in the scan statistics and casts it to {@code clazz}.
     *
     * @throws ClassCastException if the stored value is not an instance of {@code clazz}
     */
    @JsonIgnore
    public <T> T getPartitionValue(String path, SchemaPath column, Class<T> clazz) {
        return clazz.cast(this.parquetGroupScanStatistics.getPartitionValue(path, column));
    }

    /** @return the internal file set itself (not a copy) */
    @JsonIgnore
    public Set<String> getFileSet() {
        return this.fileSet;
    }

    /**
     * Replaces the file set and the read entries with the files of {@code selection} and drops
     * every row group whose path is not among them.
     */
    @Override
    public void modifyFileSelection(FileSelection selection) {
        List<String> files = selection.getFiles();
        this.fileSet = new HashSet<>(files);
        this.entries = new ArrayList<>(files.size());
        this.entries.addAll(files.stream()
            .map(ReadEntryWithPath::new)
            .collect(Collectors.toList()));
        this.rowGroupInfos = this.rowGroupInfos.stream()
            .filter(rowGroup -> this.fileSet.contains(rowGroup.getPath()))
            .collect(Collectors.toList());
    }

    /**
     * Builds the row-group state of the scan: calls {@link #initInternal()}, derives the file set
     * from the table metadata if none was set, creates one {@link RowGroupInfo} per row group with
     * its endpoint byte map, and finally computes endpoint affinities and scan statistics.
     *
     * @throws IOException propagated from {@link #initInternal()}
     */
    // NOTE: the decompiler reported that this method's access modifier was changed from protected.
    public void init() throws IOException {
        initInternal();
        assert this.parquetTableMetadata != null;

        if (this.fileSet == null) {
            this.fileSet = new HashSet<>();
            this.fileSet.addAll(this.parquetTableMetadata.getFiles().stream()
                .map(MetadataBase.ParquetFileMetadata::getPath)
                .collect(Collectors.toSet()));
        }

        // Index the drillbits by address so that host affinities can be resolved to endpoints.
        Map<String, CoordinationProtos.DrillbitEndpoint> hostEndpointMap = new HashMap<>();
        for (CoordinationProtos.DrillbitEndpoint endpoint : getDrillbits()) {
            hostEndpointMap.put(endpoint.getAddress(), endpoint);
        }

        this.rowGroupInfos = new ArrayList<>();
        for (MetadataBase.ParquetFileMetadata file : this.parquetTableMetadata.getFiles()) {
            int rowGroupIndex = 0;
            for (MetadataBase.RowGroupMetadata rowGroup : file.getRowGroups()) {
                RowGroupInfo rowGroupInfo = new RowGroupInfo(file.getPath(), rowGroup.getStart(), rowGroup.getLength(), rowGroupIndex, rowGroup.getRowCount());
                EndpointByteMapImpl endpointByteMap = new EndpointByteMapImpl();
                // Hosts without a known drillbit are ignored. Each remaining host is weighted with
                // affinity * row-group length.
                // NOTE(review): the decompiled source had lost the narrowing cast to long;
                // confirm against the signature of EndpointByteMap.add.
                rowGroup.getHostAffinity().keySet().stream()
                    .filter(hostEndpointMap::containsKey)
                    .forEach(host -> endpointByteMap.add(hostEndpointMap.get(host), (long) (rowGroup.getHostAffinity().get(host) * rowGroup.getLength())));
                rowGroupInfo.setEndpointByteMap(endpointByteMap);
                rowGroupInfo.setColumns(rowGroup.getColumns());
                rowGroupIndex++;
                this.rowGroupInfos.add(rowGroupInfo);
            }
        }

        this.endpointAffinities = AffinityCreator.getAffinityMap(this.rowGroupInfos);
        this.parquetGroupScanStatistics = new ParquetGroupScanStatistics(this.rowGroupInfos, this.parquetTableMetadata);
    }

    /**
     * @return the string form of the filter, or a placeholder constant when the filter is
     *         {@code null} or the boolean {@code TRUE} expression
     */
    // NOTE: the decompiler reported that this method's access modifier was changed from protected.
    public String getFilterString() {
        if (this.filter == null || this.filter.equals(ValueExpressions.BooleanExpression.TRUE)) {
            // NOTE(review): an INFORMATION_SCHEMA constant is an odd fit here; this looks like the
            // decompiler substituting a constant with the same value (presumably the empty
            // string) - confirm before replacing it with a literal.
            return InfoSchemaConstants.IS_CATALOG_CONNECT;
        }
        return ExpressionStringBuilder.toString(this.filter);
    }

    /**
     * Subclass hook run first by {@link #init()}; {@link #init()} asserts that
     * {@link #parquetTableMetadata} is non-null once it returns.
     */
    protected abstract void initInternal() throws IOException;

    /** @return the drillbit endpoints that {@link #init()} indexes by address */
    protected abstract Collection<CoordinationProtos.DrillbitEndpoint> getDrillbits();

    /**
     * Creates a copy of this scan for the given file paths. The copy is expected to carry
     * non-null scan statistics, because they are re-collected on it right after cloning.
     */
    protected abstract AbstractParquetGroupScan cloneWithFileSelection(Collection<String> filePaths) throws IOException;

    /** @return the flag handed to {@code ColumnExplorer.populateImplicitColumns} */
    protected abstract boolean supportsFileImplicitColumns();

    /** @return the partition values handed to {@code ColumnExplorer.populateImplicitColumns} for the row group */
    protected abstract List<String> getPartitionValues(RowGroupInfo rowGroupInfo);

    /** Builds the statistics collector for one row group, including its implicit column values. */
    private ParquetMetaStatCollector newStatCollector(RowGroupInfo rowGroup, OptionManager optionManager) {
        return new ParquetMetaStatCollector(
            this.parquetTableMetadata,
            rowGroup.getColumns(),
            new ColumnExplorer(optionManager, this.columns).populateImplicitColumns(rowGroup.getPath(), getPartitionValues(rowGroup), supportsFileImplicitColumns()));
    }

    /**
     * Clones this scan for the files of the given row groups, then installs exactly those row
     * groups on the clone and refreshes its statistics and endpoint affinities.
     */
    private AbstractParquetGroupScan cloneWithRowGroupInfos(List<RowGroupInfo> rowGroups) throws IOException {
        Set<String> filePaths = rowGroups.stream()
            .map(RowGroupInfo::getPath)
            .collect(Collectors.toSet());
        AbstractParquetGroupScan clone = cloneWithFileSelection(filePaths);
        clone.rowGroupInfos = rowGroups;
        clone.parquetGroupScanStatistics.collect(clone.rowGroupInfos, clone.parquetTableMetadata);
        clone.endpointAffinities = AffinityCreator.getAffinityMap(clone.rowGroupInfos);
        return clone;
    }
}
