package org.apache.spark.sql.execution.datasources.parquet;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.TimeZone;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.schema.Type;
import org.apache.spark.memory.MemoryMode;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.execution.vectorized.ColumnVectorUtils;
import org.apache.spark.sql.execution.vectorized.OffHeapColumnVector;
import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector;
import org.apache.spark.sql.execution.vectorized.WritableColumnVector;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.vectorized.ColumnarBatch;

/* loaded from: input_file:org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.class */
/**
 * Record reader that decodes Parquet data column-by-column into a reusable {@link ColumnarBatch}.
 *
 * <p>The reader can hand out either whole batches (after {@link #enableReturningBatches()}) or
 * individual rows of the current batch. Only flat schemas are accepted: every requested field
 * must be primitive and non-repeated, and must match the file's column descriptor exactly when
 * the file contains it (see {@code initializeInternal}).
 */
public class VectorizedParquetRecordReader extends SpecificParquetRecordReaderBase<Object> {
    /** Rows allocated per column vector, and the upper bound on rows decoded per batch. */
    private static final int CAPACITY = 4096;

    /** Row cursor inside the current batch; only advanced in row-at-a-time mode. */
    private int batchIdx = 0;

    /** Number of rows decoded into the current batch. */
    private int numBatched = 0;

    /** One reader per requested column for the current row group; null entries are missing columns. */
    private VectorizedColumnReader[] columnReaders;

    /** Total rows handed to column vectors so far, across all batches. */
    private long rowsReturned;

    /** Total rows contained in all row groups loaded so far. */
    private long totalCountLoadedSoFar = 0L;

    /** {@code missingColumns[i]} is true when requested column i is absent from the file. */
    private boolean[] missingColumns;

    /** Time zone forwarded to every {@link VectorizedColumnReader}; may be null. */
    private final TimeZone convertTz;

    /** Batch view over {@link #columnVectors}; allocated lazily, released in {@link #close()}. */
    private ColumnarBatch columnarBatch;

    /** Backing vectors: the file columns first, then any extra constant columns. */
    private WritableColumnVector[] columnVectors;

    /** When true, {@link #getCurrentValue()} returns whole batches instead of single rows. */
    private boolean returnColumnarBatch;

    /** Memory mode used when allocating column vectors. */
    private final MemoryMode memoryMode;

    /**
     * Creates a reader.
     *
     * @param timeZone time zone passed to the column readers; may be null
     * @param z        true to allocate off-heap column vectors, false for on-heap
     */
    public VectorizedParquetRecordReader(TimeZone timeZone, boolean z) {
        this.convertTz = timeZone;
        this.memoryMode = z ? MemoryMode.OFF_HEAP : MemoryMode.ON_HEAP;
    }

    /**
     * Creates a reader with no time zone.
     *
     * @param z true to allocate off-heap column vectors, false for on-heap
     */
    public VectorizedParquetRecordReader(boolean z) {
        this(null, z);
    }

    /**
     * Initializes the base reader from a split, then validates the requested schema.
     *
     * @throws IOException                   if a required column is missing from the file
     * @throws UnsupportedOperationException if the requested schema contains a complex type or
     *                                       does not match the file schema
     */
    @Override // org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException, UnsupportedOperationException {
        super.initialize(inputSplit, taskAttemptContext);
        initializeInternal();
    }

    /**
     * Initializes the base reader from the two given arguments (forwarded unchanged to the
     * superclass), then validates the requested schema.
     *
     * @throws IOException                   if a required column is missing from the file
     * @throws UnsupportedOperationException if the requested schema contains a complex type or
     *                                       does not match the file schema
     */
    @Override // org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase
    public void initialize(String str, List<String> list) throws IOException, UnsupportedOperationException {
        super.initialize(str, list);
        initializeInternal();
    }

    /**
     * Releases the columnar batch (if one was allocated) and then closes the base reader.
     * The base reader is closed even when releasing the batch throws.
     */
    @Override // org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase
    public void close() throws IOException {
        try {
            if (this.columnarBatch != null) {
                this.columnarBatch.close();
                this.columnarBatch = null;
            }
        } finally {
            super.close();
        }
    }

    /**
     * Advances to the next value.
     *
     * <p>In batch mode this loads the next batch. In row mode it moves the row cursor forward,
     * loading a new batch only when the current one is exhausted.
     *
     * @return true if a value is available through {@link #getCurrentValue()}
     */
    public boolean nextKeyValue() throws IOException {
        resultBatch();
        if (this.returnColumnarBatch) {
            return nextBatch();
        }
        if (this.batchIdx >= this.numBatched && !nextBatch()) {
            return false;
        }
        this.batchIdx++;
        return true;
    }

    /**
     * Returns the current value: the whole batch in batch mode, otherwise the row that the
     * last successful {@link #nextKeyValue()} advanced to.
     */
    public Object getCurrentValue() {
        // batchIdx was already incremented past the current row by nextKeyValue().
        return this.returnColumnarBatch ? this.columnarBatch : this.columnarBatch.getRow(this.batchIdx - 1);
    }

    /**
     * Returns the fraction of rows decoded so far, or {@code 0.0f} when the input has no rows
     * (avoids the NaN that 0/0 would otherwise produce).
     */
    public float getProgress() {
        if (this.totalRowCount == 0) {
            return 0.0f;
        }
        return ((float) this.rowsReturned) / ((float) this.totalRowCount);
    }

    /**
     * Allocates the column vectors and the batch.
     *
     * <p>The batch schema is the fields of {@code sparkSchema} followed by the fields of
     * {@code structType} (when non-null). Each extra column is filled from {@code internalRow}
     * and marked constant; columns missing from the file are filled with nulls and marked
     * constant as well.
     *
     * @param mode        memory mode for the vectors
     * @param structType  schema of the extra constant columns, or null for none
     * @param internalRow values for the extra constant columns; only read when
     *                    {@code structType} is non-null
     */
    private void initBatch(MemoryMode mode, StructType structType, InternalRow internalRow) {
        StructType batchSchema = new StructType();
        for (StructField field : this.sparkSchema.fields()) {
            batchSchema = batchSchema.add(field);
        }
        if (structType != null) {
            for (StructField field : structType.fields()) {
                batchSchema = batchSchema.add(field);
            }
        }

        if (mode == MemoryMode.OFF_HEAP) {
            this.columnVectors = OffHeapColumnVector.allocateColumns(CAPACITY, batchSchema);
        } else {
            this.columnVectors = OnHeapColumnVector.allocateColumns(CAPACITY, batchSchema);
        }
        this.columnarBatch = new ColumnarBatch(this.columnVectors);

        if (structType != null) {
            // Extra columns live after the file columns in the vector array.
            int offset = this.sparkSchema.fields().length;
            int extraCount = structType.fields().length;
            for (int i = 0; i < extraCount; i++) {
                WritableColumnVector vector = this.columnVectors[i + offset];
                ColumnVectorUtils.populate(vector, internalRow, i);
                vector.setIsConstant();
            }
        }

        for (int i = 0; i < this.missingColumns.length; i++) {
            if (this.missingColumns[i]) {
                this.columnVectors[i].putNulls(0, CAPACITY);
                this.columnVectors[i].setIsConstant();
            }
        }
    }

    /** Allocates the batch with no extra constant columns. */
    private void initBatch() {
        initBatch(this.memoryMode, null, null);
    }

    /**
     * Allocates the batch with extra constant columns appended after the file columns.
     *
     * @param structType  schema of the extra columns, or null for none
     * @param internalRow values for the extra columns
     */
    public void initBatch(StructType structType, InternalRow internalRow) {
        initBatch(this.memoryMode, structType, internalRow);
    }

    /**
     * Returns the batch this reader decodes into, allocating it first if necessary.
     * The same instance is reused for every batch.
     */
    public ColumnarBatch resultBatch() {
        if (this.columnarBatch == null) {
            initBatch();
        }
        return this.columnarBatch;
    }

    /** Switches the reader to batch mode: values returned are whole {@link ColumnarBatch}es. */
    public void enableReturningBatches() {
        this.returnColumnarBatch = true;
    }

    /**
     * Decodes the next batch of up to {@code CAPACITY} rows into the column vectors.
     * A batch never spans two row groups.
     *
     * @return false when every row has already been returned, true otherwise
     */
    public boolean nextBatch() throws IOException {
        // Make a direct call safe even if no batch has been allocated yet.
        resultBatch();
        for (WritableColumnVector vector : this.columnVectors) {
            vector.reset();
        }
        this.columnarBatch.setNumRows(0);
        if (this.rowsReturned >= this.totalRowCount) {
            return false;
        }
        checkEndOfRowGroup();

        // Cap the batch at whatever remains in the row groups loaded so far.
        int rowsToRead = (int) Math.min((long) CAPACITY, this.totalCountLoadedSoFar - this.rowsReturned);
        for (int i = 0; i < this.columnReaders.length; i++) {
            VectorizedColumnReader columnReader = this.columnReaders[i];
            if (columnReader != null) {
                columnReader.readBatch(rowsToRead, this.columnVectors[i]);
            }
        }
        this.rowsReturned += rowsToRead;
        this.columnarBatch.setNumRows(rowsToRead);
        this.numBatched = rowsToRead;
        this.batchIdx = 0;
        return true;
    }

    /**
     * Validates the requested schema against the file schema and records which requested
     * columns the file does not contain.
     *
     * @throws UnsupportedOperationException if a requested field is not primitive or is repeated,
     *                                       or if its descriptor differs from the file's
     * @throws IOException                   if a column absent from the file has a maximum
     *                                       definition level of 0 (reported as a required column)
     */
    private void initializeInternal() throws IOException, UnsupportedOperationException {
        int fieldCount = this.requestedSchema.getFieldCount();
        List<Type> fields = this.requestedSchema.getFields();
        List<String[]> paths = this.requestedSchema.getPaths();
        List<ColumnDescriptor> columns = this.requestedSchema.getColumns();

        this.missingColumns = new boolean[fieldCount];
        for (int i = 0; i < fieldCount; i++) {
            Type field = fields.get(i);
            if (!field.isPrimitive() || field.isRepetition(Type.Repetition.REPEATED)) {
                throw new UnsupportedOperationException("Complex types not supported.");
            }

            String[] path = paths.get(i);
            if (this.fileSchema.containsPath(path)) {
                if (!this.fileSchema.getColumnDescription(path).equals(columns.get(i))) {
                    throw new UnsupportedOperationException("Schema evolution not supported.");
                }
                this.missingColumns[i] = false;
            } else {
                if (columns.get(i).getMaxDefinitionLevel() == 0) {
                    throw new IOException("Required column is missing in data file. Col: " + Arrays.toString(path));
                }
                this.missingColumns[i] = true;
            }
        }
    }

    /**
     * Loads the next row group once every row of the previously loaded groups has been
     * returned, and creates a fresh column reader for each column present in the file.
     *
     * @throws IOException if no further row group exists although rows are still expected
     */
    private void checkEndOfRowGroup() throws IOException {
        if (this.rowsReturned != this.totalCountLoadedSoFar) {
            return;
        }
        PageReadStore pages = this.reader.readNextRowGroup();
        if (pages == null) {
            throw new IOException("expecting more rows but reached last block. Read " + this.rowsReturned + " out of " + this.totalRowCount);
        }

        List<ColumnDescriptor> columns = this.requestedSchema.getColumns();
        List<Type> fields = this.requestedSchema.asGroupType().getFields();
        this.columnReaders = new VectorizedColumnReader[columns.size()];
        for (int i = 0; i < columns.size(); i++) {
            if (this.missingColumns[i]) {
                // Missing columns keep a null reader; their vectors were pre-filled with nulls.
                continue;
            }
            ColumnDescriptor descriptor = columns.get(i);
            this.columnReaders[i] = new VectorizedColumnReader(descriptor, fields.get(i).getOriginalType(), pages.getPageReader(descriptor), this.convertTz);
        }
        this.totalCountLoadedSoFar += pages.getRowCount();
    }
}
