package org.apache.drill.exec.store.pdf;

import com.google.common.base.Strings;
import java.io.InputStream;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.GregorianCalendar;
import java.util.Iterator;
import java.util.List;
import org.apache.drill.common.AutoCloseables;
import org.apache.drill.common.exceptions.CustomErrorContext;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework;
import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
import org.apache.drill.exec.physical.resultSet.RowSetLoader;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.vector.accessor.ScalarWriter;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import technology.tabula.RectangularTextContainer;
import technology.tabula.Table;

/* loaded from: input_file:org/apache/drill/exec/store/pdf/PdfBatchReader.class */
public class PdfBatchReader implements ManagedReader<FileScanFramework.FileSchemaNegotiator> {
    // Class-wide logger; also used by the nested column writers and handed to UserException.build().
    private static final Logger logger = LoggerFactory.getLogger(PdfBatchReader.class);
    // Prefix for generated column names (field_0, field_1, ...) when a header is missing or headers are not extracted.
    private static final String NEW_FIELD_PREFIX = "field_";
    // Row limit passed to rowWriter.limitReached() on every iteration of next().
    private final int maxRecords;
    // Wrapper around the format plugin; the PdfFormatConfig is obtained through config.plugin.getConfig().
    private final PdfReaderConfig config;
    // Configured default table index, clamped to zero when negative (see constructor).
    private final int startingTableIndex;
    // Created in open() from the loaded document; writes the metadata columns for each row.
    private PdfMetadataReader metadataReader;
    // File split supplied by the schema negotiator; its path is the file that gets opened.
    private FileSplit split;
    // Error context from the negotiator, attached to every UserException raised here.
    private CustomErrorContext errorContext;
    // Row writer obtained from negotiator.build().writer() in open().
    private RowSetLoader rowWriter;
    // The loaded PDF; set by openFile() and released (then nulled) by close().
    private PDDocument document;
    // Schema builder used by buildSchema() when no schema is provided.
    private SchemaBuilder builder;
    // Column names in column order; aliased to firstRow by buildSchema() or filled from the provided schema.
    private List<String> columnHeaders;
    // Table currently being read; may be null when the requested table does not exist.
    private Table currentTable;
    // Index into tables of the table being read; starts at startingTableIndex.
    private int currentTableIndex;
    // First row of the initial table converted to strings; consumed in open() to derive headers.
    private List<String> firstRow;
    // Iterator over the rows of currentTable.
    private PdfRowIterator rowIterator;
    // Schema negotiator passed to open(); kept for file-system access and provided-schema lookups.
    private FileScanFramework.FileSchemaNegotiator negotiator;
    // All tables when combinePages is enabled, otherwise a singleton list holding currentTable.
    private List<Table> tables;
    // Running counter used to number generated column names.
    private int unregisteredColumnCount = 0;
    // One column writer per column, in column order; indexed by cell position in processRow().
    private final List<PdfColumnWriter> writers = new ArrayList();

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: org.apache.drill.exec.store.pdf.PdfBatchReader$1, reason: invalid class name */
    /* loaded from: input_file:org/apache/drill/exec/store/pdf/PdfBatchReader$1.class */
    /*
     * Compiler-generated switch map (recovered by the decompiler). The array is indexed by
     * TypeProtos.MinorType ordinal and holds the case label (1..10) that
     * buildWriterListFromProvidedSchema() switches on; types not listed here keep the array's
     * default value 0 and therefore reach that switch's default branch.
     */
    public static /* synthetic */ class AnonymousClass1 {
        static final /* synthetic */ int[] $SwitchMap$org$apache$drill$common$types$TypeProtos$MinorType = new int[TypeProtos.MinorType.values().length];

        static {
            // Each constant is registered in its own try/catch that ignores NoSuchFieldError,
            // presumably so an enum constant missing at run time only leaves its slot at 0
            // instead of failing class initialization -- NOTE(review): standard javac pattern, confirm.
            try {
                $SwitchMap$org$apache$drill$common$types$TypeProtos$MinorType[TypeProtos.MinorType.VARCHAR.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                $SwitchMap$org$apache$drill$common$types$TypeProtos$MinorType[TypeProtos.MinorType.SMALLINT.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
            try {
                $SwitchMap$org$apache$drill$common$types$TypeProtos$MinorType[TypeProtos.MinorType.TINYINT.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
            }
            try {
                $SwitchMap$org$apache$drill$common$types$TypeProtos$MinorType[TypeProtos.MinorType.INT.ordinal()] = 4;
            } catch (NoSuchFieldError e4) {
            }
            try {
                $SwitchMap$org$apache$drill$common$types$TypeProtos$MinorType[TypeProtos.MinorType.BIGINT.ordinal()] = 5;
            } catch (NoSuchFieldError e5) {
            }
            try {
                $SwitchMap$org$apache$drill$common$types$TypeProtos$MinorType[TypeProtos.MinorType.FLOAT4.ordinal()] = 6;
            } catch (NoSuchFieldError e6) {
            }
            try {
                $SwitchMap$org$apache$drill$common$types$TypeProtos$MinorType[TypeProtos.MinorType.FLOAT8.ordinal()] = 7;
            } catch (NoSuchFieldError e7) {
            }
            try {
                $SwitchMap$org$apache$drill$common$types$TypeProtos$MinorType[TypeProtos.MinorType.DATE.ordinal()] = 8;
            } catch (NoSuchFieldError e8) {
            }
            try {
                $SwitchMap$org$apache$drill$common$types$TypeProtos$MinorType[TypeProtos.MinorType.TIME.ordinal()] = 9;
            } catch (NoSuchFieldError e9) {
            }
            try {
                $SwitchMap$org$apache$drill$common$types$TypeProtos$MinorType[TypeProtos.MinorType.TIMESTAMP.ordinal()] = 10;
            } catch (NoSuchFieldError e10) {
            }
        }
    }

    /* loaded from: input_file:org/apache/drill/exec/store/pdf/PdfBatchReader$BigIntPdfColumnWriter.class */
    /**
     * Column writer that stores cell contents as 64-bit integers.
     */
    public static class BigIntPdfColumnWriter extends PdfColumnWriter {
        /**
         * @param columnIndex position of the column
         * @param columnName  column name, also used to look up the scalar writer
         * @param rowWriter   row writer that owns the column
         */
        BigIntPdfColumnWriter(int columnIndex, String columnName, RowSetLoader rowWriter) {
            super(columnIndex, columnName, rowWriter.scalar(columnName));
        }

        /**
         * Parses the cell text with {@link Long#parseLong(String)} and writes it.
         *
         * @throws NumberFormatException if the text is not a valid long
         */
        @Override
        public void load(RectangularTextContainer<?> cell) {
            this.writer.setLong(Long.parseLong(cell.getText()));
        }

        /**
         * Writes an already-typed value. A {@code null} value is skipped, matching the
         * date, time, timestamp and string writers, instead of failing on unboxing.
         *
         * @param value a {@link Long}, or {@code null} to leave the column unset
         */
        @Override
        public void loadFromValue(Object value) {
            if (value == null) {
                return;
            }
            this.writer.setLong(((Long) value).longValue());
        }
    }

    /* loaded from: input_file:org/apache/drill/exec/store/pdf/PdfBatchReader$DatePdfColumnWriter.class */
    /**
     * Column writer for DATE columns. An optional pattern is read from the
     * {@code drill.format} property of the column in the provided schema.
     */
    public static class DatePdfColumnWriter extends PdfColumnWriter {
        private String dateFormat;
        // Built on first use from dateFormat so the pattern is compiled once, not once per cell.
        private DateTimeFormatter formatter;

        /**
         * @param columnIndex position of the column
         * @param columnName  column name, also used to look up the scalar writer and the schema metadata
         * @param rowWriter   row writer that owns the column
         * @param negotiator  schema negotiator whose provided schema may carry the format property
         */
        DatePdfColumnWriter(int columnIndex, String columnName, RowSetLoader rowWriter, FileScanFramework.FileSchemaNegotiator negotiator) {
            super(columnIndex, columnName, rowWriter.scalar(columnName));
            ColumnMetadata metadata = negotiator.providedSchema().metadata(columnName);
            if (metadata != null) {
                this.dateFormat = metadata.property("drill.format");
            }
        }

        /**
         * Parses the cell text as a date, using the configured pattern when one is set and
         * the default {@link LocalDate#parse(CharSequence)} format otherwise.
         */
        @Override
        public void load(RectangularTextContainer<?> cell) {
            String text = cell.getText();
            if (Strings.isNullOrEmpty(this.dateFormat)) {
                this.writer.setDate(LocalDate.parse(text));
                return;
            }
            if (this.formatter == null) {
                // Lazy so that an invalid pattern still surfaces on first load, as before.
                this.formatter = DateTimeFormatter.ofPattern(this.dateFormat);
            }
            this.writer.setDate(LocalDate.parse(text, this.formatter));
        }

        /**
         * Writes a date supplied as a string in the default {@link LocalDate#parse(CharSequence)}
         * format; the configured pattern is not applied here. {@code null} is skipped.
         */
        @Override
        public void loadFromValue(Object value) {
            if (value != null) {
                this.writer.setDate(LocalDate.parse((String) value));
            }
        }
    }

    /* loaded from: input_file:org/apache/drill/exec/store/pdf/PdfBatchReader$DoublePdfColumnWriter.class */
    /**
     * Column writer that stores cell contents as double-precision floating point values.
     */
    public static class DoublePdfColumnWriter extends PdfColumnWriter {
        /**
         * @param columnIndex position of the column
         * @param columnName  column name, also used to look up the scalar writer
         * @param rowWriter   row writer that owns the column
         */
        DoublePdfColumnWriter(int columnIndex, String columnName, RowSetLoader rowWriter) {
            super(columnIndex, columnName, rowWriter.scalar(columnName));
        }

        /**
         * Parses the cell text with {@link Double#parseDouble(String)} and writes it.
         *
         * @throws NumberFormatException if the text is not a valid double
         */
        @Override
        public void load(RectangularTextContainer<?> cell) {
            this.writer.setDouble(Double.parseDouble(cell.getText()));
        }

        /**
         * Writes an already-typed value. A {@code null} value is skipped, matching the
         * date, time, timestamp and string writers, instead of failing on unboxing.
         *
         * @param value a {@link Double}, or {@code null} to leave the column unset
         */
        @Override
        public void loadFromValue(Object value) {
            if (value == null) {
                return;
            }
            this.writer.setDouble(((Double) value).doubleValue());
        }
    }

    /* loaded from: input_file:org/apache/drill/exec/store/pdf/PdfBatchReader$IntPdfColumnWriter.class */
    /**
     * Column writer that stores cell contents as 32-bit integers.
     */
    public static class IntPdfColumnWriter extends PdfColumnWriter {
        /**
         * @param columnIndex position of the column
         * @param columnName  column name, also used to look up the scalar writer
         * @param rowWriter   row writer that owns the column
         */
        public IntPdfColumnWriter(int columnIndex, String columnName, RowSetLoader rowWriter) {
            super(columnIndex, columnName, rowWriter.scalar(columnName));
        }

        /**
         * Parses the cell text with {@link Integer#parseInt(String)} and writes it.
         *
         * @throws NumberFormatException if the text is not a valid int
         */
        @Override
        public void load(RectangularTextContainer<?> cell) {
            this.writer.setInt(Integer.parseInt(cell.getText()));
        }

        /**
         * Writes an already-typed value. A {@code null} value is skipped, matching the
         * date, time, timestamp and string writers, instead of failing on unboxing.
         *
         * @param value an {@link Integer}, or {@code null} to leave the column unset
         */
        @Override
        public void loadFromValue(Object value) {
            if (value == null) {
                return;
            }
            this.writer.setInt(((Integer) value).intValue());
        }
    }

    /* loaded from: input_file:org/apache/drill/exec/store/pdf/PdfBatchReader$PdfColumnWriter.class */
    /**
     * Base type for the per-column writers. It pairs a column's name and index with the
     * scalar writer that receives its values.
     */
    public static abstract class PdfColumnWriter {
        final String columnName;
        final ScalarWriter writer;
        final int columnIndex;

        /**
         * @param columnIndex  index recorded for the column
         * @param columnName   name of the column
         * @param columnWriter scalar writer that values are written to
         */
        public PdfColumnWriter(int columnIndex, String columnName, ScalarWriter columnWriter) {
            this.writer = columnWriter;
            this.columnName = columnName;
            this.columnIndex = columnIndex;
        }

        /** Converts the text of a table cell and writes it to the column. */
        public abstract void load(RectangularTextContainer<?> cell);

        /** Writes a value that was obtained from somewhere other than a table cell. */
        public abstract void loadFromValue(Object value);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/apache/drill/exec/store/pdf/PdfBatchReader$PdfReaderConfig.class */
    /**
     * Holder that hands the owning format plugin to the batch reader.
     */
    public static class PdfReaderConfig {
        final PdfFormatPlugin plugin;

        /**
         * @param formatPlugin the format plugin this reader was created for
         */
        public PdfReaderConfig(PdfFormatPlugin formatPlugin) {
            plugin = formatPlugin;
        }
    }

    /* loaded from: input_file:org/apache/drill/exec/store/pdf/PdfBatchReader$StringPdfColumnWriter.class */
    /**
     * Column writer that stores cell contents verbatim as strings.
     */
    public static class StringPdfColumnWriter extends PdfColumnWriter {
        /**
         * @param columnIndex index recorded for the column
         * @param columnName  column name, also used to look up the scalar writer
         * @param rowWriter   row writer that owns the column
         */
        public StringPdfColumnWriter(int columnIndex, String columnName, RowSetLoader rowWriter) {
            super(columnIndex, columnName, rowWriter.scalar(columnName));
        }

        /** Writes the cell's text unchanged. */
        @Override
        public void load(RectangularTextContainer<?> cell) {
            String text = cell.getText();
            this.writer.setString(text);
        }

        /** Writes the given string; a null or empty value leaves the column unset. */
        @Override
        public void loadFromValue(Object value) {
            String text = (String) value;
            if (!Strings.isNullOrEmpty(text)) {
                this.writer.setString(text);
            }
        }
    }

    /* loaded from: input_file:org/apache/drill/exec/store/pdf/PdfBatchReader$TimePdfColumnWriter.class */
    /**
     * Column writer for TIME columns. An optional pattern is read from the
     * {@code drill.format} property of the column in the provided schema.
     */
    public static class TimePdfColumnWriter extends PdfColumnWriter {
        private String dateFormat;
        // Built on first use from dateFormat so the pattern is compiled once, not once per cell.
        private DateTimeFormatter formatter;

        /**
         * @param columnIndex position of the column
         * @param columnName  column name, also used to look up the scalar writer and the schema metadata
         * @param rowWriter   row writer that owns the column
         * @param negotiator  schema negotiator whose provided schema may carry the format property
         */
        TimePdfColumnWriter(int columnIndex, String columnName, RowSetLoader rowWriter, FileScanFramework.FileSchemaNegotiator negotiator) {
            super(columnIndex, columnName, rowWriter.scalar(columnName));
            ColumnMetadata metadata = negotiator.providedSchema().metadata(columnName);
            if (metadata != null) {
                this.dateFormat = metadata.property("drill.format");
            }
        }

        /**
         * Parses the cell text as a time, using the configured pattern when one is set and
         * the default {@link LocalTime#parse(CharSequence)} format otherwise.
         */
        @Override
        public void load(RectangularTextContainer<?> cell) {
            String text = cell.getText();
            if (Strings.isNullOrEmpty(this.dateFormat)) {
                this.writer.setTime(LocalTime.parse(text));
                return;
            }
            if (this.formatter == null) {
                // Lazy so that an invalid pattern still surfaces on first load, as before.
                this.formatter = DateTimeFormatter.ofPattern(this.dateFormat);
            }
            this.writer.setTime(LocalTime.parse(text, this.formatter));
        }

        /**
         * Writes a time supplied as a string in the default {@link LocalTime#parse(CharSequence)}
         * format; the configured pattern is not applied here. {@code null} is skipped.
         */
        @Override
        public void loadFromValue(Object value) {
            if (value != null) {
                this.writer.setTime(LocalTime.parse((String) value));
            }
        }
    }

    /* loaded from: input_file:org/apache/drill/exec/store/pdf/PdfBatchReader$TimestampPdfColumnWriter.class */
    public static class TimestampPdfColumnWriter extends PdfColumnWriter {
        private String dateFormat;

        /* JADX INFO: Access modifiers changed from: package-private */
        public TimestampPdfColumnWriter(int i, String str, RowSetLoader rowSetLoader) {
            super(i, str, rowSetLoader.scalar(str));
        }

        TimestampPdfColumnWriter(int i, String str, RowSetLoader rowSetLoader, FileScanFramework.FileSchemaNegotiator fileSchemaNegotiator) {
            super(i, str, rowSetLoader.scalar(str));
            ColumnMetadata metadata = fileSchemaNegotiator.providedSchema().metadata(str);
            if (metadata != null) {
                this.dateFormat = metadata.property("drill.format");
            }
        }

        @Override // org.apache.drill.exec.store.pdf.PdfBatchReader.PdfColumnWriter
        public void load(RectangularTextContainer<?> rectangularTextContainer) {
            Instant instant = null;
            if (Strings.isNullOrEmpty(this.dateFormat)) {
                instant = Instant.parse(rectangularTextContainer.getText());
            } else {
                try {
                    instant = Instant.ofEpochMilli(new SimpleDateFormat(this.dateFormat).parse(rectangularTextContainer.getText()).getTime());
                } catch (ParseException e) {
                    PdfBatchReader.logger.error("Error parsing timestamp: " + e.getMessage());
                }
            }
            this.writer.setTimestamp(instant);
        }

        @Override // org.apache.drill.exec.store.pdf.PdfBatchReader.PdfColumnWriter
        public void loadFromValue(Object obj) {
            if (obj != null) {
                this.writer.setTimestamp(((GregorianCalendar) obj).getTime().toInstant());
            }
        }
    }

    public PdfBatchReader(PdfReaderConfig pdfReaderConfig, int i) {
        this.maxRecords = i;
        this.config = pdfReaderConfig;
        this.startingTableIndex = ((PdfFormatConfig) pdfReaderConfig.plugin.getConfig()).defaultTableIndex() < 0 ? 0 : ((PdfFormatConfig) pdfReaderConfig.plugin.getConfig()).defaultTableIndex();
        this.currentTableIndex = this.startingTableIndex;
        this.columnHeaders = new ArrayList();
    }

    /**
     * Opens the PDF, selects the table(s) to read, negotiates the schema and builds the
     * column writers.
     *
     * <p>With {@code combinePages} enabled all tables are extracted and reading starts at the
     * first one; a document with no tables is treated like a missing table (no current table)
     * instead of failing on {@code tables.get(0)}. Otherwise only the table at the starting
     * index is read, and a missing table is an error unless the index is 0.
     *
     * <p>NOTE(review): in combine mode reading starts at table 0 while {@code currentTableIndex}
     * was initialised to the starting table index; confirm this is intended for a non-zero index.
     *
     * @param fileSchemaNegotiator negotiator supplying the file split, error context, file
     *                             system and optional provided schema
     * @return always {@code true}
     * @throws UserException if the file cannot be opened or the requested table index does not exist
     */
    public boolean open(FileScanFramework.FileSchemaNegotiator fileSchemaNegotiator) {
        this.negotiator = fileSchemaNegotiator;
        this.split = fileSchemaNegotiator.split();
        this.errorContext = fileSchemaNegotiator.parentErrorContext();
        this.builder = new SchemaBuilder();
        openFile();
        this.metadataReader = new PdfMetadataReader(this.document);

        PdfFormatConfig formatConfig = (PdfFormatConfig) this.config.plugin.getConfig();
        if (formatConfig.combinePages()) {
            List<Table> extracted = PdfUtils.extractTablesFromPDF(this.document, formatConfig.getAlgorithm());
            this.tables = extracted == null ? Collections.<Table>emptyList() : extracted;
            // The row iterator already has to cope with a null table in the single-table
            // branch below, so an empty document is mapped onto that same state.
            this.currentTable = this.tables.isEmpty() ? null : this.tables.get(0);
        } else {
            this.currentTable = PdfUtils.getSpecificTable(this.document, this.startingTableIndex, formatConfig.getAlgorithm());
            this.tables = Collections.singletonList(this.currentTable);
            if (this.currentTable == null && this.startingTableIndex != 0) {
                throw UserException.dataReadError().message("The specified table index " + this.startingTableIndex + " does not exist in this file. ", new Object[0]).addContext(this.errorContext).build(logger);
            }
        }

        // The first row is consumed here; buildSchema() uses it to derive the column names.
        this.rowIterator = new PdfRowIterator(this.currentTable);
        if (this.rowIterator.hasNext()) {
            this.firstRow = PdfUtils.convertRowToStringArray(this.rowIterator.next());
        }

        TupleMetadata providedSchema = null;
        if (fileSchemaNegotiator.hasProvidedSchema()) {
            providedSchema = fileSchemaNegotiator.providedSchema();
            fileSchemaNegotiator.tableSchema(providedSchema, false);
        } else {
            fileSchemaNegotiator.tableSchema(buildSchema(), false);
        }

        this.rowWriter = fileSchemaNegotiator.build().writer();
        this.metadataReader.setRowWriter(this.rowWriter);
        if (fileSchemaNegotiator.hasProvidedSchema()) {
            buildWriterListFromProvidedSchema(providedSchema);
        } else {
            buildWriterList();
        }
        this.metadataReader.addImplicitColumnsToSchema();
        return true;
    }

    /**
     * Reads rows into the current batch until the batch is full, the record limit is reached
     * or the input is exhausted.
     *
     * <p>With {@code combinePages} enabled, reaching the end of one table advances to the
     * next. After advancing, the loop re-checks the new table's iterator before reading from
     * it, so an empty or header-only table is skipped instead of calling {@code next()} on an
     * exhausted iterator.
     *
     * @return {@code true} if the batch filled up and more rows may follow, {@code false}
     *         when the limit was reached or there is nothing left to read
     */
    public boolean next() {
        PdfFormatConfig formatConfig = (PdfFormatConfig) this.config.plugin.getConfig();
        while (!this.rowWriter.isFull()) {
            if (this.rowWriter.limitReached(this.maxRecords)) {
                return false;
            }
            if (!this.rowIterator.hasNext()) {
                boolean moreTables = formatConfig.combinePages() && this.currentTableIndex < this.tables.size() - 1;
                if (moreTables) {
                    this.currentTableIndex++;
                    this.currentTable = this.tables.get(this.currentTableIndex);
                    this.rowIterator = new PdfRowIterator(this.currentTable);
                    // When headers are extracted, the first row of each following table is
                    // skipped as well -- but only if the table actually has a row.
                    if (formatConfig.extractHeaders() && this.rowIterator.hasNext()) {
                        this.rowIterator.next();
                    }
                    // Re-evaluate the loop conditions against the new table.
                    continue;
                }
                if (this.currentTable == null) {
                    // No table at all: emit a single row carrying only the metadata columns.
                    this.rowWriter.start();
                    this.metadataReader.writeMetadata();
                    this.rowWriter.save();
                }
                return false;
            }
            processRow(this.rowIterator.next());
        }
        return true;
    }

    /**
     * Writes one table row plus the metadata columns to the row writer.
     *
     * <p>A null or empty row produces a row that carries only metadata. Cells with null or
     * empty text are left unset. Cells beyond the last known column are ignored, so a row
     * wider than the schema no longer fails with an {@link IndexOutOfBoundsException} after
     * the row has already been started.
     *
     * @param list the cells of the row in column order; may be {@code null}
     */
    private void processRow(List<RectangularTextContainer> list) {
        this.rowWriter.start();
        if (list != null) {
            int columnCount = this.writers.size();
            int column = 0;
            for (RectangularTextContainer cell : list) {
                if (column >= columnCount) {
                    break;
                }
                if (cell != null && !Strings.isNullOrEmpty(cell.getText())) {
                    this.writers.get(column).load(cell);
                }
                column++;
            }
        }
        this.metadataReader.writeMetadata();
        this.rowWriter.save();
    }

    /**
     * Releases the PDF document, if one is open. The underlying document object is closed
     * first, then the document itself, and the reference is cleared so a second call does
     * nothing.
     */
    public void close() {
        PDDocument openDocument = this.document;
        if (openDocument == null) {
            return;
        }
        AutoCloseables.closeSilently(new AutoCloseable[]{openDocument.getDocument()});
        AutoCloseables.closeSilently(new AutoCloseable[]{openDocument});
        this.document = null;
    }

    /**
     * Loads the PDF referenced by the file split into {@code document}, using the configured
     * password when one is set.
     *
     * <p>The input stream is closed in a {@code finally} block so it is released even when
     * loading the document fails.
     *
     * @throws UserException wrapping any failure to open or load the file
     */
    private void openFile() {
        InputStream stream = null;
        try {
            stream = this.negotiator.fileSystem().openPossiblyCompressedStream(this.split.getPath());
            String password = ((PdfFormatConfig) this.config.plugin.getConfig()).password();
            if (Strings.isNullOrEmpty(password)) {
                this.document = PDDocument.load(stream);
            } else {
                this.document = PDDocument.load(stream, password);
            }
        } catch (Exception e) {
            throw UserException.dataReadError(e).addContext("Failed to open open input file: %s", this.split.getPath().toString()).addContext(this.errorContext).build(logger);
        } finally {
            if (stream != null) {
                AutoCloseables.closeSilently(new AutoCloseable[]{stream});
            }
        }
    }

    /**
     * Derives the schema from the first row of the table. Every column is a nullable VARCHAR.
     *
     * <p>The first row becomes the header list. A header that is null or empty, or any header
     * when header extraction is disabled, is replaced in that list by a generated name made
     * of the field prefix and a running counter. With no first row the schema has no columns
     * from this method.
     *
     * @return the schema built so far by the shared schema builder
     */
    private TupleMetadata buildSchema() {
        List<String> headers = this.firstRow;
        this.columnHeaders = headers;
        if (headers != null) {
            for (int position = 0; position < headers.size(); position++) {
                String columnName = headers.get(position);
                boolean keepHeader = !Strings.isNullOrEmpty(columnName)
                        && ((PdfFormatConfig) this.config.plugin.getConfig()).extractHeaders();
                if (!keepHeader) {
                    columnName = NEW_FIELD_PREFIX + this.unregisteredColumnCount;
                    headers.set(position, columnName);
                    this.unregisteredColumnCount++;
                }
                this.builder.addNullable(columnName, TypeProtos.MinorType.VARCHAR);
            }
        }
        return this.builder.buildSchema();
    }

    /**
     * Creates one string column writer per header, in header order. Does nothing when there
     * are no headers.
     *
     * <p>Each writer is given its actual position in the header list. Looking the position up
     * with {@code indexOf} would scan the list for every column and return the first match
     * for a repeated header name.
     */
    private void buildWriterList() {
        if (this.columnHeaders == null) {
            return;
        }
        int position = 0;
        for (String header : this.columnHeaders) {
            this.writers.add(new StringPdfColumnWriter(position, header, this.rowWriter));
            position++;
        }
    }

    /**
     * Creates one typed column writer per field of the provided schema, in field order, and
     * records each field name as a column header. Falls back to {@code buildWriterList()}
     * when no schema is given.
     *
     * <p>Each writer receives the field's position as its column index (previously every
     * writer was registered with index 0). SMALLINT, TINYINT and INT share the int writer;
     * FLOAT4 and FLOAT8 share the double writer.
     *
     * @param tupleMetadata the provided schema, or {@code null}
     * @throws UserException if the schema contains a type with no matching writer
     */
    private void buildWriterListFromProvidedSchema(TupleMetadata tupleMetadata) {
        if (tupleMetadata == null) {
            buildWriterList();
            return;
        }
        int position = 0;
        for (MaterializedField field : tupleMetadata.toFieldList()) {
            String name = field.getName();
            TypeProtos.MinorType type = field.getType().getMinorType();
            this.columnHeaders.add(name);
            PdfColumnWriter columnWriter;
            switch (type) {
                case VARCHAR:
                    columnWriter = new StringPdfColumnWriter(position, name, this.rowWriter);
                    break;
                case SMALLINT:
                case TINYINT:
                case INT:
                    columnWriter = new IntPdfColumnWriter(position, name, this.rowWriter);
                    break;
                case BIGINT:
                    columnWriter = new BigIntPdfColumnWriter(position, name, this.rowWriter);
                    break;
                case FLOAT4:
                case FLOAT8:
                    columnWriter = new DoublePdfColumnWriter(position, name, this.rowWriter);
                    break;
                case DATE:
                    columnWriter = new DatePdfColumnWriter(position, name, this.rowWriter, this.negotiator);
                    break;
                case TIME:
                    columnWriter = new TimePdfColumnWriter(position, name, this.rowWriter, this.negotiator);
                    break;
                case TIMESTAMP:
                    columnWriter = new TimestampPdfColumnWriter(position, name, this.rowWriter, this.negotiator);
                    break;
                default:
                    throw UserException.unsupportedError().message("PDF Reader with provided schema does not support " + type.name() + " data type.", new Object[0]).addContext(this.errorContext).build(logger);
            }
            this.writers.add(columnWriter);
            position++;
        }
    }
}
