/*
 * Decompiled with CFR 0.152.
 */
package org.apache.drill.exec.store.pdf;

import java.nio.file.Paths;
import java.time.LocalDate;
import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.physical.rowSet.DirectRowSet;
import org.apache.drill.exec.physical.rowSet.RowSet;
import org.apache.drill.exec.physical.rowSet.RowSetBuilder;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.rpc.RpcException;
import org.apache.drill.test.BaseDirTestWatcher;
import org.apache.drill.test.ClusterFixture;
import org.apache.drill.test.ClusterFixtureBuilder;
import org.apache.drill.test.ClusterTest;
import org.apache.drill.test.QueryBuilder;
import org.apache.drill.test.QueryTestUtil;
import org.apache.drill.test.rowSet.RowSetComparison;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Query-level tests for reading PDF files through Drill.
 *
 * <p>Each test runs a SQL statement against a PDF resource under {@code pdf/} and
 * checks either the full result set (schema plus rows) or the number of rows returned.
 * The cluster is started once for the whole class in {@link #setup()}.
 */
@Category(RowSetTests.class)
public class TestPdfFormat extends ClusterTest {

    /** Implicit metadata columns selected by the metadata tests, in query order. */
    private static final String METADATA_COLUMNS =
        "_page_count, _title, _author, _subject, _keywords, _creator, _producer,"
            + "_creation_date, _modification_date, _trapped";

    /**
     * Starts the test cluster and copies the {@code pdf/} resource directory into the
     * test root so that it can also be queried through the {@code dfs} plugin.
     *
     * @throws Exception if the cluster cannot be started
     */
    @BeforeClass
    public static void setup() throws Exception {
        ClusterFixtureBuilder builder = ClusterFixture.builder(dirTestWatcher);
        startCluster(builder);
        dirTestWatcher.copyResourceToRoot(Paths.get("pdf/"));
    }

    /**
     * Star query with a filter on {@code Provincia}; expects the three "Rio Negro" rows
     * under the column names taken from the document.
     */
    @Test
    public void testStarQuery() throws RpcException {
        String sql = "SELECT * FROM cp.`pdf/argentina_diputados_voting_record.pdf` WHERE `Provincia` = 'Rio Negro'";
        QueryBuilder q = client.queryBuilder().sql(sql);
        DirectRowSet results = q.rowSet();

        RowSet expected = rioNegroRowsWithHeaders();
        new RowSetComparison(expected).verifyAndClearAll(results);
    }

    /**
     * Same data as {@link #testStarQuery()}, but with every column named explicitly in
     * the projection list.
     */
    @Test
    public void testExplicitQuery() throws RpcException {
        String sql = "SELECT `Apellido y Nombre`, `Bloque pol\u00edtico`, `Provincia`, `field_0` "
            + "FROM cp.`pdf/argentina_diputados_voting_record.pdf` WHERE `Provincia` = 'Rio Negro'";
        QueryBuilder q = client.queryBuilder().sql(sql);
        DirectRowSet results = q.rowSet();

        RowSet expected = rioNegroRowsWithHeaders();
        new RowSetComparison(expected).verifyAndClearAll(results);
    }

    /**
     * Unfiltered scan of the voting record, once with {@code extractHeaders => false}
     * and once with {@code extractHeaders => true}; both runs are expected to return
     * 31 rows.
     */
    @Test
    public void testFullScan() throws Exception {
        String sql = "SELECT * FROM table(cp.`pdf/argentina_diputados_voting_record.pdf` "
            + "(type => 'pdf', combinePages => false, extractHeaders => false))";
        DirectRowSet results = client.queryBuilder().sql(sql).rowSet();
        try {
            Assert.assertEquals(31L, results.rowCount());
        } finally {
            // Always clear the result set; a failed assertion would otherwise skip clear().
            results.clear();
        }

        sql = "SELECT * FROM table(cp.`pdf/argentina_diputados_voting_record.pdf` "
            + "(type => 'pdf', combinePages => false, extractHeaders => true))";
        results = client.queryBuilder().sql(sql).rowSet();
        try {
            Assert.assertEquals(31L, results.rowCount());
        } finally {
            results.clear();
        }
    }

    /**
     * Reads {@code pdf/encrypted.pdf} with the {@code password} table-function option
     * and verifies the returned two-column table.
     */
    @Test
    public void testEncryptedFile() throws Exception {
        String sql = "SELECT * FROM table(cp.`pdf/encrypted.pdf` "
            + "(type => 'pdf', combinePages => false, extractHeaders => true, password => 'userpassword'))";
        DirectRowSet results = client.queryBuilder().sql(sql).rowSet();

        TupleMetadata expectedSchema = new SchemaBuilder()
            .addNullable("FLA Audit Profile", TypeProtos.MinorType.VARCHAR)
            .addNullable("field_0", TypeProtos.MinorType.VARCHAR)
            .buildSchema();

        RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
            .addRow("Country", "China")
            .addRow("Factory name", "01001523B")
            .addRow("IEM", "BVCPS (HK), Shen Zhen Office")
            .addRow("Date of audit", "May 20-22, 2003")
            .addRow("PC(s)", "adidas-Salomon")
            .addRow("Number of workers", "243")
            .addRow("Product(s)", "Scarf, cap, gloves, beanies and headbands")
            .addRow("Production processes", "Sewing, cutting, packing, embroidery, die-cutting")
            .build();

        new RowSetComparison(expected).verifyAndClearAll(results);
    }

    /**
     * With {@code extractHeaders => false} the expected columns are named
     * {@code field_0} .. {@code field_3}; the filter therefore targets {@code field_2}.
     */
    @Test
    public void testNoHeaders() throws RpcException {
        String sql = "SELECT * FROM table(cp.`pdf/argentina_diputados_voting_record.pdf` "
            + "(type => 'pdf', combinePages => false, extractHeaders => false)) WHERE field_2 = 'Rio Negro'";
        QueryBuilder q = client.queryBuilder().sql(sql);
        DirectRowSet results = q.rowSet();

        RowSet expected = rioNegroRows("field_0", "field_1", "field_2", "field_3");
        new RowSetComparison(expected).verifyAndClearAll(results);
    }

    /**
     * Selects the implicit metadata columns from {@code pdf/20.pdf} and verifies the
     * single returned row.
     */
    @Test
    public void testMetadataQuery() throws RpcException {
        String sql = "SELECT " + METADATA_COLUMNS + " FROM cp.`pdf/20.pdf` LIMIT 1";
        QueryBuilder q = client.queryBuilder().sql(sql);
        DirectRowSet results = q.rowSet();

        RowSet expected = new RowSetBuilder(client.allocator(), metadataSchema())
            .addRow(1,
                "Agricultural Landuse Survey in The Sumas River Watershed Summa",
                "Vision",
                "Agricultural Landuse Survey in The Sumas River Watershed Summa",
                "Agricultural Landuse Survey in The Sumas River Watershed Summa",
                "PScript5.dll Version 5.2.2",
                "Acrobat Distiller 7.0.5 (Windows)",
                857403000000L,
                1230835135000L,
                null)
            .build();

        new RowSetComparison(expected).verifyAndClearAll(results);
    }

    /**
     * Reads {@code pdf/arabic.pdf} and verifies that both the column names and the
     * cell values containing Arabic text match the expected strings.
     */
    @Test
    public void testUnicode() throws Exception {
        String sql = "SELECT * FROM cp.`pdf/arabic.pdf`";
        DirectRowSet results = client.queryBuilder().sql(sql).rowSet();

        TupleMetadata expectedSchema = new SchemaBuilder()
            .addNullable("\u0645\u0631\u062d\u0628\u0627\u064b", TypeProtos.MinorType.VARCHAR)
            .addNullable("\u0627\u0633\u0645\u064a \u0633\u0644\u0637\u0627\u0646", TypeProtos.MinorType.VARCHAR)
            .buildSchema();

        RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
            .addRow("\u0627\u0646\u0627 \u0645\u0646 \u0648\u0644\u0627\u064a\u0629 \u0643\u0627\u0631\u0648\u0644\u064a\u0646\u0627 \u0627\u0644\u0634\u0645\u0627\u0644",
                "\u0645\u0646 \u0627\u064a\u0646 \u0627\u0646\u062a\u061f")
            .addRow("1234", "\u0639\u0646\u062f\u064a 47 \u0642\u0637\u0637")
            .addRow("\u0647\u0644 \u0627\u0646\u062a \u0634\u0628\u0627\u0643\u061f",
                "\u0627\u0633\u0645\u064a Jeremy \u0641\u064a \u0627\u0644\u0627\u0646\u062c\u0644\u064a\u0632\u064a\u0629")
            .addRow("Jeremy is \u062c\u0631\u0645\u064a in Arabic", null)
            .build();

        new RowSetComparison(expected).verifyAndClearAll(results);
    }

    /**
     * Round-trips the plan: obtains the JSON plan for a {@code COUNT(*)} query, submits
     * that plan as a physical plan, and checks that the count is still 31.
     */
    @Test
    public void testSerDe() throws Exception {
        String sql = "SELECT COUNT(*) AS cnt FROM table(cp.`pdf/argentina_diputados_voting_record.pdf` "
            + "(type => 'pdf', combinePages => false))";
        String plan = queryBuilder().sql(sql).explainJson();
        long cnt = queryBuilder().physical(plan).singletonLong();
        Assert.assertEquals("Counts should match", 31L, cnt);
    }

    /**
     * Reads {@code pdf/schools.pdf} with {@code combinePages => true} and expects
     * 221 records.
     */
    @Test
    public void testPageMerge() throws Exception {
        String sql = "SELECT * FROM table(cp.`pdf/schools.pdf` "
            + "(type => 'pdf', combinePages => true, extractHeaders=> true))";
        QueryBuilder.QuerySummary results = client.queryBuilder().sql(sql).run();
        Assert.assertEquals(221L, results.recordCount());
    }

    /**
     * Queries {@code pdf/labor.pdf} (per the test name, a file without tables) using
     * the {@code spreadsheet} extraction algorithm and expects exactly one record.
     */
    @Test
    public void testFileWithNoTables() throws Exception {
        String sql = "SELECT * FROM table(cp.`pdf/labor.pdf` "
            + "(type => 'pdf', extractionAlgorithm => 'spreadsheet'))";
        QueryBuilder.QuerySummary results = client.queryBuilder().sql(sql).run();
        Assert.assertEquals(1L, results.recordCount());
    }

    /**
     * Selects the implicit metadata columns from {@code pdf/labor.pdf} and verifies the
     * single returned row, including the null title/author/subject/keywords values.
     */
    @Test
    public void testMetadataQueryWithFileWithNoTables() throws RpcException {
        String sql = "SELECT " + METADATA_COLUMNS + " FROM table(cp.`pdf/labor.pdf` "
            + "(type => 'pdf', extractionAlgorithm => 'spreadsheet')) LIMIT 1";
        DirectRowSet results = client.queryBuilder().sql(sql).rowSet();

        RowSet expected = new RowSetBuilder(client.allocator(), metadataSchema())
            .addRow(1,
                null,
                null,
                null,
                null,
                "pdftk 2.01 - www.pdftk.com",
                "itext-paulo-155 (itextpdf.sf.net-lowagie.com)",
                QueryTestUtil.ConvertDateToLong("2015-04-25T23:09:47Z"),
                QueryTestUtil.ConvertDateToLong("2015-04-25T23:09:47Z"),
                null)
            .build();

        new RowSetComparison(expected).verifyAndClearAll(results);
    }

    /**
     * Reads {@code pdf/schools.pdf} with the {@code spreadsheet} and then the
     * {@code basic} extraction algorithm, checking the schema and row count expected
     * for each (216 and 221 rows respectively).
     */
    @Test
    public void testExtractionAlgorithms() throws Exception {
        String sql = "SELECT * FROM table(cp.`pdf/schools.pdf` "
            + "(type => 'pdf', combinePages => true, extractionAlgorithm => 'spreadsheet'))";
        DirectRowSet results = client.queryBuilder().sql(sql).rowSet();
        try {
            TupleMetadata expectedSchema = new SchemaBuilder()
                .addNullable("field_0", TypeProtos.MinorType.VARCHAR)
                .addNullable("Last Name", TypeProtos.MinorType.VARCHAR)
                .addNullable("First Name", TypeProtos.MinorType.VARCHAR)
                .addNullable("Address", TypeProtos.MinorType.VARCHAR)
                .addNullable("City", TypeProtos.MinorType.VARCHAR)
                .addNullable("State", TypeProtos.MinorType.VARCHAR)
                .addNullable("Zip", TypeProtos.MinorType.VARCHAR)
                .addNullable("Occupation", TypeProtos.MinorType.VARCHAR)
                .addNullable("Employer", TypeProtos.MinorType.VARCHAR)
                .addNullable("Date", TypeProtos.MinorType.VARCHAR)
                .addNullable("Amount", TypeProtos.MinorType.VARCHAR)
                .buildSchema();
            Assert.assertTrue(results.schema().isEquivalent(expectedSchema));
            Assert.assertEquals(216L, results.rowCount());
        } finally {
            // Always clear the result set; a failed assertion would otherwise skip clear().
            results.clear();
        }

        sql = "SELECT * FROM table(cp.`pdf/schools.pdf` "
            + "(type => 'pdf', combinePages => true, extractionAlgorithm => 'basic'))";
        results = client.queryBuilder().sql(sql).rowSet();
        try {
            TupleMetadata expectedSchema = new SchemaBuilder()
                .addNullable("Last Name", TypeProtos.MinorType.VARCHAR)
                .addNullable("First Name Address", TypeProtos.MinorType.VARCHAR)
                .addNullable("field_0", TypeProtos.MinorType.VARCHAR)
                .addNullable("City", TypeProtos.MinorType.VARCHAR)
                .addNullable("State", TypeProtos.MinorType.VARCHAR)
                .addNullable("Zip", TypeProtos.MinorType.VARCHAR)
                .addNullable("field_1", TypeProtos.MinorType.VARCHAR)
                .addNullable("Occupation Employer", TypeProtos.MinorType.VARCHAR)
                .addNullable("Date", TypeProtos.MinorType.VARCHAR)
                .addNullable("field_2", TypeProtos.MinorType.VARCHAR)
                .addNullable("Amount", TypeProtos.MinorType.VARCHAR)
                .buildSchema();
            Assert.assertTrue(results.schema().isEquivalent(expectedSchema));
            Assert.assertEquals(221L, results.rowCount());
        } finally {
            results.clear();
        }
    }

    /**
     * Supplies an inline schema through the {@code schema} table-function option
     * (a DATE column with an {@code M/d/yyyy} format property and a DOUBLE column)
     * and verifies the first five rows come back with those types.
     */
    @Test
    public void testProvidedSchema() throws Exception {
        String sql = "SELECT * FROM table(cp.`pdf/schools.pdf` (type => 'pdf', combinePages => true, "
            + "schema => 'inline=(`Last Name` VARCHAR, `First Name Address` VARCHAR, `field_0` VARCHAR, "
            + "`City` VARCHAR, `State` VARCHAR, `Zip` VARCHAR, `field_1` VARCHAR, "
            + "`Occupation Employer` VARCHAR, `Date` VARCHAR, "
            + "`field_2` DATE properties {`drill.format` = `M/d/yyyy`}, `Amount` DOUBLE)')) LIMIT 5";
        DirectRowSet results = client.queryBuilder().sql(sql).rowSet();

        TupleMetadata expectedSchema = new SchemaBuilder()
            .addNullable("Last Name", TypeProtos.MinorType.VARCHAR)
            .addNullable("First Name Address", TypeProtos.MinorType.VARCHAR)
            .addNullable("field_0", TypeProtos.MinorType.VARCHAR)
            .addNullable("City", TypeProtos.MinorType.VARCHAR)
            .addNullable("State", TypeProtos.MinorType.VARCHAR)
            .addNullable("Zip", TypeProtos.MinorType.VARCHAR)
            .addNullable("field_1", TypeProtos.MinorType.VARCHAR)
            .addNullable("Occupation Employer", TypeProtos.MinorType.VARCHAR)
            .addNullable("Date", TypeProtos.MinorType.VARCHAR)
            .addNullable("field_2", TypeProtos.MinorType.DATE)
            .addNullable("Amount", TypeProtos.MinorType.FLOAT8)
            .buildSchema();

        RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
            .addRow("Lidstad", "Dick & Peg 62 Mississippi River Blvd N", null, "Saint Paul", "MN", null,
                "55104", "retired", null, LocalDate.parse("2012-10-12"), 60.0)
            .addRow("Strom", "Pam 1229 Hague Ave", null, "St. Paul", "MN", null,
                "55104", null, null, LocalDate.parse("2012-09-12"), 60.0)
            .addRow("Seeba", "Louise & Paul 1399 Sheldon St", null, "Saint Paul", "MN", null,
                "55108", "BOE City of Saint Paul", null, LocalDate.parse("2012-10-12"), 60.0)
            .addRow("Schumacher / Bales", "Douglas L. / Patricia 948 County Rd. D W", null, "Saint Paul", "MN", null,
                "55126", null, null, LocalDate.parse("2012-10-13"), 60.0)
            .addRow("Abrams", "Marjorie 238 8th St east", null, "St Paul", "MN", null,
                "55101", "Retired Retired", null, LocalDate.parse("2012-08-08"), 75.0)
            .build();

        new RowSetComparison(expected).verifyAndClearAll(results);
    }

    /**
     * Counts the rows returned from {@code pdf/schools.pdf} with
     * {@code defaultTableIndex => 3} and expects 45.
     */
    @Test
    public void testSpecificTable() throws Exception {
        String sql = "SELECT COUNT(*) FROM table(cp.`pdf/schools.pdf` (type => 'pdf', defaultTableIndex => 3))";
        long resultCount = client.queryBuilder().sql(sql).singletonLong();
        Assert.assertEquals(45L, resultCount);
    }

    /**
     * Generates a zip-compressed copy of the voting record, queries it through
     * {@code dfs}, and expects the same rows as {@link #testStarQuery()}.
     */
    @Test
    public void testWithCompressedFile() throws Exception {
        QueryTestUtil.generateCompressedFile("pdf/argentina_diputados_voting_record.pdf", "zip", "pdf/compressed.pdf.zip");

        String sql = "SELECT * FROM dfs.`pdf/compressed.pdf.zip` WHERE `Provincia` = 'Rio Negro'";
        QueryBuilder q = client.queryBuilder().sql(sql);
        DirectRowSet results = q.rowSet();

        RowSet expected = rioNegroRowsWithHeaders();
        new RowSetComparison(expected).verifyAndClearAll(results);
    }

    /**
     * Builds the expected "Rio Negro" rows using the column names extracted from the
     * voting-record document.
     */
    private static RowSet rioNegroRowsWithHeaders() {
        return rioNegroRows("Apellido y Nombre", "Bloque pol\u00edtico", "Provincia", "field_0");
    }

    /**
     * Builds the three expected "Rio Negro" rows of the voting record as nullable
     * VARCHAR columns with the given names.
     *
     * @param nameColumn     name of the first column (deputy name)
     * @param blocColumn     name of the second column (political bloc)
     * @param provinceColumn name of the third column (province)
     * @param voteColumn     name of the fourth column (vote)
     * @return the expected row set, allocated from the test client's allocator
     */
    private static RowSet rioNegroRows(String nameColumn, String blocColumn,
                                       String provinceColumn, String voteColumn) {
        TupleMetadata schema = new SchemaBuilder()
            .addNullable(nameColumn, TypeProtos.MinorType.VARCHAR)
            .addNullable(blocColumn, TypeProtos.MinorType.VARCHAR)
            .addNullable(provinceColumn, TypeProtos.MinorType.VARCHAR)
            .addNullable(voteColumn, TypeProtos.MinorType.VARCHAR)
            .buildSchema();

        return new RowSetBuilder(client.allocator(), schema)
            .addRow("ALBRIEU, Oscar Edmundo Nicolas", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO")
            .addRow("AVOSCAN, Herman Horacio", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO")
            .addRow("CEJAS, Jorge Alberto", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO")
            .build();
    }

    /**
     * Builds the expected schema for the implicit metadata columns listed in
     * {@link #METADATA_COLUMNS}, in the same order.
     */
    private static TupleMetadata metadataSchema() {
        return new SchemaBuilder()
            .addNullable("_page_count", TypeProtos.MinorType.INT)
            .addNullable("_title", TypeProtos.MinorType.VARCHAR)
            .addNullable("_author", TypeProtos.MinorType.VARCHAR)
            .addNullable("_subject", TypeProtos.MinorType.VARCHAR)
            .addNullable("_keywords", TypeProtos.MinorType.VARCHAR)
            .addNullable("_creator", TypeProtos.MinorType.VARCHAR)
            .addNullable("_producer", TypeProtos.MinorType.VARCHAR)
            .addNullable("_creation_date", TypeProtos.MinorType.TIMESTAMP)
            .addNullable("_modification_date", TypeProtos.MinorType.TIMESTAMP)
            .addNullable("_trapped", TypeProtos.MinorType.VARCHAR)
            .buildSchema();
    }
}

