package org.apache.drill.exec.metastore;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import java.io.File;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.apache.drill.categories.MetastoreTest;
import org.apache.drill.categories.SlowTest;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.sql.TestMetastoreCommands;
import org.apache.drill.exec.util.StoragePluginTestUtils;
import org.apache.drill.metastore.metadata.BaseTableMetadata;
import org.apache.drill.metastore.metadata.FileMetadata;
import org.apache.drill.metastore.metadata.MetadataInfo;
import org.apache.drill.metastore.metadata.MetadataType;
import org.apache.drill.metastore.metadata.SegmentMetadata;
import org.apache.drill.metastore.metadata.TableInfo;
import org.apache.drill.metastore.statistics.ColumnStatistics;
import org.apache.drill.metastore.statistics.ColumnStatisticsKind;
import org.apache.drill.metastore.statistics.StatisticsHolder;
import org.apache.drill.metastore.statistics.TableStatisticsKind;
import org.apache.drill.test.ClusterFixture;
import org.apache.drill.test.ClusterFixtureBuilder;
import org.apache.drill.test.ClusterTest;
import org.apache.hadoop.fs.Path;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.ExpectedException;

@Category({SlowTest.class, MetastoreTest.class})
/* loaded from: input_file:org/apache/drill/exec/metastore/TestMetastoreWithEasyFormatPlugin.class */
public class TestMetastoreWithEasyFormatPlugin extends ClusterTest {
    /**
     * Inline schema definition passed as the {@code schema=>} table-function argument of the
     * ANALYZE TABLE statements issued by tests in this class.
     */
    private static final String SCHEMA_STRING = "'inline=("
        + "`o_orderkey` INT not null, "
        + "`o_custkey` INT not null, "
        + "`o_orderstatus` VARCHAR not null, "
        + "`o_totalprice` DOUBLE not null, "
        + "`o_orderdate` DATE not null format \"yyyy-MM-dd''T''HH:mm:ss.SSSXXX\", "
        + "`o_orderpriority` VARCHAR not null, "
        + "`o_clerk` VARCHAR not null, "
        + "`o_shippriority` INT not null, "
        + "`o_comment` VARCHAR not null)'";

    /**
     * Schema used when building the expected table, segment and file metadata: the required
     * order columns followed by the nullable {@code dir0} / {@code dir1} columns.
     */
    private static final TupleMetadata SCHEMA = new SchemaBuilder()
        .add("o_orderkey", TypeProtos.MinorType.INT)
        .add("o_custkey", TypeProtos.MinorType.INT)
        .add("o_orderstatus", TypeProtos.MinorType.VARCHAR)
        .add("o_totalprice", TypeProtos.MinorType.FLOAT8)
        .add("o_orderdate", TypeProtos.MinorType.DATE)
        .add("o_orderpriority", TypeProtos.MinorType.VARCHAR)
        .add("o_clerk", TypeProtos.MinorType.VARCHAR)
        .add("o_shippriority", TypeProtos.MinorType.INT)
        .add("o_comment", TypeProtos.MinorType.VARCHAR)
        .addNullable("dir0", TypeProtos.MinorType.VARCHAR)
        .addNullable("dir1", TypeProtos.MinorType.VARCHAR)
        .build();

    /** JUnit rule for declaring expected exceptions; initialised to expect none. */
    @Rule
    public ExpectedException thrown = ExpectedException.none();

    /**
     * Starts the test cluster once for the whole class, with the {@code drill.exec.zk.root}
     * config property set to the absolute path of the test root directory.
     */
    @BeforeClass
    public static void setUp() throws Exception {
        ClusterFixtureBuilder fixtureBuilder = ClusterFixture.builder(dirTestWatcher);
        String zkRoot = dirTestWatcher.getRootDir().getAbsolutePath();
        fixtureBuilder.configProperty("drill.exec.zk.root", zkRoot);
        startCluster(fixtureBuilder);
    }

    /**
     * Before every test: switches on the Metastore-related session options and sets
     * {@code planner.slice_target} to 1.
     */
    @Before
    public void prepare() {
        String[] enabledOptions = {
            "metastore.enabled",
            "metastore.metadata.use_schema",
            "metastore.metadata.use_statistics"
        };
        for (String option : enabledOptions) {
            client.alterSession(option, true);
        }
        client.alterSession("planner.slice_target", 1);
    }

    /**
     * Runs {@code ANALYZE TABLE ... REFRESH METADATA} with an inline schema on the multilevel CSV
     * table and compares the table, segment ({@code dir0} and {@code dir1}) and file metadata
     * returned by the Metastore with the expected values. Metadata is dropped afterwards.
     */
    @Test
    public void testAnalyzeOnTextTable() throws Exception {
        String tableName = "multilevel/csv";
        TableInfo tableInfo = getTableInfo(tableName, "default", "csv");

        File table = dirTestWatcher.copyResourceToRoot(Paths.get(tableName));
        Path tablePath = new Path(table.toURI().getPath());

        BaseTableMetadata expectedTableMetadata =
            TestMetastoreCommands.getBaseTableMetadata(tableInfo, table, SCHEMA);

        TableInfo baseTableInfo = TableInfo.builder()
            .name(tableName)
            .storagePlugin(StoragePluginTestUtils.DFS_PLUGIN_NAME)
            .workspace("default")
            .build();

        // Typed maps/sets instead of raw ones so that the stream lambdas below see real element types.
        HashMap<SchemaPath, ColumnStatistics<?>> dir0Statistics =
            new HashMap<>(TestMetastoreCommands.DIR0_1994_SEGMENT_COLUMN_STATISTICS);
        dir0Statistics.put(
            SchemaPath.getSimplePath("o_comment"),
            TestMetastoreCommands.getColumnStatistics(
                " accounts nag slyly. ironic",
                "yly final requests over the furiously regula",
                40L,
                TypeProtos.MinorType.VARCHAR));

        SegmentMetadata expectedDir0 = SegmentMetadata.builder()
            .tableInfo(baseTableInfo)
            .metadataInfo(MetadataInfo.builder()
                .type(MetadataType.SEGMENT)
                .identifier("1994")
                .key("1994")
                .build())
            .path(new Path(tablePath, "1994"))
            .schema(SCHEMA)
            .lastModifiedTime(TestMetastoreCommands.getMaxLastModified(new File(table, "1994")))
            .column(SchemaPath.getSimplePath("dir0"))
            .columnsStatistics(dir0Statistics)
            .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(40L, TableStatisticsKind.ROW_COUNT)))
            .locations(ImmutableSet.of(
                new Path(tablePath, "1994/Q1/orders_94_q1.csv"),
                new Path(tablePath, "1994/Q2/orders_94_q2.csv"),
                new Path(tablePath, "1994/Q3/orders_94_q3.csv"),
                new Path(tablePath, "1994/Q4/orders_94_q4.csv")))
            .partitionValues(Collections.singletonList("1994"))
            .build();

        Set<Path> expectedTopLevelLocations = ImmutableSet.of(
            new Path(tablePath, "1994"),
            new Path(tablePath, "1995"),
            new Path(tablePath, "1996"));

        Set<Set<Path>> expectedSegmentFiles = new HashSet<>();
        expectedSegmentFiles.add(ImmutableSet.of(
            new Path(tablePath, "1994/Q2/orders_94_q2.csv"),
            new Path(tablePath, "1994/Q4/orders_94_q4.csv"),
            new Path(tablePath, "1994/Q1/orders_94_q1.csv"),
            new Path(tablePath, "1994/Q3/orders_94_q3.csv")));
        expectedSegmentFiles.add(ImmutableSet.of(
            new Path(tablePath, "1995/Q2/orders_95_q2.csv"),
            new Path(tablePath, "1995/Q4/orders_95_q4.csv"),
            new Path(tablePath, "1995/Q1/orders_95_q1.csv"),
            new Path(tablePath, "1995/Q3/orders_95_q3.csv")));
        expectedSegmentFiles.add(ImmutableSet.of(
            new Path(tablePath, "1996/Q3/orders_96_q3.csv"),
            new Path(tablePath, "1996/Q2/orders_96_q2.csv"),
            new Path(tablePath, "1996/Q4/orders_96_q4.csv"),
            new Path(tablePath, "1996/Q1/orders_96_q1.csv")));

        FileMetadata expectedFile = FileMetadata.builder()
            .tableInfo(baseTableInfo)
            .metadataInfo(MetadataInfo.builder()
                .type(MetadataType.FILE)
                .identifier("1994/Q1/orders_94_q1.csv")
                .key("1994")
                .build())
            .schema(SCHEMA)
            .lastModifiedTime(new File(new File(new File(table, "1994"), "Q1"), "orders_94_q1.csv").lastModified())
            .columnsStatistics(TestMetastoreCommands.DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS)
            .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
            .path(new Path(tablePath, "1994/Q1/orders_94_q1.csv"))
            .build();

        try {
            testBuilder()
                .sqlQuery("analyze table table(dfs.`%s`(schema=>%s)) refresh metadata", tableName, SCHEMA_STRING)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName))
                .go();

            BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .tableMetadata(tableInfo);
            Assert.assertEquals(expectedTableMetadata, actualTableMetadata);

            List<SegmentMetadata> topLevelSegments = cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .segmentsMetadataByColumn(tableInfo, null, "`dir0`");

            SegmentMetadata actualDir0 = topLevelSegments.stream()
                .filter(segment -> segment.getMetadataInfo().identifier().equals("1994"))
                .findAny()
                .orElseThrow(() -> new AssertionError("Segment is absent"));
            Assert.assertEquals(expectedDir0, actualDir0);

            Set<Path> actualTopLevelLocations = topLevelSegments.stream()
                .map(SegmentMetadata::getLocation)
                .collect(Collectors.toSet());
            Assert.assertEquals(expectedTopLevelLocations, actualTopLevelLocations);

            Set<Set<Path>> actualSegmentFiles = topLevelSegments.stream()
                .map(SegmentMetadata::getLocations)
                .collect(Collectors.toSet());
            Assert.assertEquals(expectedSegmentFiles, actualSegmentFiles);

            List<SegmentMetadata> nestedSegments = cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .segmentsMetadataByColumn(tableInfo, null, "`dir1`");
            Assert.assertEquals(12L, nestedSegments.size());

            SegmentMetadata expectedDir1 = SegmentMetadata.builder()
                .tableInfo(baseTableInfo)
                .metadataInfo(MetadataInfo.builder()
                    .type(MetadataType.SEGMENT)
                    .identifier("1994/Q1")
                    .key("1994")
                    .build())
                .path(new Path(new Path(tablePath, "1994"), "Q1"))
                .schema(SCHEMA)
                .lastModifiedTime(TestMetastoreCommands.getMaxLastModified(new File(new File(table, "1994"), "Q1")))
                .column(SchemaPath.getSimplePath("dir1"))
                .columnsStatistics(TestMetastoreCommands.DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS)
                .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
                .locations(ImmutableSet.of(new Path(tablePath, "1994/Q1/orders_94_q1.csv")))
                .partitionValues(Collections.singletonList("Q1"))
                .build();
            SegmentMetadata actualDir1 = nestedSegments.stream()
                .filter(segment -> segment.getMetadataInfo().identifier().equals("1994/Q1"))
                .findAny()
                .orElse(null);
            Assert.assertEquals(expectedDir1, actualDir1);

            List<FileMetadata> filesMetadata = cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .filesMetadata(tableInfo, null, null);
            Assert.assertEquals(12L, filesMetadata.size());

            FileMetadata actualFile = filesMetadata.stream()
                .filter(file -> file.getMetadataInfo().identifier().equals("1994/Q1/orders_94_q1.csv"))
                .findAny()
                .orElse(null);
            Assert.assertEquals(expectedFile, actualFile);
        } finally {
            // Single cleanup path for both success and failure.
            run("analyze table dfs.`%s` drop metadata if exists", tableName);
        }
    }

    /**
     * Runs {@code ANALYZE TABLE ... REFRESH METADATA} on a CSV file with a header ({@code csvh})
     * and compares the table metadata returned by the Metastore with the expected schema
     * (all VARCHAR columns), column statistics and row count. Metadata is dropped afterwards.
     */
    @Test
    public void testAnalyzeOnTextTableWithHeader() throws Exception {
        String tableName = "store/text/data/cars.csvh";
        File table = dirTestWatcher.copyResourceToRoot(Paths.get(tableName));
        TableInfo tableInfo = getTableInfo(tableName, "default", "csvh");

        TupleMetadata schema = new SchemaBuilder()
            .add("Year", TypeProtos.MinorType.VARCHAR)
            .add("Make", TypeProtos.MinorType.VARCHAR)
            .add("Model", TypeProtos.MinorType.VARCHAR)
            .add("Description", TypeProtos.MinorType.VARCHAR)
            .add("Price", TypeProtos.MinorType.VARCHAR)
            .build();

        // Explicit type witness: an untyped ImmutableMap.builder() would infer <Object, Object>.
        ImmutableMap<SchemaPath, ColumnStatistics<?>> columnStatistics =
            ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder()
                .put(SchemaPath.getSimplePath("Description"),
                    TestMetastoreCommands.getColumnStatistics("", "ac, abs, moon", 4L, TypeProtos.MinorType.VARCHAR))
                .put(SchemaPath.getSimplePath("Make"),
                    TestMetastoreCommands.getColumnStatistics("Chevy", "Jeep", 4L, TypeProtos.MinorType.VARCHAR))
                .put(SchemaPath.getSimplePath("Model"),
                    TestMetastoreCommands.getColumnStatistics("E350", "Venture \"Extended Edition, Very Large\"", 4L, TypeProtos.MinorType.VARCHAR))
                .put(SchemaPath.getSimplePath("Price"),
                    TestMetastoreCommands.getColumnStatistics("3000.00", "5000.00", 4L, TypeProtos.MinorType.VARCHAR))
                .put(SchemaPath.getSimplePath("Year"),
                    TestMetastoreCommands.getColumnStatistics("1996", "1999", 4L, TypeProtos.MinorType.VARCHAR))
                .build();

        BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
            .tableInfo(tableInfo)
            .metadataInfo(TestMetastoreCommands.TABLE_META_INFO)
            .schema(schema)
            .location(new Path(table.toURI().getPath()))
            .columnsStatistics(columnStatistics)
            .metadataStatistics(Arrays.asList(
                new StatisticsHolder<>(4L, TableStatisticsKind.ROW_COUNT),
                new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
            .partitionKeys(Collections.emptyMap())
            .lastModifiedTime(TestMetastoreCommands.getMaxLastModified(table))
            .build();

        try {
            testBuilder()
                .sqlQuery("analyze table dfs.`%s` refresh metadata", tableName)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName))
                .go();

            BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .tableMetadata(tableInfo);
            Assert.assertEquals(expectedTableMetadata, actualTableMetadata);
        } finally {
            // Single cleanup path for both success and failure.
            run("analyze table dfs.`%s` drop metadata if exists", tableName);
        }
    }

    /**
     * Analyzes a copy of the multilevel CSV table, adds one more file under {@code 1994/Q4},
     * analyzes again and checks that the table metadata reflects 130 rows, the number of segments
     * stays at 15 and the number of files grows from 12 to 13. Metadata and the table copy are
     * removed afterwards.
     */
    @Test
    public void testIncrementalAnalyzeNewFile() throws Exception {
        String tableName = "multilevel/csvNewFile";
        String analyzeQuery = "ANALYZE TABLE table(dfs.tmp.`%s` (schema=>%s)) REFRESH METADATA";
        String analyzeSummary = "Collected / refreshed metadata for table [dfs.tmp.%s]";

        File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/csv"), Paths.get(tableName));
        Path tablePath = new Path(table.toURI().getPath());
        TableInfo tableInfo = getTableInfo(tableName, StoragePluginTestUtils.TMP_SCHEMA, "csv");

        // Typed map so the replaceAll lambda sees ColumnStatistics values rather than Object.
        HashMap<SchemaPath, ColumnStatistics<?>> updatedStatistics =
            new HashMap<>(TestMetastoreCommands.TABLE_COLUMN_STATISTICS);
        updatedStatistics.replaceAll((column, statistics) ->
            statistics.cloneWith(new ColumnStatistics<>(Arrays.asList(
                new StatisticsHolder<>(130L, TableStatisticsKind.ROW_COUNT),
                new StatisticsHolder<>(130L, ColumnStatisticsKind.NON_NULL_VALUES_COUNT)))));

        BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
            .tableInfo(tableInfo)
            .metadataInfo(TestMetastoreCommands.TABLE_META_INFO)
            .schema(SCHEMA)
            .location(tablePath)
            .columnsStatistics(updatedStatistics)
            .metadataStatistics(Arrays.asList(
                new StatisticsHolder<>(130L, TableStatisticsKind.ROW_COUNT),
                new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
            .partitionKeys(Collections.emptyMap())
            .lastModifiedTime(TestMetastoreCommands.getMaxLastModified(table))
            .build();

        try {
            testBuilder()
                .sqlQuery(analyzeQuery, tableName, SCHEMA_STRING)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format(analyzeSummary, tableName))
                .go();

            Assert.assertEquals(15L, cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .segmentsMetadataByMetadataKey(tableInfo, null, null)
                .size());
            Assert.assertEquals(12L, cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .filesMetadata(tableInfo, null, null)
                .size());

            // Add a new file to an existing segment and re-run the analyze.
            dirTestWatcher.copyResourceToTestTmp(
                Paths.get("multilevel", "csv", "1994", "Q4", "orders_94_q4.csv"),
                Paths.get(tableName, "1994", "Q4", "orders_94_q4_1.csv"));

            testBuilder()
                .sqlQuery(analyzeQuery, tableName, SCHEMA_STRING)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format(analyzeSummary, tableName))
                .go();

            Assert.assertEquals(expectedTableMetadata, cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .tableMetadata(tableInfo));
            Assert.assertEquals(15L, cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .segmentsMetadataByMetadataKey(tableInfo, null, null)
                .size());
            Assert.assertEquals(13L, cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .filesMetadata(tableInfo, null, null)
                .size());
        } finally {
            // Single cleanup path for both success and failure.
            run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
            FileUtils.deleteQuietly(table);
        }
    }

    /**
     * Analyzes a copy of the multilevel CSV table twice without changing it and checks that the
     * second run reports that metadata is up to date, that the number of segments stays at 15 and
     * that the last modified time stored in the Metastore equals the one computed before the
     * first analyze. Metadata and the table copy are removed afterwards.
     */
    @Test
    public void testIncrementalAnalyzeUnchangedTable() throws Exception {
        String tableName = "multilevel/csvUnchanged";
        String analyzeQuery = "ANALYZE TABLE table(dfs.tmp.`%s` (schema=>%s)) REFRESH METADATA";

        File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/csv"), Paths.get(tableName));
        TableInfo tableInfo = getTableInfo(tableName, StoragePluginTestUtils.TMP_SCHEMA, "csv");
        long expectedLastModifiedTime = TestMetastoreCommands.getMaxLastModified(table);

        try {
            testBuilder()
                .sqlQuery(analyzeQuery, tableName, SCHEMA_STRING)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName))
                .go();

            Assert.assertEquals(15L, cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .segmentsMetadataByMetadataKey(tableInfo, null, null)
                .size());

            // Second run on the untouched table must be reported as a no-op.
            testBuilder()
                .sqlQuery(analyzeQuery, tableName, SCHEMA_STRING)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(false, "Table metadata is up to date, analyze wasn't performed.")
                .go();

            Assert.assertEquals(15L, cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .segmentsMetadataByMetadataKey(tableInfo, null, null)
                .size());

            long actualLastModifiedTime = cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .metastoreTableInfo(tableInfo)
                .lastModifiedTime()
                .longValue();
            Assert.assertEquals(expectedLastModifiedTime, actualLastModifiedTime);
        } finally {
            // Single cleanup path for both success and failure.
            run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
            FileUtils.deleteQuietly(table);
        }
    }

    /**
     * Creates a two-directory {@code csvh} table whose {@code mykey} column contains both values
     * and nulls, analyzes it with an inline schema and checks that a {@code mykey = 100} query
     * returns 25 rows and that its plan uses the Metastore, keeps the filter and reads one file.
     * The {@code store.format} session option, metadata and table are cleaned up afterwards.
     */
    @Test
    public void testIntWithNullsPartitionPruning() throws Exception {
        String tableName = "t5";
        String query = "select mykey from dfs.tmp.`%s` where mykey = 100";

        try {
            client.alterSession("store.format", "csvh");

            run("create table dfs.tmp.`%s/a` as\n"
                + "select 100 as mykey, cast(null as varchar) as col_notexist from cp.`tpch/nation.parquet`\n"
                + "union all\n"
                + "select cast(null as int) as mykey, 'a' as col_notexist from cp.`tpch/region.parquet`", tableName);

            run("create table dfs.tmp.`%s/b` as\n"
                + "select 200 as mykey, cast(null as varchar) as col_notexist from cp.`tpch/nation.parquet`\n"
                + "union all\n"
                + "select  cast(null as int) as mykey, 'a' as col_notexist from cp.`tpch/region.parquet`", tableName);

            testBuilder()
                .sqlQuery("analyze table table(dfs.tmp.`%s` (schema=>'inline=(mykey int, col_notexist varchar)')) REFRESH METADATA", tableName)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName))
                .go();

            long actualRowCount = queryBuilder().sql(query, tableName).run().recordCount();
            Assert.assertEquals("Row count does not match the expected value", 25L, actualRowCount);

            queryBuilder()
                .sql(query, tableName)
                .planMatcher()
                .include("usedMetastore=true", "Filter", "numFiles=1")
                .match();
        } finally {
            // Restore the output format so the csvh setting does not leak into other tests
            // that share this client; mirrors the cleanup in testPartitionPruningWithIsNull.
            client.resetSession("store.format");
            run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
            run("drop table if exists dfs.tmp.`%s`", tableName);
        }
    }

    /**
     * Creates a two-directory {@code csvh} table where one directory has only null {@code mykey}
     * values, analyzes it with an inline schema and checks that a {@code mykey is null} query
     * returns 5 rows and that its plan uses the Metastore and contains no filter.
     * The {@code store.format} session option, metadata and table are cleaned up afterwards.
     */
    @Test
    public void testPartitionPruningWithIsNull() throws Exception {
        String tableName = "t6";
        String query = "select mykey from dfs.tmp.`%s` where mykey is null";

        try {
            client.alterSession("store.format", "csvh");

            run("create table dfs.tmp.`%s/a` as\n"
                + "select cast(null as int) as mykey, 'a' as col_notexist from cp.`tpch/region.parquet`", tableName);

            run("create table dfs.tmp.`%s/b` as\n"
                + "select 200 as mykey, cast(null as varchar) as col_notexist from cp.`tpch/nation.parquet`", tableName);

            testBuilder()
                .sqlQuery("analyze table table(dfs.tmp.`%s` (schema=>'inline=(mykey int, col_notexist varchar)')) REFRESH METADATA", tableName)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName))
                .go();

            long actualRowCount = queryBuilder().sql(query, tableName).run().recordCount();
            Assert.assertEquals("Row count does not match the expected value", 5L, actualRowCount);

            queryBuilder()
                .sql(query, tableName)
                .planMatcher()
                .include("usedMetastore=true")
                .exclude("Filter")
                .match();
        } finally {
            // Single cleanup path for both success and failure.
            client.resetSession("store.format");
            run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
            run("drop table if exists dfs.tmp.`%s`", tableName);
        }
    }

    /**
     * Runs {@code ANALYZE TABLE ... REFRESH METADATA} on the multilevel JSON table and compares the
     * table, segment ({@code dir0} and {@code dir1}) and file metadata returned by the Metastore
     * with the expected values. Metadata is dropped afterwards.
     */
    @Test
    public void testAnalyzeOnJsonTable() throws Exception {
        String tableName = "multilevel/json";
        // Plain "json" format type, matching how the sibling tests pass "csv" / "csvh".
        TableInfo tableInfo = getTableInfo(tableName, "default", "json");

        File table = dirTestWatcher.copyResourceToRoot(Paths.get(tableName));
        Path tablePath = new Path(table.toURI().getPath());

        TupleMetadata schema = new SchemaBuilder()
            .addNullable("dir0", TypeProtos.MinorType.VARCHAR)
            .addNullable("dir1", TypeProtos.MinorType.VARCHAR)
            .addNullable("o_orderkey", TypeProtos.MinorType.BIGINT)
            .addNullable("o_custkey", TypeProtos.MinorType.BIGINT)
            .addNullable("o_orderstatus", TypeProtos.MinorType.VARCHAR)
            .addNullable("o_totalprice", TypeProtos.MinorType.FLOAT8)
            .addNullable("o_orderdate", TypeProtos.MinorType.VARCHAR)
            .addNullable("o_orderpriority", TypeProtos.MinorType.VARCHAR)
            .addNullable("o_clerk", TypeProtos.MinorType.VARCHAR)
            .addNullable("o_shippriority", TypeProtos.MinorType.BIGINT)
            .addNullable("o_comment", TypeProtos.MinorType.VARCHAR)
            .build();

        // Typed maps/sets instead of raw ones so that the stream lambdas below see real element types.
        HashMap<SchemaPath, ColumnStatistics<?>> tableStatistics =
            new HashMap<>(TestMetastoreCommands.TABLE_COLUMN_STATISTICS);
        tableStatistics.put(SchemaPath.getSimplePath("o_custkey"),
            TestMetastoreCommands.getColumnStatistics(25L, 1498L, 120L, TypeProtos.MinorType.BIGINT));
        tableStatistics.put(SchemaPath.getSimplePath("o_orderdate"),
            TestMetastoreCommands.getColumnStatistics("1994-01-01T00:00:00.000-08:00", "1996-12-19T00:00:00.000-08:00", 120L, TypeProtos.MinorType.VARCHAR));
        tableStatistics.put(SchemaPath.getSimplePath("o_orderkey"),
            TestMetastoreCommands.getColumnStatistics(1L, 1319L, 120L, TypeProtos.MinorType.BIGINT));
        tableStatistics.put(SchemaPath.getSimplePath("o_shippriority"),
            TestMetastoreCommands.getColumnStatistics(0L, 0L, 120L, TypeProtos.MinorType.BIGINT));

        BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
            .tableInfo(tableInfo)
            .metadataInfo(TestMetastoreCommands.TABLE_META_INFO)
            .schema(schema)
            .location(new Path(table.toURI().getPath()))
            .columnsStatistics(tableStatistics)
            .metadataStatistics(Arrays.asList(
                new StatisticsHolder<>(120L, TableStatisticsKind.ROW_COUNT),
                new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
            .partitionKeys(Collections.emptyMap())
            .lastModifiedTime(TestMetastoreCommands.getMaxLastModified(table))
            .build();

        TableInfo baseTableInfo = TableInfo.builder()
            .name(tableName)
            .storagePlugin(StoragePluginTestUtils.DFS_PLUGIN_NAME)
            .workspace("default")
            .build();

        HashMap<SchemaPath, ColumnStatistics<?>> dir0Statistics =
            new HashMap<>(TestMetastoreCommands.DIR0_1994_SEGMENT_COLUMN_STATISTICS);
        dir0Statistics.put(SchemaPath.getSimplePath("o_custkey"),
            TestMetastoreCommands.getColumnStatistics(25L, 1469L, 40L, TypeProtos.MinorType.BIGINT));
        dir0Statistics.put(SchemaPath.getSimplePath("o_orderdate"),
            TestMetastoreCommands.getColumnStatistics("1994-01-01T00:00:00.000-08:00", "1994-12-23T00:00:00.000-08:00", 40L, TypeProtos.MinorType.VARCHAR));
        dir0Statistics.put(SchemaPath.getSimplePath("o_orderkey"),
            TestMetastoreCommands.getColumnStatistics(5L, 1031L, 40L, TypeProtos.MinorType.BIGINT));
        dir0Statistics.put(SchemaPath.getSimplePath("o_shippriority"),
            TestMetastoreCommands.getColumnStatistics(0L, 0L, 40L, TypeProtos.MinorType.BIGINT));

        SegmentMetadata expectedDir0 = SegmentMetadata.builder()
            .tableInfo(baseTableInfo)
            .metadataInfo(MetadataInfo.builder()
                .type(MetadataType.SEGMENT)
                .identifier("1994")
                .key("1994")
                .build())
            .path(new Path(tablePath, "1994"))
            .schema(schema)
            .lastModifiedTime(TestMetastoreCommands.getMaxLastModified(new File(table, "1994")))
            .column(SchemaPath.getSimplePath("dir0"))
            .columnsStatistics(dir0Statistics)
            .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(40L, TableStatisticsKind.ROW_COUNT)))
            .locations(ImmutableSet.of(
                new Path(tablePath, "1994/Q1/orders_94_q1.json"),
                new Path(tablePath, "1994/Q2/orders_94_q2.json"),
                new Path(tablePath, "1994/Q3/orders_94_q3.json"),
                new Path(tablePath, "1994/Q4/orders_94_q4.json")))
            .partitionValues(Collections.singletonList("1994"))
            .build();

        Set<Path> expectedTopLevelLocations = ImmutableSet.of(
            new Path(tablePath, "1994"),
            new Path(tablePath, "1995"),
            new Path(tablePath, "1996"));

        Set<Set<Path>> expectedSegmentFiles = new HashSet<>();
        expectedSegmentFiles.add(ImmutableSet.of(
            new Path(tablePath, "1994/Q2/orders_94_q2.json"),
            new Path(tablePath, "1994/Q4/orders_94_q4.json"),
            new Path(tablePath, "1994/Q1/orders_94_q1.json"),
            new Path(tablePath, "1994/Q3/orders_94_q3.json")));
        expectedSegmentFiles.add(ImmutableSet.of(
            new Path(tablePath, "1995/Q2/orders_95_q2.json"),
            new Path(tablePath, "1995/Q4/orders_95_q4.json"),
            new Path(tablePath, "1995/Q1/orders_95_q1.json"),
            new Path(tablePath, "1995/Q3/orders_95_q3.json")));
        expectedSegmentFiles.add(ImmutableSet.of(
            new Path(tablePath, "1996/Q3/orders_96_q3.json"),
            new Path(tablePath, "1996/Q2/orders_96_q2.json"),
            new Path(tablePath, "1996/Q4/orders_96_q4.json"),
            new Path(tablePath, "1996/Q1/orders_96_q1.json")));

        HashMap<SchemaPath, ColumnStatistics<?>> dir0Q1Statistics =
            new HashMap<>(TestMetastoreCommands.DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS);
        dir0Q1Statistics.put(SchemaPath.getSimplePath("o_custkey"),
            TestMetastoreCommands.getColumnStatistics(392L, 1411L, 10L, TypeProtos.MinorType.BIGINT));
        dir0Q1Statistics.put(SchemaPath.getSimplePath("o_orderdate"),
            TestMetastoreCommands.getColumnStatistics("1994-01-01T00:00:00.000-08:00", "1994-03-26T00:00:00.000-08:00", 10L, TypeProtos.MinorType.VARCHAR));
        dir0Q1Statistics.put(SchemaPath.getSimplePath("o_orderkey"),
            TestMetastoreCommands.getColumnStatistics(66L, 833L, 10L, TypeProtos.MinorType.BIGINT));
        dir0Q1Statistics.put(SchemaPath.getSimplePath("o_shippriority"),
            TestMetastoreCommands.getColumnStatistics(0L, 0L, 10L, TypeProtos.MinorType.BIGINT));

        FileMetadata expectedFile = FileMetadata.builder()
            .tableInfo(baseTableInfo)
            .metadataInfo(MetadataInfo.builder()
                .type(MetadataType.FILE)
                .identifier("1994/Q1/orders_94_q1.json")
                .key("1994")
                .build())
            .schema(schema)
            .lastModifiedTime(new File(new File(new File(table, "1994"), "Q1"), "orders_94_q1.json").lastModified())
            .columnsStatistics(dir0Q1Statistics)
            .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
            .path(new Path(tablePath, "1994/Q1/orders_94_q1.json"))
            .build();

        try {
            testBuilder()
                .sqlQuery("analyze table table(dfs.`%s`(schema=>%s)) refresh metadata", tableName, SCHEMA_STRING)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName))
                .go();

            BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .tableMetadata(tableInfo);
            Assert.assertEquals(expectedTableMetadata, actualTableMetadata);

            List<SegmentMetadata> topLevelSegments = cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .segmentsMetadataByColumn(tableInfo, null, "`dir0`");

            SegmentMetadata actualDir0 = topLevelSegments.stream()
                .filter(segment -> segment.getMetadataInfo().identifier().equals("1994"))
                .findAny()
                .orElseThrow(() -> new AssertionError("Segment is absent"));
            Assert.assertEquals(expectedDir0, actualDir0);

            Set<Path> actualTopLevelLocations = topLevelSegments.stream()
                .map(SegmentMetadata::getLocation)
                .collect(Collectors.toSet());
            Assert.assertEquals(expectedTopLevelLocations, actualTopLevelLocations);

            Set<Set<Path>> actualSegmentFiles = topLevelSegments.stream()
                .map(SegmentMetadata::getLocations)
                .collect(Collectors.toSet());
            Assert.assertEquals(expectedSegmentFiles, actualSegmentFiles);

            List<SegmentMetadata> nestedSegments = cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .segmentsMetadataByColumn(tableInfo, null, "`dir1`");
            Assert.assertEquals(12L, nestedSegments.size());

            SegmentMetadata expectedDir1 = SegmentMetadata.builder()
                .tableInfo(baseTableInfo)
                .metadataInfo(MetadataInfo.builder()
                    .type(MetadataType.SEGMENT)
                    .identifier("1994/Q1")
                    .key("1994")
                    .build())
                .path(new Path(new Path(tablePath, "1994"), "Q1"))
                .schema(schema)
                .lastModifiedTime(TestMetastoreCommands.getMaxLastModified(new File(new File(table, "1994"), "Q1")))
                .column(SchemaPath.getSimplePath("dir1"))
                .columnsStatistics(dir0Q1Statistics)
                .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
                .locations(ImmutableSet.of(new Path(tablePath, "1994/Q1/orders_94_q1.json")))
                .partitionValues(Collections.singletonList("Q1"))
                .build();
            SegmentMetadata actualDir1 = nestedSegments.stream()
                .filter(segment -> segment.getMetadataInfo().identifier().equals("1994/Q1"))
                .findAny()
                .orElse(null);
            Assert.assertEquals(expectedDir1, actualDir1);

            List<FileMetadata> filesMetadata = cluster.drillbit().getContext()
                .getMetastoreRegistry().get().tables().basicRequests()
                .filesMetadata(tableInfo, null, null);
            Assert.assertEquals(12L, filesMetadata.size());

            FileMetadata actualFile = filesMetadata.stream()
                .filter(file -> file.getMetadataInfo().identifier().equals("1994/Q1/orders_94_q1.json"))
                .findAny()
                .orElse(null);
            Assert.assertEquals(expectedFile, actualFile);
        } finally {
            // Single cleanup path for both success and failure.
            run("analyze table dfs.`%s` drop metadata if exists", tableName);
        }
    }

    /**
     * Runs {@code ANALYZE TABLE ... COMPUTE STATISTICS SAMPLE 95 PERCENT} for the
     * {@code o_orderkey} column of a JSON table with statistics usage enabled, then checks through
     * {@code information_schema.columns} that {@code EST_NUM_NON_NULLS} is populated for that
     * column. Metadata is dropped and the statistics option is reset afterwards.
     */
    @Test
    public void testAnalyzeWithSampleStatistics() throws Exception {
        String tableName = "multilevel/json/1994/Q1";
        String statisticsOption = PlannerSettings.STATISTICS_USE.getOptionName();

        try {
            dirTestWatcher.copyResourceToRoot(Paths.get(tableName));
            client.alterSession(statisticsOption, true);

            testBuilder()
                .sqlQuery("ANALYZE TABLE dfs.`%s` COLUMNS(o_orderkey) REFRESH METADATA COMPUTE STATISTICS SAMPLE 95 PERCENT", tableName)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName))
                .go();

            testBuilder()
                .sqlQuery("select EST_NUM_NON_NULLS is not null as has_value\n"
                    + "from information_schema.`columns` where table_name='%s' and column_name='o_orderkey'", tableName)
                .unOrdered()
                .baselineColumns("has_value")
                .baselineValues(true)
                .go();
        } finally {
            // Single cleanup path for both success and failure.
            run("analyze table dfs.`%s` drop metadata if exists", tableName);
            client.resetSession(statisticsOption);
        }
    }

    /**
     * Analyzes a table consisting of a single empty CSV file with an inline schema and checks that
     * the table metadata stored in the Metastore equals the expected metadata (zero rows), that one
     * file metadata entry exists, that the plan reports {@code usedMetastore=true} and that the
     * query returns an empty result set.
     *
     * @throws Exception if copying the resource or running any of the queries fails
     */
    @Test
    public void testEmptyCSV() throws Exception {
        String tablePath = "store/text/directoryWithEmptyCSV/empty.csv";
        File table = dirTestWatcher.copyResourceToRoot(Paths.get(tablePath));
        TableInfo tableInfo = getTableInfo(tablePath, "default", "csv");

        BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
            .tableInfo(tableInfo)
            .metadataInfo(TestMetastoreCommands.TABLE_META_INFO)
            .schema(new SchemaBuilder().add("Description", TypeProtos.MinorType.VARCHAR).build())
            .location(new Path(table.toURI().getPath()))
            .columnsStatistics(ImmutableMap.builder()
                .put(SchemaPath.getSimplePath("Description"), TestMetastoreCommands.getColumnStatistics(null, null, 0L, TypeProtos.MinorType.VARCHAR))
                .build())
            .metadataStatistics(Arrays.asList(
                new StatisticsHolder(0L, TableStatisticsKind.ROW_COUNT),
                new StatisticsHolder(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
            .partitionKeys(Collections.emptyMap())
            .lastModifiedTime(TestMetastoreCommands.getMaxLastModified(table))
            .build();

        try {
            testBuilder()
                .sqlQuery("analyze table table(dfs.`%s` (schema=>'inline=(`Description` VARCHAR not null)')) refresh metadata", tablePath)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tablePath))
                .go();

            Assert.assertTrue("table metadata wasn't found",
                cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo).isExists());

            Assert.assertEquals(expectedTableMetadata,
                cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo));

            Assert.assertEquals(1L,
                cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, (String) null, (List) null).size());

            queryBuilder()
                .sql("select * from dfs.`%s`", tablePath)
                .planMatcher()
                .include("usedMetastore=true")
                .match();

            testBuilder()
                .sqlQuery("select * from dfs.`%s`", tablePath)
                .unOrdered()
                .baselineColumns("Description")
                .expectsEmptyResultSet()
                .go();
        } finally {
            // Drop the collected metadata exactly once, whether the assertions passed or failed.
            run("analyze table dfs.`%s` drop metadata if exists", tablePath);
        }
    }

    /**
     * Analyzes a CSV table directory that contains both an empty file and {@code nations.csv},
     * then checks that the stored table metadata equals the expected metadata (25 rows), that two
     * file metadata entries exist, that the plan reports {@code usedMetastore=true} and that the
     * query returns 25 records.
     *
     * @throws Exception if copying the resources or running any of the queries fails
     */
    @Test
    public void testNonEmptyTableWithEmptyFile() throws Exception {
        String tableName = "csv_with_empty_file";

        // NOTE(review): the literal "text" directory replaces ClusterFixture.EXPLAIN_PLAN_TEXT, an
        // explain-format constant that presumably only coincided with the directory name;
        // confirm the constant's value is "text".
        dirTestWatcher.copyResourceToTestTmp(
            Paths.get("store", "text", "directoryWithEmptyCSV", "empty.csv"),
            Paths.get(tableName, "empty.csv"));
        File tableDir = dirTestWatcher.copyResourceToTestTmp(
            Paths.get("store", "text", "data", "nations.csv"),
            Paths.get(tableName, "nations.csv")).getParentFile();

        TableInfo tableInfo = getTableInfo(tableName, StoragePluginTestUtils.TMP_SCHEMA, "csv");

        BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder()
            .tableInfo(tableInfo)
            .metadataInfo(TestMetastoreCommands.TABLE_META_INFO)
            .schema(new SchemaBuilder()
                .add("n_nationkey", TypeProtos.MinorType.INT)
                .add("n_name", TypeProtos.MinorType.VARCHAR)
                .add("n_regionkey", TypeProtos.MinorType.INT)
                .add("n_comment", TypeProtos.MinorType.VARCHAR)
                .build())
            .location(new Path(tableDir.toURI().getPath()))
            .columnsStatistics(ImmutableMap.builder()
                .put(SchemaPath.getSimplePath("n_nationkey"), TestMetastoreCommands.getColumnStatistics(0, 24, 25L, TypeProtos.MinorType.INT))
                .put(SchemaPath.getSimplePath("n_name"), TestMetastoreCommands.getColumnStatistics("ALGERIA", "VIETNAM", 25L, TypeProtos.MinorType.VARCHAR))
                .put(SchemaPath.getSimplePath("n_regionkey"), TestMetastoreCommands.getColumnStatistics(0, 4, 25L, TypeProtos.MinorType.INT))
                .put(SchemaPath.getSimplePath("n_comment"), TestMetastoreCommands.getColumnStatistics(
                    "alfoxespromiseslylyaccordingtotheregularaccounts.boldrequestsalon",
                    "yfinalpackages.slowfoxescajolequickly.quicklysilentplateletsbreachironicaccounts.unusualpintobe",
                    25L, TypeProtos.MinorType.VARCHAR))
                .build())
            .metadataStatistics(Arrays.asList(
                new StatisticsHolder(25L, TableStatisticsKind.ROW_COUNT),
                new StatisticsHolder(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL)))
            .partitionKeys(Collections.emptyMap())
            .lastModifiedTime(TestMetastoreCommands.getMaxLastModified(tableDir))
            .build();

        try {
            testBuilder()
                .sqlQuery("ANALYZE TABLE table(dfs.tmp.`%s` (schema=>'inline=("
                    + "`n_nationkey` INT not null,"
                    + "`n_name` VARCHAR not null,"
                    + "`n_regionkey` INT not null,"
                    + "`n_comment` VARCHAR not null)')) REFRESH METADATA", tableName)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName))
                .go();

            Assert.assertTrue("table metadata wasn't found",
                cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo).isExists());

            Assert.assertEquals(expectedTableMetadata,
                cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo));

            Assert.assertEquals(2L,
                cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, (String) null, (List) null).size());

            queryBuilder()
                .sql("select * from dfs.tmp.`%s`", tableName)
                .planMatcher()
                .include("usedMetastore=true")
                .match();

            Assert.assertEquals(25L, queryBuilder().sql("select * from dfs.tmp.`%s`", tableName).run().recordCount());
        } finally {
            // Drop the collected metadata exactly once, whether the assertions passed or failed.
            run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        }
    }

    /**
     * Analyzes a copy of the multilevel CSV table and checks that plans for {@code LIMIT} queries
     * contain the expected {@code numFiles} and {@code limit} values
     * ({@code numFiles=1} for {@code limit 1}, {@code numFiles=3} for {@code limit 21}).
     *
     * @throws Exception if copying the resource or running any of the queries fails
     */
    @Test
    public void testFilesPruningWithLimit() throws Exception {
        String tableName = "multilevel/csvLimit";
        dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/csv"), Paths.get(tableName));

        try {
            testBuilder()
                .sqlQuery("ANALYZE TABLE table(dfs.tmp.`%s` (schema=>%s)) REFRESH METADATA", tableName, SCHEMA_STRING)
                .unOrdered()
                .baselineColumns("ok", "summary")
                .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName))
                .go();

            queryBuilder()
                .sql("select * from dfs.tmp.`%s` limit 1", tableName)
                .planMatcher()
                .include("Limit", "numFiles=1", "limit=1")
                .match();

            queryBuilder()
                .sql("select * from dfs.tmp.`%s` limit 21", tableName)
                .planMatcher()
                .include("Limit", "numFiles=3", "limit=21")
                .match();
        } finally {
            // Drop the collected metadata exactly once, whether the plan checks passed or failed.
            run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        }
    }

    /**
     * Builds a {@link TableInfo} for a table in the DFS storage plugin, owned by the user taken
     * from the {@code user.name} entry of the cluster config.
     *
     * @param str  value passed to the builder as the table name
     * @param str2 value passed to the builder as the workspace
     * @param str3 value passed to the builder as the table type
     * @return the built table info
     */
    private TableInfo getTableInfo(String str, String str2, String str3) {
        String owner = cluster.config().getString("user.name");
        return TableInfo.builder()
            .name(str)
            .owner(owner)
            .storagePlugin(StoragePluginTestUtils.DFS_PLUGIN_NAME)
            .workspace(str2)
            .type(str3)
            .build();
    }
}
