package org.apache.orc.tools.convert;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;
import jodd.io.ZipUtil;
import jodd.util.StringPool;
import org.apache.batik.dom.svg.SVGPathSegConstants;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.apache.orc.tools.json.JsonSchemaFinder;
import org.datanucleus.store.schema.SchemaTool;

/* loaded from: input_file:WEB-INF/lib/hive-exec-2.3.7-mapr-2101.jar:org/apache/orc/tools/convert/ConvertTool.class */
public class ConvertTool {
    /** Timestamp pattern handed to the CSV reader when {@code --timestampformat} is not supplied. */
    static final String DEFAULT_TIMESTAMP_FORMAT = "yyyy[[-][/]]MM[[-][/]]dd[['T'][ ]]HH:mm:ss[ ][XXX][X]";
    /** Input files, one entry per positional command-line argument, in argument order. */
    private final List<FileInformation> fileList;
    /** Schema of the output file: parsed from the {@code s} option, or else derived by {@code buildSchema}. */
    private final TypeDescription schema;
    /** CSV field separator: first character of the {@code S} option, defaulting to a comma. */
    private final char csvSeparator;
    /** CSV quote character: first character of the {@code q} option, defaulting to a double quote. */
    private final char csvQuote;
    /** CSV escape character: first character of the {@code e} option, defaulting to a backslash. */
    private final char csvEscape;
    /** Value of the {@code H} option ("CSV header lines"), defaulting to 0; passed through to the CSV reader. */
    private final int csvHeaderLines;
    /** Value of the {@code n} option ("CSV null string"), defaulting to the empty string. */
    private final String csvNullString;
    /** Timestamp pattern passed to the CSV reader; {@link #DEFAULT_TIMESTAMP_FORMAT} unless overridden. */
    private final String timestampFormat;
    /** ORC writer for the output file; created in the constructor and closed by {@code run()}. */
    private final Writer writer;
    /** Single row batch, created from {@link #schema}, reused for every read/write round trip in {@code run()}. */
    private final VectorizedRowBatch batch;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:WEB-INF/lib/hive-exec-2.3.7-mapr-2101.jar:org/apache/orc/tools/convert/ConvertTool$Compression.class */
    /** Compression wrapper detected on an input file from its file name. */
    public enum Compression {
        /** The raw input stream is read as is. */
        NONE,
        /** The input stream is wrapped in a {@code GZIPInputStream} before being decoded as text. */
        GZIP
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:WEB-INF/lib/hive-exec-2.3.7-mapr-2101.jar:org/apache/orc/tools/convert/ConvertTool$FileInformation.class */
    /**
     * Describes one input file: its location, its compression wrapper and the data format it holds.
     *
     * <p>This is an inner (non-static) class because {@link #getRecordReader()} reads the
     * enclosing tool's schema and CSV settings.
     */
    public class FileInformation {
        private final Compression compression;
        private final Format format;
        private final Path path;
        private final FileSystem filesystem;
        private final Configuration conf;
        private final long size;

        /**
         * Resolves the file system and length of {@code path} and classifies the file by its name.
         *
         * <p>A trailing {@code .gz} marks the file as gzip-compressed and is stripped before the
         * format extension ({@code .json}, {@code .jsn}, {@code .csv} or {@code .orc}) is examined.
         * Matching is case-sensitive.
         *
         * @param path the input file
         * @param configuration the Hadoop configuration used to resolve the file system
         * @throws IOException if the file system or the file status cannot be obtained
         * @throws IllegalArgumentException if the name has no format extension or an unknown one
         */
        FileInformation(Path path, Configuration configuration) throws IOException {
            this.path = path;
            this.conf = configuration;
            this.filesystem = path.getFileSystem(configuration);
            this.size = this.filesystem.getFileStatus(path).getLen();

            String name = path.getName();
            int lastDot = name.lastIndexOf('.');
            if (lastDot >= 0 && ".gz".equals(name.substring(lastDot))) {
                this.compression = Compression.GZIP;
                // Drop the ".gz" suffix so the next extension identifies the data format.
                name = name.substring(0, lastDot);
                lastDot = name.lastIndexOf('.');
            } else {
                this.compression = Compression.NONE;
            }
            if (lastDot < 0) {
                throw new IllegalArgumentException("No extension on file " + path);
            }

            String extension = name.substring(lastDot);
            if (".json".equals(extension) || ".jsn".equals(extension)) {
                this.format = Format.JSON;
            } else if (".csv".equals(extension)) {
                this.format = Format.CSV;
            } else if (".orc".equals(extension)) {
                this.format = Format.ORC;
            } else {
                throw new IllegalArgumentException("Unknown kind of file " + path);
            }
        }

        /**
         * Wraps a raw byte stream of this file in a UTF-8 text reader, decompressing gzip first
         * when the file was classified as {@link Compression#GZIP}.
         *
         * @param inputStream the raw bytes of this file
         * @return a UTF-8 reader over the (decompressed) content
         * @throws IOException if the gzip header cannot be read
         */
        Reader getReader(InputStream inputStream) throws IOException {
            InputStream source = inputStream;
            if (this.compression == Compression.GZIP) {
                source = new GZIPInputStream(source);
            }
            return new InputStreamReader(source, StandardCharsets.UTF_8);
        }

        /**
         * Opens this file as a {@link RecordReader} that produces rows in the enclosing tool's schema.
         *
         * <p>For JSON and CSV files the underlying stream is closed again if building the reader
         * fails, so a bad file does not leak an open handle.
         *
         * @return a record reader positioned at the start of the file; the caller must close it
         * @throws IOException if the file cannot be opened or its reader cannot be created
         * @throws IllegalArgumentException if the format is not handled
         */
        public RecordReader getRecordReader() throws IOException {
            switch (this.format) {
                case ORC: {
                    // Reuse the already-resolved file system, as buildSchema does.
                    org.apache.orc.Reader orcReader = OrcFile.createReader(
                            this.path, OrcFile.readerOptions(this.conf).filesystem(this.filesystem));
                    return orcReader.rows(orcReader.options().schema(ConvertTool.this.schema));
                }
                case JSON: {
                    FSDataInputStream input = this.filesystem.open(this.path);
                    try {
                        return new JsonReader(getReader(input), input, this.size, ConvertTool.this.schema);
                    } catch (IOException | RuntimeException failure) {
                        closeAfterFailure(input, failure);
                        throw failure;
                    }
                }
                case CSV: {
                    FSDataInputStream input = this.filesystem.open(this.path);
                    try {
                        return new CsvReader(getReader(input), input, this.size, ConvertTool.this.schema,
                                ConvertTool.this.csvSeparator, ConvertTool.this.csvQuote,
                                ConvertTool.this.csvEscape, ConvertTool.this.csvHeaderLines,
                                ConvertTool.this.csvNullString, ConvertTool.this.timestampFormat);
                    } catch (IOException | RuntimeException failure) {
                        closeAfterFailure(input, failure);
                        throw failure;
                    }
                }
                default:
                    throw new IllegalArgumentException("Unhandled format " + this.format + " for " + this.path);
            }
        }

        /** Closes {@code stream}, attaching any close error to {@code failure} as suppressed. */
        private void closeAfterFailure(InputStream stream, Exception failure) {
            try {
                stream.close();
            } catch (IOException closeFailure) {
                failure.addSuppressed(closeFailure);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:WEB-INF/lib/hive-exec-2.3.7-mapr-2101.jar:org/apache/orc/tools/convert/ConvertTool$Format.class */
    /** Data format of an input file, chosen from its file-name extension. */
    public enum Format {
        /** Chosen for the {@code .json} and {@code .jsn} extensions. */
        JSON,
        /** Chosen for the {@code .csv} extension. */
        CSV,
        /** Chosen for the {@code .orc} extension. */
        ORC
    }

    /**
     * Derives the output schema from the input files when none was given on the command line.
     *
     * <p>JSON files are scanned and ORC file schemas are merged in; CSV files contribute nothing.
     * Each JSON stream is closed as soon as it has been scanned.
     *
     * @param list the input files to inspect
     * @param configuration the Hadoop configuration used to open ORC files
     * @return the schema reported by the schema finder after all files were added
     * @throws IOException if a file cannot be opened or read
     * @throws IllegalArgumentException if no JSON or ORC file is present to derive a schema from
     */
    TypeDescription buildSchema(List<FileInformation> list, Configuration configuration) throws IOException {
        JsonSchemaFinder finder = new JsonSchemaFinder();
        int schemaSources = 0;
        for (FileInformation file : list) {
            if (file.format == Format.JSON) {
                System.err.println("Scanning " + file.path + " for schema");
                schemaSources++;
                // The scan is the only consumer of this stream, so release it right afterwards.
                try (InputStream raw = file.filesystem.open(file.path);
                     Reader text = file.getReader(raw)) {
                    finder.addFile(text);
                }
            } else if (file.format == Format.ORC) {
                System.err.println("Merging schema from " + file.path);
                schemaSources++;
                org.apache.orc.Reader orcReader = OrcFile.createReader(
                        file.path, OrcFile.readerOptions(configuration).filesystem(file.filesystem));
                finder.addSchema(orcReader.getSchema());
            }
        }
        if (schemaSources == 0) {
            throw new IllegalArgumentException("Please specify a schema using --schema for converting CSV files.");
        }
        return finder.getSchema();
    }

    /**
     * Programmatic entry point: builds a tool from the given arguments and runs the conversion.
     *
     * @param configuration the Hadoop configuration to use for all file access
     * @param strArr command-line options followed by the input file paths
     * @throws IOException if reading an input or writing the output fails
     * @throws ParseException if the command-line options cannot be parsed
     */
    public static void main(Configuration configuration, String[] strArr) throws IOException, ParseException {
        ConvertTool tool = new ConvertTool(configuration, strArr);
        tool.run();
    }

    /**
     * Creates a {@link FileInformation} for every path given on the command line, in order.
     *
     * @param strArr the input file paths
     * @param configuration the Hadoop configuration used to resolve each path's file system
     * @return one entry per path, in the order given
     * @throws IOException if a file's status cannot be read
     * @throws IllegalArgumentException if a file name has a missing or unknown extension
     */
    List<FileInformation> buildFileList(String[] strArr, Configuration configuration) throws IOException {
        List<FileInformation> files = new ArrayList<>(strArr.length);
        for (String name : strArr) {
            files.add(new FileInformation(new Path(name), configuration));
        }
        return files;
    }

    public ConvertTool(Configuration configuration, String[] strArr) throws IOException, ParseException {
        CommandLine parseOptions = parseOptions(strArr);
        this.fileList = buildFileList(parseOptions.getArgs(), configuration);
        if (parseOptions.hasOption('s')) {
            this.schema = TypeDescription.fromString(parseOptions.getOptionValue('s'));
        } else {
            this.schema = buildSchema(this.fileList, configuration);
        }
        this.csvQuote = getCharOption(parseOptions, 'q', '\"');
        this.csvEscape = getCharOption(parseOptions, 'e', '\\');
        this.csvSeparator = getCharOption(parseOptions, 'S', ',');
        this.csvHeaderLines = getIntOption(parseOptions, 'H', 0);
        this.csvNullString = parseOptions.getOptionValue('n', "");
        this.timestampFormat = parseOptions.getOptionValue(SVGPathSegConstants.PATHSEG_CURVETO_QUADRATIC_SMOOTH_REL_LETTER, DEFAULT_TIMESTAMP_FORMAT);
        this.writer = OrcFile.createWriter(new Path(parseOptions.hasOption('o') ? parseOptions.getOptionValue('o') : "output.orc"), OrcFile.writerOptions(configuration).setSchema(this.schema));
        this.batch = this.schema.createRowBatch();
    }

    /**
     * Copies every row of every input file into the output writer, then closes the writer.
     *
     * <p>If copying fails, the writer is still closed so its resources are released, and the
     * original failure is rethrown with any close error attached as suppressed. The output file
     * must be treated as incomplete in that case.
     *
     * @throws IOException if reading an input or writing the output fails
     */
    void run() throws IOException {
        try {
            for (FileInformation file : this.fileList) {
                System.err.println("Processing " + file.path);
                copyRows(file.getRecordReader());
            }
        } catch (IOException | RuntimeException failure) {
            try {
                this.writer.close();
            } catch (IOException | RuntimeException closeFailure) {
                failure.addSuppressed(closeFailure);
            }
            throw failure;
        }
        this.writer.close();
    }

    /** Drains {@code rows} into the writer batch by batch and closes the reader, even on failure. */
    private void copyRows(RecordReader rows) throws IOException {
        try {
            while (rows.nextBatch(this.batch)) {
                this.writer.addRowBatch(this.batch);
            }
        } catch (IOException | RuntimeException failure) {
            try {
                rows.close();
            } catch (IOException | RuntimeException closeFailure) {
                failure.addSuppressed(closeFailure);
            }
            throw failure;
        }
        rows.close();
    }

    /**
     * Reads an integer-valued option.
     *
     * @param commandLine the parsed command line
     * @param c the short option name
     * @param i the value to return when the option is absent
     * @return the parsed option value, or {@code i} if the option was not given
     * @throws NumberFormatException if the option value is not a valid integer
     */
    private static int getIntOption(CommandLine commandLine, char c, int i) {
        if (!commandLine.hasOption(c)) {
            return i;
        }
        String rawValue = commandLine.getOptionValue(c);
        return Integer.parseInt(rawValue);
    }

    /**
     * Reads a single-character option; only the first character of the value is used.
     *
     * @param commandLine the parsed command line
     * @param c the short option name
     * @param c2 the value to return when the option is absent
     * @return the first character of the option value, or {@code c2} if the option was not given
     * @throws IllegalArgumentException if the option was given with an empty value
     */
    private static char getCharOption(CommandLine commandLine, char c, char c2) {
        if (!commandLine.hasOption(c)) {
            return c2;
        }
        String value = commandLine.getOptionValue(c);
        // An empty argument (e.g. -q "") has no first character; report it clearly
        // rather than failing with an index error.
        if (value == null || value.isEmpty()) {
            throw new IllegalArgumentException("Option -" + c + " requires a non-empty character value");
        }
        return value.charAt(0);
    }

    /**
     * Declares the tool's command-line options and parses {@code strArr} against them.
     *
     * <p>When help is requested or no input files are given, usage is printed and the JVM exits
     * with status 1.
     *
     * @param strArr the raw command-line arguments
     * @return the parsed command line; its remaining arguments are the input file paths
     * @throws ParseException if the arguments do not match the declared options
     */
    private static CommandLine parseOptions(String[] strArr) throws ParseException {
        Options options = new Options();
        options.addOption(Option.builder("h").longOpt("help").desc("Provide help").build());
        options.addOption(Option.builder("s").longOpt("schema").hasArg().desc("The schema to write in to the file").build());
        options.addOption(Option.builder("o").longOpt("output").desc("Output filename").hasArg().build());
        options.addOption(Option.builder("n").longOpt("null").desc("CSV null string").hasArg().build());
        options.addOption(Option.builder("q").longOpt("quote").desc("CSV quote character").hasArg().build());
        options.addOption(Option.builder("e").longOpt("escape").desc("CSV escape character").hasArg().build());
        options.addOption(Option.builder("S").longOpt("separator").desc("CSV separator character").hasArg().build());
        options.addOption(Option.builder("H").longOpt("header").desc("CSV header lines").hasArg().build());
        options.addOption(Option.builder("t").longOpt("timestampformat").desc("Timestamp Format").hasArg().build());

        CommandLine parsed = new DefaultParser().parse(options, strArr);
        boolean helpRequested = parsed.hasOption('h');
        boolean noInputFiles = parsed.getArgs().length == 0;
        if (helpRequested || noInputFiles) {
            new HelpFormatter().printHelp("convert", options);
            System.exit(1);
        }
        return parsed;
    }
}
