package org.apache.sqoop.connector.hdfs;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.security.PrivilegedExceptionAction;
import java.util.Arrays;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;
import org.apache.hadoop.util.LineReader;
import org.apache.log4j.Logger;
import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.hadoop.ParquetInputFormat;
import org.apache.sqoop.common.SqoopException;
import org.apache.sqoop.connector.common.SqoopIDFUtils;
import org.apache.sqoop.connector.hadoop.security.SecurityUtils;
import org.apache.sqoop.connector.hdfs.configuration.FromJobConfiguration;
import org.apache.sqoop.connector.hdfs.configuration.LinkConfiguration;
import org.apache.sqoop.connector.idf.AVROIntermediateDataFormat;
import org.apache.sqoop.error.code.HdfsConnectorError;
import org.apache.sqoop.etl.io.DataWriter;
import org.apache.sqoop.job.etl.Extractor;
import org.apache.sqoop.job.etl.ExtractorContext;
import org.apache.sqoop.schema.ByteArraySchema;
import org.apache.sqoop.schema.Schema;

/* loaded from: input_file:org/apache/sqoop/connector/hdfs/HdfsExtractor.class */
/**
 * Extractor that reads every file of an {@link HdfsPartition} and hands the records to the
 * {@link DataWriter} supplied by the {@link ExtractorContext}.
 *
 * <p>Each file is probed in this order: Hadoop sequence file, Parquet file (detected through the
 * {@code PAR1} magic at both ends of the file), and finally plain or codec-compressed text.
 *
 * <p>The instance keeps mutable state (configuration, writer, schema, row counter) that is set
 * by {@link #extract}; nothing in this class synchronizes access to it.
 */
public class HdfsExtractor extends Extractor<LinkConfiguration, FromJobConfiguration, HdfsPartition> {
    public static final Logger LOG = Logger.getLogger(HdfsExtractor.class);

    /** Magic bytes expected at both the start and the end of a Parquet file. */
    private static final byte[] PARQUET_MAGIC = "PAR1".getBytes(Charset.forName("ASCII"));

    private DataWriter dataWriter;
    private Schema schema;
    private Configuration conf = new Configuration();
    private long rowsRead = 0;

    /**
     * Extracts all files of the given partition while running as the proxy user created by
     * {@link SecurityUtils#createProxyUserAndLoadDelegationTokens}.
     *
     * @param extractorContext     context providing the job configuration, data writer and schema
     * @param linkConfiguration    link configuration, forwarded to the per-file extraction
     * @param fromJobConfiguration job configuration, forwarded to the per-file extraction
     * @param hdfsPartition        partition listing the files, offsets and lengths to read
     * @throws SqoopException wrapping any exception raised during extraction
     */
    public void extract(final ExtractorContext extractorContext, final LinkConfiguration linkConfiguration, final FromJobConfiguration fromJobConfiguration, final HdfsPartition hdfsPartition) {
        try {
            SecurityUtils.createProxyUserAndLoadDelegationTokens(extractorContext).doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    HdfsUtils.contextToConfiguration(extractorContext.getContext(), HdfsExtractor.this.conf);
                    HdfsExtractor.this.dataWriter = extractorContext.getDataWriter();
                    HdfsExtractor.this.schema = extractorContext.getSchema();
                    LOG.info("Working on partition: " + hdfsPartition);
                    int numberOfFiles = hdfsPartition.getNumberOfFiles();
                    for (int i = 0; i < numberOfFiles; i++) {
                        extractFile(linkConfiguration, fromJobConfiguration, hdfsPartition.getFile(i), hdfsPartition.getOffset(i), hdfsPartition.getLength(i), hdfsPartition.getLocations());
                    }
                    return null;
                }
            });
        } catch (Exception e) {
            throw new SqoopException(HdfsConnectorError.GENERIC_HDFS_CONNECTOR_0001, e);
        }
    }

    /**
     * Extracts one file slice, choosing the reader from the detected file type.
     *
     * @param linkConfiguration    link configuration
     * @param fromJobConfiguration job configuration
     * @param path                 file to read
     * @param j                    start offset of the slice within the file
     * @param j2                   length of the slice
     * @param strArr               locations passed on to the {@link FileSplit}
     * @throws IOException          if reading the file fails
     * @throws InterruptedException if the Parquet record reader is interrupted
     */
    public void extractFile(LinkConfiguration linkConfiguration, FromJobConfiguration fromJobConfiguration, Path path, long j, long j2, String[] strArr) throws IOException, InterruptedException {
        LOG.info("Extracting file " + path);
        LOG.info("\t from offset " + j);
        LOG.info("\t to offset " + (j + j2));
        LOG.info("\t of length " + j2);
        if (isSequenceFile(path)) {
            extractSequenceFile(linkConfiguration, fromJobConfiguration, path, j, j2, strArr);
        } else if (isParquetFile(path)) {
            extractParquetFile(linkConfiguration, fromJobConfiguration, path, j, j2, strArr);
        } else {
            extractTextFile(linkConfiguration, fromJobConfiguration, path, j, j2);
        }
    }

    /**
     * Reads the slice as a sequence file and emits the key of every entry as one row.
     *
     * @throws IOException if reading fails, or wrapping an {@link InterruptedException}
     */
    private void extractSequenceFile(LinkConfiguration linkConfiguration, FromJobConfiguration fromJobConfiguration, Path path, long j, long j2, String[] strArr) throws IOException {
        LOG.info("Extracting sequence file");
        SequenceFileRecordReader<Text, Object> reader = new SequenceFileRecordReader<Text, Object>();
        try {
            reader.initialize(new FileSplit(path, j, j2, strArr), new SqoopTaskAttemptContext(this.conf));
            while (reader.nextKeyValue()) {
                this.rowsRead++;
                extractRow(linkConfiguration, fromJobConfiguration, reader.getCurrentKey());
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new IOException(e);
        } finally {
            reader.close();
        }
    }

    /**
     * Reads the slice line by line, optionally through the compression codec matching the path.
     *
     * <p>When the slice does not start at offset 0 the first line is discarded. Reading continues
     * while the tracked position is not past {@code j + j2}, so the line crossing the end of the
     * slice is still emitted by this call.
     *
     * @throws IOException if opening or reading the file fails
     */
    private void extractTextFile(LinkConfiguration linkConfiguration, FromJobConfiguration fromJobConfiguration, Path path, long j, long j2) throws IOException {
        LOG.info("Extracting text file");
        long end = j + j2;
        FSDataInputStream stream = path.getFileSystem(this.conf).open(path);
        try {
            CompressionCodec codec = new CompressionCodecFactory(this.conf).getCodec(path);
            LineReader lineReader;
            if (codec == null) {
                stream.seek(j);
                lineReader = new LineReader(stream);
            } else {
                // No seek here: the decompressing stream is read from the beginning of the file.
                lineReader = new LineReader(codec.createInputStream(stream, codec.createDecompressor()), this.conf);
            }

            long start = j;
            if (start != 0) {
                // Skip the (possibly partial) first line of a slice that starts mid-file.
                start += lineReader.readLine(new Text(), 0);
            }
            LOG.info("Start position: " + String.valueOf(start));

            long position = start;
            while (position <= end) {
                Text line = new Text();
                int consumed = lineReader.readLine(line, Integer.MAX_VALUE);
                if (consumed == 0) {
                    break; // end of stream
                }
                // Uncompressed: count consumed bytes. Compressed: use the raw stream position.
                position = (codec == null) ? position + consumed : stream.getPos();
                this.rowsRead++;
                extractRow(linkConfiguration, fromJobConfiguration, line);
            }
            LOG.info("Extracting ended on position: " + stream.getPos());
        } finally {
            // Close the stream on failure as well, so a bad row does not leak the handle.
            stream.close();
        }
    }

    /**
     * Reads the slice as Parquet through {@link AvroReadSupport} and emits each record via the
     * Avro intermediate data format.
     *
     * @throws IOException          if reading fails
     * @throws InterruptedException if the record reader is interrupted
     */
    private void extractParquetFile(LinkConfiguration linkConfiguration, FromJobConfiguration fromJobConfiguration, Path path, long j, long j2, String[] strArr) throws IOException, InterruptedException {
        InputSplit fileSplit = new FileSplit(path, j, j2, strArr);
        this.conf.set(ParquetInputFormat.READ_SUPPORT_CLASS, AvroReadSupport.class.getName());
        ParquetInputFormat<GenericRecord> inputFormat = new ParquetInputFormat<GenericRecord>();
        SqoopTaskAttemptContext taskContext = new SqoopTaskAttemptContext(this.conf);
        RecordReader<Void, GenericRecord> reader = inputFormat.createRecordReader(fileSplit, taskContext);
        try {
            reader.initialize(fileSplit, taskContext);
            AVROIntermediateDataFormat idf = new AVROIntermediateDataFormat(this.schema);
            while (reader.nextKeyValue()) {
                GenericRecord record = reader.getCurrentValue();
                this.rowsRead++;
                if (this.schema instanceof ByteArraySchema) {
                    this.dataWriter.writeArrayRecord(new Object[]{idf.toObject(record)});
                } else {
                    this.dataWriter.writeArrayRecord(idf.toObject(record));
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * @return the number of rows read by this extractor so far
     */
    public long getRowsRead() {
        return this.rowsRead;
    }

    /**
     * Probes whether the file can be opened as a Hadoop sequence file.
     *
     * @return {@code true} if a {@link SequenceFile.Reader} could be opened on the path
     */
    private boolean isSequenceFile(Path path) {
        try {
            new SequenceFile.Reader(path.getFileSystem(this.conf), path, this.conf).close();
            return true;
        } catch (IOException ignored) {
            // Deliberate best effort: a file that cannot be opened as a sequence file is not one.
            return false;
        }
    }

    /**
     * Probes whether the file both starts and ends with the Parquet magic bytes.
     *
     * @return {@code true} if both magics match; {@code false} otherwise, including on any I/O
     *         error or when the file is too short to hold two separate magics
     */
    private boolean isParquetFile(Path path) {
        try {
            FileSystem fileSystem = path.getFileSystem(this.conf);
            long len = fileSystem.getFileStatus(path).getLen();
            if (len < 2L * PARQUET_MAGIC.length) {
                // Too short for a header and a footer magic; e.g. a text file containing "PAR1".
                return false;
            }
            FSDataInputStream in = fileSystem.open(path);
            try {
                byte[] head = new byte[PARQUET_MAGIC.length];
                in.readFully(head);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("file start: " + new String(head, Charset.forName("ASCII")));
                }
                if (!Arrays.equals(head, PARQUET_MAGIC)) {
                    return false;
                }
                in.seek(len - PARQUET_MAGIC.length);
                byte[] tail = new byte[PARQUET_MAGIC.length];
                in.readFully(tail);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("file end: " + new String(tail, Charset.forName("ASCII")));
                }
                return Arrays.equals(tail, PARQUET_MAGIC);
            } finally {
                in.close();
            }
        } catch (IOException ignored) {
            // Deliberate best effort: an unreadable file is treated as "not Parquet".
            return false;
        }
    }

    /**
     * Writes one text row to the data writer in the form required by the schema and job
     * configuration: a single byte-array column for {@link ByteArraySchema}, parsed CSV when a
     * custom format is configured, or the raw string otherwise.
     *
     * @throws UnsupportedEncodingException if the ISO-8859-1 charset is unavailable
     */
    private void extractRow(LinkConfiguration linkConfiguration, FromJobConfiguration fromJobConfiguration, Text text) throws UnsupportedEncodingException {
        if (this.schema instanceof ByteArraySchema) {
            this.dataWriter.writeArrayRecord(new Object[]{text.toString().getBytes("ISO-8859-1")});
        } else if (HdfsUtils.hasCustomFormat(linkConfiguration, fromJobConfiguration)) {
            this.dataWriter.writeArrayRecord(SqoopIDFUtils.fromCSV(text.toString(), this.schema, fromJobConfiguration.fromJobConfig.nullValue));
        } else {
            this.dataWriter.writeStringRecord(text.toString());
        }
    }
}
