package org.apache.hadoop.examples.terasort;

import java.io.EOFException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.util.IndexedSortable;
import org.apache.hadoop.util.QuickSort;
import org.apache.hadoop.util.StringUtils;

/* JADX WARN: Classes with same name are omitted:
  input_file:classes/org/apache/hadoop/examples/terasort/TeraInputFormat.class
 */
/* loaded from: input_file:hadoop-mapreduce-examples-2.7.0-mapr-1710.jar:org/apache/hadoop/examples/terasort/TeraInputFormat.class */
public class TeraInputFormat extends FileInputFormat<Text, Text> {
    static final String PARTITION_FILENAME = "_partition.lst";
    private static final String NUM_PARTITIONS = "mapreduce.terasort.num.partitions";
    private static final String SAMPLE_SIZE = "mapreduce.terasort.partitions.sample";
    static final int KEY_LENGTH = 10;
    static final int VALUE_LENGTH = 90;
    static final int RECORD_LENGTH = 100;
    private static MRJobConfig lastContext = null;
    private static List<InputSplit> lastResult = null;

    /* JADX WARN: Classes with same name are omitted:
      input_file:classes/org/apache/hadoop/examples/terasort/TeraInputFormat$SamplerThreadGroup.class
     */
    /* loaded from: input_file:hadoop-mapreduce-examples-2.7.0-mapr-1710.jar:org/apache/hadoop/examples/terasort/TeraInputFormat$SamplerThreadGroup.class */
    /**
     * Thread group for the sampler reader threads. Rather than letting an
     * uncaught failure of a member thread disappear, the group keeps a
     * reference to the most recently reported one so the code that started the
     * threads can inspect it later.
     */
    static class SamplerThreadGroup extends ThreadGroup {
        /** Most recent throwable handed to {@link #uncaughtException}; {@code null} until then. */
        private Throwable throwable;

        /**
         * Creates an empty group.
         *
         * @param groupName name passed straight to the {@link ThreadGroup} constructor
         */
        public SamplerThreadGroup(String groupName) {
            super(groupName);
        }

        /**
         * Remembers the failure of a member thread. A later report replaces an
         * earlier one; nothing else is done with the throwable here.
         *
         * @param source  thread that terminated abnormally
         * @param failure throwable that escaped the thread
         */
        @Override
        public void uncaughtException(Thread source, Throwable failure) {
            throwable = failure;
        }

        /**
         * @return the last recorded failure, or {@code null} when none was reported
         */
        public Throwable getThrowable() {
            return throwable;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* JADX WARN: Classes with same name are omitted:
      input_file:classes/org/apache/hadoop/examples/terasort/TeraInputFormat$TeraRecordReader.class
     */
    /* loaded from: input_file:hadoop-mapreduce-examples-2.7.0-mapr-1710.jar:org/apache/hadoop/examples/terasort/TeraInputFormat$TeraRecordReader.class */
    /**
     * Reads fixed-width records from one file split. Every record occupies
     * {@code RECORD_LENGTH} (100) bytes; the first
     * {@code TeraInputFormat.KEY_LENGTH} bytes become the key and the following
     * {@code TeraInputFormat.VALUE_LENGTH} bytes become the value.
     *
     * <p>The same {@link Text} instances are reused for every record, so callers
     * that keep a key or value across calls to {@link #nextKeyValue()} must copy it.
     */
    public static class TeraRecordReader extends RecordReader<Text, Text> {
        private static final int RECORD_LENGTH = 100;

        /** Stream over the split's file; {@code null} until {@link #initialize} succeeds. */
        private FSDataInputStream in;
        /** Position of the next record, measured in bytes from the start of the split. */
        private long offset;
        /** Length of the split in bytes. */
        private long length;
        /** Scratch space holding exactly one record. */
        private final byte[] buffer = new byte[RECORD_LENGTH];
        private Text key;
        private Text value;

        /**
         * Opens the split's file and positions the stream on the first record
         * boundary at or after the start of the split.
         *
         * @param inputSplit         split to read; must be a {@link FileSplit}
         * @param taskAttemptContext supplies the configuration used to resolve the file system
         * @throws IOException if the file cannot be opened or the seek fails
         */
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            FileSplit fileSplit = (FileSplit) inputSplit;
            Path path = fileSplit.getPath();
            this.in = path.getFileSystem(taskAttemptContext.getConfiguration()).open(path);
            long start = fileSplit.getStart();
            // Bytes to skip so that reading starts on a multiple of RECORD_LENGTH.
            this.offset = (RECORD_LENGTH - (start % RECORD_LENGTH)) % RECORD_LENGTH;
            this.in.seek(start + this.offset);
            this.length = fileSplit.getLength();
        }

        /**
         * Closes the underlying stream. Safe to call when {@link #initialize}
         * was never called or failed before the stream was opened.
         */
        public void close() throws IOException {
            if (this.in != null) {
                this.in.close();
            }
        }

        /**
         * Implements {@code RecordReader.getCurrentKey()}.
         *
         * @return the key of the record read by the last successful
         *         {@link #nextKeyValue()}, or {@code null} before the first one
         */
        public Text getCurrentKey() {
            return this.key;
        }

        /**
         * Implements {@code RecordReader.getCurrentValue()}.
         *
         * @return the value of the record read by the last successful
         *         {@link #nextKeyValue()}, or {@code null} before the first one
         */
        public Text getCurrentValue() {
            return this.value;
        }

        /**
         * @return the fraction of the split consumed so far, in {@code [0, 1]};
         *         {@code 0} for an empty split
         */
        public float getProgress() throws IOException {
            if (this.length <= 0) {
                // Avoid 0/0 = NaN for an empty split.
                return 0.0f;
            }
            // offset may step past length on the final record, so clamp.
            return Math.min(1.0f, ((float) this.offset) / ((float) this.length));
        }

        /**
         * Reads the next record into the current key and value.
         *
         * @return {@code true} if a record was read; {@code false} once the
         *         split is exhausted or the file ends exactly on a record boundary
         * @throws EOFException if the file ends in the middle of a record
         * @throws IOException  if the underlying read fails
         */
        public boolean nextKeyValue() throws IOException {
            if (this.offset >= this.length) {
                return false;
            }
            // A single read may return fewer bytes than requested; loop until
            // the record is complete.
            int filled = 0;
            while (filled < RECORD_LENGTH) {
                int read = this.in.read(this.buffer, filled, RECORD_LENGTH - filled);
                if (read == -1) {
                    if (filled == 0) {
                        return false;
                    }
                    throw new EOFException("read past eof");
                }
                filled += read;
            }
            if (this.key == null) {
                this.key = new Text();
            }
            if (this.value == null) {
                this.value = new Text();
            }
            this.key.set(this.buffer, 0, TeraInputFormat.KEY_LENGTH);
            this.value.set(this.buffer, TeraInputFormat.KEY_LENGTH, TeraInputFormat.VALUE_LENGTH);
            this.offset += RECORD_LENGTH;
            return true;
        }
    }

    /* JADX WARN: Classes with same name are omitted:
      input_file:classes/org/apache/hadoop/examples/terasort/TeraInputFormat$TextSampler.class
     */
    /* loaded from: input_file:hadoop-mapreduce-examples-2.7.0-mapr-1710.jar:org/apache/hadoop/examples/terasort/TeraInputFormat$TextSampler.class */
    /**
     * Collects sampled keys and derives evenly spaced split points from them.
     * Implements {@link IndexedSortable} so that {@link QuickSort} can sort the
     * collected keys in place.
     *
     * <p>Only {@link #addKey(Text)} is synchronized; the other methods are
     * expected to run once no more keys are being added.
     */
    static class TextSampler implements IndexedSortable {
        private final ArrayList<Text> records = new ArrayList<>();

        TextSampler() {
        }

        /** Compares the keys stored at positions {@code i} and {@code i2}. */
        public int compare(int i, int i2) {
            return this.records.get(i).compareTo(this.records.get(i2));
        }

        /** Exchanges the keys stored at positions {@code i} and {@code i2}. */
        public void swap(int i, int i2) {
            Text first = this.records.get(i);
            // List.set returns the element it replaces, i.e. the old value at i2.
            this.records.set(i, this.records.set(i2, first));
        }

        /**
         * Adds a copy of {@code text} to the sample, so the caller may keep
         * reusing its own instance.
         *
         * @param text key to record
         */
        public void addKey(Text text) {
            synchronized (this) {
                this.records.add(new Text(text));
            }
        }

        /**
         * Sorts the sampled keys and picks {@code i - 1} evenly spaced keys that
         * divide the sample into {@code i} ranges.
         *
         * @param i number of partitions wanted
         * @return the {@code i - 1} split points in ascending order
         * @throws IllegalArgumentException if {@code i} is not positive or
         *         exceeds the number of sampled keys
         */
        Text[] createPartitions(int i) {
            int size = this.records.size();
            System.out.println("Making " + i + " from " + size + " sampled records");
            if (i < 1) {
                throw new IllegalArgumentException("Number of partitions must be positive (" + i + ")");
            }
            if (i > size) {
                throw new IllegalArgumentException("Requested more partitions than input keys (" + i + " > " + size + ")");
            }
            new QuickSort().sort(this, 0, size);
            // Divide in floating point: an integer division here would truncate
            // the step and push every split point towards the low keys, leaving
            // the last partition oversized.
            float stepSize = size / (float) i;
            Text[] splitPoints = new Text[i - 1];
            for (int part = 1; part < i; part++) {
                splitPoints[part - 1] = this.records.get(Math.round(stepSize * part));
            }
            return splitPoints;
        }
    }

    /**
     * Samples keys from the job input and writes the resulting split points to
     * {@code path}, one serialized {@link Text} per split point.
     *
     * <p>A subset of the input splits is chosen at regular intervals and each
     * chosen split is read by its own daemon thread until its share of the
     * configured sample size has been collected. The calling thread then waits
     * for all sampler threads, builds one split point fewer than the number of
     * reduce tasks and writes them out.
     *
     * @param jobContext job whose input is sampled and whose reduce count
     *                   determines the number of partitions
     * @param path       file to create (an existing file is overwritten)
     * @throws IOException          if no split can be sampled or the partition
     *                              file cannot be written
     * @throws InterruptedException if the calling thread is interrupted while
     *                              waiting for a sampler thread; the interrupt
     *                              status is restored before rethrowing
     * @throws Throwable            whatever a sampler thread failed with
     */
    public static void writePartitionFile(final JobContext jobContext, Path path) throws Throwable {
        long startTime = System.currentTimeMillis();
        Configuration configuration = jobContext.getConfiguration();
        final TeraInputFormat teraInputFormat = new TeraInputFormat();
        final TextSampler textSampler = new TextSampler();
        int numReduceTasks = jobContext.getNumReduceTasks();
        long sampleSize = configuration.getLong(SAMPLE_SIZE, 100000L);
        final List<InputSplit> splits = teraInputFormat.getSplits(jobContext);
        long splitsDoneTime = System.currentTimeMillis();
        System.out.println("Computing input splits took " + (splitsDoneTime - startTime) + "ms");
        // Sample at most the configured number of splits (default 10).
        int samples = Math.min(configuration.getInt(NUM_PARTITIONS, 10), splits.size());
        System.out.println("Sampling " + samples + " splits of " + splits.size());
        if (samples < 1) {
            // Fail with a clear message instead of dividing by zero below.
            throw new IOException("Cannot sample input: " + samples + " splits selected out of "
                    + splits.size() + " (check the input and " + NUM_PARTITIONS + ")");
        }
        final long recordsPerSample = sampleSize / samples;
        final int sampleStep = splits.size() / samples;
        Thread[] samplerReaders = new Thread[samples];
        SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
        for (int i = 0; i < samples; i++) {
            final int index = i;
            samplerReaders[i] = new Thread(threadGroup, "Sampler Reader " + index) {
                {
                    setDaemon(true);
                }

                @Override
                public void run() {
                    long records = 0;
                    InputSplit split = splits.get(sampleStep * index);
                    TaskAttemptContext context = new TaskAttemptContextImpl(jobContext.getConfiguration(), new TaskAttemptID());
                    // try-with-resources closes the reader (and its stream) on
                    // every exit path; a failure in close() cannot mask the
                    // primary exception.
                    try (RecordReader<Text, Text> reader = teraInputFormat.createRecordReader(split, context)) {
                        reader.initialize(split, context);
                        while (reader.nextKeyValue()) {
                            // addKey stores its own copy of the key.
                            textSampler.addKey(reader.getCurrentKey());
                            records++;
                            if (recordsPerSample <= records) {
                                break;
                            }
                        }
                    } catch (IOException e) {
                        System.err.println("Got an exception while reading splits " + StringUtils.stringifyException(e));
                        // Surfaces through SamplerThreadGroup.uncaughtException.
                        throw new RuntimeException(e);
                    } catch (InterruptedException e) {
                        // Stop sampling but keep the interrupt visible.
                        Thread.currentThread().interrupt();
                    }
                }
            };
            samplerReaders[i].start();
        }
        FileSystem fileSystem = path.getFileSystem(configuration);
        try (FSDataOutputStream out = fileSystem.create(path, true, 65536, (short) 10, fileSystem.getDefaultBlockSize(path))) {
            for (Thread samplerReader : samplerReaders) {
                try {
                    samplerReader.join();
                } catch (InterruptedException e) {
                    // Carrying on would sort the sample while this sampler may
                    // still be adding keys, so give up instead.
                    Thread.currentThread().interrupt();
                    throw e;
                }
                Throwable failure = threadGroup.getThrowable();
                if (failure != null) {
                    throw failure;
                }
            }
            for (Text splitPoint : textSampler.createPartitions(numReduceTasks)) {
                splitPoint.write(out);
            }
        }
        System.out.println("Computing parititions took " + (System.currentTimeMillis() - splitsDoneTime) + "ms");
    }

    /**
     * Creates the reader used for a split of this input format. A fresh,
     * uninitialised {@link TeraRecordReader} is returned; neither argument is
     * consulted here.
     *
     * @param inputSplit         split the reader will later be initialised with
     * @param taskAttemptContext context of the task that will use the reader
     * @return a new {@link TeraRecordReader}
     */
    public RecordReader<Text, Text> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
        final RecordReader<Text, Text> reader = new TeraRecordReader();
        return reader;
    }

    /**
     * Computes the input splits for a job, reusing the previous result when
     * called again with the very same {@link JobContext} instance.
     *
     * <p>The base splits come from the superclass; unless
     * {@code TeraScheduler.USE} is set to {@code false} in the configuration
     * they are then passed through {@link TeraScheduler}. The result is cached
     * in static fields only after it has been computed completely, so a failed
     * computation never leaves a partial or stale list behind for the same
     * context. No synchronization is applied to the cache.
     *
     * @param jobContext job to compute splits for
     * @return the splits for {@code jobContext}
     * @throws IOException if computing the splits fails
     */
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
        // Identity comparison on purpose: the cache is keyed on the instance.
        // The null check stops a null context from matching the empty cache.
        if (jobContext != null && jobContext == lastContext) {
            return lastResult;
        }
        long startTime = System.currentTimeMillis();
        List<InputSplit> computed = super.getSplits(jobContext);
        long baseDoneTime = System.currentTimeMillis();
        System.out.println("Spent " + (baseDoneTime - startTime) + "ms computing base-splits.");
        if (jobContext.getConfiguration().getBoolean(TeraScheduler.USE, true)) {
            computed = new TeraScheduler(computed.toArray(new FileSplit[0]), jobContext.getConfiguration()).getNewFileSplits();
            System.out.println("Spent " + (System.currentTimeMillis() - baseDoneTime) + "ms computing TeraScheduler splits.");
        }
        // Publish only now that everything succeeded.
        lastResult = computed;
        lastContext = jobContext;
        return computed;
    }
}
