package org.apache.mahout.utils;

import java.io.IOException;
import java.io.Serializable;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleOutputs;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterator;

/* loaded from: input_file:org/apache/mahout/utils/SplitInputJob.class */
public final class SplitInputJob {
    // Configuration key under which run(...) stores the downsampling stride read by SplitInputMapper.
    private static final String DOWNSAMPLING_FACTOR = "SplitInputJob.downsamplingFactor";
    // Configuration key under which run(...) stores the test-selection percentage read by SplitInputReducer.
    private static final String RANDOM_SELECTION_PCT = "SplitInputJob.randomSelectionPct";
    // Name of the named output that receives the training portion of the split.
    private static final String TRAINING_TAG = "training";
    // Name of the named output that receives the test portion of the split.
    private static final String TEST_TAG = "test";

    /* loaded from: input_file:org/apache/mahout/utils/SplitInputJob$SplitInputComparator.class */
    /**
     * Sort comparator that ignores the serialized keys it is handed and answers
     * with a coin flip instead. The job installs it via
     * {@code setSortComparatorClass}, presumably so that records reach the
     * reducer in a randomized order.
     *
     * <p>The result is intentionally not a consistent ordering: two calls with the
     * same arguments may return different signs, and 0 is never returned.
     */
    public static class SplitInputComparator extends WritableComparator implements Serializable {
        /** Source of the coin flips; obtained once per comparator instance. */
        private final Random rnd = RandomUtils.getRandom();

        protected SplitInputComparator() {
            super(WritableComparable.class);
        }

        /**
         * Returns {@code 1} or {@code -1} at random; none of the byte ranges are inspected.
         */
        @Override // org.apache.hadoop.io.WritableComparator, org.apache.hadoop.io.RawComparator
        public int compare(byte[] bArr, int i, int i2, byte[] bArr2, int i3, int i4) {
            if (this.rnd.nextBoolean()) {
                return 1;
            }
            return -1;
        }
    }

    /* loaded from: input_file:org/apache/mahout/utils/SplitInputJob$SplitInputMapper.class */
    /**
     * Mapper that downsamples its input split: only every
     * {@code downsamplingFactor}-th record (starting with the first) is handed to
     * {@code map}; all other records are skipped.
     */
    public static class SplitInputMapper extends Mapper<WritableComparable<?>, Writable, WritableComparable<?>, Writable> {
        /** Sampling stride; record {@code n} (0-based) is kept when {@code n % downsamplingFactor == 0}. */
        private int downsamplingFactor;

        /**
         * Reads the sampling stride from the job configuration (default 1, i.e. keep everything).
         *
         * @throws IllegalArgumentException if the configured stride is 0, which would otherwise
         *         surface later as an {@link ArithmeticException} from the modulus in {@link #run}
         */
        @Override // org.apache.hadoop.mapreduce.Mapper
        public void setup(Mapper<WritableComparable<?>, Writable, WritableComparable<?>, Writable>.Context context) {
            int factor = context.getConfiguration().getInt(SplitInputJob.DOWNSAMPLING_FACTOR, 1);
            if (factor == 0) {
                // A stride of 0 results from a keep percentage above 100: (int) (100.0 / pct) truncates to 0.
                throw new IllegalArgumentException(
                    SplitInputJob.DOWNSAMPLING_FACTOR + " must be non-zero; the keep percentage must not exceed 100");
            }
            this.downsamplingFactor = factor;
        }

        /**
         * Drives the mapper over the split, forwarding only the sampled records to {@code map}.
         *
         * @throws IOException if reading the input or writing the output fails
         * @throws InterruptedException if the task is interrupted
         */
        @Override // org.apache.hadoop.mapreduce.Mapper
        public void run(Mapper<WritableComparable<?>, Writable, WritableComparable<?>, Writable>.Context context) throws IOException, InterruptedException {
            setup(context);
            int i = 0;
            while (context.nextKeyValue()) {
                // Keep the first record and then every downsamplingFactor-th one after it.
                if (i % this.downsamplingFactor == 0) {
                    map(context.getCurrentKey(), context.getCurrentValue(), context);
                }
                i++;
            }
            cleanup(context);
        }
    }

    /* loaded from: input_file:org/apache/mahout/utils/SplitInputJob$SplitInputReducer.class */
    /**
     * Reducer that routes every incoming value to one of two named outputs:
     * the test output with a probability of roughly {@code randomSelectionPercent}
     * percent, and the training output otherwise.
     */
    public static class SplitInputReducer extends Reducer<WritableComparable<?>, Writable, WritableComparable<?>, Writable> {
        /** Owner of the two named-output collectors; closed in {@link #cleanup}. */
        private MultipleOutputs multipleOutputs;
        private OutputCollector<WritableComparable<?>, Writable> trainingCollector;
        private OutputCollector<WritableComparable<?>, Writable> testCollector;
        private final Random rnd = RandomUtils.getRandom();
        /** Threshold compared against a uniform draw from [0, 100). */
        private float randomSelectionPercent;

        /**
         * Reads the selection percentage (default 0, i.e. everything goes to training)
         * and opens the collectors for the training and test named outputs.
         *
         * @throws IOException if a collector cannot be obtained
         */
        /* JADX INFO: Access modifiers changed from: protected */
        @Override // org.apache.hadoop.mapreduce.Reducer
        @SuppressWarnings("unchecked") // MultipleOutputs.getCollector returns a raw OutputCollector
        public void setup(Reducer<WritableComparable<?>, Writable, WritableComparable<?>, Writable>.Context context) throws IOException {
            this.randomSelectionPercent = context.getConfiguration().getFloat(SplitInputJob.RANDOM_SELECTION_PCT, 0.0f);
            this.multipleOutputs = new MultipleOutputs(new JobConf(context.getConfiguration()));
            // No Reporter is supplied to the collectors.
            this.trainingCollector = this.multipleOutputs.getCollector(SplitInputJob.TRAINING_TAG, null);
            this.testCollector = this.multipleOutputs.getCollector(SplitInputJob.TEST_TAG, null);
        }

        /**
         * Sends each value, paired with its key, to either the test or the training collector.
         * Nothing is written through {@code context}.
         *
         * @throws IOException if a collector fails to write
         */
        /* JADX INFO: Access modifiers changed from: protected */
        @Override // org.apache.hadoop.mapreduce.Reducer
        public void reduce(WritableComparable<?> writableComparable, Iterable<Writable> iterable, Reducer<WritableComparable<?>, Writable, WritableComparable<?>, Writable>.Context context) throws IOException, InterruptedException {
            for (Writable writable : iterable) {
                // One independent draw per value, so values sharing a key may land in different outputs.
                if (this.rnd.nextInt(100) < this.randomSelectionPercent) {
                    this.testCollector.collect(writableComparable, writable);
                } else {
                    this.trainingCollector.collect(writableComparable, writable);
                }
            }
        }

        /**
         * Closes the named outputs.
         *
         * @throws IOException if closing fails
         */
        /* JADX INFO: Access modifiers changed from: protected */
        @Override // org.apache.hadoop.mapreduce.Reducer
        public void cleanup(Reducer<WritableComparable<?>, Writable, WritableComparable<?>, Writable>.Context context) throws IOException {
            this.multipleOutputs.close();
        }
    }

    /** Private so the class cannot be instantiated from outside; it is used through its static run method and nested job classes. */
    private SplitInputJob() {
    }

    /**
     * Configures and runs a single-reducer MapReduce job that downsamples the
     * sequence files under {@code path} and splits the surviving records into a
     * "training" and a "test" named output below {@code path2}.
     *
     * <p>The key and value classes of the outputs are taken from the first record
     * found in the input's part files.
     *
     * @param configuration base configuration; the downsampling factor and selection
     *        percentage are written into it before the job is created
     * @param path input location containing sequence files
     * @param path2 job output location
     * @param i percentage of input records to keep; stored as the stride
     *        {@code (int) (100.0 / i)}. Values greater than 100 truncate to a stride of 0,
     *        which is not a usable sampling stride.
     * @param f percentage of kept records routed to the test output
     * @throws IllegalStateException if the input contains no record to infer the
     *         key/value classes from, or if the job does not complete successfully
     * @throws IOException if the input cannot be read or the job cannot be submitted
     */
    public static void run(Configuration configuration, Path path, Path path2, int i, float f) throws IOException, ClassNotFoundException, InterruptedException {
        configuration.setInt(DOWNSAMPLING_FACTOR, (int) (100.0d / i));
        configuration.setFloat(RANDOM_SELECTION_PCT, f);

        // Peek at the first record to learn the concrete key and value classes.
        Class<?> cls;
        Class<?> cls2;
        SequenceFileDirIterator sequenceFileDirIterator = new SequenceFileDirIterator(path, PathType.LIST, PathFilters.partFilter(), null, false, FileSystem.get(configuration).getConf());
        try {
            if (!sequenceFileDirIterator.hasNext()) {
                throw new IllegalStateException("Couldn't determine class of the input values");
            }
            Pair next = sequenceFileDirIterator.next();
            cls = ((WritableComparable) next.getFirst()).getClass();
            cls2 = ((Writable) next.getSecond()).getClass();
        } finally {
            // Only the first pair is consumed, so the underlying reader must be released explicitly.
            sequenceFileDirIterator.close();
        }

        JobConf jobConf = new JobConf(configuration);
        MultipleOutputs.addNamedOutput(jobConf, TRAINING_TAG, SequenceFileOutputFormat.class, cls, cls2);
        MultipleOutputs.addNamedOutput(jobConf, TEST_TAG, SequenceFileOutputFormat.class, cls, cls2);

        Job job = new Job((Configuration) jobConf);
        job.setJarByClass(SplitInputJob.class);
        FileInputFormat.addInputPath(job, path);
        FileOutputFormat.setOutputPath(job, path2);
        // A single reducer sees every record, so the whole split ends up in one pair of outputs.
        job.setNumReduceTasks(1);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.class);
        job.setMapperClass(SplitInputMapper.class);
        job.setReducerClass(SplitInputReducer.class);
        job.setSortComparatorClass(SplitInputComparator.class);
        job.setOutputKeyClass(cls);
        job.setOutputValueClass(cls2);
        job.submit();
        if (!job.waitForCompletion(true)) {
            throw new IllegalStateException("Job failed!");
        }
    }
}
