package org.apache.mahout.clustering.minhash;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.commandline.MinhashOptionCreator;
import org.apache.mahout.math.VectorWritable;

/* loaded from: input_file:org/apache/mahout/clustering/minhash/MinHashDriver.class */
/**
 * Command-line driver that configures and launches the "MinHash Clustering" MapReduce job.
 *
 * <p>The driver parses its options through {@link AbstractJob}, copies them into the Hadoop
 * {@link Configuration}, and runs a single job wired with {@code MinHashMapper} and
 * {@code MinHashReducer}.
 */
public final class MinHashDriver extends AbstractJob {
    /**
     * Program entry point; delegates to {@link ToolRunner} with a fresh {@link Configuration}.
     *
     * @param strArr raw command-line arguments
     * @throws Exception whatever {@link ToolRunner#run} propagates
     */
    public static void main(String[] strArr) throws Exception {
        ToolRunner.run(new Configuration(), new MinHashDriver(), strArr);
    }

    /**
     * Stores the MinHash parameters in the job configuration, wires up the job and blocks until it
     * finishes.
     *
     * @param input            path read through {@link SequenceFileInputFormat}
     * @param output           path the job writes its results to
     * @param minClusterSize   value stored under {@link MinhashOptionCreator#MIN_CLUSTER_SIZE}
     * @param minVectorSize    value stored under {@link MinhashOptionCreator#MIN_VECTOR_SIZE}
     * @param hashType         value stored under {@link MinhashOptionCreator#HASH_TYPE}
     * @param numHashFunctions value stored under {@link MinhashOptionCreator#NUM_HASH_FUNCTIONS}
     * @param keyGroups        value stored under {@link MinhashOptionCreator#KEY_GROUPS}
     * @param numReduceTasks   number of reduce tasks requested for the job
     * @param debugOutput      when {@code true} values are {@link VectorWritable}s written with
     *                         {@link SequenceFileOutputFormat}; otherwise values are {@link Text}
     *                         written with {@link TextOutputFormat}
     * @throws IllegalStateException if the job completes unsuccessfully
     */
    private void runJob(Path input, Path output, int minClusterSize, int minVectorSize, String hashType,
            int numHashFunctions, int keyGroups, int numReduceTasks, boolean debugOutput)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = getConf();
        conf.setInt(MinhashOptionCreator.MIN_CLUSTER_SIZE, minClusterSize);
        conf.setInt(MinhashOptionCreator.MIN_VECTOR_SIZE, minVectorSize);
        conf.set(MinhashOptionCreator.HASH_TYPE, hashType);
        conf.setInt(MinhashOptionCreator.NUM_HASH_FUNCTIONS, numHashFunctions);
        conf.setInt(MinhashOptionCreator.KEY_GROUPS, keyGroups);
        conf.setBoolean(MinhashOptionCreator.DEBUG_OUTPUT, debugOutput);

        Job job = new Job(conf, "MinHash Clustering");
        job.setJarByClass(MinHashDriver.class);

        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        job.setMapperClass(MinHashMapper.class);
        job.setReducerClass(MinHashReducer.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(Text.class);
        job.setOutputKeyClass(Text.class);

        // The value type and the output format are chosen together. Calling the setters with the
        // class literals directly avoids holding two unrelated classes in one generic variable.
        if (debugOutput) {
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setMapOutputValueClass(VectorWritable.class);
            job.setOutputValueClass(VectorWritable.class);
        } else {
            job.setOutputFormatClass(TextOutputFormat.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputValueClass(Text.class);
        }

        job.setNumReduceTasks(numReduceTasks);

        // Surface a failed job instead of letting the caller report success.
        if (!job.waitForCompletion(true)) {
            throw new IllegalStateException("Job failed!");
        }
    }

    /**
     * Registers the command-line options, parses the arguments and runs the clustering job.
     *
     * <p>If the overwrite option is present, the output path is deleted before the job starts.
     *
     * @param strArr raw command-line arguments
     * @return {@code -1} if argument parsing yields no result, {@code 0} once the job has succeeded
     * @throws IllegalStateException if the MapReduce job completes unsuccessfully
     */
    public int run(String[] strArr) throws IOException, ClassNotFoundException, InterruptedException {
        addInputOption();
        addOutputOption();
        addOption(MinhashOptionCreator.minClusterSizeOption().create());
        addOption(MinhashOptionCreator.minVectorSizeOption().create());
        addOption(MinhashOptionCreator.hashTypeOption().create());
        addOption(MinhashOptionCreator.numHashFunctionsOption().create());
        addOption(MinhashOptionCreator.keyGroupsOption().create());
        addOption(MinhashOptionCreator.numReducersOption().create());
        addOption(MinhashOptionCreator.debugOutputOption().create());
        addOption(DefaultOptionCreator.overwriteOption().create());

        if (parseArguments(strArr) == null) {
            return -1;
        }

        Path inputPath = getInputPath();
        Path outputPath = getOutputPath();
        if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
            HadoopUtil.delete(getConf(), outputPath);
        }

        int minClusterSize = Integer.parseInt(getOption(MinhashOptionCreator.MIN_CLUSTER_SIZE));
        int minVectorSize = Integer.parseInt(getOption(MinhashOptionCreator.MIN_VECTOR_SIZE));
        String hashType = getOption(MinhashOptionCreator.HASH_TYPE);
        int numHashFunctions = Integer.parseInt(getOption(MinhashOptionCreator.NUM_HASH_FUNCTIONS));
        int keyGroups = Integer.parseInt(getOption(MinhashOptionCreator.KEY_GROUPS));
        int numReduceTasks = Integer.parseInt(getOption(MinhashOptionCreator.NUM_REDUCERS));
        boolean debugOutput = hasOption(MinhashOptionCreator.DEBUG_OUTPUT);

        runJob(inputPath, outputPath, minClusterSize, minVectorSize, hashType, numHashFunctions, keyGroups,
                numReduceTasks, debugOutput);
        return 0;
    }
}
