package org.apache.mahout.clustering.minhash;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.minhash.HashFactory;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.math.VectorWritable;

/**
 * Command-line driver that configures and launches the MinHash MapReduce job
 * built from {@link MinHashMapper} and {@link MinHashReducer}.
 *
 * <p>The {@code public static final} strings below serve both as command-line
 * option names and as the keys under which the parsed values are stored in the
 * job {@link Configuration}.
 *
 * <p>The class is marked {@code @Deprecated}; this file does not indicate a
 * replacement.
 */
@Deprecated
public final class MinHashDriver extends AbstractJob {
    public static final String NUM_HASH_FUNCTIONS = "numHashFunctions";
    public static final String KEY_GROUPS = "keyGroups";
    public static final String HASH_TYPE = "hashType";
    public static final String MIN_CLUSTER_SIZE = "minClusterSize";
    public static final String MIN_VECTOR_SIZE = "minVectorSize";
    public static final String NUM_REDUCERS = "numReducers";
    public static final String DEBUG_OUTPUT = "debugOutput";
    public static final String VECTOR_DIMENSION_TO_HASH = "vectorDimensionToHash";
    /** Default value of the {@link #VECTOR_DIMENSION_TO_HASH} option. */
    static final String HASH_DIMENSION_VALUE = "value";

    /**
     * Runs the driver through {@link ToolRunner}.
     *
     * <p>The exit code returned by {@link #run(String[])} is discarded here and is
     * not turned into a process exit status.
     *
     * @param strArr command-line arguments
     * @throws Exception whatever {@link ToolRunner#run} propagates
     */
    public static void main(String[] strArr) throws Exception {
        ToolRunner.run(new MinHashDriver(), strArr);
    }

    /**
     * Parses the command line, validates it, and runs the MinHash job to completion.
     *
     * <p>All option values are parsed and validated before the output path is
     * touched, so an invalid argument never deletes existing output even when the
     * overwrite option is given.
     *
     * @param strArr command-line arguments
     * @return {@code 0} if the job completed successfully; {@code -1} if argument
     *         parsing returned nothing, the hash type is unknown, or the job failed
     * @throws NumberFormatException if a numeric option is not a valid integer
     * @throws Exception if job preparation or execution fails
     */
    public int run(String[] strArr) throws Exception {
        addInputOption();
        addOutputOption();
        addOption(MIN_CLUSTER_SIZE, "mcs", "Minimum points inside a cluster", String.valueOf(10));
        addOption(MIN_VECTOR_SIZE, "mvs", "Minimum size of vector to be hashed", String.valueOf(5));
        addOption(VECTOR_DIMENSION_TO_HASH, "vdh",
                "Dimension of vector to hash. Available types: (value, index). Defaults to 'value'",
                HASH_DIMENSION_VALUE);
        addOption(HASH_TYPE, "ht",
                "Type of hash function to use. Available types: (linear, polynomial, murmur) ",
                HashFactory.HashType.MURMUR.toString());
        addOption(NUM_HASH_FUNCTIONS, "nh", "Number of hash functions to be used", String.valueOf(10));
        addOption(KEY_GROUPS, "kg", "Number of key groups to be used", String.valueOf(2));
        addOption(NUM_REDUCERS, "nr", "The number of reduce tasks. Defaults to 2", String.valueOf(2));
        addFlag(DEBUG_OUTPUT, "debug", "Output the whole vectors for debugging");
        addOption(DefaultOptionCreator.overwriteOption().create());

        if (parseArguments(strArr) == null) {
            return -1;
        }

        int minClusterSize = Integer.parseInt(getOption(MIN_CLUSTER_SIZE));
        int minVectorSize = Integer.parseInt(getOption(MIN_VECTOR_SIZE));
        String dimensionToHash = getOption(VECTOR_DIMENSION_TO_HASH);
        String hashType = getOption(HASH_TYPE);
        int numHashFunctions = Integer.parseInt(getOption(NUM_HASH_FUNCTIONS));
        int keyGroups = Integer.parseInt(getOption(KEY_GROUPS));
        int numReducers = Integer.parseInt(getOption(NUM_REDUCERS));
        boolean debugOutput = hasOption(DEBUG_OUTPUT);

        if (!isKnownHashType(hashType)) {
            System.err.println("Unknown hashType: " + hashType);
            return -1;
        }

        // Destructive step: only performed once every argument has been validated.
        if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
            HadoopUtil.delete(getConf(), getOutputPath());
        }

        // The debug flag switches the mapper output value class and the output format.
        Job job = prepareJob(
                getInputPath(),
                getOutputPath(),
                SequenceFileInputFormat.class,
                MinHashMapper.class,
                Text.class,
                debugOutput ? VectorWritable.class : Text.class,
                MinHashReducer.class,
                Text.class,
                VectorWritable.class,
                debugOutput ? SequenceFileOutputFormat.class : TextOutputFormat.class);

        Configuration jobConf = job.getConfiguration();
        jobConf.setInt(MIN_CLUSTER_SIZE, minClusterSize);
        jobConf.setInt(MIN_VECTOR_SIZE, minVectorSize);
        jobConf.set(VECTOR_DIMENSION_TO_HASH, dimensionToHash);
        jobConf.set(HASH_TYPE, hashType);
        jobConf.setInt(NUM_HASH_FUNCTIONS, numHashFunctions);
        jobConf.setInt(KEY_GROUPS, keyGroups);
        jobConf.setBoolean(DEBUG_OUTPUT, debugOutput);
        job.setNumReduceTasks(numReducers);

        return job.waitForCompletion(true) ? 0 : -1;
    }

    /**
     * Reports whether {@code name} is the exact name of a {@link HashFactory.HashType} constant.
     *
     * <p>The catch is deliberately limited to the enum lookup so that an
     * {@link IllegalArgumentException} raised elsewhere during job setup is not
     * mistaken for an unknown hash type.
     */
    private static boolean isKnownHashType(String name) {
        try {
            HashFactory.HashType.valueOf(name);
            return true;
        } catch (IllegalArgumentException e) {
            return false;
        }
    }
}
