/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.vectorizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
import org.apache.lucene.analysis.Analyzer;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.vectorizer.SimpleTextEncodingVectorizer;
import org.apache.mahout.vectorizer.VectorizerConfig;
import org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder;
import org.apache.mahout.vectorizer.encoders.LuceneTextValueEncoder;

/**
 * Command-line driver that gathers vectorization settings from the arguments and
 * hands them to {@link SimpleTextEncodingVectorizer#createVectors} together with
 * the job's input and output paths.
 *
 * <p>Besides the standard input, output, analyzer and overwrite options, the job
 * registers the following options (long name / short name):
 * <ul>
 *   <li>{@code sequentialAccessVector} / {@code seq} - flag, no argument</li>
 *   <li>{@code namedVector} / {@code nv} - flag, no argument</li>
 *   <li>{@code cardinality} / {@code c} - defaults to 5000</li>
 *   <li>{@code encoderFieldName} / {@code en} - defaults to {@code text}</li>
 *   <li>{@code encoderClass} / {@code ec} - defaults to {@link LuceneTextValueEncoder}</li>
 * </ul>
 */
public final class EncodedVectorsFromSequenceFiles
extends AbstractJob {
    private static final String SEQUENTIAL_ACCESS_OPTION = "sequentialAccessVector";
    private static final String NAMED_VECTOR_OPTION = "namedVector";
    private static final String CARDINALITY_OPTION = "cardinality";
    private static final String ENCODER_FIELD_NAME_OPTION = "encoderFieldName";
    private static final String ENCODER_CLASS_OPTION = "encoderClass";

    /** Cardinality used when the option carries no value. */
    private static final int DEFAULT_CARDINALITY = 5000;
    /** Encoder constructor argument used when the option carries no value. */
    private static final String DEFAULT_ENCODER_FIELD_NAME = "text";

    /**
     * Runs this job through {@link ToolRunner} with a fresh {@link Configuration}.
     * The exit code returned by {@code ToolRunner.run} is not propagated to the JVM.
     *
     * @param args command-line arguments, passed through unchanged
     * @throws Exception whatever {@link #run(String[])} or the tool runner throws
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new EncodedVectorsFromSequenceFiles(), args);
    }

    /**
     * Registers the options, parses {@code args}, validates the settings and then
     * starts the vectorizer.
     *
     * <p>All argument validation happens before the output path is deleted for the
     * overwrite option, so an invalid argument does not destroy existing output.
     *
     * @param args command-line arguments
     * @return {@code -1} when argument parsing yields no result, {@code 0} once
     *         {@code createVectors} has returned
     * @throws NumberFormatException if the cardinality value is not an integer
     * @throws IllegalArgumentException if the cardinality is zero or negative
     * @throws Exception any failure raised while resolving the analyzer or encoder
     *         class, deleting the output, or creating the vectors
     */
    @Override
    public int run(String[] args) throws Exception {
        String defaultEncoderClass = LuceneTextValueEncoder.class.getName();

        addInputOption();
        addOutputOption();
        addOption(DefaultOptionCreator.analyzerOption().create());
        addOption(buildOption(SEQUENTIAL_ACCESS_OPTION, "seq", "(Optional) Whether output vectors should be SequentialAccessVectors. If set true else false", false, false, null));
        addOption(buildOption(NAMED_VECTOR_OPTION, "nv", "Create named vectors using the key.  False by default", false, false, null));
        addOption(CARDINALITY_OPTION, "c", "The cardinality to use for creating the vectors.  Default is 5000", String.valueOf(DEFAULT_CARDINALITY));
        addOption(ENCODER_FIELD_NAME_OPTION, "en", "The name of the encoder to be passed to the FeatureVectorEncoder constructor. Default is text. Note this is not the class name of a FeatureValueEncoder, but is instead the construction argument.", DEFAULT_ENCODER_FIELD_NAME);
        addOption(ENCODER_CLASS_OPTION, "ec", "The class name of the encoder to be used. Default is " + defaultEncoderClass, defaultEncoderClass);
        addOption(DefaultOptionCreator.overwriteOption().create());
        if (parseArguments(args) == null) {
            return -1;
        }

        Path input = getInputPath();
        Path output = getOutputPath();
        Configuration conf = getConf();

        // Everything from here to the overwrite step only reads and checks arguments.
        Class<? extends Analyzer> analyzerClass = getAnalyzerClassFromOption();
        boolean sequentialAccessOutput = hasOption(SEQUENTIAL_ACCESS_OPTION);
        boolean namedVectors = hasOption(NAMED_VECTOR_OPTION);

        int cardinality = DEFAULT_CARDINALITY;
        if (hasOption(CARDINALITY_OPTION)) {
            cardinality = Integer.parseInt(getOption(CARDINALITY_OPTION));
        }
        if (cardinality <= 0) {
            // Reject a size that cannot describe a usable vector before any work is started.
            throw new IllegalArgumentException("cardinality must be a positive integer, but was " + cardinality);
        }

        String encoderName = DEFAULT_ENCODER_FIELD_NAME;
        if (hasOption(ENCODER_FIELD_NAME_OPTION)) {
            encoderName = getOption(ENCODER_FIELD_NAME_OPTION);
        }

        String encoderClass = defaultEncoderClass;
        if (hasOption(ENCODER_CLASS_OPTION)) {
            encoderClass = getOption(ENCODER_CLASS_OPTION);
            // The instance is discarded: constructing it once up front makes a class that
            // cannot be built from a single String argument fail here, before the job runs.
            ClassUtils.instantiateAs(encoderClass, FeatureVectorEncoder.class, new Class[]{String.class}, new Object[]{encoderName});
        }

        // Destructive step last: existing output is removed only after every argument was accepted.
        if (hasOption("overwrite")) {
            HadoopUtil.delete(conf, output);
        }

        SimpleTextEncodingVectorizer vectorizer = new SimpleTextEncodingVectorizer();
        VectorizerConfig config = new VectorizerConfig(conf, analyzerClass.getName(), encoderClass, encoderName, sequentialAccessOutput, namedVectors, cardinality);
        vectorizer.createVectors(input, output, config);
        return 0;
    }
}

