/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.classifier.naivebayes.training;

import com.google.common.base.Splitter;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.classifier.naivebayes.BayesUtils;
import org.apache.mahout.classifier.naivebayes.NaiveBayesModel;
import org.apache.mahout.classifier.naivebayes.training.IndexInstancesMapper;
import org.apache.mahout.classifier.naivebayes.training.ThetaMapper;
import org.apache.mahout.classifier.naivebayes.training.WeightsMapper;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
import org.apache.mahout.common.mapreduce.VectorSumReducer;
import org.apache.mahout.math.VectorWritable;

/**
 * Command-line driver that trains a naive Bayes model by chaining Hadoop jobs.
 *
 * <p>{@link #run(String[])} writes a label index, runs a job built around
 * {@link IndexInstancesMapper}, then a job built around {@link WeightsMapper}, and finally
 * loads the contents of the temp directory as a {@link NaiveBayesModel}, validates it and
 * serializes it to the output path.
 */
public final class TrainNaiveBayesJob extends AbstractJob {
    // Command-line option names understood by run().
    private static final String TRAIN_COMPLEMENTARY = "trainComplementary";
    private static final String ALPHA_I = "alphaI";
    // Also used as the temp sub-directory for the label index when the option is absent.
    private static final String LABEL_INDEX = "labelIndex";
    private static final String EXTRACT_LABELS = "extractLabels";
    private static final String LABELS = "labels";

    // Public keys; not referenced inside this class.
    public static final String WEIGHTS_PER_FEATURE = "__SPF";
    public static final String WEIGHTS_PER_LABEL = "__SPL";
    public static final String LABEL_THETA_NORMALIZER = "_LTN";

    // Names of the intermediate directories created under the temp path.
    public static final String SUMMED_OBSERVATIONS = "summedObservations";
    public static final String WEIGHTS = "weights";
    public static final String THETAS = "thetas";

    /**
     * Launches the job through {@link ToolRunner} with a fresh {@link Configuration}.
     *
     * <p>The exit code returned by {@code ToolRunner.run} is not propagated.
     *
     * @param args command-line arguments forwarded to {@link #run(String[])}
     * @throws Exception whatever {@code ToolRunner.run} throws
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new TrainNaiveBayesJob(), args);
    }

    /**
     * Parses the arguments and executes the training pipeline.
     *
     * @param args command-line arguments
     * @return {@code 0} on success; {@code -1} when argument parsing yields {@code null} or
     *         when one of the submitted jobs reports failure
     * @throws Exception on any parsing, I/O or job execution error
     */
    @Override
    public int run(String[] args) throws Exception {
        registerOptions();
        if (parseArguments(args) == null) {
            return -1;
        }

        // Start from a clean slate when the caller asked for it.
        if (hasOption("overwrite")) {
            HadoopUtil.delete(getConf(), getOutputPath());
            HadoopUtil.delete(getConf(), getTempPath());
        }

        Path labelIndexPath = resolveLabelIndexPath();
        long numLabels = createLabelIndex(labelIndexPath);
        float smoothing = Float.parseFloat(getOption(ALPHA_I));

        // The shared configuration must be prepared before the first job is built from it.
        HadoopUtil.setSerializations(getConf());
        HadoopUtil.cacheFiles(labelIndexPath, getConf());

        // Stage 1: input vectors -> summed observations.
        Job indexJob = prepareJob(
                getInputPath(),
                getTempPath(SUMMED_OBSERVATIONS),
                SequenceFileInputFormat.class,
                IndexInstancesMapper.class,
                IntWritable.class,
                VectorWritable.class,
                VectorSumReducer.class,
                IntWritable.class,
                VectorWritable.class,
                SequenceFileOutputFormat.class);
        indexJob.setCombinerClass(VectorSumReducer.class);
        if (!indexJob.waitForCompletion(true)) {
            return -1;
        }

        // Stage 2: summed observations -> weights.
        Job weightsJob = prepareJob(
                getTempPath(SUMMED_OBSERVATIONS),
                getTempPath(WEIGHTS),
                SequenceFileInputFormat.class,
                WeightsMapper.class,
                Text.class,
                VectorWritable.class,
                VectorSumReducer.class,
                Text.class,
                VectorWritable.class,
                SequenceFileOutputFormat.class);
        weightsJob.setCombinerClass(VectorSumReducer.class);
        weightsJob.getConfiguration().set(WeightsMapper.NUM_LABELS, String.valueOf(numLabels));
        if (!weightsJob.waitForCompletion(true)) {
            return -1;
        }

        HadoopUtil.cacheFiles(getTempPath(WEIGHTS), getConf());

        // Stage 3: the theta job is fully configured but is not submitted by this method.
        // NOTE(review): no waitForCompletion call exists for it, so nothing is written to the
        // THETAS temp path here -- confirm whether that is intentional.
        boolean complementary = hasOption(TRAIN_COMPLEMENTARY);
        Job thetaJob = prepareJob(
                getTempPath(SUMMED_OBSERVATIONS),
                getTempPath(THETAS),
                SequenceFileInputFormat.class,
                ThetaMapper.class,
                Text.class,
                VectorWritable.class,
                VectorSumReducer.class,
                Text.class,
                VectorWritable.class,
                SequenceFileOutputFormat.class);
        thetaJob.getConfiguration().setBoolean(ThetaMapper.TRAIN_COMPLEMENTARY, complementary);
        thetaJob.getConfiguration().setFloat(ThetaMapper.ALPHA_I, smoothing);
        thetaJob.setCombinerClass(VectorSumReducer.class);

        // Set the smoothing value on the shared configuration before the model is loaded.
        getConf().setFloat(ThetaMapper.ALPHA_I, smoothing);
        NaiveBayesModel model = BayesUtils.readModelFromDir(getTempPath(), getConf());
        model.validate();
        model.serialize(getOutputPath(), getConf());
        return 0;
    }

    /** Declares every command-line option this job accepts. */
    private void registerOptions() {
        addInputOption();
        addOutputOption();
        addOption(LABELS, "l", "comma-separated list of labels to include in training", false);
        addOption(buildOption(EXTRACT_LABELS, "el", "Extract the labels from the input", false, false, ""));
        addOption(ALPHA_I, "a", "smoothing parameter", String.valueOf(1.0f));
        addOption(buildOption(TRAIN_COMPLEMENTARY, "c", "train complementary?", false, false, String.valueOf(false)));
        addOption(LABEL_INDEX, "li", "The path to store the label index in", false);
        addOption(DefaultOptionCreator.overwriteOption().create());
    }

    /**
     * Picks the location of the label index: the value of the {@code labelIndex} option when
     * present, otherwise a directory of the same name under the temp path.
     */
    private Path resolveLabelIndexPath() {
        String configured = getOption(LABEL_INDEX);
        if (configured == null) {
            return getTempPath(LABEL_INDEX);
        }
        return new Path(configured);
    }

    /**
     * Writes the label index to {@code labPath}.
     *
     * <p>An explicit {@code labels} option takes precedence over {@code extractLabels}. When
     * neither option is present nothing is written.
     *
     * @param labPath destination of the label index
     * @return the value returned by {@code BayesUtils.writeLabelIndex}, or {@code 0} when
     *         neither option was supplied
     * @throws IOException if writing the index fails
     */
    private long createLabelIndex(Path labPath) throws IOException {
        if (hasOption(LABELS)) {
            Iterable<String> requested = Splitter.on(",").split(getOption(LABELS));
            return BayesUtils.writeLabelIndex(getConf(), requested, labPath);
        }
        if (hasOption(EXTRACT_LABELS)) {
            SequenceFileDirIterable<Text, IntWritable> records = new SequenceFileDirIterable<Text, IntWritable>(
                    getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), getConf());
            return BayesUtils.writeLabelIndex(getConf(), labPath, records);
        }
        return 0L;
    }
}

