/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.classify;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.classify.ClusterClassificationMapper;
import org.apache.mahout.clustering.classify.ClusterClassifier;
import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.clustering.iterator.ClusteringPolicy;
import org.apache.mahout.clustering.iterator.DistanceMeasureCluster;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterator;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

/**
 * Classifies input vectors against the clusters found under a clustering output directory and
 * writes the result as a SequenceFile of {@code IntWritable} cluster ids mapped to
 * {@link WeightedPropertyVectorWritable} values.
 *
 * <p>The classification can run either in-process ("sequential") or as a map-only Hadoop job
 * that uses {@link ClusterClassificationMapper}.
 */
public final class ClusterClassificationDriver extends AbstractJob {

    private ClusterClassificationDriver() {
    }

    /**
     * Command-line entry point; delegates to {@link ToolRunner}.
     *
     * @param args command-line arguments, see {@link #run(String[])}
     * @throws Exception if the job fails
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new ClusterClassificationDriver(), args);
    }

    /**
     * Parses the command line and runs the classification.
     *
     * <p>Recognised options are the input and output paths, the execution method, the
     * {@code clusters} directory and the optional {@code outlierThreshold} (0.0 when absent).
     * Runs started through this method always emit only the most likely cluster per vector.
     *
     * @param args command-line arguments
     * @return {@code 0} on success, {@code -1} if the arguments could not be parsed
     * @throws Exception if the classification fails
     */
    @Override
    public int run(String[] args) throws Exception {
        this.addInputOption();
        this.addOutputOption();
        this.addOption(DefaultOptionCreator.methodOption().create());
        this.addOption(DefaultOptionCreator.clustersInOption().withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.").create());
        // The threshold is read below, so it has to be registered with the parser as well;
        // otherwise it can never be supplied on the command line.
        this.addOption(DefaultOptionCreator.outlierThresholdOption().create());
        if (this.parseArguments(args) == null) {
            return -1;
        }
        Path input = this.getInputPath();
        Path output = this.getOutputPath();
        if (this.getConf() == null) {
            this.setConf(new Configuration());
        }
        Path clustersIn = new Path(this.getOption("clusters"));
        boolean runSequential = this.getOption("method").equalsIgnoreCase("sequential");
        double clusterClassificationThreshold = 0.0;
        if (this.hasOption("outlierThreshold")) {
            clusterClassificationThreshold = Double.parseDouble(this.getOption("outlierThreshold"));
        }
        ClusterClassificationDriver.run(this.getConf(), input, clustersIn, output, clusterClassificationThreshold, true, runSequential);
        return 0;
    }

    /**
     * Boxed-threshold variant of
     * {@link #run(Configuration, Path, Path, Path, double, boolean, boolean)}.
     *
     * @param conf                           Hadoop configuration used for all file system access
     * @param input                          directory of vectors to classify
     * @param clusteringOutputPath           directory holding the clustering output
     * @param output                         directory the classification result is written to
     * @param clusterClassificationThreshold minimum pdf value for a vector to be emitted;
     *                                       {@code null} is treated as 0.0, the command-line default
     * @param emitMostLikely                 emit only the most likely cluster if {@code true},
     *                                       otherwise every cluster at or above the threshold
     * @param runSequential                  classify in-process if {@code true}, otherwise run a
     *                                       Hadoop job
     * @throws IOException            on file system errors
     * @throws InterruptedException   if the Hadoop job is interrupted or reports failure
     * @throws ClassNotFoundException if the Hadoop job cannot load a required class
     */
    public static void run(Configuration conf, Path input, Path clusteringOutputPath, Path output, Double clusterClassificationThreshold, boolean emitMostLikely, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        // Unbox once, up front, instead of risking a NullPointerException deep inside the run.
        double threshold = clusterClassificationThreshold == null ? 0.0 : clusterClassificationThreshold.doubleValue();
        ClusterClassificationDriver.run(conf, input, clusteringOutputPath, output, threshold, emitMostLikely, runSequential);
    }

    /**
     * Classifies the vectors under {@code input} against the clusters found under
     * {@code clusteringOutputPath}.
     *
     * @param conf                           Hadoop configuration used for all file system access
     * @param input                          directory of vectors to classify
     * @param clusteringOutputPath           directory holding the clustering output
     * @param output                         directory the classification result is written to
     * @param clusterClassificationThreshold minimum pdf value for a vector to be emitted
     * @param emitMostLikely                 emit only the most likely cluster if {@code true},
     *                                       otherwise every cluster at or above the threshold
     * @param runSequential                  classify in-process if {@code true}, otherwise run a
     *                                       Hadoop job
     * @throws IOException            on file system errors
     * @throws InterruptedException   if the Hadoop job is interrupted or reports failure
     * @throws ClassNotFoundException if the Hadoop job cannot load a required class
     */
    public static void run(Configuration conf, Path input, Path clusteringOutputPath, Path output, double clusterClassificationThreshold, boolean emitMostLikely, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        if (runSequential) {
            ClusterClassificationDriver.classifyClusterSeq(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely);
        } else {
            ClusterClassificationDriver.classifyClusterMR(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely);
        }
    }

    /**
     * In-process classification: loads the cluster models and the clustering policy, then
     * classifies every input vector.
     */
    private static void classifyClusterSeq(Configuration conf, Path input, Path clusters, Path output, double threshold, boolean emitMostLikely) throws IOException {
        // Resolve the directory once; both the models and the policy are read from it.
        Path finalClusters = ClusterClassificationDriver.finalClustersPath(conf, clusters);
        List<Cluster> clusterModels = ClusterClassificationDriver.populateClusterModels(finalClusters, conf);
        ClusteringPolicy policy = ClusterClassifier.readPolicy(finalClusters);
        ClusterClassifier clusterClassifier = new ClusterClassifier(clusterModels, policy);
        ClusterClassificationDriver.selectCluster(conf, input, clusterModels, clusterClassifier, output, threshold, emitMostLikely);
    }

    /**
     * Reads every {@link ClusterWritable} from the part files under {@code finalClustersPath}
     * and returns the contained clusters, each configured with {@code conf}.
     */
    private static List<Cluster> populateClusterModels(Path finalClustersPath, Configuration conf) throws IOException {
        List<Cluster> clusterModels = new ArrayList<Cluster>();
        SequenceFileDirValueIterator<ClusterWritable> it = new SequenceFileDirValueIterator<ClusterWritable>(finalClustersPath, PathType.LIST, PathFilters.partFilter(), null, false, conf);
        while (it.hasNext()) {
            Cluster cluster = it.next().getValue();
            cluster.configure(conf);
            clusterModels.add(cluster);
        }
        return clusterModels;
    }

    /**
     * Returns the first entry under {@code clusterOutputPath} accepted by
     * {@link PathFilters#finalPartFilter()}.
     *
     * @throws IOException if the listing fails or no entry matches the filter
     */
    private static Path finalClustersPath(Configuration conf, Path clusterOutputPath) throws IOException {
        FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
        FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
        // Fail with a descriptive error rather than an ArrayIndexOutOfBounds/NullPointerException.
        if (clusterFiles == null || clusterFiles.length == 0) {
            throw new IOException("No path matching the final clusters filter found under " + clusterOutputPath);
        }
        return clusterFiles[0].getPath();
    }

    /**
     * Classifies every vector under {@code input} and appends the accepted ones to
     * {@code output/part-m-0}.
     */
    private static void selectCluster(Configuration conf, Path input, List<Cluster> clusterModels, ClusterClassifier clusterClassifier, Path output, double threshold, boolean emitMostLikely) throws IOException {
        // The writer targets the output directory, so it must use that path's file system and
        // the caller's configuration.
        SequenceFile.Writer writer = new SequenceFile.Writer(output.getFileSystem(conf), conf, new Path(output, "part-m-0"), IntWritable.class, WeightedPropertyVectorWritable.class);
        try {
            for (Pair<Writable, VectorWritable> record : new SequenceFileDirIterable<Writable, VectorWritable>(input, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
                Vector vector = ClusterClassificationDriver.nameAfterKey(record.getFirst(), record.getSecond().get());
                Vector pdfPerCluster = clusterClassifier.classify(vector);
                if (ClusterClassificationDriver.shouldClassify(pdfPerCluster, threshold)) {
                    ClusterClassificationDriver.classifyAndWrite(clusterModels, threshold, emitMostLikely, writer, new VectorWritable(vector), pdfPerCluster);
                }
            }
        } finally {
            // Always release the file handle, even when classification fails part-way through.
            writer.close();
        }
    }

    /**
     * Wraps {@code vector} in a {@link NamedVector} named after {@code key} when the key is
     * exactly a {@link Text} or an {@link IntWritable}; any other key type leaves the vector
     * untouched.
     */
    private static Vector nameAfterKey(Writable key, Vector vector) {
        // NOTE(review): a vector that already is a NamedVector is re-wrapped under the key's
        // name; confirm whether its existing name should be kept instead.
        Class<?> keyClass = key.getClass();
        if (keyClass.equals(Text.class)) {
            return new NamedVector(vector, key.toString());
        }
        if (keyClass.equals(IntWritable.class)) {
            return new NamedVector(vector, Integer.toString(((IntWritable)key).get()));
        }
        return vector;
    }

    /**
     * Emits either the single most likely cluster for the vector or every cluster whose pdf is
     * at or above the threshold, depending on {@code emitMostLikely}.
     */
    private static void classifyAndWrite(List<Cluster> clusterModels, double threshold, boolean emitMostLikely, SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster) throws IOException {
        if (emitMostLikely) {
            int maxValueIndex = pdfPerCluster.maxValueIndex();
            WeightedPropertyVectorWritable weighted = new WeightedPropertyVectorWritable(pdfPerCluster.maxValue(), vw.get(), new HashMap<Text, Text>());
            ClusterClassificationDriver.write(clusterModels, writer, weighted, maxValueIndex);
        } else {
            ClusterClassificationDriver.writeAllAboveThreshold(clusterModels, threshold, writer, vw, pdfPerCluster);
        }
    }

    /**
     * Writes one record per non-zero pdf entry whose value is at or above the threshold.
     */
    private static void writeAllAboveThreshold(List<Cluster> clusterModels, double threshold, SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster) throws IOException {
        for (Vector.Element pdf : pdfPerCluster.nonZeroes()) {
            if (pdf.get() >= threshold) {
                // Each record gets its own property map so records never share mutable state.
                WeightedPropertyVectorWritable weighted = new WeightedPropertyVectorWritable(pdf.get(), vw.get(), new HashMap<Text, Text>());
                ClusterClassificationDriver.write(clusterModels, writer, weighted, pdf.index());
            }
        }
    }

    /**
     * Stores the distance between the vector and the centre of the selected cluster under the
     * {@code "distance"} property and appends the record, keyed by the cluster id.
     *
     * @throws ClassCastException if the selected cluster is not a {@link DistanceMeasureCluster}
     */
    private static void write(List<Cluster> clusterModels, SequenceFile.Writer writer, WeightedPropertyVectorWritable weightedPropertyVectorWritable, int clusterIndex) throws IOException {
        Cluster cluster = clusterModels.get(clusterIndex);
        DistanceMeasure measure = ((DistanceMeasureCluster)cluster).getMeasure();
        double distance = measure.distance(cluster.getCenter(), weightedPropertyVectorWritable.getVector());
        weightedPropertyVectorWritable.getProperties().put(new Text("distance"), new Text(Double.toString(distance)));
        writer.append(new IntWritable(cluster.getId()), weightedPropertyVectorWritable);
    }

    /**
     * Returns {@code true} when the largest pdf value is at or above the threshold.
     */
    private static boolean shouldClassify(Vector pdfPerCluster, double threshold) {
        return pdfPerCluster.maxValue() >= threshold;
    }

    /**
     * Runs the classification as a map-only Hadoop job. The threshold, the emit mode and the
     * clusters location are handed over through the job configuration (presumably read by
     * {@link ClusterClassificationMapper}; verify against the mapper).
     *
     * @throws InterruptedException if the job is interrupted or completes unsuccessfully
     */
    private static void classifyClusterMR(Configuration conf, Path input, Path clustersIn, Path output, double threshold, boolean emitMostLikely) throws IOException, InterruptedException, ClassNotFoundException {
        conf.setFloat("pdf_threshold", (float)threshold);
        conf.setBoolean("emit_most_likely", emitMostLikely);
        conf.set("clusters_in", clustersIn.toUri().toString());
        Job job = new Job(conf, "Cluster Classification Driver running over input: " + input);
        job.setJarByClass(ClusterClassificationDriver.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setMapperClass(ClusterClassificationMapper.class);
        // Map-only job: every mapper writes its classified vectors directly.
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(WeightedPropertyVectorWritable.class);
        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, output);
        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Cluster Classification Driver Job failed processing " + input);
        }
    }
}

