/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.classify;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.clustering.ClusteringTestUtils;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.clustering.classify.ClusterClassificationDriver;
import org.apache.mahout.clustering.classify.ClusterClassifier;
import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
import org.apache.mahout.clustering.iterator.CanopyClusteringPolicy;
import org.apache.mahout.clustering.iterator.ClusteringPolicy;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

/**
 * Tests for {@link ClusterClassificationDriver}.
 *
 * <p>Each test writes the {@link #REFERENCE} points to a temporary directory, clusters them with
 * {@link CanopyDriver}, runs the classification driver over the clustering output and then reads
 * the classified vectors back, bucketing them by cluster id ("0", "1" or "2") for the assertions.
 */
public class ClusterClassificationDriverTest
extends MahoutTestCase {
    /** Two-dimensional sample points used as input by every test. */
    private static final double[][] REFERENCE = new double[][]{{1.0, 1.0}, {2.0, 1.0}, {1.0, 2.0}, {4.0, 4.0}, {5.0, 4.0}, {4.0, 5.0}, {5.0, 5.0}, {9.0, 9.0}, {8.0, 8.0}};

    private FileSystem fs;
    private Path clusteringOutputPath;
    private Configuration conf;
    private Path pointsPath;
    private Path classifiedOutputPath;

    /** Vectors read back under cluster id "0". */
    private List<Vector> firstCluster;
    /** Vectors read back under cluster id "1". */
    private List<Vector> secondCluster;
    /** Vectors read back under cluster id "2". */
    private List<Vector> thirdCluster;

    /**
     * Initializes the configuration, the file system derived from it, and the three (empty)
     * per-cluster collections.
     *
     * @throws Exception if the superclass set-up or the file system lookup fails
     */
    @Override
    @Before
    public void setUp() throws Exception {
        super.setUp();
        // Assign the field (not a shadowing local) so that 'conf' is never null inside a test.
        conf = getConfiguration();
        fs = FileSystem.get(conf);
        firstCluster = Lists.newArrayList();
        secondCluster = Lists.newArrayList();
        thirdCluster = Lists.newArrayList();
    }

    /**
     * Wraps every row of {@code raw} in a {@link RandomAccessSparseVector}-backed
     * {@link VectorWritable}.
     *
     * @param raw rows of coordinates; each row becomes one vector of the same length
     * @return the writables, in the same order as the rows of {@code raw}
     */
    private static List<VectorWritable> getPointsWritable(double[][] raw) {
        List<VectorWritable> points = Lists.newArrayList();
        for (double[] row : raw) {
            Vector vec = new RandomAccessSparseVector(row.length);
            vec.assign(row);
            points.add(new VectorWritable(vec));
        }
        return points;
    }

    /**
     * Clusters and classifies with {@code runSequential == false} for both drivers and checks
     * the outlier-removal expectations.
     */
    @Test
    public void testVectorClassificationWithOutlierRemovalMR() throws Exception {
        List<VectorWritable> points = getPointsWritable(REFERENCE);
        pointsPath = getTestTempDirPath("points");
        clusteringOutputPath = getTestTempDirPath("output");
        classifiedOutputPath = getTestTempDirPath("classifiedClusters");
        // The configuration must be assigned before it is handed to HadoopUtil.delete;
        // previously the delete was issued first, with a null configuration.
        conf = getConfiguration();
        HadoopUtil.delete(conf, classifiedOutputPath);
        ClusteringTestUtils.writePointsToFile(points, true, new Path(pointsPath, "file1"), fs, conf);
        runClustering(pointsPath, conf, false);
        runClassificationWithOutlierRemoval(false);
        collectVectorsForAssertion();
        assertVectorsWithOutlierRemoval();
    }

    /**
     * Clusters and classifies sequentially with a threshold of 0.0 and expects all reference
     * points to be spread over the three clusters.
     */
    @Test
    public void testVectorClassificationWithoutOutlierRemoval() throws Exception {
        List<VectorWritable> points = getPointsWritable(REFERENCE);
        pointsPath = getTestTempDirPath("points");
        clusteringOutputPath = getTestTempDirPath("output");
        classifiedOutputPath = getTestTempDirPath("classify");
        conf = getConfiguration();
        ClusteringTestUtils.writePointsToFile(points, new Path(pointsPath, "file1"), fs, conf);
        runClustering(pointsPath, conf, true);
        runClassificationWithoutOutlierRemoval();
        collectVectorsForAssertion();
        assertVectorsWithoutOutlierRemoval();
    }

    /**
     * Clusters and classifies sequentially with a threshold of 0.73 and checks the
     * outlier-removal expectations.
     */
    @Test
    public void testVectorClassificationWithOutlierRemoval() throws Exception {
        List<VectorWritable> points = getPointsWritable(REFERENCE);
        pointsPath = getTestTempDirPath("points");
        clusteringOutputPath = getTestTempDirPath("output");
        classifiedOutputPath = getTestTempDirPath("classify");
        conf = getConfiguration();
        ClusteringTestUtils.writePointsToFile(points, new Path(pointsPath, "file1"), fs, conf);
        runClustering(pointsPath, conf, true);
        runClassificationWithOutlierRemoval(true);
        collectVectorsForAssertion();
        assertVectorsWithOutlierRemoval();
    }

    /**
     * Runs canopy clustering (Manhattan distance, numeric arguments 3.1 and 2.1) into
     * {@link #clusteringOutputPath} and writes a {@link CanopyClusteringPolicy} next to the
     * final clusters ("clusters-0-final").
     *
     * @param pointsPath    directory holding the input points
     * @param conf          configuration handed to the canopy driver
     * @param runSequential forwarded to the canopy driver as its last argument
     */
    private void runClustering(Path pointsPath, Configuration conf, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        CanopyDriver.run(conf, pointsPath, clusteringOutputPath, new ManhattanDistanceMeasure(), 3.1, 2.1, false, 0.0, runSequential);
        Path finalClustersPath = new Path(clusteringOutputPath, "clusters-0-final");
        ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), finalClustersPath);
    }

    /** Runs the classification driver sequentially with a threshold argument of 0.0. */
    private void runClassificationWithoutOutlierRemoval() throws IOException, InterruptedException, ClassNotFoundException {
        ClusterClassificationDriver.run(getConfiguration(), pointsPath, clusteringOutputPath, classifiedOutputPath, 0.0, true, true);
    }

    /**
     * Runs the classification driver with a threshold argument of 0.73.
     *
     * @param runSequential forwarded to the classification driver as its last argument
     */
    private void runClassificationWithOutlierRemoval(boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        ClusterClassificationDriver.run(getConfiguration(), pointsPath, clusteringOutputPath, classifiedOutputPath, 0.73, true, runSequential);
    }

    /**
     * Reads every part file below {@link #classifiedOutputPath} and sorts the contained vectors
     * into the per-cluster collections according to their key.
     *
     * @throws IOException if listing, reading or closing a part file fails
     */
    private void collectVectorsForAssertion() throws IOException {
        Path[] partFilePaths = FileUtil.stat2Paths(fs.globStatus(classifiedOutputPath));
        FileStatus[] partFiles = fs.listStatus(partFilePaths, PathFilters.partFilter());
        for (FileStatus partFile : partFiles) {
            SequenceFile.Reader classifiedVectors = new SequenceFile.Reader(fs, partFile.getPath(), conf);
            // Close each reader once its file is consumed; it used to be left open.
            try {
                IntWritable clusterIdAsKey = new IntWritable();
                WeightedPropertyVectorWritable point = new WeightedPropertyVectorWritable();
                while (classifiedVectors.next(clusterIdAsKey, point)) {
                    collectVector(clusterIdAsKey.toString(), point.getVector());
                }
            } finally {
                classifiedVectors.close();
            }
        }
    }

    /**
     * Adds {@code vector} to the collection matching {@code clusterId}; ids other than "0", "1"
     * and "2" are ignored.
     */
    private void collectVector(String clusterId, Vector vector) {
        if ("0".equals(clusterId)) {
            firstCluster.add(vector);
        } else if ("1".equals(clusterId)) {
            secondCluster.add(vector);
        } else if ("2".equals(clusterId)) {
            thirdCluster.add(vector);
        }
    }

    private void assertVectorsWithOutlierRemoval() {
        checkClustersWithOutlierRemoval();
    }

    private void assertVectorsWithoutOutlierRemoval() {
        assertFirstClusterWithoutOutlierRemoval();
        assertSecondClusterWithoutOutlierRemoval();
        assertThirdClusterWithoutOutlierRemoval();
    }

    private void assertThirdClusterWithoutOutlierRemoval() {
        assertClusterMembers(thirdCluster, "{0:9.0,1:9.0}", "{0:8.0,1:8.0}");
    }

    private void assertSecondClusterWithoutOutlierRemoval() {
        assertClusterMembers(secondCluster, "{0:4.0,1:4.0}", "{0:5.0,1:4.0}", "{0:4.0,1:5.0}", "{0:5.0,1:5.0}");
    }

    private void assertFirstClusterWithoutOutlierRemoval() {
        assertClusterMembers(firstCluster, "{0:1.0,1:1.0}", "{0:2.0,1:1.0}", "{0:1.0,1:2.0}");
    }

    /**
     * Asserts that {@code cluster} holds exactly as many vectors as there are expected strings
     * and that each vector's {@code asFormatString()} is one of them.
     *
     * @param cluster         the collected vectors of one cluster
     * @param expectedFormats the permitted format strings
     */
    private static void assertClusterMembers(List<Vector> cluster, String... expectedFormats) {
        Assert.assertEquals(expectedFormats.length, cluster.size());
        for (Vector vector : cluster) {
            Assert.assertTrue(ArrayUtils.contains(expectedFormats, vector.asFormatString()));
        }
    }

    /**
     * Asserts that exactly one of the three clusters is empty and the other two are singletons
     * which together cover the points {@code {0:9.0,1:9.0}} and {@code {0:1.0,1:1.0}}.
     */
    private void checkClustersWithOutlierRemoval() {
        HashSet<String> reference = Sets.newHashSet("{0:9.0,1:9.0}", "{0:1.0,1:1.0}");
        List<List<Vector>> clusters = Lists.newArrayList();
        clusters.add(firstCluster);
        clusters.add(secondCluster);
        clusters.add(thirdCluster);
        int singletonCnt = 0;
        int emptyCnt = 0;
        for (List<Vector> cluster : clusters) {
            if (cluster.isEmpty()) {
                ++emptyCnt;
                continue;
            }
            ++singletonCnt;
            Assert.assertEquals("expecting only singleton clusters; got size=" + cluster.size(), 1, cluster.size());
            Vector only = cluster.get(0);
            String formatted;
            if (only.getClass().equals(NamedVector.class)) {
                // Named vectors are compared through the vector they wrap.
                formatted = ((NamedVector) only).getDelegate().asFormatString();
            } else if (only.getClass().equals(RandomAccessSparseVector.class)) {
                formatted = only.asFormatString();
            } else {
                // Other vector types are not matched here; an unmatched reference entry
                // is caught by the final assertion below.
                continue;
            }
            Assert.assertTrue("not expecting cluster:" + formatted, reference.contains(formatted));
            reference.remove(formatted);
        }
        Assert.assertEquals("Different number of empty clusters than expected!", 1, emptyCnt);
        Assert.assertEquals("Different number of singletons than expected!", 2, singletonCnt);
        Assert.assertEquals("Didn't match all reference clusters!", 0, reference.size());
    }
}

