/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.kmeans;

import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import java.io.Closeable;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.ClusteringTestUtils;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.clustering.kmeans.Kluster;
import org.apache.mahout.common.DummyOutputCollector;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.junit.Before;
import org.junit.Test;

/**
 * Tests {@link KMeansDriver} against the nine two-dimensional points in {@link #REFERENCE}.
 *
 * <p>Three tests seed k-means with the first {@code k + 1} input points and check how many
 * distinct cluster ids show up in the {@code clusteredPoints} output. A fourth test seeds
 * k-means with the clusters produced by {@link CanopyDriver} and checks the resulting
 * assignment of points to clusters.
 */
public final class TestKmeansClustering extends MahoutTestCase {

    /** Sample points used as input by every test in this class. */
    public static final double[][] REFERENCE = new double[][]{
        {1.0, 1.0}, {2.0, 1.0}, {1.0, 2.0}, {2.0, 2.0}, {3.0, 3.0},
        {4.0, 4.0}, {5.0, 4.0}, {4.0, 5.0}, {5.0, 5.0}};

    /**
     * Row {@code k} is consulted for a run seeded with {@code k + 1} clusters. Only the length
     * of the row is asserted: it is the expected number of distinct cluster ids in the output.
     */
    private static final int[][] EXPECTED_NUM_POINTS = new int[][]{
        {9},
        {4, 5},
        {4, 4, 1},
        {1, 2, 1, 5},
        {1, 1, 1, 2, 4},
        {1, 1, 1, 1, 1, 4},
        {1, 1, 1, 1, 1, 2, 2},
        {1, 1, 1, 1, 1, 1, 2, 1},
        {1, 1, 1, 1, 1, 1, 1, 1, 1}};

    /** Per-coordinate tolerance used when comparing canopy centers with their expected values. */
    private static final double CENTER_TOLERANCE = 1.0E-6;

    /** File system obtained from the test configuration in {@link #setUp()}. */
    private FileSystem fs;

    @Override
    @Before
    public void setUp() throws Exception {
        super.setUp();
        Configuration conf = getConfiguration();
        fs = FileSystem.get(conf);
    }

    /**
     * Wraps each row of {@code raw} in a {@link VectorWritable} backed by a
     * {@link RandomAccessSparseVector}.
     *
     * @param raw one row per point
     * @return the points, in the same order as the rows of {@code raw}
     */
    public static List<VectorWritable> getPointsWritable(double[][] raw) {
        List<VectorWritable> points = Lists.newArrayList();
        for (double[] fr : raw) {
            Vector vec = new RandomAccessSparseVector(fr.length);
            vec.assign(fr);
            points.add(new VectorWritable(vec));
        }
        return points;
    }

    /**
     * Wraps each row of {@code raw} in a {@link VectorWritable} backed by a {@link DenseVector}.
     *
     * @param raw one row per point
     * @return the points, in the same order as the rows of {@code raw}
     */
    public static List<VectorWritable> getPointsWritableDenseVector(double[][] raw) {
        List<VectorWritable> points = Lists.newArrayList();
        for (double[] fr : raw) {
            Vector vec = new DenseVector(fr.length);
            vec.assign(fr);
            points.add(new VectorWritable(vec));
        }
        return points;
    }

    /**
     * Converts each row of {@code raw} into a {@link SequentialAccessSparseVector}.
     *
     * @param raw one row per point
     * @return the points, in the same order as the rows of {@code raw}
     */
    public static List<Vector> getPoints(double[][] raw) {
        List<Vector> points = Lists.newArrayList();
        for (double[] fr : raw) {
            Vector vec = new SequentialAccessSparseVector(fr.length);
            vec.assign(fr);
            points.add(vec);
        }
        return points;
    }

    /** Runs k-means with {@code --method sequential} on sparse vectors for k = 1 .. 8. */
    @Test
    public void testKMeansSeqJob() throws Exception {
        runKMeansForIncreasingK("testKMeansSeqJob", getPointsWritable(REFERENCE), 1, true, "part-m-0");
    }

    /** Runs k-means with {@code --method sequential} on dense vectors for k = 1 .. 8. */
    @Test
    public void testKMeansSeqJobDenseVector() throws Exception {
        runKMeansForIncreasingK("testKMeansSeqJobDenseVector", getPointsWritableDenseVector(REFERENCE), 1, true,
            "part-m-0");
    }

    /** Runs k-means without a {@code --method} option on sparse vectors for k = 1, 4, 7. */
    @Test
    public void testKMeansMRJob() throws Exception {
        runKMeansForIncreasingK("testKMeansMRJob", getPointsWritable(REFERENCE), 3, false, "part-m-00000");
    }

    /**
     * Seeds k-means with the output of a canopy run and verifies both the canopy centers and
     * the final assignment of points to clusters.
     */
    @Test
    public void testKMeansWithCanopyClusterInput() throws Exception {
        List<VectorWritable> points = getPointsWritable(REFERENCE);
        Path pointsPath = getTestTempDirPath("points");
        Configuration conf = getConfiguration();
        ClusteringTestUtils.writePointsToFile(points, true, new Path(pointsPath, "file1"), fs, conf);
        ClusteringTestUtils.writePointsToFile(points, true, new Path(pointsPath, "file2"), fs, conf);

        // Run the canopy job; its final clusters become the k-means seeds below.
        Path outputPath = getTestTempDirPath("output");
        CanopyDriver.run(conf, pointsPath, outputPath, new ManhattanDistanceMeasure(), 3.1, 2.1, false, 0.0, false);

        // Gather every cluster the canopy job wrote, keyed by the record key.
        DummyOutputCollector<Text, ClusterWritable> canopies = new DummyOutputCollector<Text, ClusterWritable>();
        FileStatus[] outParts = FileSystem.get(conf).globStatus(new Path(outputPath, "clusters-0-final/*-0*"));
        for (FileStatus outPartStat : outParts) {
            for (Pair<Text, ClusterWritable> record
                : new SequenceFileIterable<Text, ClusterWritable>(outPartStat.getPath(), conf)) {
                canopies.collect(record.getFirst(), record.getSecond());
            }
        }

        // Exactly two centers are expected: (1.5, 1.5) and (4.33.., 4.33..), each seen once.
        boolean got15 = false;
        boolean got43 = false;
        int count = 0;
        for (Text key : canopies.getKeys()) {
            count++;
            List<ClusterWritable> vl = canopies.getValue(key);
            assertEquals("non-singleton centroid!", 1, vl.size());
            Vector center = vl.get(0).getValue().getCenter();
            assertEquals("cetriod vector is wrong length", 2, center.size());
            if (isCenter(center, 1.5) && !got15) {
                got15 = true;
            } else if (isCenter(center, 4.333333333333334) && !got43) {
                got43 = true;
            } else {
                fail("got unexpected center: " + center + " [" + center.getClass().toString() + ']');
            }
        }
        assertEquals("got unexpected number of centers", 2, count);

        // Run k-means seeded with the canopy clusters; a fresh configuration is used for the driver.
        Path kmeansOutput = new Path(outputPath, "kmeans");
        KMeansDriver.run(getConfiguration(), pointsPath, new Path(outputPath, "clusters-0-final"), kmeansOutput,
            0.001, 10, true, 0.0, false);

        Path clusteredPointsPath = new Path(kmeansOutput, "clusteredPoints");
        DummyOutputCollector<IntWritable, WeightedPropertyVectorWritable> clustered =
            readClusteredPoints(new Path(clusteredPointsPath, "part-m-00000"), conf);

        for (IntWritable clusterId : clustered.getKeys()) {
            List<WeightedPropertyVectorWritable> wpvList = clustered.getValue(clusterId);
            assertTrue("empty cluster!", !wpvList.isEmpty());
            // The first point's x coordinate decides which of the two expected clusters this is.
            if (wpvList.get(0).getVector().get(0) <= 2.0) {
                for (WeightedPropertyVectorWritable wv : wpvList) {
                    Vector v = wv.getVector();
                    int idx = v.maxValueIndex();
                    assertTrue("bad cluster!", v.get(idx) <= 2.0);
                }
                assertEquals("Wrong size cluster", 4, wpvList.size());
            } else {
                for (WeightedPropertyVectorWritable wv : wpvList) {
                    Vector v = wv.getVector();
                    int idx = v.minValueIndex();
                    assertTrue("bad cluster!", v.get(idx) > 2.0);
                }
                assertEquals("Wrong size cluster", 5, wpvList.size());
            }
        }
    }

    /**
     * Shared body of the three {@code testKMeans*Job} tests. Writes {@code points} twice as
     * input, then for {@code k = 1, 1 + kStep, ...} below {@code points.size()} seeds
     * {@code k + 1} clusters, runs {@link KMeansDriver} through {@link ToolRunner} and asserts
     * that the number of distinct keys in the clustered-points file equals
     * {@code EXPECTED_NUM_POINTS[k].length}.
     *
     * @param label name printed to standard output before each run
     * @param points input points; the first {@code k + 1} also serve as initial centers
     * @param kStep increment applied to {@code k} between runs
     * @param sequential when true, {@code --method sequential} is passed and the driver reuses
     *     the test configuration; when false, no method option is passed and the driver gets a
     *     fresh configuration from {@code getConfiguration()}
     * @param clusteredPointsPart name of the file to read inside the {@code clusteredPoints} directory
     */
    private void runKMeansForIncreasingK(String label, List<VectorWritable> points, int kStep, boolean sequential,
                                         String clusteredPointsPart) throws Exception {
        DistanceMeasure measure = new EuclideanDistanceMeasure();
        Path pointsPath = getTestTempDirPath("points");
        Path clustersPath = getTestTempDirPath("clusters");
        Configuration conf = getConfiguration();
        ClusteringTestUtils.writePointsToFile(points, true, new Path(pointsPath, "file1"), fs, conf);
        ClusteringTestUtils.writePointsToFile(points, true, new Path(pointsPath, "file2"), fs, conf);

        for (int k = 1; k < points.size(); k += kStep) {
            System.out.println(label + " k= " + k);
            writeInitialClusters(points, k + 1, measure, new Path(clustersPath, "part-00000"), conf);

            Path outputPath = getTestTempDirPath("output" + k);
            List<String> args = Lists.newArrayList(
                optKey("input"), pointsPath.toString(),
                optKey("clusters"), clustersPath.toString(),
                optKey("output"), outputPath.toString(),
                optKey("distanceMeasure"), EuclideanDistanceMeasure.class.getName(),
                optKey("convergenceDelta"), "0.001",
                optKey("maxIter"), "2",
                optKey("clustering"),
                optKey("overwrite"));
            if (sequential) {
                args.add(optKey("method"));
                args.add("sequential");
            }
            Configuration driverConf = sequential ? conf : getConfiguration();
            ToolRunner.run(driverConf, new KMeansDriver(), args.toArray(new String[args.size()]));

            Path clusteredPointsPath = new Path(outputPath, "clusteredPoints");
            DummyOutputCollector<IntWritable, WeightedPropertyVectorWritable> collector =
                readClusteredPoints(new Path(clusteredPointsPath, clusteredPointsPart), conf);
            assertEquals("clusters[" + k + ']', EXPECTED_NUM_POINTS[k].length, collector.getKeys().size());
        }
    }

    /**
     * Writes the first {@code numClusters} points to {@code path} as {@link Kluster} records
     * keyed by cluster identifier. Each cluster observes its own center once before it is written.
     */
    private static void writeInitialClusters(List<VectorWritable> points, int numClusters, DistanceMeasure measure,
                                             Path path, Configuration conf) throws Exception {
        FileSystem clustersFs = FileSystem.get(path.toUri(), conf);
        SequenceFile.Writer writer = new SequenceFile.Writer(clustersFs, conf, path, Text.class, Kluster.class);
        try {
            for (int i = 0; i < numClusters; i++) {
                Vector vec = points.get(i).get();
                Kluster cluster = new Kluster(vec, i, measure);
                cluster.observe(cluster.getCenter(), 1.0);
                writer.append(new Text(cluster.getIdentifier()), cluster);
            }
        } finally {
            Closeables.close(writer, false);
        }
    }

    /** Reads every record of the sequence file at {@code file} into a collector keyed by the record key. */
    private static DummyOutputCollector<IntWritable, WeightedPropertyVectorWritable> readClusteredPoints(
        Path file, Configuration conf) throws Exception {
        DummyOutputCollector<IntWritable, WeightedPropertyVectorWritable> collector =
            new DummyOutputCollector<IntWritable, WeightedPropertyVectorWritable>();
        for (Pair<IntWritable, WeightedPropertyVectorWritable> record
            : new SequenceFileIterable<IntWritable, WeightedPropertyVectorWritable>(file, conf)) {
            collector.collect(record.getFirst(), record.getSecond());
        }
        return collector;
    }

    /** Returns true when the first two coordinates of {@code v} are both within tolerance of {@code expected}. */
    private static boolean isCenter(Vector v, double expected) {
        return Math.abs(v.get(0) - expected) < CENTER_TOLERANCE
            && Math.abs(v.get(1) - expected) < CENTER_TOLERANCE;
    }
}

