/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.canopy;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import java.io.Closeable;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.ClusteringTestUtils;
import org.apache.mahout.clustering.canopy.Canopy;
import org.apache.mahout.clustering.canopy.CanopyClusterer;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.clustering.canopy.CanopyMapper;
import org.apache.mahout.clustering.canopy.CanopyReducer;
import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.common.DummyRecordWriter;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.junit.Before;
import org.junit.Test;

@Deprecated
public final class TestCanopyCreation
extends MahoutTestCase {
    /** Nine two-dimensional sample points that every test in this class clusters. */
    private static final double[][] RAW = {
        {1.0, 1.0}, {2.0, 1.0}, {1.0, 2.0},
        {2.0, 2.0}, {3.0, 3.0}, {4.0, 4.0},
        {5.0, 4.0}, {4.0, 5.0}, {5.0, 5.0}
    };

    /** Canopies built from the sample points with the Manhattan measure; populated in {@code setUp()}. */
    private List<Canopy> referenceManhattan;

    /** Distance measure used by the Manhattan variants of the tests. */
    private final DistanceMeasure manhattanDistanceMeasure = new ManhattanDistanceMeasure();

    /** Centers of {@code referenceManhattan}; populated in {@code setUp()}. */
    private List<Vector> manhattanCentroids;

    /** Canopies built from the sample points with the Euclidean measure; populated in {@code setUp()}. */
    private List<Canopy> referenceEuclidean;

    /** Distance measure used by the Euclidean variants of the tests. */
    private final DistanceMeasure euclideanDistanceMeasure = new EuclideanDistanceMeasure();

    /** Centers of {@code referenceEuclidean}; populated in {@code setUp()}. */
    private List<Vector> euclideanCentroids;

    /** File system obtained from the test configuration in {@code setUp()}. */
    private FileSystem fs;

    /**
     * Wraps every row of {@code RAW} in a {@link VectorWritable}.
     *
     * @return a new mutable list holding one writable per sample point, in {@code RAW} order
     */
    private static List<VectorWritable> getPointsWritable() {
        // Typed (and presized) list: returning a raw ArrayList here relied on an unchecked conversion.
        List<VectorWritable> points = new ArrayList<VectorWritable>(RAW.length);
        for (double[] row : RAW) {
            Vector vec = new RandomAccessSparseVector(row.length);
            vec.assign(row);
            points.add(new VectorWritable(vec));
        }
        return points;
    }

    /**
     * Converts every row of {@code RAW} into a sparse {@link Vector}.
     *
     * @return a new mutable list holding one vector per sample point, in {@code RAW} order
     */
    private static List<Vector> getPoints() {
        // Typed (and presized) list: returning a raw ArrayList here relied on an unchecked conversion.
        List<Vector> points = new ArrayList<Vector>(RAW.length);
        for (double[] row : RAW) {
            Vector vec = new RandomAccessSparseVector(row.length);
            vec.assign(row);
            points.add(vec);
        }
        return points;
    }

    /**
     * Writes the formatted representation of each canopy to standard output, one per line.
     *
     * @param canopies the canopies to print
     */
    private static void printCanopies(Iterable<Canopy> canopies) {
        for (Canopy each : canopies) {
            String formatted = each.asFormatString(null);
            System.out.println(formatted);
        }
    }

    /**
     * Obtains the file system for the test configuration and builds the reference
     * canopies, plus their centers, for both distance measures using the
     * thresholds 3.1 and 2.1.
     */
    @Override
    @Before
    public void setUp() throws Exception {
        super.setUp();
        fs = FileSystem.get(getConfiguration());

        // Manhattan reference clustering.
        referenceManhattan = CanopyClusterer.createCanopies(getPoints(), manhattanDistanceMeasure, 3.1, 2.1);
        manhattanCentroids = CanopyClusterer.getCenters(referenceManhattan);

        // Euclidean reference clustering.
        referenceEuclidean = CanopyClusterer.createCanopies(getPoints(), euclideanDistanceMeasure, 3.1, 2.1);
        euclideanCentroids = CanopyClusterer.getCenters(referenceEuclidean);
    }

    /**
     * Checks the reference Manhattan clustering: three canopies, each with the
     * expected number of observations and the expected centroid (within 1e-6).
     */
    @Test
    public void testReferenceManhattan() throws Exception {
        printCanopies(referenceManhattan);
        assertEquals("number of canopies", 3, referenceManhattan.size());

        // The expectations do not depend on the loop index, so build them once.
        int[] expectedNumPoints = {4, 4, 3};
        double[][] expectedCentroids = {{1.5, 1.5}, {4.0, 4.0}, {4.666666666666667, 4.666666666666667}};

        for (int canopyIx = 0; canopyIx < referenceManhattan.size(); ++canopyIx) {
            Canopy testCanopy = referenceManhattan.get(canopyIx);
            // JUnit's order is (message, expected, actual); the arguments used to be swapped,
            // which produced misleading failure messages.
            assertEquals("canopy points " + canopyIx, expectedNumPoints[canopyIx], testCanopy.getNumObservations());
            double[] refCentroid = expectedCentroids[canopyIx];
            Vector testCentroid = testCanopy.computeCentroid();
            for (int pointIx = 0; pointIx < refCentroid.length; ++pointIx) {
                assertEquals("canopy centroid " + canopyIx + '[' + pointIx + ']',
                    refCentroid[pointIx], testCentroid.get(pointIx), 1.0E-6);
            }
        }
    }

    /**
     * Checks the reference Euclidean clustering: three canopies, each with the
     * expected number of observations and the expected centroid (within 1e-6).
     */
    @Test
    public void testReferenceEuclidean() throws Exception {
        printCanopies(referenceEuclidean);
        assertEquals("number of canopies", 3, referenceEuclidean.size());

        int[] expectedNumPoints = {5, 5, 3};
        double[][] expectedCentroids = {{1.8, 1.8}, {4.2, 4.2}, {4.666666666666667, 4.666666666666667}};

        for (int canopyIx = 0; canopyIx < referenceEuclidean.size(); ++canopyIx) {
            Canopy testCanopy = referenceEuclidean.get(canopyIx);
            // JUnit's order is (message, expected, actual); the arguments used to be swapped,
            // which produced misleading failure messages.
            assertEquals("canopy points " + canopyIx, expectedNumPoints[canopyIx], testCanopy.getNumObservations());
            double[] refCentroid = expectedCentroids[canopyIx];
            Vector testCentroid = testCanopy.computeCentroid();
            for (int pointIx = 0; pointIx < refCentroid.length; ++pointIx) {
                assertEquals("canopy centroid " + canopyIx + '[' + pointIx + ']',
                    refCentroid[pointIx], testCentroid.get(pointIx), 1.0E-6);
            }
        }
    }

    /**
     * Feeds the sample points through {@link CanopyMapper} configured with the
     * Manhattan measure and checks that it emits, under the single key
     * {@code "centroid"}, three vectors matching the reference Manhattan centers.
     */
    @Test
    public void testCanopyMapperManhattan() throws Exception {
        CanopyMapper mapper = new CanopyMapper();
        Configuration conf = getConfiguration();
        conf.set("org.apache.mahout.clustering.canopy.measure", manhattanDistanceMeasure.getClass().getName());
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1));
        conf.set("org.apache.mahout.clustering.canopy.canopyFilter", "0");

        // Parameterized writer/context so the emitted values need no unchecked casts.
        DummyRecordWriter<Text, VectorWritable> writer = new DummyRecordWriter<Text, VectorWritable>();
        Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable>.Context context =
            DummyRecordWriter.build(mapper, conf, writer);
        mapper.setup(context);

        List<VectorWritable> points = getPointsWritable();
        for (VectorWritable point : points) {
            mapper.map(new Text(), point, context);
        }
        mapper.cleanup(context);

        assertEquals("Number of map results", 1, writer.getData().size());
        List<VectorWritable> data = writer.getValue(new Text("centroid"));
        assertEquals("Number of centroids", 3, data.size());
        for (int i = 0; i < data.size(); ++i) {
            assertEquals("Centroid error",
                manhattanCentroids.get(i).asFormatString(),
                data.get(i).get().asFormatString());
        }
    }

    /**
     * Feeds the sample points through {@link CanopyMapper} configured with the
     * Euclidean measure and checks that it emits, under the single key
     * {@code "centroid"}, three vectors matching the reference Euclidean centers.
     */
    @Test
    public void testCanopyMapperEuclidean() throws Exception {
        CanopyMapper mapper = new CanopyMapper();
        Configuration conf = getConfiguration();
        conf.set("org.apache.mahout.clustering.canopy.measure", euclideanDistanceMeasure.getClass().getName());
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1));
        conf.set("org.apache.mahout.clustering.canopy.canopyFilter", "0");

        // Parameterized writer/context so the emitted values need no unchecked casts.
        DummyRecordWriter<Text, VectorWritable> writer = new DummyRecordWriter<Text, VectorWritable>();
        Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable>.Context context =
            DummyRecordWriter.build(mapper, conf, writer);
        mapper.setup(context);

        List<VectorWritable> points = getPointsWritable();
        for (VectorWritable point : points) {
            mapper.map(new Text(), point, context);
        }
        mapper.cleanup(context);

        assertEquals("Number of map results", 1, writer.getData().size());
        List<VectorWritable> data = writer.getValue(new Text("centroid"));
        assertEquals("Number of centroids", 3, data.size());
        for (int i = 0; i < data.size(); ++i) {
            assertEquals("Centroid error",
                euclideanCentroids.get(i).asFormatString(),
                data.get(i).get().asFormatString());
        }
    }

    /**
     * Reduces the sample points with {@link CanopyReducer} configured with the
     * Manhattan measure and checks that three canopies are written whose
     * centroids, in insertion order, equal the reference Manhattan centers.
     */
    @Test
    public void testCanopyReducerManhattan() throws Exception {
        CanopyReducer reducer = new CanopyReducer();
        Configuration conf = getConfiguration();
        conf.set("org.apache.mahout.clustering.canopy.measure", "org.apache.mahout.common.distance.ManhattanDistanceMeasure");
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1));
        conf.set("org.apache.mahout.clustering.canopy.canopyFilter", "0");

        // The writer must be parameterized: iterating a raw Iterable as Text does not compile.
        DummyRecordWriter<Text, ClusterWritable> writer = new DummyRecordWriter<Text, ClusterWritable>();
        Reducer<Text, VectorWritable, Text, ClusterWritable>.Context context =
            DummyRecordWriter.build(reducer, conf, writer, Text.class, VectorWritable.class);
        reducer.setup(context);

        List<VectorWritable> points = getPointsWritable();
        reducer.reduce(new Text("centroid"), points, context);

        Iterable<Text> keys = writer.getKeysInInsertionOrder();
        assertEquals("Number of centroids", 3, Iterables.size(keys));
        int i = 0;
        for (Text key : keys) {
            List<ClusterWritable> data = writer.getValue(key);
            ClusterWritable clusterWritable = data.get(0);
            Canopy canopy = (Canopy) clusterWritable.getValue();
            Vector expected = manhattanCentroids.get(i);
            Vector actual = canopy.computeCentroid();
            assertEquals(expected.asFormatString() + " is not equal to " + actual.asFormatString(), expected, actual);
            ++i;
        }
    }

    /**
     * Reduces the sample points with {@link CanopyReducer} configured with the
     * Euclidean measure and checks that three canopies are written whose
     * centroids, in insertion order, equal the reference Euclidean centers.
     */
    @Test
    public void testCanopyReducerEuclidean() throws Exception {
        CanopyReducer reducer = new CanopyReducer();
        Configuration conf = getConfiguration();
        conf.set("org.apache.mahout.clustering.canopy.measure", "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1));
        conf.set("org.apache.mahout.clustering.canopy.canopyFilter", "0");

        // The writer must be parameterized: iterating a raw Iterable as Text does not compile.
        DummyRecordWriter<Text, ClusterWritable> writer = new DummyRecordWriter<Text, ClusterWritable>();
        Reducer<Text, VectorWritable, Text, ClusterWritable>.Context context =
            DummyRecordWriter.build(reducer, conf, writer, Text.class, VectorWritable.class);
        reducer.setup(context);

        List<VectorWritable> points = getPointsWritable();
        reducer.reduce(new Text("centroid"), points, context);

        Iterable<Text> keys = writer.getKeysInInsertionOrder();
        assertEquals("Number of centroids", 3, Iterables.size(keys));
        int i = 0;
        for (Text key : keys) {
            List<ClusterWritable> data = writer.getValue(key);
            ClusterWritable clusterWritable = data.get(0);
            Canopy canopy = (Canopy) clusterWritable.getValue();
            Vector expected = euclideanCentroids.get(i);
            Vector actual = canopy.computeCentroid();
            assertEquals(expected.asFormatString() + " is not equal to " + actual.asFormatString(), expected, actual);
            ++i;
        }
    }

    /**
     * Writes the sample points to two input files, runs {@link CanopyDriver}
     * non-sequentially with the Manhattan measure (no clustering step), and reads
     * {@code clusters-0-final/part-r-00000}. Exactly two records are expected,
     * keyed {@code C-0} and {@code C-1}, whose centers match (1.5, 1.5) and
     * (4.333..., 4.333...) in either order within 1e-6.
     */
    @Test
    public void testCanopyGenManhattanMR() throws Exception {
        List<VectorWritable> points = getPointsWritable();
        Configuration config = getConfiguration();
        ClusteringTestUtils.writePointsToFile(points, getTestTempFilePath("testdata/file1"), fs, config);
        ClusteringTestUtils.writePointsToFile(points, getTestTempFilePath("testdata/file2"), fs, config);
        Path output = getTestTempDirPath("output");
        CanopyDriver.run(config, getTestTempDirPath("testdata"), output, manhattanDistanceMeasure,
            3.1, 2.1, false, 0.0, false);

        Path path = new Path(output, "clusters-0-final/part-r-00000");
        // Named distinctly so the local does not shadow the fs field.
        FileSystem outputFs = FileSystem.get(path.toUri(), config);
        SequenceFile.Reader reader = new SequenceFile.Reader(outputFs, path, config);
        try {
            Text key = new Text();
            ClusterWritable clusterWritable = new ClusterWritable();

            // Typed pairs replace the raw Pair/ArrayList that needed unchecked casts.
            List<Pair<Double, Double>> refCenters = Lists.newArrayList();
            refCenters.add(new Pair<Double, Double>(1.5, 1.5));
            refCenters.add(new Pair<Double, Double>(4.333333333333334, 4.333333333333334));

            assertTrue("more to come", reader.next(key, clusterWritable));
            assertEquals("1st key", "C-0", key.toString());
            Vector center = clusterWritable.getValue().getCenter();
            Pair<Double, Double> c = new Pair<Double, Double>(center.get(0), center.get(1));
            assertTrue("center " + c + " not found", findAndRemove(c, refCenters, 1.0E-6));

            assertTrue("more to come", reader.next(key, clusterWritable));
            assertEquals("2nd key", "C-1", key.toString());
            center = clusterWritable.getValue().getCenter();
            c = new Pair<Double, Double>(center.get(0), center.get(1));
            assertTrue("center " + c + " not found", findAndRemove(c, refCenters, 1.0E-6));

            assertFalse("more to come", reader.next(key, clusterWritable));
        } finally {
            Closeables.close(reader, true);
        }
    }

    /**
     * Looks for the first pair in {@code list} whose two components each differ
     * from those of {@code target} by less than {@code epsilon}, and removes it.
     *
     * @param target  the pair to look for
     * @param list    the candidates; a matching element is removed from this collection
     * @param epsilon exclusive upper bound on the absolute per-component difference
     * @return {@code true} if a match was found and removed, {@code false} otherwise
     */
    static boolean findAndRemove(Pair<Double, Double> target, Collection<Pair<Double, Double>> list, double epsilon) {
        for (Pair<Double, Double> candidate : list) {
            boolean firstClose = Math.abs(target.getFirst() - candidate.getFirst()) < epsilon;
            // The second component is only inspected when the first one already matches.
            if (firstClose && Math.abs(target.getSecond() - candidate.getSecond()) < epsilon) {
                // Safe despite the enclosing for-each: we return right after the removal.
                list.remove(candidate);
                return true;
            }
        }
        return false;
    }

    /**
     * Writes the sample points to two input files, runs {@link CanopyDriver}
     * non-sequentially with the Euclidean measure (no clustering step), and reads
     * {@code clusters-0-final/part-r-00000}. Exactly two records are expected,
     * keyed {@code C-0} and {@code C-1}, whose centers match (1.8, 1.8) and
     * (4.433..., 4.433...) in either order within 1e-6.
     */
    @Test
    public void testCanopyGenEuclideanMR() throws Exception {
        List<VectorWritable> points = getPointsWritable();
        Configuration config = getConfiguration();
        ClusteringTestUtils.writePointsToFile(points, getTestTempFilePath("testdata/file1"), fs, config);
        ClusteringTestUtils.writePointsToFile(points, getTestTempFilePath("testdata/file2"), fs, config);
        Path output = getTestTempDirPath("output");
        CanopyDriver.run(config, getTestTempDirPath("testdata"), output, euclideanDistanceMeasure,
            3.1, 2.1, false, 0.0, false);

        Path path = new Path(output, "clusters-0-final/part-r-00000");
        // Named distinctly so the local does not shadow the fs field.
        FileSystem outputFs = FileSystem.get(path.toUri(), config);
        SequenceFile.Reader reader = new SequenceFile.Reader(outputFs, path, config);
        try {
            Text key = new Text();
            ClusterWritable clusterWritable = new ClusterWritable();

            // Typed pairs replace the raw Pair/ArrayList that needed unchecked casts.
            List<Pair<Double, Double>> refCenters = Lists.newArrayList();
            refCenters.add(new Pair<Double, Double>(1.8, 1.8));
            refCenters.add(new Pair<Double, Double>(4.433333333333334, 4.433333333333334));

            assertTrue("more to come", reader.next(key, clusterWritable));
            assertEquals("1st key", "C-0", key.toString());
            Vector center = clusterWritable.getValue().getCenter();
            Pair<Double, Double> c = new Pair<Double, Double>(center.get(0), center.get(1));
            assertTrue("center " + c + " not found", findAndRemove(c, refCenters, 1.0E-6));

            assertTrue("more to come", reader.next(key, clusterWritable));
            assertEquals("2nd key", "C-1", key.toString());
            center = clusterWritable.getValue().getCenter();
            c = new Pair<Double, Double>(center.get(0), center.get(1));
            assertTrue("center " + c + " not found", findAndRemove(c, refCenters, 1.0E-6));

            assertFalse("more to come", reader.next(key, clusterWritable));
        } finally {
            Closeables.close(reader, true);
        }
    }

    /**
     * Runs {@link CanopyDriver} sequentially with the Manhattan measure and the
     * clustering step enabled, then checks that each written cluster center equals
     * the corresponding reference center and that {@code clusteredPoints/part-m-0}
     * holds one record per input point.
     */
    @Test
    public void testClusteringManhattanSeq() throws Exception {
        List<VectorWritable> points = getPointsWritable();
        Configuration config = getConfiguration();
        ClusteringTestUtils.writePointsToFile(points, getTestTempFilePath("testdata/file1"), fs, config);
        Path output = getTestTempDirPath("output");
        CanopyDriver.run(config, getTestTempDirPath("testdata"), output, manhattanDistanceMeasure,
            3.1, 2.1, true, 0.0, true);

        Path clustersPath = new Path(output, "clusters-0-final/part-r-00000");
        int ix = 0;
        // The iterable must be parameterized: a raw one yields Object and the typed loop does not compile.
        for (ClusterWritable clusterWritable
            : new SequenceFileValueIterable<ClusterWritable>(clustersPath, true, config)) {
            assertEquals("Center [" + ix + ']', manhattanCentroids.get(ix), clusterWritable.getValue().getCenter());
            ++ix;
        }

        Path pointsPath = new Path(output, "clusteredPoints/part-m-0");
        long count = HadoopUtil.countRecords(pointsPath, config);
        assertEquals("number of points", points.size(), count);
    }

    /**
     * Runs {@link CanopyDriver} through {@link ToolRunner} with command-line
     * options selecting the Euclidean measure, clustering, overwrite and the
     * sequential method. Checks that each written cluster center equals the
     * corresponding reference center and that {@code clusteredPoints/part-m-0}
     * holds one record per input point.
     */
    @Test
    public void testClusteringEuclideanSeq() throws Exception {
        List<VectorWritable> points = getPointsWritable();
        Configuration config = getConfiguration();
        ClusteringTestUtils.writePointsToFile(points, getTestTempFilePath("testdata/file1"), fs, config);
        Path output = getTestTempDirPath("output");
        String[] args = {
            optKey("input"), getTestTempDirPath("testdata").toString(),
            optKey("output"), output.toString(),
            optKey("distanceMeasure"), EuclideanDistanceMeasure.class.getName(),
            optKey("t1"), "3.1",
            optKey("t2"), "2.1",
            optKey("clustering"),
            optKey("overwrite"),
            optKey("method"), "sequential"
        };
        ToolRunner.run(config, new CanopyDriver(), args);

        Path clustersPath = new Path(output, "clusters-0-final/part-r-00000");
        int ix = 0;
        // The iterable must be parameterized: a raw one yields Object and the typed loop does not compile.
        for (ClusterWritable clusterWritable
            : new SequenceFileValueIterable<ClusterWritable>(clustersPath, true, config)) {
            assertEquals("Center [" + ix + ']', euclideanCentroids.get(ix), clusterWritable.getValue().getCenter());
            ++ix;
        }

        Path pointsPath = new Path(output, "clusteredPoints/part-m-0");
        long count = HadoopUtil.countRecords(pointsPath, config);
        assertEquals("number of points", points.size(), count);
    }

    /**
     * Same as the sequential Euclidean clustering test but with an
     * {@code outlierThreshold} of 0.5: the cluster centers must still equal the
     * reference centers, while only 6 records are expected in
     * {@code clusteredPoints/part-m-0}.
     */
    @Test
    public void testClusteringEuclideanWithOutlierRemovalSeq() throws Exception {
        List<VectorWritable> points = getPointsWritable();
        Configuration config = getConfiguration();
        ClusteringTestUtils.writePointsToFile(points, getTestTempFilePath("testdata/file1"), fs, config);
        Path output = getTestTempDirPath("output");
        String[] args = {
            optKey("input"), getTestTempDirPath("testdata").toString(),
            optKey("output"), output.toString(),
            optKey("distanceMeasure"), EuclideanDistanceMeasure.class.getName(),
            optKey("t1"), "3.1",
            optKey("t2"), "2.1",
            optKey("outlierThreshold"), "0.5",
            optKey("clustering"),
            optKey("overwrite"),
            optKey("method"), "sequential"
        };
        ToolRunner.run(config, new CanopyDriver(), args);

        Path clustersPath = new Path(output, "clusters-0-final/part-r-00000");
        int ix = 0;
        // The iterable must be parameterized: a raw one yields Object and the typed loop does not compile.
        for (ClusterWritable clusterWritable
            : new SequenceFileValueIterable<ClusterWritable>(clustersPath, true, config)) {
            assertEquals("Center [" + ix + ']', euclideanCentroids.get(ix), clusterWritable.getValue().getCenter());
            ++ix;
        }

        Path pointsPath = new Path(output, "clusteredPoints/part-m-0");
        long count = HadoopUtil.countRecords(pointsPath, config);
        int expectedPointsHavingPDFGreaterThanThreshold = 6;
        assertEquals("number of points", expectedPointsHavingPDFGreaterThanThreshold, count);
    }

    /**
     * Writes the sample points to two input files, runs {@link CanopyDriver}
     * non-sequentially with the Manhattan measure and clustering enabled, and
     * checks that {@code clusteredPoints/part-m-00000} holds as many records as
     * there are sample points.
     */
    @Test
    public void testClusteringManhattanMR() throws Exception {
        List<VectorWritable> samplePoints = getPointsWritable();
        Configuration conf = getConfiguration();
        ClusteringTestUtils.writePointsToFile(samplePoints, true, getTestTempFilePath("testdata/file1"), fs, conf);
        ClusteringTestUtils.writePointsToFile(samplePoints, true, getTestTempFilePath("testdata/file2"), fs, conf);

        Path outputDir = getTestTempDirPath("output");
        CanopyDriver.run(conf, getTestTempDirPath("testdata"), outputDir, manhattanDistanceMeasure,
            3.1, 2.1, true, 0.0, false);

        Path clusteredPoints = new Path(outputDir, "clusteredPoints/part-m-00000");
        long recordCount = HadoopUtil.countRecords(clusteredPoints, conf);
        assertEquals("number of points", samplePoints.size(), recordCount);
    }

    /**
     * Writes the sample points to two input files, runs {@link CanopyDriver}
     * through {@link ToolRunner} with the Euclidean measure, clustering and
     * overwrite options, and checks that {@code clusteredPoints/part-m-00000}
     * holds as many records as there are sample points.
     */
    @Test
    public void testClusteringEuclideanMR() throws Exception {
        List<VectorWritable> samplePoints = getPointsWritable();
        Configuration conf = getConfiguration();
        ClusteringTestUtils.writePointsToFile(samplePoints, true, getTestTempFilePath("testdata/file1"), fs, conf);
        ClusteringTestUtils.writePointsToFile(samplePoints, true, getTestTempFilePath("testdata/file2"), fs, conf);

        Path outputDir = getTestTempDirPath("output");
        String[] args = {
            optKey("input"), getTestTempDirPath("testdata").toString(),
            optKey("output"), outputDir.toString(),
            optKey("distanceMeasure"), EuclideanDistanceMeasure.class.getName(),
            optKey("t1"), "3.1",
            optKey("t2"), "2.1",
            optKey("clustering"),
            optKey("overwrite")
        };
        // As in the original, the driver is given a configuration obtained from a
        // separate getConfiguration() call rather than the conf local.
        ToolRunner.run(getConfiguration(), new CanopyDriver(), args);

        Path clusteredPoints = new Path(outputDir, "clusteredPoints/part-m-00000");
        long recordCount = HadoopUtil.countRecords(clusteredPoints, conf);
        assertEquals("number of points", samplePoints.size(), recordCount);
    }

    /**
     * Same as the MapReduce Euclidean clustering test but with an
     * {@code outlierThreshold} of 0.7: only 8 records are expected in
     * {@code clusteredPoints/part-m-00000}.
     */
    @Test
    public void testClusteringEuclideanWithOutlierRemovalMR() throws Exception {
        List<VectorWritable> samplePoints = getPointsWritable();
        Configuration conf = getConfiguration();
        ClusteringTestUtils.writePointsToFile(samplePoints, true, getTestTempFilePath("testdata/file1"), fs, conf);
        ClusteringTestUtils.writePointsToFile(samplePoints, true, getTestTempFilePath("testdata/file2"), fs, conf);

        Path outputDir = getTestTempDirPath("output");
        String[] args = {
            optKey("input"), getTestTempDirPath("testdata").toString(),
            optKey("output"), outputDir.toString(),
            optKey("distanceMeasure"), EuclideanDistanceMeasure.class.getName(),
            optKey("t1"), "3.1",
            optKey("t2"), "2.1",
            optKey("outlierThreshold"), "0.7",
            optKey("clustering"),
            optKey("overwrite")
        };
        // As in the original, the driver is given a configuration obtained from a
        // separate getConfiguration() call rather than the conf local.
        ToolRunner.run(getConfiguration(), new CanopyDriver(), args);

        Path clusteredPoints = new Path(outputDir, "clusteredPoints/part-m-00000");
        long recordCount = HadoopUtil.countRecords(clusteredPoints, conf);
        int expectedPointsAfterOutlierRemoval = 8;
        assertEquals("number of points", expectedPointsAfterOutlierRemoval, recordCount);
    }

    /**
     * Configures t1/t2 as 3.1/2.1 and t3/t4 as 1.1/0.1, sets up a
     * {@link CanopyReducer}, and checks that its clusterer then reports 1.1 and
     * 0.1 (the t3/t4 values) as its T1 and T2.
     */
    @Test
    public void testCanopyReducerT3T4Configuration() throws Exception {
        CanopyReducer reducer = new CanopyReducer();
        Configuration conf = getConfiguration();
        conf.set("org.apache.mahout.clustering.canopy.measure", "org.apache.mahout.common.distance.ManhattanDistanceMeasure");
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1));
        conf.set("org.apache.mahout.clustering.canopy.t3", String.valueOf(1.1));
        conf.set("org.apache.mahout.clustering.canopy.t4", String.valueOf(0.1));
        conf.set("org.apache.mahout.clustering.canopy.canopyFilter", "0");

        // Parameterized instead of raw types, consistent with the other reducer tests.
        DummyRecordWriter<Text, ClusterWritable> writer = new DummyRecordWriter<Text, ClusterWritable>();
        Reducer<Text, VectorWritable, Text, ClusterWritable>.Context context =
            DummyRecordWriter.build(reducer, conf, writer, Text.class, VectorWritable.class);
        reducer.setup(context);

        assertEquals(1.1, reducer.getCanopyClusterer().getT1(), 1.0E-6);
        assertEquals(0.1, reducer.getCanopyClusterer().getT2(), 1.0E-6);
    }

    /**
     * Runs {@link CanopyMapper} with the Manhattan measure and a
     * {@code canopyFilter} of 3, and checks that only two centroids are emitted
     * under the single key {@code "centroid"}.
     */
    @Test
    public void testCanopyMapperClusterFilter() throws Exception {
        CanopyMapper mapper = new CanopyMapper();
        Configuration conf = getConfiguration();
        conf.set("org.apache.mahout.clustering.canopy.measure", manhattanDistanceMeasure.getClass().getName());
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1));
        conf.set("org.apache.mahout.clustering.canopy.canopyFilter", "3");

        // Parameterized writer/context instead of raw types.
        DummyRecordWriter<Text, VectorWritable> writer = new DummyRecordWriter<Text, VectorWritable>();
        Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable>.Context context =
            DummyRecordWriter.build(mapper, conf, writer);
        mapper.setup(context);

        List<VectorWritable> points = getPointsWritable();
        for (VectorWritable point : points) {
            mapper.map(new Text(), point, context);
        }
        mapper.cleanup(context);

        assertEquals("Number of map results", 1, writer.getData().size());
        List<VectorWritable> data = writer.getValue(new Text("centroid"));
        assertEquals("Number of centroids", 2, data.size());
    }

    /**
     * Runs {@link CanopyReducer} with the Manhattan measure and a
     * {@code canopyFilter} of 3, and checks that only two keys are written.
     */
    @Test
    public void testCanopyReducerClusterFilter() throws Exception {
        CanopyReducer reducer = new CanopyReducer();
        Configuration conf = getConfiguration();
        conf.set("org.apache.mahout.clustering.canopy.measure", "org.apache.mahout.common.distance.ManhattanDistanceMeasure");
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(3.1));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(2.1));
        conf.set("org.apache.mahout.clustering.canopy.canopyFilter", "3");

        // Parameterized writer/context instead of raw types.
        DummyRecordWriter<Text, ClusterWritable> writer = new DummyRecordWriter<Text, ClusterWritable>();
        Reducer<Text, VectorWritable, Text, ClusterWritable>.Context context =
            DummyRecordWriter.build(reducer, conf, writer, Text.class, VectorWritable.class);
        reducer.setup(context);

        List<VectorWritable> points = getPointsWritable();
        reducer.reduce(new Text("centroid"), points, context);

        Set<Text> keys = writer.getKeys();
        assertEquals("Number of centroids", 2, keys.size());
    }
}

