/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.minhash;

import com.google.common.collect.Lists;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

/**
 * Command-line utility that reads a sequence file of (cluster id, listener vector) records,
 * samples clusters, and reports how many listener pairs inside the sampled clusters have a
 * Jaccard coefficient at or above a given threshold.
 *
 * <p>Records belonging to the same cluster are expected to be adjacent in the file: a cluster
 * is considered complete as soon as a record with a different key is read (or the file ends).
 */
public final class LastfmClusterEvaluator {
    /** Number of bytes in one mebibyte, used to report memory usage in MB. */
    private static final long BYTES_PER_MB = 0x100000L;

    private LastfmClusterEvaluator() {
    }

    /**
     * Builds a short human-readable description of the heap currently in use
     * (total memory minus free memory, in whole megabytes).
     *
     * @return a string of the form {@code "Used Memory: [N MB] "}
     */
    private static String usedMemory() {
        Runtime runtime = Runtime.getRuntime();
        long usedBytes = runtime.totalMemory() - runtime.freeMemory();
        return "Used Memory: [" + usedBytes / BYTES_PER_MB + " MB] ";
    }

    /**
     * Computes the Jaccard coefficient of two collections of integers, treating each as a set
     * (duplicates are ignored): {@code |A intersect B| / |A union B|}.
     *
     * @param listenerVector1 elements of the first set
     * @param listenerVector2 elements of the second set
     * @return the Jaccard coefficient in {@code [0.0, 1.0]}; {@code 0.0} when both inputs are empty
     */
    private static double computeSimilarity(Iterable<Integer> listenerVector1, Iterable<Integer> listenerVector2) {
        HashSet<Integer> first = new HashSet<Integer>();
        for (Integer ele : listenerVector1) {
            first.add(ele);
        }
        HashSet<Integer> second = new HashSet<Integer>();
        for (Integer ele : listenerVector2) {
            second.add(ele);
        }
        HashSet<Integer> intersection = new HashSet<Integer>(first);
        intersection.retainAll(second);
        double intersectSize = intersection.size();
        // |A union B| = |A| + |B| - |A intersect B|; avoids building (or mutating) another set.
        double unionSize = first.size() + second.size() - intersection.size();
        return unionSize == 0.0 ? 0.0 : intersectSize / unionSize;
    }

    /**
     * Counts the unordered pairs of listener vectors whose Jaccard coefficient is at least
     * {@code threshold}.
     *
     * @param listenerVectors the listener vectors of a single cluster
     * @param threshold       minimum similarity for a pair to be counted
     * @return number of pairs {@code (i, j)}, {@code i < j}, with similarity {@code >= threshold}
     */
    private static long countSimilarPairs(List<List<Integer>> listenerVectors, double threshold) {
        long similarPairs = 0L;
        int numListeners = listenerVectors.size();
        for (int i = 0; i < numListeners; ++i) {
            List<Integer> listenerVector1 = listenerVectors.get(i);
            for (int j = i + 1; j < numListeners; ++j) {
                if (computeSimilarity(listenerVector1, listenerVectors.get(j)) >= threshold) {
                    ++similarPairs;
                }
            }
        }
        return similarPairs;
    }

    /**
     * Reads the cluster file, evaluates a random sample of its clusters and prints the results
     * to standard output.
     *
     * <p>A cluster is evaluated with probability {@code samplePercentage}; the decision is made
     * once per cluster, when the cluster is complete. The final cluster in the file is evaluated
     * under the same rule once the input is exhausted.
     *
     * @param clusterFile      sequence file of (cluster id, listener vector) records
     * @param threshold        minimum Jaccard coefficient for a pair to count as similar
     * @param samplePercentage fraction of clusters to evaluate, compared against a random
     *                         value in {@code [0.0, 1.0)}
     */
    private static void testPrecision(Path clusterFile, double threshold, double samplePercentage) {
        Configuration conf = new Configuration();
        Random rand = RandomUtils.getRandom();
        Text prevCluster = new Text();
        List<List<Integer>> listenerVectors = Lists.newArrayList();
        long similarListeners = 0L;
        long allListeners = 0L;
        int clustersProcessed = 0;
        for (Pair<Text, VectorWritable> record
                : new SequenceFileIterable<Text, VectorWritable>(clusterFile, true, conf)) {
            Text cluster = record.getFirst();
            VectorWritable point = record.getSecond();
            if (!cluster.equals(prevCluster)) {
                // Key changed: the vectors accumulated so far form the complete previous cluster.
                // Copy the key's contents because the iterable is created with instance reuse on.
                prevCluster.set(cluster.toString());
                // Nothing has accumulated before the very first record; skip that phantom cluster.
                if (!listenerVectors.isEmpty()) {
                    if (rand.nextDouble() <= samplePercentage) {
                        allListeners += listenerVectors.size();
                        similarListeners += countSimilarPairs(listenerVectors, threshold);
                        System.out.print('\r' + usedMemory() + " Clusters processed: " + ++clustersProcessed);
                    }
                    listenerVectors.clear();
                }
            }
            // Materialize the vector's element values as ints for set-based comparison.
            List<Integer> listeners = Lists.newArrayList();
            for (Vector.Element ele : point.get()) {
                listeners.add((int) ele.get());
            }
            listenerVectors.add(listeners);
        }
        // The last cluster has no following key change to trigger it, so evaluate it here.
        if (!listenerVectors.isEmpty() && rand.nextDouble() <= samplePercentage) {
            allListeners += listenerVectors.size();
            similarListeners += countSimilarPairs(listenerVectors, threshold);
            System.out.print('\r' + usedMemory() + " Clusters processed: " + ++clustersProcessed);
        }
        System.out.println("\nTest Results");
        System.out.println("=============");
        System.out.println(" (A) Listeners in same cluster with simiarity above threshold (" + threshold + ") : " + similarListeners);
        System.out.println(" (B) All listeners: " + allListeners);
        NumberFormat format = NumberFormat.getInstance();
        format.setMaximumFractionDigits(2);
        // NOTE(review): (A) counts listener *pairs* while (B) counts listeners, so this ratio can
        // exceed 100 for large similar clusters — confirm whether that is the intended metric.
        // Guard against 0/0 (NaN) when no cluster was sampled.
        double precision = allListeners == 0L
                ? 0.0
                : (double) similarListeners / (double) allListeners * 100.0;
        System.out.println(" Average cluster precision: A/B = " + format.format(precision));
    }

    /**
     * Entry point. Expects three arguments: the cluster file path, the similarity threshold and
     * the sample percentage. Prints usage information and returns when fewer are supplied.
     *
     * @param args command-line arguments
     * @throws NumberFormatException if the threshold or sample percentage is not a valid double
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.out.println("LastfmClusterEvaluation <cluster-file> <threshold> <sample-percentage>");
            System.out.println("      <cluster-file>: Absolute Path of file containing cluster information in DEBUG format");
            System.out.println("         <threshold>: Minimum threshold for jaccard co-efficient for considering two items");
            System.out.println("                      in a cluster to be really similar. Should be between 0.0 and 1.0");
            System.out.println(" <sample-percentage>: Percentage of clusters to sample. Should be between 0.0 and 1.0");
            return;
        }
        Path clusterFile = new Path(args[0]);
        double threshold = Double.parseDouble(args[1]);
        double samplePercentage = Double.parseDouble(args[2]);
        testPrecision(clusterFile, threshold, samplePercentage);
    }
}

