/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.mllib.feature;

import java.util.Arrays;
import java.util.List;
import org.apache.spark.SharedSparkSession;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.feature.HashingTF;
import org.apache.spark.mllib.feature.IDF;
import org.apache.spark.mllib.linalg.Vector;
import org.junit.Assert;
import org.junit.Test;

/**
 * Java-API smoke tests for the MLlib TF-IDF pipeline ({@link HashingTF} followed by {@link IDF}).
 *
 * <p>Both tests run the same three-sentence corpus through the pipeline and check the weight
 * stored at the hashed index of the term {@code "this"}. That term occurs in every document of
 * the corpus, and the tests expect its TF-IDF weight to be zero in every resulting vector.
 */
public class JavaTfIdfSuite
extends SharedSparkSession {
    /** Absolute tolerance used when comparing a TF-IDF weight against zero. */
    private static final double TOLERANCE = 1.0E-15;

    /** Number of partitions the sample corpus is split into. */
    private static final int NUM_PARTITIONS = 2;

    /** Checks the pipeline with an {@link IDF} built by its no-argument constructor. */
    @Test
    public void tfIdf() {
        this.assertTermInAllDocumentsHasZeroWeight(new IDF());
    }

    /**
     * Checks the pipeline with an {@link IDF} built with the constructor argument {@code 2}.
     * NOTE(review): the argument is presumably the minimum document frequency (as the test name
     * suggests) -- confirm against the {@code IDF} API documentation.
     */
    @Test
    public void tfIdfMinimumDocumentFrequency() {
        this.assertTermInAllDocumentsHasZeroWeight(new IDF(2));
    }

    /**
     * Runs the sample corpus through {@code HashingTF} and the given {@code IDF}, then asserts
     * that the entry at the hashed index of {@code "this"} is zero in every TF-IDF vector.
     *
     * @param idf the inverse-document-frequency estimator to fit on the term frequencies
     */
    private void assertTermInAllDocumentsHasZeroWeight(IDF idf) {
        HashingTF tf = new HashingTF();
        // Every document contains the term "this".
        JavaRDD<List<String>> documents = this.jsc.parallelize(
            Arrays.asList(
                Arrays.asList("this is a sentence".split(" ")),
                Arrays.asList("this is another sentence".split(" ")),
                Arrays.asList("this is still a sentence".split(" "))),
            NUM_PARTITIONS);

        JavaRDD<Vector> termFreqs = tf.transform(documents);
        // Result intentionally discarded: the call only verifies that the term-frequency RDD
        // can be collected through the Java API.
        termFreqs.collect();

        JavaRDD<Vector> tfIdfs = idf.fit(termFreqs).transform(termFreqs);
        List<Vector> localTfIdfs = tfIdfs.collect();

        int indexOfThis = tf.indexOf("this");
        for (Vector v : localTfIdfs) {
            Assert.assertEquals(0.0, v.apply(indexOfThis), TOLERANCE);
        }
    }
}

