package org.apache.spark.ml.feature;

import java.util.Arrays;
import java.util.Iterator;
import org.apache.spark.SharedSparkSession;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:org/apache/spark/ml/feature/JavaHashingTFSuite.class */
/**
 * Java API test for the {@link HashingTF} feature transformer, executed against the
 * {@code spark} session inherited from {@link SharedSparkSession}.
 */
public class JavaHashingTFSuite extends SharedSparkSession {
    /**
     * Runs three labelled sentences through {@link Tokenizer}, {@link HashingTF} and
     * {@link IDF}, then asserts that every vector in the resulting "features" column
     * has exactly the number of features configured on the {@link HashingTF} stage.
     */
    @Test
    public void hashingTF() {
        // Single source of truth for both the HashingTF configuration and the assertion.
        int numFeatures = 20;

        StructType schema = new StructType(new StructField[]{
            new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("sentence", DataTypes.StringType, false, Metadata.empty())
        });
        Dataset<Row> sentenceData = spark.createDataFrame(
            Arrays.asList(
                RowFactory.create(0.0, "Hi I heard about Spark"),
                RowFactory.create(0.0, "I wish Java could use case classes"),
                RowFactory.create(1.0, "Logistic regression models are neat")),
            schema);

        // "sentence" -> "words"
        Tokenizer tokenizer = new Tokenizer()
            .setInputCol("sentence")
            .setOutputCol("words");
        Dataset<Row> wordsData = tokenizer.transform(sentenceData);

        // "words" -> "rawFeatures"
        HashingTF hashingTF = new HashingTF()
            .setInputCol("words")
            .setOutputCol("rawFeatures")
            .setNumFeatures(numFeatures);
        Dataset<Row> featurizedData = hashingTF.transform(wordsData);

        // "rawFeatures" -> "features"; the IDF model is fitted on the same data it transforms.
        IDF idf = new IDF()
            .setInputCol("rawFeatures")
            .setOutputCol("features");
        IDFModel idfModel = idf.fit(featurizedData);
        Dataset<Row> rescaledData = idfModel.transform(featurizedData);

        // "features" is selected first, so it is column 0 of every returned row.
        for (Row row : rescaledData.select("features", "label").takeAsList(3)) {
            Vector features = row.getAs(0);
            Assert.assertEquals(numFeatures, features.size());
        }
    }
}
