/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.feature;

import java.util.Arrays;
import java.util.List;
import org.apache.spark.SharedSparkSession;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.RegexTokenizer;
import org.apache.spark.ml.feature.TokenizerTestData;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.junit.Assert;
import org.junit.Test;

/**
 * Java API test for {@link RegexTokenizer}.
 *
 * <p>Builds a small DataFrame of {@link TokenizerTestData} rows, runs it through a configured
 * {@code RegexTokenizer}, and checks that the produced {@code tokens} column equals the
 * {@code wantedTokens} column carried by each input row.
 */
public class JavaTokenizerSuite extends SharedSparkSession {

    /**
     * Verifies that a {@code RegexTokenizer} configured with pattern {@code "\\s"}, gaps enabled,
     * lower-casing disabled and a minimum token length of 3 yields the expected tokens.
     *
     * <p>The fixtures pair each raw string with the tokens it should produce; e.g. the expected
     * tokens for {@code "Test of tok."} omit the two-character word {@code "of"} and keep the
     * original casing of {@code "Test"}.
     */
    @Test
    public void regexTokenizer() {
        // The fluent setters return the tokenizer itself, so no casts are required.
        RegexTokenizer myRegExTokenizer = new RegexTokenizer()
            .setInputCol("rawText")
            .setOutputCol("tokens")
            .setPattern("\\s")
            .setGaps(true)
            .setToLowercase(false)
            .setMinTokenLength(3);

        // Each fixture carries the raw text together with the tokens expected for it.
        JavaRDD<TokenizerTestData> rdd = jsc.parallelize(Arrays.asList(
            new TokenizerTestData("Test of tok.", new String[]{"Test", "tok."}),
            new TokenizerTestData("Te,st.  punct", new String[]{"Te,st.", "punct"})));
        Dataset<Row> dataset = spark.createDataFrame(rdd, TokenizerTestData.class);

        // Column 0 is the tokenizer output, column 1 is the expected value from the fixture.
        List<Row> pairs = myRegExTokenizer.transform(dataset)
            .select("tokens", "wantedTokens")
            .collectAsList();

        for (Row r : pairs) {
            Assert.assertEquals(r.get(0), r.get(1));
        }
    }
}

