package org.apache.mahout.vectorizer.encoders;

import com.google.common.collect.ImmutableMap;
import java.util.Locale;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.util.Version;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.DenseVector;
import org.junit.Test;

/* loaded from: input_file:org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.class */
/**
 * Tests for {@link TextValueEncoder} and {@link LuceneTextValueEncoder}: encoding of
 * whitespace-separated text into a fixed-size vector, the string rendering of an encoding,
 * and the Lucene-analyzer-backed variant.
 */
public final class TextValueEncoderTest extends MahoutTestCase {

    /**
     * Encodes a three-word text with the default word encoder and then with a
     * {@link StaticWordValueEncoder}, checking that the text encoder yields the same
     * vector as feeding the individual words to the word encoder directly.
     */
    @Test
    public void testAddToVector() {
        TextValueEncoder textEncoder = new TextValueEncoder("text");
        DenseVector unweighted = new DenseVector(200);
        textEncoder.addToVector("test1 and more", unweighted);
        textEncoder.flush(1.0d, unweighted);
        // Three words produce an L1 norm of 6 with no entry above 1
        // (presumably two distinct probe locations per word, each set to 1 -- confirm against encoder).
        assertEquals(6.0d, unweighted.norm(1.0d), 0.0d);
        assertEquals(1.0d, unweighted.maxValue(), 0.0d);

        // Swap in a dictionary-weighted word encoder; none of the test words are in the dictionary.
        StaticWordValueEncoder wordEncoder = new StaticWordValueEncoder("text");
        wordEncoder.setDictionary(ImmutableMap.of("word1", Double.valueOf(3.0d), "word2", Double.valueOf(1.5d)));
        textEncoder.setWordEncoder(wordEncoder);

        DenseVector weighted = new DenseVector(200);
        textEncoder.addToVector("test1 and more", weighted);
        textEncoder.flush(1.0d, weighted);

        // Encoding the words one by one with the word encoder must give the identical vector.
        DenseVector wordByWord = new DenseVector(200);
        wordEncoder.addToVector("test1", wordByWord);
        wordEncoder.addToVector("and", wordByWord);
        wordEncoder.addToVector("more", wordByWord);
        assertEquals(0.0d, wordByWord.minus(weighted).norm(1.0d), 0.0d);

        // The dot product with the unweighted 0/1 vector equals the full sum, so every
        // non-zero entry of the weighted encoding sits on a location the unweighted one also set.
        assertEquals(wordByWord.zSum(), wordByWord.dot(unweighted), 0.0d);
    }

    /**
     * Checks the textual rendering of an encoded string.
     *
     * <p>The expected text uses '.' as the decimal separator, so the test pins the default
     * locale to English (presumably {@code asString} formats with the default locale --
     * confirm). The previous default is restored afterwards so that this test does not
     * change the JVM-wide locale seen by tests that run later.
     */
    @Test
    public void testAsString() {
        Locale previousDefault = Locale.getDefault();
        try {
            Locale.setDefault(Locale.ENGLISH);
            TextValueEncoder textEncoder = new TextValueEncoder("text");
            assertEquals("[text:test1:1.0000, text:and:1.0000, text:more:1.0000]", textEncoder.asString("test1 and more"));
        } finally {
            // Locale.setDefault mutates global JVM state; always put it back.
            Locale.setDefault(previousDefault);
        }
    }

    /**
     * Exercises {@link LuceneTextValueEncoder} with a whitespace analyzer on a short text,
     * an empty text, and a long text of 1000 distinct tokens.
     *
     * @throws Exception if the encoder or analyzer fails
     */
    @Test
    public void testLuceneEncoding() throws Exception {
        LuceneTextValueEncoder luceneEncoder = new LuceneTextValueEncoder("text");
        luceneEncoder.setAnalyzer(new WhitespaceAnalyzer(Version.LUCENE_34));

        // Same input and same expectations as the plain text encoder in testAddToVector.
        DenseVector shortText = new DenseVector(200);
        luceneEncoder.addToVector("test1 and more", shortText);
        luceneEncoder.flush(1.0d, shortText);
        assertEquals(6.0d, shortText.norm(1.0d), 0.0d);
        assertEquals(1.0d, shortText.maxValue(), 0.0d);

        // Empty input must leave the vector untouched.
        DenseVector emptyText = new DenseVector(200);
        luceneEncoder.addToVector("", emptyText);
        luceneEncoder.flush(1.0d, emptyText);
        assertEquals(0.0d, emptyText.norm(1.0d), 0.0d);
        assertEquals(0.0d, emptyText.maxValue(), 0.0d);

        // Long input: 1000 distinct tokens, several thousand characters in total
        // (NOTE(review): presumably sized to exceed an internal analyzer read buffer -- confirm).
        DenseVector longText = new DenseVector(200);
        StringBuilder text = new StringBuilder(5000);
        for (int i = 0; i < 1000; i++) {
            text.append("token_").append(i).append(' ');
        }
        luceneEncoder.addToVector(text.toString(), longText);
        luceneEncoder.flush(1.0d, longText);
        // 1000 tokens give an L1 norm of 2000; with only 200 slots, locations collide and
        // the fullest slot holds 19.
        assertEquals(2000.0d, longText.norm(1.0d), 0.0d);
        assertEquals(19.0d, longText.maxValue(), 0.0d);
    }
}
