package org.apache.mahout.vectorizer.term;

import com.google.common.io.Closeables;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.StringTuple;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.common.lucene.IteratorTokenStream;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.map.OpenObjectIntHashMap;
import org.apache.mahout.vectorizer.DictionaryVectorizer;
import org.apache.mahout.vectorizer.common.PartialVectorMerger;

/* loaded from: input_file:org/apache/mahout/vectorizer/term/TFPartialVectorReducer.class */
/**
 * Reducer that turns the token list of a document into a term-frequency vector.
 *
 * <p>Each token (or shingle, when {@code maxNGramSize >= 2}) that is present in the
 * dictionary loaded during {@link #setup} increments the vector cell at the index
 * the dictionary maps it to. Tokens that are empty or absent from the dictionary
 * are ignored.
 */
public class TFPartialVectorReducer extends Reducer<Text, StringTuple, Text, VectorWritable> {
    /** Cardinality of every emitted vector; read from {@link PartialVectorMerger#DIMENSION}. */
    private int dimension;
    /** When set, the result is converted to a {@link SequentialAccessSparseVector} before emission. */
    private boolean sequentialAccess;
    /** When set, the result is wrapped in a {@link NamedVector} named after the reduce key. */
    private boolean namedVector;
    /** Term to vector-index mapping, populated once in {@link #setup}. */
    private final OpenObjectIntHashMap<String> dictionary = new OpenObjectIntHashMap<>();
    /** Maximum shingle size; values below 2 disable shingling. */
    private int maxNGramSize = 1;

    /**
     * Builds the term-frequency vector for one key and writes it to the context.
     *
     * <p>Only the first {@link StringTuple} of {@code iterable} is consumed; any
     * further values for the same key are ignored. If the iterable is empty nothing
     * is written. If no token matched the dictionary, nothing is written and the
     * {@code TFPartialVectorReducer/emptyVectorCount} counter is incremented instead.
     *
     * @param text     the reduce key; also used as the vector name when named vectors are enabled
     * @param iterable the values grouped under {@code text}
     * @param context  the reducer context used for output and counters
     * @throws IOException          if tokenizing or writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    public void reduce(Text text, Iterable<StringTuple> iterable, Context context) throws IOException, InterruptedException {
        Iterator<StringTuple> values = iterable.iterator();
        if (!values.hasNext()) {
            return;
        }
        StringTuple document = values.next();
        // The token count is only used as the initial capacity of the sparse vector.
        Vector vector = new RandomAccessSparseVector(dimension, document.length());

        if (maxNGramSize >= 2) {
            ShingleFilter shingles =
                    new ShingleFilter(new IteratorTokenStream(document.getEntries().iterator()), maxNGramSize);
            // The whole reset/iterate/end lifecycle sits inside one try so the filter
            // is closed even when reset(), incrementToken() or end() throws.
            try {
                shingles.reset();
                CharTermAttribute termAttribute = shingles.getAttribute(CharTermAttribute.class);
                // NOTE(review): the first pass runs before any incrementToken() call; the
                // term attribute is presumably still empty then and is skipped by the
                // empty-term check in countTerm. The loop shape is kept as it was.
                do {
                    countTerm(vector, termAttribute.toString());
                } while (shingles.incrementToken());
                shingles.end();
            } finally {
                Closeables.close(shingles, true);
            }
        } else {
            for (String term : document.getEntries()) {
                countTerm(vector, term);
            }
        }

        if (sequentialAccess) {
            vector = new SequentialAccessSparseVector(vector);
        }
        if (namedVector) {
            vector = new NamedVector(vector, text.toString());
        }

        if (vector.getNumNondefaultElements() > 0) {
            context.write(text, new VectorWritable(vector));
        } else {
            context.getCounter("TFPartialVectorReducer", "emptyVectorCount").increment(1L);
        }
    }

    /** Adds one to the cell of {@code term} in {@code vector} if the term is non-empty and in the dictionary. */
    private void countTerm(Vector vector, String term) {
        if (!term.isEmpty() && dictionary.containsKey(term)) {
            int index = dictionary.get(term);
            vector.setQuick(index, vector.getQuick(index) + 1.0d);
        }
    }

    /**
     * Reads the job configuration and loads the dictionary from the single cached
     * sequence file.
     *
     * <p>Every record's key (via {@code toString()}) is mapped to the record's
     * integer value.
     *
     * @param context the reducer context supplying the configuration
     * @throws IOException          if the configuration or the dictionary file cannot be read
     * @throws InterruptedException if the superclass setup is interrupted
     */
    @Override
    public void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        Configuration configuration = context.getConfiguration();
        dimension = configuration.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
        sequentialAccess = configuration.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
        namedVector = configuration.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
        maxNGramSize = configuration.getInt(DictionaryVectorizer.MAX_NGRAMS, maxNGramSize);

        SequenceFileIterable<Writable, IntWritable> records =
                new SequenceFileIterable<>(HadoopUtil.getSingleCachedFile(configuration), true, configuration);
        for (Pair<Writable, IntWritable> record : records) {
            dictionary.put(record.getFirst().toString(), record.getSecond().get());
        }
    }
}
