package org.apache.mahout.classifier.sgd;

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Ordering;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import org.apache.mahout.classifier.NewsgroupHelper;
import org.apache.mahout.vectorizer.encoders.Dictionary;

/* loaded from: input_file:org/apache/mahout/classifier/sgd/TrainNewsGroups.class */
/**
 * Command-line example that trains an {@link AdaptiveLogisticRegression} model on a
 * directory tree of documents, writes the first model of the best learner to
 * {@code news-group.model} in the JVM temporary directory, and prints a word-count report.
 *
 * <p>Every sub-directory of the input directory is treated as one group: its name is
 * interned in a {@link Dictionary} and every file inside it becomes a training example.
 */
public final class TrainNewsGroups {
    /**
     * Highest zero-based rank printed by the word-count report; ranks
     * {@code 0..LAST_WORD_COUNT_RANK} inclusive are printed.
     */
    private static final int LAST_WORD_COUNT_RANK = 1000;

    private TrainNewsGroups() {
    }

    /**
     * Trains the model, stores it, and prints the most frequent word counts.
     *
     * @param args {@code args[0]} is the base directory whose sub-directories contain the
     *             training files; the optional {@code args[1]} is an integer (default
     *             {@code 0}) handed unchanged to {@code NewsgroupHelper.encodeFeatureVector},
     *             {@code SGDHelper.analyzeState} and {@code SGDHelper.dissect}
     * @throws IllegalArgumentException if no base directory argument is given
     * @throws NumberFormatException    if {@code args[1]} is present but not an integer
     * @throws IOException              if a directory cannot be listed, or if a callee that
     *                                  declares {@code IOException} fails
     */
    public static void main(String[] args) throws IOException {
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException(
                    "Usage: TrainNewsGroups <training-directory> [leak-type]");
        }
        File base = new File(args[0]);
        HashMultiset<String> overallCounts = HashMultiset.create();
        int leakType = args.length > 1 ? Integer.parseInt(args[1]) : 0;

        Dictionary newsGroups = new Dictionary();
        NewsgroupHelper helper = new NewsgroupHelper();
        helper.getEncoder().setProbes(2);

        // NOTE(review): per the Mahout API the arguments are (numCategories, numFeatures,
        // prior); 20 and 10000 are hard-coded here - confirm they match the data set and
        // the feature-vector size used by NewsgroupHelper.
        AdaptiveLogisticRegression learningAlgorithm =
                new AdaptiveLogisticRegression(20, 10000, new L1());
        learningAlgorithm.setInterval(800);
        learningAlgorithm.setAveragingWindow(500);

        ArrayList<File> files = collectTrainingFiles(base, newsGroups);
        Collections.shuffle(files);
        System.out.println(files.size() + " training files");

        SGDInfo info = new SGDInfo();
        int trained = 0;
        for (File file : files) {
            // The name of the containing directory identifies the example's group.
            int actual = newsGroups.intern(file.getParentFile().getName());
            learningAlgorithm.train(
                    actual, helper.encodeFeatureVector(file, actual, leakType, overallCounts));
            trained++;
            SGDHelper.analyzeState(info, leakType, trained, learningAlgorithm.getBest());
        }
        // close() must run before the best learner is dissected and serialized below,
        // so it cannot be replaced by a try-with-resources around the whole method.
        learningAlgorithm.close();
        SGDHelper.dissect(leakType, newsGroups, learningAlgorithm, files, overallCounts);
        System.out.println("exiting main");

        File modelFile = new File(System.getProperty("java.io.tmpdir"), "news-group.model");
        ModelSerializer.writeBinary(
                modelFile.getAbsolutePath(),
                learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0));

        printTopWordCounts(overallCounts);
    }

    /**
     * Interns the name of every sub-directory of {@code base} in {@code newsGroups} and
     * gathers the entries of those sub-directories. Plain files directly under
     * {@code base} are ignored.
     */
    private static ArrayList<File> collectTrainingFiles(File base, Dictionary newsGroups)
            throws IOException {
        ArrayList<File> files = new ArrayList<File>();
        for (File newsgroup : listFilesOrFail(base)) {
            if (newsgroup.isDirectory()) {
                newsGroups.intern(newsgroup.getName());
                files.addAll(Arrays.asList(listFilesOrFail(newsgroup)));
            }
        }
        return files;
    }

    /**
     * Lists {@code dir}, turning the {@code null} that {@link File#listFiles()} returns
     * for a non-directory or on an I/O error into a descriptive {@link IOException}
     * instead of a later {@code NullPointerException}.
     */
    private static File[] listFilesOrFail(File dir) throws IOException {
        File[] entries = dir.listFiles();
        if (entries == null) {
            throw new IOException("Cannot list files in " + dir.getAbsolutePath()
                    + " (not a directory, or an I/O error occurred)");
        }
        return entries;
    }

    /**
     * Prints the per-word counts in descending order, one {@code rank<TAB>count} line
     * each, stopping after rank {@link #LAST_WORD_COUNT_RANK}.
     */
    private static void printTopWordCounts(HashMultiset<String> overallCounts) {
        ArrayList<Integer> counts = new ArrayList<Integer>();
        System.out.println("Word counts");
        for (String word : overallCounts.elementSet()) {
            counts.add(overallCounts.count(word));
        }
        Collections.sort(counts, Ordering.<Integer>natural().reverse());
        int rank = 0;
        for (Integer count : counts) {
            System.out.println(rank + "\t" + count);
            rank++;
            if (rank > LAST_WORD_COUNT_RANK) {
                break;
            }
        }
    }
}
