/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.classifier.sgd;

import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import com.google.common.io.Resources;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Locale;
import java.util.Set;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.option.DefaultOption;
import org.apache.commons.cli2.util.HelpFormatter;
import org.apache.mahout.classifier.sgd.CsvRecordFactory;
import org.apache.mahout.classifier.sgd.LogisticModelParameters;
import org.apache.mahout.classifier.sgd.OnlineLogisticRegression;
import org.apache.mahout.classifier.sgd.RecordFactory;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

/**
 * Command-line driver that trains an {@link OnlineLogisticRegression} on CSV input.
 *
 * <p>The run parses the command line into a {@link LogisticModelParameters} instance, makes the
 * requested number of passes over the input, writes the parameters object to the output file via
 * {@link LogisticModelParameters#saveTo}, and prints a summary of the learned coefficients.
 *
 * <p>All state is held in static fields, so the results of the most recent run are available
 * through {@link #getModel()} and {@link #getParameters()}.
 */
public final class TrainLogistic {
    private static String inputFile;
    private static String outputFile;
    private static LogisticModelParameters lmp;
    private static int passes;
    private static boolean scores;
    private static OnlineLogisticRegression model;

    private TrainLogistic() {
    }

    /**
     * Program entry point; runs the trainer and reports to standard output.
     *
     * @param args command-line arguments, see {@link #parseArgs(String[])}
     * @throws Exception if reading the input, training, or writing the model fails
     */
    public static void main(String[] args) throws Exception {
        mainToOutput(args, new PrintWriter(System.out, true));
    }

    /**
     * Parses {@code args}, trains the model, saves it, and prints a coefficient summary.
     *
     * <p>Does nothing when argument parsing reports failure (or help was requested).
     *
     * @param args   command-line arguments
     * @param output destination for diagnostics and the final model summary
     * @throws Exception if reading the input, training, or writing the model fails
     */
    static void mainToOutput(String[] args, PrintWriter output) throws Exception {
        if (!parseArgs(args)) {
            return;
        }

        double logPEstimate = 0.0;
        int samples = 0;
        CsvRecordFactory csv = lmp.getCsvRecordFactory();
        OnlineLogisticRegression lr = lmp.createRegression();

        for (int pass = 0; pass < passes; ++pass) {
            BufferedReader in = open(inputFile);
            try {
                // The first line is handed to the record factory separately from the data lines.
                csv.firstLine(in.readLine());

                String line = in.readLine();
                while (line != null) {
                    Vector input = new RandomAccessSparseVector(lmp.getNumFeatures());
                    int targetValue = csv.processLine(line, input);

                    // Score the example BEFORE training on it, so the estimate is out-of-sample.
                    double logP = lr.logLikelihood(targetValue, input);
                    if (!Double.isInfinite(logP)) {
                        // Plain running mean for the first 20 finite samples, then an
                        // exponential moving average with weight 0.05 on the newest value.
                        if (samples < 20) {
                            logPEstimate = (samples * logPEstimate + logP) / (samples + 1);
                        } else {
                            logPEstimate = 0.95 * logPEstimate + 0.05 * logP;
                        }
                        ++samples;
                    }

                    double p = lr.classifyScalar(input);
                    if (scores) {
                        output.printf(Locale.ENGLISH, "%10d %2d %10.2f %2.4f %10.4f %10.4f\n",
                                samples, targetValue, lr.currentLearningRate(), p, logP, logPEstimate);
                    }

                    lr.train(targetValue, input);
                    line = in.readLine();
                }
            } finally {
                Closeables.closeQuietly(in);
            }
        }

        FileOutputStream modelOutput = new FileOutputStream(outputFile);
        boolean saved = false;
        try {
            lmp.saveTo(modelOutput);
            saved = true;
        } finally {
            // Only swallow a close() failure when saveTo already threw; otherwise a failed
            // close must surface because the model file may be incomplete.
            Closeables.close(modelOutput, !saved);
        }

        // One-line formula: "<target> ~ w1*x1 + w2*x2 ..." using row 0 of the coefficients.
        output.printf(Locale.ENGLISH, "%d\n", lmp.getNumFeatures());
        output.printf(Locale.ENGLISH, "%s ~ ", lmp.getTargetVariable());
        String sep = "";
        for (String v : csv.getTraceDictionary().keySet()) {
            double weight = predictorWeight(lr, 0, csv, v);
            if (weight != 0.0) {
                output.printf(Locale.ENGLISH, "%s%.3f*%s", sep, weight, v);
                sep = " + ";
            }
        }
        output.printf("\n");

        model = lr;

        // Per-row dump: non-zero predictor weights followed by the raw coefficient row.
        for (int row = 0; row < lr.getBeta().numRows(); ++row) {
            for (String key : csv.getTraceDictionary().keySet()) {
                double weight = predictorWeight(lr, row, csv, key);
                if (weight != 0.0) {
                    output.printf(Locale.ENGLISH, "%20s %.5f\n", key, weight);
                }
            }
            for (int column = 0; column < lr.getBeta().numCols(); ++column) {
                output.printf(Locale.ENGLISH, "%15.9f ", lr.getBeta().get(row, column));
            }
            output.println();
        }
    }

    /**
     * Sums the coefficients in {@code row} over every column the trace dictionary lists for
     * {@code predictor}.
     *
     * @param lr        regression whose coefficient matrix is read
     * @param row       row of the coefficient matrix to read
     * @param csv       record factory providing the trace dictionary
     * @param predictor key to look up in the trace dictionary
     * @return the summed coefficient value for the predictor
     */
    private static double predictorWeight(OnlineLogisticRegression lr, int row, RecordFactory csv, String predictor) {
        Set<Integer> columns = csv.getTraceDictionary().get(predictor);
        double weight = 0.0;
        for (Integer column : columns) {
            weight += lr.getBeta().get(row, column);
        }
        return weight;
    }

    /**
     * Parses the command line and populates the static configuration fields.
     *
     * <p>Required options: {@code --input}, {@code --output}, {@code --predictors},
     * {@code --types}, {@code --target}, {@code --categories}. Optional options with defaults:
     * {@code --features} (1000), {@code --passes} (2), {@code --lambda} (1e-4),
     * {@code --rate} (1e-3). Flags: {@code --noBias}, {@code --scores}, {@code --quiet}
     * (accepted, but its value is not read by this class), {@code --help}.
     *
     * @param args raw command-line arguments
     * @return {@code true} if the arguments were parsed and the configuration was stored;
     *         {@code false} if the parser returned no command line
     */
    private static boolean parseArgs(String[] args) {
        DefaultOptionBuilder builder = new DefaultOptionBuilder();
        ArgumentBuilder argumentBuilder = new ArgumentBuilder();

        Option help = builder.withLongName("help")
                .withDescription("print this list")
                .create();
        Option quiet = builder.withLongName("quiet")
                .withDescription("be extra quiet")
                .create();
        Option scoresOption = builder.withLongName("scores")
                .withDescription("output score diagnostics during training")
                .create();
        Option inputFileOption = builder.withLongName("input")
                .withRequired(true)
                .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
                .withDescription("where to get training data")
                .create();
        Option outputFileOption = builder.withLongName("output")
                .withRequired(true)
                .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
                .withDescription("where to write the trained model")
                .create();
        Option predictors = builder.withLongName("predictors")
                .withRequired(true)
                .withArgument(argumentBuilder.withName("p").create())
                .withDescription("a list of predictor variables")
                .create();
        Option types = builder.withLongName("types")
                .withRequired(true)
                .withArgument(argumentBuilder.withName("t").create())
                .withDescription("a list of predictor variable types (numeric, word, or text)")
                .create();
        Option target = builder.withLongName("target")
                .withRequired(true)
                .withArgument(argumentBuilder.withName("target").withMaximum(1).create())
                .withDescription("the name of the target variable")
                .create();
        Option features = builder.withLongName("features")
                .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create())
                .withDescription("the number of internal hashed features to use")
                .create();
        Option passesOption = builder.withLongName("passes")
                .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create())
                .withDescription("the number of times to pass over the input data")
                .create();
        Option lambda = builder.withLongName("lambda")
                .withArgument(argumentBuilder.withName("lambda").withDefault("1e-4").withMaximum(1).create())
                .withDescription("the amount of coefficient decay to use")
                .create();
        Option rate = builder.withLongName("rate")
                .withArgument(argumentBuilder.withName("learningRate").withDefault("1e-3").withMaximum(1).create())
                .withDescription("the learning rate")
                .create();
        Option noBias = builder.withLongName("noBias")
                .withDescription("don't include a bias term")
                .create();
        Option targetCategories = builder.withLongName("categories")
                .withRequired(true)
                .withArgument(argumentBuilder.withName("number").withMaximum(1).create())
                .withDescription("the number of target categories to be considered")
                .create();

        // Every option read below must be registered here, including --scores; an option that
        // is missing from the group is unknown to the parser.
        Group normalArgs = new GroupBuilder()
                .withOption(help)
                .withOption(quiet)
                .withOption(scoresOption)
                .withOption(inputFileOption)
                .withOption(outputFileOption)
                .withOption(target)
                .withOption(targetCategories)
                .withOption(predictors)
                .withOption(types)
                .withOption(passesOption)
                .withOption(lambda)
                .withOption(rate)
                .withOption(noBias)
                .withOption(features)
                .create();

        Parser parser = new Parser();
        parser.setHelpOption(help);
        parser.setHelpTrigger("--help");
        parser.setGroup(normalArgs);
        parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));

        CommandLine cmdLine = parser.parseAndHelp(args);
        if (cmdLine == null) {
            return false;
        }

        TrainLogistic.inputFile = getStringArgument(cmdLine, inputFileOption);
        TrainLogistic.outputFile = getStringArgument(cmdLine, outputFileOption);

        ArrayList<String> typeList = Lists.newArrayList();
        for (Object x : cmdLine.getValues(types)) {
            typeList.add(x.toString());
        }
        ArrayList<String> predictorList = Lists.newArrayList();
        for (Object x : cmdLine.getValues(predictors)) {
            predictorList.add(x.toString());
        }

        lmp = new LogisticModelParameters();
        lmp.setTargetVariable(getStringArgument(cmdLine, target));
        lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories));
        lmp.setNumFeatures(getIntegerArgument(cmdLine, features));
        lmp.setUseBias(!getBooleanArgument(cmdLine, noBias));
        lmp.setTypeMap(predictorList, typeList);
        lmp.setLambda(getDoubleArgument(cmdLine, lambda));
        lmp.setLearningRate(getDoubleArgument(cmdLine, rate));

        TrainLogistic.scores = getBooleanArgument(cmdLine, scoresOption);
        TrainLogistic.passes = getIntegerArgument(cmdLine, passesOption);
        return true;
    }

    /** Returns the value of {@code option} cast to {@code String}. */
    private static String getStringArgument(CommandLine cmdLine, Option option) {
        return (String) cmdLine.getValue(option);
    }

    /** Returns whether {@code option} is present on the command line. */
    private static boolean getBooleanArgument(CommandLine cmdLine, Option option) {
        return cmdLine.hasOption(option);
    }

    /**
     * Returns the value of {@code option} parsed as an {@code int}.
     *
     * @throws NumberFormatException if the value is not a valid integer
     */
    private static int getIntegerArgument(CommandLine cmdLine, Option option) {
        return Integer.parseInt((String) cmdLine.getValue(option));
    }

    /**
     * Returns the value of {@code option} parsed as a {@code double}.
     *
     * @throws NumberFormatException if the value is not a valid floating-point number
     */
    private static double getDoubleArgument(CommandLine cmdLine, Option option) {
        return Double.parseDouble((String) cmdLine.getValue(option));
    }

    /**
     * Returns the regression stored by the most recent completed training run, or
     * {@code null} if no run has completed yet.
     */
    public static OnlineLogisticRegression getModel() {
        return model;
    }

    /**
     * Returns the parameters built by the most recent successful argument parse, or
     * {@code null} if arguments have not been parsed yet.
     */
    public static LogisticModelParameters getParameters() {
        return lmp;
    }

    /**
     * Opens {@code inputFile} as UTF-8 text, trying it first as a classpath resource and then
     * as a file-system path.
     *
     * @param inputFile resource name or file path
     * @return a buffered reader over the input; the caller is responsible for closing it
     * @throws IOException if the resource or file cannot be opened
     */
    static BufferedReader open(String inputFile) throws IOException {
        InputStream in;
        try {
            in = Resources.getResource(inputFile).openStream();
        } catch (IllegalArgumentException notAResource) {
            // Deliberate fallback: the name did not resolve as a resource, so treat it as a path.
            in = new FileInputStream(new File(inputFile));
        }
        return new BufferedReader(new InputStreamReader(in, Charsets.UTF_8));
    }
}

