/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.utils.vectors;

import com.google.common.io.Closeables;
import com.google.common.io.Files;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.Iterator;
import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.utils.vectors.VectorHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class VectorDumper
extends AbstractJob {
    private static final Logger log = LoggerFactory.getLogger(VectorDumper.class);

    private VectorDumper() {
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public int run(String[] args) throws Exception {
        Writer writer;
        boolean shouldClose;
        Path[] pathArr;
        Path input;
        this.addInputOption();
        this.addOutputOption();
        this.addOption("useKey", "u", "If the Key is a vector than dump that instead");
        this.addOption("printKey", "p", "Print out the key as well, delimited by tab (or the value if useKey is true");
        this.addOption("dictionary", "d", "The dictionary file.", false);
        this.addOption("dictionaryType", "dt", "The dictionary file type (text|seqfile)", false);
        this.addOption("csv", "c", "Output the Vector as CSV.  Otherwise it substitutes in the terms for vector cell entries");
        this.addOption("namesAsComments", "n", "If using CSV output, optionally add a comment line for each NamedVector (if the vector is one) printing out the name");
        this.addOption("nameOnly", "N", "Use the name as the value for each NamedVector (skip other vectors)");
        this.addOption("sortVectors", "sort", "Sort output key/value pairs of the vector entries in abs magnitude descending order");
        this.addOption("quiet", "q", "Print only file contents");
        this.addOption("sizeOnly", "sz", "Dump only the size of the vector");
        this.addOption("numItems", "ni", "Output at most <n> vecors", false);
        this.addOption("vectorSize", "vs", "Truncate vectors to <vs> length when dumping (most useful when in conjunction with -sort", false);
        this.addOption(VectorDumper.buildOption((String)"filter", (String)"fi", (String)"Only dump out those vectors whose name matches the filter.  Multiple items may be specified by repeating the argument.", (boolean)true, (int)1, (int)Integer.MAX_VALUE, (boolean)false, null));
        if (this.parseArguments(args, false, true) == null) {
            return -1;
        }
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get((Configuration)conf);
        FileStatus fileStatus = fs.getFileStatus(input = this.getInputPath());
        if (fileStatus.isDir()) {
            pathArr = FileUtil.stat2Paths((FileStatus[])fs.listStatus(input, PathFilters.logsCRCFilter()));
        } else {
            FileStatus[] inputPaths = fs.globStatus(input);
            pathArr = new Path[inputPaths.length];
            int i = 0;
            for (FileStatus fstatus : inputPaths) {
                pathArr[i++] = fstatus.getPath();
            }
        }
        String dictionaryType = this.getOption("dictionaryType", "text");
        boolean sortVectors = this.hasOption("sortVectors");
        boolean quiet = this.hasOption("quiet");
        if (!quiet) {
            log.info("Sort? {}", (Object)sortVectors);
        }
        String[] dictionary = null;
        if (this.hasOption("dictionary")) {
            String dictFile = this.getOption("dictionary");
            if ("text".equals(dictionaryType)) {
                dictionary = VectorHelper.loadTermDictionary(new File(dictFile));
            } else if ("sequencefile".equals(dictionaryType)) {
                dictionary = VectorHelper.loadTermDictionary(conf, dictFile);
            } else {
                throw new IOException("Invalid dictionary type: " + dictionaryType);
            }
        }
        HashSet filters = this.hasOption("filter") ? new HashSet(this.getOptions("filter")) : null;
        boolean useCSV = this.hasOption("csv");
        boolean sizeOnly = this.hasOption("sizeOnly");
        boolean nameOnly = this.hasOption("nameOnly");
        boolean namesAsComments = this.hasOption("namesAsComments");
        boolean transposeKeyValue = this.hasOption("vectorAsKey");
        File output = this.getOutputFile();
        if (output != null) {
            shouldClose = true;
            log.info("Output file: {}", (Object)output);
            Files.createParentDirs((File)output);
            writer = Files.newWriter((File)output, (Charset)Charsets.UTF_8);
        } else {
            shouldClose = false;
            writer = new OutputStreamWriter((OutputStream)System.out, Charsets.UTF_8);
        }
        try {
            boolean printKey = this.hasOption("printKey");
            if (useCSV && dictionary != null) {
                writer.write("#");
                for (int j = 0; j < dictionary.length; ++j) {
                    writer.write(dictionary[j]);
                    if (j >= dictionary.length - 1) continue;
                    writer.write(44);
                }
                writer.write(10);
            }
            Long numItems = null;
            if (this.hasOption("numItems")) {
                numItems = Long.parseLong(this.getOption("numItems"));
                if (quiet) {
                    writer.append("#Max Items to dump: ").append(String.valueOf(numItems)).append('\n');
                }
            }
            int maxIndexesPerVector = this.hasOption("vectorSize") ? Integer.parseInt(this.getOption("vectorSize")) : Integer.MAX_VALUE;
            long itemCount = 0L;
            int fileCount = 0;
            for (Path path : pathArr) {
                if (numItems != null && numItems <= itemCount) break;
                if (quiet) {
                    log.info("Processing file '{}' ({}/{})", new Object[]{path, ++fileCount, pathArr.length});
                }
                SequenceFileIterable iterable = new SequenceFileIterable(path, true, conf);
                Iterator iterator = iterable.iterator();
                long i = 0L;
                while (iterator.hasNext() && (numItems == null || itemCount < numItems)) {
                    Vector vector;
                    Pair record = (Pair)iterator.next();
                    Writable keyWritable = (Writable)record.getFirst();
                    Writable valueWritable = (Writable)record.getSecond();
                    if (printKey) {
                        Writable notTheVectorWritable = transposeKeyValue ? valueWritable : keyWritable;
                        writer.write(notTheVectorWritable.toString());
                        writer.write(9);
                    }
                    try {
                        vector = ((VectorWritable)(transposeKeyValue ? keyWritable : valueWritable)).get();
                    }
                    catch (ClassCastException e) {
                        if ((transposeKeyValue ? keyWritable : valueWritable) instanceof WeightedPropertyVectorWritable) {
                            vector = ((WeightedPropertyVectorWritable)(transposeKeyValue ? keyWritable : valueWritable)).getVector();
                        }
                        throw e;
                    }
                    if (filters != null && vector instanceof NamedVector && !filters.contains(((NamedVector)vector).getName())) continue;
                    if (sizeOnly) {
                        if (vector instanceof NamedVector) {
                            writer.write(((NamedVector)vector).getName());
                            writer.write(":");
                        } else {
                            writer.write(String.valueOf(i++));
                            writer.write(":");
                        }
                        writer.write(String.valueOf(vector.size()));
                        writer.write(10);
                    } else if (nameOnly) {
                        if (vector instanceof NamedVector) {
                            writer.write(((NamedVector)vector).getName());
                            writer.write(10);
                        }
                    } else {
                        String fmtStr = useCSV ? VectorHelper.vectorToCSVString(vector, namesAsComments) : VectorHelper.vectorToJson(vector, dictionary, maxIndexesPerVector, sortVectors);
                        writer.write(fmtStr);
                        writer.write(10);
                    }
                    ++itemCount;
                }
            }
            writer.flush();
        }
        finally {
            if (shouldClose) {
                Closeables.close((Closeable)writer, (boolean)false);
            }
        }
        return 0;
    }

    public static void main(String[] args) throws Exception {
        ToolRunner.run((Configuration)new Configuration(), (Tool)new VectorDumper(), (String[])args);
    }
}

