package org.apache.mahout.text;

import com.sun.jersey.core.header.QualityFactor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.commandline.DefaultOptionCreator;

/* loaded from: input_file:org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.class */
/**
 * Command-line driver that reads documents from one or more Lucene indexes and hands them to
 * either {@link SequenceFilesFromLuceneStorage} (when {@code method} is {@code sequential}) or
 * {@link SequenceFilesFromLuceneStorageMRJob} (otherwise).
 *
 * <p>Options registered by {@link #run(String[])}: the input and output options, the id field
 * ({@code idField}/{@code id}), the stored text field(s) ({@code fields}/{@code f}), an optional
 * Lucene query ({@code query}/{@code q}), an optional hit limit ({@code maxHits}/{@code n}) and
 * the execution method option.
 */
public class SequenceFilesFromLuceneStorageDriver extends AbstractJob {
    static final String OPTION_ID_FIELD = "idField";
    static final String OPTION_FIELD = "fields";
    static final String OPTION_QUERY = "query";
    static final String OPTION_MAX_HITS = "maxHits";
    static final int DEFAULT_MAX_HITS = Integer.MAX_VALUE;
    static final String SEPARATOR_FIELDS = ",";
    static final Query DEFAULT_QUERY = new MatchAllDocsQuery();
    static final String QUERY_DELIMITER = "'";
    /** Matches every {@link #QUERY_DELIMITER} so quotes around the query can be stripped. */
    private static final Pattern COMPILE = Pattern.compile(QUERY_DELIMITER);

    /**
     * Runs the driver through {@link ToolRunner}.
     *
     * @param strArr command-line arguments
     * @throws Exception if the tool fails
     */
    public static void main(String[] strArr) throws Exception {
        ToolRunner.run(new SequenceFilesFromLuceneStorageDriver(), strArr);
    }

    /**
     * Parses the command line, builds a {@link LuceneStorageConfiguration} and runs the
     * conversion.
     *
     * @param strArr command-line arguments
     * @return {@code -1} if {@code parseArguments} returns {@code null}, {@code 0} otherwise
     * @throws IllegalArgumentException if the query option cannot be parsed as a Lucene query
     * @throws NumberFormatException if the max-hits option is not a valid {@code int}
     * @throws Exception if the underlying conversion fails
     */
    @Override // org.apache.hadoop.util.Tool
    public int run(String[] strArr) throws Exception {
        addOutputOption();
        addInputOption();
        addOption(OPTION_ID_FIELD, "id", "The field in the index containing the id", true);
        addOption(OPTION_FIELD, "f", "The stored field(s) in the index containing text", true);
        // Short flag is the plain literal "q"; it has nothing to do with HTTP quality factors.
        addOption(OPTION_QUERY, "q", "(Optional) Lucene query. Defaults to " + DEFAULT_QUERY.getClass().getSimpleName());
        addOption(OPTION_MAX_HITS, "n", "(Optional) Max hits. Defaults to " + DEFAULT_MAX_HITS);
        addOption(DefaultOptionCreator.methodOption().create());
        if (parseArguments(strArr) == null) {
            return -1;
        }

        Configuration conf = getConf();
        List<Path> indexPaths = toPaths(getInputPath().toString());
        List<String> fields = Arrays.asList(getOption(OPTION_FIELD).split(SEPARATOR_FIELDS));
        LuceneStorageConfiguration lucene2SeqConf =
            newLucene2SeqConfiguration(conf, indexPaths, getOutputPath(), getOption(OPTION_ID_FIELD), fields);

        Query query = DEFAULT_QUERY;
        if (hasOption(OPTION_QUERY)) {
            query = parseQuery(getOption(OPTION_QUERY));
        }
        lucene2SeqConf.setQuery(query);

        int maxHits = DEFAULT_MAX_HITS;
        if (hasOption(OPTION_MAX_HITS)) {
            maxHits = Integer.parseInt(getOption(OPTION_MAX_HITS));
        }
        lucene2SeqConf.setMaxHits(maxHits);

        if (hasOption("method") && "sequential".equals(getOption("method"))) {
            new SequenceFilesFromLuceneStorage().run(lucene2SeqConf);
        } else {
            new SequenceFilesFromLuceneStorageMRJob().run(lucene2SeqConf);
        }
        return 0;
    }

    /**
     * Creates the configuration object passed to the conversion job. Kept as an overridable
     * factory method.
     *
     * @param configuration Hadoop configuration
     * @param list index paths
     * @param path output path
     * @param str name of the id field
     * @param list2 names of the fields to read
     * @return a new {@link LuceneStorageConfiguration} built from the arguments
     */
    public LuceneStorageConfiguration newLucene2SeqConfiguration(Configuration configuration, List<Path> list, Path path, String str, List<String> list2) {
        return new LuceneStorageConfiguration(configuration, list, path, str, list2);
    }

    /** Splits a comma-separated path specification into one {@link Path} per entry. */
    private static List<Path> toPaths(String commaSeparatedPaths) {
        String[] parts = commaSeparatedPaths.split(",");
        List<Path> paths = new ArrayList<Path>(parts.length);
        for (String part : parts) {
            paths.add(new Path(part));
        }
        return paths;
    }

    /**
     * Strips every {@link #QUERY_DELIMITER} from the raw option value and parses the result
     * with a {@link StandardAnalyzer}.
     *
     * @throws IllegalArgumentException wrapping the {@link ParseException} if parsing fails
     */
    private static Query parseQuery(String rawQuery) {
        String queryString = COMPILE.matcher(rawQuery).replaceAll("");
        try {
            // NOTE(review): the query text is also passed as the parser's second constructor
            // argument, which Lucene documents as the default field name. Behaviour is kept
            // as-is; confirm which field unqualified terms are meant to search.
            QueryParser parser = new QueryParser(Version.LUCENE_46, queryString, new StandardAnalyzer(Version.LUCENE_46));
            return parser.parse(queryString);
        } catch (ParseException e) {
            throw new IllegalArgumentException(e.getMessage(), e);
        }
    }
}
