/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.text;

import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.apache.mahout.text.SequenceFilesFromMailArchives;
import org.apache.mahout.utils.email.MailOptions;
import org.apache.mahout.utils.email.MailProcessor;

/**
 * Hadoop mapper that splits mbox-style mail archives into one output record per message.
 *
 * <p>Each input value holds the raw bytes of one archive file of a {@link CombineFileSplit};
 * the input key is the index of that file within the split. For every message that carries a
 * {@code Message-ID} header, one record is written:
 * <ul>
 *   <li>key: {@code prefix/relativeFilePath/messageId}</li>
 *   <li>value: the captured header values joined by the configured separator, followed by the
 *       separator and the collected body text (empty unless body inclusion is enabled)</li>
 * </ul>
 * Messages without a {@code Message-ID} header are not emitted.
 */
public class SequenceFilesFromMailArchivesMapper
extends Mapper<IntWritable, BytesWritable, Text, Text> {
    /** Reused output key, to avoid allocating one per record. */
    private final Text outKey = new Text();
    /** Reused output value, to avoid allocating one per record. */
    private final Text outValue = new Text();
    /** Matches the mbox "From " line that starts a new message. */
    private static final Pattern MESSAGE_START = Pattern.compile("^From \\S+@\\S.*\\d{4}$", Pattern.CASE_INSENSITIVE);
    /** Captures the id between the angle brackets of a Message-ID header line. */
    private static final Pattern MESSAGE_ID_PREFIX = Pattern.compile("^message-id: <(.*)>$", Pattern.CASE_INSENSITIVE);
    /** Parsing options, populated from the job configuration in {@link #setup}. */
    private MailOptions options;

    /**
     * Builds the {@link MailOptions} used by this mapper from the job configuration, using the
     * option names declared on {@link SequenceFilesFromMailArchives}.
     *
     * @param context the task context whose configuration is read
     * @throws IOException declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void setup(Mapper.Context context) throws IOException, InterruptedException {
        Configuration configuration = context.getConfiguration();
        this.options = new MailOptions();
        this.options.setPrefix(configuration.get(SequenceFilesFromMailArchives.KEY_PREFIX_OPTION[1], ""));
        if (SequenceFilesFromMailArchivesMapper.isSet(configuration, SequenceFilesFromMailArchives.CHUNK_SIZE_OPTION[0])) {
            this.options.setChunkSize(configuration.getInt(SequenceFilesFromMailArchives.CHUNK_SIZE_OPTION[0], 64));
        }

        // An empty charset value must fall back to UTF-8; Charset.forName("") would throw.
        String charsetName = configuration.get(SequenceFilesFromMailArchives.CHARSET_OPTION[0], "");
        if (charsetName.isEmpty()) {
            charsetName = "UTF-8";
        }
        this.options.setCharset(Charset.forName(charsetName));

        // patternOrder records, for each enabled header, the index of its pattern in the
        // patterns array (and therefore the position of its value in the output record).
        // The index is taken from patterns.size() so it can never drift from the real slot;
        // the previous counter pre-incremented for SUBJECT and was off by one.
        ArrayList<Pattern> patterns = Lists.newArrayListWithCapacity(5);
        HashMap<String, Integer> patternOrder = Maps.newHashMap();
        if (SequenceFilesFromMailArchivesMapper.isSet(configuration, SequenceFilesFromMailArchives.FROM_OPTION[1])) {
            patternOrder.put("FROM", patterns.size());
            patterns.add(MailProcessor.FROM_PREFIX);
        }
        if (SequenceFilesFromMailArchivesMapper.isSet(configuration, SequenceFilesFromMailArchives.TO_OPTION[1])) {
            patternOrder.put("TO", patterns.size());
            patterns.add(MailProcessor.TO_PREFIX);
        }
        if (SequenceFilesFromMailArchivesMapper.isSet(configuration, SequenceFilesFromMailArchives.REFERENCES_OPTION[1])) {
            patternOrder.put("REFS", patterns.size());
            patterns.add(MailProcessor.REFS_PREFIX);
        }
        if (SequenceFilesFromMailArchivesMapper.isSet(configuration, SequenceFilesFromMailArchives.SUBJECT_OPTION[1])) {
            patternOrder.put("SUBJECT", patterns.size());
            patterns.add(MailProcessor.SUBJECT_PREFIX);
        }

        this.options.setStripQuotedText(configuration.getBoolean(SequenceFilesFromMailArchives.STRIP_QUOTED_OPTION[1], false));
        this.options.setPatternsToMatch(patterns.toArray(new Pattern[patterns.size()]));
        this.options.setPatternOrder(patternOrder);
        this.options.setIncludeBody(configuration.getBoolean(SequenceFilesFromMailArchives.BODY_OPTION[1], false));

        // Newline is the default separator unless one is configured.
        this.options.setSeparator("\n");
        if (SequenceFilesFromMailArchivesMapper.isSet(configuration, SequenceFilesFromMailArchives.SEPARATOR_OPTION[1])) {
            this.options.setSeparator(configuration.get(SequenceFilesFromMailArchives.SEPARATOR_OPTION[1], ""));
        }
        if (SequenceFilesFromMailArchivesMapper.isSet(configuration, SequenceFilesFromMailArchives.BODY_SEPARATOR_OPTION[1])) {
            this.options.setBodySeparator(configuration.get(SequenceFilesFromMailArchives.BODY_SEPARATOR_OPTION[1], ""));
        }
        if (SequenceFilesFromMailArchivesMapper.isSet(configuration, SequenceFilesFromMailArchives.QUOTED_REGEX_OPTION[1])) {
            this.options.setQuotedTextPattern(Pattern.compile(configuration.get(SequenceFilesFromMailArchives.QUOTED_REGEX_OPTION[1], "")));
        }
    }

    /** Returns true when the named configuration entry is present and not the empty string. */
    private static boolean isSet(Configuration configuration, String name) {
        return !configuration.get(name, "").isEmpty();
    }

    /**
     * Reads a mail archive line by line and writes one record per message to {@code context}.
     *
     * <p>A message is only tracked once its {@code Message-ID} header has been seen; it is
     * written when the next mbox "From " line is reached or when the input ends.
     *
     * @param filename path of the archive, used in the output key
     * @param mailBoxInputStream stream over the archive contents
     * @param context the task context that receives the records
     * @return the number of {@code Message-ID} headers found
     * @throws IOException if reading the input or writing a record fails
     * @throws InterruptedException if writing a record is interrupted
     */
    public long parseMailboxLineByLine(String filename, InputStream mailBoxInputStream, Mapper.Context context) throws IOException, InterruptedException {
        long messageCount = 0L;
        try {
            StringBuilder contents = new StringBuilder();
            StringBuilder body = new StringBuilder();
            Matcher messageIdMatcher = MESSAGE_ID_PREFIX.matcher("");
            Matcher messageBoundaryMatcher = MESSAGE_START.matcher("");

            // One reusable matcher and one result slot per configured header pattern.
            Pattern[] headerPatterns = this.options.getPatternsToMatch();
            String[] patternResults = new String[headerPatterns.length];
            Matcher[] matches = new Matcher[headerPatterns.length];
            for (int i = 0; i < matches.length; ++i) {
                matches[i] = headerPatterns[i].matcher("");
            }

            String messageId = null;
            boolean inBody = false;
            Pattern quotedTextPattern = this.options.getQuotedTextPattern();
            // Guard against quote stripping being enabled without a pattern to strip by.
            boolean stripQuoted = this.options.isStripQuotedText() && quotedTextPattern != null;
            boolean includeBody = this.options.isIncludeBody();
            String bodySeparator = this.options.getBodySeparator();

            for (String nextLine : new FileLineIterable(mailBoxInputStream, this.options.getCharset(), false, filename)) {
                if (stripQuoted && quotedTextPattern.matcher(nextLine).find()) {
                    continue;
                }

                // Header patterns are tried on every line; the latest match for a header wins.
                for (int i = 0; i < matches.length; ++i) {
                    Matcher matcher = matches[i];
                    matcher.reset(nextLine);
                    if (matcher.matches()) {
                        patternResults[i] = matcher.group(1);
                    }
                }

                if (messageId == null) {
                    // "message-id: <>" is 14 characters, so shorter lines cannot carry an id.
                    if (nextLine.length() <= 14) {
                        continue;
                    }
                    messageIdMatcher.reset(nextLine);
                    if (messageIdMatcher.matches()) {
                        messageId = messageIdMatcher.group(1);
                        ++messageCount;
                    }
                    continue;
                }

                messageBoundaryMatcher.reset(nextLine);
                if (messageBoundaryMatcher.matches()) {
                    // Start of the next message: flush the current one and reset all state.
                    this.emitMessage(filename, messageId, contents, body, patternResults, context);
                    body.setLength(0);
                    // Clear captured headers so the next message cannot inherit stale values
                    // for headers it does not contain.
                    Arrays.fill(patternResults, null);
                    messageId = null;
                    inBody = false;
                } else if (inBody && includeBody) {
                    // Blank lines inside the body are dropped.
                    if (!nextLine.isEmpty()) {
                        body.append(nextLine).append(bodySeparator);
                    }
                } else {
                    // A blank line marks the transition from headers to body.
                    inBody = nextLine.isEmpty();
                }
            }

            // The last message has no trailing boundary line; flush it here.
            if (messageId != null) {
                this.emitMessage(filename, messageId, contents, body, patternResults, context);
            }
        }
        catch (FileNotFoundException ignored) {
            // Kept from the original: a missing source is skipped and reported through the
            // returned count rather than failing the task.
        }
        return messageCount;
    }

    /** Writes one message record (key and assembled contents) and clears the contents buffer. */
    private void emitMessage(String filename, String messageId, StringBuilder contents, CharSequence body, String[] patternResults, Mapper.Context context) throws IOException, InterruptedException {
        String key = SequenceFilesFromMailArchivesMapper.generateKey(filename, this.options.getPrefix(), messageId);
        SequenceFilesFromMailArchivesMapper.writeContent(this.options.getSeparator(), contents, body, patternResults);
        this.outKey.set(key);
        this.outValue.set(contents.toString());
        context.write(this.outKey, this.outValue);
        contents.setLength(0);
    }

    /**
     * Builds the output key for a message.
     *
     * @param mboxFilename path of the archive the message came from
     * @param prefix the configured key prefix
     * @param messageId the id captured from the message's Message-ID header
     * @return {@code prefix/mboxFilename/messageId}
     */
    protected static String generateKey(String mboxFilename, String prefix, String messageId) {
        return Joiner.on("/").join(Lists.newArrayList(prefix, mboxFilename, messageId).iterator());
    }

    /**
     * Appends the header values, joined by {@code separator} (a missing value becomes an empty
     * string), then the separator and the body to {@code contents}.
     */
    private static void writeContent(String separator, StringBuilder contents, CharSequence body, String[] matches) {
        String matchesString = Joiner.on(separator).useForNull("").join(Arrays.asList(matches).iterator());
        contents.append(matchesString).append(separator).append(body);
    }

    /**
     * Parses one archive file of the current {@link CombineFileSplit}.
     *
     * @param key index of the file within the split
     * @param value raw bytes of the file
     * @param context the task context that receives the records
     * @throws IOException if parsing or writing fails
     * @throws InterruptedException if writing is interrupted
     */
    @Override
    public void map(IntWritable key, BytesWritable value, Mapper.Context context) throws IOException, InterruptedException {
        Configuration configuration = context.getConfiguration();
        Path filePath = ((CombineFileSplit)context.getInputSplit()).getPath(key.get());
        String relativeFilePath = HadoopUtil.calcRelativeFilePath(configuration, filePath);
        // getBytes() exposes the whole backing buffer, which may be longer than the valid data;
        // only the first getLength() bytes belong to this record.
        ByteArrayInputStream is = new ByteArrayInputStream(value.getBytes(), 0, value.getLength());
        this.parseMailboxLineByLine(relativeFilePath, is, context);
    }
}

