package org.apache.mahout.text;

import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.apache.mahout.utils.email.MailOptions;
import org.apache.mahout.utils.email.MailProcessor;

/* loaded from: input_file:org/apache/mahout/text/SequenceFilesFromMailArchivesMapper.class */
public class SequenceFilesFromMailArchivesMapper extends Mapper<IntWritable, BytesWritable, Text, Text> {
    private Text outKey = new Text();
    private Text outValue = new Text();
    private static final Pattern MESSAGE_START = Pattern.compile("^From \\S+@\\S.*\\d{4}$", 2);
    private static final Pattern MESSAGE_ID_PREFIX = Pattern.compile("^message-id: <(.*)>$", 2);
    private MailOptions options;

    public void setup(Mapper<IntWritable, BytesWritable, Text, Text>.Context context) throws IOException, InterruptedException {
        Configuration configuration = context.getConfiguration();
        this.options = new MailOptions();
        this.options.setPrefix(configuration.get(SequenceFilesFromMailArchives.KEY_PREFIX_OPTION[1], ""));
        if (!configuration.get(SequenceFilesFromMailArchives.CHUNK_SIZE_OPTION[0], "").equals("")) {
            this.options.setChunkSize(configuration.getInt(SequenceFilesFromMailArchives.CHUNK_SIZE_OPTION[0], 64));
        }
        if (configuration.get(SequenceFilesFromMailArchives.CHARSET_OPTION[0], "").equals("")) {
            this.options.setCharset(Charset.forName("UTF-8"));
        } else {
            this.options.setCharset(Charset.forName(configuration.get(SequenceFilesFromMailArchives.CHARSET_OPTION[0], "UTF-8")));
        }
        ArrayList newArrayListWithCapacity = Lists.newArrayListWithCapacity(5);
        HashMap newHashMap = Maps.newHashMap();
        int i = 0;
        if (!configuration.get(SequenceFilesFromMailArchives.FROM_OPTION[1], "").equals("")) {
            newArrayListWithCapacity.add(MailProcessor.FROM_PREFIX);
            i = 0 + 1;
            newHashMap.put(MailOptions.FROM, 0);
        }
        if (!configuration.get(SequenceFilesFromMailArchives.TO_OPTION[1], "").equals("")) {
            newArrayListWithCapacity.add(MailProcessor.TO_PREFIX);
            int i2 = i;
            i++;
            newHashMap.put(MailOptions.TO, Integer.valueOf(i2));
        }
        if (!configuration.get(SequenceFilesFromMailArchives.REFERENCES_OPTION[1], "").equals("")) {
            newArrayListWithCapacity.add(MailProcessor.REFS_PREFIX);
            int i3 = i;
            i++;
            newHashMap.put(MailOptions.REFS, Integer.valueOf(i3));
        }
        if (!configuration.get(SequenceFilesFromMailArchives.SUBJECT_OPTION[1], "").equals("")) {
            newArrayListWithCapacity.add(MailProcessor.SUBJECT_PREFIX);
            newHashMap.put(MailOptions.SUBJECT, Integer.valueOf(i + 1));
        }
        this.options.setStripQuotedText(configuration.getBoolean(SequenceFilesFromMailArchives.STRIP_QUOTED_OPTION[1], false));
        this.options.setPatternsToMatch((Pattern[]) newArrayListWithCapacity.toArray(new Pattern[newArrayListWithCapacity.size()]));
        this.options.setPatternOrder(newHashMap);
        this.options.setIncludeBody(configuration.getBoolean(SequenceFilesFromMailArchives.BODY_OPTION[1], false));
        this.options.setSeparator("\n");
        if (!configuration.get(SequenceFilesFromMailArchives.SEPARATOR_OPTION[1], "").equals("")) {
            this.options.setSeparator(configuration.get(SequenceFilesFromMailArchives.SEPARATOR_OPTION[1], ""));
        }
        if (!configuration.get(SequenceFilesFromMailArchives.BODY_SEPARATOR_OPTION[1], "").equals("")) {
            this.options.setBodySeparator(configuration.get(SequenceFilesFromMailArchives.BODY_SEPARATOR_OPTION[1], ""));
        }
        if (configuration.get(SequenceFilesFromMailArchives.QUOTED_REGEX_OPTION[1], "").equals("")) {
            return;
        }
        this.options.setQuotedTextPattern(Pattern.compile(configuration.get(SequenceFilesFromMailArchives.QUOTED_REGEX_OPTION[1], "")));
    }

    public long parseMailboxLineByLine(String str, InputStream inputStream, Mapper<IntWritable, BytesWritable, Text, Text>.Context context) throws IOException, InterruptedException {
        long j = 0;
        try {
            StringBuilder sb = new StringBuilder();
            StringBuilder sb2 = new StringBuilder();
            Matcher matcher = MESSAGE_ID_PREFIX.matcher("");
            Matcher matcher2 = MESSAGE_START.matcher("");
            String[] strArr = new String[this.options.getPatternsToMatch().length];
            Matcher[] matcherArr = new Matcher[this.options.getPatternsToMatch().length];
            for (int i = 0; i < matcherArr.length; i++) {
                matcherArr[i] = this.options.getPatternsToMatch()[i].matcher("");
            }
            String str2 = null;
            boolean z = false;
            Pattern quotedTextPattern = this.options.getQuotedTextPattern();
            Iterator it = new FileLineIterable(inputStream, this.options.getCharset(), false, str).iterator();
            while (it.hasNext()) {
                String str3 = (String) it.next();
                if (!this.options.isStripQuotedText() || !quotedTextPattern.matcher(str3).find()) {
                    for (int i2 = 0; i2 < matcherArr.length; i2++) {
                        Matcher matcher3 = matcherArr[i2];
                        matcher3.reset(str3);
                        if (matcher3.matches()) {
                            strArr[i2] = matcher3.group(1);
                        }
                    }
                    if (str2 != null) {
                        matcher2.reset(str3);
                        if (matcher2.matches()) {
                            String generateKey = generateKey(str, this.options.getPrefix(), str2);
                            writeContent(this.options.getSeparator(), sb, sb2, strArr);
                            this.outKey.set(generateKey);
                            this.outValue.set(sb.toString());
                            context.write(this.outKey, this.outValue);
                            sb.setLength(0);
                            sb2.setLength(0);
                            str2 = null;
                            z = false;
                        } else if (!z || !this.options.isIncludeBody()) {
                            z = str3.isEmpty();
                        } else if (!str3.isEmpty()) {
                            sb2.append(str3).append(this.options.getBodySeparator());
                        }
                    } else if (str3.length() > 14) {
                        matcher.reset(str3);
                        if (matcher.matches()) {
                            str2 = matcher.group(1);
                            j++;
                        }
                    }
                }
            }
            if (str2 != null) {
                String generateKey2 = generateKey(str, this.options.getPrefix(), str2);
                writeContent(this.options.getSeparator(), sb, sb2, strArr);
                this.outKey.set(generateKey2);
                this.outValue.set(sb.toString());
                context.write(this.outKey, this.outValue);
                sb.setLength(0);
            }
        } catch (FileNotFoundException e) {
        }
        return j;
    }

    protected static String generateKey(String str, String str2, String str3) {
        return Joiner.on("/").join(Lists.newArrayList(new String[]{str2, str, str3}).iterator());
    }

    private static void writeContent(String str, StringBuilder sb, CharSequence charSequence, String[] strArr) {
        sb.append(Joiner.on(str).useForNull("").join(Arrays.asList(strArr).iterator())).append(str).append(charSequence);
    }

    public void map(IntWritable intWritable, BytesWritable bytesWritable, Mapper<IntWritable, BytesWritable, Text, Text>.Context context) throws IOException, InterruptedException {
        parseMailboxLineByLine(HadoopUtil.calcRelativeFilePath(context.getConfiguration(), context.getInputSplit().getPath(intWritable.get())), new ByteArrayInputStream(bytesWritable.getBytes()), context);
    }

    public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
        map((IntWritable) obj, (BytesWritable) obj2, (Mapper<IntWritable, BytesWritable, Text, Text>.Context) context);
    }
}
