package org.apache.hadoop.hive.ql.udf.generic;

import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Locale;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

@Description(name = "sentences", value = "_FUNC_(str, lang, country) - Splits str into arrays of sentences, where each sentence is an array of words. The 'lang' and'country' arguments are optional, and if omitted, the default locale is used.", extended = "Example:\n  > SELECT _FUNC_('Hello there! I am a UDF.') FROM src LIMIT 1;\n  [ [\"Hello\", \"there\"], [\"I\", \"am\", \"a\", \"UDF\"] ]\n  > SELECT _FUNC_(review, language) FROM movies;\nUnnecessary punctuation, such as periods and commas in English, is automatically stripped. If specified, 'lang' should be a two-letter ISO-639 language code (such as 'en'), and 'country' should be a two-letter ISO-3166 code (such as 'us'). Not all country and language codes are fully supported, and if an unsupported code is specified, a default locale is used to process that string.")
/* loaded from: input_file:WEB-INF/lib/hive-exec-2.3.6-mapr-2009.jar:org/apache/hadoop/hive/ql/udf/generic/GenericUDFSentences.class */
/**
 * Hive UDF {@code sentences(str[, lang[, country]])}.
 *
 * <p>Splits a string into sentences and each sentence into words, using
 * {@link BreakIterator} for both levels. The result is a list of sentences, where every
 * sentence is a list of {@link Text} words. Tokens whose first code point is not a letter or
 * digit (punctuation, whitespace) are discarded.
 *
 * <p>The optional {@code lang} and {@code country} arguments select the {@link Locale} handed
 * to the break iterators; when {@code lang} is absent or {@code NULL} the JVM default locale
 * is used.
 */
public class GenericUDFSentences extends GenericUDF {
    /** One to-string converter per declared argument; populated by {@link #initialize}. */
    private transient ObjectInspectorConverters.Converter[] converters;

    /**
     * Validates the argument count and prepares a string converter for every argument.
     *
     * @param objectInspectorArr inspectors for the 1 to 3 call arguments
     * @return an inspector describing a list of lists of writable strings
     * @throws UDFArgumentLengthException if fewer than 1 or more than 3 arguments are given
     */
    @Override // org.apache.hadoop.hive.ql.udf.generic.GenericUDF
    public ObjectInspector initialize(ObjectInspector[] objectInspectorArr) throws UDFArgumentException {
        if (objectInspectorArr.length < 1 || objectInspectorArr.length > 3) {
            throw new UDFArgumentLengthException("The function sentences takes between 1 and 3 arguments.");
        }
        this.converters = new ObjectInspectorConverters.Converter[objectInspectorArr.length];
        for (int i = 0; i < objectInspectorArr.length; i++) {
            this.converters[i] = ObjectInspectorConverters.getConverter(
                    objectInspectorArr[i], PrimitiveObjectInspectorFactory.writableStringObjectInspector);
        }
        return ObjectInspectorFactory.getStandardListObjectInspector(
                ObjectInspectorFactory.getStandardListObjectInspector(
                        PrimitiveObjectInspectorFactory.writableStringObjectInspector));
    }

    /**
     * Tokenizes the first argument into sentences of words.
     *
     * @param deferredObjectArr the call arguments: text, optional language, optional country
     * @return {@code null} when the text argument is {@code null}; otherwise a list with one
     *     entry per sentence, each entry being the (possibly empty) list of words in it
     * @throws HiveException if a deferred argument cannot be evaluated
     */
    @Override // org.apache.hadoop.hive.ql.udf.generic.GenericUDF
    public Object evaluate(GenericUDF.DeferredObject[] deferredObjectArr) throws HiveException {
        assert deferredObjectArr.length >= 1 && deferredObjectArr.length <= 3;

        // Fetch each deferred value exactly once; get() may trigger evaluation of the argument.
        Object rawInput = deferredObjectArr[0].get();
        if (rawInput == null) {
            return null;
        }
        Locale locale = resolveLocale(deferredObjectArr);
        String input = ((Text) this.converters[0].convert(rawInput)).toString();

        ArrayList<ArrayList<Text>> sentences = new ArrayList<>();
        BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(locale);
        // The word iterator is locale-dependent only, so build it once and re-target it per sentence.
        BreakIterator wordBreaks = BreakIterator.getWordInstance(locale);
        sentenceBreaks.setText(input);

        int sentenceStart = 0;
        for (int sentenceEnd = sentenceBreaks.next();
                sentenceEnd != BreakIterator.DONE;
                sentenceEnd = sentenceBreaks.next()) {
            // Every sentence yields an entry, even when it contains no words.
            sentences.add(splitWords(input.substring(sentenceStart, sentenceEnd), wordBreaks));
            sentenceStart = sentenceEnd;
        }
        return sentences;
    }

    /**
     * Builds the locale from the optional language (index 1) and country (index 2) arguments.
     * The country is only consulted when a non-null language is present.
     */
    private Locale resolveLocale(GenericUDF.DeferredObject[] args) throws HiveException {
        Object rawLanguage = args.length > 1 ? args[1].get() : null;
        if (rawLanguage == null) {
            return Locale.getDefault();
        }
        // Locale.ROOT keeps the case mapping of ISO codes independent of the JVM default locale
        // (e.g. a Turkish default would otherwise lower-case "IN" to a dotless-i string).
        String language = ((Text) this.converters[1].convert(rawLanguage)).toString().toLowerCase(Locale.ROOT);

        Object rawCountry = args.length > 2 ? args[2].get() : null;
        if (rawCountry == null) {
            return new Locale(language);
        }
        String country = ((Text) this.converters[2].convert(rawCountry)).toString().toUpperCase(Locale.ROOT);
        return new Locale(language, country);
    }

    /**
     * Splits one sentence into words, keeping only tokens that start with a letter or digit.
     *
     * @param sentence the sentence text
     * @param wordBreaks a word-level break iterator; its text is replaced by this call
     */
    private static ArrayList<Text> splitWords(String sentence, BreakIterator wordBreaks) {
        ArrayList<Text> words = new ArrayList<>();
        wordBreaks.setText(sentence);
        int tokenStart = 0;
        for (int tokenEnd = wordBreaks.next(); tokenEnd != BreakIterator.DONE; tokenEnd = wordBreaks.next()) {
            String token = sentence.substring(tokenStart, tokenEnd);
            tokenStart = tokenEnd;
            // Inspect the full code point so tokens beginning with a supplementary-plane
            // letter/digit (encoded as a surrogate pair) are not mistaken for punctuation.
            if (Character.isLetterOrDigit(token.codePointAt(0))) {
                words.add(new Text(token));
            }
        }
        return words;
    }

    /**
     * Renders this call for display.
     *
     * @param strArr display strings of the 1 to 3 arguments
     * @return the standard display string for the function name {@code sentences}
     */
    @Override // org.apache.hadoop.hive.ql.udf.generic.GenericUDF
    public String getDisplayString(String[] strArr) {
        assert strArr.length >= 1 && strArr.length <= 3;
        return getStandardDisplayString("sentences", strArr);
    }
}
