package ai.grazie.nlp.tokenizer.spacy;

import ai.grazie.utils.mpp.Platform;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.ranges.RangesKt;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;

/* compiled from: SpacyBaseLanguage.kt */
@Metadata(mv = {1, 7, 0}, k = 1, xi = 48, d1 = {"�� \n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0010 \n\u0002\u0010\u000e\n\u0002\b\n\n\u0002\u0018\u0002\n\u0002\b\u0005\bÆ\u0002\u0018��2\u00020\u0001:\u0001\u0014B\u0007\b\u0002¢\u0006\u0002\u0010\u0002J\u0014\u0010\u000f\u001a\u00020\u00102\f\u0010\u0011\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004J\u0014\u0010\u0012\u001a\u00020\u00102\f\u0010\u0011\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004J\u0014\u0010\u0013\u001a\u00020\u00102\f\u0010\u0011\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004R\u0017\u0010\u0003\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004¢\u0006\b\n��\u001a\u0004\b\u0006\u0010\u0007R\u0017\u0010\b\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004¢\u0006\b\n��\u001a\u0004\b\t\u0010\u0007R\u0014\u0010\n\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\f\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0017\u0010\r\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004¢\u0006\b\n��\u001a\u0004\b\u000e\u0010\u0007¨\u0006\u0015"}, d2 = {"Lai/grazie/nlp/tokenizer/spacy/SpacyBaseLanguage;", "", "()V", "infixes", "", "", "getInfixes", "()Ljava/util/List;", "prefixes", "getPrefixes", "spacyDefaultInfixes", "spacyDefaultPrefixes", "spacyDefaultSuffixes", "suffixes", "getSuffixes", "compileInfix", "Lkotlin/text/Regex;", "cases", "compilePrefix", "compileSuffix", "BaseExceptions", "nlp-tokenizer"})
@SourceDebugExtension({"SMAP\nSpacyBaseLanguage.kt\nKotlin\n*S Kotlin\n*F\n+ 1 SpacyBaseLanguage.kt\nai/grazie/nlp/tokenizer/spacy/SpacyBaseLanguage\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,126:1\n1549#2:127\n1620#2,3:128\n766#2:131\n857#2,2:132\n766#2:134\n857#2,2:135\n1549#2:137\n1620#2,3:138\n766#2:141\n857#2,2:142\n1549#2:144\n1620#2,3:145\n766#2:148\n857#2,2:149\n*S KotlinDebug\n*F\n+ 1 SpacyBaseLanguage.kt\nai/grazie/nlp/tokenizer/spacy/SpacyBaseLanguage\n*L\n76#1:127\n76#1:128,3\n82#1:131\n82#1:132,2\n84#1:134\n84#1:135,2\n90#1:137\n90#1:138,3\n95#1:141\n95#1:142,2\n100#1:144\n100#1:145,3\n105#1:148\n105#1:149,2\n*E\n"})
/* loaded from: input_file:ai/grazie/nlp/tokenizer/spacy/SpacyBaseLanguage.class */
public final class SpacyBaseLanguage {

    /**
     * The single shared instance of this class. The constructor is private, so no other
     * instance is created from outside this file.
     */
    @NotNull
    public static final SpacyBaseLanguage INSTANCE = new SpacyBaseLanguage();

    /**
     * Base prefix pattern strings. Starts with six inline fragments (section sign, percent,
     * equals, em dash, en dash, and a plus sign that is not followed by a digit), followed by
     * the punctuation, ellipsis, quote, currency and icon lists from
     * {@code SpacyTokenizerCharClasses}, in that order.
     * NOTE(review): the name suggests these mirror spaCy's default prefixes; confirm upstream.
     */
    @NotNull
    private static final List<String> spacyDefaultPrefixes = CollectionsKt.plus(CollectionsKt.plus(CollectionsKt.plus(CollectionsKt.plus(CollectionsKt.plus(CollectionsKt.listOf(new String[]{"§", "%", "=", "—", "–", "\\+(?![0-9])"}), SpacyTokenizerCharClasses.INSTANCE.getListPunct()), SpacyTokenizerCharClasses.INSTANCE.getListEllipses()), SpacyTokenizerCharClasses.INSTANCE.getListQuotes()), SpacyTokenizerCharClasses.INSTANCE.getListCurrency()), SpacyTokenizerCharClasses.INSTANCE.getListIcons());

    /**
     * Prefix pattern strings returned by {@link #getPrefixes()}. Five extra fragments come
     * first (a run of asterisks, a run of hash signs, two or three backticks, two or three
     * apostrophes, a run of hyphens), followed by every entry of {@code spacyDefaultPrefixes}.
     * Must stay declared after {@code spacyDefaultPrefixes}: static initializers run in
     * textual order and this one reads that field.
     */
    @NotNull
    private static final List<String> prefixes = CollectionsKt.plus(CollectionsKt.listOf(new String[]{"\\*+", "#+", "[`]{2,3}", "[']{2,3}", "[-]+"}), spacyDefaultPrefixes);

    /**
     * Base suffix pattern strings. In order: the punctuation, ellipsis, quote and icon lists
     * from {@code SpacyTokenizerCharClasses}; the em dash and en dash; then six look-behind
     * patterns:
     * <ol>
     *   <li>a plus sign preceded by a digit;</li>
     *   <li>a period preceded by a degree sign and one of F, C, K (either case);</li>
     *   <li>the {@code getCurrency()} alternation preceded by a digit;</li>
     *   <li>the {@code getUnits()} alternation preceded by a digit;</li>
     *   <li>a period preceded by one character of a class assembled from digits,
     *       {@code getAlphaLower()}, percent, superscript two, hyphen, plus,
     *       {@code getPunct()} and {@code getConcatQuotes()};</li>
     *   <li>a period preceded by two characters from {@code getAlphaUpper()}.</li>
     * </ol>
     */
    @NotNull
    private static final List<String> spacyDefaultSuffixes = CollectionsKt.plus(CollectionsKt.plus(CollectionsKt.plus(CollectionsKt.plus(CollectionsKt.plus(SpacyTokenizerCharClasses.INSTANCE.getListPunct(), SpacyTokenizerCharClasses.INSTANCE.getListEllipses()), SpacyTokenizerCharClasses.INSTANCE.getListQuotes()), SpacyTokenizerCharClasses.INSTANCE.getListIcons()), CollectionsKt.listOf(new String[]{"—", "–"})), CollectionsKt.listOf(new String[]{"(?<=[0-9])\\+", "(?<=°[FfCcKk])\\.", "(?<=[0-9])(?:" + SpacyTokenizerCharClasses.INSTANCE.getCurrency() + ")", "(?<=[0-9])(?:" + SpacyTokenizerCharClasses.INSTANCE.getUnits() + ")", "(?<=[0-9" + SpacyTokenizerCharClasses.INSTANCE.getAlphaLower() + "%²\\-\\+" + SpacyTokenizerCharClasses.INSTANCE.getPunct() + "(?:" + SpacyTokenizerCharClasses.INSTANCE.getConcatQuotes() + ")])\\.", "(?<=[" + SpacyTokenizerCharClasses.INSTANCE.getAlphaUpper() + "][" + SpacyTokenizerCharClasses.INSTANCE.getAlphaUpper() + "])\\."}));

    /**
     * Suffix pattern strings returned by {@link #getSuffixes()}. Eight extra fragments come
     * first (runs of periods, exclamation marks, question marks and asterisks; digits in
     * square brackets; two or three backticks; two or three apostrophes; a run of hyphens),
     * then the currency list from {@code SpacyTokenizerCharClasses}, then every entry of
     * {@code spacyDefaultSuffixes}.
     * Must stay declared after {@code spacyDefaultSuffixes}, which it reads during static
     * initialization.
     */
    @NotNull
    private static final List<String> suffixes = CollectionsKt.plus(CollectionsKt.plus(CollectionsKt.listOf(new String[]{"\\.+", "!+", "\\?+", "\\*+", "\\[\\d+\\]", "[`]{2,3}", "[']{2,3}", "[-]+"}), SpacyTokenizerCharClasses.INSTANCE.getListCurrency()), spacyDefaultSuffixes);

    /**
     * Base infix pattern strings. In order: the ellipsis and icon lists from
     * {@code SpacyTokenizerCharClasses}, then five patterns that use look-behind and
     * look-ahead so only the separator itself is matched:
     * <ol>
     *   <li>one of plus, minus, asterisk or caret between a digit and a digit or hyphen;</li>
     *   <li>a period between a {@code getAlphaLower()}/{@code getConcatQuotes()} character
     *       and a {@code getAlphaUpper()}/{@code getConcatQuotes()} character;</li>
     *   <li>a comma between two {@code getAlpha()} characters;</li>
     *   <li>the {@code getHyphens()} alternation between two {@code getAlpha()} characters;</li>
     *   <li>one of colon, less-than, greater-than, equals or slash between a
     *       {@code getAlpha()}-or-digit character and a {@code getAlpha()} character.</li>
     * </ol>
     */
    @NotNull
    private static final List<String> spacyDefaultInfixes = CollectionsKt.plus(CollectionsKt.plus(SpacyTokenizerCharClasses.INSTANCE.getListEllipses(), SpacyTokenizerCharClasses.INSTANCE.getListIcons()), CollectionsKt.listOf(new String[]{"(?<=[0-9])[+\\-\\*^](?=[0-9-])", "(?<=[" + SpacyTokenizerCharClasses.INSTANCE.getAlphaLower() + SpacyTokenizerCharClasses.INSTANCE.getConcatQuotes() + "])\\.(?=[" + SpacyTokenizerCharClasses.INSTANCE.getAlphaUpper() + SpacyTokenizerCharClasses.INSTANCE.getConcatQuotes() + "])", "(?<=[" + SpacyTokenizerCharClasses.INSTANCE.getAlpha() + "]),(?=[" + SpacyTokenizerCharClasses.INSTANCE.getAlpha() + "])", "(?<=[" + SpacyTokenizerCharClasses.INSTANCE.getAlpha() + "])(?:" + SpacyTokenizerCharClasses.INSTANCE.getHyphens() + ")(?=[" + SpacyTokenizerCharClasses.INSTANCE.getAlpha() + "])", "(?<=[" + SpacyTokenizerCharClasses.INSTANCE.getAlpha() + "0-9])[:<>=/](?=[" + SpacyTokenizerCharClasses.INSTANCE.getAlpha() + "])"}));

    /**
     * Infix pattern strings returned by {@link #getInfixes()}. Three extra patterns come
     * first, followed by every entry of {@code spacyDefaultInfixes}:
     * <ol>
     *   <li>an opening or closing parenthesis between two {@code getAlpha()} characters;</li>
     *   <li>the {@code getHyphens()} alternation between a {@code getAlpha()}-or-digit
     *       character and a {@code getAlpha()} character;</li>
     *   <li>one {@code getNonContractionConcatQuotes()} character between two
     *       {@code getAlpha()} characters.</li>
     * </ol>
     * Must stay declared after {@code spacyDefaultInfixes}, which it reads during static
     * initialization.
     */
    @NotNull
    private static final List<String> infixes = CollectionsKt.plus(CollectionsKt.listOf(new String[]{"(?<=[" + SpacyTokenizerCharClasses.INSTANCE.getAlpha() + "])(?:\\(|\\))(?=[" + SpacyTokenizerCharClasses.INSTANCE.getAlpha() + "])", "(?<=[" + SpacyTokenizerCharClasses.INSTANCE.getAlpha() + "0-9])(?:" + SpacyTokenizerCharClasses.INSTANCE.getHyphens() + ")(?=[" + SpacyTokenizerCharClasses.INSTANCE.getAlpha() + "])", "(?<=[" + SpacyTokenizerCharClasses.INSTANCE.getAlpha() + "])(?:[" + SpacyTokenizerCharClasses.INSTANCE.getNonContractionConcatQuotes() + "])(?=[" + SpacyTokenizerCharClasses.INSTANCE.getAlpha() + "])"}), spacyDefaultInfixes);

    /* compiled from: SpacyBaseLanguage.kt */
    @Metadata(mv = {1, 7, 0}, k = 1, xi = 48, d1 = {"��(\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0010 \n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0004\bÆ\u0002\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002R\u0014\u0010\u0003\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u0006\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��R9\u0010\u0007\u001a*\u0012\u0004\u0012\u00020\u0005\u0012\n\u0012\b\u0012\u0004\u0012\u00020\t0\u00040\bj\u0014\u0012\u0004\u0012\u00020\u0005\u0012\n\u0012\b\u0012\u0004\u0012\u00020\t0\u0004`\n¢\u0006\b\n��\u001a\u0004\b\u000b\u0010\fR\u0014\u0010\r\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004X\u0082\u0004¢\u0006\u0002\n��¨\u0006\u000e"}, d2 = {"Lai/grazie/nlp/tokenizer/spacy/SpacyBaseLanguage$BaseExceptions;", "", "()V", "emoticons", "", "", "ends", "exceptions", "Ljava/util/HashMap;", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenInfo;", "Lkotlin/collections/HashMap;", "getExceptions", "()Ljava/util/HashMap;", "spaces", "nlp-tokenizer"})
    @SourceDebugExtension({"SMAP\nSpacyBaseLanguage.kt\nKotlin\n*S Kotlin\n*F\n+ 1 SpacyBaseLanguage.kt\nai/grazie/nlp/tokenizer/spacy/SpacyBaseLanguage$BaseExceptions\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,126:1\n1271#2,2:127\n1285#2,4:129\n*S KotlinDebug\n*F\n+ 1 SpacyBaseLanguage.kt\nai/grazie/nlp/tokenizer/spacy/SpacyBaseLanguage$BaseExceptions\n*L\n123#1:127,2\n123#1:129,4\n*E\n"})
    /* loaded from: input_file:ai/grazie/nlp/tokenizer/spacy/SpacyBaseLanguage$BaseExceptions.class */
    public static final class BaseExceptions {

        /**
         * The single shared instance of this holder. The constructor is private, so no other
         * instance is created from outside this file.
         */
        @NotNull
        public static final BaseExceptions INSTANCE = new BaseExceptions();

        /**
         * Whitespace-like literals that each become their own tokenizer exception: an ASCII
         * space, a tab, the two-character text backslash-t, a newline, the two-character text
         * backslash-n, an em dash, and a no-break space (U+00A0).
         *
         * <p>The no-break space is written as a Unicode escape on purpose: as a raw character it
         * is indistinguishable from the leading ASCII space in most editors and is easily lost
         * when the file is re-encoded or reformatted.
         */
        @NotNull
        private static final List<String> spaces = CollectionsKt.listOf(new String[]{" ", "\t", "\\t", "\n", "\\n", "—", "\u00a0"});

        /**
         * Literal strings that each become their own tokenizer exception: an apostrophe, a
         * backslash/quote/parenthesis sequence, the text {@code <space>}, a doubled apostrophe,
         * {@code C++}, and every lowercase letter a-z plus a-umlaut, o-umlaut and u-umlaut
         * followed by a period.
         */
        @NotNull
        private static final List<String> ends = CollectionsKt.listOf(new String[]{"'", "\\\\\")", "<space>", "''", "C++", "a.", "b.", "c.", "d.", "e.", "f.", "g.", "h.", "i.", "j.", "k.", "l.", "m.", "n.", "o.", "p.", "q.", "r.", "s.", "t.", "u.", "v.", "w.", "x.", "y.", "z.", "ä.", "ö.", "ü."});

        /**
         * Emoticon literals, obtained by splitting one long string on a single ASCII space.
         * Each resulting piece becomes its own tokenizer exception.
         *
         * <p>NOTE(review): the string contains newline characters followed by runs of spaces
         * (line continuations of the original text). Because the split delimiter is exactly one
         * space, this appears to also yield a bare newline entry and several empty-string
         * entries. Confirm whether that is intended before relying on the list contents.
         */
        @NotNull
        private static final List<String> emoticons = StringsKt.split$default(":) :-) :)) :-)) :))) :-))) (: (-: =) (= \") :] :-] [: [-: [= =] :o) (o: :} :-} \n        8) 8-) (-8 ;) ;-) (; (-; :( :-( :(( :-(( :((( :-((( ): )-: =( >:( :') :'-) :'( :'-( :/ :-/ =/ =| :| :-| ]= \n        =[ :1 :P :-P :p :-p :O :-O :o :-o :0 :-0 :() >:o :* :-* :3 :-3 =3 :> :-> :X :-X :x :-x :D :-D ;D ;-D =D xD \n        XD xDD XDD 8D 8-D ^_^ ^__^ ^___^ >.< >.> <.< ._. ;_; -_- -__- v.v V.V v_v V_V o_o o_O O_o O_O 0_o o_0 0_0 o.O \n        O.o O.O o.o 0.0 o.0 0.o @_@ <3 <33 <333 </3 (^_^) (-_-) (._.) (>_<) (*_*) (¬_¬) ಠ_ಠ ಠ︵ಠ (ಠ_ಠ) ¯\\(ツ)/¯ (╯°□°）╯︵┻━┻ ><(((*>", new String[]{" "}, false, 0, 6, (Object) null);

        /**
         * Maps each exception literal to the list of token infos it should be tokenized into.
         * Assigned once in the static initializer of this class and handed out directly (not
         * copied) by {@link #getExceptions()}.
         */
        @NotNull
        private static final HashMap<String, List<SpacyTokenInfo>> exceptions;

        /** Private so that the only instance is the one stored in {@link #INSTANCE}. */
        private BaseExceptions() {
        }

        /**
         * Returns the exception table built when this class was initialized.
         *
         * @return the shared map itself, not a copy; changes made by a caller are visible to
         *         every other user of this class
         */
        @NotNull
        public final HashMap<String, List<SpacyTokenInfo>> getExceptions() {
            return BaseExceptions.exceptions;
        }

        static {
            // Every literal from the three source lists maps to a one-element token list
            // holding a SpacyTokenInfo built from that same literal (all other constructor
            // arguments left at their defaults).
            final List<String> literals = CollectionsKt.plus(CollectionsKt.plus(spaces, ends), emoticons);
            // Pre-size the map for the number of literals; insertion order follows
            // spaces, then ends, then emoticons. A repeated literal overwrites its earlier entry.
            final LinkedHashMap<String, List<SpacyTokenInfo>> byLiteral =
                    new LinkedHashMap<>(RangesKt.coerceAtLeast(MapsKt.mapCapacity(CollectionsKt.collectionSizeOrDefault(literals, 10)), 16));
            for (String literal : literals) {
                byLiteral.put(literal, CollectionsKt.listOf(new SpacyTokenInfo(literal, null, null, null, 14, null)));
            }
            exceptions = byLiteral;
        }
    }

    /** Private so that the only instance is the one stored in {@link #INSTANCE}. */
    private SpacyBaseLanguage() {
    }

    /**
     * Returns the prefix pattern strings held by this class.
     *
     * @return the shared list instance, not a copy
     */
    @NotNull
    public final List<String> getPrefixes() {
        return SpacyBaseLanguage.prefixes;
    }

    /**
     * Returns the suffix pattern strings held by this class.
     *
     * @return the shared list instance, not a copy
     */
    @NotNull
    public final List<String> getSuffixes() {
        return SpacyBaseLanguage.suffixes;
    }

    /**
     * Returns the infix pattern strings held by this class.
     *
     * @return the shared list instance, not a copy
     */
    @NotNull
    public final List<String> getInfixes() {
        return SpacyBaseLanguage.infixes;
    }

    /**
     * Compiles prefix patterns into a single alternation in which every alternative is
     * anchored with a leading {@code ^}.
     *
     * <p>Processing per entry, in list order:
     * <ul>
     *   <li>when the current platform is {@code Platform.JS}, each backslash-x sequence in the
     *       entry is replaced by backslash-u (presumably to suit the JS regex engine; confirm);</li>
     *   <li>entries that are empty after trimming are dropped;</li>
     *   <li>if the incoming list contains the bare entry {@code "("}, every entry is passed
     *       through {@code Regex.escape} and therefore matched as literal text; otherwise
     *       entries are used as regex source unchanged.</li>
     * </ul>
     * An empty (or all-blank) list produces a regex compiled from the empty string.
     *
     * @param list the prefix patterns; must not be {@code null}
     * @return a regex of the form {@code ^a|^b|...}
     */
    @NotNull
    public final Regex compilePrefix(@NotNull List<String> list) {
        Intrinsics.checkNotNullParameter(list, "cases");
        final boolean onJs = Platform.Companion.getCurrent() == Platform.JS;
        // A bare "(" entry marks the list as plain text rather than regex fragments.
        final boolean treatAsLiterals = list.contains("(");
        final List<String> alternatives = new ArrayList<>(list.size());
        for (String rawCase : list) {
            final String pattern = onJs
                    ? StringsKt.replace$default(rawCase, "\\x", "\\u", false, 4, (Object) null)
                    : rawCase;
            if (StringsKt.trim((CharSequence) pattern).toString().isEmpty()) {
                continue;
            }
            alternatives.add("^" + (treatAsLiterals ? Regex.Companion.escape(pattern) : pattern));
        }
        return new Regex(String.join("|", alternatives));
    }

    /**
     * Compiles suffix patterns into a single alternation in which every alternative is
     * anchored with a trailing {@code $}.
     *
     * <p>Processing per entry, in list order: when the current platform is
     * {@code Platform.JS}, each backslash-x sequence in the entry is replaced by backslash-u
     * (presumably to suit the JS regex engine; confirm); entries that are empty after trimming
     * are dropped; the remaining entries are used as regex source unchanged.
     * An empty (or all-blank) list produces a regex compiled from the empty string.
     *
     * @param list the suffix patterns; must not be {@code null}
     * @return a regex of the form {@code a$|b$|...}
     */
    @NotNull
    public final Regex compileSuffix(@NotNull List<String> list) {
        Intrinsics.checkNotNullParameter(list, "cases");
        final boolean onJs = Platform.Companion.getCurrent() == Platform.JS;
        final List<String> alternatives = new ArrayList<>(list.size());
        for (String rawCase : list) {
            final String pattern = onJs
                    ? StringsKt.replace$default(rawCase, "\\x", "\\u", false, 4, (Object) null)
                    : rawCase;
            if (StringsKt.trim((CharSequence) pattern).toString().isEmpty()) {
                continue;
            }
            alternatives.add(pattern + "$");
        }
        return new Regex(String.join("|", alternatives));
    }

    /**
     * Compiles infix patterns into a single unanchored alternation.
     *
     * <p>Processing per entry, in list order: when the current platform is
     * {@code Platform.JS}, each backslash-x sequence in the entry is replaced by backslash-u
     * (presumably to suit the JS regex engine; confirm); entries that are empty after trimming
     * are dropped; the remaining entries are used as regex source unchanged.
     * An empty (or all-blank) list produces a regex compiled from the empty string.
     *
     * @param list the infix patterns; must not be {@code null}
     * @return a regex of the form {@code a|b|...}
     */
    @NotNull
    public final Regex compileInfix(@NotNull List<String> list) {
        Intrinsics.checkNotNullParameter(list, "cases");
        final boolean onJs = Platform.Companion.getCurrent() == Platform.JS;
        final List<String> alternatives = new ArrayList<>(list.size());
        for (String rawCase : list) {
            final String pattern = onJs
                    ? StringsKt.replace$default(rawCase, "\\x", "\\u", false, 4, (Object) null)
                    : rawCase;
            if (StringsKt.trim((CharSequence) pattern).toString().isEmpty()) {
                continue;
            }
            alternatives.add(pattern);
        }
        return new Regex(String.join("|", alternatives));
    }
}
