package ai.grazie.nlp.tokenizer.spacy;

import ai.grazie.nlp.langs.Language;
import ai.grazie.nlp.tokenizer.Tokenizer;
import ai.grazie.nlp.tokenizer.spacy.SpacyBaseLanguage;
import ai.grazie.nlp.tokenizer.spacy.de.SpacyGerman;
import ai.grazie.nlp.tokenizer.spacy.de.SpacyGermanTokenizerExceptions;
import ai.grazie.nlp.tokenizer.spacy.en.SpacyEnglish;
import ai.grazie.nlp.tokenizer.spacy.en.SpacyEnglishTokenizerExceptions;
import ai.grazie.nlp.tokenizer.spacy.ru.SpacyRussian;
import ai.grazie.nlp.tokenizer.spacy.ru.SpacyRussianTokenizerExceptions;
import ai.grazie.nlp.tokenizer.spacy.uk.SpacyUkrainian;
import ai.grazie.nlp.tokenizer.spacy.uk.SpacyUkrainianTokenizerExceptions;
import ai.grazie.nlp.tokenizer.utils.UtilsKt;
import ai.grazie.nlp.utils.RangesKt;
import ai.grazie.nlp.utils.normalization.AggregatedNormalizer;
import ai.grazie.nlp.utils.normalization.CapsNormalizer;
import ai.grazie.nlp.utils.normalization.UnicodePunctuationNormalizer;
import ai.grazie.text.Text;
import ai.grazie.text.TextRange;
import ai.grazie.utils.mpp.LoggerFactory;
import ai.grazie.utils.mpp.MPPLogger;
import com.intellij.textMatching.PrefixMatchingUtil;
import com.intellij.xml.util.HtmlUtil;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.collections.SetsKt;
import kotlin.jvm.functions.Function0;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.ranges.IntRange;
import kotlin.sequences.Sequence;
import kotlin.text.CharsKt;
import kotlin.text.MatchResult;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;

/* compiled from: SpacyTokenizerFast.kt */
@Metadata(mv = {1, 7, 0}, k = 1, xi = 48, d1 = {"��^\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\"\n\u0002\u0010\f\n\u0002\b\u0003\n\u0002\u0010 \n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010\u000e\n��\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\b\n\u0002\b\u0002\n\u0002\u0010\u000b\n\u0002\b\n\u0018�� '2\u00020\u0001:\u0002'(B-\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0003\u0012\u0006\u0010\u0005\u001a\u00020\u0003\u0012\u0006\u0010\u0006\u001a\u00020\u0007\u0012\u0006\u0010\b\u001a\u00020\t¢\u0006\u0002\u0010\nJ\u001e\u0010\u0010\u001a\b\u0012\u0004\u0012\u00020\u00120\u00112\u0006\u0010\u0013\u001a\u00020\u00142\u0006\u0010\u0015\u001a\u00020\u0016H\u0002J\u0016\u0010\u0017\u001a\b\u0012\u0004\u0012\u00020\u00190\u00182\u0006\u0010\u001a\u001a\u00020\u0016H\u0002J\u0010\u0010\u001b\u001a\u00020\u001c2\u0006\u0010\u001a\u001a\u00020\u0016H\u0002J\u0010\u0010\u001d\u001a\u00020\u001c2\u0006\u0010\u001a\u001a\u00020\u0016H\u0002J\u0010\u0010\u001e\u001a\u00020\u001f2\u0006\u0010 
\u001a\u00020\u0016H\u0002J\u0016\u0010!\u001a\b\u0012\u0004\u0012\u00020\u00160\u00112\u0006\u0010\"\u001a\u00020\u0016H\u0002J\u0016\u0010#\u001a\b\u0012\u0004\u0012\u00020\u00120\u00112\u0006\u0010$\u001a\u00020\u0016H\u0016J\u0016\u0010%\u001a\b\u0012\u0004\u0012\u00020\u00120\u00112\u0006\u0010$\u001a\u00020\u0016H\u0002J\u0016\u0010&\u001a\b\u0012\u0004\u0012\u00020\u00120\u00112\u0006\u0010\u0015\u001a\u00020\u0016H\u0002R\u000e\u0010\u0004\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\fX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\b\u001a\u00020\tX\u0082\u0004¢\u0006\u0002\n��R\u0011\u0010\u0002\u001a\u00020\u0003¢\u0006\b\n��\u001a\u0004\b\u000e\u0010\u000fR\u000e\u0010\u0006\u001a\u00020\u0007X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0005\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��¨\u0006)"}, d2 = {"Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast;", "Lai/grazie/nlp/tokenizer/Tokenizer;", PrefixMatchingUtil.baseName, "Lkotlin/text/Regex;", "infix", "suffix", "specialCases", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerSpecialCases;", "preNormalizer", "Lai/grazie/nlp/utils/normalization/AggregatedNormalizer;", "(Lkotlin/text/Regex;Lkotlin/text/Regex;Lkotlin/text/Regex;Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerSpecialCases;Lai/grazie/nlp/utils/normalization/AggregatedNormalizer;)V", "oneCharSuffixes", "", "", "getPrefix", "()Lkotlin/text/Regex;", "finalizeSplit", "", "Lai/grazie/nlp/tokenizer/Tokenizer$Token;", "splits", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$TokenSplits;", "word", "", "findInfix", "Lkotlin/sequences/Sequence;", "Lkotlin/text/MatchResult;", "tok", "findPrefix", "", "findSuffix", "isSpecialOrFinal", "", "token", "splitInfixes", "remainingWord", "tokenize", "text", "tokenizeSpacyNormalized", "tokenizeToken", "Companion", "TokenSplits", "nlp-tokenizer"})
@SourceDebugExtension({"SMAP\nSpacyTokenizerFast.kt\nKotlin\n*S Kotlin\n*F\n+ 1 SpacyTokenizerFast.kt\nai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast\n+ 2 _Strings.kt\nkotlin/text/StringsKt___StringsKt\n+ 3 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,354:1\n1083#2,2:355\n1064#2,2:357\n1064#2,2:359\n1064#2,2:376\n766#3:361\n857#3,2:362\n1549#3:364\n1620#3,3:365\n1549#3:368\n1620#3,3:369\n1549#3:372\n1620#3,3:373\n1855#3,2:378\n*S KotlinDebug\n*F\n+ 1 SpacyTokenizerFast.kt\nai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast\n*L\n170#1:355,2\n182#1:357,2\n183#1:359,2\n286#1:376,2\n200#1:361\n200#1:362,2\n201#1:364\n201#1:365,3\n214#1:368\n214#1:369,3\n226#1:372\n226#1:373,3\n292#1:378,2\n*E\n"})
/* loaded from: input_file:ai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast.class */
public final class SpacyTokenizerFast implements Tokenizer {

    /** Pattern searched by {@code findPrefix} to measure a prefix match inside a word. */
    @NotNull
    private final Regex prefix;

    /** Pattern whose matches {@code splitInfixes} uses as split points inside a word. */
    @NotNull
    private final Regex infix;

    /** Pattern searched by {@code findSuffix} to measure a suffix match inside a word. */
    @NotNull
    private final Regex suffix;

    /** Special-case lookup; a hit in {@code finalizeSplit} replaces the word with the listed token texts. */
    @NotNull
    private final SpacyTokenizerSpecialCases specialCases;

    /** Normalizer applied to the input of {@code tokenize}; it is expected to keep the text length unchanged. */
    @NotNull
    private final AggregatedNormalizer preNormalizer;

    /**
     * The characters {@code . : ; , ? !}, assigned once in the constructor.
     * NOTE(review): no decompiled method body reads this field; presumably it is consumed by
     * {@code tokenizeToken}, whose body could not be decompiled - confirm against the Kotlin source.
     */
    @NotNull
    private final Set<Character> oneCharSuffixes;

    /** Kotlin companion-object instance that exposes the {@code load*} factory methods. */
    @NotNull
    public static final Companion Companion = new Companion(null);

    /** Shared logger. Note that the category string names "SpacyTokenizer", not "SpacyTokenizerFast". */
    @NotNull
    private static final MPPLogger logger = LoggerFactory.INSTANCE.create("ai.grazie.nlp.tokenizer.spacy.SpacyTokenizer");

    /* compiled from: SpacyTokenizerFast.kt */
    @Metadata(mv = {1, 7, 0}, k = 1, xi = 48, d1 = {"��0\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010\u000b\n��\n\u0002\u0010 \n\u0002\u0010\u000e\n\u0002\b\u0006\b\u0086\u0003\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002J(\u0010\u0005\u001a\u00020\u00062\u0006\u0010\u0007\u001a\u00020\b2\b\b\u0002\u0010\t\u001a\u00020\n2\u000e\b\u0002\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\fJ*\u0010\u000e\u001a\u00020\u00062\b\b\u0002\u0010\t\u001a\u00020\n2\u000e\b\u0002\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\f2\b\b\u0002\u0010\u000f\u001a\u00020\nJ*\u0010\u0010\u001a\u00020\u00062\b\b\u0002\u0010\t\u001a\u00020\n2\u000e\b\u0002\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\f2\b\b\u0002\u0010\u000f\u001a\u00020\nJ \u0010\u0011\u001a\u00020\u00062\b\b\u0002\u0010\t\u001a\u00020\n2\u000e\b\u0002\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\fJ \u0010\u0012\u001a\u00020\u00062\b\b\u0002\u0010\t\u001a\u00020\n2\u000e\b\u0002\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\r0\fR\u000e\u0010\u0003\u001a\u00020\u0004X\u0082\u0004¢\u0006\u0002\n��¨\u0006\u0013"}, d2 = {"Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$Companion;", "", "()V", "logger", "Lai/grazie/utils/mpp/MPPLogger;", "load", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast;", HtmlUtil.LANGUAGE_ATTRIBUTE_NAME, "Lai/grazie/nlp/langs/Language;", "replaceApostropheWithQuote", "", "additionalPrefixes", "", "", "loadEnglish", "useNewAffixes", "loadGerman", "loadRussian", "loadUkrainian", "nlp-tokenizer"})
    /* loaded from: input_file:ai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$Companion.class */
    public static final class Companion {

        /* compiled from: SpacyTokenizerFast.kt */
        /**
         * Compiler-generated lookup table for the {@code when (language)} dispatch in {@code load}.
         * Index: {@code Language.ordinal()}; value: the switch case number (1 = ENGLISH, 2 = GERMAN,
         * 3 = RUSSIAN, 4 = UKRAINIAN). Unlisted languages keep the array default of 0.
         */
        @Metadata(mv = {1, 7, 0}, k = 3, xi = 48)
        /* loaded from: input_file:ai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$Companion$WhenMappings.class */
        public /* synthetic */ class WhenMappings {
            public static final /* synthetic */ int[] $EnumSwitchMapping$0;

            static {
                int[] caseByOrdinal = new int[Language.values().length];
                // Each constant is registered in its own try block: a constant that is missing at
                // run time (NoSuchFieldError) must not prevent the remaining ones from being mapped.
                try {
                    caseByOrdinal[Language.ENGLISH.ordinal()] = 1;
                } catch (NoSuchFieldError ignored) {
                    // constant absent at run time: leave its slot at 0
                }
                try {
                    caseByOrdinal[Language.GERMAN.ordinal()] = 2;
                } catch (NoSuchFieldError ignored) {
                    // constant absent at run time: leave its slot at 0
                }
                try {
                    caseByOrdinal[Language.RUSSIAN.ordinal()] = 3;
                } catch (NoSuchFieldError ignored) {
                    // constant absent at run time: leave its slot at 0
                }
                try {
                    caseByOrdinal[Language.UKRAINIAN.ordinal()] = 4;
                } catch (NoSuchFieldError ignored) {
                    // constant absent at run time: leave its slot at 0
                }
                $EnumSwitchMapping$0 = caseByOrdinal;
            }
        }

        /** Private no-op constructor; instances are only created via the synthetic marker constructor of this class. */
        private Companion() {
        }

        /**
         * Creates a tokenizer for the given language by delegating to the matching
         * {@code load<Language>} factory.
         *
         * @param language language to load; only ENGLISH, GERMAN, RUSSIAN and UKRAINIAN are handled
         * @param z        flag forwarded to the language loader, which passes it to
         *                 {@code UnicodePunctuationNormalizer} (the Kotlin metadata names it
         *                 {@code replaceApostropheWithQuote})
         * @param list     additional prefix patterns appended to the language's own prefixes
         * @return the configured tokenizer
         * @throws IllegalStateException if {@code language} is none of the four handled languages
         */
        @NotNull
        public final SpacyTokenizerFast load(@NotNull Language language, boolean z, @NotNull List<String> list) {
            Intrinsics.checkNotNullParameter(language, HtmlUtil.LANGUAGE_ATTRIBUTE_NAME);
            Intrinsics.checkNotNullParameter(list, "additionalPrefixes");
            final int languageCase = WhenMappings.$EnumSwitchMapping$0[language.ordinal()];
            switch (languageCase) {
                case 1:
                    // English, with the third loader argument left at its default (mask bit 4)
                    return loadEnglish$default(this, z, list, false, 4, null);
                case 2:
                    // German, with the third loader argument left at its default (mask bit 4)
                    return loadGerman$default(this, z, list, false, 4, null);
                case 3:
                    return loadRussian(z, list);
                case 4:
                    return loadUkrainian(z, list);
                default:
                    throw new IllegalStateException("Language is not supported");
            }
        }

        /**
         * Compiler-generated bridge for Kotlin default arguments of {@code load}.
         * Bit 2 of {@code i} replaces {@code z} with {@code false}; bit 4 replaces {@code list}
         * with an empty list. {@code obj} is not used.
         */
        public static /* synthetic */ SpacyTokenizerFast load$default(Companion companion, Language language, boolean z, List list, int i, Object obj) {
            final boolean effectiveFlag = (i & 2) == 0 && z;
            final List effectivePrefixes = (i & 4) == 0 ? list : CollectionsKt.emptyList();
            return companion.load(language, effectiveFlag, effectivePrefixes);
        }

        /**
         * Builds the English tokenizer.
         *
         * @param z    flag passed to {@code UnicodePunctuationNormalizer}
         * @param list extra prefix patterns; when non-empty they are appended to the English prefixes
         * @param z2   when {@code true}, the "new" English infixes and suffixes are appended to the
         *             regular ones; when {@code false}, nothing is appended
         * @return a tokenizer using the base plus English special cases
         */
        @NotNull
        public final SpacyTokenizerFast loadEnglish(boolean z, @NotNull List<String> list, boolean z2) {
            Intrinsics.checkNotNullParameter(list, "additionalPrefixes");
            // The five constructor arguments are built in the same order the constructor takes them.
            Regex prefixRegex = SpacyBaseLanguage.INSTANCE.compilePrefix(
                    list.isEmpty()
                            ? SpacyEnglish.INSTANCE.getPrefixes()
                            : CollectionsKt.plus(SpacyEnglish.INSTANCE.getPrefixes(), list));
            Regex infixRegex = SpacyBaseLanguage.INSTANCE.compileInfix(
                    CollectionsKt.plus(
                            SpacyEnglish.INSTANCE.getInfixes(),
                            z2 ? SpacyEnglish.INSTANCE.getNewInfixes() : CollectionsKt.emptyList()));
            Regex suffixRegex = SpacyBaseLanguage.INSTANCE.compileSuffix(
                    CollectionsKt.plus(
                            SpacyEnglish.INSTANCE.getSuffixes(),
                            z2 ? SpacyEnglish.INSTANCE.getNewSuffixes() : CollectionsKt.emptyList()));
            SpacyTokenizerSpecialCases exceptions = new SpacyTokenizerSpecialCases(
                    SpacyBaseLanguage.BaseExceptions.INSTANCE.getExceptions(),
                    SpacyEnglishTokenizerExceptions.INSTANCE.getExceptions());
            AggregatedNormalizer normalizer = new AggregatedNormalizer(new UnicodePunctuationNormalizer(z), new CapsNormalizer());
            return new SpacyTokenizerFast(prefixRegex, infixRegex, suffixRegex, exceptions, normalizer);
        }

        /**
         * Compiler-generated bridge for Kotlin default arguments of {@code loadEnglish}.
         * Mask {@code i}: bit 1 resets {@code z} to {@code false}, bit 2 replaces {@code list} with an
         * empty list, bit 4 resets {@code z2} to {@code false}. {@code obj} is not used.
         */
        public static /* synthetic */ SpacyTokenizerFast loadEnglish$default(Companion companion, boolean z, List list, boolean z2, int i, Object obj) {
            final boolean effectiveFlag = (i & 1) == 0 && z;
            final List effectivePrefixes = (i & 2) == 0 ? list : CollectionsKt.emptyList();
            final boolean effectiveNewAffixes = (i & 4) == 0 && z2;
            return companion.loadEnglish(effectiveFlag, effectivePrefixes, effectiveNewAffixes);
        }

        /**
         * Builds the German tokenizer.
         *
         * @param z    flag passed to {@code UnicodePunctuationNormalizer}
         * @param list extra prefix patterns; when non-empty they are appended to the German prefixes
         * @param z2   when {@code true}, the "new" German infixes are used instead of the regular
         *             ones (they replace them, unlike the English loader which appends)
         * @return a tokenizer using the base plus German special cases
         */
        @NotNull
        public final SpacyTokenizerFast loadGerman(boolean z, @NotNull List<String> list, boolean z2) {
            Intrinsics.checkNotNullParameter(list, "additionalPrefixes");
            // The five constructor arguments are built in the same order the constructor takes them.
            Regex prefixRegex = SpacyBaseLanguage.INSTANCE.compilePrefix(
                    list.isEmpty()
                            ? SpacyGerman.INSTANCE.getPrefixes()
                            : CollectionsKt.plus(SpacyGerman.INSTANCE.getPrefixes(), list));
            Regex infixRegex = SpacyBaseLanguage.INSTANCE.compileInfix(
                    z2 ? SpacyGerman.INSTANCE.getNewInfixes() : SpacyGerman.INSTANCE.getInfixes());
            Regex suffixRegex = SpacyBaseLanguage.INSTANCE.compileSuffix(SpacyGerman.INSTANCE.getSuffixes());
            SpacyTokenizerSpecialCases exceptions = new SpacyTokenizerSpecialCases(
                    SpacyBaseLanguage.BaseExceptions.INSTANCE.getExceptions(),
                    SpacyGermanTokenizerExceptions.INSTANCE.getExceptions());
            AggregatedNormalizer normalizer = new AggregatedNormalizer(new UnicodePunctuationNormalizer(z), new CapsNormalizer());
            return new SpacyTokenizerFast(prefixRegex, infixRegex, suffixRegex, exceptions, normalizer);
        }

        /**
         * Compiler-generated bridge for Kotlin default arguments of {@code loadGerman}.
         * Mask {@code i}: bit 1 resets {@code z} to {@code false}, bit 2 replaces {@code list} with an
         * empty list, bit 4 resets {@code z2} to {@code false}. {@code obj} is not used.
         */
        public static /* synthetic */ SpacyTokenizerFast loadGerman$default(Companion companion, boolean z, List list, boolean z2, int i, Object obj) {
            final boolean effectiveFlag = (i & 1) == 0 && z;
            final List effectivePrefixes = (i & 2) == 0 ? list : CollectionsKt.emptyList();
            final boolean effectiveNewAffixes = (i & 4) == 0 && z2;
            return companion.loadGerman(effectiveFlag, effectivePrefixes, effectiveNewAffixes);
        }

        /**
         * Builds the Russian tokenizer.
         *
         * @param z    flag passed to {@code UnicodePunctuationNormalizer}
         * @param list extra prefix patterns; when non-empty they are appended to the Russian prefixes
         * @return a tokenizer using the base plus Russian special cases
         */
        @NotNull
        public final SpacyTokenizerFast loadRussian(boolean z, @NotNull List<String> list) {
            Intrinsics.checkNotNullParameter(list, "additionalPrefixes");
            // The five constructor arguments are built in the same order the constructor takes them.
            Regex prefixRegex = SpacyBaseLanguage.INSTANCE.compilePrefix(
                    list.isEmpty()
                            ? SpacyRussian.INSTANCE.getPrefixes()
                            : CollectionsKt.plus(SpacyRussian.INSTANCE.getPrefixes(), list));
            Regex infixRegex = SpacyBaseLanguage.INSTANCE.compileInfix(SpacyRussian.INSTANCE.getInfixes());
            Regex suffixRegex = SpacyBaseLanguage.INSTANCE.compileSuffix(SpacyRussian.INSTANCE.getSuffixes());
            SpacyTokenizerSpecialCases exceptions = new SpacyTokenizerSpecialCases(
                    SpacyBaseLanguage.BaseExceptions.INSTANCE.getExceptions(),
                    SpacyRussianTokenizerExceptions.INSTANCE.getExceptions());
            AggregatedNormalizer normalizer = new AggregatedNormalizer(new UnicodePunctuationNormalizer(z), new CapsNormalizer());
            return new SpacyTokenizerFast(prefixRegex, infixRegex, suffixRegex, exceptions, normalizer);
        }

        /**
         * Compiler-generated bridge for Kotlin default arguments of {@code loadRussian}.
         * Mask {@code i}: bit 1 resets {@code z} to {@code false}, bit 2 replaces {@code list} with an
         * empty list. {@code obj} is not used.
         */
        public static /* synthetic */ SpacyTokenizerFast loadRussian$default(Companion companion, boolean z, List list, int i, Object obj) {
            final boolean effectiveFlag = (i & 1) == 0 && z;
            final List effectivePrefixes = (i & 2) == 0 ? list : CollectionsKt.emptyList();
            return companion.loadRussian(effectiveFlag, effectivePrefixes);
        }

        /**
         * Builds the Ukrainian tokenizer.
         *
         * @param z    flag passed to {@code UnicodePunctuationNormalizer}
         * @param list extra prefix patterns; when non-empty they are appended to the Ukrainian prefixes
         * @return a tokenizer using the base plus Ukrainian special cases
         */
        @NotNull
        public final SpacyTokenizerFast loadUkrainian(boolean z, @NotNull List<String> list) {
            Intrinsics.checkNotNullParameter(list, "additionalPrefixes");
            // The five constructor arguments are built in the same order the constructor takes them.
            Regex prefixRegex = SpacyBaseLanguage.INSTANCE.compilePrefix(
                    list.isEmpty()
                            ? SpacyUkrainian.INSTANCE.getPrefixes()
                            : CollectionsKt.plus(SpacyUkrainian.INSTANCE.getPrefixes(), list));
            Regex infixRegex = SpacyBaseLanguage.INSTANCE.compileInfix(SpacyUkrainian.INSTANCE.getInfixes());
            Regex suffixRegex = SpacyBaseLanguage.INSTANCE.compileSuffix(SpacyUkrainian.INSTANCE.getSuffixes());
            SpacyTokenizerSpecialCases exceptions = new SpacyTokenizerSpecialCases(
                    SpacyBaseLanguage.BaseExceptions.INSTANCE.getExceptions(),
                    SpacyUkrainianTokenizerExceptions.INSTANCE.getExceptions());
            AggregatedNormalizer normalizer = new AggregatedNormalizer(new UnicodePunctuationNormalizer(z), new CapsNormalizer());
            return new SpacyTokenizerFast(prefixRegex, infixRegex, suffixRegex, exceptions, normalizer);
        }

        /**
         * Compiler-generated bridge for Kotlin default arguments of {@code loadUkrainian}.
         * Mask {@code i}: bit 1 resets {@code z} to {@code false}, bit 2 replaces {@code list} with an
         * empty list. {@code obj} is not used.
         */
        public static /* synthetic */ SpacyTokenizerFast loadUkrainian$default(Companion companion, boolean z, List list, int i, Object obj) {
            final boolean effectiveFlag = (i & 1) == 0 && z;
            final List effectivePrefixes = (i & 2) == 0 ? list : CollectionsKt.emptyList();
            return companion.loadUkrainian(effectiveFlag, effectivePrefixes);
        }

        /**
         * Compiler-generated constructor that delegates to the private no-arg constructor.
         * The marker argument is not used; it only gives this constructor a distinct signature.
         */
        public /* synthetic */ Companion(DefaultConstructorMarker defaultConstructorMarker) {
            this();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* compiled from: SpacyTokenizerFast.kt */
    @Metadata(mv = {1, 7, 0}, k = 1, xi = 48, d1 = {"��&\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n\u0002\b\u0007\n\u0002\u0010 \n\u0002\u0018\u0002\n��\b\u0002\u0018��2\u00020\u0001B\u0005¢\u0006\u0002\u0010\u0002J\f\u0010\r\u001a\b\u0012\u0004\u0012\u00020\u000f0\u000eR!\u0010\u0003\u001a\u0012\u0012\u0004\u0012\u00020\u00050\u0004j\b\u0012\u0004\u0012\u00020\u0005`\u0006¢\u0006\b\n��\u001a\u0004\b\u0007\u0010\bR!\u0010\t\u001a\u0012\u0012\u0004\u0012\u00020\u00050\u0004j\b\u0012\u0004\u0012\u00020\u0005`\u0006¢\u0006\b\n��\u001a\u0004\b\n\u0010\bR!\u0010\u000b\u001a\u0012\u0012\u0004\u0012\u00020\u00050\u0004j\b\u0012\u0004\u0012\u00020\u0005`\u0006¢\u0006\b\n��\u001a\u0004\b\f\u0010\b¨\u0006\u0010"}, d2 = {"Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$TokenSplits;", "", "()V", "prefixes", "Ljava/util/ArrayList;", "", "Lkotlin/collections/ArrayList;", "getPrefixes", "()Ljava/util/ArrayList;", "suffixes", "getSuffixes", "wordTokens", "getWordTokens", "toList", "", "Lai/grazie/nlp/tokenizer/Tokenizer$Token;", "nlp-tokenizer"})
    @SourceDebugExtension({"SMAP\nSpacyTokenizerFast.kt\nKotlin\n*S Kotlin\n*F\n+ 1 SpacyTokenizerFast.kt\nai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$TokenSplits\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,354:1\n1549#2:355\n1620#2,3:356\n*S KotlinDebug\n*F\n+ 1 SpacyTokenizerFast.kt\nai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$TokenSplits\n*L\n148#1:355\n148#1:356,3\n*E\n"})
    /* loaded from: input_file:ai/grazie/nlp/tokenizer/spacy/SpacyTokenizerFast$TokenSplits.class */
    // Mutable accumulator for the pieces of one word: prefix pieces, word-body pieces and suffix
    // pieces. toList() emits them as prefixes, then word tokens, then suffixes in reverse order.
    public static final class TokenSplits {

        /** Prefix pieces, emitted first and in insertion order. */
        @NotNull
        private final ArrayList<String> prefixes = new ArrayList<>();

        /** Suffix pieces, emitted last and in reverse insertion order. */
        @NotNull
        private final ArrayList<String> suffixes = new ArrayList<>();

        /** Pieces of the word body, emitted between prefixes and suffixes. */
        @NotNull
        private final ArrayList<String> wordTokens = new ArrayList<>();

        /** @return the live, mutable list of prefix pieces */
        @NotNull
        public final ArrayList<String> getPrefixes() {
            return this.prefixes;
        }

        /** @return the live, mutable list of suffix pieces */
        @NotNull
        public final ArrayList<String> getSuffixes() {
            return this.suffixes;
        }

        /** @return the live, mutable list of word-body pieces */
        @NotNull
        public final ArrayList<String> getWordTokens() {
            return this.wordTokens;
        }

        /**
         * Converts the collected pieces into tokens with back-to-back ranges.
         * Ranges start at 0 for the first piece and have an inclusive end, so they are relative to
         * the concatenation of all pieces rather than to any enclosing text.
         *
         * @return one token per piece, in output order
         */
        @NotNull
        public final List<Tokenizer.Token> toList() {
            List<String> ordered = CollectionsKt.plus(CollectionsKt.plus(this.prefixes, this.wordTokens), CollectionsKt.reversed(this.suffixes));
            List<Tokenizer.Token> tokens = new ArrayList<>(ordered.size());
            int cursor = 0;
            for (String piece : ordered) {
                int start = cursor;
                cursor = start + piece.length();
                // cursor is the exclusive end; IntRange takes an inclusive one
                tokens.add(new Tokenizer.Token(piece, new IntRange(start, cursor - 1)));
            }
            return tokens;
        }
    }

    public SpacyTokenizerFast(@NotNull Regex regex, @NotNull Regex regex2, @NotNull Regex regex3, @NotNull SpacyTokenizerSpecialCases spacyTokenizerSpecialCases, @NotNull AggregatedNormalizer aggregatedNormalizer) {
        Intrinsics.checkNotNullParameter(regex, PrefixMatchingUtil.baseName);
        Intrinsics.checkNotNullParameter(regex2, "infix");
        Intrinsics.checkNotNullParameter(regex3, "suffix");
        Intrinsics.checkNotNullParameter(spacyTokenizerSpecialCases, "specialCases");
        Intrinsics.checkNotNullParameter(aggregatedNormalizer, "preNormalizer");
        this.prefix = regex;
        this.infix = regex2;
        this.suffix = regex3;
        this.specialCases = spacyTokenizerSpecialCases;
        this.preNormalizer = aggregatedNormalizer;
        this.oneCharSuffixes = SetsKt.setOf(new Character[]{'.', ':', ';', ',', '?', '!'});
    }

    /** @return the prefix pattern this tokenizer was constructed with */
    @NotNull
    public final Regex getPrefix() {
        return this.prefix;
    }

    /**
     * Tokenizes {@code str} and returns tokens whose text is taken from the original input.
     *
     * Steps, in order:
     * 1. Normalize the text; if normalization changed the length, log an error and use the raw text.
     * 2. Split into tokens with {@code tokenizeSpacyNormalized}.
     * 3. If the last token ends with '.' and contains a letter, split the final '.' off.
     * 4. Merge neighbouring tokens that consist only of '?' / '!' and touch each other.
     * 5. Drop blank tokens.
     * 6. Re-read every token's text from {@code str} using the token's range.
     *
     * @param str text to tokenize
     * @return the resulting tokens
     */
    @Override // ai.grazie.nlp.tokenizer.Tokenizer
    @NotNull
    public List<Tokenizer.Token> tokenize(@NotNull String str) {
        boolean z;
        boolean z2;
        boolean z3;
        Intrinsics.checkNotNullParameter(str, "text");
        String normalize = this.preNormalizer.normalize(str);
        // Token ranges are later applied to the original text, so a length change would misalign them.
        if (normalize.length() != str.length()) {
            MPPLogger.error$default(logger, new Function0<String>() { // from class: ai.grazie.nlp.tokenizer.spacy.SpacyTokenizerFast$tokenize$1
                @NotNull
                /* renamed from: invoke, reason: merged with bridge method [inline-methods] */
                public final String m127invoke() {
                    return "Normalized length is not equal to original text length. This shouldn't happen as normalizers should preserve text length.";
                }
            }, null, 2, null);
            // Fall back to the un-normalized text.
            normalize = str;
        }
        List<Tokenizer.Token> list = tokenizeSpacyNormalized(normalize);
        if (!list.isEmpty()) {
            Tokenizer.Token token = (Tokenizer.Token) CollectionsKt.last(list);
            if (StringsKt.endsWith$default(token.getToken(), '.', false, 2, (Object) null)) {
                // z3: does the last token contain at least one letter?
                String token2 = token.getToken();
                int i = 0;
                while (true) {
                    if (i >= token2.length()) {
                        z3 = false;
                        break;
                    }
                    if (Character.isLetter(token2.charAt(i))) {
                        z3 = true;
                        break;
                    }
                    i++;
                }
                if (z3) {
                    // Replace the last token by two tokens: its text without the final char, and ".".
                    String substring = token.getToken().substring(0, token.getToken().length() - 1);
                    Intrinsics.checkNotNullExpressionValue(substring, "this as java.lang.String…ing(startIndex, endIndex)");
                    list = CollectionsKt.plus(list.subList(0, list.size() - 1), UtilsKt.splitToken(token, (List<String>) CollectionsKt.listOf(new String[]{substring, "."})));
                }
            }
            // Merge pass over the tokens; arrayList holds the merged result so far.
            ArrayList arrayList = new ArrayList();
            int i2 = 0;
            for (Tokenizer.Token token3 : list) {
                int i3 = i2;
                i2++;
                if (i3 == 0) {
                    // The first token has no predecessor to merge with.
                    arrayList.add(token3);
                } else {
                    // z: the previously emitted token consists only of '?' / '!' characters.
                    String token4 = ((Tokenizer.Token) CollectionsKt.last(arrayList)).getToken();
                    int i4 = 0;
                    while (true) {
                        if (i4 >= token4.length()) {
                            z = true;
                            break;
                        }
                        char charAt = token4.charAt(i4);
                        if (!(charAt == '?' || charAt == '!')) {
                            z = false;
                            break;
                        }
                        i4++;
                    }
                    boolean z4 = z;
                    // z2: the current token consists only of '?' / '!' characters.
                    String token5 = token3.getToken();
                    int i5 = 0;
                    while (true) {
                        if (i5 >= token5.length()) {
                            z2 = true;
                            break;
                        }
                        char charAt2 = token5.charAt(i5);
                        if (!(charAt2 == '?' || charAt2 == '!')) {
                            z2 = false;
                            break;
                        }
                        i5++;
                    }
                    boolean z5 = z2;
                    // Merge only when both qualify and the current token starts where the previous
                    // one ends (getCheckedEndExclusive is presumably the exclusive end index - confirm).
                    if (z4 && z5 && RangesKt.getCheckedEndExclusive(((Tokenizer.Token) CollectionsKt.last(arrayList)).getRange()) == token3.getRange().getFirst()) {
                        Tokenizer.Token token6 = (Tokenizer.Token) CollectionsKt.removeLast(arrayList);
                        arrayList.add(new Tokenizer.Token(new Text(token6.getToken() + token3.getToken()), new TextRange(token6.getRange().getFirst(), RangesKt.getCheckedEndExclusive(token3.getRange()))));
                    } else {
                        arrayList.add(token3);
                    }
                }
            }
            list = arrayList;
        }
        // Keep only tokens whose text is not blank.
        ArrayList arrayList2 = new ArrayList();
        for (Object obj : list) {
            if (!StringsKt.isBlank(((Tokenizer.Token) obj).getToken())) {
                arrayList2.add(obj);
            }
        }
        ArrayList<Tokenizer.Token> arrayList3 = arrayList2;
        // Rebuild each token from the original input so the returned text is the un-normalized one.
        ArrayList arrayList4 = new ArrayList(CollectionsKt.collectionSizeOrDefault(arrayList3, 10));
        for (Tokenizer.Token token7 : arrayList3) {
            arrayList4.add(new Tokenizer.Token(StringsKt.substring(str, token7.getRange()), token7.getRange()));
        }
        return arrayList4;
    }

    /**
     * Splits the text into whitespace-delimited words, tokenizes each word with
     * {@code tokenizeToken}, and shifts the resulting ranges by the word's start index so they
     * refer to positions in {@code str}.
     *
     * @param str text to split
     * @return all tokens in text order; an empty list for empty input
     */
    private final List<Tokenizer.Token> tokenizeSpacyNormalized(String str) {
        if (str.isEmpty()) {
            return CollectionsKt.emptyList();
        }
        ArrayList<Tokenizer.Token> collected = new ArrayList<>();
        int wordStart = -1; // -1 means "not inside a word"
        final int textLength = str.length();
        int pos = 0;
        while (pos < textLength) {
            if (!CharsKt.isWhitespace(str.charAt(pos))) {
                if (wordStart == -1) {
                    wordStart = pos;
                }
            } else {
                if (wordStart != -1) {
                    String word = str.substring(wordStart, pos);
                    Intrinsics.checkNotNullExpressionValue(word, "this as java.lang.String…ing(startIndex, endIndex)");
                    collected.addAll(shiftedWordTokens(word, wordStart));
                }
                wordStart = -1;
            }
            pos++;
        }
        // A word that runs to the end of the text is not closed by whitespace; flush it here.
        if (wordStart != -1) {
            String tail = str.substring(wordStart);
            Intrinsics.checkNotNullExpressionValue(tail, "this as java.lang.String).substring(startIndex)");
            collected.addAll(shiftedWordTokens(tail, wordStart));
        }
        return collected;
    }

    /** Tokenizes one word and returns copies of its tokens with ranges moved by {@code offset}. */
    private List<Tokenizer.Token> shiftedWordTokens(String word, int offset) {
        List<Tokenizer.Token> wordLocal = tokenizeToken(word);
        List<Tokenizer.Token> shifted = new ArrayList<>(CollectionsKt.collectionSizeOrDefault(wordLocal, 10));
        for (Tokenizer.Token piece : wordLocal) {
            shifted.add(new Tokenizer.Token(piece.getToken(), RangesKt.withOffset(piece.getRange(), offset)));
        }
        return shifted;
    }

    /*
     * Tokenizes a single whitespace-free word (it is called from tokenizeSpacyNormalized with one
     * word at a time and must return tokens whose ranges are relative to that word).
     *
     * WARNING: the decompiler could not reconstruct this method. The body below is a placeholder
     * that always throws UnsupportedOperationException, so this decompiled class cannot tokenize
     * any non-blank text as written.
     *
     * NOTE(review): the surviving fragments below suggest the tail of the original logic: when
     * isSpecialOrFinal(word) or specialCases.urlMatch(word) holds, the result is
     * finalizeSplit(splits, word); otherwise splitInfixes(word) is added to the word tokens and
     * splits.toList() is returned. The earlier part (presumably the prefix/suffix stripping that
     * uses findPrefix, findSuffix and oneCharSuffixes) is not visible - confirm against the
     * original Kotlin source or the bytecode dump.
     */
    /* JADX WARN: Code restructure failed: missing block: B:35:0x012b, code lost:
    
        if (isSpecialOrFinal(r8) != false) goto L42;
     */
    /* JADX WARN: Code restructure failed: missing block: B:37:0x0136, code lost:
    
        if (r5.specialCases.urlMatch(r8) == false) goto L44;
     */
    /* JADX WARN: Code restructure failed: missing block: B:38:0x0140, code lost:
    
        r0.getWordTokens().addAll(splitInfixes(r8));
     */
    /* JADX WARN: Code restructure failed: missing block: B:39:0x0154, code lost:
    
        return r0.toList();
     */
    /* JADX WARN: Code restructure failed: missing block: B:41:0x013f, code lost:
    
        return finalizeSplit(r0, r8);
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    private final java.util.List<ai.grazie.nlp.tokenizer.Tokenizer.Token> tokenizeToken(java.lang.String r6) {
        /*
            Method dump skipped, instructions count: 341
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler; not the real implementation.
        throw new UnsupportedOperationException("Method not decompiled: ai.grazie.nlp.tokenizer.spacy.SpacyTokenizerFast.tokenizeToken(java.lang.String):java.util.List");
    }

    /**
     * Tells whether {@code str} needs no further splitting: it is either a registered special
     * case or consists solely of letters. An empty string with no special-case entry also
     * yields {@code true}, because it contains no non-letter character.
     *
     * @param str candidate word
     * @return {@code true} for special cases and letter-only strings, {@code false} otherwise
     */
    private final boolean isSpecialOrFinal(String str) {
        if (this.specialCases.get(str) != null) {
            return true;
        }
        for (int idx = 0; idx < str.length(); idx++) {
            if (!Character.isLetter(str.charAt(idx))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Adds the final word to {@code tokenSplits} and converts the splits to tokens.
     * If the word has a special-case entry, the {@code orth} text of every entry element is added
     * in order; otherwise the word itself is added as a single piece.
     *
     * @param tokenSplits accumulator holding the pieces collected so far; it is modified
     * @param str         remaining word
     * @return the tokens produced by {@code tokenSplits.toList()}
     */
    private final List<Tokenizer.Token> finalizeSplit(TokenSplits tokenSplits, String str) {
        List<SpacyTokenInfo> replacement = this.specialCases.get(str);
        ArrayList<String> words = tokenSplits.getWordTokens();
        if (replacement == null) {
            words.add(str);
            return tokenSplits.toList();
        }
        for (SpacyTokenInfo info : replacement) {
            words.add(info.getOrth());
        }
        return tokenSplits.toList();
    }

    /**
     * Returns the length of the first match of the prefix pattern in {@code str}, or 0 if the
     * pattern does not match at all.
     * NOTE(review): the position of the match is not checked here; presumably the compiled
     * pattern is anchored to the start of the word - confirm.
     *
     * @param str word to inspect
     * @return number of characters covered by the match, or 0
     */
    private final int findPrefix(String str) {
        MatchResult hit = Regex.find$default(this.prefix, str, 0, 2, (Object) null);
        if (hit != null) {
            IntRange span = hit.getRange();
            // inclusive bounds, hence the +1
            return kotlin.ranges.RangesKt.last(span) - kotlin.ranges.RangesKt.first(span) + 1;
        }
        return 0;
    }

    /**
     * Returns the length of the first match of the suffix pattern in {@code str}, or 0 if the
     * pattern does not match at all.
     * NOTE(review): the position of the match is not checked here; presumably the compiled
     * pattern is anchored to the end of the word - confirm.
     *
     * @param str word to inspect
     * @return number of characters covered by the match, or 0
     */
    private final int findSuffix(String str) {
        MatchResult hit = Regex.find$default(this.suffix, str, 0, 2, (Object) null);
        if (hit != null) {
            IntRange span = hit.getRange();
            // inclusive bounds, hence the +1
            return kotlin.ranges.RangesKt.last(span) - kotlin.ranges.RangesKt.first(span) + 1;
        }
        return 0;
    }

    /**
     * Cuts {@code str} at every infix match. The result alternates between the text before a
     * match and the matched text itself, followed by whatever remains after the last match.
     * Matches that begin at index 0 are skipped entirely, and empty pieces are never emitted.
     *
     * @param str word to split
     * @return the non-empty pieces in text order
     */
    private final List<String> splitInfixes(String str) {
        List<String> pieces = new ArrayList<>();
        int consumed = 0; // index up to which str has already been emitted
        Iterator<MatchResult> matches = findInfix(str).iterator();
        while (matches.hasNext()) {
            IntRange span = matches.next().getRange();
            int from = span.getFirst();
            int until = span.getLast() + 1;
            if (from == 0) {
                // a match at the very start is not treated as an infix
                continue;
            }
            if (from != consumed) {
                String before = str.substring(consumed, from);
                Intrinsics.checkNotNullExpressionValue(before, "this as java.lang.String…ing(startIndex, endIndex)");
                pieces.add(before);
            }
            if (from != until) {
                String matched = str.substring(from, until);
                Intrinsics.checkNotNullExpressionValue(matched, "this as java.lang.String…ing(startIndex, endIndex)");
                pieces.add(matched);
            }
            consumed = until;
        }
        String rest = str.substring(consumed);
        Intrinsics.checkNotNullExpressionValue(rest, "this as java.lang.String).substring(startIndex)");
        if (!rest.isEmpty()) {
            pieces.add(rest);
        }
        return pieces;
    }

    /**
     * Returns all matches of the infix pattern in {@code str}, searching from index 0.
     *
     * @param str word to search
     * @return the sequence produced by {@code Regex.findAll}
     */
    private final Sequence<MatchResult> findInfix(String str) {
        return Regex.findAll$default(this.infix, str, 0, 2, (Object) null);
    }
}
