package org.languagetool.tagging.disambiguation;

import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.WordUtils;
import org.jetbrains.annotations.Nullable;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.tools.StringTools;

/* loaded from: input_file:org/languagetool/tagging/disambiguation/MultiWordChunker.class */
public class MultiWordChunker extends AbstractDisambiguator {
    // Resource path of the multiword list; opened in fillMaps() through JLanguageTool.getDataBroker().
    private final String filename;
    // Letter-case switches read by getTokenLettercaseVariants(): first-letter-capitalized,
    // ALL-UPPERCASE and Title Case variants respectively.
    private final boolean allowFirstCapitalized;
    private final boolean allowAllUppercase;
    private final boolean allowTitlecase;
    // Set to true at the very end of lazyInit(), after the four maps below have been assigned.
    private volatile boolean initialized;
    // First space-separated word of an entry -> largest word count among entries starting with it.
    private Map<String, Integer> mStartSpace;
    // First character of a space-free entry -> largest character length among such entries.
    private Map<String, Integer> mStartNoSpace;
    // Complete entry (with / without a space) -> AnalyzedToken carrying its tag and lemma.
    private Map<String, AnalyzedToken> mFullSpace;
    private Map<String, AnalyzedToken> mFullNoSpace;
    // NOTE(review): disambiguate() bounds its scans with the literal 20; presumably this constant
    // was inlined there by the compiler - confirm against the original sources.
    private static final int MAX_TOKENS_IN_MULTIWORD = 20;
    // Separator used by loadWords() unless the file contains a "#separatorRegExp=" line.
    private static final String DEFAULT_SEPARATOR = "\t";
    // Regular expression handed to String.split() in fillMaps(); (re)set by loadWords().
    private String separator;
    // When non-null, every line must hold the entry only and this tag is used for all entries.
    private String defaultTag;
    // When true, tokens covered by a matched entry get ignoreSpelling() called on them.
    private boolean addIgnoreSpelling;
    // When true, disambiguate() post-processes its result with removePreviousTags().
    private boolean isRemovePreviousTags;
    // Entries carrying this tag are matched but get no new reading added in disambiguate().
    public static String tagForNotAddingTags = "_NONE_";
    // Matches lines ending in "/" followed by one or more of the flags E, S, N;
    // loadWords() expands such lines into the stem plus the suffixed forms "e", "s", "n".
    private static final Pattern GermanLineExpander = Pattern.compile("^.*/[ESN]+$");

    /**
     * Creates a chunker with every letter-case variant switched off and no default tag.
     *
     * @param str resource path of the multiword list
     */
    public MultiWordChunker(String str) {
        this(str, false, false, false, null);
    }

    /**
     * Creates a chunker without a default tag, i.e. every line of the list must carry its own tag.
     *
     * @param str resource path of the multiword list
     * @param z   whether variants with the first character uppercased are accepted
     * @param z2  whether all-uppercase variants are accepted
     * @param z3  whether title-case variants are accepted (only consulted when {@code z} is true)
     */
    public MultiWordChunker(String str, boolean z, boolean z2, boolean z3) {
        this(str, z, z2, z3, null);
    }

    /**
     * Creates a chunker whose entries all receive the same tag.
     *
     * @param str  resource path of the multiword list
     * @param z    whether variants with the first character uppercased are accepted
     * @param z2   whether all-uppercase variants are accepted
     * @param z3   whether title-case variants are accepted (only consulted when {@code z} is true)
     * @param str2 tag applied to every entry; {@code null} means each line provides its own tag
     */
    public MultiWordChunker(String str, boolean z, boolean z2, boolean z3, String str2) {
        filename = str;
        allowFirstCapitalized = z;
        allowAllUppercase = z2;
        allowTitlecase = z3;
        defaultTag = str2;
        // Optional behaviours stay off until enabled through the setters.
        addIgnoreSpelling = false;
        isRemovePreviousTags = false;
    }

    /**
     * Builds the four lookup maps the first time it is called and does nothing afterwards.
     * The {@code initialized} flag is tested once without the lock and once more inside
     * {@code synchronized (this)}; it is only raised after all four maps have been assigned.
     */
    private void lazyInit() {
        if (!this.initialized) {
            synchronized (this) {
                if (!this.initialized) {
                    Object2IntOpenHashMap<String> startSpace = new Object2IntOpenHashMap<>();
                    Object2IntOpenHashMap<String> startNoSpace = new Object2IntOpenHashMap<>();
                    Object2ObjectOpenHashMap<String, AnalyzedToken> fullSpace = new Object2ObjectOpenHashMap<>();
                    Object2ObjectOpenHashMap<String, AnalyzedToken> fullNoSpace = new Object2ObjectOpenHashMap<>();

                    fillMaps(startSpace, startNoSpace, fullSpace, fullNoSpace);

                    // Shrink the tables to their final size before publishing them.
                    startSpace.trim();
                    startNoSpace.trim();
                    fullSpace.trim();
                    fullNoSpace.trim();

                    this.mStartSpace = startSpace;
                    this.mStartNoSpace = startNoSpace;
                    this.mFullSpace = fullSpace;
                    this.mFullNoSpace = fullNoSpace;
                    this.initialized = true;
                }
            }
        }
    }

    /**
     * Reads the multiword list named by {@code filename} and populates the four lookup maps.
     *
     * <p>Each line returned by {@link #loadWords(InputStream)} is split with {@code separator}.
     * Without a default tag a line must consist of exactly two parts (entry and tag); with a
     * default tag it must consist of the entry only. For every entry, the entry itself and its
     * letter-case variants are registered.
     *
     * <p>The resource stream is closed on every exit path, including the format errors below.
     *
     * @param map  first word of a space-containing entry -> largest word count seen for it
     * @param map2 first character of a space-free entry -> largest character length seen for it
     * @param map3 full space-containing entry -> token holding its tag and lemma
     * @param map4 full space-free entry -> token holding its tag and lemma
     * @throws RuntimeException if a line has the wrong number of parts, or wrapping an
     *                          {@link IOException} raised while closing the stream
     */
    private void fillMaps(Map<String, Integer> map, Map<String, Integer> map2, Map<String, AnalyzedToken> map3, Map<String, AnalyzedToken> map4) {
        // Maps each distinct string to itself so that repeated entries/tags share one instance.
        Map<String, String> interner = new HashMap<>();
        try (InputStream stream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(this.filename)) {
            // loadWords() also (re)sets this.separator, so it has to run before the split below.
            for (String line : loadWords(stream)) {
                String[] parts = line.split(this.separator);
                if (parts.length != 2 && this.defaultTag == null) {
                    throw new RuntimeException("Invalid format in " + this.filename + ": '" + line + "', expected two tab-separated parts");
                }
                if (parts.length != 1 && this.defaultTag != null) {
                    throw new RuntimeException("Invalid format in " + this.filename + ": '" + line + "', expected one element with no separator");
                }
                String entry = interner.computeIfAbsent(parts[0], Function.identity());
                String tag = interner.computeIfAbsent(this.defaultTag != null ? this.defaultTag : parts[1], Function.identity());
                boolean containsSpace = entry.indexOf(' ') > 0;
                Map<String, AnalyzedToken> fullMap = containsSpace ? map3 : map4;

                List<String> variants = new ArrayList<>();
                variants.add(entry);
                variants.addAll(getTokenLettercaseVariants(entry, fullMap));

                for (String variant : variants) {
                    if (containsSpace) {
                        // Remember the longest entry (in words) that starts with this first word.
                        String[] words = variant.split(" ");
                        map.merge(words[0], words.length, Integer::max);
                    } else {
                        // Remember the longest entry (in characters) that starts with this character.
                        map2.merge(variant.substring(0, 1), variant.length(), Integer::max);
                    }
                    // The original (non-variant) entry is stored as the lemma.
                    fullMap.put(variant, new AnalyzedToken(variant, tag, entry));
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Computes the additional spellings of an entry permitted by the letter-case switches.
     *
     * <p>The all-uppercase form is produced when {@code allowAllUppercase} is set and the entry
     * is not camel case. The first-character-uppercased form is produced when
     * {@code allowFirstCapitalized} is set. Both are skipped when they equal the entry or are
     * already a key of {@code map}. Title-case forms are only considered when
     * {@code allowFirstCapitalized} and {@code allowTitlecase} are both set, the entry has more
     * than one space-separated part and {@code StringTools.allStartWithLowercase} accepts it.
     *
     * @param str the entry as written in the list
     * @param map full-entry map used to skip variants that are already registered
     * @return the variants, without {@code str} itself; possibly empty
     */
    public List<String> getTokenLettercaseVariants(String str, Map<String, AnalyzedToken> map) {
        List<String> variants = new ArrayList<>();

        if (this.allowAllUppercase && !StringTools.isCamelCase(str)) {
            String allUpper = str.toUpperCase();
            boolean skip = map.containsKey(allUpper) || str.equals(allUpper);
            if (!skip) {
                variants.add(allUpper);
            }
        }

        if (!this.allowFirstCapitalized) {
            return variants;
        }
        String firstUpper = StringTools.uppercaseFirstChar(str);
        if (!(map.containsKey(firstUpper) || str.equals(firstUpper))) {
            variants.add(firstUpper);
        }

        if (!this.allowTitlecase || str.split(" ").length <= 1 || !StringTools.allStartWithLowercase(str)) {
            return variants;
        }
        // Every word capitalized.
        String everyWordUpper = WordUtils.capitalize(str);
        if (!(everyWordUpper.equals(firstUpper) || str.equals(everyWordUpper))) {
            variants.add(everyWordUpper);
        }
        // Title case as computed by StringTools.titlecaseGlobal.
        String globalTitle = StringTools.titlecaseGlobal(str);
        boolean duplicate = globalTitle.equals(firstUpper) || globalTitle.equals(everyWordUpper) || str.equals(globalTitle);
        if (!duplicate) {
            variants.add(globalTitle);
        }
        return variants;
    }

    /**
     * Runs the chunker over a sentence without a cancellation callback.
     *
     * @param analyzedSentence the sentence to process
     * @return the result of {@link #disambiguate(AnalyzedSentence, JLanguageTool.CheckCancelledCallback)}
     * @throws IOException declared by the overridden method
     */
    @Override // org.languagetool.tagging.disambiguation.Disambiguator
    public AnalyzedSentence disambiguate(AnalyzedSentence analyzedSentence) throws IOException {
        final JLanguageTool.CheckCancelledCallback noCancellation = null;
        return disambiguate(analyzedSentence, noCancellation);
    }

    /**
     * Adds multiword readings to the tokens of a sentence.
     *
     * <p>For every non-empty token the method looks for list entries starting at that token,
     * first among the entries that contain spaces (mStartSpace / mFullSpace) and then among the
     * space-free entries (mStartNoSpace / mFullNoSpace). A match that covers a single token gets
     * a plain reading with the entry's tag and lemma; a match spanning several tokens gets a
     * {@code <tag>} reading on its first token and a {@code </tag>} reading on its last one.
     * Entries tagged with {@code tagForNotAddingTags} add no reading. When
     * {@code addIgnoreSpelling} is set, the tokens of a match are marked via
     * {@code ignoreSpelling()}.
     *
     * @param analyzedSentence       the sentence whose token array is processed
     * @param checkCancelledCallback optional callback; when it reports cancellation the scan
     *                               stops and the tokens processed so far are returned
     * @return a new sentence built from the processed token array, passed through
     *         {@code removePreviousTags} first when {@code isRemovePreviousTags} is set
     * @throws IOException declared by the overridden method
     */
    @Override // org.languagetool.tagging.disambiguation.Disambiguator
    public final AnalyzedSentence disambiguate(AnalyzedSentence analyzedSentence, @Nullable JLanguageTool.CheckCancelledCallback checkCancelledCallback) throws IOException {
        lazyInit();
        AnalyzedTokenReadings[] tokens = analyzedSentence.getTokens();
        for (int i = 0; i < tokens.length; i++) {
            String token = tokens[i].getToken();
            if (token.length() >= 1) {
                // Glue the directly following tokens (up to the next whitespace token) onto the
                // current one; the result is the lookup key for the start maps.
                for (int i2 = i + 1; i2 < tokens.length && !tokens[i2].isWhitespace(); i2++) {
                    token = token + tokens[i2].getToken();
                }
                // Leaves the outer loop over i; the sentence is still built from the array below.
                if (checkCancelledCallback != null && checkCancelledCallback.checkCancelled()) {
                    break;
                }
                // --- entries that contain spaces ---
                if (this.mStartSpace.containsKey(token)) {
                    // i3: 0 during the first pass of the while loop, afterwards equal to i4
                    //     (it is assigned right after i4 is incremented).
                    // i5: number of spaces appended to sb so far.
                    // intValue: largest word count of any entry starting with this key.
                    int i3 = 0;
                    StringBuilder sb = new StringBuilder();
                    int intValue = this.mStartSpace.get(token).intValue();
                    int i4 = i;
                    int i5 = 0;
                    // NOTE(review): 20 presumably is MAX_TOKENS_IN_MULTIWORD inlined - confirm.
                    while (i4 < tokens.length && i4 - i < 20) {
                        if (tokens[i4].isWhitespace()) {
                            // A run of whitespace tokens contributes a single space to the candidate.
                            if (i4 > 1 && !tokens[i4 - 1].isWhitespace()) {
                                sb.append(' ');
                                i5++;
                            }
                            // No entry starting with this key has more words: stop scanning.
                            if (i5 == intValue) {
                                break;
                            }
                        } else {
                            sb.append(tokens[i4].getToken());
                            AnalyzedToken analyzedToken = this.mFullSpace.get(sb.toString());
                            if (analyzedToken != null) {
                                if (!analyzedToken.getPOSTag().equals(tagForNotAddingTags)) {
                                    if (i3 == 0) {
                                        // Match found on the first pass: plain reading on tokens[i].
                                        tokens[i] = setAndAnnotate(tokens[i], new AnalyzedToken(tokens[i4].getToken(), analyzedToken.getPOSTag(), analyzedToken.getLemma()));
                                    } else {
                                        // Later pass: opening marker on the first token, closing marker on tokens[i3].
                                        tokens[i] = prepareNewReading(analyzedToken, tokens[i].getToken(), tokens[i], false);
                                        tokens[i3] = prepareNewReading(analyzedToken, tokens[i3].getToken(), tokens[i3], true);
                                    }
                                }
                                if (this.addIgnoreSpelling) {
                                    if (i3 == 0) {
                                        tokens[i].ignoreSpelling();
                                    } else {
                                        for (int i6 = i; i6 <= i3; i6++) {
                                            tokens[i6].ignoreSpelling();
                                        }
                                    }
                                }
                            }
                        }
                        i4++;
                        i3 = i4;
                    }
                }
                // --- entries without spaces: keyed by the first character of the glued token ---
                if (this.mStartNoSpace.containsKey(token.substring(0, 1))) {
                    StringBuilder sb2 = new StringBuilder();
                    // Grow the candidate token by token until whitespace or the 20-token bound.
                    for (int i7 = i; i7 < tokens.length && !tokens[i7].isWhitespace() && i7 - i < 20; i7++) {
                        sb2.append(tokens[i7].getToken());
                        AnalyzedToken analyzedToken2 = this.mFullNoSpace.get(sb2.toString());
                        if (analyzedToken2 != null) {
                            if (!analyzedToken2.getPOSTag().equals(tagForNotAddingTags)) {
                                if (i == i7) {
                                    // Single-token match: a low-priority tag is only added when the
                                    // token has no reading yet or its POS tag is unknown.
                                    String pOSTag = analyzedToken2.getPOSTag();
                                    if (!isLowPriorityTag(pOSTag) || !tokens[i].hasReading() || tokens[i].isPosTagUnknown()) {
                                        tokens[i] = setAndAnnotate(tokens[i], new AnalyzedToken(tokens[i7].getToken(), pOSTag, analyzedToken2.getLemma()));
                                    }
                                } else {
                                    // Multi-token match: opening marker on tokens[i], closing marker on tokens[i7].
                                    tokens[i] = prepareNewReading(analyzedToken2, tokens[i].getToken(), tokens[i], false);
                                    tokens[i7] = prepareNewReading(analyzedToken2, tokens[i7].getToken(), tokens[i7], true);
                                }
                            }
                            if (this.addIgnoreSpelling) {
                                for (int i8 = i; i8 <= i7; i8++) {
                                    tokens[i8].ignoreSpelling();
                                }
                            }
                        }
                    }
                }
            }
        }
        return this.isRemovePreviousTags ? new AnalyzedSentence(removePreviousTags(tokens)) : new AnalyzedSentence(tokens);
    }

    /**
     * Adds a multiword boundary reading to a token.
     *
     * @param analyzedToken         matched list entry supplying the POS tag and lemma
     * @param str                   surface form for the new reading
     * @param analyzedTokenReadings token that receives the reading
     * @param z                     {@code true} for a closing marker {@code </tag>},
     *                              {@code false} for an opening marker {@code <tag>}
     * @return {@code analyzedTokenReadings}, as returned by {@code setAndAnnotate}
     */
    private AnalyzedTokenReadings prepareNewReading(AnalyzedToken analyzedToken, String str, AnalyzedTokenReadings analyzedTokenReadings, boolean z) {
        String opening = z ? "</" : "<";
        String markerTag = opening + analyzedToken.getPOSTag() + ">";
        AnalyzedToken markerReading = new AnalyzedToken(str, markerTag, analyzedToken.getLemma());
        return setAndAnnotate(analyzedTokenReadings, markerReading);
    }

    /**
     * Adds a reading to a token, recording "MULTIWORD_CHUNKER" as the second argument of
     * {@code addReading}.
     *
     * @param analyzedTokenReadings token to extend
     * @param analyzedToken         reading to add
     * @return the same {@code analyzedTokenReadings} instance
     */
    private AnalyzedTokenReadings setAndAnnotate(AnalyzedTokenReadings analyzedTokenReadings, AnalyzedToken analyzedToken) {
        final String origin = "MULTIWORD_CHUNKER";
        analyzedTokenReadings.addReading(analyzedToken, origin);
        return analyzedTokenReadings;
    }

    /**
     * Reads the multiword list from a UTF-8 stream and returns its content lines.
     *
     * <p>Processing per line:
     * <ul>
     *   <li>the line is trimmed; empty lines and lines starting with {@code #} are skipped;</li>
     *   <li>a line starting with {@code #separatorRegExp=} replaces {@code separator}, which is
     *       reset to the tab default at the start of every call;</li>
     *   <li>everything from the first {@code #} on is cut off as a trailing comment;</li>
     *   <li>a line matching {@code GermanLineExpander} ({@code stem/FLAGS}) is expanded to the
     *       stem plus the stem with "e", "s", "n" appended for the flags E, S, N.</li>
     * </ul>
     *
     * <p>The reader, and with it the given stream, is closed on every exit path.
     *
     * @param inputStream stream to read; closed by this method
     * @return the entries in file order
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    private List<String> loadWords(InputStream inputStream) {
        List<String> entries = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            this.separator = DEFAULT_SEPARATOR;
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                String line = rawLine.trim();
                if (line.startsWith("#separatorRegExp=")) {
                    this.separator = line.replace("#separatorRegExp=", "");
                }
                if (line.isEmpty() || line.charAt(0) == '#') {
                    continue;
                }
                String content = StringUtils.substringBefore(line, "#").trim();
                if (!GermanLineExpander.matcher(content).matches()) {
                    entries.add(content);
                    continue;
                }
                // NOTE(review): the flags are taken from the part after the FIRST slash, so a
                // stem that itself contains '/' is not expanded - confirm this is intended.
                String[] stemAndFlags = content.split("/");
                String stem = stemAndFlags[0].trim();
                String flags = stemAndFlags[1];
                entries.add(stem);
                if (flags.contains("E")) {
                    entries.add(stem + "e");
                }
                if (flags.contains("S")) {
                    entries.add(stem + "s");
                }
                if (flags.contains("N")) {
                    entries.add(stem + "n");
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return entries;
    }

    /**
     * Controls whether tokens covered by a matched entry are marked with
     * {@code ignoreSpelling()} during {@code disambiguate}.
     *
     * @param z {@code true} to mark matched tokens
     */
    public void setIgnoreSpelling(boolean z) {
        addIgnoreSpelling = z;
    }

    /**
     * Controls whether {@code disambiguate} passes its result through
     * {@code removePreviousTags} before building the returned sentence.
     *
     * @param z {@code true} to enable the post-processing step
     */
    public void setRemovePreviousTags(boolean z) {
        isRemovePreviousTags = z;
    }

    /**
     * Replaces the readings of tokens that belong to a multiword by a single reading derived
     * from the multiword's tag and lemma. Whitespace tokens are left untouched.
     *
     * <p>The method walks the array with a small state: while {@code pendingTag} is empty it
     * looks for a token that opens a multiword (see {@code getMultiWordAnalyzedToken}); once one
     * is found, the following tokens receive {@code pendingTag} until a token carrying the
     * matching closing marker has been processed.
     *
     * @param analyzedTokenReadingsArr the tokens to rewrite; modified in place
     * @return the same array instance
     */
    private AnalyzedTokenReadings[] removePreviousTags(AnalyzedTokenReadings[] analyzedTokenReadingsArr) {
        final String origin = "HybridDisamb";
        String chunkTag = "";
        String chunkLemma = "";
        String pendingTag = "";
        for (int pos = 0; pos < analyzedTokenReadingsArr.length; pos++) {
            AnalyzedTokenReadings current = analyzedTokenReadingsArr[pos];
            if (current.isWhitespace()) {
                continue;
            }

            if (!pendingTag.isEmpty()) {
                // Inside a multiword: the reading is built before the state is possibly reset.
                AnalyzedToken inner = new AnalyzedToken(current.getToken(), pendingTag, chunkLemma);
                if (current.hasPosTagAndLemma("</" + chunkTag + ">", chunkLemma)) {
                    pendingTag = "";
                    chunkLemma = "";
                }
                analyzedTokenReadingsArr[pos] = new AnalyzedTokenReadings(current, Arrays.asList(inner), origin);
                continue;
            }

            AnalyzedToken opener = getMultiWordAnalyzedToken(analyzedTokenReadingsArr, pos);
            if (opener == null) {
                continue;
            }
            // Strip the surrounding '<' and '>' from the marker tag.
            String markerTag = opener.getPOSTag();
            chunkTag = markerTag.substring(1, markerTag.length() - 1);
            chunkLemma = opener.getLemma();
            String closingMarker = "</" + chunkTag + ">";
            if (current.hasPosTagAndLemma(closingMarker, chunkLemma)) {
                // Opening and closing marker sit on the same token: swap both for a plain reading.
                current.removeReading(current.readingWithTagRegex(closingMarker), origin);
                current.removeReading(current.readingWithTagRegex("<" + chunkTag + ">"), origin);
                current.addReading(new AnalyzedToken(opener.getToken(), chunkTag, chunkLemma), origin);
                pendingTag = "";
                chunkLemma = "";
            } else {
                AnalyzedToken first = new AnalyzedToken(opener.getToken(), chunkTag, chunkLemma);
                analyzedTokenReadingsArr[pos] = new AnalyzedTokenReadings(current, Arrays.asList(first), origin);
                pendingTag = getNextPosTag(chunkTag);
            }
        }
        return analyzedTokenReadingsArr;
    }

    /**
     * Picks the multiword that starts at the given token.
     *
     * <p>Candidates are the readings of the token whose POS tag has the form {@code <tag>}
     * (but not {@code </tag>}). For each candidate the nearest following token carrying the
     * matching closing marker {@code </tag>} with the same lemma is located. The candidate with
     * the largest distance wins; on equal distance a later candidate replaces the current
     * choice unless its tag is a low-priority tag.
     *
     * @param analyzedTokenReadingsArr all tokens of the sentence
     * @param num                      index of the token to inspect
     * @return the selected opening reading, or {@code null} if the token opens no multiword
     *         that is closed later in the array
     */
    private AnalyzedToken getMultiWordAnalyzedToken(AnalyzedTokenReadings[] analyzedTokenReadingsArr, Integer num) {
        int start = num.intValue();
        List<AnalyzedToken> openers = new ArrayList<>();
        Iterator<AnalyzedToken> readings = analyzedTokenReadingsArr[start].iterator();
        while (readings.hasNext()) {
            AnalyzedToken reading = readings.next();
            String posTag = reading.getPOSTag();
            if (posTag != null && posTag.startsWith("<") && posTag.endsWith(">") && !posTag.startsWith("</")) {
                openers.add(reading);
            }
        }

        AnalyzedToken selected = null;
        int longest = 0;
        for (AnalyzedToken opener : openers) {
            String markerTag = opener.getPOSTag();
            String closingTag = "</" + markerTag.substring(1);
            // Strip exactly the leading '<' and the trailing '>'. Cutting one character more
            // would truncate the tag and make the low-priority comparison below never match.
            String bareTag = markerTag.substring(1, markerTag.length() - 1);
            String lemma = opener.getLemma();
            for (int distance = 1; start + distance < analyzedTokenReadingsArr.length; distance++) {
                if (analyzedTokenReadingsArr[start + distance].hasPosTagAndLemma(closingTag, lemma)) {
                    if (distance > longest || (distance == longest && !isLowPriorityTag(bareTag))) {
                        longest = distance;
                        selected = opener;
                    }
                    // The nearest closing marker decides for this candidate; without leaving
                    // the loop here the scan would never advance past the matching token.
                    break;
                }
            }
        }
        return selected;
    }

    /**
     * Derives the tag given to the tokens that follow the first token of a multiword.
     *
     * <ul>
     *   <li>a tag starting with "NC" becomes "AQ0" + characters 2..3 of the tag + "0";</li>
     *   <li>a tag starting with "N " becomes "J " + the rest of the tag from index 2;</li>
     *   <li>any other tag is returned unchanged.</li>
     * </ul>
     *
     * @param str tag of the multiword's first token
     * @return the tag for the following tokens
     */
    private String getNextPosTag(String str) {
        if (str.startsWith("NC")) {
            return "AQ0" + str.substring(2, 4) + "0";
        }
        if (str.startsWith("N ")) {
            return "J " + str.substring(2);
        }
        return str;
    }

    /**
     * Tells whether a tag is the low-priority tag "NPCN000".
     *
     * @param str tag to test; must not be {@code null}
     * @return {@code true} only for an exact match
     */
    private boolean isLowPriorityTag(String str) {
        final String lowPriorityTag = "NPCN000";
        return str.equals(lowPriorityTag);
    }
}
