/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner;

import edu.northwestern.at.morphadorner.MorphAdorner;
import edu.northwestern.at.morphadorner.MorphAdornerLogger;
import edu.northwestern.at.morphadorner.MorphAdornerSettings;
import edu.northwestern.at.morphadorner.WordAttributePatterns;
import edu.northwestern.at.morphadorner.corpuslinguistics.adornedword.AdornedWord;
import edu.northwestern.at.morphadorner.corpuslinguistics.lexicon.Lexicon;
import edu.northwestern.at.morphadorner.corpuslinguistics.lexicon.LexiconFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.lexicon.UsesLexicon;
import edu.northwestern.at.morphadorner.corpuslinguistics.namerecognizer.Names;
import edu.northwestern.at.morphadorner.corpuslinguistics.namestandardizer.NameStandardizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.namestandardizer.NameStandardizerFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.InvalidRuleException;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.PartOfSpeechTagger;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.transitionmatrix.TransitionMatrix;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingmapper.SpellingMapper;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingmapper.SpellingMapperFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingstandardizer.SpellingStandardizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingstandardizer.SpellingStandardizerFactory;
import edu.northwestern.at.morphadorner.xgtagger.XGOptions;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.RomanNumeralUtils;
import edu.northwestern.at.utils.SingleTagTaggedStrings;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.TaggedStrings;
import edu.northwestern.at.utils.TextFile;
import edu.northwestern.at.utils.UTF8Properties;
import edu.northwestern.at.utils.UnicodeReader;
import edu.northwestern.at.utils.logger.UsesLogger;
import edu.northwestern.at.utils.xml.DOMUtils;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Static helper methods used while adorning texts: loading lexicons, tagger
 * data and spelling resources, patching XML before tokenization, and deriving
 * lemmata, standard spellings, keyword-in-context strings and word counts.
 *
 * <p>All methods are static; the constructor is protected so the class is
 * not instantiated directly.</p>
 */
public class MorphAdornerUtils {
    /** Matches a leading underscore followed by two capital ASCII letters. */
    protected static Pattern underlineCapCapPattern = Pattern.compile("^_([ABCDEFGHIJKLMNOPQRSTUVWXYZ])([ABCDEFGHIJKLMNOPQRSTUVWXYZ])");

    /**
     * Shared matcher for {@link #underlineCapCapPattern}.
     *
     * <p>Retained only so existing subclasses that reference it keep
     * compiling. {@link #fixSpelling(String)} no longer uses it because a
     * {@code Matcher} is not safe for use by concurrent threads.</p>
     */
    protected static final Matcher underlineCapCapMatcher = underlineCapCapPattern.matcher("");

    /** Runtime handle used by {@link #logMemoryUsage}. */
    protected static Runtime runTime = Runtime.getRuntime();

    /** Private-use character written into empty {@code gap} elements and skipped when counting words. */
    private static final String GAP_MARKER = "\ue500";

    /** Private-use character prefixed to superscript text that continues a preceding "y" or "w". */
    private static final String SUPERSCRIPT_MARKER = "\ue503";

    /** Symbols removed from a spelling unless the spelling consists of exactly that symbol. */
    private static final String[] REMOVABLE_SPELLING_SYMBOLS = {"|", "{", "}", "+"};

    /** Lower-cased superscript texts that get marked when the preceding text ends in " y". */
    private static final String[] Y_SUPERSCRIPTS = {"e", "t", "c", "en", "ere", "f", "i", "m", "n", "o", "u"};

    /** Lower-cased superscript texts that get marked when the preceding text ends in " w". */
    private static final String[] W_SUPERSCRIPTS = {"ch", "t", "th"};

    /**
     * Counts the {@code pb} elements in a document.
     *
     * @param document the DOM document to inspect
     * @return the number of elements whose tag name is {@code pb}
     */
    public static int countPageBreaks(Document document) {
        return document.getElementsByTagName("pb").getLength();
    }

    /**
     * Creates a spelling mapper from the given properties.
     *
     * @param properties settings handed to {@code SpellingMapperFactory}
     * @return whatever the factory returns
     * @throws IOException declared for callers; propagated from the factory
     */
    public static SpellingMapper createSpellingMapper(UTF8Properties properties) throws IOException {
        return SpellingMapperFactory.newSpellingMapper(properties);
    }

    /**
     * Creates a name standardizer and, when a word lexicon is supplied, loads
     * names from it and logs the number loaded.
     *
     * @param wordLexicon     lexicon to load names from; may be {@code null}
     * @param adornerSettings settings supplying the factory properties and message strings
     * @param adornerLogger   logger for the progress message
     * @return the standardizer produced by the factory (possibly {@code null})
     * @throws IOException if loading fails
     */
    public static NameStandardizer createNameStandardizer(Lexicon wordLexicon, MorphAdornerSettings adornerSettings, MorphAdornerLogger adornerLogger) throws IOException {
        NameStandardizer nameStandardizer = NameStandardizerFactory.newNameStandardizer(adornerSettings.properties);
        if (nameStandardizer == null) {
            return null;
        }
        if (wordLexicon != null) {
            long startTime = System.currentTimeMillis();
            nameStandardizer.loadNamesFromLexicon(wordLexicon);
            int numberOfNames = nameStandardizer.getNumberOfNames();
            adornerLogger.println("Loaded_names", new Object[]{Formatters.formatIntegerWithCommas(numberOfNames), MorphAdornerUtils.durationString(adornerSettings, startTime)});
        }
        attachLogger(nameStandardizer, adornerLogger);
        return nameStandardizer;
    }

    /**
     * Creates the word lexicon, loads it from {@code wordLexiconURL} when that
     * setting is non-null, and logs its size.
     *
     * @param adornerSettings settings supplying the factory properties and lexicon URL
     * @param adornerLogger   logger for the progress message
     * @return the (possibly empty) word lexicon
     * @throws IOException if the lexicon cannot be read
     */
    public static Lexicon loadWordLexicon(MorphAdornerSettings adornerSettings, MorphAdornerLogger adornerLogger) throws IOException {
        long startTime = System.currentTimeMillis();
        Lexicon wordLexicon = LexiconFactory.newLexicon(adornerSettings.properties);
        if (adornerSettings.wordLexiconURL != null) {
            wordLexicon.loadLexicon(adornerSettings.wordLexiconURL, "utf-8");
        }
        adornerLogger.println("Loaded_word_lexicon", new Object[]{Formatters.formatIntegerWithCommas(wordLexicon.getLexiconSize()), MorphAdornerUtils.durationString(adornerSettings, startTime)});
        // Guarded: a lexicon implementation is not required to accept a logger.
        attachLogger(wordLexicon, adornerLogger);
        return wordLexicon;
    }

    /**
     * Creates the suffix lexicon, loads it from {@code suffixLexiconURL} when
     * that setting is non-null, and logs its size.
     *
     * @param adornerSettings settings supplying the factory properties and lexicon URL
     * @param adornerLogger   logger for the progress message
     * @return the (possibly empty) suffix lexicon
     * @throws IOException if the lexicon cannot be read
     */
    public static Lexicon loadSuffixLexicon(MorphAdornerSettings adornerSettings, MorphAdornerLogger adornerLogger) throws IOException {
        long startTime = System.currentTimeMillis();
        Lexicon suffixLexicon = LexiconFactory.newLexicon(adornerSettings.properties);
        if (adornerSettings.suffixLexiconURL != null) {
            suffixLexicon.loadLexicon(adornerSettings.suffixLexiconURL, "utf-8");
        }
        adornerLogger.println("Loaded_suffix_lexicon", new Object[]{Formatters.formatIntegerWithCommas(suffixLexicon.getLexiconSize()), MorphAdornerUtils.durationString(adornerSettings, startTime)});
        // Guarded: a lexicon implementation is not required to accept a logger.
        attachLogger(suffixLexicon, adornerLogger);
        return suffixLexicon;
    }

    /**
     * Creates a transition matrix and, when a URL is configured and the tagger
     * uses transition probabilities, loads it (tab-separated, UTF-8) and
     * installs it in the tagger.
     *
     * @param tagger          the part of speech tagger to configure
     * @param adornerSettings settings supplying the matrix URL
     * @param adornerLogger   logger for the progress message
     * @return the matrix; it is left as constructed when nothing was loaded
     * @throws IOException if the matrix cannot be read
     */
    public static TransitionMatrix loadTransitionMatrix(PartOfSpeechTagger tagger, MorphAdornerSettings adornerSettings, MorphAdornerLogger adornerLogger) throws IOException {
        TransitionMatrix transitionMatrix = new TransitionMatrix();
        boolean shouldLoad = adornerSettings.transitionMatrixURL != null && tagger.usesTransitionProbabilities();
        if (shouldLoad) {
            long startTime = System.currentTimeMillis();
            transitionMatrix.loadTransitionMatrix(adornerSettings.transitionMatrixURL, "utf-8", '\t');
            tagger.setTransitionMatrix(transitionMatrix);
            adornerLogger.println("Loaded_transition_matrix", new Object[]{MorphAdornerUtils.durationString(adornerSettings, startTime)});
            transitionMatrix.setLogger(adornerLogger.getLogger());
        }
        return transitionMatrix;
    }

    /**
     * Loads context and lexical rules into the tagger when the corresponding
     * URL is configured and the tagger reports that it uses that rule type.
     *
     * @param tagger          the part of speech tagger to configure
     * @param adornerSettings settings supplying the rule URLs
     * @param adornerLogger   unused by this method
     * @throws InvalidRuleException if the tagger rejects a rule
     * @throws IOException          if a rules file cannot be read
     */
    public static void loadTaggerRules(PartOfSpeechTagger tagger, MorphAdornerSettings adornerSettings, MorphAdornerLogger adornerLogger) throws InvalidRuleException, IOException {
        if (adornerSettings.contextRulesURL != null && tagger.usesContextRules()) {
            tagger.setContextRules(new TextFile(adornerSettings.contextRulesURL, "utf-8").toArray());
        }
        if (adornerSettings.lexicalRulesURL != null && tagger.usesLexicalRules()) {
            tagger.setLexicalRules(new TextFile(adornerSettings.lexicalRulesURL, "utf-8").toArray());
        }
    }

    /**
     * Creates a spelling standardizer and populates it with standard
     * spellings, name lists, and alternate spellings, logging each load.
     *
     * @param wordLexicon     lexicon handed to standardizers that implement {@code UsesLexicon}
     * @param names           name lists added as standard spellings; skipped when {@code null}
     * @param adornerSettings settings supplying the factory properties and data URLs
     * @param adornerLogger   logger for progress messages
     * @return the standardizer produced by the factory (possibly {@code null})
     * @throws IOException if a spelling file cannot be read
     */
    public static SpellingStandardizer createSpellingStandardizer(Lexicon wordLexicon, Names names, MorphAdornerSettings adornerSettings, MorphAdornerLogger adornerLogger) throws IOException {
        SpellingStandardizer spellingStandardizer = SpellingStandardizerFactory.newSpellingStandardizer(adornerSettings.properties);
        if (spellingStandardizer == null) {
            return null;
        }
        long startTime = System.currentTimeMillis();
        if (spellingStandardizer instanceof UsesLexicon) {
            ((UsesLexicon)((Object)spellingStandardizer)).setLexicon(wordLexicon);
        }
        if (adornerSettings.spellingsURL != null) {
            spellingStandardizer.loadStandardSpellings(adornerSettings.spellingsURL, "utf-8");
            int numberOfStandardSpellings = spellingStandardizer.getNumberOfStandardSpellings();
            adornerLogger.println("Loaded_standard_spellings", new Object[]{Formatters.formatIntegerWithCommas(numberOfStandardSpellings), MorphAdornerUtils.durationString(adornerSettings, startTime)});
        }
        if (names != null) {
            spellingStandardizer.addStandardSpellings(names.getFirstNames());
            spellingStandardizer.addStandardSpellings(names.getSurnames());
            spellingStandardizer.addStandardSpellings(names.getPlaceNames().keySet());
        }
        if (adornerSettings.alternateSpellingsURLs != null) {
            // The standardizer reports a running total, so log the per-file delta.
            int previousTotal = 0;
            for (int i = 0; i < adornerSettings.alternateSpellingsURLs.length; ++i) {
                startTime = System.currentTimeMillis();
                spellingStandardizer.loadAlternativeSpellings(adornerSettings.alternateSpellingsURLs[i], "utf-8", "\t");
                adornerLogger.println("Loaded_alternate_spellings", new Object[]{Formatters.formatIntegerWithCommas(spellingStandardizer.getNumberOfAlternateSpellings() - previousTotal), MorphAdornerUtils.durationString(adornerSettings, startTime)});
                previousTotal = spellingStandardizer.getNumberOfAlternateSpellings();
            }
        }
        if (adornerSettings.alternateSpellingsByWordClassURLs != null) {
            // Same delta logging for the two counters reported per word-class load.
            int[] previousTotals = new int[]{0, 0};
            for (int i = 0; i < adornerSettings.alternateSpellingsByWordClassURLs.length; ++i) {
                startTime = System.currentTimeMillis();
                spellingStandardizer.loadAlternativeSpellingsByWordClass(adornerSettings.alternateSpellingsByWordClassURLs[i], "utf-8");
                int[] totals = spellingStandardizer.getNumberOfAlternateSpellingsByWordClass();
                adornerLogger.println("Loaded_alternate_spellings_by_word_class", new Object[]{Formatters.formatIntegerWithCommas(totals[1] - previousTotals[1]), Formatters.formatIntegerWithCommas(totals[0] - previousTotals[0]), MorphAdornerUtils.durationString(adornerSettings, startTime)});
                previousTotals[0] = totals[0];
                previousTotals[1] = totals[1];
            }
        }
        attachLogger(spellingStandardizer, adornerLogger);
        return spellingStandardizer;
    }

    /**
     * Formats the time elapsed since {@code startTime} as whole seconds,
     * rounded up, followed by the localized "second"/"seconds" string and a
     * period. An elapsed time of zero milliseconds is shown as "&lt; 1".
     *
     * @param adornerSettings source of the localized unit strings
     * @param startTime       start time in milliseconds, as from {@code System.currentTimeMillis()}
     * @return the formatted duration
     */
    public static String durationString(MorphAdornerSettings adornerSettings, long startTime) {
        long duration = (System.currentTimeMillis() - startTime + 999L) / 1000L;
        String durationText = duration < 1L ? "< 1" : Formatters.formatLongWithCommas(duration);
        String unitKey = duration > 1L ? "seconds" : "second";
        StringBuilder result = new StringBuilder();
        result.append(durationText);
        result.append(" ");
        result.append(adornerSettings.getString(unitKey));
        result.append(".");
        return result.toString();
    }

    /**
     * Gives every empty soft tag some text: {@code gap} elements receive the
     * gap marker surrounded by blanks, {@code pb} elements are left alone, and
     * all other empty soft tags receive a single blank.
     *
     * @param xgOptions options that decide which tag names are soft tags
     * @param document  the document to modify in place
     */
    public static void fixEmptySoftTags(XGOptions xgOptions, Document document) {
        List<Node> nodes = DOMUtils.getDescendants(document);
        for (int i = 0; i < nodes.size(); ++i) {
            Node node = nodes.get(i);
            String nodeName = node.getNodeName();
            if (!xgOptions.isSoftTag(nodeName)) {
                continue;
            }
            if (DOMUtils.getText(node).length() != 0) {
                continue;
            }
            if (nodeName.equals("gap")) {
                DOMUtils.setText(node, " " + GAP_MARKER + " ");
            } else if (!nodeName.equals("pb")) {
                DOMUtils.setText(node, " ");
            }
        }
    }

    /**
     * Cleans markup symbols out of a spelling.
     *
     * <p>Each of {@code |}, <code>{</code>, <code>}</code> and {@code +} is
     * removed unless the spelling at that point is exactly that one symbol.
     * Afterwards a leading {@code _XY} (underscore plus two capital ASCII
     * letters) becomes {@code Xy}.</p>
     *
     * @param spelling the spelling to clean; must not be {@code null}
     * @return the cleaned spelling
     */
    public static String fixSpelling(String spelling) {
        String result = spelling;
        for (String symbol : REMOVABLE_SPELLING_SYMBOLS) {
            if (!result.equals(symbol)) {
                result = StringUtils.replaceAll(result, symbol, "");
            }
        }
        if (result.length() > 1 && result.charAt(0) == '_') {
            // A per-call matcher keeps this method safe to call from several threads.
            Matcher capCapMatcher = underlineCapCapPattern.matcher(result);
            if (capCapMatcher.find()) {
                // The match guarantees at least three characters, so substring(3) is safe.
                result = String.valueOf(result.charAt(1)) + Character.toLowerCase(result.charAt(2)) + result.substring(3);
            }
        }
        return result;
    }

    /**
     * Prefixes the superscript marker to the text of {@code hi} elements with
     * {@code rend="superscript"} when the previous sibling's text ends in
     * " y" or " w" and the superscript text is one of the recognized
     * continuations for that letter. Superscripts already starting with
     * {@code ^} and those without a previous sibling are skipped.
     *
     * @param document the document to modify in place
     */
    public static void fixSupTags(Document document) {
        NodeList hiNodes = document.getElementsByTagName("hi");
        for (int i = 0; i < hiNodes.getLength(); ++i) {
            Element hiNode = (Element)hiNodes.item(i);
            if (!"superscript".equals(hiNode.getAttribute("rend"))) {
                continue;
            }
            String supText = DOMUtils.getText(hiNode);
            if (supText.startsWith("^")) {
                continue;
            }
            Node sibling = hiNode.getPreviousSibling();
            if (sibling == null) {
                continue;
            }
            String siblingText = sibling.getTextContent();
            if (siblingText == null) {
                continue;
            }
            // Locale.ROOT keeps the comparison independent of the JVM default
            // locale (e.g. Turkish dotless i).
            String loSupText = supText.toLowerCase(Locale.ROOT);
            boolean continuesY = siblingText.endsWith(" y") && containsString(Y_SUPERSCRIPTS, loSupText);
            boolean continuesW = siblingText.endsWith(" w") && containsString(W_SUPERSCRIPTS, loSupText);
            if (continuesY || continuesW) {
                DOMUtils.setText(hiNode, SUPERSCRIPT_MARKER + supText);
            }
        }
    }

    /**
     * Determines the lemma for a spelling given its part of speech.
     *
     * <p>When the lemmatizer cannot handle the spelling, or the lemma word
     * class is "none", the spelling is returned unchanged, except that a
     * loose Roman numeral tagged as a number loses one leading and one
     * trailing period. Otherwise compound tags are tried as a whole compound
     * first and then piece by piece; simple tags are lemmatized with their
     * lemma word class.</p>
     *
     * @param adorner      supplies the lemmatizer, tag set, tokenizer and lemma separator
     * @param spelling     the spelling to lemmatize
     * @param partOfSpeech the part of speech tag for the spelling
     * @return the lemma
     */
    public static String getLemma(MorphAdorner adorner, String spelling, String partOfSpeech) {
        String lemmaClass = adorner.partOfSpeechTags.getLemmaWordClass(partOfSpeech);
        if (adorner.lemmatizer.cantLemmatize(spelling) || lemmaClass.equals("none")) {
            if (adorner.partOfSpeechTags.isNumberTag(partOfSpeech) && RomanNumeralUtils.isLooseRomanNumeral(spelling)) {
                return stripEnclosingPeriods(spelling);
            }
            return spelling;
        }
        boolean isCompoundTag = adorner.partOfSpeechTags.isCompoundTag(partOfSpeech);
        if (isCompoundTag) {
            String compoundLemma = adorner.lemmatizer.lemmatize(spelling, "compound");
            if (adorner.lemmatizer.isCompoundLemma(compoundLemma)) {
                return compoundLemma;
            }
        }
        List<String> wordList = adorner.spellingTokenizer.extractWords(spelling);
        if (!isCompoundTag || wordList.size() == 1) {
            if (lemmaClass.length() != 0) {
                return adorner.lemmatizer.lemmatize(spelling, lemmaClass);
            }
            String result = adorner.lemmatizer.lemmatize(spelling, "compound");
            if (result.equals(spelling)) {
                result = adorner.lemmatizer.lemmatize(spelling);
            }
            return result;
        }
        // Compound tag with several word pieces: lemmatize each piece with the
        // word class of its own tag part and join with the lemma separator.
        // NOTE(review): when the tag parts and word pieces differ in number the
        // result is the empty string; this matches the previous behavior.
        StringBuilder joined = new StringBuilder();
        String[] posTags = adorner.partOfSpeechTags.splitTag(partOfSpeech);
        if (posTags.length == wordList.size()) {
            for (int i = 0; i < wordList.size(); ++i) {
                String wordPiece = wordList.get(i);
                if (i > 0) {
                    joined.append(adorner.lemmaSeparator);
                }
                String pieceClass = adorner.partOfSpeechTags.getLemmaWordClass(posTags[i]);
                joined.append(adorner.lemmatizer.lemmatize(wordPiece, pieceClass));
            }
        }
        return joined.toString();
    }

    /**
     * Determines the standardized spelling for a corrected spelling.
     *
     * <p>Proper nouns go through the name standardizer. Nouns with internal
     * capitals and foreign words are returned unchanged. Numbers are returned
     * unchanged except that loose Roman numerals lose one leading and one
     * trailing period. Everything else goes through the spelling
     * standardizer; if its answer differs from the input only in case, the
     * input is returned.</p>
     *
     * @param adorner              supplies the tag set and standardizers
     * @param correctedSpelling    the spelling to standardize
     * @param standardizedSpelling unused by this method
     * @param partOfSpeech         the part of speech tag for the spelling
     * @return the standardized spelling
     */
    protected static String getStandardizedSpelling(MorphAdorner adorner, String correctedSpelling, String standardizedSpelling, String partOfSpeech) {
        if (adorner.partOfSpeechTags.isProperNounTag(partOfSpeech)) {
            return adorner.nameStandardizer.standardizeProperName(correctedSpelling);
        }
        boolean leaveUnchanged = adorner.partOfSpeechTags.isNounTag(partOfSpeech) && CharUtils.hasInternalCaps(correctedSpelling) || adorner.partOfSpeechTags.isForeignWordTag(partOfSpeech);
        if (leaveUnchanged) {
            return correctedSpelling;
        }
        if (adorner.partOfSpeechTags.isNumberTag(partOfSpeech)) {
            if (RomanNumeralUtils.isLooseRomanNumeral(correctedSpelling)) {
                return stripEnclosingPeriods(correctedSpelling);
            }
            return correctedSpelling;
        }
        String standardized = adorner.spellingStandardizer.standardizeSpelling(correctedSpelling, adorner.partOfSpeechTags.getMajorWordClass(partOfSpeech));
        return standardized.equalsIgnoreCase(correctedSpelling) ? correctedSpelling : standardized;
    }

    /**
     * Builds a keyword-in-context triple for one word of a sentence.
     *
     * <p>Each side is limited to roughly
     * {@code (KWICWidth - 4 - keyword length) / 2} characters: words are added
     * until that width is reached, so a side may overshoot by one word.</p>
     *
     * @param sentence  the sentence as a list of adorned words
     * @param wordIndex index of the keyword in {@code sentence}
     * @param KWICWidth total width budget for the KWIC line
     * @return a three-element array: left context, keyword token, and right
     *         context (each right-context token is followed by a blank)
     */
    public static String[] getKWIC(List<AdornedWord> sentence, int wordIndex, int KWICWidth) {
        String[] results = new String[3];
        String keyToken = sentence.get(wordIndex).getToken();
        int maxWidth = (KWICWidth - 4 - keyToken.length()) / 2;

        // Left context: walk backwards, prepending tokens separated by blanks.
        StringBuilder leftContext = new StringBuilder();
        int leftWidth = 0;
        for (int i = wordIndex - 1; leftWidth < maxWidth && i >= 0; --i) {
            String token = sentence.get(i).getToken();
            if (leftContext.length() > 0) {
                leftContext.insert(0, " ");
            }
            leftContext.insert(0, token);
            leftWidth += token.length() + 1;
        }
        results[0] = leftContext.toString();
        results[1] = keyToken;

        // Right context: walk forwards, appending each token plus a blank.
        StringBuilder rightContext = new StringBuilder();
        int nWords = sentence.size();
        for (int i = wordIndex + 1; rightContext.length() < maxWidth && i < nWords; ++i) {
            rightContext.append(sentence.get(i).getToken());
            rightContext.append(" ");
        }
        results[2] = rightContext.toString();
        return results;
    }

    /**
     * Counts all words in a list of sentences.
     *
     * @param sentences the sentences, each a list of words
     * @return the sum of the sentence sizes
     */
    public static int getWordCount(List<List<String>> sentences) {
        int total = 0;
        for (List<String> sentence : sentences) {
            total += sentence.size();
        }
        return total;
    }

    /**
     * Counts sentences and words, ignoring gap markers at the end of each
     * sentence. A sentence that is empty or consists only of gap markers
     * contributes nothing to either count.
     *
     * @param sentences the sentences, each a list of words
     * @return a two-element array: {@code [0]} is the number of sentences with
     *         at least one counted word, {@code [1]} the number of counted words
     */
    public static int[] getWordAndSentenceCounts(List<List<String>> sentences) {
        int sentenceCount = 0;
        int wordTotal = 0;
        for (List<String> sentence : sentences) {
            int wordCount = sentence.size();
            // Bounds check first so an empty sentence no longer triggers get(-1).
            while (wordCount > 0 && GAP_MARKER.equals(sentence.get(wordCount - 1))) {
                --wordCount;
            }
            if (wordCount > 0) {
                ++sentenceCount;
            }
            wordTotal += wordCount;
        }
        return new int[]{sentenceCount, wordTotal};
    }

    /**
     * Loads a word list from a class-path resource and tags every word with
     * the same part of speech.
     *
     * @param wordFileName    resource name, resolved relative to this class
     * @param posTag          the tag given to every word
     * @param loadedMessage   message key logged after loading
     * @param adornerSettings settings supplying message strings
     * @param adornerLogger   logger for the progress message
     * @return the tagged word list
     */
    public static TaggedStrings getWordList(String wordFileName, String posTag, String loadedMessage, MorphAdornerSettings adornerSettings, MorphAdornerLogger adornerLogger) {
        long startTime = System.currentTimeMillis();
        TextFile wordFile = new TextFile(MorphAdornerUtils.class.getResourceAsStream(wordFileName), "utf-8");
        SingleTagTaggedStrings words = new SingleTagTaggedStrings(wordFile.toArray(), posTag);
        adornerLogger.println(loadedMessage, new Object[]{Formatters.formatIntegerWithCommas(words.getStringCount()), MorphAdornerUtils.durationString(adornerSettings, startTime)});
        return words;
    }

    /**
     * Loads an optional extra word list from a class-path resource.
     *
     * <p>Loading is best effort: a missing or unreadable resource is ignored
     * and whatever was loaded (possibly nothing) is returned. The progress
     * message is logged only when at least one entry was loaded.</p>
     *
     * @param wordFileName    resource name, resolved relative to this class
     * @param posTag          passed as the second argument to {@code UTF8Properties.load}
     * @param loadedMessage   message key logged after loading
     * @param adornerSettings settings supplying message strings
     * @param adornerLogger   logger for the progress message
     * @return the loaded words; {@code null} only if the container itself
     *         could not be created
     */
    public static TaggedStrings getExtraWordsList(String wordFileName, String posTag, String loadedMessage, MorphAdornerSettings adornerSettings, MorphAdornerLogger adornerLogger) {
        long startTime = System.currentTimeMillis();
        UTF8Properties words = null;
        InputStream wordStream = null;
        try {
            words = new UTF8Properties();
            wordStream = MorphAdornerUtils.class.getResourceAsStream(wordFileName);
            words.load(wordStream, posTag);
        }
        catch (Exception ignored) {
            // Deliberately ignored: the extra words list is optional.
        }
        finally {
            closeQuietly(wordStream);
        }
        if (words != null && words.size() > 0) {
            adornerLogger.println(loadedMessage, new Object[]{Formatters.formatIntegerWithCommas(words.getStringCount()), MorphAdornerUtils.durationString(adornerSettings, startTime)});
        }
        return words;
    }

    /**
     * Checks whether an XML file appears to be adorned already, i.e. whether
     * one of its first lines contains a {@code <w } tag carrying a non-empty
     * id value.
     *
     * <p>Any failure to open or read the file yields {@code false}.</p>
     *
     * @param xmlFileName     path of the XML file to inspect
     * @param maxLinesToCheck maximum number of lines to examine
     * @return {@code true} if a word tag with an id was found
     */
    public static boolean isAdorned(String xmlFileName, int maxLinesToCheck) {
        FileInputStream fileStream = null;
        BufferedReader bufferedReader = null;
        try {
            fileStream = new FileInputStream(xmlFileName);
            bufferedReader = new BufferedReader(new UnicodeReader(fileStream, "utf-8"));
            int linesRead = 0;
            String line = bufferedReader.readLine();
            while (line != null && linesRead < maxLinesToCheck) {
                ++linesRead;
                if (line.indexOf("<w ") >= 0) {
                    String[] groupValues = WordAttributePatterns.wReplacer.matchGroups(line);
                    if (hasWordId(groupValues)) {
                        return true;
                    }
                }
                line = bufferedReader.readLine();
            }
        }
        catch (Exception ignored) {
            // An unreadable file is reported as "not adorned".
        }
        finally {
            // Close on every path; the file stream is closed separately in case
            // wrapping it in a reader failed.
            closeQuietly(bufferedReader);
            closeQuietly(fileStream);
        }
        return false;
    }

    /**
     * Logs the JVM's free and total memory under the "Memory_used" message.
     *
     * @param adornerLogger logger that receives the message
     * @param label         label passed as the first message argument
     */
    public static void logMemoryUsage(MorphAdornerLogger adornerLogger, String label) {
        long freeMem = runTime.freeMemory();
        long totalMem = runTime.totalMemory();
        adornerLogger.println("Memory_used", new Object[]{label, Formatters.formatLongWithCommas(freeMem), Formatters.formatLongWithCommas(totalMem)});
    }

    /** Hands the adorner's logger to {@code target} if it implements {@code UsesLogger}. */
    private static void attachLogger(Object target, MorphAdornerLogger adornerLogger) {
        if (target instanceof UsesLogger) {
            ((UsesLogger)target).setLogger(adornerLogger.getLogger());
        }
    }

    /** Closes a resource, ignoring {@code null} and any failure to close. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        }
        catch (IOException ignored) {
            // Nothing useful can be done when closing fails.
        }
    }

    /** Removes at most one leading and one trailing period; safe for empty strings. */
    private static String stripEnclosingPeriods(String numeral) {
        String stripped = numeral;
        if (stripped.length() > 0 && stripped.charAt(0) == '.') {
            stripped = stripped.substring(1);
        }
        if (stripped.length() > 0 && stripped.charAt(stripped.length() - 1) == '.') {
            stripped = stripped.substring(0, stripped.length() - 1);
        }
        return stripped;
    }

    /** Reports whether {@code candidates} contains a string equal to {@code value}. */
    private static boolean containsString(String[] candidates, String value) {
        for (String candidate : candidates) {
            if (candidate.equals(value)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reports whether the match groups of a word tag yield a non-empty id.
     * Group index 2 is used for both patterns, as in the original code;
     * presumably it holds the attribute text and the id value respectively —
     * confirm against {@code WordAttributePatterns}.
     */
    private static boolean hasWordId(String[] wordTagGroups) {
        if (wordTagGroups == null || wordTagGroups.length <= 2 || wordTagGroups[2] == null) {
            return false;
        }
        try {
            String[] idValues = WordAttributePatterns.idReplacer.matchGroups(wordTagGroups[2]);
            if (idValues == null || idValues.length <= 2) {
                return false;
            }
            String id = idValues[2];
            return id != null && id.length() > 0;
        }
        catch (Exception ignored) {
            // A line whose id cannot be extracted counts as having no id.
            return false;
        }
    }

    /** Not meant to be instantiated; protected so subclasses remain possible. */
    protected MorphAdornerUtils() {
    }
}

