/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter;

import edu.northwestern.at.morphadorner.corpuslinguistics.abbreviations.Abbreviations;
import edu.northwestern.at.morphadorner.corpuslinguistics.namerecognizer.Names;
import edu.northwestern.at.morphadorner.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.guesser.PartOfSpeechGuesser;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.ICU4JBreakIteratorSentenceSplitterIterator;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitter;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitterIterator;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.DefaultWordTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.IsCloseable;
import edu.northwestern.at.utils.IsCloseableObject;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.MutableInteger;
import edu.northwestern.at.utils.logger.DummyLogger;
import edu.northwestern.at.utils.logger.Logger;
import edu.northwestern.at.utils.logger.UsesLogger;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

public abstract class AbstractSentenceSplitter
extends IsCloseableObject
implements SentenceSplitter,
IsCloseable,
UsesLogger {
    protected WordTokenizer wordTokenizer = new DefaultWordTokenizer();
    protected PartOfSpeechGuesser partOfSpeechGuesser;
    protected SentenceSplitterIterator sentenceSplitterIterator;
    protected Names names = new Names();
    protected Abbreviations abbreviations = new Abbreviations();
    protected Logger logger = new DummyLogger();
    protected static final String disallowedSentenceStarters = ",%.";

    @Override
    public void setPartOfSpeechGuesser(PartOfSpeechGuesser partOfSpeechGuesser) {
        this.partOfSpeechGuesser = partOfSpeechGuesser;
    }

    @Override
    public void setAbbreviations(Abbreviations abbreviations) {
        this.abbreviations = abbreviations;
    }

    @Override
    public void setSentenceSplitterIterator(SentenceSplitterIterator sentenceSplitterIterator) {
        this.sentenceSplitterIterator = sentenceSplitterIterator;
    }

    protected boolean fixUpSentence(List<String> sentenceWords, List<String> previousSentenceWords) {
        boolean done;
        boolean eosSeen = true;
        boolean bl = done = sentenceWords.size() == 0;
        while (!done) {
            done = true;
            if (previousSentenceWords == null || previousSentenceWords.size() <= 0) continue;
            if (this.isClosingPunctuationOnly(sentenceWords)) {
                previousSentenceWords.addAll(sentenceWords);
                sentenceWords.clear();
                return eosSeen;
            }
            String lastToken = previousSentenceWords.get(previousSentenceWords.size() - 1);
            if (lastToken.equals(")") || lastToken.equals("]") || lastToken.equals("}") || lastToken.equals("\u2014")) {
                String nextToken = sentenceWords.get(0);
                eosSeen = CharUtils.isCapitalLetter(nextToken.charAt(0));
                continue;
            }
            if (sentenceWords.get(0).equals("\u2014")) {
                String prevSentenceLastWord = "";
                int lastWordIndex = previousSentenceWords.size() - 1;
                if (lastWordIndex >= 0) {
                    prevSentenceLastWord = previousSentenceWords.get(lastWordIndex);
                }
                if (prevSentenceLastWord.endsWith(".") || prevSentenceLastWord.endsWith("!") || prevSentenceLastWord.endsWith("?")) continue;
                previousSentenceWords.add("\u2014");
                sentenceWords.remove(0);
                done = sentenceWords.size() == 0;
                continue;
            }
            if (sentenceWords.get(0).equals(",")) {
                previousSentenceWords.add(",");
                sentenceWords.remove(0);
                done = sentenceWords.size() == 0;
                continue;
            }
            if (CharUtils.isNumber(sentenceWords.get(0)) && CharUtils.isNumber(lastToken)) {
                previousSentenceWords.add(sentenceWords.get(0));
                sentenceWords.remove(0);
                done = sentenceWords.size() == 0;
                continue;
            }
            if (sentenceWords.get(0).equals("s")) {
                String lastTokenM1;
                if (!CharUtils.endsWithSingleQuote(lastToken)) continue;
                lastToken = lastToken + sentenceWords.get(0);
                sentenceWords.remove(0);
                if (CharUtils.isSingleQuoteS(lastToken) && ((lastTokenM1 = previousSentenceWords.get(previousSentenceWords.size() - 2)).endsWith(".") || !CharUtils.isPunctuation(lastTokenM1))) {
                    lastToken = lastTokenM1 + lastToken;
                    previousSentenceWords.remove(previousSentenceWords.size() - 1);
                }
                previousSentenceWords.remove(previousSentenceWords.size() - 1);
                previousSentenceWords.add(lastToken);
                eosSeen = false;
                continue;
            }
            if (Abbreviations.isInitial(lastToken)) {
                eosSeen = false;
                String nextToken = sentenceWords.get(0);
                if (!CharUtils.isCapitalLetter(nextToken.charAt(0)) || this.isNoun(nextToken)) continue;
                eosSeen = true;
                continue;
            }
            if (this.abbreviations.isAbbreviation(lastToken)) {
                if (this.abbreviations.isEOSAbbreviation(lastToken)) {
                    if (this.verbSeen(previousSentenceWords)) {
                        String nextToken = sentenceWords.get(0);
                        if (CharUtils.isCapitalLetter(nextToken.charAt(0))) {
                            if (this.isProperNoun(nextToken) || this.isPronoun(nextToken)) {
                                boolean nextVerbSeen = false;
                                String prevToken = "";
                                for (int j = 1; j < sentenceWords.size() && !nextVerbSeen; ++j) {
                                    nextToken = sentenceWords.get(j);
                                    nextVerbSeen = this.isVerb(nextToken) && !prevToken.equals("to");
                                    prevToken = nextToken.toLowerCase();
                                    if (nextVerbSeen) break;
                                }
                                eosSeen = nextVerbSeen;
                                continue;
                            }
                            eosSeen = true;
                            continue;
                        }
                        eosSeen = true;
                        continue;
                    }
                    eosSeen = false;
                    continue;
                }
                eosSeen = false;
                continue;
            }
            if (sentenceWords.get(0).length() == 1) {
                char ch = sentenceWords.get(0).charAt(0);
                int j = disallowedSentenceStarters.indexOf(ch);
                if (disallowedSentenceStarters.indexOf(ch) < 0) continue;
                eosSeen = false;
                continue;
            }
            if (!Character.isLowerCase(sentenceWords.get(0).charAt(0))) continue;
            eosSeen = false;
        }
        return eosSeen;
    }

    public boolean isClosingPunctuationOnly(List<String> sentenceWords) {
        boolean result = false;
        if (sentenceWords != null && sentenceWords.size() > 0) {
            for (int i = 0; i < sentenceWords.size(); ++i) {
                String token = sentenceWords.get(i);
                result = true;
                if (token.equals(".") || token.equals(")") || token.equals("]") || token.equals("}")) continue;
                result = false;
                break;
            }
        }
        return result;
    }

    @Override
    public List<List<String>> extractSentences(String text, WordTokenizer tokenizer) {
        List<List<String>> result = ListFactory.createNewList();
        this.sentenceSplitterIterator.setText(text);
        List<String> previousSentenceWords = null;
        while (this.sentenceSplitterIterator.hasNext()) {
            List<String> sentenceWords;
            String sentenceText = this.sentenceSplitterIterator.next();
            String nextSentenceText = this.sentenceSplitterIterator.peek();
            if (nextSentenceText != null && nextSentenceText.equals(".")) {
                sentenceText = sentenceText + nextSentenceText;
                this.sentenceSplitterIterator.next();
            }
            if ((sentenceWords = tokenizer.extractWords(sentenceText)).size() == 0) continue;
            List<List<String>> subSentences = this.splitSentenceWordList(sentenceWords);
            for (int i = 0; i < subSentences.size(); ++i) {
                sentenceWords = subSentences.get(i);
                boolean eosSeen = this.fixUpSentence(sentenceWords, previousSentenceWords);
                if (sentenceWords.size() <= 0) continue;
                if (eosSeen && !this.quoteOnlySentence(sentenceWords)) {
                    this.addSentence(sentenceWords, result);
                    previousSentenceWords = sentenceWords;
                    continue;
                }
                if (previousSentenceWords != null) {
                    previousSentenceWords.addAll(sentenceWords);
                    continue;
                }
                this.addSentence(sentenceWords, result);
                previousSentenceWords = sentenceWords;
            }
        }
        return result;
    }

    public boolean quoteOnlySentence(List<String> sentenceWords) {
        boolean result = false;
        if (sentenceWords.size() == 0) {
            return result;
        }
        String word = sentenceWords.get(0);
        result = word.equals("\ue500") || word.length() == 1 && CharUtils.isClosingQuote(word.charAt(0));
        for (int i = 1; i < sentenceWords.size(); ++i) {
            word = sentenceWords.get(i);
            boolean bl = result = word.equals("\ue500") || word.length() == 1 && CharUtils.isClosingQuote(word.charAt(0));
            if (!result) break;
        }
        return result;
    }

    @Override
    public List<List<String>> extractSentences(String text) {
        return this.extractSentences(text, this.wordTokenizer);
    }

    @Override
    public int[] findSentenceOffsets(String text, List<List<String>> sentences) {
        int sentenceCount = sentences.size();
        int[] result = new int[sentenceCount + 1];
        int offset = 0;
        for (int i = 0; i < sentenceCount; ++i) {
            List<String> sentence = sentences.get(i);
            result[i] = offset;
            int nbCount = 0;
            for (int j = 0; j < sentence.size(); ++j) {
                nbCount += sentence.get(j).toString().length();
            }
            int tNbCount = 0;
            while (tNbCount < nbCount) {
                if (!CharUtils.isWhitespace(text.charAt(offset))) {
                    ++tNbCount;
                }
                ++offset;
            }
        }
        result[sentenceCount] = text.length();
        return result;
    }

    protected void addSentenceBad(List<String> sentence, List<List<String>> sentenceList) {
        String token;
        boolean quoteTokenFound = false;
        for (int i = 0; i < sentence.size(); ++i) {
            token = sentence.get(i);
            if (token.length() <= 1 || token.charAt(0) != '\u2019') continue;
            quoteTokenFound = true;
            break;
        }
        if (quoteTokenFound) {
            StringBuffer sb = new StringBuffer();
            for (int i = 0; i < sentence.size(); ++i) {
                token = sentence.get(i);
                if (token.length() > 1 && token.charAt(0) == '\u2019') {
                    token = '\ue502' + token.substring(1);
                }
                if (i > 0) {
                    sb.append(" ");
                }
                sb.append(token);
            }
            String newText = sb.toString();
            ICU4JBreakIteratorSentenceSplitterIterator subSentenceIterator = new ICU4JBreakIteratorSentenceSplitterIterator(newText);
            while (subSentenceIterator.hasNext()) {
                StringTokenizer stringTokenizer = new StringTokenizer(subSentenceIterator.next());
                List<String> newSentence = ListFactory.createNewList();
                while (stringTokenizer.hasMoreTokens()) {
                    token = stringTokenizer.nextToken();
                    if (token.length() > 1 && token.charAt(0) == '\ue502') {
                        token = '\u2019' + token.substring(1);
                    }
                    newSentence.add(token);
                }
                sentenceList.add(newSentence);
            }
        } else {
            sentenceList.add(sentence);
        }
    }

    protected void addSentence(List<String> sentence, List<List<String>> sentenceList) {
        if (sentence != null && sentence.size() > 0) {
            sentenceList.add(sentence);
        }
    }

    protected boolean isVerb(String word) {
        boolean result = false;
        if (this.partOfSpeechGuesser != null) {
            Map<String, MutableInteger> guessedTags = this.partOfSpeechGuesser.guessPartsOfSpeech(word);
            PartOfSpeechTags partOfSpeechTags = this.partOfSpeechGuesser.getWordLexicon().getPartOfSpeechTags();
            Iterator<String> iterator = guessedTags.keySet().iterator();
            while (iterator.hasNext() && !result) {
                String tag = iterator.next();
                result = result || partOfSpeechTags.isVerbTag(tag);
            }
        }
        return result;
    }

    protected boolean isProperNoun(String word) {
        boolean result = this.names.isNameOrPlace(word);
        if (!result && this.partOfSpeechGuesser != null) {
            boolean startsWithCapital = CharUtils.isCapitalLetter(word.charAt(0));
            Map<String, MutableInteger> guessedTags = this.partOfSpeechGuesser.guessPartsOfSpeech(word);
            PartOfSpeechTags partOfSpeechTags = this.partOfSpeechGuesser.getWordLexicon().getPartOfSpeechTags();
            Iterator<String> iterator = guessedTags.keySet().iterator();
            while (iterator.hasNext() && !result) {
                String tag = iterator.next();
                result = partOfSpeechTags.isProperNounTag(tag) || partOfSpeechTags.isNounTag(tag) && startsWithCapital;
            }
        }
        return result;
    }

    protected boolean isPronoun(String word) {
        boolean result = false;
        if (this.partOfSpeechGuesser != null) {
            Map<String, MutableInteger> guessedTags = this.partOfSpeechGuesser.guessPartsOfSpeech(word);
            PartOfSpeechTags partOfSpeechTags = this.partOfSpeechGuesser.getWordLexicon().getPartOfSpeechTags();
            Iterator<String> iterator = guessedTags.keySet().iterator();
            while (iterator.hasNext() && !result) {
                String tag = iterator.next();
                result = partOfSpeechTags.isPronounTag(tag);
            }
        }
        return result;
    }

    protected boolean isNoun(String word) {
        boolean result = false;
        if (this.partOfSpeechGuesser != null) {
            Map<String, MutableInteger> guessedTags = this.partOfSpeechGuesser.guessPartsOfSpeech(word);
            PartOfSpeechTags partOfSpeechTags = this.partOfSpeechGuesser.getWordLexicon().getPartOfSpeechTags();
            Iterator<String> iterator = guessedTags.keySet().iterator();
            while (iterator.hasNext() && !result) {
                String tag = iterator.next();
                result = partOfSpeechTags.isNounTag(tag);
            }
        }
        return result;
    }

    protected List<List<String>> splitSentenceWordList(List<String> sentenceWords) {
        List<List<String>> result = ListFactory.createNewList();
        List<String> subSentence = ListFactory.createNewList();
        for (int i = 0; i < sentenceWords.size(); ++i) {
            String word = sentenceWords.get(i);
            subSentence.add(word);
            if (word.equals("\ue500")) {
                result.add(subSentence);
                subSentence = ListFactory.createNewList();
                continue;
            }
            if (!word.equals(".")) continue;
            result.add(subSentence);
            subSentence = ListFactory.createNewList();
        }
        if (subSentence.size() > 0) {
            result.add(subSentence);
        }
        return result;
    }

    protected boolean verbSeen(List<String> tokenList) {
        boolean result = false;
        if (tokenList != null) {
            for (int i = 0; i < tokenList.size() - 1; ++i) {
                boolean bl = result = result || this.isVerb(tokenList.get(i));
                if (result) break;
            }
        }
        return result;
    }

    @Override
    public Logger getLogger() {
        return this.logger;
    }

    @Override
    public void setLogger(Logger logger) {
        this.logger = logger;
    }
}

