/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer;

import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.AbstractWordTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.RomanNumeralUtils;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/**
 * Word tokenizer that splits pretokenized text on whitespace and then peels
 * punctuation off both ends of every whitespace-delimited token.
 *
 * <p>For each raw token the tokenizer:
 * <ol>
 *   <li>runs the inherited {@code preprocessToken} hook;</li>
 *   <li>emits leading delimiters (ampersand, opening quotes, {@code %},
 *       {@code *}, breaking dashes) as separate words;</li>
 *   <li>detaches trailing delimiters (apostrophe, colon, breaking dashes,
 *       closing quotes, {@code !}, {@code ?}, {@code .}) unless a lookup
 *       (known apostrophe tokens, abbreviations, numbers, Roman numerals)
 *       says they belong to the word;</li>
 *   <li>splits the remaining core with the inherited {@code splitToken} and
 *       emits its non-empty pieces, followed by the detached trailing
 *       delimiters, one word per character.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code hyphensMatcher} is an inherited, reusable matcher
 * that is reset on every token; this class presumably must not be shared
 * between threads without external synchronization — confirm against
 * {@code AbstractWordTokenizer}.
 */
public class DefaultWordTokenizer
extends AbstractWordTokenizer
implements WordTokenizer {
    /**
     * Matches a previously emitted word consisting of an optional single
     * ASCII letter followed by two or three hyphens, two or three
     * non-breaking hyphens (U+2011), or a run of asterisks. A following
     * {@code 's}/{@code 'S} token is glued onto such a word. Compiled once
     * instead of on every token.
     */
    private static final Pattern DASHED_OR_STARRED_WORD =
        Pattern.compile("([A-Za-z]){0,1}(--|---|\u2011\u2011|\u2011\u2011\u2011|(\\*+))");

    /**
     * Breaks {@code text} into a list of word and punctuation tokens.
     *
     * @param text the text to tokenize; it is handed to the configured
     *             pretokenizer before being split on whitespace
     * @return the tokens in order of appearance, in a list obtained from
     *         {@code ListFactory.createNewList()}
     */
    @Override
    public List<String> extractWords(String text) {
        List<String> result = ListFactory.createNewList();
        String pretokenizedText = this.preTokenizer.pretokenize(text);
        StringTokenizer tokenizer = new StringTokenizer(pretokenizedText);
        while (tokenizer.hasMoreTokens()) {
            String token = this.preprocessToken(tokenizer.nextToken(), result);
            token = this.peelLeadingDelimiters(token, result);

            // Whatever the trailing pass removes is, by construction, a
            // suffix of the token; remember it so it can be emitted after
            // the core word.
            String core = this.peelTrailingDelimiters(token);
            String endDelims = token.substring(core.length());

            if (this.mergePossessiveIntoPreviousWord(core, result)) {
                core = "";
            }
            if (core.length() > 0) {
                for (String piece : this.splitToken(core)) {
                    if (piece.length() > 0) {
                        this.addWordToSentence(result, piece);
                    }
                }
            }
            for (int k = 0; k < endDelims.length(); ++k) {
                this.addWordToSentence(result, String.valueOf(endDelims.charAt(k)));
            }
        }
        return result;
    }

    /**
     * Emits delimiter characters found at the start of {@code token} as
     * separate words and returns what is left.
     *
     * <p>Peeling stops as soon as the remaining text matches the inherited
     * hyphens matcher, or its first character is not a detachable delimiter,
     * or a lookup says the leading character belongs to the word.
     *
     * @param token  the token to examine
     * @param result the word list that receives each detached delimiter
     * @return the token with the detached leading characters removed;
     *         possibly empty
     */
    private String peelLeadingDelimiters(String token, List<String> result) {
        String remaining = token;
        while (remaining.length() > 0) {
            hyphensMatcher.reset(remaining);
            if (hyphensMatcher.matches()) {
                break;
            }
            char first = remaining.charAt(0);
            boolean detach;
            if (first == '&') {
                // Keep the ampersand when the whole token is a known contraction.
                detach = !this.contractions.containsString(remaining);
            } else if (!this.apostropheCanBeQuote && CharUtils.isApostrophe(first)) {
                // Apostrophes are never treated as opening quotes in this mode.
                detach = false;
            } else if (this.isSingleOpeningQuote(first)) {
                detach = !this.contractions.containsString(remaining)
                    && !this.aposTokens.isKnownAposToken(remaining);
            } else if (CharUtils.isOpeningQuote(first) || first == '%' || first == '*') {
                detach = true;
            } else if (CharUtils.isBreakingDash(first)) {
                // A dash directly in front of a number stays attached to it,
                // and a lone dash is left as the token itself.
                detach = remaining.length() > 1
                    && !CharUtils.isNumber(remaining.substring(1));
            } else {
                detach = false;
            }
            if (!detach) {
                break;
            }
            this.addWordToSentence(result, String.valueOf(first));
            remaining = remaining.substring(1);
        }
        return remaining;
    }

    /**
     * Removes delimiter characters from the end of {@code token} and returns
     * the shortened token. Nothing is removed when the token matches the
     * inherited hyphens matcher, and at least one character is always kept.
     *
     * @param token the token after leading delimiters were peeled
     * @return a prefix of {@code token}; the removed suffix holds the
     *         trailing delimiters in their original order
     */
    private String peelTrailingDelimiters(String token) {
        hyphensMatcher.reset(token);
        if (hyphensMatcher.matches()) {
            return token;
        }
        String remaining = token;
        while (remaining.length() > 1) {
            int last = remaining.length() - 1;
            char tail = remaining.charAt(last);
            boolean detach;
            if (tail == '\'') {
                detach = !this.aposTokens.isKnownAposToken(remaining);
            } else if (tail == ':' || CharUtils.isBreakingDash(tail) || this.isClosingQuote(tail)) {
                detach = true;
            } else if (tail == '!' || tail == '?') {
                detach = !this.abbreviations.isAbbreviation(remaining);
            } else if (tail == '.') {
                if (remaining.charAt(0) == '$') {
                    // '$'-prefixed token: drop the final period only when
                    // another period occurs between the '$' and the last two
                    // characters; either way no further trimming is done.
                    if (remaining.length() > 2
                            && remaining.substring(1, last - 1).indexOf('.') >= 0) {
                        remaining = remaining.substring(0, last);
                    }
                    break;
                }
                if (this.isLetterOrSingleQuote(remaining.charAt(last - 1))) {
                    detach = !this.abbreviations.isAbbreviation(remaining);
                } else {
                    // Ellipses, numbers and Roman numerals keep their period.
                    detach = !(CharUtils.isAllPeriods(remaining)
                        || CharUtils.isNumber(remaining)
                        || RomanNumeralUtils.isLooseRomanNumeral(remaining));
                }
            } else {
                detach = false;
            }
            if (!detach) {
                break;
            }
            remaining = remaining.substring(0, last);
        }
        return remaining;
    }

    /**
     * Appends a possessive {@code 's}/{@code 'S} token to the previously
     * emitted word when that word matches {@link #DASHED_OR_STARRED_WORD}.
     *
     * @param token  the current core token
     * @param result the words emitted so far; its last entry is replaced on
     *               a successful merge
     * @return {@code true} when the token was merged and must not be emitted
     *         again
     */
    private boolean mergePossessiveIntoPreviousWord(String token, List<String> result) {
        if (!(token.equals("'s") || token.equals("'S")) || result.isEmpty()) {
            return false;
        }
        int lastIndex = result.size() - 1;
        String previousToken = result.get(lastIndex);
        if (!DASHED_OR_STARRED_WORD.matcher(previousToken).matches()) {
            return false;
        }
        result.set(lastIndex, previousToken + token);
        return true;
    }

    /**
     * Adds {@code word} to {@code sentence}, merging it into the last entry
     * in two situations:
     * <ul>
     *   <li>{@code word} is {@code "-"} or {@code "*"}, the corresponding
     *       coalescing option is enabled, and the last entry already ends
     *       with that character;</li>
     *   <li>{@code word} is {@code "."} and the last entry starts with a
     *       period and is a loose Roman numeral.</li>
     * </ul>
     * Otherwise the word is appended as a new entry.
     *
     * @param sentence the word list to update in place
     * @param word     the word to add
     */
    @Override
    public void addWordToSentence(List<String> sentence, String word) {
        int lastIndex = sentence.size() - 1;
        boolean hasPrevious = lastIndex >= 0;
        boolean coalesce = hasPrevious
            && (this.coalesceHyphens && word.equals("-")
                || this.coalesceAsterisks && word.equals("*"));
        if (coalesce) {
            String previousWord = sentence.get(lastIndex);
            if (previousWord.endsWith(word)) {
                sentence.set(lastIndex, previousWord + word);
            } else {
                sentence.add(word);
            }
        } else if (word.equals(".") && hasPrevious) {
            String previousWord = sentence.get(lastIndex);
            // startsWith rather than charAt(0): an empty previous entry must
            // not raise StringIndexOutOfBoundsException.
            if (previousWord.startsWith(".")
                    && RomanNumeralUtils.isLooseRomanNumeral(previousWord)) {
                sentence.set(lastIndex, previousWord + ".");
            } else {
                sentence.add(word);
            }
        } else {
            sentence.add(word);
        }
    }
}

