/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer;

import edu.northwestern.at.morphadorner.corpuslinguistics.abbreviations.Abbreviations;
import edu.northwestern.at.morphadorner.corpuslinguistics.apostokens.AposTokens;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.DefaultWordTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PreTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PreTokenizerFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.IsCloseable;
import edu.northwestern.at.utils.IsCloseableObject;
import edu.northwestern.at.utils.RomanNumeralUtils;
import edu.northwestern.at.utils.SetUtils;
import edu.northwestern.at.utils.SingleTagTaggedStrings;
import edu.northwestern.at.utils.TaggedStrings;
import edu.northwestern.at.utils.logger.DummyLogger;
import edu.northwestern.at.utils.logger.Logger;
import edu.northwestern.at.utils.logger.UsesLogger;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Base class for word tokenizers.
 *
 * <p>Holds the state shared by concrete tokenizers (pre-tokenizer, contraction
 * list, abbreviations, apostrophe tokens, logger) and provides helpers for
 * splitting tokens at an embedded period, classifying quote characters and
 * locating word offsets in a sentence. Subclasses supply
 * {@link #extractWords(String)}.</p>
 */
public abstract class AbstractWordTokenizer
extends IsCloseableObject
implements WordTokenizer,
IsCloseable,
UsesLogger {
    /** Pre-tokenizer obtained from {@link PreTokenizerFactory}. */
    protected PreTokenizer preTokenizer = PreTokenizerFactory.newPreTokenizer();
    /** Contractions loaded from {@link #contractionsURL}; every entry carries the tag "1". */
    protected TaggedStrings contractions;
    /** Resource path (resolved relative to {@code DefaultWordTokenizer}) of the contractions list. */
    protected String contractionsURL = "resources/contractions.txt";
    /** Logger; a {@link DummyLogger} until {@link #setLogger(Logger)} is called. */
    protected Logger logger;
    /** Abbreviations consulted by {@link #splitToken(String)}. */
    protected Abbreviations abbreviations;
    /** Apostrophe tokens; stored here for use by subclasses. */
    protected AposTokens aposTokens;
    protected boolean coalesceHyphens = false;
    protected boolean coalesceAsterisks = true;
    /** When true, a plain apostrophe is accepted as a single quote by the quote tests below. */
    protected boolean apostropheCanBeQuote = true;
    /** Matches a string made up entirely of two or more hyphens / non-breaking hyphens. */
    protected static final Pattern hyphensPattern = Pattern.compile("^([-\u2011]{2,})$");
    /**
     * Shared matcher kept only for compatibility with subclasses that may reference it.
     * A {@link Matcher} is mutable and not safe for concurrent use, so this class no
     * longer uses it; see {@link #isMultipleHyphens(String)}.
     */
    protected static final Matcher hyphensMatcher = hyphensPattern.matcher("");

    /**
     * Creates the tokenizer with a dummy logger, default abbreviations and
     * apostrophe tokens, and loads the contractions list.
     */
    public AbstractWordTokenizer() {
        // The logger must exist before loadContractions() so load failures can be reported.
        this.logger = new DummyLogger();
        this.loadContractions();
        this.abbreviations = new Abbreviations();
        this.aposTokens = new AposTokens();
    }

    /** @return the logger currently in use. */
    @Override
    public Logger getLogger() {
        return this.logger;
    }

    /** @param logger the logger to use from now on. */
    @Override
    public void setLogger(Logger logger) {
        this.logger = logger;
    }

    /** @param abbreviations the abbreviations consulted when splitting tokens. */
    @Override
    public void setAbbreviations(Abbreviations abbreviations) {
        this.abbreviations = abbreviations;
    }

    /** @param aposTokens the apostrophe tokens to store. */
    @Override
    public void setAposTokens(AposTokens aposTokens) {
        this.aposTokens = aposTokens;
    }

    /** @return the pre-tokenizer currently in use. */
    @Override
    public PreTokenizer getPreTokenizer() {
        return this.preTokenizer;
    }

    /** @param preTokenizer the pre-tokenizer to use from now on. */
    @Override
    public void setPreTokenizer(PreTokenizer preTokenizer) {
        this.preTokenizer = preTokenizer;
    }

    /**
     * Loads the contractions list from {@link #contractionsURL} as UTF-8 text.
     *
     * <p>Loading is best-effort: a failure is reported to the logger (when one is
     * set) and {@link #contractions} falls back to an empty list rather than
     * staying {@code null}.</p>
     */
    protected void loadContractions() {
        try {
            Set<String> contractionsSet = SetUtils.loadSet(DefaultWordTokenizer.class.getResource(this.contractionsURL), "utf-8");
            this.contractions = new SingleTagTaggedStrings(contractionsSet.toArray(new String[contractionsSet.size()]), "1");
        }
        catch (Exception exception) {
            if (this.logger != null) {
                this.logger.logError("Unable to load contractions from " + this.contractionsURL + ": " + exception);
            }
            // Keep any previously loaded list; otherwise use an empty one so callers never see null.
            if (this.contractions == null) {
                this.contractions = new SingleTagTaggedStrings(new String[0], "1");
            }
        }
    }

    /**
     * Hook for preprocessing a token before it is added; this implementation
     * returns the token unchanged and ignores {@code tokenList}.
     *
     * @param token the token to preprocess
     * @param tokenList the tokens collected so far (unused here)
     * @return {@code token}, unchanged
     */
    @Override
    public String preprocessToken(String token, List<String> tokenList) {
        return token;
    }

    /**
     * @param ch character to test
     * @return true for U+2018, or for a plain apostrophe when
     *         {@link #apostropheCanBeQuote} is set
     */
    public boolean isSingleOpeningQuote(char ch) {
        return ch == '\u2018' || ch == '\'' && this.apostropheCanBeQuote;
    }

    /**
     * @param ch character to test
     * @return true when {@code CharUtils.isLetter} accepts the character, or it is
     *         U+2018 or a plain apostrophe
     */
    protected boolean isLetterOrSingleQuote(char ch) {
        return CharUtils.isLetter(ch) || ch == '\u2018' || ch == '\'';
    }

    /**
     * @param ch character to test
     * @return true for U+2019 or U+201D, or for a plain apostrophe when
     *         {@link #apostropheCanBeQuote} is set
     */
    protected boolean isClosingQuote(char ch) {
        return ch == '\u2019' || ch == '\u201d' || ch == '\'' && this.apostropheCanBeQuote;
    }

    /**
     * Splits a token at its first embedded period.
     *
     * <p>The token is returned as-is (single-element array) when it contains no
     * period, ends with a period, or is recognised as currency, all periods, a
     * known abbreviation, a number or a loose Roman numeral. It is also left
     * whole when the text after the period is the possessive suffix
     * {@code 's} (either case). Otherwise:</p>
     * <ul>
     *   <li>if the text up to and including the period is a known abbreviation,
     *       the result is {@code {head, tail}};</li>
     *   <li>else the result is {@code {headWithoutPeriod, ".", tail}}.</li>
     * </ul>
     *
     * @param token the token to split
     * @return one, two or three pieces as described above
     */
    protected String[] splitToken(String token) {
        String[] result = new String[]{token};
        int iPos = token.indexOf(".");
        if (!(iPos < 0 || CharUtils.isCurrency(token) || CharUtils.isAllPeriods(token) || token.endsWith(".") || this.abbreviations.isKnownAbbreviation(token) || CharUtils.isNumber(token) || RomanNumeralUtils.isLooseRomanNumeral(token))) {
            String token1 = token.substring(0, iPos + 1);
            String token2 = token.substring(iPos + 1);
            // The original compared against "'S'" (stray trailing quote), so an
            // upper-case possessive was never recognised; compare case-insensitively.
            if (!"'s".equalsIgnoreCase(token2)) {
                if (this.abbreviations.isKnownAbbreviation(token1)) {
                    result = new String[]{token1, token2};
                } else {
                    result = new String[]{token1.substring(0, token1.length() - 1), ".", token2};
                }
            }
        }
        return result;
    }

    /**
     * Appends a word to a sentence.
     *
     * @param sentence the list of words making up the sentence
     * @param word the word to append
     */
    @Override
    public void addWordToSentence(List<String> sentence, String word) {
        sentence.add(word);
    }

    /**
     * Finds the starting offset of each word within the sentence text.
     *
     * <p>For each word, leading whitespace in the text is skipped, the offset is
     * recorded, and the scan then advances until as many non-whitespace
     * characters as the word's length have been passed. Scanning never runs past
     * the end of the text: if the text is exhausted early, the remaining words
     * are given the text length as their offset.</p>
     *
     * @param sentenceText the text of the sentence
     * @param words the words, in order; each is converted with {@code toString()}
     * @return an array of {@code words.size() + 1} offsets, the last of which is
     *         the length of {@code sentenceText}
     */
    @Override
    public int[] findWordOffsets(String sentenceText, List<?> words) {
        int wordCount = words.size();
        int[] result = new int[wordCount + 1];
        int sentenceTextLength = sentenceText.length();
        int offset = 0;
        for (int i = 0; i < wordCount; ++i) {
            String word = words.get(i).toString();
            while (offset < sentenceTextLength && CharUtils.isWhitespace(sentenceText.charAt(offset))) {
                ++offset;
            }
            result[i] = offset;
            int nbCount = word.length();
            int tNbCount = 0;
            while (tNbCount < nbCount && offset < sentenceTextLength) {
                if (!CharUtils.isWhitespace(sentenceText.charAt(offset))) {
                    ++tNbCount;
                }
                ++offset;
            }
        }
        result[wordCount] = sentenceTextLength;
        return result;
    }

    /**
     * @param s the string to test
     * @return true when {@code s} consists solely of two or more hyphens or
     *         non-breaking hyphens (U+2011); false for {@code null}
     */
    public boolean isMultipleHyphens(String s) {
        // A fresh matcher per call avoids sharing mutable Matcher state between threads.
        return s != null && hyphensPattern.matcher(s).matches();
    }

    /**
     * Breaks text into words.
     *
     * @param var1 the text to tokenize
     * @return the extracted words
     */
    @Override
    public abstract List<String> extractWords(String var1);
}

