/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer;

import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;
import edu.northwestern.at.morphadorner.corpuslinguistics.abbreviations.Abbreviations;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.AbstractWordTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.CanSplitAroundPeriods;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.CanTokenizeWhitespace;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.NoopPreTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.utils.FileUtils;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.UnicodeReader;
import java.io.Reader;
import java.util.List;
import java.util.Locale;

/**
 * Word tokenizer driven by an ICU4J {@link BreakIterator}.
 *
 * <p>On construction the tokenizer tries to build a {@link RuleBasedBreakIterator}
 * from the rules resource named by {@link #wordBreakRulesFileName}, substituting the
 * abbreviations pattern for the {@code %abbreviations%} placeholder. If that fails
 * for any reason, it falls back to ICU4J's stock word iterator for the configured
 * locale.
 *
 * <p>NOTE(review): {@link #extractWords(String)} repositions the single shared
 * {@link #wordIterator} via {@code setText}, so concurrent calls on one instance
 * would interfere with each other — confirm callers use one tokenizer per thread.
 */
public class ICU4JBreakIteratorWordTokenizer
extends AbstractWordTokenizer
implements WordTokenizer,
CanTokenizeWhitespace,
CanSplitAroundPeriods {
    /** Locale used when falling back to the stock ICU4J word iterator. */
    protected Locale locale = Locale.US;

    /** When {@code true}, whitespace tokens are kept in the output; otherwise they are dropped. */
    protected boolean storeWhitespaceTokens = false;

    /**
     * When {@code true} (and whitespace tokens are stored), a whitespace token that
     * directly follows another whitespace token is concatenated onto it.
     */
    protected boolean mergeWhitespaceTokens = false;

    /** When {@code true}, each non-whitespace token is additionally passed through {@code splitToken}. */
    protected boolean splitAroundPeriods = true;

    /** Break iterator used to find token boundaries; set by {@link #createWordIterator()}. */
    protected BreakIterator wordIterator = null;

    /** Name of the break-rules resource, resolved through this class's resource loader. */
    protected String wordBreakRulesFileName = "resources/wordbreakrules.txt";

    /**
     * Creates a tokenizer using the default locale ({@link Locale#US}).
     */
    public ICU4JBreakIteratorWordTokenizer() {
        this.createWordIterator();
    }

    /**
     * Creates a tokenizer for the given locale.
     *
     * @param locale locale used if the stock ICU4J word iterator has to be created
     */
    public ICU4JBreakIteratorWordTokenizer(Locale locale) {
        this.locale = locale;
        this.createWordIterator();
    }

    /**
     * @return {@code true} if whitespace tokens are kept in the output
     */
    @Override
    public boolean getStoreWhitespaceTokens() {
        return this.storeWhitespaceTokens;
    }

    /**
     * @param storeWhitespaceTokens {@code true} to keep whitespace tokens in the output
     */
    @Override
    public void setStoreWhitespaceTokens(boolean storeWhitespaceTokens) {
        this.storeWhitespaceTokens = storeWhitespaceTokens;
    }

    /**
     * @return {@code true} if adjacent whitespace tokens are merged into one
     */
    @Override
    public boolean getMergeWhitespaceTokens() {
        return this.mergeWhitespaceTokens;
    }

    /**
     * @param mergeWhitespaceTokens {@code true} to merge adjacent whitespace tokens
     */
    @Override
    public void setMergeWhitespaceTokens(boolean mergeWhitespaceTokens) {
        this.mergeWhitespaceTokens = mergeWhitespaceTokens;
    }

    /**
     * @return {@code true} if tokens are further split via {@code splitToken}
     */
    @Override
    public boolean getSplitAroundPeriods() {
        return this.splitAroundPeriods;
    }

    /**
     * @param splitAroundPeriods {@code true} to further split tokens via {@code splitToken}
     */
    @Override
    public void setSplitAroundPeriods(boolean splitAroundPeriods) {
        this.splitAroundPeriods = splitAroundPeriods;
    }

    /**
     * Installs a no-op pre-tokenizer and builds {@link #wordIterator}.
     *
     * <p>The preferred iterator is a {@link RuleBasedBreakIterator} compiled from the
     * rules resource with {@code %abbreviations%} replaced by the abbreviations
     * pattern. Loading or compiling the rules is best-effort: any failure results in
     * the stock ICU4J word iterator being used instead.
     */
    protected void createWordIterator() {
        this.preTokenizer = new NoopPreTokenizer();
        String abbrevsPattern = Abbreviations.createAbbreviationsPattern(null);
        Reader reader = null;
        try {
            reader = new UnicodeReader(ICU4JBreakIteratorWordTokenizer.class.getResourceAsStream(this.wordBreakRulesFileName), "utf-8");
            String wordBreakRules = FileUtils.readTextFile(reader);
            wordBreakRules = StringUtils.replaceAll(wordBreakRules, "%abbreviations%", abbrevsPattern);
            this.wordIterator = new RuleBasedBreakIterator(wordBreakRules);
        }
        catch (Exception e) {
            // Deliberate best-effort: a missing or unparsable rules resource must not
            // prevent construction, so fall back to the stock word iterator. Guard
            // against a null locale so the fallback itself cannot fail on it.
            Locale fallbackLocale = this.locale != null ? this.locale : Locale.US;
            this.wordIterator = BreakIterator.getWordInstance(fallbackLocale);
        }
        finally {
            // The reader is closed exactly once, here, on both success and failure.
            try {
                if (reader != null) {
                    reader.close();
                }
            }
            catch (Exception ignored) {
                // Nothing useful can be done if closing the rules reader fails.
            }
        }
    }

    /**
     * Splits text into word tokens (and, optionally, whitespace tokens).
     *
     * <p>The text is first run through the pre-tokenizer, then cut at the boundaries
     * reported by {@link #wordIterator}. Tokens starting with a whitespace character
     * are stored, merged or dropped according to the whitespace flags. All other
     * tokens go through {@code preprocessToken}; non-empty results are optionally
     * split via {@code splitToken} and appended with {@code addWordToSentence}.
     *
     * @param text text to tokenize
     * @return list of tokens in text order; empty if the pre-tokenized text is {@code null}
     */
    @Override
    public List<String> extractWords(String text) {
        List<String> result = ListFactory.createNewList();
        String fixedText = this.preTokenizer.pretokenize(text);
        if (fixedText == null) {
            return result;
        }
        this.wordIterator.setText(fixedText);
        int start = this.wordIterator.first();
        int end = this.wordIterator.next();
        while (end != BreakIterator.DONE) {
            String token = fixedText.substring(start, end);
            if (Character.isWhitespace(token.charAt(0))) {
                if (this.storeWhitespaceTokens) {
                    this.mergeOrAppendWhitespace(result, token);
                }
            } else {
                token = this.preprocessToken(token, result);
                if (token.length() > 0) {
                    this.appendWordToken(result, token);
                }
            }
            start = end;
            end = this.wordIterator.next();
        }
        return result;
    }

    /**
     * Stores a whitespace token, concatenating it onto the previous token when
     * merging is enabled and that previous token is itself whitespace.
     */
    private void mergeOrAppendWhitespace(List<String> result, String token) {
        // Any non-empty list has a previous token to merge into, including a list
        // that holds only a single (leading) whitespace token.
        if (this.mergeWhitespaceTokens && !result.isEmpty()) {
            int lastIndex = result.size() - 1;
            String prevToken = result.get(lastIndex);
            if (prevToken != null && prevToken.length() > 0 && Character.isWhitespace(prevToken.charAt(0))) {
                result.set(lastIndex, prevToken + token);
                return;
            }
        }
        this.addWordToSentence(result, token);
    }

    /**
     * Appends a non-whitespace token, first splitting it via {@code splitToken}
     * when {@link #splitAroundPeriods} is set; empty pieces are skipped.
     */
    private void appendWordToken(List<String> result, String token) {
        if (!this.splitAroundPeriods) {
            this.addWordToSentence(result, token);
            return;
        }
        String[] tokens = this.splitToken(token);
        for (int k = 0; k < tokens.length; ++k) {
            if (tokens[k].length() > 0) {
                this.addWordToSentence(result, tokens[k]);
            }
        }
    }
}

