/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer;

import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.AbstractWordTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.utils.ListFactory;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/**
 * Word tokenizer that separates English contractions and a few fused
 * colloquial forms into their component tokens before splitting the text
 * on whitespace.
 *
 * <p>For example {@code "don't"} is rewritten to {@code "do n't"},
 * {@code "John's"} to {@code "John 's"} and {@code "gonna"} to
 * {@code "gon na"}.
 */
public class ContractionTokenizer
extends AbstractWordTokenizer
implements WordTokenizer {
    /**
     * Rewrite rules as {@code {regex, replacement}} pairs. They are applied
     * strictly in this order; later rules see the output of earlier ones.
     */
    private static final String[][] REWRITE_RULES = {
        // A closing single quote / trailing apostrophe becomes its own token.
        {"([^'])' ", "$1 ' "},
        // Single-letter clitics: 's, 'm, 'd (either case).
        {"'([sSmMdD]) ", " '$1 "},
        {"'ll ", " 'll "},
        {"'re ", " 're "},
        {"'ve ", " 've "},
        {"n't ", " n't "},
        // The lookbehind keeps this rule from re-splitting the "n't" token
        // produced by the rule above (which would yield "n" + "'t").
        {"(?<!n)'t ", " 't "},
        // Already covered by the [sSmMdD] rule; retained so the spacing of
        // the returned string is unchanged.
        {"'s ", " 's "},
        {"'LL ", " 'LL "},
        {"'RE ", " 'RE "},
        {"'VE ", " 'VE "},
        {"N'T ", " N'T "},
        // Same guard as for the lower-case "'t" rule.
        {"(?<!N)'T ", " 'T "},
        // Already covered by the [sSmMdD] rule; see "'s" above.
        {"'S ", " 'S "},
        // Fused forms, matched only as whole space-delimited words.
        {" ([Cc])annot ", " $1an not "},
        {" ([Dd])'ye ", " $1' ye "},
        {" ([Gg])imme ", " $1im me "},
        {" ([Gg])onna ", " $1on na "},
        {" ([Gg])otta ", " $1ot ta "},
        {" ([Ll])emme ", " $1em me "},
        {" ([Mm])ore'n ", " $1ore 'n "},
        {" '([Tt])is ", " '$1 is "},
        {" '([Tt])was ", " '$1 was "},
        {" ([Ww])anna ", " $1an na "},
    };

    /** Precompiled form of {@link #REWRITE_RULES}, index-aligned with it. */
    private static final Pattern[] REWRITE_PATTERNS = compileRewritePatterns();

    /** Compiles the regex half of every rewrite rule once, at class load. */
    private static Pattern[] compileRewritePatterns() {
        Pattern[] compiled = new Pattern[REWRITE_RULES.length];
        for (int i = 0; i < compiled.length; i++) {
            compiled[i] = Pattern.compile(REWRITE_RULES[i][0]);
        }
        return compiled;
    }

    /**
     * Inserts spaces around contractions so that a plain whitespace split
     * yields one token per contraction part.
     *
     * <p>The rules match on literal space characters. The text is therefore
     * padded with one space on each side before the rules run, so that a
     * contraction at the very end of the text, or a fused form at the very
     * start, is handled the same way as one in the middle. The padding is
     * removed again by the final trim.
     *
     * @param str the text to prepare; must not be {@code null}
     * @return the rewritten text with leading and trailing whitespace removed
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String prepareTextForTokenization(String str) {
        if (str == null) {
            // Fail explicitly: string concatenation below would otherwise
            // turn null into the literal text "null".
            throw new NullPointerException("str");
        }
        String text = " " + str + " ";
        for (int i = 0; i < REWRITE_PATTERNS.length; i++) {
            text = REWRITE_PATTERNS[i].matcher(text).replaceAll(REWRITE_RULES[i][1]);
        }
        return text.trim();
    }

    /**
     * Splits text into word tokens, separating contractions first.
     *
     * @param text the text to tokenize; must not be {@code null}
     * @return the tokens in order of appearance; empty if the text holds no
     *         non-whitespace characters
     */
    @Override
    public List<String> extractWords(String text) {
        List<String> result = ListFactory.createNewList();
        String fixedText = ContractionTokenizer.prepareTextForTokenization(text);
        // Default StringTokenizer delimiters: space, tab, newline, CR, form feed.
        StringTokenizer tokenizer = new StringTokenizer(fixedText);
        while (tokenizer.hasMoreTokens()) {
            result.add(tokenizer.nextToken());
        }
        return result;
    }
}

