/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer;

import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PreTokenizer;
import edu.northwestern.at.utils.IsCloseableObject;
import edu.northwestern.at.utils.PatternReplacer;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.logger.DummyLogger;
import edu.northwestern.at.utils.logger.Logger;
import edu.northwestern.at.utils.logger.UsesLogger;

/**
 * Base class for pretokenizers: prepares a line of text for tokenization by
 * inserting spaces around characters that should stand apart as separators.
 *
 * <p>The class also stores the {@link Logger} exposed through the
 * {@link UsesLogger} accessors; a {@link DummyLogger} is installed by default.
 */
public abstract class AbstractPreTokenizer
        extends IsCloseableObject
        implements PreTokenizer, UsesLogger {
    /** Regex source: a run of three or more periods, as a single capture group. */
    protected static final String periods = "(\\.{3,})";

    /**
     * Regex source: a run of one or more asterisks, as a single capture group.
     * Not referenced inside this class; kept for subclasses.
     */
    protected static final String asterisks = "([\\*]+)";

    /** Regex source: a run of two or more hyphens, as a single capture group. */
    protected static final String hyphens = "(-{2,})";

    /**
     * Regex source: a comma (group 1) followed by any single character that is
     * not an ASCII digit (group 2). A comma directly before a digit does not
     * match (presumably because it is part of a number). Because a following
     * character is required, a comma that is the last character of the input
     * does not match either.
     */
    protected static final String commaSeparator = "(,)([^0-9])";

    /** Logger returned by {@link #getLogger()}; starts out as a {@link DummyLogger}. */
    protected Logger logger = new DummyLogger();

    /**
     * Regex source matching everything that is always treated as a separator.
     * Group 1 wraps the whole alternation:
     * <ul>
     *   <li>a run of hyphens ({@link #hyphens}),</li>
     *   <li>a run of periods ({@link #periods}), or</li>
     *   <li>a single character from the class assembled below.</li>
     * </ul>
     *
     * <p>The pattern is built from the named pieces so that each piece is
     * defined exactly once. The concatenation is a compile-time constant
     * expression, so the resulting string is the same as a single literal.
     */
    protected static final String alwaysSeparators =
        "("
            + hyphens + "|" + periods + "|"
            // Brackets, double quote, ; : / = backtick, pilcrow, < >, inverted
            // ! and ?, guillemets, underscore, curly double quotes, em dash,
            // vertical bar, broken bar, light vertical bar (U+2758).
            + "[\\(\\)\\[\\]\\{\\}\";:/=`\u00b6<>\u00a1\u00bf\u00ab\u00bb_\u201c\u201d\u2014\\|\u00a6\u2758"
            // General Punctuation block, minus bullet, the three prime marks,
            // curly single quotes, hyphen and non-breaking hyphen.
            + "[\\p{InGeneralPunctuation}&&[^\u2022\u2032\u2033\u2034\u2018\u2019\u2010\u2011]]"
            + "\\p{InLetterlikeSymbols}"
            + "\\p{InMathematicalOperators}"
            + "\\p{InMiscellaneousTechnical}"
            // Geometric Shapes block, minus the black circle (U+25CF).
            + "[\\p{InGeometricShapes}&&[^\u25cf]]"
            + "\\p{InMiscellaneousSymbols}"
            + "\\p{InDingbats}"
            + "\\p{InAlphabeticPresentationForms}"
            + "])";

    /**
     * Replacer built from {@link #alwaysSeparators} with the replacement
     * text {@code " $1 "} (the matched separator with a space on each side).
     * Left non-final, as in the original declaration, so existing code that
     * reassigns it keeps compiling.
     */
    protected static PatternReplacer alwaysSeparatorsReplacer =
        new PatternReplacer(alwaysSeparators, " $1 ");

    /**
     * Replacer built from {@link #commaSeparator} with the replacement text
     * {@code " $1 $2"} (space, comma, space, then the character that
     * followed the comma). Left non-final, as in the original declaration.
     */
    protected static PatternReplacer commaSeparatorReplacer =
        new PatternReplacer(commaSeparator, " $1 $2");

    /**
     * Returns the logger currently stored on this pretokenizer.
     *
     * @return the value of the {@code logger} field
     */
    @Override
    public Logger getLogger() {
        return this.logger;
    }

    /**
     * Stores the logger to be returned by {@link #getLogger()}.
     *
     * @param logger the logger to store; assigned as-is, without validation
     */
    @Override
    public void setLogger(Logger logger) {
        this.logger = logger;
    }

    /**
     * Prepares a line for tokenization by spacing out separator characters.
     *
     * <p>Three steps run in order: tab characters are replaced with a single
     * space (via {@code StringUtils.replaceAll}); then
     * {@link #alwaysSeparatorsReplacer} is applied; then
     * {@link #commaSeparatorReplacer} is applied to that result.
     *
     * @param line the text to pretokenize
     * @return the text after the three replacement steps
     */
    @Override
    public String pretokenize(String line) {
        // Tabs become plain spaces before any separator handling.
        String result = StringUtils.replaceAll(line, "\t", " ");

        // Set off the always-separator characters and runs with spaces.
        result = alwaysSeparatorsReplacer.replace(result);

        // Set off commas that are not directly followed by a digit.
        result = commaSeparatorReplacer.replace(result);

        return result;
    }
}

