/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.tools.punktabbreviationdetector;

import edu.northwestern.at.morphadorner.corpuslinguistics.statistics.BigramLogLikelihood;
import edu.northwestern.at.morphadorner.tools.punktabbreviationdetector.PunktToken;
import edu.northwestern.at.morphadorner.tools.punktabbreviationdetector.PunktTokenType;
import edu.northwestern.at.utils.StringUtils;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * Accumulates token-string counts from a stream of {@link PunktToken}s and
 * uses those counts to decide which period-terminated strings are
 * abbreviations.
 *
 * <p>Tokens are fed one at a time through {@link #count(PunktToken)}; a small
 * three-state machine glues a word/number token and any directly following
 * period (and further word/number/period tokens) into a single candidate
 * string before counting it. Call {@link #finish()} after the last token so
 * that a candidate still held in the buffer is counted.</p>
 *
 * <p>This class keeps mutable state and performs no synchronization.</p>
 */
public class PunktTokenCounter {
    /** State: no candidate is currently being assembled. */
    protected static final int START = 0;
    /** State: the buffer holds a candidate that does not end in a period. */
    protected static final int CANDIDATE_1 = 1;
    /** State: the buffer holds a candidate whose last character is a period. */
    protected static final int CANDIDATE_2 = 2;

    /** Threshold used by the no-argument constructor. */
    private static final double DEFAULT_ABBREVIATION_THRESHOLD = 0.3;

    /** Current state of the token state machine (one of the constants above). */
    protected int state;
    /** Buffer holding the candidate string currently being assembled. */
    protected StringBuilder b;
    /** Number of times each token string has been counted. */
    protected Map<String, Integer> c;
    /** Counted strings that are longer than one character, end in a period and do not start with a digit. */
    protected Set<String> candidates;
    /** Total number of increments performed, i.e. the sum of all counts. */
    protected int n;
    /** Minimum score a candidate must reach to be reported as an abbreviation. */
    protected double abbreviationThreshold = DEFAULT_ABBREVIATION_THRESHOLD;
    /** When true, the penalty factor for occurrences without a trailing period is left at 1. */
    protected boolean ignoreAbbreviationPenalty = false;

    /**
     * Creates an empty counter.
     *
     * @param abbreviationThreshhold minimum score for a candidate to count as an abbreviation
     * @param ignoreAbbreviationPenalty true to skip the penalty for occurrences without a period
     */
    PunktTokenCounter(double abbreviationThreshhold, boolean ignoreAbbreviationPenalty) {
        this.abbreviationThreshold = abbreviationThreshhold;
        this.ignoreAbbreviationPenalty = ignoreAbbreviationPenalty;
        this.state = START;
        this.b = new StringBuilder();
        this.c = new HashMap<String, Integer>();
        this.candidates = new HashSet<String>();
        this.n = 0;
    }

    /** Creates an empty counter with a threshold of 0.3 and the penalty enabled. */
    PunktTokenCounter() {
        this(DEFAULT_ABBREVIATION_THRESHOLD, false);
    }

    /**
     * Feeds the next token into the state machine.
     *
     * @param t the next token; {@code null} is ignored
     */
    protected void count(PunktToken t) {
        if (t == null) {
            return;
        }
        switch (this.state) {
            case START: {
                if (this.isPeriod(t)) {
                    // A period with no preceding word/number is counted on its own.
                    this.inc(".");
                } else if (this.isWordOrNumber(t)) {
                    this.b.append(t.getTokenText());
                    this.state = CANDIDATE_1;
                }
                // Any other token type is ignored while in START.
                break;
            }
            case CANDIDATE_1: {
                if (this.isPeriod(t)) {
                    // Attach the period to the candidate and wait to see what follows it.
                    this.b.append(".");
                    this.state = CANDIDATE_2;
                } else {
                    // Candidate ended without a period; the current token itself is not counted.
                    this.inc(this.b.toString());
                    this.resetCandidate();
                }
                break;
            }
            case CANDIDATE_2: {
                if (t.getTokenType() == PunktTokenType.WHITESPACE) {
                    // Whitespace after the period: count the candidate including
                    // its period, and additionally count a stand-alone period.
                    this.inc(this.b.toString());
                    this.inc(".");
                    this.resetCandidate();
                } else if (this.isWordOrNumber(t)) {
                    // Text directly after the period: the period is internal, keep assembling.
                    this.b.append(t.getTokenText());
                    this.state = CANDIDATE_1;
                } else {
                    this.inc(this.b.toString());
                    this.resetCandidate();
                }
                break;
            }
            default: {
                break;
            }
        }
    }

    /**
     * Counts whatever candidate is still buffered and returns the state
     * machine to {@link #START}. If the buffered candidate ends in a period,
     * a stand-alone period is counted as well.
     */
    protected void finish() {
        if (this.b.length() > 0) {
            String pending = this.b.toString();
            this.inc(pending);
            if (pending.endsWith(".")) {
                this.inc(".");
            }
            this.b = new StringBuilder();
        }
        // Reset the state too: leaving it at CANDIDATE_1/CANDIDATE_2 with an
        // empty buffer would make a later count() call record an empty string
        // or a bare "." as if it were a candidate.
        this.state = START;
    }

    /**
     * Tells whether a token is a single period.
     *
     * @param token the token to test; must not be {@code null}
     * @return true if the token has type NONWORD and its text is exactly "."
     */
    protected boolean isPeriod(PunktToken token) {
        // Literal-first equals so a null token text yields false rather than an NPE.
        return token.getTokenType() == PunktTokenType.NONWORD && ".".equals(token.getTokenText());
    }

    /**
     * Adds one to the count of a string and to the running total, and records
     * the string as an abbreviation candidate when it is longer than one
     * character, ends in a period and does not start with a digit.
     *
     * @param s the string to count; must not be {@code null}
     */
    protected void inc(String s) {
        Integer previous = this.c.get(s);
        this.c.put(s, previous == null ? 1 : previous + 1);
        ++this.n;
        if (s.length() > 1 && s.endsWith(".") && !Character.isDigit(s.charAt(0))) {
            this.candidates.add(s);
        }
    }

    /**
     * Returns how often a string has been counted.
     *
     * @param tokenString the string to look up
     * @return its count, or 0 if it has never been counted
     */
    public int getCount(String tokenString) {
        Integer count = this.c.get(tokenString);
        return count == null ? 0 : count;
    }

    /**
     * Returns the total number of increments performed so far.
     *
     * @return the sum of all counts
     */
    public int getN() {
        return this.n;
    }

    /**
     * Returns the abbreviation candidates collected so far.
     *
     * @return the internal candidate set itself (not a copy), so changes to it
     *         affect this counter
     */
    public Set<String> getCandidates() {
        return this.candidates;
    }

    /**
     * Returns the candidates that {@link #isAnAbbreviation(String)} accepts.
     *
     * @return a newly created set of accepted candidates
     */
    public Set<String> getAbbreviations() {
        Set<String> abbreviations = new HashSet<String>();
        for (String candidate : this.candidates) {
            if (this.isAnAbbreviation(candidate)) {
                abbreviations.add(candidate);
            }
        }
        return abbreviations;
    }

    /**
     * Scores a candidate and compares the score with the threshold.
     *
     * <p>The score is the product of the value returned by
     * {@code BigramLogLikelihood.calculateLogLikelihood}, the number of
     * periods in the candidate plus one, {@code 1 / e^npc}, and (unless the
     * penalty is disabled) {@code 1 / npc^k}, where {@code npc} is the
     * candidate length minus its period count and {@code k} is the count of
     * the candidate without its trailing period.</p>
     *
     * @param candidate the string to test
     * @return true if the candidate has at least two characters, ends in a
     *         period and scores at or above the threshold
     */
    protected boolean isAnAbbreviation(String candidate) {
        if (candidate.length() < 2 || !candidate.endsWith(".")) {
            return false;
        }
        String withoutPeriod = candidate.substring(0, candidate.length() - 1);
        int cTPeriod = this.getCount(candidate);
        // Occurrences of the type both with and without the trailing period.
        int cT = cTPeriod + this.getCount(withoutPeriod);
        double logLambda = BigramLogLikelihood.calculateLogLikelihood(cT, this.getCount("."), cTPeriod, this.getN());
        // NOTE(review): countChar presumably returns the number of '.' characters - confirm in StringUtils.
        int nPeriods = StringUtils.countChar(candidate, '.');
        int fPeriods = nPeriods + 1;
        int npc = candidate.length() - nPeriods;
        double fLength = 1.0 / Math.exp(npc);
        double fPenalty = 1.0;
        if (!this.ignoreAbbreviationPenalty) {
            // The exponent is the number of occurrences without the trailing period.
            fPenalty = 1.0 / Math.pow(npc, cT - cTPeriod);
        }
        double score = logLambda * (double) fPeriods * fLength * fPenalty;
        return score >= this.abbreviationThreshold;
    }

    /** Tells whether a token has type WORD or NUMBER. */
    private boolean isWordOrNumber(PunktToken t) {
        PunktTokenType type = t.getTokenType();
        return type == PunktTokenType.WORD || type == PunktTokenType.NUMBER;
    }

    /** Discards the buffered candidate and returns the state machine to START. */
    private void resetCandidate() {
        this.b = new StringBuilder();
        this.state = START;
    }
}

