/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.corpuslinguistics.multiwordunits;

import edu.northwestern.at.morphadorner.corpuslinguistics.ngram.NGramExtractor;
import edu.northwestern.at.morphadorner.corpuslinguistics.statistics.BigramLogLikelihood;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.math.ArithUtils;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/**
 * Holds one candidate multiword unit (an n-gram) together with its counts and
 * a set of association measures computed from n-gram and single-word counts.
 *
 * <p>All measures are computed once, in the constructor, by
 * {@link #calculateAssociationMeasures()}. Within this class a tab character
 * is used as the separator when word sequences are joined into n-gram keys.</p>
 *
 * <p>NOTE(review): for a unit consisting of a single word the averages in
 * {@link #getAvx()}, {@link #getAvy()}, {@link #getAvp()} and
 * {@link #getAvp2()} divide by {@code words.length - 1 == 0}, so the derived
 * measures become infinite or NaN. Callers are presumably expected to pass
 * units of at least two words -- confirm.</p>
 */
public class MultiwordUnitData {
    /** N-gram text of the multiword unit, exactly as passed to the constructor. */
    protected String mwu;
    /** Count of {@link #mwu} as reported by the extractor at index {@code words.length - 1}. */
    protected int mwuCount;
    /** Number of words in the unit ({@code words.length}). */
    protected int mwuLength;
    /** Words of the unit, as returned by {@code NGramExtractor.splitNGramIntoWords}. */
    protected String[] words;
    /** Count of each entry of {@link #words}, looked up in {@link #wordCountMap}. */
    protected int[] wordCounts;
    /** Value of {@code 2 * freq(mwu) / (avx + avy)}. */
    protected double dice;
    /** Value returned by {@code BigramLogLikelihood.calculateLogLikelihood}. */
    protected double logLikelihood;
    /** Phi-squared value; stays 0 when its denominator is 0. */
    protected double phiSquared;
    /** Value of {@code prob(mwu)^2 / avp}, clamped to the range [0, 1]. */
    protected double scp;
    /** Value of {@code log2(prob(mwu) / avp)}. */
    protected double si;
    /**
     * NOTE(review): never assigned anywhere in this class, so
     * {@link #getSigLogLikelihood()} returns the field default unless a
     * subclass sets it.
     */
    protected double sigLogLikelihood;
    /** N-gram extractors; index {@code k - 1} is used for n-grams of {@code k} words. */
    protected NGramExtractor[] extractors;
    /** Suffix (tab followed by the unit) that marks an n-gram extending the unit on the left. */
    protected String leftSuccessorPattern;
    /** Prefix (the unit followed by a tab) that marks an n-gram extending the unit on the right. */
    protected String rightSuccessorPattern;
    /** Total word count used as the denominator for single-word probabilities. */
    protected int totalWordCount;
    /** Maps a word to its count. */
    protected Map<String, Integer> wordCountMap;

    /**
     * Creates the data holder and immediately computes all association measures.
     *
     * @param mwu            n-gram text of the multiword unit
     * @param wordCountMap   map from word to its count
     * @param totalWordCount total number of words
     * @param extractors     n-gram extractors, indexed by n-gram length minus one
     */
    public MultiwordUnitData(String mwu, Map<String, Integer> wordCountMap, int totalWordCount, NGramExtractor[] extractors) {
        this.mwu = mwu;
        this.wordCountMap = wordCountMap;
        this.totalWordCount = totalWordCount;
        this.extractors = extractors;
        this.leftSuccessorPattern = "\t" + mwu;
        this.rightSuccessorPattern = mwu + "\t";
        this.words = NGramExtractor.splitNGramIntoWords(mwu);
        this.mwuLength = this.words.length;
        // Fill the per-word counts so getWordCounts() returns data parallel to getWords().
        this.wordCounts = new int[this.words.length];
        for (int i = 0; i < this.words.length; ++i) {
            this.wordCounts[i] = this.getWordCount(this.words[i]);
        }
        this.mwuCount = extractors[this.words.length - 1].getNGramCount(mwu);
        // Kept as an overridable call because subclasses may rely on it running here.
        this.calculateAssociationMeasures();
    }

    /** @return the n-gram text of the unit, as passed to the constructor */
    public String getMWUText() {
        return this.mwu;
    }

    /** @return the count of the unit */
    public int getMWUTextCount() {
        return this.mwuCount;
    }

    /** @return the number of words in the unit */
    public int getMWUTextLength() {
        return this.mwuLength;
    }

    /** @return the words of the unit (the internal array, not a copy) */
    public String[] getWords() {
        return this.words;
    }

    /** @return the count of each word, parallel to {@link #getWords()} (the internal array, not a copy) */
    public int[] getWordCounts() {
        return this.wordCounts;
    }

    /** @return all words except the last, joined with tabs; empty for a single-word unit */
    public String leftAntecedent() {
        return joinWords(this.words, 0, this.words.length - 2);
    }

    /** @return all words except the first, joined with tabs; empty for a single-word unit */
    public String rightAntecedent() {
        return joinWords(this.words, 1, this.words.length - 1);
    }

    /**
     * Finds the n-grams one word longer than this unit that begin with the unit
     * followed by a tab, or end with a tab followed by the unit.
     *
     * @return matching n-grams in the order the extractor returns them; empty
     *         when no extractor for that n-gram length is available
     */
    public String[] successors() {
        NGramExtractor extractor = this.successorExtractor();
        if (extractor == null) {
            return new String[0];
        }
        List<String> successorList = ListFactory.createNewList();
        for (String ngram : extractor.getNGrams()) {
            if (ngram.startsWith(this.rightSuccessorPattern) || ngram.endsWith(this.leftSuccessorPattern)) {
                successorList.add(ngram);
            }
        }
        return successorList.toArray(new String[0]);
    }

    /**
     * Finds the n-grams one word longer than this unit that end with a tab
     * followed by the unit.
     *
     * @return matching n-grams in the iteration order of the extractor's map;
     *         empty when no extractor for that n-gram length is available
     */
    public String[] leftSuccessors() {
        NGramExtractor extractor = this.successorExtractor();
        if (extractor == null) {
            return new String[0];
        }
        List<String> successorList = ListFactory.createNewList();
        for (String potentialSuccessor : extractor.getNGramMap().keySet()) {
            if (potentialSuccessor.endsWith(this.leftSuccessorPattern)) {
                successorList.add(potentialSuccessor);
            }
        }
        return successorList.toArray(new String[0]);
    }

    /**
     * Finds the n-grams one word longer than this unit that begin with the unit
     * followed by a tab.
     *
     * <p>The extractor's n-gram map is cast to {@link TreeMap}; a
     * {@code ClassCastException} results if it is any other map type.</p>
     *
     * @return matching n-grams in key order; empty when no extractor for that
     *         n-gram length is available
     */
    public String[] rightSuccessors() {
        NGramExtractor extractor = this.successorExtractor();
        if (extractor == null) {
            return new String[0];
        }
        List<String> successorList = ListFactory.createNewList();
        TreeMap<String, Integer> sortedNGrams = (TreeMap<String, Integer>) extractor.getNGramMap();
        // Walk the keys from the prefix onward and stop at the first key that
        // no longer starts with it; the tail view avoids copying the map.
        for (String potentialSuccessor : sortedNGrams.tailMap(this.rightSuccessorPattern).keySet()) {
            if (!potentialSuccessor.startsWith(this.rightSuccessorPattern)) {
                break;
            }
            successorList.add(potentialSuccessor);
        }
        return successorList.toArray(new String[0]);
    }

    /**
     * Averages the counts of the leading sub-sequences of the unit: the first
     * word, the first two words, and so on up to all words but the last.
     *
     * @return the sum of those counts divided by {@code words.length - 1}
     */
    public double getAvx() {
        double avx = this.getWordCount(this.words[0]);
        for (int last = 1; last <= this.words.length - 2; ++last) {
            // A prefix ending at index `last` has last + 1 words, so its extractor is at index `last`.
            avx += (double)this.extractors[last].getNGramCount(joinWords(this.words, 0, last));
        }
        return avx / (double)(this.words.length - 1);
    }

    /**
     * Averages the counts of the trailing sub-sequences of the unit: the last
     * word, the last two words, and so on up to all words but the first.
     *
     * @return the sum of those counts divided by {@code words.length - 1}
     */
    public double getAvy() {
        int n = this.words.length;
        double avy = this.getWordCount(this.words[n - 1]);
        for (int first = 1; first < n - 1; ++first) {
            // A suffix starting at index `first` has n - first words.
            avy += (double)this.extractors[n - first - 1].getNGramCount(joinWords(this.words, first, n - 1));
        }
        return avy / (double)(n - 1);
    }

    /**
     * Averages, over every split point of the unit, the product of the
     * probabilities of the left part and the right part.
     *
     * @return the sum of those products divided by {@code words.length - 1}
     */
    protected double getAvp() {
        int n = this.words.length;
        double avp = 0.0;
        for (int split = 0; split < n - 1; ++split) {
            avp += this.prob(this.words, 0, split) * this.prob(this.words, split + 1, n - 1);
        }
        return avp / (double)(n - 1);
    }

    /**
     * Averages, over every split point of the unit, the product of the counts
     * of the left part and the right part.
     *
     * @return the sum of those products divided by {@code words.length - 1}
     */
    protected double getAvp2() {
        int n = this.words.length;
        double avp2 = 0.0;
        for (int split = 0; split < n - 1; ++split) {
            avp2 += this.freq(this.words, 0, split) * this.freq(this.words, split + 1, n - 1);
        }
        return avp2 / (double)(n - 1);
    }

    /**
     * Computes and stores {@code logLikelihood}, {@code dice}, {@code scp},
     * {@code si} and {@code phiSquared} from the averaged counts and
     * probabilities. {@code sigLogLikelihood} is not touched.
     */
    public void calculateAssociationMeasures() {
        double avx = this.getAvx();
        double avy = this.getAvy();
        double avp = this.getAvp();
        double avp2 = this.getAvp2();
        int lastIndex = this.words.length - 1;

        this.logLikelihood = BigramLogLikelihood.calculateLogLikelihood(avx, avy, this.mwuCount, this.totalWordCount);
        this.dice = 2.0 * this.freq(this.words, 0, lastIndex) / (avx + avy);

        double probWords = this.prob(this.words, 0, lastIndex);
        double scpValue = probWords * probWords / avp;
        // Clamp to [0, 1].
        this.scp = Math.max(Math.min(scpValue, 1.0), 0.0);
        this.si = ArithUtils.log2(probWords / avp);

        // phiSquared = (freq * n - avp2)^2 / (avp2 * (n - avx) * (n - avy)),
        // left at 0 when the denominator is 0.
        this.phiSquared = 0.0;
        double freq = this.mwuCount;
        double n = this.totalWordCount;
        double numerator = freq * n - avp2;
        numerator *= numerator;
        double denominator = avp2 * (n - avx) * (n - avy);
        if (denominator != 0.0) {
            this.phiSquared = numerator / denominator;
        }
    }

    /**
     * Computes the relative frequency of the word sequence
     * {@code words[i1..i2]} (both ends inclusive).
     *
     * @param words the words to take the sequence from
     * @param i1    index of the first word
     * @param i2    index of the last word
     * @return for a single word, its count divided by the total word count;
     *         otherwise the n-gram count divided by the number of n-grams
     *         reported by the extractor for that length
     */
    public double prob(String[] words, int i1, int i2) {
        int k = i2 - i1 + 1;
        String ngram = joinWords(words, i1, i2);
        if (k == 1) {
            return (double)this.getWordCount(ngram) / (double)this.totalWordCount;
        }
        NGramExtractor extractor = this.extractors[k - 1];
        return (double)extractor.getNGramCount(ngram) / (double)extractor.getNumberOfNGrams();
    }

    /**
     * Looks up the count of the word sequence {@code words[i1..i2]} (both ends
     * inclusive).
     *
     * @param words the words to take the sequence from
     * @param i1    index of the first word
     * @param i2    index of the last word
     * @return for a single word, its count from the word count map; otherwise
     *         the n-gram count from the extractor for that length
     */
    public double freq(String[] words, int i1, int i2) {
        int k = i2 - i1 + 1;
        String ngram = joinWords(words, i1, i2);
        if (k == 1) {
            return (double)this.getWordCount(ngram);
        }
        return (double)this.extractors[k - 1].getNGramCount(ngram);
    }

    /** @return the stored {@code dice} value */
    public double getDice() {
        return this.dice;
    }

    /** @return the stored {@code logLikelihood} value */
    public double getLogLikelihood() {
        return this.logLikelihood;
    }

    /** @return the stored {@code phiSquared} value */
    public double getPhiSquared() {
        return this.phiSquared;
    }

    /** @return the stored {@code scp} value */
    public double getSCP() {
        return this.scp;
    }

    /** @return the stored {@code si} value */
    public double getSI() {
        return this.si;
    }

    /** @return the stored {@code sigLogLikelihood} value, which this class never assigns */
    public double getSigLogLikelihood() {
        return this.sigLogLikelihood;
    }

    /**
     * Looks up the count of a single word.
     *
     * @param word the word to look up
     * @return the count from the word count map, or 0 when the word is absent
     *         or mapped to {@code null}
     */
    public int getWordCount(String word) {
        Integer count = this.wordCountMap.get(word);
        return count == null ? 0 : count;
    }

    /** @return the unit text with every tab replaced by a space */
    @Override
    public String toString() {
        return this.mwu.replace('\t', ' ');
    }

    /**
     * Returns the extractor for n-grams one word longer than this unit, or
     * {@code null} when the extractor array has no entry for that length.
     */
    private NGramExtractor successorExtractor() {
        int index = this.words.length;
        if (index >= this.extractors.length) {
            return null;
        }
        return this.extractors[index];
    }

    /**
     * Joins {@code words[first..last]} (both ends inclusive) with tabs; returns
     * an empty string when {@code last < first}. Separators are placed by
     * position, so an empty word still gets its separator.
     */
    private static String joinWords(String[] words, int first, int last) {
        StringBuilder sb = new StringBuilder(words.length * 10);
        for (int i = first; i <= last; ++i) {
            if (i > first) {
                sb.append('\t');
            }
            sb.append(words[i]);
        }
        return sb.toString();
    }
}

