/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.corpuslinguistics.ngram;

import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.StringUtils;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Extracts word n-grams from word sequences and keeps a frequency count for
 * each distinct n-gram.
 *
 * <p>An n-gram is stored as a single string key consisting of its words joined
 * by a tab character. Counts are held in the map returned by
 * {@code MapFactory.createNewSortedMap()}.</p>
 */
public class NGramExtractor {
    /** Number of consecutive words that make up one n-gram. */
    int nGramSize = 2;

    /** Window size supplied at construction; stored but not read by any method of this class. */
    int windowSize = 2;

    /** Maps each tab-joined n-gram string to the number of times it has been counted. */
    protected Map<String, Integer> nGramCounts = MapFactory.createNewSortedMap();

    /**
     * Cached sum of all counts. It is refreshed by {@link #getNumberOfNGrams()}
     * and {@link #mergeNGramExtractor(NGramExtractor)}; {@code addWords} does not
     * update it.
     */
    protected int numberOfNGrams = 0;

    /**
     * Creates an extractor.
     *
     * @param nGramSize  number of words per n-gram
     * @param windowSize window size (stored only)
     */
    public NGramExtractor(int nGramSize, int windowSize) {
        this.nGramSize = nGramSize;
        this.windowSize = windowSize;
    }

    /**
     * Counts the n-grams found in an array of words.
     *
     * @param words the words, in order
     */
    public void addWords(String[] words) {
        this.addWords(Arrays.asList(words));
    }

    /**
     * Counts the n-grams found in a list of words. Every run of
     * {@code nGramSize} consecutive words contributes one occurrence; a list
     * shorter than {@code nGramSize} contributes nothing.
     *
     * @param wordList the words, in order
     */
    public void addWords(List<String> wordList) {
        int wordCount = wordList.size();
        int lastOffset = this.nGramSize - 1;
        // "end" is the index of the last word of the current n-gram.
        for (int end = lastOffset; end < wordCount; ++end) {
            StringBuilder sb = new StringBuilder();
            for (int offset = lastOffset; offset >= 0; --offset) {
                // The separator is only added once the buffer is non-empty, so
                // leading empty words do not produce a leading tab.
                if (sb.length() > 0) {
                    sb.append("\t");
                }
                sb.append(wordList.get(end - offset));
            }
            this.addToCount(sb.toString(), 1);
        }
    }

    /**
     * Merges the n-gram counts of another extractor into this one. For each
     * n-gram in the other extractor, its count there is added to the count
     * held here. The cached total is then recomputed over all n-grams held by
     * this extractor.
     *
     * @param extractor the extractor whose counts are added to this one
     */
    public void mergeNGramExtractor(NGramExtractor extractor) {
        Map<String, Integer> otherMap = extractor.getNGramMap();
        for (Map.Entry<String, Integer> entry : otherMap.entrySet()) {
            // Add the other extractor's full count, not just one occurrence.
            this.addToCount(entry.getKey(), entry.getValue().intValue());
        }
        this.numberOfNGrams = this.sumCounts();
    }

    /**
     * Returns the count recorded for an n-gram.
     *
     * @param ngram the tab-joined n-gram string
     * @return the recorded count, or 0 when the n-gram has not been counted
     */
    public int getNGramCount(String ngram) {
        Integer count = this.nGramCounts.get(ngram);
        return (count == null) ? 0 : count.intValue();
    }

    /**
     * Returns the distinct n-gram strings, in the iteration order of the
     * underlying map's key set.
     *
     * @return a new array holding every distinct n-gram string
     */
    public String[] getNGrams() {
        return this.nGramCounts.keySet().toArray(new String[this.nGramCounts.size()]);
    }

    /**
     * Returns the live map of n-gram strings to counts (not a copy).
     *
     * @return the internal count map
     */
    public Map<String, Integer> getNGramMap() {
        return this.nGramCounts;
    }

    /**
     * Recomputes and returns the total number of n-gram occurrences, i.e. the
     * sum of all counts.
     *
     * @return the sum of the counts of all n-grams
     */
    public int getNumberOfNGrams() {
        this.numberOfNGrams = this.sumCounts();
        return this.numberOfNGrams;
    }

    /**
     * Returns the number of distinct n-grams.
     *
     * @return the size of the count map
     */
    public int getNumberOfUniqueNGrams() {
        return this.nGramCounts.size();
    }

    /**
     * Splits an n-gram string into words by delegating to
     * {@code StringUtils.makeTokenArray} with a tab as the separator.
     *
     * @param ngram the tab-joined n-gram string
     * @return the tokens produced by {@code StringUtils.makeTokenArray}
     */
    public static String[] splitNGramIntoWords(String ngram) {
        return StringUtils.makeTokenArray(ngram, "\t");
    }

    /** Adds {@code increment} to the count stored for {@code nGramString}, starting from zero if absent. */
    private void addToCount(String nGramString, int increment) {
        Integer current = this.nGramCounts.get(nGramString);
        int updated = (current == null) ? increment : current.intValue() + increment;
        this.nGramCounts.put(nGramString, Integer.valueOf(updated));
    }

    /** Returns the sum of every count currently held in the count map. */
    private int sumCounts() {
        int total = 0;
        for (Integer count : this.nGramCounts.values()) {
            total += count.intValue();
        }
        return total;
    }
}

