/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.tools.createlexicon;

import edu.northwestern.at.morphadorner.corpuslinguistics.lexicon.BaseLexicon;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.MutableInteger;
import edu.northwestern.at.utils.UnicodeReader;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Locale;
import java.util.Map;

/**
 * Command-line tool that builds a word lexicon and a suffix lexicon from a
 * tab-separated training data file.
 *
 * <p>Each non-blank input line is split on tabs into up to four trimmed
 * columns, read here as spelling, part of speech, lemma and standard
 * spelling. The word lexicon counts every (spelling, part of speech, lemma)
 * occurrence; the suffix lexicon is then derived from the lower-cased word
 * lexicon entries.
 */
public class CreateLexicon {
    /** Training data file name (first command-line argument). */
    protected static String trainingDataFileName;

    /** Output word lexicon file name (second command-line argument). */
    protected static String wordLexiconFileName;

    /** Output suffix lexicon file name (third command-line argument). */
    protected static String suffixLexiconFileName;

    /** Entries whose count exceeds this value contribute no suffixes. */
    protected static int maxSuffixCount = Integer.MAX_VALUE;

    /** Longest suffix, in characters, added to the suffix lexicon. */
    protected static int maxSuffixLength = 6;

    /** Shortest suffix, in characters, added to the suffix lexicon. */
    protected static int minSuffixLength = 1;

    /**
     * Reads the training data, builds both lexicons, writes them to their
     * output files and prints summary statistics to standard output.
     *
     * @throws IOException if the training data cannot be read or a lexicon
     *                     operation fails with an I/O error.
     */
    private static void generateLexicon() throws IOException {
        long startTime = System.currentTimeMillis();
        System.out.println("Reading training data from " + trainingDataFileName + " .");
        BaseLexicon wordLexicon = new BaseLexicon();
        BaseLexicon suffixLexicon = new BaseLexicon();
        try {
            readTrainingData(wordLexicon);
            addSuffixEntries(wordLexicon, suffixLexicon);

            System.out.println("Writing word lexicon to " + wordLexiconFileName + " .");
            wordLexicon.saveLexiconToTextFile(wordLexiconFileName, "utf-8");

            System.out.println("Writing suffix lexicon to " + suffixLexiconFileName + " .");
            System.out.println("   Maximum suffix length is " + maxSuffixLength + " .");
            if (maxSuffixCount == Integer.MAX_VALUE) {
                System.out.println("   Suffixes generated from all spellings.");
            } else {
                System.out.println("   Suffixes generated from spellings appearing no more than " + Formatters.formatIntegerWithCommas(maxSuffixCount) + " time" + (maxSuffixCount == 1 ? "" : "s") + ".");
            }
            suffixLexicon.saveLexiconToTextFile(suffixLexiconFileName, "utf-8");

            // Adding 999 ms before dividing rounds the elapsed time up to whole seconds.
            long elapsedSeconds = (System.currentTimeMillis() - startTime + 999L) / 1000L;
            System.out.println("Lexicons generated in " + Formatters.formatLongWithCommas(elapsedSeconds) + " seconds.");
            System.out.println("Word lexicon contains " + Formatters.formatIntegerWithCommas(wordLexicon.getLexiconSize()) + " entries.");
            System.out.println("Suffix lexicon contains " + Formatters.formatIntegerWithCommas(suffixLexicon.getLexiconSize()) + " entries.");
        } finally {
            // Close both lexicons even when an earlier step failed.
            try {
                wordLexicon.close();
            } finally {
                suffixLexicon.close();
            }
        }
    }

    /**
     * Reads every line of the training data file into the word lexicon and
     * reports how many lines were processed and how many were skipped.
     *
     * @param wordLexicon lexicon that receives the entries.
     * @throws IOException if the training data cannot be opened or read.
     */
    private static void readTrainingData(BaseLexicon wordLexicon) throws IOException {
        int linesRead = 0;
        int badLinesRead = 0;
        BufferedReader reader = new BufferedReader(new UnicodeReader(new FileInputStream(trainingDataFileName), "utf-8"));
        try {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                // Blank lines count as read but are otherwise ignored.
                ++linesRead;
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                if (!addTrainingLine(wordLexicon, line)) {
                    System.out.println("   Skipping bad input line <" + line + ">");
                    ++badLinesRead;
                }
            }
        } finally {
            // Release the file handle on failure as well as on success.
            reader.close();
        }
        System.out.println("Processed " + Formatters.formatIntegerWithCommas(linesRead) + " input lines.");
        if (badLinesRead > 0) {
            System.out.println("Skipped " + Formatters.formatIntegerWithCommas(badLinesRead) + " badly formed input lines.");
        }
    }

    /**
     * Parses one trimmed, non-empty training line and records it in the word
     * lexicon.
     *
     * @param wordLexicon lexicon that receives the entry.
     * @param line        trimmed, non-empty input line.
     * @return {@code true} if the line was recorded; {@code false} if it did
     *         not yield both a spelling and a part of speech (this includes
     *         lines with more than four columns).
     * @throws IOException if a lexicon operation fails with an I/O error.
     */
    private static boolean addTrainingLine(BaseLexicon wordLexicon, String line) throws IOException {
        String[] tokens = line.split("\t");
        for (int i = 0; i < tokens.length; ++i) {
            tokens[i] = tokens[i].trim();
        }

        String spelling = "";
        String pos = "";
        String lemma = "";
        String standard = "";
        switch (tokens.length) {
            case 1: {
                // A lone token serves as its own part of speech and lemma.
                spelling = tokens[0];
                pos = tokens[0];
                lemma = tokens[0];
                break;
            }
            case 2: {
                spelling = tokens[0];
                pos = tokens[1];
                break;
            }
            case 3: {
                spelling = tokens[0];
                pos = tokens[1];
                lemma = tokens[2];
                break;
            }
            case 4: {
                spelling = tokens[0];
                pos = tokens[1];
                lemma = tokens[2];
                standard = tokens[3];
                break;
            }
            default: {
                // Any other column count leaves the fields empty and is rejected below.
                break;
            }
        }
        if (spelling.length() == 0 || pos.length() == 0) {
            return false;
        }

        boolean isAmpersand = spelling.equals("&");
        if (CharUtils.isPunctuationOrSymbol(spelling) && !isAmpersand) {
            // Punctuation and symbols are their own part of speech and lemma.
            pos = spelling;
            lemma = spelling;
            standard = "";
        } else if (isAmpersand) {
            // "&" keeps its given part of speech and lemma but gets no standard-spelling entry.
            standard = "";
        }

        wordLexicon.updateEntryCount(spelling, pos, lemma, 1);
        // A single-word standard spelling is counted as an additional entry.
        if (standard.length() > 0 && standard.indexOf(" ") < 0) {
            wordLexicon.updateEntryCount(standard, pos, lemma, 1);
        }
        return true;
    }

    /**
     * Adds the suffixes of every eligible word lexicon entry to the suffix
     * lexicon, once per category of the entry, weighted by the category count.
     *
     * <p>Entries containing U+25CF (handled here as a gap marker) and entries
     * whose count exceeds {@link #maxSuffixCount} are skipped. Only suffixes
     * strictly shorter than the entry are generated.
     *
     * @param wordLexicon   source lexicon.
     * @param suffixLexicon lexicon that receives the suffix counts.
     * @throws IOException if a lexicon operation fails with an I/O error.
     */
    private static void addSuffixEntries(BaseLexicon wordLexicon, BaseLexicon suffixLexicon) throws IOException {
        String[] entries = wordLexicon.getEntries();
        for (int i = 0; i < entries.length; ++i) {
            String entry = entries[i];
            int entryCount = wordLexicon.getEntryCount(entry);
            boolean hasGap = entry.indexOf("\u25cf") >= 0;
            if (hasGap || entryCount > maxSuffixCount) {
                continue;
            }
            // Locale.ROOT keeps the generated suffixes independent of the platform default locale.
            String lowerCaseEntry = entry.toLowerCase(Locale.ROOT);
            int length = lowerCaseEntry.length();
            Map<String, MutableInteger> categoryCounts = wordLexicon.getCategoryCountsForEntry(entry);
            for (Map.Entry<String, MutableInteger> category : categoryCounts.entrySet()) {
                String categoryName = category.getKey();
                int categoryCount = category.getValue().intValue();
                for (int j = maxSuffixLength; j >= minSuffixLength; --j) {
                    if (length <= j) {
                        continue;
                    }
                    suffixLexicon.updateEntryCount(lowerCaseEntry.substring(length - j, length), categoryName, "*", categoryCount);
                }
            }
        }
    }

    /** Prints command-line usage to standard output. */
    protected static void help() {
        System.out.println("Usage: ");
        System.out.println("");
        System.out.println("java -Xmx512m edu.northwestern.at.morphadorner.tools.createlexicon.CreateLexicon trainingdata");
        System.out.println("   outputwordlexicon outputsuffixlexicon maxsuffixlength maxsuffixcount");
        System.out.println("");
        System.out.println("-- training data contains input training data in utf-8 encoding (required).");
        System.out.println("-- outputwordlexicon receives output word lexicon (required).");
        System.out.println("-- outputsuffixlexicon receives output suffix lexicon (required).");
        System.out.println("-- maxsuffixlength is maximum length suffix to generate (optional, default is 6).");
        System.out.println("-- maxsuffixcount is maximum count for spelling to include in suffix lexicon (optional, default is no maximum).");
    }

    /**
     * Stores the command-line arguments in the static settings.
     *
     * @param args command-line arguments: training data file, word lexicon
     *             file, suffix lexicon file, then optionally the maximum
     *             suffix length and the maximum suffix count.
     * @return {@code true} if the arguments were accepted; {@code false} if
     *         fewer than three were given (usage is printed) or an optional
     *         numeric argument is not a valid integer.
     */
    protected static boolean initialize(String[] args) {
        if (args.length < 3) {
            CreateLexicon.help();
            return false;
        }

        boolean result = true;
        trainingDataFileName = args[0];
        wordLexiconFileName = args[1];
        suffixLexiconFileName = args[2];
        if (args.length > 3) {
            try {
                maxSuffixLength = Integer.parseInt(args[3]);
            } catch (NumberFormatException e) {
                result = false;
                System.out.println("Bad maximum suffix length.");
            }
        }
        if (args.length > 4) {
            try {
                maxSuffixCount = Integer.parseInt(args[4]);
            } catch (NumberFormatException e) {
                result = false;
                System.out.println("Bad maximum suffix count.");
            }
        }
        return result;
    }

    /**
     * Program entry point. Exits with status 1 if lexicon generation throws;
     * otherwise returns normally, including when the arguments are rejected.
     *
     * @param args command-line arguments; see {@link #help()}.
     */
    public static void main(String[] args) {
        int returnCode = 0;
        if (CreateLexicon.initialize(args)) {
            try {
                CreateLexicon.generateLexicon();
            } catch (Exception e) {
                e.printStackTrace();
                returnCode = 1;
            }
        }
        if (returnCode != 0) {
            System.exit(returnCode);
        }
    }
}

