/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.tools.tcp;

import edu.northwestern.at.morphadorner.corpuslinguistics.namerecognizer.Names;
import edu.northwestern.at.utils.CountMapUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.MapUtils;
import edu.northwestern.at.utils.SetUtils;
import edu.northwestern.at.utils.StringUtils;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

/**
 * Command-line tool that decides, for each divided word (a token containing
 * <code>|</code> markers), whether the marker should be dropped or turned
 * into a hyphen.
 *
 * <p>Arguments, in order:</p>
 * <ol>
 *   <li>file of divided words</li>
 *   <li>file of word counts</li>
 *   <li>file of standard spellings</li>
 *   <li>output file receiving the token-to-corrected-spelling map</li>
 * </ol>
 *
 * <p>One tab-separated row per token (token, chosen spelling, unhyphenated
 * count, hyphenated count) is also written to standard output as UTF-8.
 * Progress and error messages go to standard error so they never mix with
 * those rows.</p>
 */
public class FindSoftHyphens {
    /** UTF-8 wrapper around standard output; receives the per-token rows. */
    protected static PrintStream printStream;

    /** Name of the input file listing the divided words (argument 1). */
    protected static String dividedWordsFileName;

    /** Name of the input file holding word counts (argument 2). */
    protected static String wordCountsFileName;

    /** Name of the input file listing standard spellings (argument 3). */
    protected static String standardSpellingsFileName;

    /** Name of the output file for the corrected spellings (argument 4). */
    protected static String fixedWordsFileName;

    /** Standard spellings loaded from {@link #standardSpellingsFileName}. */
    protected static Set<String> standardSpellings;

    /** Divided words loaded from {@link #dividedWordsFileName}. */
    protected static Set<String> dividedWords;

    /** Word counts loaded from {@link #wordCountsFileName}. */
    protected static Map<String, Number> wordCounts;

    /**
     * Program entry point.
     *
     * <p>Exits with status 1 when the arguments are insufficient or when
     * any step throws; the stack trace is written to standard error only.</p>
     *
     * @param args the four file names described in the class comment
     */
    public static void main(String[] args) {
        try {
            if (!FindSoftHyphens.initialize(args)) {
                System.exit(1);
            }
            long startTime = System.currentTimeMillis();
            Map<String, String> correctedSpellings = FindSoftHyphens.processWords();
            MapUtils.saveMap(correctedSpellings, fixedWordsFileName, "\t", "", "utf-8");
            // Round the elapsed time up to whole seconds.
            long processingTime = (System.currentTimeMillis() - startTime + 999L) / 1000L;
            FindSoftHyphens.terminate(correctedSpellings.size(), processingTime);
        }
        catch (Exception e) {
            // Standard output carries the result rows, so diagnostics are
            // confined to standard error, and the failure is signalled to
            // the caller through a non-zero exit status.
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Stores the file names from the command line, opens the UTF-8 output
     * stream and loads the three input files.
     *
     * @param args command-line arguments; at least four are required
     * @return {@code true} when initialization succeeded, {@code false}
     *         when too few arguments were supplied
     * @throws Exception if the output stream cannot be created or one of
     *                   the loaders fails
     */
    protected static boolean initialize(String[] args) throws Exception {
        if (args.length < 4) {
            System.err.println("Not enough parameters.");
            System.err.println("Usage: FindSoftHyphens <divided words file> <word counts file> <standard spellings file> <fixed words output file>");
            return false;
        }
        printStream = new PrintStream((OutputStream)new BufferedOutputStream(System.out), true, "utf-8");
        dividedWordsFileName = args[0];
        wordCountsFileName = args[1];
        standardSpellingsFileName = args[2];
        fixedWordsFileName = args[3];
        dividedWords = SetUtils.loadSortedSet(dividedWordsFileName, "utf-8");
        System.err.println("Loaded " + Formatters.formatIntegerWithCommas(dividedWords.size()) + " divided words.");
        wordCounts = CountMapUtils.loadCountMapFromFile(new File(wordCountsFileName), "utf-8");
        System.err.println("Loaded " + Formatters.formatIntegerWithCommas(wordCounts.size()) + " word counts.");
        standardSpellings = SetUtils.loadSortedSet(standardSpellingsFileName, "utf-8");
        System.err.println("Loaded " + Formatters.formatIntegerWithCommas(standardSpellings.size()) + " standard spellings.");
        return true;
    }

    /**
     * Chooses a corrected spelling for every divided word and prints one
     * tab-separated row per word to {@link #printStream}.
     *
     * <p>Lower-cased variants are produced with {@link Locale#ROOT} so the
     * lookups behave the same regardless of the JVM default locale.</p>
     *
     * @return map from each original token to its chosen spelling
     */
    protected static Map<String, String> processWords() {
        Map<String, String> correctedSpellings = MapFactory.createNewSortedMap();
        Names names = new Names();
        for (String token : dividedWords) {
            String unhyphenated = StringUtils.replaceAll(token, "|", "");
            String hyphenated = StringUtils.replaceAll(token, "|", "-");
            String unhyphenatedLower = unhyphenated.toLowerCase(Locale.ROOT);
            String hyphenatedLower = hyphenated.toLowerCase(Locale.ROOT);
            // Each count combines the form as written with its lower-cased form.
            int unhyphenatedCount = FindSoftHyphens.getWordCount(unhyphenated) + FindSoftHyphens.getWordCount(unhyphenatedLower);
            int hyphenatedCount = FindSoftHyphens.getWordCount(hyphenated) + FindSoftHyphens.getWordCount(hyphenatedLower);
            String correctedSpelling = chooseSpelling(
                names, unhyphenated, unhyphenatedLower, unhyphenatedCount,
                hyphenated, hyphenatedLower, hyphenatedCount);
            printStream.println(token + "\t" + correctedSpelling + "\t" + unhyphenatedCount + "\t" + hyphenatedCount);
            correctedSpellings.put(token, correctedSpelling);
        }
        return correctedSpellings;
    }

    /**
     * Picks between the unhyphenated and hyphenated form of one token.
     *
     * <p>When either form has a non-zero count the counts decide; the
     * hyphenated form wins ties. When neither form has been counted, the
     * standard spellings are consulted, then the name recognizer, and the
     * unhyphenated form is the final fallback.</p>
     */
    private static String chooseSpelling(
            Names names,
            String unhyphenated, String unhyphenatedLower, int unhyphenatedCount,
            String hyphenated, String hyphenatedLower, int hyphenatedCount) {
        if (unhyphenatedCount != 0) {
            if (hyphenatedCount == 0) {
                return unhyphenated;
            }
            return unhyphenatedCount > hyphenatedCount ? unhyphenated : hyphenated;
        }
        if (hyphenatedCount != 0) {
            return hyphenated;
        }
        // Neither form was counted: fall back to the reference lists.
        if (standardSpellings.contains(unhyphenated) || standardSpellings.contains(unhyphenatedLower)) {
            return unhyphenated;
        }
        if (standardSpellings.contains(hyphenated) || standardSpellings.contains(hyphenatedLower)) {
            return hyphenated;
        }
        // The unhyphenated name check must run first: it takes precedence
        // over a hyphenated name match.
        if (names.isNameOrPlace(unhyphenated)) {
            return unhyphenated;
        }
        if (names.isNameOrPlace(hyphenated)) {
            return hyphenated;
        }
        return unhyphenated;
    }

    /**
     * Looks up a word in {@link #wordCounts}.
     *
     * @param word the word to look up
     * @return the stored count as an {@code int}, or 0 when the word has
     *         no entry
     */
    protected static int getWordCount(String word) {
        Number count = wordCounts.get(word);
        return count == null ? 0 : count.intValue();
    }

    /**
     * Reports the number of words processed and the elapsed time on
     * standard error.
     *
     * @param wordsProcessed number of words processed
     * @param processingTime elapsed time in seconds
     */
    protected static void terminate(int wordsProcessed, long processingTime) {
        System.err.println("Processed " + Formatters.formatIntegerWithCommas(wordsProcessed) + " words in " + Formatters.formatLongWithCommas(processingTime) + " seconds.");
    }
}

