/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.tools.tcp;

import edu.northwestern.at.morphadorner.tools.compareadornedfiles.AdornedWordData;
import edu.northwestern.at.morphadorner.tools.compareadornedfiles.AdornedWordsLoader;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.CountMapUtils;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.MapUtils;
import edu.northwestern.at.utils.SetFactory;
import edu.northwestern.at.utils.SetUtils;
import edu.northwestern.at.utils.StringUtils;
import java.io.PrintStream;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Command-line tool that scans adorned XML files, counts every word token,
 * and records which tokens contain a division marker.
 *
 * <p>Usage: {@code CountDividedWords dividedWordsFile wordsAndCountsFile input...}
 * <ul>
 *   <li>{@code dividedWordsFile} receives the sorted set of tokens that
 *       contain the {@code |} division marker;</li>
 *   <li>{@code wordsAndCountsFile} receives every token with its count,
 *       tab separated;</li>
 *   <li>the remaining arguments are file names or wildcards naming the
 *       adorned files to read.</li>
 * </ul>
 *
 * <p>All state is static and a single {@link Matcher} is shared, so the
 * class is not safe for concurrent use.
 */
public class CountDividedWords {
    /** Number of leading arguments (the two output file names) before the input file specs. */
    protected static final int INITPARAMS = 2;

    /** Highest word-part number probed when collecting the parts of a split word. */
    protected static final int MAX_WORD_PARTS = 100;

    /**
     * Matches the trailing ".n" part number of a split word's ID.
     * One or more digits are accepted so that parts numbered 10 and above
     * are recognised as parts, consistent with {@link #MAX_WORD_PARTS}.
     */
    protected static final Pattern PART_WORD_PATTERN = Pattern.compile("\\.(\\d+)$");

    /** Number of input files found after wildcard expansion. */
    protected static int filesToProcess = 0;

    /** 1-based ordinal of the file currently being processed (for progress output). */
    protected static int currentFileNumber = 0;

    /** Running total of word IDs seen across all successfully loaded files. */
    protected static int totalWords = 0;

    /** Output file name for the set of divided words. */
    protected static String dividedWordsFileName = null;

    /** Output file name for the word/count table. */
    protected static String wordsAndCountsFileName = null;

    /** Not used by this class; retained for compatibility with existing subclasses. */
    protected static PrintStream printStream;

    /** Token to occurrence count. */
    protected static Map<String, Number> wordsAndCounts = MapFactory.createNewMap();

    /** Tokens that contain at least one division marker. */
    protected static Set<String> dividedWords = SetFactory.createNewSet();

    /** Shared, reusable matcher for {@link #PART_WORD_PATTERN}. */
    protected static Matcher partWordMatcher = PART_WORD_PATTERN.matcher("");

    /**
     * Program entry point. Exits with status 1 when the arguments are
     * insufficient or when processing throws.
     *
     * @param args output file names followed by input file specs
     */
    public static void main(String[] args) {
        int filesProcessed = 0;
        long processingTime = 0L;
        try {
            if (!CountDividedWords.initialize(args)) {
                System.exit(1);
            }
            long startTime = System.currentTimeMillis();
            filesProcessed = CountDividedWords.processFiles(args);
            // Round elapsed milliseconds up to whole seconds.
            processingTime = (System.currentTimeMillis() - startTime + 999L) / 1000L;
        }
        catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
        CountDividedWords.terminate(filesProcessed, processingTime);
    }

    /**
     * Validates the argument count and stores the two output file names.
     *
     * @param args command-line arguments
     * @return {@code true} if there are at least {@code INITPARAMS + 1}
     *         arguments, {@code false} otherwise
     * @throws Exception declared for subclasses/callers; not thrown here
     */
    protected static boolean initialize(String[] args) throws Exception {
        // Need both output names plus at least one input file spec.
        if (args.length < INITPARAMS + 1) {
            System.err.println("Not enough parameters.");
            return false;
        }
        dividedWordsFileName = args[0];
        wordsAndCountsFileName = args[1];
        return true;
    }

    /**
     * Loads one adorned file and folds its words into the global counts.
     *
     * <p>A load failure is reported and the file is skipped. Non-first
     * parts of split words, end-of-sentence markers (IDs ending in
     * {@code -eos}) and punctuation/symbol tokens are not counted.
     *
     * @param xmlFileName path of the adorned XML file to read
     */
    protected static void processOneFile(String xmlFileName) {
        System.out.println("Processing " + xmlFileName + " (" + ++currentFileNumber + "/" + filesToProcess + ")");
        AdornedWordsLoader xmlReader;
        try {
            xmlReader = new AdornedWordsLoader(xmlFileName);
        }
        catch (Exception e) {
            e.printStackTrace();
            System.out.println("   *** Processing of " + xmlFileName + " failed.");
            return;
        }
        List<String> idList = xmlReader.getAdornedWordIDs();
        totalWords += idList.size();
        for (String id : idList) {
            // Cheap ID-based filters first, before assembling the word text.
            if (!CountDividedWords.isFirstWordPart(id) || id.endsWith("-eos")) {
                continue;
            }
            String wordText = CountDividedWords.getWordText(xmlReader, id);
            if (CharUtils.isPunctuationOrSymbol(wordText)) {
                continue;
            }
            // Turn each U+2011 into the division marker, then collapse a
            // doubled marker into one.
            String token = StringUtils.replaceAll(wordText, "\u2011", "|");
            token = StringUtils.replaceAll(token, "||", "|");
            if (token.length() == 0) {
                // NOTE(review): the empty token is reported but still counted,
                // as before — confirm whether it should be skipped instead.
                System.out.println("   Empty word at " + id);
            }
            CountMapUtils.updateWordCountMap(token, 1, wordsAndCounts);
            if (token.indexOf("|") >= 0) {
                dividedWords.add(token);
            }
        }
    }

    /**
     * Tells whether a word ID denotes a whole word or the first part of a
     * split word.
     *
     * @param wordID the word ID to inspect
     * @return {@code true} if the ID has no trailing ".n" part number or
     *         that number is {@code 1}; {@code false} otherwise
     */
    protected static boolean isFirstWordPart(String wordID) {
        partWordMatcher.reset(wordID);
        if (!partWordMatcher.find()) {
            return true;
        }
        return "1".equals(partWordMatcher.group(1));
    }

    /**
     * Builds the full text of a word by concatenating the text of all its parts.
     *
     * @param adornedWordsLoader loader holding the word data
     * @param wordID ID of the word, or of any part of a split word
     * @return the concatenated text of the parts, in part order
     */
    protected static String getWordText(AdornedWordsLoader adornedWordsLoader, String wordID) {
        StringBuilder text = new StringBuilder();
        for (String partID : CountDividedWords.getWordPartIDs(adornedWordsLoader, wordID)) {
            text.append(adornedWordsLoader.getAdornedWordData(partID).getWordText());
        }
        return text.toString();
    }

    /**
     * Lists the IDs of all parts of a word.
     *
     * <p>For an ID with a trailing ".n" part number, parts {@code base.1},
     * {@code base.2}, ... are probed in order until one is missing from the
     * loader or {@link #MAX_WORD_PARTS} is reached. For any other ID the
     * result contains just that ID.
     *
     * @param adornedWordsLoader loader used to test which part IDs exist
     * @param wordID ID of the word, or of any part of a split word
     * @return the part IDs in ascending part order
     */
    protected static List<String> getWordPartIDs(AdornedWordsLoader adornedWordsLoader, String wordID) {
        List<String> result = ListFactory.createNewList();
        partWordMatcher.reset(wordID);
        if (!partWordMatcher.find()) {
            result.add(wordID);
            return result;
        }
        String wordIDBase = wordID.substring(0, wordID.lastIndexOf("."));
        for (int part = 1; part <= MAX_WORD_PARTS; ++part) {
            String partID = wordIDBase + "." + part;
            if (adornedWordsLoader.getAdornedWordData(partID) == null) {
                break;
            }
            result.add(partID);
        }
        return result;
    }

    /**
     * Expands the input file specs, processes every matching file, and
     * writes both output files.
     *
     * @param args command-line arguments; entries from index
     *        {@code INITPARAMS} onward are file names or wildcards
     * @return the number of files found after wildcard expansion
     * @throws Exception if wildcard expansion or writing the outputs fails
     */
    protected static int processFiles(String[] args) throws Exception {
        String[] wildCards = new String[args.length - INITPARAMS];
        System.arraycopy(args, INITPARAMS, wildCards, 0, wildCards.length);
        String[] fileNames = FileNameUtils.expandFileNameWildcards(wildCards);
        filesToProcess = fileNames.length;
        for (String fileName : fileNames) {
            CountDividedWords.processOneFile(fileName);
        }
        SetUtils.saveSortedSet(dividedWords, dividedWordsFileName, "utf-8");
        MapUtils.saveSortedMap(wordsAndCounts, wordsAndCountsFileName, "\t", "", "utf-8");
        return fileNames.length;
    }

    /**
     * Prints the final summary line.
     *
     * @param filesProcessed number of files processed
     * @param processingTime elapsed time in whole seconds
     */
    protected static void terminate(int filesProcessed, long processingTime) {
        System.out.println("Processed " + Formatters.formatLongWithCommas(totalWords) + StringUtils.pluralize(totalWords, " word in ", " words in ") + Formatters.formatIntegerWithCommas(filesProcessed) + StringUtils.pluralize(filesProcessed, " file in ", " files in ") + Formatters.formatLongWithCommas(processingTime) + StringUtils.pluralize(processingTime, " second.", " seconds."));
    }
}

