/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.tools.findteitextlanguage;

import edu.northwestern.at.morphadorner.corpuslinguistics.languagerecognizer.DefaultLanguageRecognizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.languagerecognizer.LanguageRecognizer;
import edu.northwestern.at.utils.Compare;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.ScoredString;
import edu.northwestern.at.utils.SetFactory;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.xml.TEITextExtractorHandler;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.Set;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.helpers.DefaultHandler;

public class FindTEITextLanguage {
    /** Language recognizer applied to every document; created in {@code initialize}. */
    protected static LanguageRecognizer recognizer = null;
    /**
     * Not referenced anywhere in the visible source.
     * NOTE(review): presumably the number of leading non-input parameters (the output file name) -- confirm.
     */
    protected static final int INITPARAMS = 1;
    /** Results for all successfully processed documents, kept in a sorted set obtained from {@code SetFactory}. */
    protected static Set<DocData> outputSet = SetFactory.createNewSortedSet();
    /** Factory used to create one SAX parser per input file; created in {@code initialize}. */
    protected static SAXParserFactory parserFactory;
    /** Total number of input files found after wildcard expansion. */
    protected static int filesToProcess;
    /** 1-based ordinal of the file currently being processed, used in progress messages. */
    protected static int currentFileNumber;
    /** Length of the longest document title seen so far; used to align the console report. */
    protected static int longestTitle;

    /**
     * Command-line entry point.
     *
     * <p>{@code args[0]} is the name of the output file that receives the
     * tab-separated results; {@code args[1..]} are the input file names or
     * wildcards to process. The results are also printed to standard output.
     * The process exits with status 1 when initialization fails.
     *
     * @param args command-line arguments as described above
     */
    public static void main(String[] args) {
        if (!FindTEITextLanguage.initialize(args)) {
            System.exit(1);
        }
        long startTime = System.currentTimeMillis();
        int filesProcessed = FindTEITextLanguage.processFiles(args);
        try {
            FindTEITextLanguage.printResults();
        }
        catch (Exception e) {
            // Report the cause as well, in the same way processOneFile does,
            // so a failure can actually be diagnosed.
            e.printStackTrace();
            System.err.println("Unable to print results.");
        }
        try {
            FindTEITextLanguage.outputResults(args[0]);
        }
        catch (Exception e) {
            e.printStackTrace();
            System.err.println("Unable to output results to " + args[0]);
        }
        // Elapsed time rounded up to whole seconds.
        long processingTime = (System.currentTimeMillis() - startTime + 999L) / 1000L;
        FindTEITextLanguage.terminate(filesProcessed, processingTime);
    }

    /**
     * Validates the argument count and creates the shared SAX parser factory
     * and language recognizer.
     *
     * @param args command-line arguments; at least two are required
     * @return {@code true} when setup succeeded, {@code false} when there are
     *         too few arguments or the recognizer could not be created
     */
    protected static boolean initialize(String[] args) {
        boolean haveEnoughArgs = args.length >= 2;
        if (!haveEnoughArgs) {
            System.err.println("Not enough parameters.");
            return false;
        }
        parserFactory = SAXParserFactory.newInstance();
        boolean recognizerReady;
        try {
            recognizer = new DefaultLanguageRecognizer();
            recognizerReady = true;
        }
        catch (Exception e) {
            System.err.println("Unable to create language recognizer.");
            recognizerReady = false;
        }
        return recognizerReady;
    }

    /**
     * Extracts the text of one XML file, runs the language recognizer over it
     * and records the outcome in {@code outputSet}.
     *
     * <p>A progress line is written to standard error before parsing. Any
     * exception is reported on standard error and the file is skipped; it
     * never propagates to the caller.
     *
     * @param xmlFileName name of the XML file to analyze
     */
    protected static void processOneFile(String xmlFileName) {
        currentFileNumber += 1;
        String progress = "Processing " + xmlFileName + " (" + currentFileNumber + "/" + filesToProcess + ")";
        System.err.println(progress);
        try {
            SAXParser parser = parserFactory.newSAXParser();
            TEITextExtractorHandler textExtractor = new TEITextExtractorHandler();
            parser.parse(xmlFileName, (DefaultHandler)textExtractor);
            // Collapse every run of whitespace to a single blank before recognition.
            String rawText = textExtractor.getExtractedText();
            String normalizedText = rawText.replaceAll("(\\s+)", " ");
            ScoredString[] rankedLanguages = recognizer.recognizeLanguage(normalizedText);
            String title = FileNameUtils.stripPathName(xmlFileName);
            int textLength = normalizedText.length();
            outputSet.add(new DocData(xmlFileName, title, textLength, rankedLanguages));
            if (title.length() > longestTitle) {
                longestTitle = title.length();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            System.err.println("   *** " + xmlFileName + " failed");
        }
    }

    /**
     * Expands the input file specifications and processes every matching file.
     *
     * <p>The first argument (the output file name) is skipped; the remaining
     * arguments are passed to {@code FileNameUtils.expandFileNameWildcards}.
     *
     * @param args command-line arguments; {@code args[1..]} are file names or wildcards
     * @return the number of files found by wildcard expansion; files whose
     *         processing failed are included in this count
     */
    protected static int processFiles(String[] args) {
        String[] wildCards = new String[args.length - 1];
        System.arraycopy(args, 1, wildCards, 0, wildCards.length);
        String[] fileNames = FileNameUtils.expandFileNameWildcards(wildCards);
        filesToProcess = fileNames.length;
        for (String fileName : fileNames) {
            FindTEITextLanguage.processOneFile(fileName);
        }
        return fileNames.length;
    }

    /**
     * Writes an aligned, human-readable report of all collected results to
     * standard output as UTF-8.
     *
     * <p>Each row holds the document title padded to the longest title plus
     * four blanks, the document length left-padded to nine characters, and
     * then each language name and score padded towards eight characters.
     * The stream wrapping {@code System.out} is deliberately not closed.
     *
     * @throws Exception if the output stream cannot be created
     */
    protected static void printResults() throws Exception {
        OutputStream bufferedStdout = new BufferedOutputStream(System.out);
        PrintStream out = new PrintStream(bufferedStdout, true, "utf-8");
        for (DocData entry : outputSet) {
            String title = entry.docTitle;
            int titleGap = longestTitle - title.length() + 4;
            out.print(title);
            out.print(StringUtils.dupl(" ", titleGap));
            String lengthColumn = StringUtils.lpad(Formatters.formatIntegerWithCommas(entry.docLength), 9);
            out.print(lengthColumn);
            out.print(" ");
            for (ScoredString candidate : entry.docLanguages) {
                String language = candidate.getString();
                out.print(language);
                out.print(StringUtils.dupl(" ", 8 - language.length()));
                String score = Formatters.formatDouble(candidate.getScore(), 4);
                out.print(score);
                out.print(StringUtils.dupl(" ", 8 - score.length()));
            }
            out.println();
        }
    }

    /**
     * Writes all collected results to a UTF-8 encoded, tab-separated file.
     *
     * <p>Each line holds the document title, the document length, and then a
     * language name and score (formatted by {@code Formatters.formatDouble(score, 4)})
     * for every language slot.
     *
     * <p>The stream is always closed, even when writing fails part-way.
     * Because {@link PrintStream} swallows I/O errors, its error flag is
     * checked explicitly so that a failed write is reported to the caller.
     *
     * @param outputFileName name of the file to create or overwrite
     * @throws Exception if the file cannot be opened, or an {@link IOException}
     *                   if an error occurred while writing to it
     */
    protected static void outputResults(String outputFileName) throws Exception {
        PrintStream printStream = new PrintStream(new BufferedOutputStream(new FileOutputStream(outputFileName)), true, "utf-8");
        try {
            for (DocData docData : outputSet) {
                printStream.print(docData.docTitle);
                printStream.print("\t");
                printStream.print(docData.docLength);
                for (ScoredString langAndScore : docData.docLanguages) {
                    printStream.print("\t");
                    printStream.print(langAndScore.getString());
                    printStream.print("\t");
                    printStream.print(Formatters.formatDouble(langAndScore.getScore(), 4));
                }
                printStream.println();
            }
            // checkError() flushes the stream and reports any swallowed I/O failure.
            if (printStream.checkError()) {
                throw new IOException("Error while writing results to " + outputFileName);
            }
        }
        finally {
            printStream.close();
        }
    }

    /**
     * Prints the closing summary line to standard error.
     *
     * @param filesProcessed number of files reported as processed
     * @param processingTime elapsed processing time in seconds
     */
    protected static void terminate(int filesProcessed, long processingTime) {
        StringBuilder summary = new StringBuilder("Processed ");
        summary.append(Formatters.formatIntegerWithCommas(filesProcessed));
        summary.append(" files in ");
        summary.append(Formatters.formatLongWithCommas(processingTime));
        summary.append(" seconds.");
        System.err.println(summary.toString());
    }

    // Explicitly zeroes the file counters and the longest-title width when the class is loaded.
    static {
        filesToProcess = 0;
        currentFileNumber = 0;
        longestTitle = 0;
    }

    /**
     * Result record for one analyzed document: its file name, display title,
     * extracted text length and a fixed number of language/score slots.
     *
     * <p>Instances order themselves in reverse of the natural order of their
     * language slots (compared slot by slot), with ties broken by the reversed
     * comparison of the file names.
     */
    public static class DocData
    implements Comparable {
        /** Number of language slots stored per document. */
        private static final int LANGUAGE_SLOTS = 3;

        /** Name of the source file as passed to the parser. */
        public String docFileName;
        /** Title shown in reports (the file name without its path). */
        public String docTitle;
        /** Length of the extracted, whitespace-normalized text. */
        public int docLength;
        /** Language/score slots; always {@code LANGUAGE_SLOTS} entries after construction. */
        public ScoredString[] docLanguages;

        /**
         * Creates a record, keeping at most the first {@code LANGUAGE_SLOTS}
         * entries of {@code docLanguages} and filling any missing slots with
         * an empty language name and a score of zero.
         *
         * @param docFileName  name of the source file
         * @param docTitle     title to display for the document
         * @param docLength    length of the extracted text
         * @param docLanguages recognized languages with scores; may be {@code null}
         */
        public DocData(String docFileName, String docTitle, int docLength, ScoredString[] docLanguages) {
            this.docFileName = docFileName;
            this.docTitle = docTitle;
            this.docLength = docLength;
            int available = docLanguages == null ? 0 : Math.min(docLanguages.length, LANGUAGE_SLOTS);
            ScoredString[] langs = new ScoredString[LANGUAGE_SLOTS];
            for (int i = 0; i < LANGUAGE_SLOTS; ++i) {
                langs[i] = i < available ? docLanguages[i] : new ScoredString("", 0.0);
            }
            this.docLanguages = langs;
        }

        /**
         * Compares this record with another in reversed order.
         *
         * @param object the object to compare with
         * @return a negative, zero or positive value as this record sorts
         *         before, equal to or after {@code object};
         *         {@code Integer.MIN_VALUE} when {@code object} is
         *         {@code null} or not a {@code DocData}
         */
        public int compareTo(Object object) {
            if (!(object instanceof DocData)) {
                // Kept for backward compatibility: the previous implementation
                // also yielded Integer.MIN_VALUE for null or foreign objects.
                return Integer.MIN_VALUE;
            }
            DocData otherDoc = (DocData)object;
            int result = 0;
            for (int i = 0; i < this.docLanguages.length && result == 0; ++i) {
                result = this.docLanguages[i].compareTo(otherDoc.docLanguages[i]);
            }
            if (result == 0) {
                result = Compare.compare(this.docFileName, otherDoc.docFileName);
            }
            // Plain negation overflows for Integer.MIN_VALUE and would leave
            // the sign unreversed, so that value is mapped explicitly.
            return result == Integer.MIN_VALUE ? Integer.MAX_VALUE : -result;
        }
    }
}

