/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.tools.adornedtosimpleteip5;

import edu.northwestern.at.morphadorner.tools.AdornedXMLWriter;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.FileUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.SetFactory;
import edu.northwestern.at.utils.SetUtils;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.xml.JDOMFragmentParser;
import edu.northwestern.at.utils.xml.JDOMUtils;
import edu.northwestern.at.utils.xml.jdom.ElementsFilter;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import org.jdom2.Attribute;
import org.jdom2.Content;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Namespace;
import org.jdom2.filter.Filter;
import org.jdom2.filter.Filters;
import org.jdom2.util.IteratorIterable;

/**
 * Command-line tool that rewrites adorned XML files into a simplified form.
 *
 * <p>For every input file the tool parses the document, removes the
 * {@code monkHeader} child of the root, renames {@code sup} elements to
 * {@code hi rend="sup"}, cleans up the attributes of {@code w}/{@code pc}
 * elements, gives {@code gap} elements an {@code xml:id} when they lack one,
 * optionally appends an interpGrp fragment to the {@code text} element, and
 * writes the result to the output directory.</p>
 *
 * <p>Command-line arguments:</p>
 * <ol>
 *   <li>output directory</li>
 *   <li>{@code usereg} or {@code usechoice} (case-insensitive)</li>
 *   <li>name of a file holding the interpGrp XML fragment</li>
 *   <li>file name under which the set of successfully converted works is saved</li>
 *   <li>file name under which the set of badly converted works is saved</li>
 *   <li>one or more input file names / wildcards</li>
 * </ol>
 *
 * <p>All state is held in static fields; the class is not designed for
 * concurrent use.</p>
 */
public class AdornedToSimpleTEIP5 {
    /** Number of documents found by wildcard expansion. */
    protected static int docsToProcess = 0;

    /** Declared for progress tracking; not updated anywhere in this class. */
    protected static int currentDocNumber = 0;

    /** Trimmed text of the interpGrp XML fragment, or empty when none was read. */
    protected static String interpGrpXMLText = "";

    /** True when {@link #interpGrpXMLText} is non-empty. */
    protected static boolean haveInterpGrp = false;

    /** When true, part of speech is always emitted as an {@code ana} attribute. */
    protected static boolean forceAna = true;

    /** True to keep the {@code reg} attribute; false to emit {@code choice} elements instead. */
    protected static boolean useReg = true;

    /** Directory that receives the converted files. */
    protected static String outputDirectory;

    /** UTF-8 wrapper around standard output used for progress and error messages. */
    protected static PrintStream printStream;

    /** Number of leading command-line arguments that precede the input file wildcards. */
    protected static final int INITPARAMS = 5;

    /** Most recently seen word ID; used as a prefix for generated IDs. */
    protected static String lastID = "";

    /** Number of gap IDs generated since the last word was cleaned. */
    protected static int gapCount = 0;

    /** Number of sentence markers generated for the current word. */
    protected static int sentenceCount = 0;

    /** File name under which {@link #badWorksSet} is saved. */
    protected static String badWorksFileName = "";

    /** Input files for which some word IDs were lost during conversion. */
    protected static Set<String> badWorksSet = null;

    /** File name under which {@link #goodWorksSet} is saved. */
    protected static String goodWorksFileName = "";

    /** Input files converted without losing any word ID. */
    protected static Set<String> goodWorksSet = null;

    /** TEI namespace used for every element this class creates. */
    protected static Namespace teiNamespace = Namespace.getNamespace("http://www.tei-c.org/ns/1.0");

    /**
     * Program entry point.
     *
     * @param args command-line arguments; see the class description
     */
    public static void main(String[] args) {
        try {
            if (!initialize(args)) {
                System.exit(1);
            }
            long startTime = System.currentTimeMillis();
            int filesProcessed = processFiles(args);
            // Round the elapsed time up to whole seconds.
            long processingTime = (System.currentTimeMillis() - startTime + 999L) / 1000L;
            terminate(filesProcessed, processingTime);
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
    }

    /**
     * Reads the fixed command-line parameters and sets up the static state.
     *
     * @param args command-line arguments
     * @return {@code false} when too few arguments were supplied, {@code true} otherwise
     * @throws Exception if the UTF-8 print stream cannot be created
     */
    protected static boolean initialize(String[] args) throws Exception {
        printStream = new PrintStream(new BufferedOutputStream(System.out), true, "utf-8");

        // The fixed parameters must be followed by at least one input file specification.
        if (args.length < INITPARAMS + 1) {
            System.err.println("Not enough parameters.");
            return false;
        }

        outputDirectory = args[0];

        // equalsIgnoreCase is locale-independent, unlike a default-locale toLowerCase().
        // An unrecognised value leaves useReg at its current setting.
        String regOutType = args[1];
        if (regOutType.equalsIgnoreCase("usereg")) {
            useReg = true;
        } else if (regOutType.equalsIgnoreCase("usechoice")) {
            useReg = false;
        }

        interpGrpXMLText = "";
        try {
            // NOTE(review): assumes readTextFile returns the file's text — confirm against FileUtils.
            String fragmentText = FileUtils.readTextFile(args[2], "utf-8");
            if (fragmentText != null) {
                interpGrpXMLText = fragmentText;
            }
        } catch (Exception ignored) {
            // Best effort: an unreadable interpGrp file simply means no fragment is added.
        }
        interpGrpXMLText = interpGrpXMLText.trim();
        haveInterpGrp = interpGrpXMLText.length() > 0;

        goodWorksFileName = args[3];
        goodWorksSet = SetFactory.createNewSortedSet();
        badWorksFileName = args[4];
        badWorksSet = SetFactory.createNewSortedSet();
        return true;
    }

    /**
     * Converts one adorned XML file and writes the result to the output directory.
     *
     * <p>The input file name is added to {@link #goodWorksSet} when every
     * {@code w}/{@code pc} ID present before conversion is still present
     * afterwards, and to {@link #badWorksSet} otherwise. Any exception is
     * reported on {@link #printStream} and does not propagate.</p>
     *
     * @param xmlFileName name of the input file
     */
    protected static void processOneFile(String xmlFileName) {
        String xmlOutputFileName = "";
        try {
            String strippedFileName = FileNameUtils.stripPathName(xmlFileName);
            strippedFileName = FileNameUtils.changeFileExtension(strippedFileName, "");
            xmlOutputFileName = new File(outputDirectory, strippedFileName + ".xml").getAbsolutePath();
            FileUtils.createPathForFile(xmlOutputFileName);

            Document document = JDOMUtils.parse(xmlFileName);
            Element root = document.getRootElement();
            root.removeChild("monkHeader", Namespace.getNamespace("http://monk.at.northwestern.edu/ns/1.0"));

            // Snapshot the <sup> elements first so they are not renamed while the
            // descendant iterator is still walking the tree.
            List<Element> supElements = ListFactory.createNewList();
            for (Element sup : root.getDescendants(Filters.element("sup"))) {
                supElements.add(sup);
            }
            for (Element sup : supElements) {
                replaceSupWithHi(sup);
            }

            // Snapshot all descendant content and remember the ID of the first <w>.
            List<Content> contents = ListFactory.createNewList();
            String firstWordID = "";
            for (Content content : root.getDescendants(Filters.content())) {
                contents.add(content);
                if (firstWordID.length() == 0
                        && content instanceof Element
                        && ((Element) content).getName().equals("w")) {
                    String id = JDOMUtils.getAttributeValue((Element) content, "xml:id", true);
                    if (id != null) {
                        firstWordID = id;
                    }
                }
            }

            // Seed lastID so that IDs generated before the first word have a prefix.
            lastID = strippedFileName + "-" + StringUtils.dupl("0", firstWordID.length());

            // Record the word IDs present before conversion.
            SortedSet<String> oldWordIDs = SetFactory.createNewSortedSet();
            for (Content content : contents) {
                if (isWordOrPunctuation(content)) {
                    addWordID((Element) content, oldWordIDs);
                }
            }

            // Convert words, punctuation and gaps.
            for (int i = 0; i < contents.size(); ++i) {
                Content content = contents.get(i);
                if (isWordOrPunctuation(content)) {
                    i = handleW(contents, i);
                } else if (content instanceof Element && ((Element) content).getName().equals("gap")) {
                    handleGap(content, false, null);
                }
            }

            // Record the word IDs present after conversion.
            SortedSet<String> newWordIDs = SetFactory.createNewSortedSet();
            IteratorIterable<? extends Content> convertedWords =
                    root.getDescendants(new ElementsFilter(new String[]{"w", "pc"}));
            for (Content word : convertedWords) {
                addWordID((Element) word, newWordIDs);
            }

            // Whatever remains in oldWordIDs was lost during conversion.
            oldWordIDs.removeAll(newWordIDs);
            if (oldWordIDs.size() > 0) {
                printStream.println("*** Error *** in " + xmlFileName + ": " + oldWordIDs.size() + " words not properly converted.");
                for (String missingID : oldWordIDs) {
                    printStream.println(missingID);
                }
                badWorksSet.add(xmlFileName);
            } else {
                goodWorksSet.add(xmlFileName);
            }

            if (haveInterpGrp) {
                Element text = root.getChild("text", teiNamespace);
                if (text == null) {
                    text = root.getChild("text");
                }
                Namespace[] nameSpaceList = new Namespace[]{teiNamespace};
                JDOMFragmentParser fragmentParser = new JDOMFragmentParser(nameSpaceList);
                List<Element> parsed = fragmentParser.parseFragment(interpGrpXMLText);
                if (text != null) {
                    // NOTE(review): it is not visible here whether parseFragment returns
                    // detached elements; copy the list and detach defensively so that
                    // addContent cannot fail on an element that still has a parent.
                    List<Element> fragmentElements = ListFactory.createNewList();
                    fragmentElements.addAll(parsed);
                    for (Element fragmentElement : fragmentElements) {
                        fragmentElement.detach();
                        text.addContent(fragmentElement);
                    }
                }
            }

            // NOTE(review): presumably the constructor writes the file; no other
            // write call is visible in this class — confirm against AdornedXMLWriter.
            new AdornedXMLWriter(document, xmlOutputFileName);
            printStream.println("Reformatted " + xmlFileName + " to " + xmlOutputFileName);
        } catch (Exception e) {
            e.printStackTrace();
            printStream.println("Problem reformatting " + xmlFileName + " to " + xmlOutputFileName + ": " + e.getMessage());
        }
    }

    /** Returns true when the content is an element named {@code w} or {@code pc}. */
    private static boolean isWordOrPunctuation(Content content) {
        if (!(content instanceof Element)) {
            return false;
        }
        String name = ((Element) content).getName();
        return name.equals("w") || name.equals("pc");
    }

    /**
     * Adds the {@code xml:id} of a word element to a set of IDs.
     *
     * @param wordElement element whose ID is read
     * @param wordIDs     set that receives the ID
     */
    protected static void addWordID(Element wordElement, Set<String> wordIDs) {
        String id = JDOMUtils.getAttributeValue(wordElement, "xml:id", true);
        wordIDs.add(id);
    }

    /**
     * Cleans a {@code w}/{@code pc} element and, in "usechoice" mode, replaces
     * its text with a {@code choice} element when the regularised form differs
     * from the word text.
     *
     * @param contents snapshot of the document content
     * @param index    position of the element in {@code contents}
     * @return the index at which the caller should continue (always {@code index})
     */
    protected static int handleW(List<Content> contents, int index) {
        Element element = (Element) contents.get(index);
        // Read reg before cleaning, because cleaning may remove the attribute.
        String reg = element.getAttributeValue("reg");
        element = cleanWElement(element);
        String wordText = element.getText().trim();
        if (!useReg && reg != null && !reg.equals(wordText)) {
            generateChoice(element, wordText, reg);
        }
        return index;
    }

    /**
     * Normalises the attributes of a {@code w}/{@code pc} element.
     *
     * <p>Renames {@code lem} to {@code lemma}; stores the part of speech as
     * {@code ana="#..."} (or {@code pos}); removes {@code ord}, {@code spe},
     * {@code tok}, {@code eos}, {@code ms}, a {@code part} of {@code "N"} and,
     * in "usechoice" mode, {@code reg}. Punctuation becomes a {@code pc}
     * element stripped of its word attributes. A non-punctuation word that
     * ends a sentence gets a {@code pc unit="sentence"} marker inserted
     * immediately after it.</p>
     *
     * <p>Side effects: sets {@link #lastID} to the element's ID and resets
     * {@link #gapCount} and {@link #sentenceCount}.</p>
     *
     * @param element the element to clean
     * @return the same element, modified in place
     */
    protected static Element cleanWElement(Element element) {
        lastID = JDOMUtils.getAttributeValue(element, "xml:id", true);
        gapCount = 0;
        sentenceCount = 0;

        boolean eos = "1".equals(element.getAttributeValue("eos"));
        String spe = element.getAttributeValue("spe");
        String part = element.getAttributeValue("part");
        if (part == null) {
            part = "N";
        }

        Attribute lemAttr = element.getAttribute("lem");
        if (lemAttr != null) {
            lemAttr.setName("lemma");
        }

        // Find the part of speech in either "pos" or an existing "ana" attribute,
        // keeping the bare value (without a leading '#') in pos.
        Attribute posAttr = element.getAttribute("pos");
        String pos = "";
        if (posAttr == null) {
            posAttr = element.getAttribute("ana");
            if (posAttr != null) {
                pos = posAttr.getValue();
                // startsWith is safe on an empty value, unlike charAt(0).
                if (pos.startsWith("#")) {
                    pos = pos.substring(1);
                }
            }
        } else {
            pos = posAttr.getValue();
        }
        if (posAttr != null) {
            if (haveInterpGrp || forceAna) {
                posAttr.setName("ana");
                // Build from the bare value so an existing "#x" does not become "##x".
                posAttr.setValue("#" + pos);
            } else {
                posAttr.setName("pos");
                posAttr.setValue(pos);
            }
        }

        element.removeAttribute("ord");
        if (part.equals("N")) {
            element.removeAttribute("part");
        }
        element.removeAttribute("spe");
        element.removeAttribute("tok");
        if (!useReg) {
            element.removeAttribute("reg");
        }
        element.removeAttribute("eos");
        element.removeAttribute("ms");

        if (element.getName().equals("pc") || CharUtils.isPunctuation(spe)) {
            element.setName("pc");
            element.removeAttribute("lemma");
            element.removeAttribute("ana");
            element.removeAttribute("type");
            element.removeAttribute("reg");
            if (eos) {
                // The punctuation mark itself carries the sentence boundary.
                element.setAttribute("unit", "sentence");
                eos = false;
            }
        }

        if (eos) {
            Element parent = element.getParentElement();
            if (parent != null) {
                int wIndex = parent.indexOf(element);
                Element eosMarker = createElement("pc");
                String markerID = lastID + "-" + ++sentenceCount;
                JDOMUtils.setAttributeValue(eosMarker, "xml:id", markerID);
                eosMarker.setAttribute("unit", "sentence");
                // addContent inserts after the word. setContent(index, child) would
                // replace the following sibling and fail when the word is the last child.
                parent.addContent(wIndex + 1, eosMarker);
            }
        }
        return element;
    }

    /**
     * Creates an element in the TEI namespace.
     *
     * @param name local name of the element
     * @return the new, parentless element
     */
    protected static Element createElement(String name) {
        return new Element(name, teiNamespace);
    }

    /**
     * Gives a {@code gap} element an {@code xml:id} when it has none.
     *
     * <p>The generated ID is {@link #lastID} followed by {@code "-gap"} and the
     * current {@link #gapCount}, which is then incremented.</p>
     *
     * @param content           the gap element
     * @param inSplit           unused
     * @param splitWordElements unused
     */
    protected static void handleGap(Content content, boolean inSplit, List<Element> splitWordElements) {
        Element element = (Element) content;
        String id = JDOMUtils.getAttributeValue(element, "xml:id", true);
        if (id == null || id.length() <= 0) {
            id = lastID + "-gap" + gapCount;
            ++gapCount;
            JDOMUtils.setAttributeValue(element, "xml:id", id);
        }
    }

    /**
     * Converts a {@code sup} element to {@code hi rend="sup"}.
     *
     * @param content the {@code sup} element
     */
    protected static void handleSup(Content content) {
        replaceSupWithHi((Element) content);
    }

    /**
     * Renames an element to {@code hi} and replaces all of its attributes with
     * a single {@code rend="sup"} attribute.
     *
     * @param element the element to convert
     */
    protected static void replaceSupWithHi(Element element) {
        element.setName("hi");
        List<Attribute> attributes = ListFactory.createNewList();
        attributes.add(new Attribute("rend", "sup"));
        element.setAttributes(attributes);
    }

    /**
     * Builds a short description of an element for diagnostic output.
     *
     * @param element the element to describe
     * @return {@code "Name: <name>"}, followed by {@code ", id: <id>"} when the
     *         element has a non-empty {@code xml:id}
     */
    protected static String displayElement(Element element) {
        String id = JDOMUtils.getAttributeValue(element, "xml:id", true);
        StringBuilder sb = new StringBuilder();
        sb.append("Name: ").append(element.getName());
        if (id != null && id.length() > 0) {
            sb.append(", id: ").append(id);
        }
        return sb.toString();
    }

    /**
     * Replaces the text of an element with
     * {@code <choice><orig>wordText</orig><reg>regText</reg></choice>}.
     *
     * @param element  element whose text is replaced
     * @param wordText original spelling; the {@code orig} element is left empty when null
     * @param regText  regularised spelling
     * @return the created {@code orig} element
     */
    protected static Element generateChoice(Element element, String wordText, String regText) {
        element.setText("");
        Element choiceElement = createElement("choice");
        element.addContent(choiceElement);

        Element origElement = createElement("orig");
        if (wordText != null) {
            origElement.setText(wordText);
        }
        choiceElement.addContent(origElement);

        Element regElement = createElement("reg");
        regElement.setText(regText);
        choiceElement.addContent(regElement);
        return origElement;
    }

    /**
     * Expands the file wildcards that follow the fixed parameters, converts
     * every matching file, and saves the good and bad work sets.
     *
     * @param args command-line arguments
     * @return number of files found by wildcard expansion
     */
    protected static int processFiles(String[] args) {
        int wildCardCount = Math.max(0, args.length - INITPARAMS);
        String[] wildCards = new String[wildCardCount];
        if (wildCardCount > 0) {
            System.arraycopy(args, INITPARAMS, wildCards, 0, wildCardCount);
        }

        String[] fileNames = FileNameUtils.expandFileNameWildcards(wildCards);
        docsToProcess = fileNames.length;
        for (String fileName : fileNames) {
            processOneFile(fileName);
        }

        try {
            SetUtils.saveSet(goodWorksSet, goodWorksFileName, "utf-8");
            SetUtils.saveSet(badWorksSet, badWorksFileName, "utf-8");
        } catch (Exception e) {
            e.printStackTrace();
        }
        return fileNames.length;
    }

    /**
     * Prints the final summary line.
     *
     * @param filesProcessed number of files processed
     * @param processingTime elapsed time in seconds
     */
    protected static void terminate(int filesProcessed, long processingTime) {
        printStream.println("Processed " + Formatters.formatIntegerWithCommas(filesProcessed) + " files in " + Formatters.formatLongWithCommas(processingTime) + " seconds.");
    }
}

