/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.xgtagger;

import edu.northwestern.at.morphadorner.corpuslinguistics.inputter.TextInputter;
import edu.northwestern.at.morphadorner.corpuslinguistics.outputter.AdornedWordOutputter;
import edu.northwestern.at.morphadorner.corpuslinguistics.outputter.ByteStreamAdornedWordOutputter;
import edu.northwestern.at.morphadorner.corpuslinguistics.outputter.ListAdornedWordOutputter;
import edu.northwestern.at.morphadorner.xgtagger.XGMisc;
import edu.northwestern.at.morphadorner.xgtagger.XGOptions;
import edu.northwestern.at.morphadorner.xgtagger.XGPair;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.UnicodeReader;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Comment;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Entity;
import org.w3c.dom.EntityReference;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

public class XGParser {
    // Tagger configuration supplied to the constructor.
    XGOptions options;
    // Text-node ordinal (value of intCountTags when the node was seen) -> pair of
    // non-blank character offsets recorded by countNonBlankCharacters().
    Map<Integer, XGPair> hMap;
    // XML attribute name -> value for the current adorned word; refilled by
    // getNextEntry() and written onto new elements by createNewNode().
    Map<String, String> hmAttributes;
    // Entities declared in the document's DOCTYPE; left null when there is no DOCTYPE.
    NamedNodeMap nnmEntities;
    // extractText(Node) state: set after a non-soft element child has been followed
    // by a surround marker, cleared by non-blank text or a soft tag, so that a
    // second consecutive marker is not appended.
    boolean boolDot;
    // Running count of non-blank characters (surround markers included) seen so far.
    int intCountNonBlanks;
    // Ordinal of the text node being processed; incremented by
    // countNonBlankCharacters() and by modifyDOM(), reset by mergeAdornments().
    int intCountTags;
    // Number of non-blank characters consumed so far by modifyDOM().
    int intCpt;
    // Set to null by the constructor; not otherwise used in this class.
    String strLine;
    // Characters of the word currently being rebuilt by modifyDOM().
    StringBuffer sbWord;
    // Position of the next character to copy from strWord.
    int intStrWordIndex;
    // Length of strWord, cached by getNextEntry().
    int intStrWordLength;
    // Spelling of the current adorned word ("" when the list is exhausted).
    String strWord;
    // Not referenced anywhere in this class.
    int intLongWord;
    // Running word ID; incremented on every getNextEntry() call.
    int intID;
    // Readers created lazily by read() over the outputter's bytes.
    UnicodeReader frCurrent;
    BufferedReader brCurrent;
    // Outputter holding the adorned words; assigned by mergeAdornments().
    AdornedWordOutputter adornerOutputter;
    // Index of the next entry to take from adornedWordDataList.
    int nextAdornedWord;
    // Adorned word rows; getNextEntry() casts each row to a List of String fields.
    List adornedWordDataList;
    // Marker inserted into the extracted text around element boundaries, its
    // trimmed form, and the trimmed form's length.
    String surroundMarker;
    String surroundMarkerTrim;
    int surroundMarkerLength;
    // Word ID -> number of createNewNode() calls that got past the empty/marker
    // check for that ID.
    Map<Integer, Integer> splitWords = MapFactory.createNewMap();
    // Number of createNewNode() calls that got past the empty/marker check.
    int wordNodesCreated = 0;
    // Platform file separator; not referenced elsewhere in this class
    // (path building uses File.separator directly).
    static final String FILE_SEPARATOR = System.getProperty("file.separator");

    /**
     * Creates a parser for one document and resets all bookkeeping state.
     *
     * <p>The surround marker is read from {@code options}; the entity table is
     * taken from the document's DOCTYPE when one is present and is otherwise
     * left {@code null}.
     *
     * @param options  tagger settings; must supply a non-null surround marker
     * @param document DOM document that will be passed to the extraction and
     *                 merge steps
     */
    public XGParser(XGOptions options, Document document) {
        this.options = options;
        this.hMap = MapFactory.createNewMap();
        this.hmAttributes = MapFactory.createNewMap();
        this.strLine = null;
        this.boolDot = false;
        this.intCpt = 0;
        this.intCountNonBlanks = 0;
        this.intCountTags = 0;
        this.strWord = "";
        this.sbWord = new StringBuffer();
        this.intStrWordIndex = 0;
        this.intStrWordLength = 0;
        this.intID = 0;
        this.frCurrent = null;
        this.brCurrent = null;
        this.surroundMarker = this.options.getSurroundMarker();
        this.surroundMarkerTrim = this.surroundMarker.trim();
        this.surroundMarkerLength = this.surroundMarkerTrim.length();
        this.nextAdornedWord = 0;
        this.adornedWordDataList = null;
        this.wordNodesCreated = 0;
        // Reset the field itself; the previous code declared a shadowing local
        // here, which had no effect.
        this.adornerOutputter = null;
        DocumentType doctype = document.getDoctype();
        if (doctype != null) {
            this.nnmEntities = doctype.getEntities();
        }
    }

    /**
     * Sets the running word ID from which subsequent entries continue counting.
     *
     * @param runningWordID the value to store as the current word ID
     */
    public void setRunningWordID(int runningWordID) {
        intID = runningWordID;
    }

    /**
     * Returns the current running word ID.
     *
     * @return the word ID as last set or incremented
     */
    public int getRunningWordID() {
        return intID;
    }

    /**
     * Returns how many times a word was emitted into the DOM.
     *
     * @return the value of the {@code wordNodesCreated} counter
     */
    public int getNumberOfAdornedWords() {
        return wordNodesCreated;
    }

    /**
     * Reads one character from the adorner outputter's byte stream.
     *
     * <p>The readers are created on first use from the bytes held by the
     * outputter, which must therefore be a {@link ByteStreamAdornedWordOutputter}.
     *
     * @return the character read, or -1 at end of stream
     * @throws IOException if the underlying reader fails
     */
    protected int read() throws IOException, FileNotFoundException {
        if (frCurrent == null) {
            ByteStreamAdornedWordOutputter byteOutputter = (ByteStreamAdornedWordOutputter) adornerOutputter;
            byte[] adornedBytes = byteOutputter.getBytes();
            frCurrent = new UnicodeReader(new ByteArrayInputStream(adornedBytes), "utf-8");
            brCurrent = new BufferedReader(frCurrent);
        }
        return brCurrent.read();
    }

    /**
     * Advances to the next adorned word.
     *
     * <p>Clears the current attributes and spelling, then, if another row is
     * available, loads the spelling from the configured word field (1-based)
     * and stores every field whose configured XML attribute name is non-empty.
     * The running word ID is incremented and the character cursor reset even
     * when the list is exhausted.
     *
     * @throws IOException declared for callers; not raised by the visible code
     */
    protected void getNextEntry() throws IOException, FileNotFoundException {
        hmAttributes.clear();
        strWord = "";
        if (nextAdornedWord < adornedWordDataList.size()) {
            int rowIndex = nextAdornedWord++;
            List fields = (List) adornedWordDataList.get(rowIndex);
            int column = 0;
            while (column < fields.size()) {
                String fieldValue = (String) fields.get(column);
                if (options.getWordField() == column + 1) {
                    strWord = fieldValue;
                }
                String attributeName = options.morphAdornerSettings.getXMLWordAttribute(column);
                if (attributeName.length() > 0) {
                    hmAttributes.put(attributeName, fieldValue);
                }
                column++;
            }
        }
        intID++;
        intStrWordIndex = 0;
        intStrWordLength = strWord.length();
    }

    /**
     * Recursively collects the text under {@code node}, inserting the surround
     * marker around element boundaries and recording, for every text node, the
     * span of non-blank character offsets it covers.
     *
     * <p>Children are handled in document order, except that children whose
     * name is a jump tag are deferred and appended after all other children,
     * each preceded by a surround marker.
     *
     * @param node the node whose children are extracted
     * @return the extracted text, with all whitespace normalised to spaces
     * @throws IOException propagated from nested extraction
     */
    public StringBuffer extractText(Node node) throws IOException {
        StringBuffer sbResult = new StringBuffer();
        boolean boolInternDot = false;
        NodeList nlChildren = node.getChildNodes();
        int intChildNumber = nlChildren.getLength();
        Vector<Integer> vectorTempJumpTags = new Vector<Integer>();
        for (int i = 0; i < intChildNumber; ++i) {
            Node nodeChild = nlChildren.item(i);
            String strChildName = nodeChild.getNodeName();
            if (nodeChild instanceof EntityReference) {
                // NOTE(review): throws NullPointerException when the document has
                // no DOCTYPE (nnmEntities is null) or the entity is not declared.
                Entity entity = (Entity)this.nnmEntities.getNamedItem(strChildName);
                if (entity.getSystemId() != null && !this.options.getEntityIgnoreFiles()) {
                    if (!this.options.isOutputDirectory() && !this.options.getEntityMerging()) {
                        this.options.getLogger().logError("Error: XML input  contains external file entity references.\n  Specified output should be a directory, or options xml.entities_not_files or xml.entities_merge should be set.\n");
                        // NOTE(review): terminates the whole JVM from library code.
                        System.exit(-1);
                    }
                    sbResult.append(this.extractText(nodeChild));
                    continue;
                }
                if (this.options.getEntityTreatAll()) {
                    sbResult.append(this.extractText(nodeChild));
                    continue;
                }
                // Entity that is not expanded: stands in as a single blank.
                sbResult.append(" ");
                continue;
            }
            if (nodeChild instanceof Text) {
                String strText = nodeChild.getNodeValue().replaceAll("\\s", " ");
                int nbChars = this.countNonBlankCharacters(strText);
                sbResult.append(strText);
                if (nbChars > 0) {
                    this.boolDot = false;
                    continue;
                }
                boolInternDot = true;
                continue;
            }
            if (this.options.isJumpTag(strChildName)) {
                // Deferred: jump tags are extracted after all other children.
                vectorTempJumpTags.add(Integer.valueOf(i));
                continue;
            }
            boolean boolSoftTag = this.options.isSoftTag(strChildName);
            if (boolInternDot && !boolSoftTag) {
                sbResult.append(this.surroundMarker);
                this.intCountNonBlanks += this.surroundMarkerLength;
            }
            // NOTE(review): the previous code tested the nested result with
            // StringBuffer.equals(""), which is always false, so empty children
            // were never skipped. That behaviour is kept here on purpose because
            // skipping them would change where markers are placed; confirm the
            // intended behaviour before changing it.
            sbResult.append(this.extractText(nodeChild));
            if (boolSoftTag) {
                boolInternDot = true;
                this.boolDot = false;
                continue;
            }
            if (!this.boolDot) {
                sbResult.append(this.surroundMarker);
                this.intCountNonBlanks += this.surroundMarkerLength;
            }
            this.boolDot = true;
            boolInternDot = false;
        }
        for (int j = 0; j < vectorTempJumpTags.size(); ++j) {
            Node nodeChild = nlChildren.item(vectorTempJumpTags.get(j).intValue());
            // The marker is counted before descending so offsets recorded inside
            // the jump tag come after it.
            this.intCountNonBlanks += this.surroundMarkerLength;
            StringBuffer sbBuffer = this.extractText(nodeChild);
            sbResult.append(this.surroundMarker).append(sbBuffer);
        }
        return sbResult;
    }

    /**
     * Turns the word accumulated in {@code sbWord} into one or more new
     * elements inserted before {@code nodeChild}, then empties the buffer.
     *
     * <p>Nothing is created when the buffer is empty or contains the trimmed
     * surround marker. Otherwise the word is split on the special separator
     * (when one is configured) and each piece becomes a word or punctuation
     * element. The current attributes are written to the first piece, or to
     * every piece when attribute repetition is enabled.
     *
     * @param doc              document used to create nodes
     * @param node             parent that receives the new elements
     * @param nodeChild        existing child before which elements are inserted
     * @param strCurrentPath   path prefix for the word path attribute
     * @param integerTagNumber last sibling number used for the path, or null
     * @return the number of elements inserted
     */
    protected int createNewNode(Document doc, Node node, Node nodeChild, String strCurrentPath, Integer integerTagNumber) {
        if (this.sbWord.length() == 0 || this.sbWord.indexOf(this.surroundMarkerTrim) >= 0) {
            this.sbWord.setLength(0);
            return 0;
        }
        String word = this.sbWord.toString();
        // Clear up front: the buffer used to be cleared only inside the loop
        // below, so it leaked into the next word when the split gave no pieces.
        this.sbWord.setLength(0);
        String separator = this.options.getSpecialSeparator();
        // The separator is matched literally elsewhere in this class, so quote
        // it rather than letting String.split interpret it as a regex.
        String[] strArray = separator != null
            ? word.split(java.util.regex.Pattern.quote(separator))
            : new String[]{word};
        Integer previousCount = this.splitWords.get(this.intID);
        int splitCount = previousCount == null ? 1 : previousCount + 1;
        this.splitWords.put(this.intID, splitCount);
        for (int i = 0; i < strArray.length; ++i) {
            Text newText = doc.createTextNode(strArray[i]);
            String tagName = this.options.getWordTagName();
            if (XGParser.isPunctuationAndNotGap(strArray[i])) {
                tagName = this.options.getPuncTagName();
            }
            Element elementNewTag = doc.createElement(tagName);
            if (this.options.getWriteIds()) {
                elementNewTag.setAttribute(this.options.getIdArgumentName(), String.valueOf(this.intID));
            }
            if (this.options.getWritePath() % 2 == 1) {
                integerTagNumber = integerTagNumber == null ? Integer.valueOf(1) : Integer.valueOf(integerTagNumber + 1);
                elementNewTag.setAttribute(this.options.getWordPathArgumentName(), strCurrentPath + File.separator + tagName + "[" + integerTagNumber + "]");
            }
            if (i == 0 || this.options.repeatAttributes()) {
                for (Map.Entry<String, String> entry : this.hmAttributes.entrySet()) {
                    elementNewTag.setAttribute(entry.getKey(), entry.getValue());
                }
            }
            elementNewTag.appendChild(newText);
            node.insertBefore(elementNewTag, nodeChild);
        }
        ++this.wordNodesCreated;
        return strArray.length;
    }

    /**
     * Copies a node and its descendants by shallow-cloning each node and
     * re-attaching recursively cloned children.
     *
     * <p>If attaching a child raises a {@link DOMException}, the partial copy
     * is discarded and the DOM's own deep clone of {@code node} is returned.
     *
     * @param node the node to copy
     * @return a detached copy of {@code node} including its subtree
     */
    protected static Node cloneNode(Node node) {
        Node copy = node.cloneNode(false);
        NodeList children = node.getChildNodes();
        int childCount = children.getLength();
        try {
            int index = 0;
            while (index < childCount) {
                Node childCopy = cloneNode(children.item(index));
                copy.appendChild(childCopy);
                index++;
            }
            return copy;
        } catch (DOMException e) {
            return node.cloneNode(true);
        }
    }

    /**
     * Copies the children of an entity reference under a fresh, detached
     * {@code entityReferenceRoot} element.
     *
     * @param er  entity reference whose children are copied
     * @param doc document used to create the wrapper element
     * @return the wrapper element holding the copied children
     */
    protected Node cloneEntityReference(EntityReference er, Document doc) {
        Element wrapper = doc.createElement("entityReferenceRoot");
        NodeList referenceChildren = er.getChildNodes();
        for (int index = 0, count = referenceChildren.getLength(); index < count; index++) {
            Node childCopy = cloneNode(referenceChildren.item(index));
            wrapper.appendChild(childCopy);
        }
        return wrapper;
    }

    /**
     * Recursively rewrites the children of {@code node}, replacing each text
     * node with word/punctuation elements built from the adorned word list.
     *
     * <p>Text nodes are visited in the same order in which
     * {@code extractText(Node)} recorded their non-blank offset spans in
     * {@code hMap}; those spans decide how many characters of the adorned
     * words belong to each text node. DOCTYPE nodes are replaced by comments,
     * entity references are skipped, expanded or merged depending on the
     * options, and children whose name is a jump tag are processed after all
     * other children.
     *
     * <p>The loop index {@code i} and the bound {@code intChildNumber} are
     * adjusted by hand whenever children are inserted or removed, so the order
     * of statements in this method matters.
     *
     * @param node           parent whose children are rewritten in place
     * @param doc            document used to create new nodes
     * @param strCurrentPath path of {@code node}, used as the prefix for path attributes
     * @return the document passed in, as returned by the last nested call
     * @throws DOMException if a DOM mutation is rejected
     * @throws IOException  propagated from fetching the next adorned word
     */
    public Document modifyDOM(Node node, Document doc, String strCurrentPath) throws DOMException, IOException {
        Integer integerTagNumber;
        String strNodeChildName;
        Node nodeChild;
        String strText = null;
        NodeList nlChildren = node.getChildNodes();
        String strNewPath = null;
        int intChildNumber = nlChildren.getLength();
        boolean boolConsiderAsAnElement = false;
        // Per-parent counters: element name -> highest sibling number used so far.
        Map<String, Integer> hmPaths = MapFactory.createNewMap();
        // Indices of jump-tag children, processed after the main loop.
        Vector<Integer> vectorTempJumpTags = new Vector<Integer>();
        for (int i = 0; i < intChildNumber; ++i) {
            nodeChild = nlChildren.item(i);
            strNodeChildName = nodeChild.getNodeName();
            if (nodeChild instanceof DocumentType) {
                // Replace the DOCTYPE with two explanatory comments: two nodes in,
                // one node out, hence the single increment of i and the bound.
                Comment comment1 = doc.createComment("Document Type Description element (DOCTYPE \"" + nodeChild.getNodeName() + "\") has been removed. ");
                Comment comment2 = doc.createComment("To build a correct DTD for this document, change all #PCDATA into '" + this.options.getWordTagName() + "' element, containing #PCDATA.");
                node.insertBefore(comment1, nodeChild);
                node.insertBefore(comment2, nodeChild);
                node.removeChild(nodeChild);
                ++i;
                ++intChildNumber;
                this.options.getLogger().logError(" *** Element DOCTYPE (\"" + nodeChild.getNodeName() + "\") removed in the output (out of date) *** ");
                continue;
            }
            boolean boolT = false;
            if (nodeChild instanceof EntityReference) {
                Node nodeClone;
                // NOTE(review): throws NullPointerException when nnmEntities is null
                // (no DOCTYPE) or the entity is not declared.
                Entity entity = (Entity)this.nnmEntities.getNamedItem(strNodeChildName);
                if (entity.getSystemId() == null) {
                    // Entity without a system ID: left untouched unless all
                    // entities are to be treated.
                    if (!this.options.getEntityTreatAll()) continue;
                    nodeClone = this.cloneEntityReference((EntityReference)nodeChild, doc);
                    NodeList nlGrandChildren = nodeClone.getChildNodes();
                    int intGrandChildrenNumber = nlGrandChildren.getLength();
                    // NOTE(review): nodeClone is a detached wrapper, so
                    // nodeClone.getNextSibling() is presumably null and insertBefore
                    // would then append at the end; nodeChild.getNextSibling() may
                    // have been intended. Also, each insert moves a node out of
                    // nodeClone while the loop keeps indexing its (live) child
                    // list, which looks like it skips every other child. Confirm.
                    for (int intGrandChild = 0; intGrandChild < intGrandChildrenNumber; ++intGrandChild) {
                        if (i != intChildNumber - 1) {
                            node.insertBefore(nlGrandChildren.item(intGrandChild), nodeClone.getNextSibling());
                        } else {
                            node.appendChild(nlGrandChildren.item(intGrandChild));
                        }
                        ++intChildNumber;
                    }
                    // Drop the reference itself and revisit index i, which now
                    // holds the following child.
                    node.removeChild(nodeChild);
                    --intChildNumber;
                    --i;
                    continue;
                }
                if (this.options.getEntityIgnoreFiles()) continue;
                if (!this.options.isOutputDirectory() && !this.options.getEntityMerging()) {
                    this.options.getLogger().logError("Error: XML output file  contains some external file entity references.\n  Specified output should be a directory.");
                    // NOTE(review): terminates the whole JVM from library code.
                    System.exit(-1);
                    continue;
                }
                // Entity with a system ID: rewrite a copy of its content first.
                nodeClone = this.cloneEntityReference((EntityReference)nodeChild, doc);
                doc = this.modifyDOM(nodeClone, doc, strCurrentPath);
                if (this.options.getEntityMerging()) {
                    // Inline the rewritten content in place of the reference,
                    // bracketed by begin/end comments.
                    NodeList nlNewGrandChildren = nodeClone.getChildNodes();
                    int intNewGrandChildrenNumber = nlNewGrandChildren.getLength();
                    Comment nodeComment = doc.createComment(" ++ " + nodeChild.getNodeName() + " ++ Here begins the content of " + " entity " + nodeChild.getNodeName() + " inserted here in place of " + "a reference to this entity in " + " the original document.");
                    node.insertBefore(nodeComment, nodeChild);
                    ++i;
                    ++intChildNumber;
                    // NOTE(review): same live-list concern as above; items are moved
                    // out of nodeClone while it is indexed by position.
                    for (int intGrandChild = 0; intGrandChild < intNewGrandChildrenNumber; ++intGrandChild) {
                        node.insertBefore(nlNewGrandChildren.item(intGrandChild), nodeChild);
                        ++i;
                        ++intChildNumber;
                    }
                    // The end comment takes the slot of the removed reference, so
                    // no further index adjustment is made here.
                    node.insertBefore(doc.createComment(" -- " + nodeChild.getNodeName() + " -- End of entity " + nodeChild.getNodeName()), nodeChild);
                    node.removeChild(nodeChild);
                    continue;
                }
                this.options.getLogger().logError("Internal error:  attempted to write secondary XML output file.");
                continue;
            }
            if (nodeChild instanceof Text) {
                int t;
                String tagName;
                strText = nodeChild.getNodeValue().replaceAll("\\s", " ");
                ++this.intCountTags;
                // Offset span recorded for this text node during extraction.
                // NOTE(review): a missing entry makes the next line throw
                // NullPointerException.
                XGPair pairResult = this.hMap.get(new Integer(this.intCountTags));
                int intBegin = pairResult.begin;
                int intEnd = pairResult.end;
                // Skip adorned entries that are surround markers lying before
                // this text node; each one advances the character counter by one.
                while (this.intCpt < intBegin && this.strWord.equals(this.surroundMarkerTrim)) {
                    this.getNextEntry();
                    ++this.intCpt;
                }
                // Consume one character of the current word per iteration until
                // the end offset of this text node is reached.
                while (this.intCpt < intEnd) {
                    if (!this.strWord.equals("")) {
                        this.sbWord.append(this.strWord.charAt(this.intStrWordIndex));
                    }
                    if (this.intStrWordIndex >= this.intStrWordLength - 1) {
                        // Last character of the word: emit it and fetch the next entry.
                        if (this.options.getWritePath() % 2 == 1) {
                            // Odd write-path setting: word elements carry a path,
                            // numbered per tag name within this parent.
                            tagName = this.options.getWordTagName();
                            if (XGParser.isPunctuationAndNotGap(this.sbWord.toString())) {
                                tagName = this.options.getPuncTagName();
                            }
                            integerTagNumber = (Integer)hmPaths.get(tagName);
                            t = this.createNewNode(doc, node, nodeChild, strCurrentPath, integerTagNumber);
                            if (integerTagNumber != null) {
                                hmPaths.put(tagName, integerTagNumber + t);
                            } else {
                                hmPaths.put(tagName, new Integer(t));
                            }
                        } else {
                            t = this.createNewNode(doc, node, nodeChild, null, 0);
                        }
                        // The new elements were inserted before nodeChild, so both
                        // the index and the bound move forward by that many.
                        intChildNumber += t;
                        i += t;
                        this.getNextEntry();
                    } else {
                        ++this.intStrWordIndex;
                    }
                    // Copy a special separator found at the cursor into the buffer
                    // without counting it as a consumed character.
                    if (this.options.getSpecialSeparator() != null && this.strWord.length() >= this.intStrWordIndex + this.options.getSpecialSeparator().length() && this.strWord.substring(this.intStrWordIndex, this.intStrWordIndex + this.options.getSpecialSeparator().length()).equals(this.options.getSpecialSeparator())) {
                        this.sbWord.append(this.options.getSpecialSeparator());
                        this.intStrWordIndex += this.options.getSpecialSeparator().length();
                    }
                    ++this.intCpt;
                }
                // Characters left in the buffer belong to a word that is not
                // finished inside this text node; emit that part here.
                if (this.sbWord.length() > 0) {
                    if (this.options.getWritePath() % 2 == 1) {
                        tagName = this.options.getWordTagName();
                        if (XGParser.isPunctuationAndNotGap(this.sbWord.toString())) {
                            tagName = this.options.getPuncTagName();
                        }
                        integerTagNumber = (Integer)hmPaths.get(tagName);
                        t = this.createNewNode(doc, node, nodeChild, strCurrentPath, integerTagNumber);
                        if (integerTagNumber != null) {
                            hmPaths.put(tagName, integerTagNumber + t);
                        } else {
                            hmPaths.put(tagName, new Integer(t));
                        }
                    } else {
                        t = this.createNewNode(doc, node, nodeChild, null, 0);
                    }
                    intChildNumber += t;
                    i += t;
                }
                // NOTE(review): the loop above only exits once intCpt >= intEnd, so
                // this condition looks always false and the text node is always
                // removed.
                if (this.intCpt < intEnd) continue;
                node.removeChild(nodeChild);
                --intChildNumber;
                --i;
                continue;
            }
            if (!this.options.isJumpTag(strNodeChildName)) {
                // Ordinary element: compute its path if requested, then recurse.
                if (this.options.getWritePath() > 0) {
                    integerTagNumber = (Integer)hmPaths.get(strNodeChildName);
                    integerTagNumber = integerTagNumber == null ? Integer.valueOf(1) : Integer.valueOf(integerTagNumber + 1);
                    strNewPath = strCurrentPath + File.separator + strNodeChildName + "[" + integerTagNumber.toString() + "]";
                    if (this.options.getWritePath() >= 2) {
                        ((Element)nodeChild).setAttribute(this.options.getTagsPathArgumentName(), strNewPath);
                    }
                    hmPaths.put(strNodeChildName, integerTagNumber);
                }
                doc = this.modifyDOM(nodeChild, doc, strNewPath);
                continue;
            }
            // Jump tag: remember its index and handle it after the other children.
            vectorTempJumpTags.add(new Integer(i));
        }
        if (!vectorTempJumpTags.isEmpty()) {
            // NOTE(review): the stored indices were taken while the child list was
            // still being edited; later insertions or removals in the loop above
            // may leave them pointing at a different child. Confirm.
            for (int j = 0; j < vectorTempJumpTags.size(); ++j) {
                nodeChild = nlChildren.item((Integer)vectorTempJumpTags.get(j));
                strNodeChildName = nodeChild.getNodeName();
                // NOTE(review): uses >= 0 where the ordinary-element branch above
                // uses > 0; confirm which is intended.
                if (this.options.getWritePath() >= 0) {
                    integerTagNumber = (Integer)hmPaths.get(strNodeChildName);
                    integerTagNumber = integerTagNumber == null ? Integer.valueOf(1) : Integer.valueOf(integerTagNumber + 1);
                    strNewPath = strCurrentPath + File.separator + strNodeChildName + "[" + integerTagNumber.toString() + "]";
                    if (this.options.getWritePath() >= 2) {
                        ((Element)nodeChild).setAttribute(this.options.getTagsPathArgumentName(), strNewPath);
                    }
                    hmPaths.put(strNodeChildName, integerTagNumber);
                }
                ++this.intCountNonBlanks;
                doc = this.modifyDOM(nodeChild, doc, strNewPath);
            }
        }
        return doc;
    }

    /**
     * Counts the characters of {@code strString} that are not a plain space
     * and records the offset span of this text node.
     *
     * <p>Each call advances the text-node ordinal and stores, under that
     * ordinal, a pair whose end is the running non-blank total after this
     * string. The begin value is the total before this string, plus one when
     * the string contains at least one non-blank character.
     *
     * @param strString text whose whitespace has already been normalised to spaces
     * @return the number of non-space characters in {@code strString}
     * @throws IOException declared for callers; not raised by the visible code
     */
    protected int countNonBlankCharacters(String strString) throws IOException {
        ++this.intCountTags;
        int nonBlanks = 0;
        for (int i = 0, length = strString.length(); i < length; ++i) {
            if (strString.charAt(i) != ' ') {
                ++nonBlanks;
            }
        }
        int intBegin = nonBlanks > 0 ? this.intCountNonBlanks + 1 : this.intCountNonBlanks;
        this.intCountNonBlanks += nonBlanks;
        // Integer.valueOf replaces the deprecated Integer(int) constructor.
        this.hMap.put(Integer.valueOf(this.intCountTags), new XGPair(intBegin, this.intCountNonBlanks));
        return nonBlanks;
    }

    /**
     * Creates a parser for {@code document} and extracts its text.
     *
     * @param options  tagger settings passed to the new parser
     * @param document document to extract text from
     * @return a two-element array: index 0 holds the extracted text as a
     *         {@code String}, index 1 holds the {@code XGParser} that produced it
     * @throws IOException propagated from the extraction
     */
    public static Object[] extractText(XGOptions options, Document document) throws IOException {
        XGParser instance = new XGParser(options, document);
        String strText = instance.extractText(document).toString();
        return new Object[]{strText, instance};
    }

    /**
     * Merges the adorned words held by {@code outputter} into {@code document}
     * and hands the serialised result to {@code inputter} as the text of
     * {@code segmentName}.
     *
     * <p>The document is written to a temporary file. That file is kept only
     * when it was written successfully and the inputter reports that it uses
     * segment files; in every other case, including failures, it is deleted.
     *
     * @param options     not used by this method
     * @param instance    parser that previously extracted the document's text
     * @param document    document to rewrite in place
     * @param segmentName name of the segment whose text is replaced
     * @param outputter   must be a {@link ListAdornedWordOutputter}
     * @param inputter    receives the serialised document
     * @return the parser's map from word ID to number of emissions for that ID
     * @throws IOException if the temporary file cannot be created or a nested
     *                     step fails
     */
    public static Map<Integer, Integer> mergeAdornments(XGOptions options, XGParser instance, Document document, String segmentName, AdornedWordOutputter outputter, TextInputter inputter) throws IOException {
        instance.adornerOutputter = outputter;
        instance.intCountTags = 0;
        instance.nextAdornedWord = 0;
        instance.adornedWordDataList = ((ListAdornedWordOutputter)outputter).getAdornedWordDataList();
        instance.getNextEntry();
        document = instance.modifyDOM(document, document, "");
        File file = File.createTempFile("mad", null);
        boolean keepFile = false;
        try {
            if (XGMisc.printNodeToFile(document, file.getAbsolutePath()) == 1) {
                inputter.setSegmentText(segmentName, file);
                keepFile = inputter.usesSegmentFiles();
            }
        } finally {
            // Previously the temp file was left behind when writing did not
            // succeed or when a call above threw.
            if (!keepFile) {
                file.delete();
            }
        }
        return instance.splitWords;
    }

    /**
     * Parses XML text into a DOM document without expanding entity references.
     *
     * <p>Parser configuration and SAX parse errors are not propagated: their
     * message is printed to standard output and {@code null} is returned.
     *
     * @param options not used by this method
     * @param xmlText the XML to parse
     * @return the parsed document, or {@code null} when parsing failed
     * @throws IOException if reading the text fails
     */
    public static Document textToDOM(XGOptions options, String xmlText) throws IOException {
        try {
            DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
            builderFactory.setExpandEntityReferences(false);
            DocumentBuilder documentBuilder = builderFactory.newDocumentBuilder();
            InputSource source = new InputSource(new StringReader(xmlText));
            return documentBuilder.parse(source);
        } catch (ParserConfigurationException pce) {
            System.out.println(pce.getMessage());
            return null;
        } catch (SAXException se) {
            System.out.println(se.getMessage());
            return null;
        }
    }

    /**
     * Tells whether {@code s} is punctuation according to
     * {@code CharUtils.isPunctuation}, excluding the three-character gap
     * marker (an ellipsis between angle brackets).
     *
     * @param s the string to test; must not be null
     * @return {@code false} for the gap marker, otherwise the result of the
     *         punctuation check
     */
    public static boolean isPunctuationAndNotGap(String s) {
        if (s.equals("\u3008\u2026\u3009")) {
            return false;
        }
        return CharUtils.isPunctuation(s);
    }
}

