/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.tools.stripwordattributes;

import edu.northwestern.at.morphadorner.WordAttributeNames;
import edu.northwestern.at.morphadorner.WordAttributePatterns;
import edu.northwestern.at.utils.UnicodeReader;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class StripWordAttributes {
    protected static final String LINE_SEPARATOR = System.getProperty("line.separator");
    protected static Set<String> attrsToOmit = new HashSet<String>();
    protected static Map<String, String> entitiesMap = new HashMap<String, String>();
    protected static Pattern entitiesPattern;
    protected static Matcher entitiesMatcher;

    public static void main(String[] args) {
        if (args.length >= 3) {
            boolean trimWhitespace = false;
            boolean leaveID = false;
            for (int i = 3; i < args.length; ++i) {
                if (args[i].equals("/id")) {
                    leaveID = true;
                    continue;
                }
                if (args[i].equals("/noid")) {
                    leaveID = false;
                    continue;
                }
                if (args[i].equals("/trim")) {
                    trimWhitespace = true;
                    continue;
                }
                if (!args[i].equals("/notrim")) continue;
                trimWhitespace = false;
            }
            new StripWordAttributes(args[0], args[1], args[2], leaveID, trimWhitespace);
        } else {
            StripWordAttributes.displayUsage();
            System.exit(1);
        }
    }

    protected static void displayUsage() {
        System.out.println("Usage:");
        System.out.println();
        System.out.println("java edu.northwestern.at.morphadorner.tools.stripwordattributes.StripWordAttributes input.xml output.xml output.tab [/[no]id] [/[no]trim]");
        System.out.println();
        System.out.println("input.xml -- Input MorphAdornerd xml file.");
        System.out.println("output.xml -- Derived adorned file with word element attributes stripped.");
        System.out.println("output.tab -- Tab delimited file of word element attribute values.");
        System.out.println("/id or /noid -- Optional parameter indicating xml:id should be left attached to each word (<w>) element.  Default is /noid which removes the xml:id attribute and value.");
        System.out.println("/trim or /notrim -- Optional parameter indicating whether whitespace should be trimmed from the start and end of each XML text line.  Default is /notrim, which leaves the original whitespace intact.");
    }

    public StripWordAttributes(String inputXMLFileName, String outputXMLFileName, String outputTabFileName, boolean leaveID, boolean trimWhitespace) {
        try {
            UnicodeReader streamReader = new UnicodeReader(new FileInputStream(new File(inputXMLFileName)), "utf-8");
            BufferedReader in = new BufferedReader(streamReader);
            FileOutputStream outputStream = new FileOutputStream(outputXMLFileName, false);
            BufferedOutputStream bufferedStream = new BufferedOutputStream(outputStream);
            OutputStreamWriter writer = new OutputStreamWriter((OutputStream)bufferedStream, "utf-8");
            PrintWriter xmlPrintWriter = new PrintWriter(writer);
            FileOutputStream outputStream2 = new FileOutputStream(outputTabFileName, false);
            BufferedOutputStream bufferedStream2 = new BufferedOutputStream(outputStream2);
            OutputStreamWriter writer2 = new OutputStreamWriter((OutputStream)bufferedStream2, "utf-8");
            PrintWriter tabPrintWriter = new PrintWriter(writer2);
            boolean first = true;
            String line = in.readLine();
            while (line != null) {
                int wPos;
                if (trimWhitespace) {
                    line = line.trim();
                }
                if ((wPos = line.indexOf("<w ")) >= 0) {
                    String[] groupValues = WordAttributePatterns.wReplacer.matchGroups(line);
                    String[] idValues = WordAttributePatterns.idReplacer.matchGroups(groupValues[2]);
                    String id = idValues[2];
                    String wordText = groupValues[3];
                    line = groupValues[1] + "<w" + (leaveID ? " " + WordAttributeNames.id + "=\"" + id + "\"" : "") + ">" + groupValues[3] + "</w>" + groupValues[4];
                    StringBuffer attrs = new StringBuffer();
                    Map<String, String> attrsMap = StripWordAttributes.getAttributes(groupValues[2], groupValues[3]);
                    attrs.append(attrsMap.get(WordAttributeNames.id));
                    for (String attr : attrsMap.keySet()) {
                        if (attrsToOmit.contains(attr)) continue;
                        attrs.append("\t");
                        attrs.append(attrsMap.get(attr));
                    }
                    if (first) {
                        StringBuffer colTitles = new StringBuffer();
                        colTitles.append(WordAttributeNames.id);
                        for (String attr : attrsMap.keySet()) {
                            if (attrsToOmit.contains(attr)) continue;
                            colTitles.append("\t");
                            colTitles.append(attr);
                        }
                        tabPrintWriter.println(colTitles);
                        first = false;
                    }
                    tabPrintWriter.println(attrs);
                }
                xmlPrintWriter.println(line);
                line = in.readLine();
            }
            in.close();
            xmlPrintWriter.close();
            tabPrintWriter.close();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    protected static Map<String, String> getAttributes(String attrsText, String wordText) {
        TreeMap<String, String> result = new TreeMap<String, String>();
        StringTokenizer tokenizer = new StringTokenizer(attrsText);
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            int iPos = token.indexOf("=");
            String key = token.substring(0, iPos);
            String value = token.substring(iPos + 1);
            result.put(key, StripWordAttributes.cleanAttributeValue(value));
        }
        return StripWordAttributes.fillInMissingAttributes(result, wordText);
    }

    protected static String cleanAttributeValue(String attrValue) {
        if (attrValue.length() > 0 && attrValue.charAt(0) == '\"') {
            attrValue = attrValue.substring(1);
        }
        if (attrValue.length() > 0 && attrValue.charAt(attrValue.length() - 1) == '\"') {
            attrValue = attrValue.substring(0, attrValue.length() - 1);
        }
        if (attrValue.indexOf("&") >= 0) {
            StringBuffer sb = new StringBuffer();
            while (entitiesMatcher.find()) {
                String fixedEntity;
                String entityName = entitiesMatcher.group(2);
                if (entityName.charAt(0) == '#') {
                    int charNum;
                    if ((entityName = entityName.substring(1)).charAt(0) == 'x') {
                        entityName = entityName.substring(1);
                        charNum = Integer.parseInt(entityName, 16);
                    } else {
                        charNum = Integer.parseInt(entityName);
                    }
                    fixedEntity = (char)charNum + "";
                } else {
                    fixedEntity = entitiesMap.get(entityName);
                    if (fixedEntity == null) {
                        fixedEntity = "";
                    }
                }
                entitiesMatcher.appendReplacement(sb, fixedEntity);
            }
            entitiesMatcher.appendTail(sb);
            attrValue = sb.toString();
        }
        return attrValue;
    }

    protected static Map<String, String> fillInMissingAttributes(Map<String, String> attrMap, String wordText) {
        StripWordAttributes.setMissingValue(attrMap, WordAttributeNames.tok, wordText);
        StripWordAttributes.setMissingValue(attrMap, WordAttributeNames.spe, attrMap.get(WordAttributeNames.tok));
        StripWordAttributes.setMissingValue(attrMap, WordAttributeNames.reg, attrMap.get(WordAttributeNames.spe));
        StripWordAttributes.setMissingValue(attrMap, WordAttributeNames.pos, attrMap.get(WordAttributeNames.tok));
        StripWordAttributes.setMissingValue(attrMap, WordAttributeNames.lem, attrMap.get(WordAttributeNames.spe));
        StripWordAttributes.setMissingValue(attrMap, WordAttributeNames.eos, "0");
        StripWordAttributes.setMissingValue(attrMap, WordAttributeNames.part, "N");
        return attrMap;
    }

    protected static void setMissingValue(Map<String, String> attrMap, String attrName, String defaultAttrValue) {
        String attrValue = attrMap.get(attrName);
        if (attrValue == null) {
            attrMap.put(attrName, defaultAttrValue);
        }
    }

    static {
        attrsToOmit.add(WordAttributeNames.id);
        entitiesMap.put("quot", "\"");
        entitiesMap.put("apos", "'");
        entitiesMap.put("amp", "&");
        entitiesMap.put("lt", "<");
        entitiesMap.put("gt", ">");
        entitiesPattern = Pattern.compile("(&)(quot|apos|amp|lt|gt|#[0-9]+|#x[0-9]+)(;)");
        entitiesMatcher = entitiesPattern.matcher("");
    }
}

