/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.tools.xmltotab;

import edu.northwestern.at.morphadorner.tools.AdornedXMLReader;
import edu.northwestern.at.morphadorner.tools.ExtendedAdornedWord;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.csv.CSVFileWriter;
import java.util.List;

/**
 * Command-line tool that reads an adorned XML file and writes one
 * tab-separated row per word to an output file.
 *
 * <p>All work is done in the constructor; {@link #main(String[])} only
 * validates the argument count and constructs an instance.</p>
 */
public class XMLToTab {
    /** Total KWIC width requested for each row; each side gets half of it. */
    private static final int KWIC_WIDTH = 80;

    /**
     * Program entry point.
     *
     * @param args command-line arguments: input XML file name followed by
     *             output tab-separated file name. With fewer than two
     *             arguments the usage text is printed and the JVM exits
     *             with status 1.
     */
    public static void main(String[] args) {
        if (args.length >= 2) {
            new XMLToTab(args);
        } else {
            XMLToTab.displayUsage();
            System.exit(1);
        }
    }

    /** Prints the command-line usage text to standard output. */
    public static void displayUsage() {
        System.out.println("Usage: ");
        System.out.println("");
        // Package name matches this class's package ("tools", not "tool").
        System.out.println("   java edu.northwestern.at.morphadorner.tools.xmltotab.XMLToTab input.xml output.tab");
        System.out.println("");
        System.out.println("      input.xml -- input XML file");
        System.out.println("      output.tab -- output tab-separated values file.");
    }

    /**
     * Converts the adorned XML file named by {@code args[0]} into the
     * tab-separated file named by {@code args[1]}.
     *
     * <p>Each row holds, in order: work ID (input file name without path and
     * extension), word ID, token, reversed token, standard spelling, lemmata,
     * parts of speech, trimmed path (see {@link #fixPath(String)}),
     * end-of-sentence flag ("1"/"0"), previous token, next token, previous
     * word's parts of speech, next word's parts of speech, left KWIC context,
     * right KWIC context, label, div type, and word ordinal.</p>
     *
     * <p>Any exception raised while reading or writing is printed to standard
     * error and not rethrown. The output writer is closed whether or not the
     * conversion succeeds.</p>
     *
     * @param args command-line arguments; must contain at least two elements
     */
    public XMLToTab(String[] args) {
        String xmlInputFileName = args[0];
        String tabOutputFileName = args[1];
        CSVFileWriter writer = null;
        try {
            // Tab is the separator; the '\u0000' argument is presumably the
            // quote character (i.e. no quoting) -- TODO confirm against CSVFileWriter.
            writer = new CSVFileWriter(tabOutputFileName, "utf-8", '\t', '\u0000');
            String workID = FileNameUtils.stripPathName(xmlInputFileName);
            workID = FileNameUtils.changeFileExtension(workID, "");
            AdornedXMLReader xmlReader = new AdornedXMLReader(xmlInputFileName);
            List<String> idList = xmlReader.getAdornedWordIDs();
            for (String id : idList) {
                ExtendedAdornedWord w = xmlReader.getExtendedAdornedWord(id);
                // Only words reporting isFirstPart() produce a row.
                if (!w.isFirstPart()) {
                    continue;
                }
                ExtendedAdornedWord previous = w.getPreviousWord();
                ExtendedAdornedWord next = w.getNextWord();

                writer.writeValue(workID);
                writer.writeSeparator();
                writer.writeValue(w.getID());
                writer.writeSeparator();
                writer.writeValue(w.getToken());
                writer.writeSeparator();
                writer.writeValue(StringUtils.reverseString(w.getToken()));
                writer.writeSeparator();
                writer.writeValue(w.getStandardSpelling());
                writer.writeSeparator();
                writer.writeValue(w.getLemmata());
                writer.writeSeparator();
                writer.writeValue(w.getPartsOfSpeech());
                writer.writeSeparator();
                writer.writeValue(XMLToTab.fixPath(w.getPath()));
                writer.writeSeparator();
                writer.writeValue(w.getEOS() ? "1" : "0");
                writer.writeSeparator();

                // Neighbouring tokens; empty when there is no neighbour.
                writer.writeValue(previous != null ? previous.getToken() : "");
                writer.writeSeparator();
                writer.writeValue(next != null ? next.getToken() : "");
                writer.writeSeparator();

                // Neighbouring parts of speech; empty when there is no neighbour.
                if (previous != null) {
                    writer.writeValue(previous.getPartsOfSpeech());
                } else {
                    writer.writeValue("");
                }
                writer.writeSeparator();
                if (next != null) {
                    writer.writeValue(next.getPartsOfSpeech());
                } else {
                    writer.writeValue("");
                }
                writer.writeSeparator();

                // kwic[1] (the token itself) is already written above, so only
                // the left and right contexts are emitted here.
                String[] kwic = XMLToTab.getKWIC(w.getID(), KWIC_WIDTH, idList, xmlReader);
                writer.writeValue(kwic[0]);
                writer.writeSeparator();
                writer.writeValue(kwic[2]);
                writer.writeSeparator();
                writer.writeValue(w.getLabel());
                writer.writeSeparator();
                writer.writeValue(w.getDivType());
                writer.writeSeparator();
                writer.writeValue(w.getOrd() + "");
                writer.writeEOL();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            // Close on every path so a failure mid-conversion does not leak
            // the output file handle.
            if (writer != null) {
                try {
                    writer.close();
                }
                catch (Exception closeError) {
                    closeError.printStackTrace();
                }
            }
        }
    }

    /**
     * Trims a backslash-separated path: the path is split on backslashes,
     * the first two resulting segments and the last segment are dropped, and
     * the remaining segments are re-joined, each preceded by a backslash.
     *
     * <p>For example {@code \a\b\c\d} becomes {@code \b\c}.</p>
     *
     * @param path backslash-separated path; may be {@code null}
     * @return the trimmed path, or an empty string when {@code path} is
     *         {@code null} or has too few segments
     */
    protected static String fixPath(String path) {
        if (path == null) {
            return "";
        }
        String[] tags = path.split("\\\\");
        StringBuilder result = new StringBuilder();
        for (int i = 2; i < tags.length - 1; ++i) {
            result.append("\\");
            result.append(tags[i]);
        }
        return result.toString();
    }

    /**
     * Builds keyword-in-context strings for a word.
     *
     * <p>The left context is built by walking backwards through previous
     * words, prepending tokens separated by single spaces, until it is at
     * least {@code KWICWidth / 2} characters long or there is no previous
     * word. The right context is built by walking forwards the same way,
     * except that every token is followed by a space, so a non-empty right
     * context ends with a trailing space.</p>
     *
     * @param id        ID of the word to centre the context on
     * @param KWICWidth total context width; each side is limited by half of it
     * @param idList    list of word IDs; not used by this method
     * @param xmlReader reader used to look up the word by ID
     * @return a three-element array: left context, the word's token, right context
     */
    public static String[] getKWIC(String id, int KWICWidth, List<String> idList, AdornedXMLReader xmlReader) {
        String[] results = new String[3];
        ExtendedAdornedWord center = xmlReader.getExtendedAdornedWord(id);
        results[1] = center.getToken();
        int maxWidth = KWICWidth / 2;

        // Left context: walk backwards, prepending tokens.
        StringBuilder context = new StringBuilder();
        ExtendedAdornedWord cursor = center;
        while (context.length() < maxWidth && cursor.getPreviousWord() != null) {
            cursor = cursor.getPreviousWord();
            if (context.length() > 0) {
                context.insert(0, " ");
            }
            context.insert(0, cursor.getToken());
        }
        results[0] = context.toString();

        // Right context: restart from the centre word and walk forwards.
        context.setLength(0);
        cursor = center;
        while (context.length() < maxWidth && cursor.getNextWord() != null) {
            cursor = cursor.getNextWord();
            context.append(cursor.getToken());
            context.append(" ");
        }
        results[2] = context.toString();
        return results;
    }
}

