/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.tools.annolex;

import edu.northwestern.at.morphadorner.corpuslinguistics.partsofspeech.DefaultPartOfSpeechTags;
import edu.northwestern.at.morphadorner.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.morphadorner.tools.AdornedXMLWriter;
import edu.northwestern.at.morphadorner.tools.annolex.CorrectedWord;
import edu.northwestern.at.morphadorner.tools.annolex.CorrectedWordsFileReader;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.SetFactory;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.xml.JDOMUtils;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.jdom2.Attribute;
import org.jdom2.Content;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Parent;
import org.jdom2.filter.Filter;
import org.jdom2.filter.Filters;
import org.jdom2.input.SAXBuilder;
import org.jdom2.util.IteratorIterable;

public class MergeAnnolexCorrectionsIntoAdornedXML {
    /** Number of input files found for this run; assigned in processFiles from the expanded file list. */
    protected static int docsToProcess = 0;
    /** NOTE(review): presumably the index of the file being processed; not updated in the code visible here. */
    protected static int currentDocNumber = 0;
    /** NOTE(review): not assigned in the code visible here; input files come from the wildcard arguments. */
    protected static String inputXMLDirectory;
    /** Directory holding the ".tab" corrections files; taken from the first command-line argument. */
    protected static String inputCorrectionsDirectory;
    /** Directory that receives the revised XML files; taken from the second command-line argument. */
    protected static String outputDirectory;
    /** NOTE(review): not referenced in the code visible here. */
    protected static PrintStream outputFileStream;
    /** Auto-flushing UTF-8 wrapper around standard output, created in initialize; used for all progress messages. */
    protected static PrintStream printStream;
    /** NOTE(review): processOneFile works on a local variable of the same name; this field is not assigned in the code visible here. */
    protected static Document document;
    /** Maps xml:id values to "w" elements of the current document; rebuilt by extractWords. */
    protected static Map<String, Element> wordIDsToElements;
    /** Maps xml:id values to "gap" elements of the current document; built by extractGaps. */
    protected static Map<String, Element> gapIDsToElements;
    /** Corrections read from the current corrections file; its keys are used as word IDs. */
    protected static Map<String, CorrectedWord> correctedWordsMap;
    /** NOTE(review): presumably the number of leading non-file arguments; initialize requires at least three arguments. */
    protected static final int INITPARAMS = 2;
    /** Part of speech tag set used to split and validate corrected tags. */
    protected static PartOfSpeechTags posTags;
    /** Per-word reports of unrecognized part of speech tags (id, spelling, lemma, tag). */
    protected static Set<String> badPosTags;
    /** The distinct unrecognized part of speech tags themselves. */
    protected static Set<String> combinedBadPosTags;
    /** Per-word reports of lemma/part of speech inconsistencies, prefixed with the word ID. */
    protected static Set<String> mismatches;
    /** Lemma/part of speech inconsistency reports without the word ID prefix. */
    protected static Set<String> combinedMismatches;
    /** Count of words added by the current applyCorrections pass (reset at its start). */
    protected static int addedWords;
    /** Count of words deleted; reset in applyCorrections and reported in its summary. */
    protected static int deletedWords;
    /** Count of words whose attributes were changed by the current applyCorrections pass. */
    protected static int modifiedWords;
    /** Count of gaps deleted; reset in applyCorrections and reported in its summary. */
    protected static int deletedGaps;
    /** Copy of the first "c" element whose text is a single blank (or null if none); cloned when a separator must be inserted. */
    protected static Element clonableCElement;
    /** Word elements scheduled for removal; cleared for each file and filled while applying corrections. */
    protected static List<Element> wordElementsToDelete;
    /** Gap elements scheduled for replacement; cleared for each file and filled while applying corrections. */
    protected static List<Element> gapElementsToDelete;
    /** When true, per-word progress and diagnostic messages are printed. */
    protected static boolean verbose;
    /** NOTE(review): not referenced in the code visible here. */
    protected static boolean debug;

    /**
     * Command-line entry point: initializes the tool, processes every input
     * file named by the arguments, and prints a summary.
     *
     * <p>Exits with status 1 when initialization rejects the arguments. Any
     * other exception is reported as a single line and the method returns
     * normally.
     *
     * @param args corrections directory, output directory, then one or more
     *             input file names or wildcards
     */
    public static void main(String[] args) {
        try {
            if (!MergeAnnolexCorrectionsIntoAdornedXML.initialize(args)) {
                System.exit(1);
            }
            long startTime = System.currentTimeMillis();
            int filesProcessed = MergeAnnolexCorrectionsIntoAdornedXML.processFiles(args);
            long processingTime = (System.currentTimeMillis() - startTime + 999L) / 1000L;
            MergeAnnolexCorrectionsIntoAdornedXML.terminate(filesProcessed, processingTime);
        }
        catch (Exception e) {
            // printStream is still null if initialize failed before creating it,
            // so fall back to standard error instead of throwing a second exception.
            PrintStream reportStream = printStream != null ? printStream : System.err;
            // getMessage() may legitimately be null; the exception's own
            // description is more useful than the literal text "null".
            String message = e.getMessage();
            reportStream.println(message != null ? message : e.toString());
        }
    }

    /**
     * Prepares the run: creates the UTF-8 console stream and records the
     * corrections and output directories from the leading arguments.
     *
     * @param args command-line arguments; at least three are required
     * @return true when enough arguments were supplied, false otherwise
     * @throws Exception if the UTF-8 console stream cannot be created
     */
    protected static boolean initialize(String[] args) throws Exception {
        OutputStream bufferedStdout = new BufferedOutputStream(System.out);
        printStream = new PrintStream(bufferedStdout, true, "utf-8");
        boolean enoughParameters = args.length >= 3;
        if (enoughParameters) {
            inputCorrectionsDirectory = args[0];
            outputDirectory = args[1];
            posTags = new DefaultPartOfSpeechTags();
        } else {
            System.err.println("Not enough parameters.");
        }
        return enoughParameters;
    }

    /**
     * Merges the corrections for one adorned XML file and writes the result.
     *
     * <p>The corrections file is expected in the corrections directory under
     * the input file's name with a ".tab" extension; the revised XML is
     * written to the output directory under the input file's own name. Any
     * failure while loading, merging or writing is reported and swallowed so
     * that the remaining files can still be processed.
     *
     * @param xmlInputFileName path of the adorned XML file to process
     * @throws IOException if the output or corrections path cannot be resolved
     */
    protected static void processOneFile(String xmlInputFileName) throws IOException {
        String baseName = FileNameUtils.stripPathName(xmlInputFileName);
        // Computed as in the original code; the value is not used afterwards.
        String workName = FileNameUtils.changeFileExtension(baseName, "");
        File outputFile = new File(outputDirectory, baseName);
        String xmlOutputFileName = outputFile.getCanonicalPath();
        String correctionsBaseName = FileNameUtils.changeFileExtension(baseName, ".tab");
        File correctionsFile = new File(inputCorrectionsDirectory, correctionsBaseName);
        String correctionsFileName = correctionsFile.getCanonicalPath();
        try {
            long phaseStart = System.currentTimeMillis();
            printStream.println("---------- Processing " + baseName);

            // Load the document and index its words, gaps and a blank separator.
            Document document = MergeAnnolexCorrectionsIntoAdornedXML.loadXML(xmlInputFileName);
            wordIDsToElements = MergeAnnolexCorrectionsIntoAdornedXML.extractWords(document);
            gapIDsToElements = MergeAnnolexCorrectionsIntoAdornedXML.extractGaps(document);
            clonableCElement = MergeAnnolexCorrectionsIntoAdornedXML.extractCElement(document);
            long elapsedSeconds = (System.currentTimeMillis() - phaseStart + 999L) / 1000L;
            int wordCount = wordIDsToElements.size();
            String secondsLabel = elapsedSeconds != 1L ? " seconds" : " second";
            String wordsLabel = wordCount != 1 ? " words." : " word.";
            printStream.println("XML file " + xmlInputFileName + " loaded in " + Formatters.formatLongWithCommas(elapsedSeconds) + secondsLabel + " and contains " + Formatters.formatIntegerWithCommas(wordCount) + wordsLabel);

            // Load and apply the corrections, then repair IDs, separators,
            // sentence-end flags and ordinals on the re-indexed word list.
            MergeAnnolexCorrectionsIntoAdornedXML.loadCorrectionsFile(correctionsFileName);
            wordElementsToDelete.clear();
            gapElementsToDelete.clear();
            MergeAnnolexCorrectionsIntoAdornedXML.applyCorrections(document);
            wordIDsToElements = MergeAnnolexCorrectionsIntoAdornedXML.extractWords(document);
            MergeAnnolexCorrectionsIntoAdornedXML.fixSplitWordIDs(document);
            MergeAnnolexCorrectionsIntoAdornedXML.compressCElements(document);
            wordIDsToElements = MergeAnnolexCorrectionsIntoAdornedXML.extractWords(document);
            MergeAnnolexCorrectionsIntoAdornedXML.fixEOSAttributes(wordIDsToElements);
            MergeAnnolexCorrectionsIntoAdornedXML.updateWordOrdinals(wordIDsToElements);

            // Write the revised document; the writer is invoked through its constructor.
            phaseStart = System.currentTimeMillis();
            new AdornedXMLWriter(document, xmlOutputFileName);
            elapsedSeconds = (System.currentTimeMillis() - phaseStart + 999L) / 1000L;
            String writeLabel = elapsedSeconds != 1L ? " seconds." : " second.";
            printStream.println("Revised XML written to " + xmlOutputFileName + " in " + Formatters.formatLongWithCommas(elapsedSeconds) + writeLabel);
        }
        catch (Exception failure) {
            printStream.println(xmlInputFileName + " failed.");
            failure.printStackTrace();
            printStream.println("Error: " + failure.getMessage());
        }
    }

    /**
     * Parses an XML file into a JDOM document using a default SAX builder.
     *
     * @param inputXMLFileName name of the XML file to parse
     * @return the parsed document
     * @throws JDOMException if the builder reports a parsing problem
     * @throws IOException if the file cannot be read
     */
    protected static Document loadXML(String inputXMLFileName) throws JDOMException, IOException {
        SAXBuilder builder = new SAXBuilder();
        Document parsed = builder.build(inputXMLFileName);
        return parsed;
    }

    /**
     * Indexes every "w" element of a document by its xml:id attribute.
     *
     * @param document the document to scan
     * @return map from xml:id to word element, in document order; a later
     *         element replaces an earlier one that has the same ID
     */
    protected static Map<String, Element> extractWords(Document document) {
        Map<String, Element> elementsByID = new LinkedHashMap<String, Element>();
        Filter<Element> wordFilter = Filters.element("w");
        for (Element wordElement : document.getRootElement().getDescendants(wordFilter)) {
            String xmlID = JDOMUtils.getAttributeValue(wordElement, "xml:id", false);
            elementsByID.put(xmlID, wordElement);
        }
        return elementsByID;
    }

    /**
     * Indexes every "gap" element of a document by its xml:id attribute.
     *
     * @param document the document to scan
     * @return map from xml:id to gap element, in document order; a later
     *         element replaces an earlier one that has the same ID
     */
    protected static Map<String, Element> extractGaps(Document document) {
        Map<String, Element> elementsByID = new LinkedHashMap<String, Element>();
        Filter<Element> gapFilter = Filters.element("gap");
        for (Element gapElement : document.getRootElement().getDescendants(gapFilter)) {
            String xmlID = JDOMUtils.getAttributeValue(gapElement, "xml:id", false);
            elementsByID.put(xmlID, gapElement);
        }
        return elementsByID;
    }

    /**
     * Finds the first "c" element whose text is exactly one blank and returns
     * a detached copy of it.
     *
     * @param document the document to scan
     * @return a clone of the first single-blank "c" element, or null when the
     *         document contains none
     */
    protected static Element extractCElement(Document document) {
        Filter<Element> separatorFilter = Filters.element("c");
        for (Element candidate : document.getRootElement().getDescendants(separatorFilter)) {
            if (candidate.getText().equals(" ")) {
                return candidate.clone();
            }
        }
        return null;
    }

    /**
     * Checks every loaded correction against the words of the document and
     * records lemma/part of speech inconsistencies and unknown tags.
     *
     * <p>Corrections for unknown word IDs (additions) and corrections of type
     * "2" or "3" (deletions) are only announced in verbose mode. For all
     * others the numbers of '|' separators in the tag and lemma strings are
     * compared, an empty lemma is flagged, and each individual tag is checked
     * against the tag set.
     *
     * @param wordIDsToElements map from word ID to word element
     * @return always 0; the counter is never incremented
     */
    protected static int validateCorrections(Map<String, Element> wordIDsToElements) {
        int issueCount = 0;
        for (Map.Entry<String, CorrectedWord> entry : correctedWordsMap.entrySet()) {
            String id = entry.getKey();
            CorrectedWord correction = entry.getValue();
            String correctionType = correction.getCorrectionType();
            Element wordElement = wordIDsToElements.get(id);

            // A correction without a matching word is a pending insertion.
            if (wordElement == null) {
                if (verbose) {
                    printStream.println("Adding new word with id " + id + "   [" + correction.getSpelling() + "]");
                }
                continue;
            }
            boolean isDeletion = correctionType.equals("2") || correctionType.equals("3");
            if (isDeletion) {
                if (verbose) {
                    printStream.println("Deleting word with id " + id + "   [" + correction.getSpelling() + "]");
                }
                continue;
            }

            String xmlSpelling = JDOMUtils.getAttributeValue(wordElement, "tok", false);
            if (verbose && !correction.getOldSpelling().equals(xmlSpelling)) {
                printStream.println(id + "\tXML has spelling " + xmlSpelling + ", corrections has " + correction.getOldSpelling() + " [" + correction.getSpelling() + "]");
            }

            String posTag = correction.getPartsOfSpeech();
            String lemma = correction.getLemmata().trim();
            int posTagCount = MergeAnnolexCorrectionsIntoAdornedXML.countSeparators(posTag, '|');
            int lemmaCount = MergeAnnolexCorrectionsIntoAdornedXML.countSeparators(lemma, '|');
            if (posTagCount != lemmaCount) {
                String report = correction.getSpelling() + "\t" + lemma + "\t" + posTag + "\tmismatch";
                if (verbose) {
                    printStream.println(correction.getId() + "\t" + report);
                }
                mismatches.add(correction.getId() + "\t" + report);
                combinedMismatches.add(report);
            } else if (lemma.length() == 0) {
                String report = correction.getSpelling() + "\t" + lemma + "\t" + posTag + "\tmissing lemma";
                mismatches.add(correction.getId() + "\t" + report);
                combinedMismatches.add(report);
                if (verbose) {
                    printStream.println(correction.getId() + "\t" + report);
                }
            }

            for (String singleTag : posTags.splitTag(posTag)) {
                if (posTags.isTag(singleTag)) {
                    continue;
                }
                String badTagReport = correction.getId() + "\t" + correction.getSpelling() + "\t" + lemma + "\t" + posTag + "\tbad part of speech: " + singleTag;
                badPosTags.add(badTagReport);
                combinedBadPosTags.add(singleTag);
                if (verbose) {
                    printStream.println(badTagReport);
                }
            }
        }
        return issueCount;
    }

    /**
     * Applies every loaded correction to the document and prints a summary.
     *
     * <p>Corrections of type "5" schedule the gap with the correction's ID for
     * replacement. For all others the word parts related to the corrected ID
     * are collected; existing parts are updated or scheduled for deletion
     * (type "2"/"3" or an empty spelling part), and IDs that do not exist yet
     * are inserted. Scheduled word deletions and gap replacements are carried
     * out after all corrections have been visited.
     *
     * @param document the document being corrected
     * @throws Exception if one of the helper steps fails
     */
    protected static void applyCorrections(Document document) throws Exception {
        long startedAt = System.currentTimeMillis();
        addedWords = 0;
        deletedWords = 0;
        modifiedWords = 0;
        deletedGaps = 0;

        // Snapshot the correction IDs so they can be addressed by position.
        List<String> correctedWordIDs = ListFactory.createNewList();
        for (String key : correctedWordsMap.keySet()) {
            correctedWordIDs.add(key);
        }

        for (int index = 0; index < correctedWordIDs.size(); ++index) {
            String correctedWordID = correctedWordIDs.get(index);
            CorrectedWord correction = correctedWordsMap.get(correctedWordID);
            String correctionType = correction.getCorrectionType();
            boolean deleteTheWord = correctionType.equals("2") || correctionType.equals("3");
            if (correctionType.equals("5")) {
                gapElementsToDelete.add(gapIDsToElements.get(correction.getId()));
                continue;
            }

            // Gather the current text of every related word part.
            List<String> idsToUpdate = MergeAnnolexCorrectionsIntoAdornedXML.getRelatedWordIDs(correctedWordID);
            int partCount = idsToUpdate.size();
            String[] spellingParts = new String[partCount];
            StringBuilder joinedBuilder = new StringBuilder();
            for (int part = 0; part < partCount; ++part) {
                String partID = idsToUpdate.get(part);
                if (wordIDsToElements.containsKey(partID)) {
                    spellingParts[part] = wordIDsToElements.get(partID).getText();
                    joinedBuilder.append(spellingParts[part]);
                }
            }
            String oldJoinedSpelling = joinedBuilder.toString();

            if (partCount > 1) {
                // Redistribute the spelling only when it really differs from
                // the joined parts, with or without embedded blanks.
                if (!oldJoinedSpelling.equals(correction.getSpelling())) {
                    String blankFreeSpelling = StringUtils.stripChars(correction.getSpelling(), " ");
                    if (!oldJoinedSpelling.equals(blankFreeSpelling)) {
                        MergeAnnolexCorrectionsIntoAdornedXML.resplit(correction.getId(), spellingParts, oldJoinedSpelling, correction.getSpelling());
                    }
                }
            } else {
                spellingParts[0] = correction.getSpelling();
            }

            for (int part = 0; part < partCount; ++part) {
                String partID = idsToUpdate.get(part);
                if (!wordIDsToElements.containsKey(partID)) {
                    addedWords += MergeAnnolexCorrectionsIntoAdornedXML.insertWord(partID, correction, correctedWordIDs, index);
                } else {
                    Element wordElement = wordIDsToElements.get(partID);
                    if (deleteTheWord || spellingParts[part].length() == 0) {
                        wordElementsToDelete.add(wordElement);
                    } else if (MergeAnnolexCorrectionsIntoAdornedXML.updateWord(document, wordElement, correction, correctedWordIDs, index, spellingParts[part])) {
                        ++modifiedWords;
                    }
                }
            }
        }

        MergeAnnolexCorrectionsIntoAdornedXML.deleteWordElements(wordElementsToDelete, MergeAnnolexCorrectionsIntoAdornedXML.getSortedWordIDs());
        MergeAnnolexCorrectionsIntoAdornedXML.replaceGapElementsWithWords(gapElementsToDelete);

        long elapsedSeconds = (System.currentTimeMillis() - startedAt + 999L) / 1000L;
        String secondsLabel = elapsedSeconds != 1L ? " seconds." : " second.";
        printStream.println("Update completed in " + Formatters.formatLongWithCommas(elapsedSeconds) + secondsLabel);
        printStream.println("     " + Formatters.formatIntegerWithCommas(addedWords) + " words added.");
        printStream.println("     " + Formatters.formatIntegerWithCommas(deletedWords) + " words deleted.");
        printStream.println("     " + Formatters.formatIntegerWithCommas(modifiedWords) + " words modified.");
        printStream.println("     " + Formatters.formatIntegerWithCommas(deletedGaps) + " gaps deleted.");
    }

    /**
     * Redistributes a corrected spelling over the parts of a split word,
     * overwriting the entries of {@code spellingParts} in place.
     *
     * <p>The first matching strategy is used: split at blanks; cut at the
     * original part lengths when the total length is unchanged; cut at the
     * original part lengths when one spelling is a case-insensitive prefix of
     * the other; keep a trailing "'s" part; otherwise put everything into the
     * first part and empty the rest.
     *
     * @param id                 ID of the corrected word (not used by this method)
     * @param spellingParts      current text of each part; receives the new parts
     * @param oldJoinedSpelling  concatenation of the current parts
     * @param updatedSpelling    corrected spelling to distribute
     */
    protected static void resplit(String id, String[] spellingParts, String oldJoinedSpelling, String updatedSpelling) {
        if (updatedSpelling.indexOf(" ") >= 0) {
            // The corrected spelling carries its own part boundaries as blanks.
            String[] tokens = updatedSpelling.split(" ");
            if (tokens.length > spellingParts.length) {
                // NOTE(review): this rebinds only the local parameter. The new,
                // larger array is filled below but never reaches the caller,
                // whose array keeps its old contents in this case — confirm
                // whether that is intended.
                spellingParts = new String[tokens.length];
            }
            // One token per part; parts beyond the last token become empty.
            for (int i = 0; i < spellingParts.length; ++i) {
                spellingParts[i] = i < tokens.length ? tokens[i] : "";
            }
        } else if (oldJoinedSpelling.length() == updatedSpelling.length()) {
            // Same total length: cut the new spelling at the old part boundaries.
            int j = 0;
            for (int i = 0; i < spellingParts.length; ++i) {
                spellingParts[i] = updatedSpelling.substring(j, spellingParts[i].length() + j);
                j += spellingParts[i].length();
            }
        } else if (oldJoinedSpelling.toLowerCase().startsWith(updatedSpelling.toLowerCase())) {
            // The new spelling is a (case-insensitive) prefix of the old one.
            int i;
            // Padding keeps the substring calls below within bounds; presumably
            // StringUtils.dupl repeats the blank the given number of times — TODO confirm.
            String extendedUpdatedSpelling = updatedSpelling + StringUtils.dupl(" ", oldJoinedSpelling.length());
            int j = 0;
            for (i = 0; i < spellingParts.length; ++i) {
                spellingParts[i] = extendedUpdatedSpelling.substring(j, spellingParts[i].length() + j);
                j += spellingParts[i].length();
            }
            // Remove the padding again; trailing parts may end up empty.
            for (i = 0; i < spellingParts.length; ++i) {
                spellingParts[i] = spellingParts[i].trim();
            }
        } else if (updatedSpelling.toLowerCase().startsWith(oldJoinedSpelling.toLowerCase())) {
            // The new spelling extends the old one.
            int i;
            String extendedUpdatedSpelling = updatedSpelling + StringUtils.dupl(" ", oldJoinedSpelling.length());
            int j = 0;
            for (i = 0; i < spellingParts.length; ++i) {
                spellingParts[i] = extendedUpdatedSpelling.substring(j, spellingParts[i].length() + j);
                j += spellingParts[i].length();
            }
            // Whatever follows the old boundaries is appended to the last part.
            spellingParts[spellingParts.length - 1] = spellingParts[spellingParts.length - 1] + extendedUpdatedSpelling.substring(j);
            for (i = 0; i < spellingParts.length; ++i) {
                spellingParts[i] = spellingParts[i].trim();
            }
        } else if (spellingParts.length == 2 && spellingParts[1].equals("'s") && updatedSpelling.endsWith("'s")) {
            // Keep the "'s" part as it is; the first part takes the rest.
            spellingParts[0] = updatedSpelling.substring(0, updatedSpelling.length() - 2);
        } else {
            // No usable boundary: the first part takes everything, the others are emptied.
            spellingParts[0] = updatedSpelling;
            for (int i = 1; i < spellingParts.length; ++i) {
                spellingParts[i] = "";
            }
        }
    }

    /**
     * Removes redundant blank separators: whenever two single-blank "c"
     * elements follow each other directly in the element traversal of the
     * document, the later one is removed.
     *
     * @param document the document to clean up
     */
    protected static void compressCElements(Document document) {
        List<Element> redundantSeparators = ListFactory.createNewList();

        // Collect first and remove afterwards so the traversal is not disturbed.
        boolean previousWasBlankSeparator = false;
        for (Element current : document.getRootElement().getDescendants(Filters.element())) {
            boolean isBlankSeparator = current.getName().equals("c") && current.getText().equals(" ");
            if (isBlankSeparator && previousWasBlankSeparator) {
                redundantSeparators.add(current);
            }
            previousWasBlankSeparator = isBlankSeparator;
        }

        for (Element separator : redundantSeparators) {
            Parent owner = separator.getParent();
            if (owner != null) {
                owner.removeContent(separator);
            }
        }
    }

    /**
     * Repairs the "part" attributes (and, where needed, the IDs) of split
     * words, driven by every word whose xml:id ends in ".1".
     *
     * <p>When such a word has no other related parts, the ".1" suffix is
     * dropped from its xml:id and it is marked part "N". Otherwise the related
     * parts, in sorted ID order, are marked "I" (first), "M" (middle) and
     * "F" (last).
     *
     * @param document the document being corrected (not used directly; the
     *                 word index is read from the static map)
     */
    protected static void fixSplitWordIDs(Document document) {
        for (Map.Entry<String, Element> entry : wordIDsToElements.entrySet()) {
            Element word = entry.getValue();
            String id = JDOMUtils.getAttributeValue(word, "xml:id", false);
            if (!id.endsWith(".1")) {
                continue;
            }
            List<String> relatedIDs = MergeAnnolexCorrectionsIntoAdornedXML.getRelatedWordIDs(id);
            Collections.sort(relatedIDs);
            int partCount = relatedIDs.size();
            if (partCount == 1) {
                // A lone first part is really an unsplit word.
                String unsplitID = id.substring(0, id.length() - 2);
                JDOMUtils.setAttributeValue(word, "xml:id", unsplitID);
                JDOMUtils.setAttributeValue(word, "part", "N");
            } else {
                int lastIndex = partCount - 1;
                for (int position = 0; position < partCount; ++position) {
                    Element partElement = wordIDsToElements.get(relatedIDs.get(position));
                    String part = position == 0 ? "I" : (position == lastIndex ? "F" : "M");
                    JDOMUtils.setAttributeValue(partElement, "part", part);
                }
            }
        }
    }

    /**
     * Removes doubled end-of-sentence flags: when two words that are adjacent
     * in sorted ID order both carry eos="1", the earlier one is reset to "0".
     *
     * <p>A missing eos attribute is treated as "0". The comparison uses the
     * values read before any reset, so in a longer run of flagged words only
     * the last one keeps its flag. A null or empty map is left untouched.
     *
     * @param wordIDsToElements map from word ID to word element
     */
    protected static void fixEOSAttributes(Map<String, Element> wordIDsToElements) {
        // A document without any words has nothing to fix; without this guard
        // the lookup of the first word would throw.
        if (wordIDsToElements == null || wordIDsToElements.isEmpty()) {
            return;
        }
        Element previousWord = null;
        String previousEos = null;
        for (String wordID : new TreeSet<String>(wordIDsToElements.keySet())) {
            Element word = wordIDsToElements.get(wordID);
            String eos = JDOMUtils.getAttributeValue(word, "eos", false);
            if (eos == null) {
                eos = "0";
            }
            // previousWord is null only for the first word, which has no predecessor.
            if (previousWord != null && eos.equals("1") && previousEos.equals("1")) {
                JDOMUtils.setAttributeValue(previousWord, "eos", "0");
            }
            previousWord = word;
            previousEos = eos;
        }
    }

    /**
     * Renumbers the "ord" attribute of every word in map iteration order,
     * starting at 1. All parts of a split word receive the same ordinal.
     *
     * <p>All ordinals are first reset to the empty string; a word whose
     * ordinal is still empty when it is reached starts the next ordinal,
     * which is then assigned to each of its related parts.
     *
     * @param wordIDsToElements map from word ID to word element
     */
    protected static void updateWordOrdinals(Map<String, Element> wordIDsToElements) {
        // Pass 1: clear every ordinal so that "empty" means "not yet numbered".
        for (Element wordElement : wordIDsToElements.values()) {
            MergeAnnolexCorrectionsIntoAdornedXML.changeAttribute(wordElement, "ord", JDOMUtils.getAttributeValue(wordElement, "ord", false), "");
        }
        // Pass 2: hand out consecutive ordinals.
        int wordOrdinal = 0;
        for (Element wordElement : wordIDsToElements.values()) {
            String existingOrd = JDOMUtils.getAttributeValue(wordElement, "ord", false);
            if (existingOrd != null && existingOrd.length() != 0) {
                // Already numbered as a part of an earlier split word.
                continue;
            }
            String wordElementID = JDOMUtils.getAttributeValue(wordElement, "xml:id", false);
            List<String> relatedIDs = MergeAnnolexCorrectionsIntoAdornedXML.getRelatedWordIDs(wordElementID);
            ++wordOrdinal;
            String ordinalText = wordOrdinal + "";
            for (String relatedID : relatedIDs) {
                Element relatedElement = wordIDsToElements.get(relatedID);
                if (relatedElement == null) {
                    continue;
                }
                // Compare against the related part's own current value. Using
                // the triggering word's value made every part after the first
                // look unchanged, so it was skipped here and renumbered on a
                // later pass, leaving gaps in the ordinal sequence.
                MergeAnnolexCorrectionsIntoAdornedXML.changeAttribute(relatedElement, "ord", JDOMUtils.getAttributeValue(relatedElement, "ord", false), ordinalText);
            }
        }
    }

    /**
     * Counts the occurrences of a separator character in a string.
     *
     * <p>A string that consists of exactly one separator character counts as
     * containing no separators.
     *
     * @param s   the string to scan; must not be null
     * @param sep the separator character
     * @return the number of separators found
     */
    protected static int countSeparators(String s, char sep) {
        boolean isLoneSeparator = s.length() == 1 && s.charAt(0) == sep;
        if (isLoneSeparator) {
            return 0;
        }
        int count = 0;
        int position = s.indexOf(sep);
        while (position >= 0) {
            ++count;
            position = s.indexOf(sep, position + 1);
        }
        return count;
    }

    /**
     * Sets an attribute to a new value unless the new value is null or equal
     * to the old value.
     *
     * @param element  the element to modify
     * @param attrName name of the attribute
     * @param oldValue the attribute's current value, may be null
     * @param newValue the value to set, may be null
     * @return true if the attribute was set, false if nothing was done
     */
    protected static boolean changeAttribute(Element element, String attrName, String oldValue, String newValue) {
        if (newValue == null) {
            return false;
        }
        if (oldValue != null && newValue.equals(oldValue)) {
            return false;
        }
        JDOMUtils.setAttributeValue(element, attrName, newValue);
        return true;
    }

    /**
     * Returns the IDs of all word parts that belong together with a word.
     *
     * <p>For a word that is unknown, has no "part" attribute, is marked part
     * "N", or has no '.' in its ID, the result is just the given ID. For a
     * split word the IDs {@code root.1} to {@code root.19} that exist in the
     * word index are returned in ascending order, where {@code root} is the
     * given ID up to its last '.'. If none of those exist the given ID itself
     * is returned, so the result is never empty.
     *
     * @param wordID the ID of the word to look up
     * @return a new, non-empty list of related word IDs
     */
    public static List<String> getRelatedWordIDs(String wordID) {
        List<String> result = ListFactory.createNewList();
        Element wordElement = wordIDsToElements.get(wordID);
        if (wordElement != null) {
            String part = wordElement.getAttributeValue("part");
            if (part != null && !part.equals("N")) {
                int lastDotPos = wordID.lastIndexOf('.');
                if (lastDotPos >= 0) {
                    String rootWordID = wordID.substring(0, lastDotPos);
                    // Probe the sibling IDs root.1 .. root.19.
                    for (int i = 1; i < 20; ++i) {
                        String otherWordID = rootWordID + "." + i;
                        if (wordIDsToElements.containsKey(otherWordID)) {
                            result.add(otherWordID);
                        }
                    }
                }
            }
        }
        // Fall back to the word itself. This also covers a split word whose
        // sibling probe found nothing (for example a suffix outside 1..19);
        // previously that produced an empty list, which callers index into.
        if (result.isEmpty()) {
            result.add(wordID);
        }
        return result;
    }

    /**
     * Expands the file name wildcards that follow the two directory arguments
     * and processes every resulting file.
     *
     * @param args command-line arguments; entries from index 2 on are file
     *             names or wildcards
     * @return the number of files handed to processOneFile
     * @throws Exception if wildcard expansion or path resolution fails
     */
    protected static int processFiles(String[] args) throws Exception {
        int patternCount = args.length - 2;
        String[] wildCards = new String[patternCount];
        System.arraycopy(args, 2, wildCards, 0, patternCount);

        String[] fileNames = FileNameUtils.expandFileNameWildcards(wildCards);
        docsToProcess = fileNames.length;
        for (String fileName : fileNames) {
            MergeAnnolexCorrectionsIntoAdornedXML.processOneFile(fileName);
        }
        return fileNames.length;
    }

    /**
     * Prints the four collected problem reports (bad tags, distinct bad tags,
     * lemma/tag mismatches, distinct mismatches), each preceded by two blank
     * lines.
     */
    protected static void printMismatches() {
        String[] reportLabels = {
            "List of bad part of speech tags.",
            "Combined list of bad part of speech tags ",
            "List of individual pos/lemma mismatches",
            "Combined list of pos/lemma mismatches "
        };
        List<Set<String>> reportSets = new ArrayList<Set<String>>();
        reportSets.add(badPosTags);
        reportSets.add(combinedBadPosTags);
        reportSets.add(mismatches);
        reportSets.add(combinedMismatches);

        for (int report = 0; report < reportLabels.length; ++report) {
            printStream.println();
            printStream.println();
            MergeAnnolexCorrectionsIntoAdornedXML.printSet(reportLabels[report], reportSets.get(report));
        }
    }

    /**
     * Prints a labelled map, one numbered "key=value" line per entry.
     *
     * <p>A null map and an empty map are reported as such on a single line.
     * Null keys and values are printed as "null".
     *
     * @param mapLabel heading printed before the entries
     * @param map      the map to print, may be null
     * @param <K>      key type
     * @param <V>      value type
     */
    protected static <K, V> void printMap(String mapLabel, Map<K, V> map) {
        if (map == null) {
            printStream.println(mapLabel + " is null.");
            return;
        }
        if (map.isEmpty()) {
            printStream.println(mapLabel + " is empty.");
            return;
        }
        printStream.println(mapLabel);
        int lineNumber = 0;
        for (Map.Entry<K, V> entry : map.entrySet()) {
            // String concatenation renders a null key or value as "null".
            printStream.println(lineNumber + ": " + entry.getKey() + "=" + entry.getValue());
            ++lineNumber;
        }
    }

    /**
     * Prints a labelled set, one element per line.
     *
     * <p>A null set and an empty set are reported as such on a single line.
     *
     * @param setLabel heading printed before the elements
     * @param set      the set to print, may be null
     * @param <V>      element type
     */
    protected static <V> void printSet(String setLabel, Set<V> set) {
        if (set == null) {
            printStream.println(setLabel + " is null.");
            return;
        }
        if (set.isEmpty()) {
            printStream.println(setLabel + " is empty.");
            return;
        }
        printStream.println(setLabel);
        Iterator<V> elements = set.iterator();
        while (elements.hasNext()) {
            V element = elements.next();
            printStream.println(element.toString());
        }
    }

    /**
     * Prints the closing summary line of a run after a blank line.
     *
     * @param filesProcessed number of files that were processed
     * @param processingTime elapsed time in seconds
     */
    protected static void terminate(int filesProcessed, long processingTime) {
        printStream.println();
        String fileCount = Formatters.formatIntegerWithCommas(filesProcessed);
        String elapsed = Formatters.formatLongWithCommas(processingTime);
        printStream.println("Processed " + fileCount + " files in " + elapsed + " seconds.");
    }

    /**
     * Opens a corrections file, stores its corrected words in the static
     * corrections map and reports the word count.
     *
     * <p>The reported time covers the construction of the reader only; it is
     * taken before the corrected words are fetched from the reader.
     *
     * @param correctionFileName name of the corrections file
     * @return the reader that was used to load the file
     * @throws Exception if the file cannot be read
     */
    protected static CorrectedWordsFileReader loadCorrectionsFile(String correctionFileName) throws Exception {
        long startedAt = System.currentTimeMillis();
        CorrectedWordsFileReader reader = new CorrectedWordsFileReader(correctionFileName, null);
        long elapsedSeconds = (System.currentTimeMillis() - startedAt + 999L) / 1000L;
        correctedWordsMap = reader.readAllCorrectedWords();

        int wordCount = correctedWordsMap.size();
        String secondsLabel = elapsedSeconds != 1L ? " seconds" : " second";
        String wordsLabel = wordCount != 1 ? " words." : " word.";
        printStream.println("Corrections file " + correctionFileName + " loaded in " + Formatters.formatLongWithCommas(elapsedSeconds) + secondsLabel + " and contains " + Formatters.formatIntegerWithCommas(wordCount) + wordsLabel);
        return reader;
    }

    /**
     * Applies one correction to an existing word element.
     *
     * <p>If the corrected spelling contains a blank and the correction's ID
     * has no '.', the word has to be split: replacement words are inserted,
     * the existing element is scheduled for deletion and true is returned.
     * Otherwise the tok, spe, reg, pos and lem attributes are updated where
     * they differ and the element text is set to the given spelling part.
     *
     * @param document          the document being corrected (not used here)
     * @param wordElement       the word element to update
     * @param correctedWord     the correction to apply
     * @param correctedWordIDs  IDs of all corrections, in processing order
     * @param i                 position of this correction in that list
     * @param correctedSpelling text to store in the element
     * @return true if the word was split or at least one attribute changed
     */
    protected static boolean updateWord(Document document, Element wordElement, CorrectedWord correctedWord, List<String> correctedWordIDs, int i, String correctedSpelling) {
        Map<String, String> previousValues = JDOMUtils.getAttributeValues(wordElement);
        String newSpelling = correctedWord.getSpelling();
        String correctionID = correctedWord.getId();

        boolean containsBlank = newSpelling.indexOf(" ") >= 0;
        if (containsBlank) {
            if (verbose) {
                printStream.println("     Updating [" + newSpelling + "] at id=" + correctionID + ": contains blanks");
            }
            boolean alreadySplit = correctionID.indexOf(".") >= 0;
            if (!alreadySplit) {
                if (verbose) {
                    printStream.println("        --- Is not currently a split word, must split it now.");
                }
                MergeAnnolexCorrectionsIntoAdornedXML.insertWord(correctionID, correctedWord, correctedWordIDs, i);
                wordElementsToDelete.add(wordElement);
                return true;
            }
        }

        String blankFreeSpelling = StringUtils.stripChars(newSpelling, " ");
        // Non-short-circuit accumulation: every attribute is examined.
        boolean anyChange = false;
        anyChange |= MergeAnnolexCorrectionsIntoAdornedXML.changeAttribute(wordElement, "tok", previousValues.get("tok"), blankFreeSpelling);
        anyChange |= MergeAnnolexCorrectionsIntoAdornedXML.changeAttribute(wordElement, "spe", previousValues.get("spe"), blankFreeSpelling);
        anyChange |= MergeAnnolexCorrectionsIntoAdornedXML.changeAttribute(wordElement, "reg", previousValues.get("reg"), correctedWord.getStandardSpelling());
        anyChange |= MergeAnnolexCorrectionsIntoAdornedXML.changeAttribute(wordElement, "pos", previousValues.get("pos"), correctedWord.getPartsOfSpeech());
        anyChange |= MergeAnnolexCorrectionsIntoAdornedXML.changeAttribute(wordElement, "lem", previousValues.get("lem"), correctedWord.getLemmata());
        wordElement.setText(correctedSpelling);
        if (verbose) {
            printStream.println("     Updating [" + newSpelling + "] at id=" + correctedWord.getId());
        }
        return anyChange;
    }

    /**
     * Inserts new {@code w} elements for a corrected word after an existing sibling word.
     *
     * <p>The sibling is looked up from the ID that precedes position {@code i} in
     * {@code correctedWordIDs}, walking backwards while the candidate ID contains
     * {@code "-gap"}. The corrected spelling is split on single blanks; one new element
     * (a clone of the sibling element) is inserted per part, and each new element is
     * registered in {@code wordIDsToElements}.
     *
     * @param idToInsert       the {@code xml:id} for the new word; when the spelling has
     *                         several parts, {@code "." + partNumber} is appended
     * @param correctedWord    source of spelling, standard spelling, part of speech and lemma
     * @param correctedWordIDs ordered list of corrected word IDs
     * @param i                position of the word being inserted within {@code correctedWordIDs}
     * @return the number of word elements actually inserted (0 when the sibling or its
     *         parent could not be found)
     */
    protected static int insertWord(String idToInsert, CorrectedWord correctedWord, List<String> correctedWordIDs, int i) {
        int result = 0;
        // Start from the preceding ID and keep stepping back while it names a gap.
        String idSibling = correctedWordIDs.get(Math.max(0, i - 1));
        for (int j = i - 1; j >= 0 && idSibling.indexOf("-gap") >= 0; --j) {
            idSibling = correctedWordIDs.get(Math.max(0, j));
        }
        if (wordIDsToElements.containsKey(idSibling)) {
            Element wordElement = wordIDsToElements.get(idSibling);
            Parent parent = wordElement.getParent();
            // Sibling is detached: fall back to an element already registered under the new ID.
            if (parent == null && (wordElement = wordIDsToElements.get(idToInsert)) != null) {
                parent = wordElement.getParent();
            }
            if (parent != null) {
                Element parentElement = (Element)parent;
                int index = parent.indexOf((Content)wordElement);
                String spelling = correctedWord.getSpelling();
                String[] spellParts = spelling.split(" ");
                // From here on "spelling" is the blank-free form (presumably what stripChars yields).
                spelling = StringUtils.stripChars(spelling, " ");
                int l = spellParts.length;
                if (verbose) {
                    printStream.println("     Adding [" + spelling + "] with " + l + " token parts" + " at id=" + idToInsert);
                }
                for (int k = 0; k < l; ++k) {
                    Element newWordElement = wordElement.clone();
                    newWordElement.setText(spellParts[k]);
                    int kk = k + 1;
                    String theIDValue = idToInsert;
                    if (l > 1) {
                        theIDValue = theIDValue + "." + kk;
                    }
                    JDOMUtils.setAttributeValue(newWordElement, "xml:id", theIDValue);
                    JDOMUtils.setAttributeValue(newWordElement, "tok", spellParts[k]);
                    JDOMUtils.setAttributeValue(newWordElement, "spe", spelling);
                    JDOMUtils.setAttributeValue(newWordElement, "reg", correctedWord.getStandardSpelling());
                    JDOMUtils.setAttributeValue(newWordElement, "pos", correctedWord.getPartsOfSpeech());
                    String eosValue = "0";
                    if (spelling.equals(".") || spelling.equals("?") || spelling.equals("!")) {
                        Element nextWordElement = wordIDsToElements.get(idSibling);
                        String nextEOS = JDOMUtils.getAttributeValue(nextWordElement, "eos", false);
                        // Null-safe comparison: a sibling without an "eos" attribute must not
                        // abort the merge; it is treated like any value other than "0".
                        if ("0".equals(nextEOS)) {
                            eosValue = "1";
                        }
                        if (verbose) {
                            printStream.println("     Adding [" + spelling + "], nextEOS = [" + nextEOS + "], " + " eosValue=[" + eosValue + "] at id=" + idToInsert);
                        }
                    }
                    JDOMUtils.setAttributeValue(newWordElement, "eos", eosValue);
                    JDOMUtils.setAttributeValue(newWordElement, "lem", correctedWord.getLemmata());
                    JDOMUtils.setAttributeValue(newWordElement, "ord", "-1");
                    // "N" for an unsplit word, otherwise "I" / "M" / "F" for the first,
                    // middle and last part respectively.
                    String partString = l <= 1 ? "N" : (kk == 1 ? "I" : (kk >= spellParts.length ? "F" : "M"));
                    JDOMUtils.setAttributeValue(newWordElement, "part", partString);
                    // A blank separator element goes before the new word unless the spelling
                    // is punctuation or a symbol.
                    if (!CharUtils.isPunctuationOrSymbol(spelling)) {
                        Element cElement = clonableCElement.clone();
                        cElement.setText(" ");
                        parentElement.addContent(++index, (Content)cElement);
                    }
                    parentElement.addContent(++index, (Content)newWordElement);
                    wordIDsToElements.put(theIDValue, newWordElement);
                    ++result;
                }
            } else {
                printStream.println("     ***** Adding at id=" + idToInsert + ": sibling id " + idSibling + " parent not found.");
            }
        } else {
            printStream.println("     ***** Adding at id=" + idToInsert + ": sibling id " + idSibling + " not found.");
        }
        return result;
    }

    /**
     * Deletes every word element in the given list, in list order.
     *
     * @param wordElementsToDelete the word elements to remove
     * @param sortedWordIDs        sorted word IDs, passed through to each single deletion
     */
    protected static void deleteWordElements(List<Element> wordElementsToDelete, List<String> sortedWordIDs) {
        for (Element doomedWord : wordElementsToDelete) {
            MergeAnnolexCorrectionsIntoAdornedXML.deleteWordElement(doomedWord, sortedWordIDs);
        }
    }

    /**
     * Deletes every gap element in the given list, in list order.
     *
     * @param gapElementsToDelete the gap elements to remove
     */
    protected static void deleteGapElements(List<Element> gapElementsToDelete) {
        for (Element doomedGap : gapElementsToDelete) {
            MergeAnnolexCorrectionsIntoAdornedXML.deleteGapElement(doomedGap);
        }
    }

    /**
     * Converts each listed gap element into a word element, using the corrected word
     * registered under the gap's {@code xml:id}. Gaps with no matching corrected word
     * are reported on the print stream and left unchanged.
     *
     * @param gapElementsToUpdate the gap elements to convert
     */
    protected static void replaceGapElementsWithWords(List<Element> gapElementsToUpdate) {
        for (Element gap : gapElementsToUpdate) {
            String gapId = JDOMUtils.getAttributeValue(gap, "xml:id", false);
            CorrectedWord replacement = correctedWordsMap.get(gapId);
            if (replacement == null) {
                printStream.println("     ***** Replacing gap with id=" + gapId + ": failed, no matching corrected word found.");
            } else {
                MergeAnnolexCorrectionsIntoAdornedXML.gapToWord(gap, replacement);
            }
        }
    }

    /**
     * Turns a gap element into a {@code w} element in place, taking spelling, standard
     * spelling, part of speech, lemma and the new {@code xml:id} from the corrected word.
     *
     * <p>A blank separator element is inserted before the new word unless its spelling is
     * punctuation or a symbol. A second separator is inserted directly after the new word
     * when the next element sibling is a {@code w} whose {@code spe} value is not
     * punctuation or a symbol. On success the {@code deletedGaps} and {@code addedWords}
     * counters are both incremented.
     *
     * <p>Nothing happens when either argument is {@code null}. A gap that is not attached
     * to a parent element is reported on the print stream and left unchanged.
     *
     * @param gapElement    the gap element to convert
     * @param correctedWord the corrected word supplying the new attribute values
     */
    protected static void gapToWord(Element gapElement, CorrectedWord correctedWord) {
        if (gapElement == null || correctedWord == null) {
            return;
        }
        String id = JDOMUtils.getAttributeValue(gapElement, "xml:id", false);
        Parent rawParent = gapElement.getParent();
        // A detached gap (or one hanging directly off the document) cannot be given
        // separators; report it instead of failing with an NPE / ClassCastException.
        if (!(rawParent instanceof Element)) {
            printStream.println("     ***** Replacing gap with id=" + id + ": failed, gap element has no parent element.");
            return;
        }
        Element parent = (Element)rawParent;
        int index = parent.indexOf((Content)gapElement);
        int n = parent.getContentSize();
        // Find the first element after the gap. The scan is bounded by the content size,
        // so a gap that is the last child no longer triggers an out-of-range read.
        Element nextElement = null;
        for (int i = index + 1; i < n && nextElement == null; ++i) {
            Content candidate = parent.getContent(i);
            if (candidate instanceof Element) {
                nextElement = (Element)candidate;
            }
        }
        String nextSpelling = null;
        if (nextElement != null && nextElement.getName().equals("w")) {
            nextSpelling = JDOMUtils.getAttributeValue(nextElement, "spe", false);
        }
        List<Attribute> attributes = new ArrayList<Attribute>();
        gapElement.setName("w");
        String spelling = correctedWord.getSpelling();
        attributes.add(new Attribute("tok", spelling));
        attributes.add(new Attribute("spe", spelling));
        attributes.add(new Attribute("reg", correctedWord.getStandardSpelling()));
        attributes.add(new Attribute("pos", correctedWord.getPartsOfSpeech()));
        attributes.add(new Attribute("eos", "0"));
        attributes.add(new Attribute("lem", correctedWord.getLemmata()));
        attributes.add(new Attribute("ord", "-1"));
        attributes.add(new Attribute("part", "N"));
        // Replaces every attribute the gap had; the id is set again right after.
        gapElement.setAttributes(attributes);
        JDOMUtils.setAttributeValue(gapElement, "xml:id", correctedWord.getUpdatedId());
        gapElement.setText(spelling);
        // Track where the converted word ends up so the trailing separator is placed
        // right after it whether or not a leading separator was inserted.
        int wordIndex = index;
        if (!CharUtils.isPunctuationOrSymbol(spelling)) {
            Element leadingSeparator = clonableCElement.clone();
            leadingSeparator.setText(" ");
            parent.addContent(index, (Content)leadingSeparator);
            ++wordIndex;
        }
        if (nextSpelling != null && !CharUtils.isPunctuationOrSymbol(nextSpelling)) {
            Element trailingSeparator = clonableCElement.clone();
            trailingSeparator.setText(" ");
            parent.addContent(wordIndex + 1, (Content)trailingSeparator);
        }
        ++deletedGaps;
        ++addedWords;
    }

    /**
     * Detaches a gap element from its parent and counts it in {@code deletedGaps}.
     * Does nothing for a {@code null} element or one that has no parent.
     *
     * @param gapElement the gap element to remove
     */
    protected static void deleteGapElement(Element gapElement) {
        if (gapElement == null) {
            return;
        }
        Parent owner = gapElement.getParent();
        String gapId = JDOMUtils.getAttributeValue(gapElement, "xml:id", false);
        if (owner == null) {
            return;
        }
        owner.removeContent((Content)gapElement);
        if (verbose) {
            printStream.println("     Deleting gap at id=" + gapId);
        }
        ++deletedGaps;
    }

    /**
     * Detaches a word element from its parent and, when the deleted word carried
     * {@code eos="1"}, copies that value onto the word whose ID precedes it in
     * {@code sortedWordIDs}, then removes the deleted ID from that list.
     *
     * <p>A missing {@code eos} attribute is treated as {@code "0"}. If the deleted word's
     * ID is missing or is not present in {@code sortedWordIDs}, the list is left untouched.
     *
     * @param wordElement   the word element to delete; {@code null} is ignored
     * @param sortedWordIDs word IDs in the order expected by
     *                      {@link Collections#binarySearch(List, Object)}; may be modified
     */
    protected static void deleteWordElement(Element wordElement, List<String> sortedWordIDs) {
        if (wordElement == null) {
            return;
        }
        String eos = JDOMUtils.getAttributeValue(wordElement, "eos", true);
        String spelling = JDOMUtils.getAttributeValue(wordElement, "spe", true);
        String id = JDOMUtils.getAttributeValue(wordElement, "xml:id", true);
        if (eos == null) {
            eos = "0";
        }
        Parent parent = wordElement.getParent();
        if (parent != null) {
            parent.removeContent((Content)wordElement);
            if (verbose) {
                printStream.println("     Deleting [" + spelling + "] at id=" + id);
            }
            ++deletedWords;
        }
        // NOTE(review): the ID is only dropped from sortedWordIDs when eos is "1", so a
        // deleted word with eos "0" can still be picked as the "previous" word by a later
        // deletion. Confirm whether that is intended.
        if (!eos.equals("1") || id == null) {
            return;
        }
        int index = Collections.binarySearch(sortedWordIDs, id);
        if (index < 0) {
            // binarySearch reports a missing key as a negative value; using it as a
            // list index would throw, so there is nothing to transfer or remove.
            return;
        }
        if (index > 0) {
            String idSibling = sortedWordIDs.get(index - 1);
            if (wordIDsToElements.containsKey(idSibling)) {
                Element previousWordElement = wordIDsToElements.get(idSibling);
                String oldEOS = JDOMUtils.getAttributeValue(previousWordElement, "eos", true);
                if (MergeAnnolexCorrectionsIntoAdornedXML.changeAttribute(previousWordElement, "eos", oldEOS, eos)) {
                    ++modifiedWords;
                    if (verbose) {
                        printStream.println("        Turned on EOS for word ID " + idSibling);
                    }
                }
            }
        }
        sortedWordIDs.remove(index);
    }

    /**
     * Builds a new list holding every key of {@code wordIDsToElements}, sorted in
     * natural {@code String} order.
     *
     * @return a freshly created, sorted list of word IDs
     */
    protected static List<String> getSortedWordIDs() {
        List<String> ids = ListFactory.createNewList();
        for (String wordId : wordIDsToElements.keySet()) {
            ids.add(wordId);
        }
        Collections.sort(ids);
        return ids;
    }

    /**
     * Detaches a word element from its parent and, when it carried {@code eos="1"},
     * copies that value onto the word whose ID sits just before position {@code i} in
     * {@code correctedWordIDs} (position 0 when {@code i} is 0).
     *
     * @param wordElement      the word element to delete; {@code null} is ignored
     * @param correctedWordIDs ordered list of corrected word IDs
     * @param i                position of the deleted word within {@code correctedWordIDs}
     */
    protected static void deleteWordOld(Element wordElement, List<String> correctedWordIDs, int i) {
        if (wordElement == null) {
            return;
        }
        String eos = JDOMUtils.getAttributeValue(wordElement, "eos", true);
        String spelling = JDOMUtils.getAttributeValue(wordElement, "spe", true);
        String id = JDOMUtils.getAttributeValue(wordElement, "xml:id", true);
        if (eos == null) {
            eos = "0";
        }
        Parent owner = wordElement.getParent();
        if (owner != null) {
            owner.removeContent((Content)wordElement);
            if (verbose) {
                printStream.println("     Deleting [" + spelling + "] at id=" + id);
            }
            ++deletedWords;
        }
        // Only a sentence-final word needs its marker handed to the preceding word.
        if (!eos.equals("1")) {
            return;
        }
        String idSibling = correctedWordIDs.get(Math.max(0, i - 1));
        if (!wordIDsToElements.containsKey(idSibling)) {
            return;
        }
        Element previousWordElement = wordIDsToElements.get(idSibling);
        String oldEOS = JDOMUtils.getAttributeValue(previousWordElement, "eos", true);
        boolean changed = MergeAnnolexCorrectionsIntoAdornedXML.changeAttribute(previousWordElement, "eos", oldEOS, eos);
        if (changed) {
            ++modifiedWords;
            if (verbose) {
                printStream.println("        Turned on EOS for word ID " + idSibling);
            }
        }
    }

    // Static initializer: gives the class-level working state its starting values.
    static {
        // No document is loaded yet.
        document = null;
        // ID -> element lookups; LinkedHashMap keeps insertion order.
        wordIDsToElements = new LinkedHashMap<String, Element>();
        gapIDsToElements = new LinkedHashMap<String, Element>();
        // Corrections and part-of-speech tags start unset; they are not assigned here.
        correctedWordsMap = null;
        posTags = null;
        // Sorted sets for bad POS tags and mismatches. The "combined" variants
        // presumably accumulate across documents — confirm against their users.
        badPosTags = SetFactory.createNewSortedSet();
        combinedBadPosTags = SetFactory.createNewSortedSet();
        mismatches = SetFactory.createNewSortedSet();
        combinedMismatches = SetFactory.createNewSortedSet();
        // Counters incremented by the insert/update/delete helpers.
        addedWords = 0;
        deletedWords = 0;
        modifiedWords = 0;
        deletedGaps = 0;
        // Template that the insert helpers clone to create blank separator elements;
        // it is null here and must be assigned before those helpers run.
        clonableCElement = null;
        // Pending deletions.
        wordElementsToDelete = ListFactory.createNewList();
        gapElementsToDelete = ListFactory.createNewList();
        // Per-change progress messages are printed when verbose is true.
        verbose = true;
        // Debug flag is on by default; its consumers are not visible in this section.
        debug = true;
    }
}

