/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner;

import edu.northwestern.at.morphadorner.AddWordAttributesFilter;
import edu.northwestern.at.morphadorner.MorphAdornerLogger;
import edu.northwestern.at.morphadorner.MorphAdornerSettings;
import edu.northwestern.at.morphadorner.MorphAdornerUtils;
import edu.northwestern.at.morphadorner.MorphAdornerXMLWriter;
import edu.northwestern.at.morphadorner.MorphAdornerXMLWriterFactory;
import edu.northwestern.at.morphadorner.PseudoPageAdderFilter;
import edu.northwestern.at.morphadorner.StripWordAttributesFilter;
import edu.northwestern.at.morphadorner.corpuslinguistics.abbreviations.Abbreviations;
import edu.northwestern.at.morphadorner.corpuslinguistics.adornedword.AdornedWord;
import edu.northwestern.at.morphadorner.corpuslinguistics.inputter.TextInputter;
import edu.northwestern.at.morphadorner.corpuslinguistics.inputter.TextInputterFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.Lemmatizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.LemmatizerFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.lexicon.Lexicon;
import edu.northwestern.at.morphadorner.corpuslinguistics.namerecognizer.Names;
import edu.northwestern.at.morphadorner.corpuslinguistics.namestandardizer.NameStandardizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.outputter.AdornedWordOutputter;
import edu.northwestern.at.morphadorner.corpuslinguistics.outputter.AdornedWordOutputterFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.morphadorner.corpuslinguistics.partsofspeech.PartOfSpeechTagsFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.PartOfSpeechRetagger;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.PartOfSpeechRetaggerFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.PartOfSpeechTagger;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.PartOfSpeechTaggerFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.guesser.PartOfSpeechGuesser;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.guesser.PartOfSpeechGuesserFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.smoothing.contextual.ContextualSmoother;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.smoothing.contextual.ContextualSmootherFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.smoothing.lexical.LexicalSmoother;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.smoothing.lexical.LexicalSmootherFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.transitionmatrix.TransitionMatrix;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitter;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.SentenceSplitterFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingmapper.SpellingMapper;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingstandardizer.SpellingStandardizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PennTreebankTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PostTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PostTokenizerFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PreTokenizerFactory;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizerFactory;
import edu.northwestern.at.morphadorner.tools.ExtendedAdornedWord;
import edu.northwestern.at.morphadorner.tools.ExtendedAdornedWordFilter;
import edu.northwestern.at.morphadorner.tools.FilterAdornedFile;
import edu.northwestern.at.morphadorner.xgtagger.XGMisc;
import edu.northwestern.at.morphadorner.xgtagger.XGParser;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.FileUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.IsCloseableObject;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.SortedArrayList;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.TaggedStrings;
import edu.northwestern.at.utils.TaggedStringsSet;
import edu.northwestern.at.utils.URLUtils;
import edu.northwestern.at.utils.logger.UsesLogger;
import edu.northwestern.at.utils.xml.DOMUtils;
import edu.northwestern.at.utils.xml.ExtendedXMLFilterImpl;
import edu.northwestern.at.utils.xml.TEITagClassifier;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.XMLReaderFactory;

public class MorphAdorner {
    // Class-wide map from a string key to a MorphAdorner instance, created as a synchronized map.
    protected static Map<String, MorphAdorner> storedAdorners = MapFactory.createNewSynchronizedMap();
    // Default KWIC width (presumably a keyword-in-context display width; not used in the code visible here).
    public int defaultKWICWidth = 80;
    // Word list passed to MorphAdornerUtils.getWordList when settings.useLatinWordList is set.
    public String latinWordsFileName = "resources/latinwords.txt";
    // Word list passed to MorphAdornerUtils.getExtraWordsList during initializeAdornment.
    public String extraWordsFileName = "resources/extrawords.txt";
    // Extra words loaded by initializeAdornment and given to the part of speech guesser.
    public TaggedStrings extraWords = null;
    // Tokenizer instance created eagerly; not referenced in the code visible here.
    public WordTokenizer spellingTokenizer = new PennTreebankTokenizer();
    // The following components are created and wired together by initializeAdornment.
    public PartOfSpeechTags partOfSpeechTags;
    public PartOfSpeechTagger tagger;
    public PartOfSpeechRetagger retagger;
    public Lexicon wordLexicon;
    public PartOfSpeechGuesser partOfSpeechGuesser;
    public Lexicon suffixLexicon;
    public TransitionMatrix transitionMatrix;
    // May be null: both initializeAdornment and adornText test it against null before use.
    public SpellingStandardizer spellingStandardizer;
    // May be null: adornText tests it against null before mapping spellings.
    public SpellingMapper spellingMapper;
    public NameStandardizer nameStandardizer;
    // May be null: adornText tests it against null before lemmatizing.
    public Lemmatizer lemmatizer;
    // Supplies the place names, first names and surnames registered as proper-noun word lists.
    public Names names = new Names();
    // Abbreviations given to the sentence splitter, word tokenizer and part of speech guesser.
    public Abbreviations abbreviations = new Abbreviations();
    // Filled from settings.abbreviationsMainTextURL when that setting is non-empty.
    public Abbreviations mainAbbreviations = new Abbreviations();
    // Filled from settings.abbreviationsSideTextURL when non-empty; passed to fixSideWords by adornXML.
    public Abbreviations sideAbbreviations = new Abbreviations();
    // Replaced by partOfSpeechTags.getTagSeparator() in initializeAdornment.
    public String tagSeparator = "|";
    // Replaced by lemmatizer.getLemmaSeparator() in initializeAdornment.
    public String lemmaSeparator = "|";
    // Logger and settings; both stay null when the no-argument constructor is used.
    public MorphAdornerLogger morphAdornerLogger = null;
    public MorphAdornerSettings morphAdornerSettings = null;
    // Not referenced in the code visible here.
    public MorphAdornerSettings tokenizationSettings = null;
    // Used by inSideText to decide whether a tag name is a side text tag.
    public TEITagClassifier tagClassifier = new TEITagClassifier();

    /**
     * Creates an adorner on which only the field initializers have run.
     * The logger, the settings and every component built by
     * {@link #initializeAdornment()} are left unset.
     */
    public MorphAdorner() {
    }

    /**
     * Creates an adorner, loads its settings and initializes all adornment
     * components.
     *
     * <p>The steps are: create the settings object, create the logger,
     * initialize the settings with that logger, read the settings from
     * {@code args}, print the banner, call {@link #initializeAdornment()} and
     * finally mark the settings as initialized.
     *
     * @param args             program arguments handed to the settings object
     * @param logConfiguration logger configuration passed to the logger
     * @param logDirectory     log directory passed to the logger
     * @throws IllegalStateException if the logger cannot be created
     */
    public MorphAdorner(String[] args, String logConfiguration, String logDirectory) {
        this.morphAdornerSettings = new MorphAdornerSettings();
        try {
            this.morphAdornerLogger = new MorphAdornerLogger(logConfiguration, logDirectory, this.morphAdornerSettings);
        }
        catch (Exception e) {
            // Nothing below can work without a logger: continuing would only
            // produce an unexplained NullPointerException a few lines later,
            // so fail here and keep the real cause attached.
            throw new IllegalStateException("Unable to create the MorphAdorner logger (configuration=" + logConfiguration + ", directory=" + logDirectory + ")", e);
        }
        this.morphAdornerSettings.initializeSettings(this.morphAdornerLogger);
        try {
            this.morphAdornerSettings.getSettings(args);
        }
        catch (Exception e) {
            // Best effort, as before: report the problem and carry on with
            // whatever settings are in place.
            e.printStackTrace();
        }
        this.morphAdornerLogger.println("programBanner");
        this.morphAdornerLogger.println("Initializing_please_wait");
        this.initializeAdornment();
        this.morphAdornerSettings.initialized = true;
    }

    /**
     * Creates an adorner using the log configuration
     * {@code "morphadornerlog.config"} and the log directory {@code "log"}.
     *
     * @param args program arguments handed to the settings object
     */
    public MorphAdorner(String[] args) {
        this(args, "morphadornerlog.config", "log");
    }

    /**
     * Returns the class-wide map of stored adorners.
     *
     * @return the map itself, not a copy
     */
    public static Map<String, MorphAdorner> getStoredAdorners() {
        return storedAdorners;
    }

    /**
     * Replaces the class-wide map of stored adorners.
     *
     * @param storedAdorners the map to use from now on; stored as given, without copying
     */
    public static void setStoredAdorners(Map<String, MorphAdorner> storedAdorners) {
        MorphAdorner.storedAdorners = storedAdorners;
    }

    /**
     * Creates the adornment components from the current settings and wires
     * them together: part of speech tags, tagger and retagger with their
     * smoothers, word and suffix lexicons, the part of speech guesser and its
     * auxiliary word lists, abbreviations, tagger rules, transition matrix,
     * spelling and name standardizers, and the lemmatizer.
     *
     * <p>Any exception raised along the way is printed to standard error and
     * not rethrown, so a failure leaves the adorner partially initialized.
     */
    protected void initializeAdornment() {
        try {
            this.partOfSpeechTags = PartOfSpeechTagsFactory.newPartOfSpeechTags(this.morphAdornerSettings.properties);
            this.tagSeparator = this.partOfSpeechTags.getTagSeparator();

            // Tagger and retagger share one post tokenizer and one logger.
            PostTokenizer postTokenizer = PostTokenizerFactory.newPostTokenizer(this.morphAdornerSettings.properties);
            this.tagger = PartOfSpeechTaggerFactory.newPartOfSpeechTagger(this.morphAdornerSettings.properties);
            this.retagger = PartOfSpeechRetaggerFactory.newPartOfSpeechRetagger(this.morphAdornerSettings.properties);
            this.tagger.setPostTokenizer(postTokenizer);
            this.retagger.setPostTokenizer(postTokenizer);
            ((UsesLogger)((Object)this.tagger)).setLogger(this.morphAdornerLogger.getLogger());
            ((UsesLogger)((Object)this.retagger)).setLogger(this.morphAdornerLogger.getLogger());

            // Each of the two gets its own pair of smoothers.
            ContextualSmoother cSmoother = ContextualSmootherFactory.newContextualSmoother(this.morphAdornerSettings.properties);
            cSmoother.setPartOfSpeechTagger(this.tagger);
            LexicalSmoother lSmoother = LexicalSmootherFactory.newLexicalSmoother(this.morphAdornerSettings.properties);
            lSmoother.setPartOfSpeechTagger(this.tagger);
            this.tagger.setContextualSmoother(cSmoother);
            this.tagger.setLexicalSmoother(lSmoother);
            ContextualSmoother cSmoother2 = ContextualSmootherFactory.newContextualSmoother(this.morphAdornerSettings.properties);
            cSmoother2.setPartOfSpeechTagger(this.retagger);
            LexicalSmoother lSmoother2 = LexicalSmootherFactory.newLexicalSmoother(this.morphAdornerSettings.properties);
            lSmoother2.setPartOfSpeechTagger(this.retagger);
            this.retagger.setContextualSmoother(cSmoother2);
            this.retagger.setLexicalSmoother(lSmoother2);
            this.tagger.setRetagger(this.retagger);
            this.morphAdornerLogger.println("Using", new Object[]{this.tagger.toString()});
            this.morphAdornerLogger.println("Using", new Object[]{this.retagger.toString()});

            // Lexicons and the guesser that falls back on them.
            this.wordLexicon = MorphAdornerUtils.loadWordLexicon(this.morphAdornerSettings, this.morphAdornerLogger);
            this.wordLexicon.setPartOfSpeechTags(this.partOfSpeechTags);
            this.partOfSpeechGuesser = PartOfSpeechGuesserFactory.newPartOfSpeechGuesser(this.morphAdornerSettings.properties);
            boolean checkPossessives = this.morphAdornerSettings.getBooleanProperty("partofspeechguesser.check_possessives", false);
            this.partOfSpeechGuesser.setCheckPossessives(checkPossessives);
            this.tagger.setPartOfSpeechGuesser(this.partOfSpeechGuesser);
            this.partOfSpeechGuesser.setWordLexicon(this.wordLexicon);
            ((UsesLogger)((Object)this.partOfSpeechGuesser)).setLogger(this.morphAdornerLogger.getLogger());
            this.suffixLexicon = MorphAdornerUtils.loadSuffixLexicon(this.morphAdornerSettings, this.morphAdornerLogger);
            this.partOfSpeechGuesser.setSuffixLexicon(this.suffixLexicon);

            // Auxiliary word lists: extra words and names are registered with
            // the singular proper noun tag, Latin words with the foreign word tag.
            this.extraWords = MorphAdornerUtils.getExtraWordsList(this.extraWordsFileName, this.partOfSpeechTags.getSingularProperNounTag(), "Loaded_extra_words", this.morphAdornerSettings, this.morphAdornerLogger);
            this.partOfSpeechGuesser.addAuxiliaryWordList(this.extraWords);
            this.partOfSpeechGuesser.addAuxiliaryWordList(new TaggedStringsSet(this.names.getPlaceNames().keySet(), this.partOfSpeechTags.getSingularProperNounTag()));
            this.partOfSpeechGuesser.addAuxiliaryWordList(new TaggedStringsSet(this.names.getFirstNames(), this.partOfSpeechTags.getSingularProperNounTag()));
            this.partOfSpeechGuesser.addAuxiliaryWordList(new TaggedStringsSet(this.names.getSurnames(), this.partOfSpeechTags.getSingularProperNounTag()));
            if (this.morphAdornerSettings.useLatinWordList) {
                this.partOfSpeechGuesser.addAuxiliaryWordList(MorphAdornerUtils.getWordList(this.latinWordsFileName, this.partOfSpeechTags.getForeignWordTag("latin"), "Loaded_latin_words", this.morphAdornerSettings, this.morphAdornerLogger));
            }

            // Abbreviation lists are only loaded when a location is configured.
            if (this.morphAdornerSettings.abbreviationsURL.length() > 0) {
                this.addAbbreviations(this.abbreviations, URLUtils.getURLFromFileNameOrURL(this.morphAdornerSettings.abbreviationsURL).toString(), "Loaded_abbreviations");
            }
            if (this.morphAdornerSettings.abbreviationsMainTextURL.length() > 0) {
                this.addAbbreviations(this.mainAbbreviations, URLUtils.getURLFromFileNameOrURL(this.morphAdornerSettings.abbreviationsMainTextURL).toString(), "Loaded_abbreviations");
            }
            if (this.morphAdornerSettings.abbreviationsSideTextURL.length() > 0) {
                this.addAbbreviations(this.sideAbbreviations, URLUtils.getURLFromFileNameOrURL(this.morphAdornerSettings.abbreviationsSideTextURL).toString(), "Loaded_abbreviations");
            }

            this.tagger.setLexicon(this.wordLexicon);
            MorphAdornerUtils.loadTaggerRules(this.tagger, this.morphAdornerSettings, this.morphAdornerLogger);
            this.transitionMatrix = MorphAdornerUtils.loadTransitionMatrix(this.tagger, this.morphAdornerSettings, this.morphAdornerLogger);

            this.spellingStandardizer = MorphAdornerUtils.createSpellingStandardizer(this.wordLexicon, this.names, this.morphAdornerSettings, this.morphAdornerLogger);
            this.spellingMapper = MorphAdornerUtils.createSpellingMapper(this.morphAdornerSettings.properties);
            this.nameStandardizer = MorphAdornerUtils.createNameStandardizer(this.wordLexicon, this.morphAdornerSettings, this.morphAdornerLogger);
            if (this.spellingStandardizer != null) {
                this.partOfSpeechGuesser.setSpellingStandardizer(this.spellingStandardizer);
            }

            this.lemmatizer = LemmatizerFactory.newLemmatizer(this.morphAdornerSettings.properties);
            this.lemmaSeparator = this.lemmatizer.getLemmaSeparator();
            this.lemmatizer.setLexicon(this.wordLexicon);
            // The standardizer is treated as optional above; dereferencing it
            // unconditionally here used to abort the remaining setup (lemmatizer
            // logger, guesser abbreviations) whenever it was absent.
            if (this.spellingStandardizer != null) {
                this.lemmatizer.setDictionary(this.spellingStandardizer.getStandardSpellings());
            }
            ((UsesLogger)((Object)this.lemmatizer)).setLogger(this.morphAdornerLogger.getLogger());
            this.partOfSpeechGuesser.setAbbreviations(this.abbreviations);
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Adorns every file named in the settings, one after the other.
     *
     * <p>When the {@code adorner.handle_xml} property is true, a file that
     * {@code MorphAdornerUtils.isAdorned} reports as adorned is readorned and
     * any other file goes through {@link #adornXML(String, boolean)};
     * otherwise the file goes through {@link #adornFile(String)}. An exception
     * raised for one file is printed and processing moves on to the next file.
     *
     * @param xmlTokenizeOnly passed through to {@link #adornXML(String, boolean)}
     */
    public void processInputFiles(boolean xmlTokenizeOnly) {
        final long processStartTime = System.currentTimeMillis();

        // Announce how much work there is.
        final int fileCount = this.morphAdornerSettings.fileNames.length;
        if (fileCount == 0) {
            this.morphAdornerLogger.println("No_files_to_process");
        } else if (fileCount == 1) {
            this.morphAdornerLogger.println("One_file_to_process");
        } else {
            this.morphAdornerLogger.println("Number_of_files_to_process", new Object[]{Formatters.formatIntegerWithCommas(this.morphAdornerSettings.fileNames.length)});
        }

        final boolean useXMLHandler = this.morphAdornerSettings.getBooleanProperty("adorner.handle_xml", false);
        MorphAdornerUtils.logMemoryUsage(this.morphAdornerLogger, "Before processing input texts: ");

        for (int fileIndex = 0; fileIndex < this.morphAdornerSettings.fileNames.length; ++fileIndex) {
            String inputFileName = this.morphAdornerSettings.fileNames[fileIndex];
            this.morphAdornerLogger.println("Processing_file", new Object[]{inputFileName});
            try {
                if (!useXMLHandler) {
                    this.adornFile(inputFileName);
                } else if (MorphAdornerUtils.isAdorned(inputFileName, 500)) {
                    this.readorn(inputFileName);
                } else {
                    this.adornXML(inputFileName, xmlTokenizeOnly);
                }
            }
            catch (Exception e) {
                // One bad file must not stop the remaining ones.
                e.printStackTrace();
            }
        }

        if (this.morphAdornerSettings.fileNames.length > 0) {
            this.morphAdornerLogger.println("All_files_adorned", new Object[]{MorphAdornerUtils.durationString(this.morphAdornerSettings, processStartTime)});
        }
    }

    /**
     * Adorns every file named in the settings; equivalent to
     * {@code processInputFiles(false)}.
     */
    public void processInputFiles() {
        this.processInputFiles(false);
    }

    /**
     * Adorns one XML input file and writes the adorned XML to the output
     * directory.
     *
     * <p>The input is loaded into a text inputter, which splits it into
     * segments. Each segment whose name starts with {@code "text"} (but is not
     * exactly {@code "text"}) is parsed to a DOM, its text is extracted and
     * adorned with {@link #adornText(String, URL)}, the adornments are merged
     * back into the DOM, and the result is stored back in the inputter. The
     * segments are then merged into a temporary file from which the XML writer
     * produces the output file.
     *
     * <p>Nothing is done, apart from a log message, when the settings say that
     * existing output must not be adorned again and the output file exists.
     *
     * @param inputFileName file name or URL of the XML text to adorn
     * @param tokenizeOnly  passed through to the XML writer
     * @throws Exception if any processing step fails; the inputter is closed
     *                   and the merged temporary file deleted before the
     *                   exception is passed on
     */
    public void adornXML(String inputFileName, boolean tokenizeOnly) throws Exception {
        if (!this.morphAdornerSettings.adornExistingXMLFiles && this.doesOutputFileNameExist(inputFileName)) {
            this.morphAdornerLogger.println("Skipping_file_which_is_already_adorned", new Object[]{inputFileName});
            return;
        }
        TextInputter inputter = TextInputterFactory.newTextInputter(this.morphAdornerSettings.properties);
        // Non-null only while the merged temporary file exists on disk.
        String mergedTempFileName = null;
        try {
            inputter.enableGapFixer(this.morphAdornerSettings.fixGapTags);
            inputter.enableOrigFixer(this.morphAdornerSettings.fixOrigTags);
            inputter.enableSplitWordsFixer(this.morphAdornerSettings.fixSplitWords, this.morphAdornerSettings.fixSplitWordsPatternReplacers);
            URL inputFileURL = URLUtils.getURLFromFileNameOrURL(inputFileName);
            inputter.loadText(inputFileURL, "utf-8", this.morphAdornerSettings.xmlSchema);
            int nSegments = inputter.getSegmentCount();
            String sSegments = Formatters.formatIntegerWithCommas(nSegments);
            this.morphAdornerLogger.println("Input_file_split", new Object[]{inputFileName, sSegments});
            int runningWordID = 0;
            Map<Integer, Integer> splitWords = MapFactory.createNewMap();
            int totalWords = 0;
            int totalPageBreaks = 0;
            for (int j = 0; j < nSegments; ++j) {
                String segmentName = inputter.getSegmentName(j);
                if (!segmentName.startsWith("text") || segmentName.equals("text")) continue;
                this.morphAdornerLogger.println("Processing_segment", new Object[]{segmentName, Formatters.formatIntegerWithCommas(j + 1), sSegments});
                String segmentText = inputter.getSegmentText(segmentName);
                Document document = XGParser.textToDOM(this.morphAdornerSettings.xgOptions, segmentText);
                MorphAdornerUtils.fixEmptySoftTags(this.morphAdornerSettings.xgOptions, document);
                MorphAdornerUtils.fixSupTags(document);
                totalPageBreaks += MorphAdornerUtils.countPageBreaks(document);
                // o[0] is the extracted text, o[1] the parser that extracted it.
                Object[] o = XGParser.extractText(this.morphAdornerSettings.xgOptions, document);
                XGParser xgParser = (XGParser)o[1];
                xgParser.setRunningWordID(runningWordID);
                AdornedWordOutputter outputter = this.adornText((String)o[0], null);
                this.morphAdornerLogger.println("Inserting_adornments_into_xml");
                long startTime = System.currentTimeMillis();
                Map<Integer, Integer> segmentSplitWords = XGParser.mergeAdornments(this.morphAdornerSettings.xgOptions, xgParser, document, segmentName, outputter, inputter);
                this.fixSideWords(document, this.sideAbbreviations);
                File segmentFile = File.createTempFile("mad", null);
                if (XGMisc.printNodeToFile(document, segmentFile.getAbsolutePath()) == 1) {
                    inputter.setSegmentText(segmentName, segmentFile);
                    if (!inputter.usesSegmentFiles()) {
                        segmentFile.delete();
                    }
                } else {
                    // The file was never handed to the inputter, so nothing
                    // else will ever remove it.
                    segmentFile.delete();
                }
                // Only words split into more than one part are remembered.
                for (Map.Entry<Integer, Integer> splitWord : segmentSplitWords.entrySet()) {
                    if (splitWord.getValue() <= 1) continue;
                    splitWords.put(splitWord.getKey(), splitWord.getValue());
                }
                this.morphAdornerLogger.println("Inserted_adornments_into_xml", new Object[]{MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime)});
                runningWordID = xgParser.getRunningWordID();
                totalWords += xgParser.getNumberOfAdornedWords();
                if (!inputter.usesSegmentFiles()) {
                    FileUtils.deleteFile(outputter.getOutputFileName());
                }
                // Drop the per-segment objects now so they can be reclaimed
                // while the next segment is being parsed.
                xgParser = null;
                document = null;
                outputter = null;
                o[0] = null;
                o[1] = null;
                o = null;
            }
            String outputFileName = this.getOutputFileName(inputFileName);
            long startTime = System.currentTimeMillis();
            this.morphAdornerLogger.println("Merging_adorned");
            mergedTempFileName = File.createTempFile("mad", null).getAbsolutePath();
            MorphAdorner.mergeXML(inputter, mergedTempFileName);
            this.morphAdornerLogger.println("Writing_merged", new Object[]{outputFileName});
            MorphAdornerXMLWriter xmlWriter = MorphAdornerXMLWriterFactory.newMorphAdornerXMLWriter(this.morphAdornerSettings.properties);
            xmlWriter.writeXML(mergedTempFileName, outputFileName, runningWordID, this.partOfSpeechTags, splitWords, totalWords, totalPageBreaks, this, tokenizeOnly);
            FileUtils.deleteFile(mergedTempFileName);
            mergedTempFileName = null;
            this.morphAdornerLogger.println("Adorned_XML_written", new Object[]{outputFileName, MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime)});
            splitWords = null;
            xmlWriter = null;
        }
        catch (Exception e) {
            // Release what the normal path would have released, then let the
            // original failure continue to the caller.
            try {
                if (mergedTempFileName != null) {
                    FileUtils.deleteFile(mergedTempFileName);
                }
                ((IsCloseableObject)((Object)inputter)).close();
            }
            catch (Exception ignored) {
                // A cleanup problem must not hide the failure that got us here.
            }
            throw e;
        }
        ((IsCloseableObject)((Object)inputter)).close();
        inputter = null;
        MorphAdornerUtils.logMemoryUsage(this.morphAdornerLogger, "After completing " + inputFileName + ": ");
    }

    /**
     * Prints one line per {@code w} and {@code pc} node of a document to
     * standard output: node name, {@code xml:id} attribute (empty when
     * absent), text content, and whether the node lies in side text.
     *
     * @param document the document whose word and punctuation nodes are listed
     */
    protected void printWords(Document document) {
        NodeList wordNodes = DOMUtils.getNodesByTagName(document, new String[]{"w", "pc"});
        if (wordNodes == null) {
            System.out.println("printWords: null node list found");
            return;
        }
        final int wordCount = wordNodes.getLength();
        int index = 0;
        while (index < wordCount) {
            Node wordNode = wordNodes.item(index);
            ++index;

            // Look up the identifier, falling back to an empty string.
            String id = "";
            NamedNodeMap attributes = wordNode.getAttributes();
            if (attributes != null) {
                Node idAttribute = attributes.getNamedItem("xml:id");
                if (idAttribute != null) {
                    id = idAttribute.getTextContent();
                }
            }

            StringBuilder line = new StringBuilder();
            line.append(wordNode.getNodeName()).append(" ");
            line.append(id).append(" ");
            line.append(wordNode.getTextContent()).append(" ");
            line.append(this.inSideText(wordNode));
            System.out.println(line.toString());
        }
    }

    /**
     * Rejoins abbreviations in side text whose final period ended up as a
     * token of its own.
     *
     * <p>The {@code w} and {@code pc} nodes are visited from last to first.
     * When a node lies in side text, consists of a single period, and the
     * text of the preceding node followed by that period is a known side text
     * abbreviation, the preceding node's text is replaced by the joined form
     * and the period node is removed. If the period node was marked as end of
     * sentence ({@code eos="1"}), that mark is carried over to the joined node.
     *
     * @param document          the document to fix in place
     * @param sideAbbreviations abbreviations recognized in side text
     */
    protected void fixSideWords(Document document, Abbreviations sideAbbreviations) {
        NodeList nl = DOMUtils.getNodesByTagName(document, new String[]{"w", "pc"});
        if (nl == null) {
            System.out.println("fixSideWords: null node list found");
            return;
        }
        // Stop at index 1: the first node has no predecessor to join with, and
        // asking the list for item(-1) yields null.
        for (int i = nl.getLength() - 1; i > 0; --i) {
            Element w = (Element)nl.item(i);
            if (!this.inSideText(w)) continue;
            String wText = w.getTextContent();
            if (!wText.equals(".")) continue;
            Element prevW = (Element)nl.item(i - 1);
            String mergedWord = prevW.getTextContent() + wText;
            if (!sideAbbreviations.isKnownAbbreviation(mergedWord)) continue;
            // The period is about to disappear, so its end-of-sentence mark has
            // to move to the node that survives.
            boolean endsSentence = "1".equals(w.getAttribute("eos"));
            prevW.setTextContent(mergedWord);
            if (endsSentence) {
                prevW.setAttribute("eos", "1");
            }
            w.getParentNode().removeChild(w);
        }
    }

    /**
     * Tells whether a node is, or is nested inside, a side text element.
     *
     * <p>The node's own name is checked first, then the names of its
     * ancestors from the nearest outward. The walk ends at the first side
     * text tag, at the top of the tree, or at a text node.
     *
     * @param element the node to test
     * @return {@code true} if the node or one of the ancestors examined has a
     *         side text tag name
     */
    protected boolean inSideText(Node element) {
        if (this.tagClassifier.isSideTextTag(element.getNodeName())) {
            return true;
        }
        Node ancestor = element.getParentNode();
        while (ancestor != null && ancestor.getNodeType() != Node.TEXT_NODE) {
            if (this.tagClassifier.isSideTextTag(ancestor.getNodeName())) {
                return true;
            }
            ancestor = ancestor.getParentNode();
        }
        return false;
    }

    /**
     * Derives the output file name for an input file.
     *
     * <p>The path is stripped from the input name, the remainder is placed in
     * the configured output directory, the directories for that file are
     * created, and the result is turned into a versioned file name.
     *
     * @param inputFileName name of the input file
     * @return the versioned output file name
     * @throws IOException if the path for the output file cannot be created
     */
    public String getOutputFileName(String inputFileName) throws IOException {
        String baseName = FileNameUtils.stripPathName(inputFileName);
        String outputPath = new File(this.morphAdornerSettings.outputDirectoryName, baseName).getPath();
        boolean pathCreated = FileUtils.createPathForFile(outputPath);
        if (!pathCreated) {
            throw new IOException(this.morphAdornerSettings.getString("Unable_to_create_output_directory"));
        }
        return FileNameUtils.createVersionedFileName(outputPath);
    }

    /**
     * Tells whether the output directory already holds a file with the same
     * (path-stripped) name as the given input file.
     *
     * @param inputFileName name of the input file
     * @return {@code true} if such a file exists
     */
    public boolean doesOutputFileNameExist(String inputFileName) {
        String baseName = FileNameUtils.stripPathName(inputFileName);
        File candidate = new File(this.morphAdornerSettings.outputDirectoryName, baseName);
        return candidate.exists();
    }

    /**
     * Loads a plain input file and adorns its text.
     *
     * <p>The file is read through a text inputter and the text of its first
     * segment is handed to {@link #adornText(String, URL)}, with the file's
     * URL as the output URL.
     *
     * @param fileName file name or URL of the text to adorn
     * @return the outputter returned by {@code adornText}, or {@code null}
     *         when the name cannot be turned into a URL or the text cannot be
     *         read (both cases are logged)
     * @throws IOException if adorning the loaded text fails
     */
    public AdornedWordOutputter adornFile(String fileName) throws IOException {
        this.morphAdornerLogger.println("Tagging", new Object[]{fileName});
        URL fileURL = URLUtils.getURLFromFileNameOrURL(fileName);
        if (fileURL == null) {
            this.morphAdornerLogger.println("Bad_file_name_or_URL", new Object[]{fileName});
            return null;
        }
        String fileText = "";
        long startTime = System.currentTimeMillis();
        try {
            TextInputter inputter = TextInputterFactory.newTextInputter(this.morphAdornerSettings.properties);
            try {
                inputter.enableGapFixer(this.morphAdornerSettings.fixGapTags);
                inputter.enableOrigFixer(this.morphAdornerSettings.fixOrigTags);
                inputter.loadText(fileURL, "utf-8", this.morphAdornerSettings.xmlSchema);
                fileText = inputter.getSegmentText(0);
            }
            finally {
                // Close the inputter even when loading fails; a failure to
                // close is still reported by the catch below, as before.
                ((IsCloseableObject)((Object)inputter)).close();
            }
        }
        catch (Exception e) {
            this.morphAdornerLogger.println("Unable_to_read_text", new Object[]{fileName});
            return null;
        }
        this.morphAdornerLogger.println("Loaded_text", new Object[]{fileName, MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime)});
        return this.adornText(fileText, fileURL);
    }

    public AdornedWordOutputter adornText(String textToAdorn, URL outputURL) throws IOException {
        long startTime = System.currentTimeMillis();
        SentenceSplitter sentenceSplitter = SentenceSplitterFactory.newSentenceSplitter(this.morphAdornerSettings.properties);
        ((UsesLogger)((Object)sentenceSplitter)).setLogger(this.morphAdornerLogger.getLogger());
        sentenceSplitter.setPartOfSpeechGuesser(this.partOfSpeechGuesser);
        sentenceSplitter.setAbbreviations(this.abbreviations);
        WordTokenizer wordTokenizer = WordTokenizerFactory.newWordTokenizer(this.morphAdornerSettings.properties);
        wordTokenizer.setPreTokenizer(PreTokenizerFactory.newPreTokenizer(this.morphAdornerSettings.properties));
        wordTokenizer.setAbbreviations(this.abbreviations);
        List<List<String>> sentences = sentenceSplitter.extractSentences(textToAdorn, wordTokenizer);
        int[] wordAndSentenceCounts = MorphAdornerUtils.getWordAndSentenceCounts(sentences);
        int wordsToTag = wordAndSentenceCounts[1];
        this.morphAdornerLogger.println("Extracted_words", new Object[]{Formatters.formatIntegerWithCommas(wordsToTag), Formatters.formatIntegerWithCommas(wordAndSentenceCounts[0]), MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime)});
        if (this.partOfSpeechGuesser != null) {
            this.partOfSpeechGuesser.setTryStandardSpellings(this.morphAdornerSettings.tryStandardSpellings);
        }
        boolean doOutputLemma = this.morphAdornerSettings.outputLemma && this.lemmatizer != null;
        boolean doOutputStandardSpelling = this.morphAdornerSettings.outputStandardSpelling && this.spellingStandardizer != null;
        boolean doOutputOriginalToken = this.morphAdornerSettings.outputOriginalToken || this.morphAdornerSettings.useXMLHandler;
        this.morphAdornerSettings.setXMLWordAttributes(doOutputOriginalToken, doOutputLemma, doOutputStandardSpelling);
        startTime = System.currentTimeMillis();
        List<List<AdornedWord>> result = this.tagger.tagSentences(sentences);
        double elapsed = System.currentTimeMillis() - startTime;
        int taggingRate = (int)((double)wordsToTag / elapsed * 1000.0);
        this.morphAdornerLogger.println("Tagging_complete", new Object[]{MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime), Formatters.formatIntegerWithCommas(taggingRate)});
        this.morphAdornerLogger.println("Generating_other_adornments");
        startTime = System.currentTimeMillis();
        AdornedWordOutputter outputter = AdornedWordOutputterFactory.newAdornedWordOutputter(this.morphAdornerSettings.properties);
        outputter.setWordAttributeNames(this.morphAdornerSettings.getXMLWordAttributes());
        if (outputURL != null) {
            outputter.createOutputFile(this.getOutputFileName(URLUtils.getFileNameFromURL(outputURL, this.morphAdornerSettings.outputDirectoryName)), "utf-8", '\t');
        } else {
            File file = File.createTempFile("mad", null);
            String tempFileName = file.getAbsolutePath();
            outputter.createOutputFile(tempFileName, "utf-8", '\t');
        }
        int sentenceNumber = 0;
        int wordNumber = 0;
        String lemma = "";
        String correctedSpelling = "";
        String standardizedSpelling = "";
        String sSentenceNumber = "";
        String sWordNumber = "";
        String eosFlag = "";
        String originalToken = "";
        String partOfSpeechTag = "";
        String xmlSurroundMarker = this.morphAdornerSettings.xgOptions.getSurroundMarker().trim();
        String undeterminedPosTag = this.partOfSpeechTags.getUndeterminedTag();
        List<String> outputAdornments = ListFactory.createNewList();
        for (List<AdornedWord> sentenceFromTagger : result) {
            sSentenceNumber = ++sentenceNumber + "";
            int sentenceSizeM1 = sentenceFromTagger.size() - 1;
            if (!this.morphAdornerSettings.outputRunningWordNumbers) {
                wordNumber = 0;
            }
            for (int j = 0; j < sentenceFromTagger.size(); ++j) {
                outputAdornments.clear();
                if (this.morphAdornerSettings.outputSentenceNumber) {
                    outputAdornments.add(sSentenceNumber);
                }
                ++wordNumber;
                if (this.morphAdornerSettings.outputWordNumber) {
                    sWordNumber = wordNumber + "";
                    outputAdornments.add(sWordNumber);
                }
                AdornedWord adornedWord = sentenceFromTagger.get(j);
                originalToken = adornedWord.getToken();
                if (doOutputOriginalToken) {
                    outputAdornments.add(originalToken);
                }
                correctedSpelling = adornedWord.getSpelling();
                standardizedSpelling = adornedWord.getStandardSpelling();
                if (this.morphAdornerSettings.outputSpelling) {
                    outputAdornments.add(correctedSpelling);
                }
                partOfSpeechTag = adornedWord.getPartsOfSpeech();
                if (doOutputStandardSpelling) {
                    standardizedSpelling = MorphAdornerUtils.getStandardizedSpelling(this, correctedSpelling, standardizedSpelling, partOfSpeechTag);
                    if (this.spellingMapper != null) {
                        standardizedSpelling = this.spellingMapper.mapSpelling(standardizedSpelling);
                    }
                }
                if (doOutputLemma) {
                    lemma = !this.morphAdornerSettings.ignoreLexiconEntriesForLemmatization ? this.wordLexicon.getLemma(correctedSpelling, partOfSpeechTag) : "*";
                    if (this.lemmatizer != null && (lemma.equals("*") || this.partOfSpeechTags.countTags(partOfSpeechTag) != this.lemmatizer.countLemmata(lemma))) {
                        lemma = standardizedSpelling.length() > 0 ? MorphAdornerUtils.getLemma(this, standardizedSpelling, partOfSpeechTag) : MorphAdornerUtils.getLemma(this, correctedSpelling, partOfSpeechTag);
                    }
                    if (lemma.indexOf(this.lemmaSeparator) < 0 && !this.partOfSpeechTags.isProperNounTag(partOfSpeechTag)) {
                        lemma = lemma.toLowerCase();
                    }
                }
                if (this.lemmatizer != null) {
                    if (this.partOfSpeechTags.countTags(partOfSpeechTag) != this.lemmatizer.countLemmata(lemma)) {
                        partOfSpeechTag = undeterminedPosTag;
                    }
                    if (partOfSpeechTag.equals(undeterminedPosTag) || lemma.length() == 0) {
                        lemma = correctedSpelling.toLowerCase();
                        standardizedSpelling = correctedSpelling;
                        partOfSpeechTag = undeterminedPosTag;
                    }
                }
                if (this.morphAdornerSettings.outputPartOfSpeech) {
                    outputAdornments.add(partOfSpeechTag);
                }
                if (doOutputStandardSpelling) {
                    outputAdornments.add(standardizedSpelling);
                }
                if (doOutputLemma) {
                    outputAdornments.add(lemma);
                }
                if (this.morphAdornerSettings.outputEOSFlag || this.morphAdornerSettings.useXMLHandler) {
                    if (this.morphAdornerSettings.useXMLHandler) {
                        eosFlag = "0";
                        if (j < sentenceSizeM1) {
                            AdornedWord nextAdornedWord = sentenceFromTagger.get(j + 1);
                            if (nextAdornedWord.getToken().equals(xmlSurroundMarker) && (originalToken.endsWith(".") || originalToken.endsWith("!") || originalToken.endsWith("?") || originalToken.endsWith("'") || originalToken.endsWith("\"") || originalToken.endsWith("\u2019") || originalToken.endsWith("\u201d") || originalToken.endsWith("}") || originalToken.endsWith("]") || originalToken.endsWith(")"))) {
                                eosFlag = "1";
                            }
                        } else {
                            eosFlag = "1";
                        }
                    } else {
                        eosFlag = j >= sentenceSizeM1 ? "1" : "0";
                    }
                    outputAdornments.add(eosFlag);
                }
                if (this.morphAdornerSettings.outputKWIC) {
                    String[] kwics = MorphAdornerUtils.getKWIC(sentenceFromTagger, j, this.morphAdornerSettings.outputKWICWidth);
                    outputAdornments.add(kwics[0]);
                    outputAdornments.add(kwics[2]);
                }
                outputter.outputWordAndAdornments(outputAdornments);
            }
        }
        outputter.close();
        if (outputURL != null) {
            this.morphAdornerLogger.println("Adornments_written_to", new Object[]{this.getOutputFileName(URLUtils.getFileNameFromURL(outputURL, this.morphAdornerSettings.outputDirectoryName)), MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime)});
        } else {
            this.morphAdornerLogger.println("Adornments_generated", new Object[]{MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime)});
        }
        sentences.clear();
        result.clear();
        sentences = null;
        result = null;
        return outputter;
    }

    /**
     * Re-adorns a previously adorned XML file.
     *
     * <p>The input is first filtered into a temporary file while the existing
     * word information is collected by an {@code ExtendedAdornedWordFilter}.
     * The collected sentences are then retagged, their standard spellings and
     * lemmata are regenerated, split-word parts are synchronized, and finally
     * the temporary file is filtered again to write the merged output file
     * whose name is given by {@code getOutputFileName(inputFileName)}.</p>
     *
     * @param inputFileName name of the previously adorned file to re-adorn.
     * @throws SAXException if an XML reader cannot be created or XML processing fails.
     * @throws IOException if the temporary file cannot be created or I/O fails.
     * @throws FileNotFoundException declared for callers; raised by the underlying file handling.
     */
    public void readorn(String inputFileName) throws SAXException, IOException, FileNotFoundException {
        this.morphAdornerLogger.println("Loading_previously_adorned");
        long startTime = System.currentTimeMillis();
        StripWordAttributesFilter stripFilter = new StripWordAttributesFilter(XMLReaderFactory.createXMLReader());
        ExtendedAdornedWordFilter wordInfoFilter = new ExtendedAdornedWordFilter(stripFilter);
        File file = File.createTempFile("mad", null);
        String tempFileName = file.getAbsolutePath();
        try {
            // First pass: the constructor performs the filtering into the temp file.
            new FilterAdornedFile(inputFileName, tempFileName, wordInfoFilter);
            List<List<ExtendedAdornedWord>> sentences = wordInfoFilter.getSentences();
            this.morphAdornerLogger.println("Loaded_existing_words", new Object[]{Formatters.formatIntegerWithCommas(wordInfoFilter.getNumberOfWords()), Formatters.formatIntegerWithCommas(sentences.size()), MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime)});

            // Only consult the retagger when one is configured; the previous code
            // dereferenced it before the null check.
            boolean savedRetaggerAddOrDelete = false;
            if (this.retagger != null) {
                savedRetaggerAddOrDelete = this.retagger.getCanAddOrDeleteWords();
                this.retagger.setCanAddOrDeleteWords(false);
            }
            startTime = System.currentTimeMillis();
            try {
                this.tagger.tagAdornedWordSentences(sentences, stripFilter.getRegIDSet());
            }
            finally {
                // Restore the retagger setting even when tagging fails.
                if (this.retagger != null) {
                    this.retagger.setCanAddOrDeleteWords(savedRetaggerAddOrDelete);
                }
            }
            // Clamp to 1 ms so a very fast run does not divide by zero.
            double elapsed = Math.max(1L, System.currentTimeMillis() - startTime);
            int taggingRate = (int)((double)wordInfoFilter.getNumberOfWords() / elapsed * 1000.0);
            this.morphAdornerLogger.println("Tagging_complete", new Object[]{MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime), Formatters.formatIntegerWithCommas(taggingRate)});

            this.morphAdornerLogger.println("Generating_other_adornments");
            startTime = System.currentTimeMillis();
            this.updateAdornedSentences(sentences, stripFilter.getRegIDSet());
            this.updateSplitWordAdornments(wordInfoFilter);
            this.morphAdornerLogger.println("Adornments_generated", new Object[]{MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime)});

            AddWordAttributesFilter addFilter = new AddWordAttributesFilter(XMLReaderFactory.createXMLReader(), wordInfoFilter, this.morphAdornerSettings);
            ExtendedXMLFilterImpl filter = addFilter;
            if (this.morphAdornerSettings.outputPseudoPageBoundaryMilestones) {
                filter = new PseudoPageAdderFilter(addFilter, this.morphAdornerSettings.pseudoPageSize, this.morphAdornerSettings.pseudoPageContainerDivTypes);
            }

            String outputFileName = this.getOutputFileName(inputFileName);
            this.morphAdornerLogger.println("Writing_merged", new Object[]{outputFileName});
            startTime = System.currentTimeMillis();
            // Second pass: merge the updated word attributes back into the XML.
            new FilterAdornedFile(tempFileName, outputFileName, filter);
            this.morphAdornerLogger.println("Adorned_XML_written", new Object[]{outputFileName, MorphAdornerUtils.durationString(this.morphAdornerSettings, startTime)});
        }
        finally {
            // Always remove the temporary file, including on failure.
            FileUtils.deleteFile(tempFileName);
        }
    }

    /**
     * Updates the adornments of every sentence in a list of sentences by
     * delegating each one to {@link #updateAdornedSentence}.
     *
     * @param sentences the sentences, each a list of adorned words.
     * @param regIDSet set of word IDs passed through to the per-sentence update.
     */
    public void updateAdornedSentences(List<List<ExtendedAdornedWord>> sentences, Set<String> regIDSet) {
        for (List<ExtendedAdornedWord> sentence : sentences) {
            this.updateAdornedSentence(sentence, regIDSet);
        }
    }

    /**
     * Propagates adornments from the first part of each split word to its
     * related parts.
     *
     * <p>For every word known to the filter that is both a split word and the
     * first part, the part of speech, lemmata, spelling and standard spelling
     * are copied onto each word returned by
     * {@code getRelatedSplitWordIDs} for that word's ID.</p>
     *
     * @param wordFilter filter holding the adorned words and their split-word relations.
     */
    protected void updateSplitWordAdornments(ExtendedAdornedWordFilter wordFilter) {
        for (String wordID : wordFilter.getAdornedWordIDs()) {
            ExtendedAdornedWord firstPart = wordFilter.getExtendedAdornedWord(wordID);
            boolean isLeadingSplitPart = firstPart.isSplitWord() && firstPart.isFirstPart();
            if (!isLeadingSplitPart) {
                continue;
            }
            List<String> siblingIDs = wordFilter.getRelatedSplitWordIDs(firstPart.getID());
            for (String siblingID : siblingIDs) {
                ExtendedAdornedWord sibling = wordFilter.getExtendedAdornedWord(siblingID);
                sibling.setPartsOfSpeech(firstPart.getPartsOfSpeech());
                sibling.setLemmata(firstPart.getLemmata());
                sibling.setSpelling(firstPart.getSpelling());
                sibling.setStandardSpelling(firstPart.getStandardSpelling());
            }
        }
    }

    /**
     * Regenerates the standard spelling and lemma of each word in a sentence.
     *
     * <p>The standard spelling is recomputed via
     * {@code MorphAdornerUtils.getStandardizedSpelling}, starting from the
     * word's existing standard spelling when its ID is in {@code regIDSet}
     * and from its spelling otherwise, and is then passed through the spelling
     * mapper when one is set. The lemma comes from the word lexicon unless
     * lexicon entries are ignored for lemmatization; a {@code "*"} lemma is
     * replaced using {@code MorphAdornerUtils.getLemma} when a lemmatizer is
     * available. Lemmata without the lemma separator are lower-cased unless
     * the part of speech is a proper noun tag.</p>
     *
     * @param sentence the adorned words to update in place.
     * @param regIDSet set of word IDs whose existing standard spelling is used as the starting point.
     */
    public void updateAdornedSentence(List<ExtendedAdornedWord> sentence, Set<String> regIDSet) {
        for (ExtendedAdornedWord word : sentence) {
            String wordID = word.getID();
            // Token is fetched as in the original code although it is not used further.
            String token = word.getToken();
            String spelling = word.getSpelling();
            String standardSpelling = word.getStandardSpelling();
            String posTag = word.getPartsOfSpeech();

            String spellingToStandardize;
            if (regIDSet.contains(wordID)) {
                spellingToStandardize = standardSpelling;
            } else {
                spellingToStandardize = spelling;
            }
            standardSpelling = MorphAdornerUtils.getStandardizedSpelling(this, spellingToStandardize, standardSpelling, posTag);
            if (this.spellingMapper != null) {
                standardSpelling = this.spellingMapper.mapSpelling(standardSpelling);
            }
            word.setStandardSpelling(standardSpelling);

            String lemma;
            if (this.morphAdornerSettings.ignoreLexiconEntriesForLemmatization) {
                lemma = "*";
            } else {
                lemma = this.wordLexicon.getLemma(spelling, posTag);
            }
            if (lemma.equals("*") && this.lemmatizer != null) {
                if (standardSpelling.length() > 0) {
                    lemma = MorphAdornerUtils.getLemma(this, standardSpelling, posTag);
                } else {
                    lemma = MorphAdornerUtils.getLemma(this, spelling, posTag);
                }
            }
            boolean isCompoundLemma = lemma.indexOf(this.lemmaSeparator) >= 0;
            if (!isCompoundLemma && !this.partOfSpeechTags.isProperNounTag(posTag)) {
                lemma = lemma.toLowerCase();
            }
            word.setLemmata(lemma);
        }
    }

    /**
     * Loads abbreviations from a URL into an existing abbreviations object and
     * logs how many were added and how long loading took.
     *
     * @param abbreviations the abbreviations object to load into.
     * @param abbreviationsURL location passed to {@code loadAbbreviations}.
     * @param loadedMessage message key used for the log line.
     */
    public void addAbbreviations(Abbreviations abbreviations, String abbreviationsURL, String loadedMessage) {
        long loadStart = System.currentTimeMillis();
        int countBefore = abbreviations.getAbbreviationsCount();
        abbreviations.loadAbbreviations(abbreviationsURL);
        int countAfter = abbreviations.getAbbreviationsCount();
        String addedText = Formatters.formatIntegerWithCommas(countAfter - countBefore);
        String durationText = MorphAdornerUtils.durationString(this.morphAdornerSettings, loadStart);
        this.morphAdornerLogger.println(loadedMessage, new Object[]{addedText, durationText});
    }

    /**
     * Merges the text segments held by an inputter into a single XML file.
     *
     * <p>Segments are written in sorted name order as UTF-8. The closing
     * portion of the {@code "head"} segment (from {@code </eebo},
     * {@code </TEI} or {@code </tei.} onwards) is held back and written after
     * all other segments. For the {@code "text"} segment, {@code "/>"} is
     * replaced by {@code ">"}, a matching closing tag is queued ahead of the
     * held-back text, and a trailing {@code </text>} / {@code </TEXT>} is
     * removed. Trailing {@code " >"} sequences are collapsed to {@code ">"}.</p>
     *
     * <p>Failures are reported with a stack trace rather than propagated,
     * and the output stream is closed whether or not merging succeeds.</p>
     *
     * @param inputter source of the named text segments.
     * @param xmlFileName name of the output file; it is overwritten.
     */
    protected static void mergeXML(TextInputter inputter, String xmlFileName) {
        FileOutputStream outputStream = null;
        OutputStreamWriter writer = null;
        try {
            outputStream = new FileOutputStream(new File(xmlFileName), false);
            BufferedOutputStream bufferedStream = new BufferedOutputStream(outputStream);
            writer = new OutputStreamWriter((OutputStream)bufferedStream, "utf-8");
            SortedArrayList<String> entryNames = new SortedArrayList<String>();
            int nEntries = inputter.getSegmentCount();
            for (int i = 0; i < nEntries; ++i) {
                entryNames.add(inputter.getSegmentName(i));
            }
            // Text that must come after every segment (closing tags).
            String endText = "";
            for (int i = 0; i < entryNames.size(); ++i) {
                String entryName = (String)entryNames.get(i);
                String entryText = inputter.getSegmentText(entryName);
                if (entryName.equals("head")) {
                    int iPos = StringUtils.indexOfIgnoreCase(entryText, "</eebo");
                    if (iPos < 0) {
                        iPos = entryText.indexOf("</TEI");
                    }
                    if (iPos < 0) {
                        iPos = entryText.indexOf("</tei.");
                    }
                    if (iPos >= 0) {
                        endText = entryText.substring(iPos);
                        entryText = entryText.substring(0, iPos);
                    }
                } else if (entryName.equals("text")) {
                    entryText = StringUtils.replaceAll(entryText.trim(), "/>", ">");
                    if (entryText.startsWith("<group")) {
                        endText = "</group>" + endText;
                    } else if (entryText.startsWith("<GROUP")) {
                        endText = "</GROUP>" + endText;
                    } else if (entryText.startsWith("<text")) {
                        endText = "</text>" + endText;
                    } else {
                        endText = "</TEXT>" + endText;
                    }
                    // "</text>" and "</TEXT>" are both 7 characters long.
                    if (entryText.endsWith("</text>") || entryText.endsWith("</TEXT>")) {
                        entryText = entryText.substring(0, entryText.length() - 7);
                    }
                }
                while (entryText.endsWith(" >")) {
                    entryText = entryText.substring(0, entryText.length() - 2) + ">";
                }
                writer.write(entryText, 0, entryText.length());
            }
            endText = StringUtils.replaceAll(endText, " >", ">");
            writer.write(endText, 0, endText.length());
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            // Close on every path so a failed merge does not leak the file handle.
            try {
                if (writer != null) {
                    // Closing the writer flushes and closes the wrapped streams.
                    writer.close();
                } else if (outputStream != null) {
                    outputStream.close();
                }
            }
            catch (IOException e) {
                e.printStackTrace();
            }
        }
        // Retained from the original implementation.
        System.gc();
    }

    /**
     * Command-line entry point: builds an adorner from the arguments,
     * processes the configured input files if there are any (otherwise logs
     * that none were found), and terminates the logger.
     *
     * @param args command-line arguments passed to the {@code MorphAdorner} constructor.
     */
    public static void main(String[] args) {
        MorphAdorner morphAdorner = new MorphAdorner(args);
        boolean nothingToProcess = morphAdorner.morphAdornerSettings.fileNames.length <= 0;
        if (nothingToProcess) {
            morphAdorner.morphAdornerLogger.println("No_files_found_to_process");
        } else {
            morphAdorner.processInputFiles(morphAdorner.morphAdornerSettings.tokenizeOnly);
        }
        morphAdorner.morphAdornerLogger.terminate();
    }

    /**
     * Returns the adorner stored under a name, creating one when needed.
     *
     * <p>A new adorner is constructed and stored when none exists under
     * {@code adornerName} or when {@code replaceAdorner} is true. Otherwise
     * the stored adorner is returned; if its logger is not enabled, a newly
     * created wrapped logger is installed first.</p>
     *
     * @param adornerName key under which the adorner is stored.
     * @param replaceAdorner true to always create a fresh adorner.
     * @param adornerArgs arguments for constructing a new adorner.
     * @param adornerLogConfig logger configuration.
     * @param adornerLogDirectory logger output directory.
     * @return the new or previously stored adorner.
     */
    public static MorphAdorner createAdorner(String adornerName, boolean replaceAdorner, String[] adornerArgs, String adornerLogConfig, String adornerLogDirectory) {
        MorphAdorner existing = storedAdorners.get(adornerName);
        if (replaceAdorner || existing == null) {
            MorphAdorner created = new MorphAdorner(adornerArgs, adornerLogConfig, adornerLogDirectory);
            storedAdorners.put(adornerName, created);
            return created;
        }
        boolean loggerEnabled = existing.morphAdornerLogger.getLogger().isLoggerEnabled();
        if (!loggerEnabled) {
            existing.morphAdornerLogger.setLogger(existing.morphAdornerLogger.createWrappedLogger(adornerLogConfig, adornerLogDirectory));
        }
        return existing;
    }

    /**
     * Runs an adorner over a set of files.
     *
     * <p>The output directory and the wildcard-expanded file names are stored
     * in the adorner's settings. The files are processed when at least one
     * name results; otherwise a "no files found" message is logged.</p>
     *
     * @param adorner the adorner to run; may be null.
     * @param outputDirectory directory name stored as the output directory.
     * @param filesToAdorn file names, possibly containing wildcards.
     * @param tokenizeOnly passed through to {@code processInputFiles}.
     * @return the given adorner, or null when {@code adorner} is null.
     */
    public static MorphAdorner runAdorner(MorphAdorner adorner, String outputDirectory, String[] filesToAdorn, boolean tokenizeOnly) {
        if (adorner != null) {
            adorner.morphAdornerSettings.outputDirectoryName = outputDirectory;
            adorner.morphAdornerSettings.fileNames = FileNameUtils.expandFileNameWildcards(filesToAdorn);
            if (adorner.morphAdornerSettings.fileNames.length == 0) {
                adorner.morphAdornerLogger.println("No_files_found_to_process");
            } else {
                adorner.processInputFiles(tokenizeOnly);
            }
        }
        return adorner;
    }

    /**
     * Runs the adorner stored under a name over a set of files.
     *
     * @param adornerName key of the stored adorner.
     * @param outputDirectory directory name stored as the output directory.
     * @param filesToAdorn file names, possibly containing wildcards.
     * @param tokenizeOnly passed through to the adorner run.
     * @return the adorner that was run, or null when none is stored under the name.
     */
    public static MorphAdorner runAdorner(String adornerName, String outputDirectory, String[] filesToAdorn, boolean tokenizeOnly) {
        MorphAdorner namedAdorner = storedAdorners.get(adornerName);
        return MorphAdorner.runAdorner(namedAdorner, outputDirectory, filesToAdorn, tokenizeOnly);
    }

    /**
     * Creates (or reuses) a named adorner and immediately runs it over a set
     * of files.
     *
     * <p>The adorner returned by {@link #createAdorner} is run directly
     * rather than being looked up again by name, so the instance that was
     * created or reused is always the one that runs.</p>
     *
     * @param adornerName key under which the adorner is stored.
     * @param replaceAdorner true to always create a fresh adorner.
     * @param adornerArgs arguments for constructing a new adorner.
     * @param adornerLogConfig logger configuration.
     * @param adornerLogDirectory logger output directory.
     * @param outputDirectory directory name stored as the output directory.
     * @param filesToAdorn file names, possibly containing wildcards.
     * @param tokenizeOnly passed through to the adorner run.
     * @return the adorner that was run.
     */
    public static MorphAdorner createAndRunAdorner(String adornerName, boolean replaceAdorner, String[] adornerArgs, String adornerLogConfig, String adornerLogDirectory, String outputDirectory, String[] filesToAdorn, boolean tokenizeOnly) {
        MorphAdorner adorner = MorphAdorner.createAdorner(adornerName, replaceAdorner, adornerArgs, adornerLogConfig, adornerLogDirectory);
        return MorphAdorner.runAdorner(adorner, outputDirectory, filesToAdorn, tokenizeOnly);
    }

    /**
     * Terminates the logger when this adorner is finalized.
     *
     * <p>Logger termination is best-effort: exceptions from it are ignored.
     * {@code super.finalize()} runs in a {@code finally} block so it is
     * invoked even if termination fails with an error.</p>
     *
     * @throws Throwable if {@code super.finalize()} throws, or termination raises an error.
     */
    @Override
    public void finalize() throws Throwable {
        try {
            if (this.morphAdornerLogger != null) {
                this.morphAdornerLogger.terminate();
            }
        }
        catch (Exception ignored) {
            // Deliberately ignored: nothing useful can be done during finalization.
        }
        finally {
            super.finalize();
        }
    }
}

