/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.tools.relemmatize;

import edu.northwestern.at.morphadorner.WordAttributeNames;
import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.Lemmatizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.lexicon.Lexicon;
import edu.northwestern.at.morphadorner.corpuslinguistics.namestandardizer.NameStandardizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingmapper.SpellingMapper;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingstandardizer.SpellingStandardizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PennTreebankTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.xml.ExtendedXMLFilterImpl;
import java.util.List;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;

/**
 * SAX filter that recomputes the standardized spelling and lemma attributes of
 * adorned word elements as they stream through.
 *
 * <p>For each {@code w} element the filter reads the spelling
 * ({@link WordAttributeNames#spe}) and part of speech
 * ({@link WordAttributeNames#pos}) attributes, derives a new standard spelling
 * and lemma from them, writes the results back to the
 * {@link WordAttributeNames#reg} and {@link WordAttributeNames#lem} attributes,
 * and counts how many values differ from the ones originally present.
 * For each {@code c} element the {@link WordAttributeNames#part} attribute is
 * removed. All other elements are forwarded untouched.</p>
 *
 * <p>The counters are plain {@code int} fields updated without
 * synchronization.</p>
 */
public class RelemmatizeFilter extends ExtendedXMLFilterImpl {
    /** Lexicon consulted first when looking up a lemma. */
    protected Lexicon wordLexicon;

    /** Lemmatizer used when the lexicon yields no lemma. */
    protected Lemmatizer lemmatizer;

    /** Standardizer applied to spellings tagged as proper nouns. */
    protected NameStandardizer nameStandardizer;

    /** Standardizer applied to spellings that are not otherwise exempt. */
    protected SpellingStandardizer standardizer;

    /** Final mapping applied to every standardized spelling. */
    protected SpellingMapper spellingMapper;

    /** Part of speech tag set, obtained from the word lexicon. */
    protected PartOfSpeechTags partOfSpeechTags;

    /** Tokenizer used to split a spelling into its word pieces. */
    protected WordTokenizer spellingTokenizer;

    /** Separator placed between the lemmata of a compound, as reported by the lemmatizer. */
    protected String lemmaSeparator;

    /** Number of {@code w} elements whose lemma differed from the original value. */
    protected int lemmataChanged = 0;

    /** Number of {@code w} elements whose standard spelling differed from the original value. */
    protected int standardChanged = 0;

    /** Number of {@code w} elements seen. */
    protected int wordsProcessed = 0;

    /**
     * Creates a relemmatizing filter on top of an XML reader.
     *
     * @param reader           the parent XML reader
     * @param wordLexicon      lexicon used for lemma lookup; also supplies the part of speech tags
     * @param lemmatizer       lemmatizer used when the lexicon has no lemma; also supplies the lemma separator
     * @param nameStandardizer standardizer for proper names
     * @param standardizer     standardizer for ordinary spellings
     * @param spellingMapper   mapper applied to the final standardized spelling
     */
    public RelemmatizeFilter(XMLReader reader, Lexicon wordLexicon, Lemmatizer lemmatizer, NameStandardizer nameStandardizer, SpellingStandardizer standardizer, SpellingMapper spellingMapper) {
        super(reader);
        this.wordLexicon = wordLexicon;
        this.lemmatizer = lemmatizer;
        this.nameStandardizer = nameStandardizer;
        this.standardizer = standardizer;
        this.spellingMapper = spellingMapper;
        this.lemmaSeparator = lemmatizer.getLemmaSeparator();
        this.partOfSpeechTags = wordLexicon.getPartOfSpeechTags();
        this.spellingTokenizer = new PennTreebankTokenizer();
    }

    /**
     * Rewrites the attributes of {@code w} and {@code c} elements before
     * forwarding the event; every other element is passed through unchanged.
     *
     * @param uri       the element's namespace URI
     * @param localName the element's local name
     * @param qName     the element's qualified name
     * @param atts      the element's attributes
     * @throws SAXException if the downstream handler reports an error
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        if ("w".equals(qName)) {
            ++this.wordsProcessed;
            AttributesImpl newAttrs = new AttributesImpl(atts);
            String oldLemma = newAttrs.getValue(WordAttributeNames.lem);
            String partOfSpeech = newAttrs.getValue(WordAttributeNames.pos);
            String oldStandard = newAttrs.getValue(WordAttributeNames.reg);
            String spelling = newAttrs.getValue(WordAttributeNames.spe);
            String standard = this.getStandardizedSpelling(spelling, partOfSpeech);
            String lemma = this.getLemma(spelling, partOfSpeech);
            this.setAttributeValue(newAttrs, WordAttributeNames.reg, standard);
            this.setAttributeValue(newAttrs, WordAttributeNames.lem, lemma);
            // getValue() returns null when the attribute is absent; a missing
            // old value counts as a change instead of raising an NPE.
            if (oldStandard == null || !oldStandard.equals(standard)) {
                ++this.standardChanged;
            }
            if (oldLemma == null || !oldLemma.equals(lemma)) {
                ++this.lemmataChanged;
            }
            super.startElement(uri, localName, qName, newAttrs);
        } else if ("c".equals(qName)) {
            // Copy the incoming attributes so that only the "part" attribute
            // is dropped; starting from an empty list would discard them all.
            AttributesImpl newAttrs = new AttributesImpl(atts);
            this.removeAttribute(newAttrs, WordAttributeNames.part);
            super.startElement(uri, localName, qName, newAttrs);
        } else {
            super.startElement(uri, localName, qName, atts);
        }
    }

    /**
     * Determines the lemma (or separator-joined lemmata) for a spelling.
     *
     * <p>The lexicon is consulted first. When it answers {@code "*"}, the
     * lemmatizer is tried, provided it does not refuse the spelling and the
     * lemma word class of the tag is not {@code "none"}. If the result is still
     * {@code "*"} the spelling itself is used. A result that contains no lemma
     * separator and does not carry a proper noun tag is lower-cased.</p>
     *
     * @param spelling     the spelling to lemmatize
     * @param partOfSpeech the part of speech tag of the spelling
     * @return the lemma form of the spelling
     */
    public String getLemma(String spelling, String partOfSpeech) {
        String lemmata = this.wordLexicon.getLemma(spelling, partOfSpeech);
        if (lemmata.equals("*")) {
            String lemmaClass = this.partOfSpeechTags.getLemmaWordClass(partOfSpeech);
            if (!this.lemmatizer.cantLemmatize(spelling) && !lemmaClass.equals("none")) {
                List<String> wordList = this.spellingTokenizer.extractWords(spelling);
                if (!this.partOfSpeechTags.isCompoundTag(partOfSpeech) || wordList.size() == 1) {
                    if (lemmaClass.length() == 0) {
                        // No lemma class for this tag: try the "compound"
                        // class first, then the class-less lemmatization if
                        // that left the spelling unchanged.
                        lemmata = this.lemmatizer.lemmatize(spelling, "compound");
                        if (lemmata.equals(spelling)) {
                            lemmata = this.lemmatizer.lemmatize(spelling);
                        }
                    } else {
                        lemmata = this.lemmatizer.lemmatize(spelling, lemmaClass);
                    }
                } else {
                    // Compound tag with several word pieces: lemmatize each
                    // piece with the class of its own tag part.
                    // NOTE(review): when the number of tag parts and word
                    // pieces differ the lemma stays empty - confirm that this
                    // is the intended fallback.
                    StringBuilder joined = new StringBuilder();
                    String[] posTags = this.partOfSpeechTags.splitTag(partOfSpeech);
                    if (posTags.length == wordList.size()) {
                        for (int i = 0; i < wordList.size(); ++i) {
                            if (i > 0) {
                                joined.append(this.lemmaSeparator);
                            }
                            String pieceClass = this.partOfSpeechTags.getLemmaWordClass(posTags[i]);
                            joined.append(this.lemmatizer.lemmatize(wordList.get(i), pieceClass));
                        }
                    }
                    lemmata = joined.toString();
                }
            }
        }
        if (lemmata.equals("*")) {
            lemmata = spelling;
        }
        if (lemmata.indexOf(this.lemmaSeparator) < 0 && !this.partOfSpeechTags.isProperNounTag(partOfSpeech)) {
            lemmata = lemmata.toLowerCase();
        }
        return lemmata;
    }

    /**
     * Determines the standardized spelling for a spelling.
     *
     * <p>Proper nouns go through the name standardizer. Nouns with internal
     * capitals, foreign words and numbers are left as they are. Everything
     * else goes through the spelling standardizer; when its answer differs
     * from the input only by letter case the input is kept. The outcome is
     * always passed through the spelling mapper.</p>
     *
     * @param correctedSpelling the spelling to standardize
     * @param partOfSpeech      the part of speech tag of the spelling
     * @return the mapped standard spelling
     */
    protected String getStandardizedSpelling(String correctedSpelling, String partOfSpeech) {
        String spelling = correctedSpelling;
        String result = correctedSpelling;
        if (this.partOfSpeechTags.isProperNounTag(partOfSpeech)) {
            result = this.nameStandardizer.standardizeProperName(spelling);
        } else if (this.partOfSpeechTags.isNounTag(partOfSpeech) && CharUtils.hasInternalCaps(spelling)) {
            // Noun with internal capitals: keep the spelling as is.
        } else if (this.partOfSpeechTags.isForeignWordTag(partOfSpeech)) {
            // Foreign word: keep the spelling as is.
        } else if (this.partOfSpeechTags.isNumberTag(partOfSpeech)) {
            // Number: keep the spelling as is.
        } else {
            result = this.standardizer.standardizeSpelling(spelling, this.partOfSpeechTags.getMajorWordClass(partOfSpeech));
            // A purely case-level difference is not a real standardization;
            // preserve the original capitalization.
            if (result.equalsIgnoreCase(spelling)) {
                result = spelling;
            }
        }
        return this.spellingMapper.mapSpelling(result);
    }

    /**
     * Returns the number of words whose lemma changed.
     *
     * @return count of {@code w} elements with a changed lemma
     */
    public int getLemmataChanged() {
        return this.lemmataChanged;
    }

    /**
     * Returns the number of words whose standard spelling changed.
     *
     * @return count of {@code w} elements with a changed standard spelling
     */
    public int getStandardChanged() {
        return this.standardChanged;
    }

    /**
     * Returns the number of words processed.
     *
     * @return count of {@code w} elements seen so far
     */
    public int getWordsProcessed() {
        return this.wordsProcessed;
    }
}

