/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.gate;

import edu.northwestern.at.morphadorner.corpuslinguistics.adornedword.AdornedWord;
import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.DefaultLemmatizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.Lemmatizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.DefaultPartOfSpeechTagger;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.PartOfSpeechTagger;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingstandardizer.ExtendedSimpleSpellingStandardizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingstandardizer.SpellingStandardizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PennTreebankTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.morphadorner.gate.MorphAdornerGateWrapperBase;
import edu.northwestern.at.utils.CharUtils;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Resource;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.util.GateRuntimeException;
import gate.util.OffsetComparator;
import java.io.File;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;

/**
 * GATE processing resource that runs the MorphAdorner part of speech tagger
 * over the {@code Token} annotations of a document, sentence by sentence, and
 * stores the results as features on those tokens.
 *
 * <p>For every tagged token the features {@code category} (part of speech),
 * {@code lemma}, {@code spelling} (spelling as returned by the tagger) and
 * {@code standard} (standardized spelling) are written in place on the token
 * annotation taken from the input annotation set.</p>
 */
public class PosTaggerGateWrapper
extends MorphAdornerGateWrapperBase {
    /** Tagger used to assign parts of speech to the words of a sentence. */
    protected PartOfSpeechTagger partOfSpeechTagger = null;
    /** Part of speech tag set, obtained from the word lexicon in {@link #init()}. */
    protected PartOfSpeechTags partOfSpeechTags = null;
    /** Lemmatizer used when the word lexicon has no lemma for a word. */
    protected Lemmatizer lemmatizer = null;
    /** Spelling standardizer, loaded with spelling lists in {@link #init()}. */
    protected SpellingStandardizer standardizer = null;
    /** Tokenizer used to split a spelling into word pieces for compound lemmatization. */
    protected WordTokenizer spellingTokenizer = new PennTreebankTokenizer();

    /**
     * Creates the tagger, lemmatizer and spelling standardizer and loads the
     * spelling lists.
     *
     * <p>{@code spellingsURL} and {@code alternateSpellingsURL} are wrapped in
     * {@link File} and converted to URLs before loading; both lists are read
     * as {@code utf-8}.</p>
     *
     * @return the resource returned by the superclass {@code init()}
     * @throws ResourceInstantiationException if any component cannot be
     *         created or a spelling list cannot be loaded; the original
     *         exception is attached as the cause
     */
    @Override
    public Resource init() throws ResourceInstantiationException {
        this.commonInit();
        try {
            this.partOfSpeechTagger = new DefaultPartOfSpeechTagger();
            this.lemmatizer = new DefaultLemmatizer();
            this.standardizer = new ExtendedSimpleSpellingStandardizer();
            this.standardizer.loadStandardSpellings(new File(this.spellingsURL).toURI().toURL(), "utf-8");
            this.standardizer.loadAlternativeSpellings(new File(this.alternateSpellingsURL).toURI().toURL(), "utf-8", "\t");
            this.partOfSpeechTags = this.wordLexicon.getPartOfSpeechTags();
        }
        catch (Exception e) {
            // Wrap the exception itself rather than only its message so the
            // stack trace of the real failure is not lost.
            throw new ResourceInstantiationException(e);
        }
        return super.init();
    }

    /**
     * Adorns the tokens of the current document.
     *
     * <p>Sentences and tokens are read from the input annotation set (the
     * default set when {@code inputASName} is null or empty) and sorted by
     * offset. Tokens are consumed in order; a token is assigned to the first
     * sentence whose end offset is not smaller than the token's end offset.
     * Tokens that end after the last sentence are left untouched. Sentences
     * that receive no tokens are skipped without calling the tagger.</p>
     *
     * <p>The document features {@code "Number of tokens"} and
     * {@code "Number of sentences"} are set as a side effect.</p>
     *
     * @throws ExecutionException if the input annotation set yields
     *         {@code null} for the {@code Token} or {@code Sentence} type
     * @throws GateRuntimeException if no document is set or
     *         {@code baseSentenceAnnotationType} is null or blank
     */
    @Override
    public void execute() throws ExecutionException {
        if (this.document == null) {
            throw new GateRuntimeException("There is no document to process.");
        }
        if (this.inputASName != null && this.inputASName.length() == 0) {
            this.inputASName = null;
        }
        // NOTE(review): baseSentenceAnnotationType is required here, but the
        // lookups below use the literal "Sentence" type. Confirm whether the
        // parameter is meant to drive the lookup.
        if (this.baseSentenceAnnotationType == null || this.baseSentenceAnnotationType.trim().length() == 0) {
            throw new GateRuntimeException("No base Sentence Annotation Type provided.");
        }
        AnnotationSet inputAS = this.inputASName == null ? this.document.getAnnotations() : this.document.getAnnotations(this.inputASName);
        // Features are written in place on the input tokens, so only the
        // normalisation of the output set name is kept.
        if (this.outputASName != null && this.outputASName.length() == 0) {
            this.outputASName = null;
        }

        AnnotationSet tokensAS = inputAS.get("Token");
        if (tokensAS == null) {
            throw new ExecutionException("You need to run a Tokenizer first!");
        }
        this.document.getFeatures().put("Number of tokens", String.valueOf(tokensAS.size()));

        AnnotationSet sentencesAS = inputAS.get("Sentence");
        if (sentencesAS == null) {
            throw new ExecutionException("You need to run a Sentence Splitter first.");
        }
        this.document.getFeatures().put("Number of sentences", String.valueOf(sentencesAS.size()));

        OffsetComparator offsetComparator = new OffsetComparator();
        List<Annotation> sentences = new ArrayList<Annotation>(sentencesAS);
        Collections.sort(sentences, offsetComparator);
        List<Annotation> tokens = new ArrayList<Annotation>(tokensAS);
        Collections.sort(tokens, offsetComparator);

        Iterator<Annotation> tokensIter = tokens.iterator();
        // An empty token set simply leaves every sentence without tokens.
        Annotation currentToken = tokensIter.hasNext() ? tokensIter.next() : null;

        List<Annotation> tokensInCurrentSentence = new ArrayList<Annotation>();
        List<String> sentenceForTagger = new ArrayList<String>();
        int sentCnt = sentences.size();
        int sentIndex = 0;
        this.fireStatusChanged("Adorning " + sentCnt + " sentences in " + this.document.getName());
        this.fireProgressChanged(0);
        long startTime = System.currentTimeMillis();

        for (Annotation currentSentence : sentences) {
            tokensInCurrentSentence.clear();
            sentenceForTagger.clear();
            Long sentenceEnd = currentSentence.getEndNode().getOffset();
            // Collect every not-yet-consumed token that ends inside this sentence.
            while (currentToken != null && currentToken.getEndNode().getOffset().compareTo(sentenceEnd) <= 0) {
                tokensInCurrentSentence.add(currentToken);
                sentenceForTagger.add((String)currentToken.getFeatures().get("string"));
                currentToken = tokensIter.hasNext() ? tokensIter.next() : null;
            }
            if (!tokensInCurrentSentence.isEmpty()) {
                this.adornSentence(tokensInCurrentSentence, sentenceForTagger);
            }
            // sentCnt is non-zero whenever the loop body runs.
            this.fireProgressChanged(++sentIndex * 100 / sentCnt);
        }

        // Completion is reported once for the whole document, also when the
        // document contains no sentences.
        this.fireProcessFinished();
        this.fireStatusChanged(this.document.getName() + " adorned in " + NumberFormat.getInstance().format((double)(System.currentTimeMillis() - startTime) / 1000.0) + " seconds.");
        this.fireStatusChanged("Adornment complete.");
        this.fireProgressChanged(0);
    }

    /**
     * Tags one sentence and writes the adornments onto its token annotations.
     *
     * <p>Tagger results and tokens are paired positionally; pairing stops at
     * the shorter of the two sequences.</p>
     *
     * @param sentenceTokens token annotations of the sentence, in offset order
     * @param sentenceWords  the {@code string} feature of each token, in the same order
     */
    private void adornSentence(List<Annotation> sentenceTokens, List<String> sentenceWords) {
        List<AdornedWord> taggerResults = this.partOfSpeechTagger.tagSentence(sentenceWords);
        Iterator<AdornedWord> resIter = taggerResults.iterator();
        Iterator<Annotation> tokIter = sentenceTokens.iterator();
        while (resIter.hasNext() && tokIter.hasNext()) {
            AdornedWord word = resIter.next();
            Annotation token = tokIter.next();
            String partOfSpeechTag = word.getPartsOfSpeech();
            String correctedSpelling = word.getSpelling();
            String standardizedSpelling = this.getStandardizedSpelling(correctedSpelling, partOfSpeechTag);
            String lemma = this.wordLexicon.getLemma(correctedSpelling, partOfSpeechTag);
            // "*" is the lexicon's answer when it has no lemma; a null answer
            // is handled the same way instead of failing.
            if (lemma == null || lemma.equals("*")) {
                lemma = this.getLemma(this.lemmatizer, standardizedSpelling, partOfSpeechTag);
            }
            token.getFeatures().put("category", partOfSpeechTag);
            token.getFeatures().put("lemma", lemma);
            token.getFeatures().put("spelling", correctedSpelling);
            token.getFeatures().put("standard", standardizedSpelling);
        }
    }

    /**
     * Computes the lemma for a spelling using the given lemmatizer.
     *
     * <p>If the lemmatizer reports that it cannot lemmatize the spelling, or
     * the tag's lemma word class is {@code "none"}, the spelling itself is
     * used. For a compound tag whose spelling splits into several word
     * pieces, each piece is lemmatized with the class of its own tag part and
     * the pieces are joined with {@code lemmaSeparator}; if the number of tag
     * parts and word pieces differ, the spelling is used. A result of
     * {@code "*"} is replaced by the spelling. The result is lower-cased
     * unless it contains the separator or the tag is a proper noun tag.</p>
     *
     * @param lemmatizer   lemmatizer to use
     * @param spelling     spelling to lemmatize
     * @param partOfSpeech part of speech tag assigned to the spelling
     * @return the lemma for the spelling
     */
    protected String getLemma(Lemmatizer lemmatizer, String spelling, String partOfSpeech) {
        String lemmata = spelling;
        String lemmaClass = this.partOfSpeechTags.getLemmaWordClass(partOfSpeech);
        if (!lemmatizer.cantLemmatize(spelling) && !lemmaClass.equals("none")) {
            List<String> wordList = this.spellingTokenizer.extractWords(spelling);
            if (!this.partOfSpeechTags.isCompoundTag(partOfSpeech) || wordList.size() == 1) {
                if (lemmaClass.length() == 0) {
                    // No word class known: try the "compound" class first and
                    // fall back to the class-less lemmatization if that
                    // leaves the spelling unchanged.
                    lemmata = lemmatizer.lemmatize(spelling, "compound");
                    if (lemmata.equals(spelling)) {
                        lemmata = lemmatizer.lemmatize(spelling);
                    }
                } else {
                    lemmata = lemmatizer.lemmatize(spelling, lemmaClass);
                }
            } else {
                String[] posTags = this.partOfSpeechTags.splitTag(partOfSpeech);
                // Only build a compound lemma when every word piece has its
                // own tag part; otherwise keep the spelling instead of
                // producing an empty lemma.
                if (posTags.length == wordList.size() && !wordList.isEmpty()) {
                    StringBuilder compoundLemma = new StringBuilder();
                    for (int i = 0; i < wordList.size(); ++i) {
                        if (i > 0) {
                            compoundLemma.append(lemmaSeparator);
                        }
                        String pieceClass = this.partOfSpeechTags.getLemmaWordClass(posTags[i]);
                        compoundLemma.append(lemmatizer.lemmatize(wordList.get(i), pieceClass));
                    }
                    lemmata = compoundLemma.toString();
                }
            }
        }
        if (lemmata.equals("*")) {
            lemmata = spelling;
        }
        if (lemmata.indexOf(lemmaSeparator) < 0 && !this.partOfSpeechTags.isProperNounTag(partOfSpeech)) {
            lemmata = lemmata.toLowerCase();
        }
        return lemmata;
    }

    /**
     * Returns the standardized form of a spelling.
     *
     * <p>The spelling is returned unchanged for proper noun tags, for noun
     * tags whose spelling has internal capitals, for foreign word tags and
     * for number tags. Otherwise the standardizer is asked for the standard
     * spelling within the major word class of the tag.</p>
     *
     * @param spelling     spelling to standardize
     * @param partOfSpeech part of speech tag assigned to the spelling
     * @return the standardized spelling, or {@code spelling} itself when the
     *         tag is exempt from standardization
     */
    protected String getStandardizedSpelling(String spelling, String partOfSpeech) {
        boolean exempt = this.partOfSpeechTags.isProperNounTag(partOfSpeech)
            || (this.partOfSpeechTags.isNounTag(partOfSpeech) && CharUtils.hasInternalCaps(spelling))
            || this.partOfSpeechTags.isForeignWordTag(partOfSpeech)
            || this.partOfSpeechTags.isNumberTag(partOfSpeech);
        if (exempt) {
            return spelling;
        }
        return this.standardizer.standardizeSpelling(spelling, this.partOfSpeechTags.getMajorWordClass(partOfSpeech));
    }
}

