/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.examples;

import edu.northwestern.at.morphadorner.corpuslinguistics.adornedword.AdornedWord;
import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.DefaultLemmatizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.Lemmatizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.lexicon.Lexicon;
import edu.northwestern.at.morphadorner.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.morphadorner.corpuslinguistics.postagger.DefaultPartOfSpeechTagger;
import edu.northwestern.at.morphadorner.corpuslinguistics.sentencesplitter.DefaultSentenceSplitter;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingstandardizer.DefaultSpellingStandardizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.spellingstandardizer.SpellingStandardizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.DefaultWordTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.PennTreebankTokenizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.StringUtils;
import java.util.List;

/**
 * Command-line example that adorns a single string: the text is split into
 * sentences and tokens, each token is part-of-speech tagged, and a standard
 * spelling and lemma are attached to every token before the result is
 * printed to standard output, one token per line.
 */
public class AdornAString {
    /**
     * Separator inserted between the lemma pieces of a compound word.
     * Left public and non-final because external code may reassign it.
     */
    public static String lemmaSeparator = "|";

    /**
     * Program entry point.
     *
     * @param args command-line arguments; {@code args[0]} is the text to adorn
     */
    public static void main(String[] args) {
        try {
            AdornAString.adornText(args);
        }
        catch (Exception e) {
            // Top-level boundary of an example program: report and fall through.
            e.printStackTrace();
        }
    }

    /**
     * Adorns the text in {@code args[0]} and prints one line per token holding
     * its index within the sentence, spelling, part of speech, standard
     * spelling and lemmata. Each sentence is preceded by a numbered rule.
     *
     * <p>When no argument is supplied, a message is printed and the JVM exits
     * with status 1.
     *
     * @param args command-line arguments; only the first element is used
     * @throws Exception if any of the adornment components fails
     */
    public static void adornText(String[] args) throws Exception {
        if (args.length < 1) {
            System.out.println("No text to adorn.");
            System.exit(1);
        }
        String textToAdorn = args[0];

        // The tagger owns the lexicon, which in turn supplies the tag set.
        DefaultPartOfSpeechTagger partOfSpeechTagger = new DefaultPartOfSpeechTagger();
        Lexicon wordLexicon = partOfSpeechTagger.getLexicon();
        PartOfSpeechTags partOfSpeechTags = wordLexicon.getPartOfSpeechTags();

        DefaultWordTokenizer wordTokenizer = new DefaultWordTokenizer();
        PennTreebankTokenizer spellingTokenizer = new PennTreebankTokenizer();

        DefaultSentenceSplitter sentenceSplitter = new DefaultSentenceSplitter();
        sentenceSplitter.setPartOfSpeechGuesser(partOfSpeechTagger.getPartOfSpeechGuesser());

        // The lemmatizer shares the standardizer's list of standard spellings.
        DefaultLemmatizer lemmatizer = new DefaultLemmatizer();
        DefaultSpellingStandardizer standardizer = new DefaultSpellingStandardizer();
        lemmatizer.setDictionary(standardizer.getStandardSpellings());

        List<List<String>> sentences = sentenceSplitter.extractSentences(textToAdorn, wordTokenizer);
        List<List<AdornedWord>> taggedSentences = partOfSpeechTagger.tagSentences(sentences);

        // Loop-invariant: the rule drawn on either side of the sentence number.
        String rule = StringUtils.dupl("-", 30);

        // Bound the loop by the list that is actually indexed, so a tagger that
        // returns fewer sentences than it was given cannot cause an
        // IndexOutOfBoundsException.
        for (int i = 0; i < taggedSentences.size(); ++i) {
            List<AdornedWord> sentence = taggedSentences.get(i);
            System.out.println(rule + " " + (i + 1) + " " + rule);
            for (int j = 0; j < sentence.size(); ++j) {
                AdornedWord adornedWord = sentence.get(j);
                AdornAString.setStandardSpelling(adornedWord, standardizer, partOfSpeechTags);
                AdornAString.setLemma(adornedWord, wordLexicon, lemmatizer, partOfSpeechTags, spellingTokenizer);
                System.out.println(
                    StringUtils.rpad(String.valueOf(j + 1), 3) + ": "
                        + StringUtils.rpad(adornedWord.getSpelling(), 20)
                        + StringUtils.rpad(adornedWord.getPartsOfSpeech(), 8)
                        + StringUtils.rpad(adornedWord.getStandardSpelling(), 20)
                        + adornedWord.getLemmata());
            }
        }
    }

    /**
     * Sets the standard spelling of an adorned word.
     *
     * <p>The original spelling is kept unchanged when the word's tag is a
     * proper noun, foreign word or number tag, or a noun tag on a spelling
     * with internal capitals. Otherwise the standardizer is consulted, and its
     * answer is used only when it differs from the original spelling by more
     * than letter case.
     *
     * @param adornedWord      word to update; its spelling and part of speech are read
     * @param standardizer     spelling standardizer to consult
     * @param partOfSpeechTags tag set used to classify the word's part of speech
     */
    public static void setStandardSpelling(AdornedWord adornedWord, SpellingStandardizer standardizer, PartOfSpeechTags partOfSpeechTags) {
        String spelling = adornedWord.getSpelling();
        String partOfSpeech = adornedWord.getPartsOfSpeech();

        boolean keepOriginal = partOfSpeechTags.isProperNounTag(partOfSpeech)
            || (partOfSpeechTags.isNounTag(partOfSpeech) && CharUtils.hasInternalCaps(spelling))
            || partOfSpeechTags.isForeignWordTag(partOfSpeech)
            || partOfSpeechTags.isNumberTag(partOfSpeech);

        String standardSpelling = spelling;
        if (!keepOriginal) {
            String candidate = standardizer.standardizeSpelling(
                spelling, partOfSpeechTags.getMajorWordClass(partOfSpeech));
            // A candidate that differs only in case is discarded so the
            // original capitalisation survives.
            if (!candidate.equalsIgnoreCase(spelling)) {
                standardSpelling = candidate;
            }
        }
        adornedWord.setStandardSpelling(standardSpelling);
    }

    /**
     * Sets the lemmata of an adorned word.
     *
     * <p>The spelling itself is used as the lemma when the lemmatizer reports
     * that it cannot lemmatize the spelling or when the lemma word class of
     * the tag is {@code "none"}. Otherwise the spelling is first lemmatized
     * with the {@code "compound"} class; if that leaves it unchanged, the word
     * is lemmatized either as a whole or, for compound tags covering several
     * tokens, piece by piece with the pieces joined by {@link #lemmaSeparator}.
     *
     * @param adornedWord       word to update; its spelling and part of speech are read
     * @param lexicon           word lexicon (not consulted by this method)
     * @param lemmatizer        lemmatizer to apply
     * @param partOfSpeechTags  tag set used to classify and split the tag
     * @param spellingTokenizer tokenizer used to split the spelling into pieces
     */
    public static void setLemma(AdornedWord adornedWord, Lexicon lexicon, Lemmatizer lemmatizer, PartOfSpeechTags partOfSpeechTags, WordTokenizer spellingTokenizer) {
        String spelling = adornedWord.getSpelling();
        String partOfSpeech = adornedWord.getPartsOfSpeech();
        String lemmaClass = partOfSpeechTags.getLemmaWordClass(partOfSpeech);

        if (lemmatizer.cantLemmatize(spelling) || lemmaClass.equals("none")) {
            adornedWord.setLemmata(spelling);
            return;
        }

        String lemmata = lemmatizer.lemmatize(spelling, "compound");
        if (lemmata.equals(spelling)) {
            // The "compound" pass changed nothing, so fall back to the
            // class-specific rules.
            List<String> wordList = spellingTokenizer.extractWords(spelling);
            if (!partOfSpeechTags.isCompoundTag(partOfSpeech) || wordList.size() == 1) {
                lemmata = lemmaClass.isEmpty()
                    ? lemmatizer.lemmatize(spelling)
                    : lemmatizer.lemmatize(spelling, lemmaClass);
            } else {
                lemmata = lemmatizeCompound(wordList, partOfSpeech, lemmatizer, partOfSpeechTags);
            }
        }
        adornedWord.setLemmata(lemmata);
    }

    /**
     * Lemmatizes each piece of a compound word with the lemma class of its
     * matching tag piece and joins the results with {@link #lemmaSeparator}.
     *
     * @return the joined lemmata, or the empty string when the number of tag
     *         pieces differs from the number of word pieces
     */
    private static String lemmatizeCompound(List<String> wordList, String partOfSpeech, Lemmatizer lemmatizer, PartOfSpeechTags partOfSpeechTags) {
        String[] posTags = partOfSpeechTags.splitTag(partOfSpeech);
        if (posTags.length != wordList.size()) {
            // NOTE(review): a tag/word count mismatch yields an empty lemma;
            // kept as-is for compatibility -- confirm this is intended.
            return "";
        }
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < wordList.size(); ++i) {
            if (i > 0) {
                joined.append(lemmaSeparator);
            }
            String pieceClass = partOfSpeechTags.getLemmaWordClass(posTags[i]);
            joined.append(lemmatizer.lemmatize(wordList.get(i), pieceClass));
        }
        return joined.toString();
    }
}

