/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer;

import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.AbstractLemmatizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.DefaultLemmatizerRule;
import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.Lemmatizer;
import edu.northwestern.at.morphadorner.corpuslinguistics.lemmatizer.LemmatizerRule;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.Map2D;
import edu.northwestern.at.utils.Map2DFactory;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.UnicodeReader;
import java.io.BufferedReader;
import java.io.IOException;
import java.net.URL;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * A lemmatizer driven by two externally loaded resources: a table of
 * irregular forms and a list of lemmatization rules, both grouped by
 * word class.
 *
 * <p>Both resource files share the same line-oriented layout, as read by
 * {@link #loadRules} and {@link #loadIrregularForms}:</p>
 * <ul>
 *   <li>blank lines and lines whose first non-blank character is
 *       {@code #} are skipped;</li>
 *   <li>a line whose first token ends with {@code :} starts a new word
 *       class section (the token without the colon is the class name);</li>
 *   <li>any other line is an entry belonging to the current section.</li>
 * </ul>
 */
public class RuleBasedLemmatizer
extends AbstractLemmatizer
implements Lemmatizer {
    /** Irregular forms, keyed by (word class, spelling), giving the lemma. */
    protected Map2D<String, String, String> irregularForms = Map2DFactory.createNewMap2D();

    /** Word classes seen as section headers while loading irregular forms. */
    protected Set<String> irregularFormsWordClasses = new TreeSet<String>();

    /** Lemmatization rules, keyed by word class, in file order. */
    protected Map<String, List<LemmatizerRule>> rules = MapFactory.createNewMap();

    /** Word classes seen as section headers while loading rules. */
    protected Set<String> rulesWordClasses = new TreeSet<String>();

    /**
     * Loads lemmatization rules from a URL.
     *
     * <p>Each non-header entry line is passed verbatim (after trimming) to
     * the {@link DefaultLemmatizerRule} constructor and appended to the rule
     * list of the current word class. Entries that appear before any header
     * are stored under the empty word class {@code ""}.</p>
     *
     * @param url      location of the rules file
     * @param encoding character encoding passed to the reader
     * @throws IOException if the resource cannot be opened or read
     */
    public void loadRules(URL url, String encoding) throws IOException {
        BufferedReader buffer = new BufferedReader(new UnicodeReader(url.openStream(), encoding));
        try {
            String posTag = "";
            List<LemmatizerRule> rulesForTag = ListFactory.createNewList();
            String line;
            while ((line = buffer.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0 || line.charAt(0) == '#') {
                    continue;
                }
                String[] tokens = StringUtils.makeTokenArray(line);
                if (tokens.length == 0) {
                    continue;
                }
                String head = tokens[0];
                if (head.endsWith(":")) {
                    // Section header: flush the rules gathered for the
                    // previous word class before switching to the new one.
                    if (!rulesForTag.isEmpty()) {
                        this.rules.put(posTag, rulesForTag);
                        rulesForTag = ListFactory.createNewList();
                    }
                    posTag = head.substring(0, head.length() - 1);
                    this.rulesWordClasses.add(posTag);
                    continue;
                }
                rulesForTag.add(new DefaultLemmatizerRule(line));
            }
            // Flush the final section, which has no following header.
            if (!rulesForTag.isEmpty()) {
                this.rules.put(posTag, rulesForTag);
            }
        } finally {
            // Close even when reading or rule construction fails.
            buffer.close();
        }
    }

    /**
     * Loads irregular forms from a URL.
     *
     * <p>For each non-header entry line the first token is the spelling and
     * the second token, when present, is its lemma; with only one token the
     * spelling is recorded as its own lemma. Entries that appear before any
     * header are stored under the empty word class {@code ""}.</p>
     *
     * @param url      location of the irregular forms file
     * @param encoding character encoding passed to the reader
     * @throws IOException if the resource cannot be opened or read
     */
    public void loadIrregularForms(URL url, String encoding) throws IOException {
        BufferedReader buffer = new BufferedReader(new UnicodeReader(url.openStream(), encoding));
        try {
            String posTag = "";
            String line;
            while ((line = buffer.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0 || line.charAt(0) == '#') {
                    continue;
                }
                String[] tokens = StringUtils.makeTokenArray(line);
                if (tokens.length == 0) {
                    continue;
                }
                String head = tokens[0];
                if (head.endsWith(":")) {
                    posTag = head.substring(0, head.length() - 1);
                    this.irregularFormsWordClasses.add(posTag);
                    continue;
                }
                String lemma = tokens.length > 1 ? tokens[1] : head;
                this.irregularForms.put(posTag, head, lemma);
            }
        } finally {
            // Close even when reading fails part-way through.
            buffer.close();
        }
    }

    /**
     * Lemmatizes a spelling given its word class.
     *
     * <p>A {@code null} or blank word class delegates to
     * {@link #lemmatize(String)}. Otherwise the irregular forms table is
     * consulted first (original spelling, then lower-cased spelling) using
     * the whole trimmed, lower-cased word class string as the key. If no
     * irregular form is found, the word class is split on commas and, for
     * each part in turn, the first rule that changes the current lemma is
     * applied before moving on to the next part.</p>
     *
     * @param spelling  the spelling to lemmatize
     * @param wordClass the word class, optionally several separated by commas
     * @return the lemma with {@code !} characters removed by
     *         {@link #cleanUpLemma}, or {@code spelling} unchanged when it
     *         cannot be lemmatized or the resulting lemma is empty
     */
    @Override
    public String lemmatize(String spelling, String wordClass) {
        if (wordClass == null) {
            return this.lemmatize(spelling);
        }
        String lcWordClass = wordClass.trim().toLowerCase();
        // Test the trimmed value so a whitespace-only word class is treated
        // the same as an empty one.
        if (lcWordClass.length() == 0) {
            return this.lemmatize(spelling);
        }
        if (this.cantLemmatize(spelling)) {
            return spelling;
        }
        String[] lcWordClasses = lcWordClass.split(",");
        String lcSpelling = spelling.toLowerCase();
        String lemma = this.irregularForms.get(lcWordClass, spelling);
        if (lemma == null) {
            lemma = this.irregularForms.get(lcWordClass, lcSpelling);
        }
        if (lemma == null) {
            lemma = lcSpelling;
            for (String lcClass : lcWordClasses) {
                List<LemmatizerRule> rulesForWordClass = this.rules.get(lcClass);
                if (rulesForWordClass == null || rulesForWordClass.isEmpty()) {
                    continue;
                }
                for (LemmatizerRule rule : rulesForWordClass) {
                    String newLemma = rule.apply(lemma, this.dictionary);
                    if (!newLemma.equals(lemma)) {
                        // Only the first rule that changes the lemma fires
                        // for this word class; continue with the next class.
                        lemma = newLemma;
                        break;
                    }
                }
            }
        }
        return lemma == null || lemma.length() == 0 ? spelling : this.cleanUpLemma(lemma);
    }

    /**
     * Removes every {@code !} from a lemma via
     * {@code StringUtils.replaceAll(lemma, "!", "")}.
     *
     * @param lemma the lemma to clean
     * @return the result of the replacement
     */
    public String cleanUpLemma(String lemma) {
        return StringUtils.replaceAll(lemma, "!", "");
    }

    /**
     * Lemmatizes a spelling without a known word class.
     *
     * <p>Every loaded irregular-forms word class is tried in set order with
     * the spelling as given; the first lemma that differs from the spelling
     * is returned. Failing that, every loaded rules word class is tried in
     * set order against the lower-cased spelling, and the result of the
     * first rule that changes it is returned.</p>
     *
     * <p>NOTE(review): unlike the two-argument overload, the irregular forms
     * lookup here does not retry with the lower-cased spelling — confirm
     * whether that asymmetry is intended.</p>
     *
     * @param spelling the spelling to lemmatize
     * @return the cleaned lemma, the spelling itself when
     *         {@code cantLemmatize} rejects it, or the cleaned spelling when
     *         nothing matched
     */
    @Override
    public String lemmatize(String spelling) {
        if (this.cantLemmatize(spelling)) {
            return spelling;
        }
        for (String irregularClass : this.irregularFormsWordClasses) {
            String irregular = this.irregularForms.get(irregularClass.toLowerCase(), spelling);
            if (irregular != null && !irregular.equals(spelling)) {
                return this.cleanUpLemma(irregular);
            }
        }
        String lemma = spelling.toLowerCase();
        for (String ruleClass : this.rulesWordClasses) {
            List<LemmatizerRule> rulesForWordClass = this.rules.get(ruleClass.toLowerCase());
            if (rulesForWordClass == null || rulesForWordClass.isEmpty()) {
                continue;
            }
            for (LemmatizerRule rule : rulesForWordClass) {
                String newLemma = rule.apply(lemma, this.dictionary);
                if (!newLemma.equals(lemma)) {
                    return this.cleanUpLemma(newLemma);
                }
            }
        }
        // Nothing matched: every irregular lookup was either absent or equal
        // to the spelling, so the fallback is the spelling itself.
        return this.cleanUpLemma(spelling);
    }
}

