public abstract class AbstractLemmatizer extends IsCloseableObject implements Lemmatizer, UsesLogger
| Modifier and Type | Field and Description |
|---|---|
protected java.util.Set<java.lang.String> |
dictionary
The dictionary.
|
protected char |
lemmaSeparator
The default lemma separator is the vertical bar character.
|
protected java.lang.String |
lemmaSeparatorString |
protected Lexicon |
lexicon
The lexicon.
|
protected Logger |
logger
Logger used for output.
|
| Constructor and Description |
|---|
AbstractLemmatizer() |
| Modifier and Type | Method and Description |
|---|---|
boolean |
cantLemmatize(java.lang.String spelling)
Check for words that cannot be lemmatized.
|
int |
countLemmata(java.lang.String lemma)
Get number of lemmata comprising this lemma.
|
java.lang.String |
getLemmaSeparator()
Get the lemma separator string.
|
Logger |
getLogger()
Get the logger.
|
boolean |
isCompoundLemma(java.lang.String lemma)
Check if lemma is compound lemma.
|
java.lang.String |
joinLemmata(java.lang.String[] lemmata)
Join separate lemmata into a compound lemma.
|
java.lang.String |
joinLemmata(java.lang.String[] lemmata,
java.lang.String separator)
Join separate lemmata into a compound lemma.
|
abstract java.lang.String |
lemmatize(java.lang.String spelling)
Returns a lemma given a spelling.
|
abstract java.lang.String |
lemmatize(java.lang.String spelling,
java.lang.String wordClass)
Returns a lemma given a spelling and a part of speech.
|
void |
setDictionary(java.util.Set<java.lang.String> dictionary)
Set the dictionary for checking lemmata.
|
void |
setLexicon(Lexicon lexicon)
Set the lexicon.
|
void |
setLogger(Logger logger)
Set the logger.
|
java.lang.String[] |
splitLemma(java.lang.String lemma)
Split compound lemma into separate lemmata.
|
close
protected char lemmaSeparator
protected java.lang.String lemmaSeparatorString
protected Logger logger
protected Lexicon lexicon
protected java.util.Set<java.lang.String> dictionary
public Logger getLogger()
getLogger in interface UsesLogger
public void setLogger(Logger logger)
setLogger in interface UsesLogger
logger - The logger.
public void setLexicon(Lexicon lexicon)
setLexicon in interface Lemmatizer
lexicon - The lexicon.
public void setDictionary(java.util.Set<java.lang.String> dictionary)
setDictionary in interface Lemmatizer
dictionary - The dictionary as a string set. May be null.
public abstract java.lang.String lemmatize(java.lang.String spelling)
lemmatize in interface Lemmatizer
spelling - The spelling.
public abstract java.lang.String lemmatize(java.lang.String spelling,
java.lang.String wordClass)
lemmatize in interface Lemmatizer
spelling - The spelling.
wordClass - The word class. The word class should be a major word class as defined in PartOfSpeech.
public boolean cantLemmatize(java.lang.String spelling)
cantLemmatize in interface Lemmatizer
spelling - The spelling to be lemmatized.
public java.lang.String getLemmaSeparator()
getLemmaSeparator in interface Lemmatizer
public java.lang.String joinLemmata(java.lang.String[] lemmata,
java.lang.String separator)
joinLemmata in interface Lemmatizer
lemmata - String array of lemmata.
separator - String to separate lemmata.
public java.lang.String joinLemmata(java.lang.String[] lemmata)
joinLemmata in interface Lemmatizer
lemmata - String array of part of speech lemmas.
public java.lang.String[] splitLemma(java.lang.String lemma)
splitLemma in interface Lemmatizer
lemma - The compound lemma.
public boolean isCompoundLemma(java.lang.String lemma)
isCompoundLemma in interface Lemmatizer
lemma - The lemma.
public int countLemmata(java.lang.String lemma)
countLemmata in interface Lemmatizer
lemma - The lemma.