public abstract class AbstractSpellingStandardizer extends IsCloseableObject implements SpellingStandardizer, UsesLogger
| Modifier and Type | Field and Description |
|---|---|
protected java.util.Set<java.lang.String> |
alternateSpellingsWordClasses
Word classes of alternate spellings.
|
protected static java.lang.String |
defaultSpellingsByWordClassFileName
Path to list of irregular word forms.
|
protected Lexicon |
lexicon
Lexicon associated with this standardizer.
|
protected Logger |
logger
Logger used for output.
|
protected TaggedStrings |
mappedSpellings
The map with alternate spellings as keys and standard spellings
as values.
|
protected Map2D<java.lang.String,java.lang.String,java.lang.String> |
spellingsByWordClass
Irregular forms.
|
protected java.util.Set<java.lang.String> |
standardSpellingSet
The set of standard spellings.
|
| Constructor and Description |
|---|
AbstractSpellingStandardizer()
Create abstract spelling standardizer.
|
| Modifier and Type | Method and Description |
|---|---|
void |
addCachedSpelling(java.lang.String alternateSpelling,
java.lang.String standardSpelling)
Cache a generated mapped spelling.
|
void |
addMappedSpelling(java.lang.String alternateSpelling,
java.lang.String standardSpelling)
Add a mapped spelling.
|
void |
addStandardSpelling(java.lang.String standardSpelling)
Add a standard spelling.
|
void |
addStandardSpellings(java.util.Collection<java.lang.String> standardSpellings)
Add standard spellings from a collection.
|
java.lang.String |
fixCapitalization(java.lang.String spelling,
java.lang.String standardSpelling)
Fix capitalization of standardized spelling.
|
Lexicon |
getLexicon()
Get the word lexicon.
|
Logger |
getLogger()
Get the logger.
|
TaggedStrings |
getMappedSpellings()
Return the mapped spellings.
|
int |
getNumberOfAlternateSpellings()
Returns number of alternate spellings.
|
int[] |
getNumberOfAlternateSpellingsByWordClass()
Returns number of alternate spellings by word class.
|
int |
getNumberOfStandardSpellings()
Returns number of standard spellings.
|
java.util.Set<java.lang.String> |
getStandardSpellings()
Return the standard spellings.
|
void |
loadAlternativeSpellings(java.io.Reader reader,
java.lang.String delimChars)
Loads alternative spellings from a reader.
|
void |
loadAlternativeSpellings(java.net.URL url,
boolean compressed,
java.lang.String encoding,
java.lang.String delimChars)
Loads alternate spellings from a URL.
|
void |
loadAlternativeSpellings(java.net.URL url,
java.lang.String encoding,
java.lang.String delimChars)
Loads alternate spellings from a URL.
|
void |
loadAlternativeSpellingsByWordClass(java.net.URL spellingsURL,
java.lang.String encoding)
Load alternate to standard spellings by word class.
|
void |
loadStandardSpellings(java.io.Reader reader)
Loads standard spellings from a reader.
|
void |
loadStandardSpellings(java.net.URL url,
boolean compressed,
java.lang.String encoding)
Loads standard spellings from a URL.
|
void |
loadStandardSpellings(java.net.URL url,
java.lang.String encoding)
Loads standard spellings from a URL.
|
java.lang.String |
preprocessSpelling(java.lang.String spelling)
Preprocess spelling.
|
void |
setLexicon(Lexicon lexicon)
Set the lexicon.
|
void |
setLogger(Logger logger)
Set the logger.
|
void |
setMappedSpellings(TaggedStrings mappedSpellings)
Sets map which maps alternate spellings to standard spellings.
|
void |
setStandardSpellings(java.util.Set<java.lang.String> standardSpellings)
Sets standard spellings.
|
java.lang.String[] |
standardizeSpelling(java.lang.String spelling)
Returns standard spellings given a spelling.
|
java.lang.String |
standardizeSpelling(java.lang.String spelling,
java.lang.String wordClass)
Returns a standard spelling given a standard or alternate spelling.
|
close
protected TaggedStrings mappedSpellings
protected java.util.Set<java.lang.String> standardSpellingSet
protected Map2D<java.lang.String,java.lang.String,java.lang.String> spellingsByWordClass
Spellings disambiguated by word class are stored in a HashMap2D. The compound key consists of the word class and alternate spelling, and the value is the standardized spelling.
protected java.util.Set<java.lang.String> alternateSpellingsWordClasses
protected static java.lang.String defaultSpellingsByWordClassFileName
protected Logger logger
protected Lexicon lexicon
public AbstractSpellingStandardizer()
public void loadAlternativeSpellingsByWordClass(java.net.URL spellingsURL,
java.lang.String encoding)
throws java.io.IOException
Specified by: loadAlternativeSpellingsByWordClass in interface SpellingStandardizer
Parameters: spellingsURL - URL of alternative spellings by word class.
encoding - Character set encoding for spellings.
Throws: java.io.IOException
public void loadAlternativeSpellings(java.net.URL url,
boolean compressed,
java.lang.String encoding,
java.lang.String delimChars)
throws java.io.IOException
Specified by: loadAlternativeSpellings in interface SpellingStandardizer
Parameters: url - URL containing alternate spellings to standard spellings mappings.
compressed - true if gzip compressed.
encoding - Text encoding (utf-8, 8859_1, etc.).
delimChars - Delimiter characters separating spelling pairs.
Throws: java.io.IOException
public void loadAlternativeSpellings(java.net.URL url,
java.lang.String encoding,
java.lang.String delimChars)
throws java.io.IOException
Specified by: loadAlternativeSpellings in interface SpellingStandardizer
Parameters: url - URL containing alternate spellings to standard spellings mappings.
encoding - Text encoding (utf-8, 8859_1, etc.).
delimChars - Delimiter characters separating spelling pairs.
Throws: java.io.IOException
public void loadAlternativeSpellings(java.io.Reader reader,
java.lang.String delimChars)
throws java.io.IOException
Specified by: loadAlternativeSpellings in interface SpellingStandardizer
Parameters: reader - The reader.
delimChars - Delimiter characters separating spelling pairs.
Throws: java.io.IOException
public void loadStandardSpellings(java.net.URL url,
boolean compressed,
java.lang.String encoding)
throws java.io.IOException
Specified by: loadStandardSpellings in interface SpellingStandardizer
Parameters: url - URL containing standard spellings.
compressed - true if gzip compressed.
encoding - Character set encoding for spellings.
Throws: java.io.IOException
public void loadStandardSpellings(java.net.URL url,
java.lang.String encoding)
throws java.io.IOException
Specified by: loadStandardSpellings in interface SpellingStandardizer
Parameters: url - URL containing standard spellings.
encoding - Character set encoding for spellings.
Throws: java.io.IOException
public void loadStandardSpellings(java.io.Reader reader)
throws java.io.IOException
Specified by: loadStandardSpellings in interface SpellingStandardizer
Parameters: reader - The reader.
Throws: java.io.IOException
public void addMappedSpelling(java.lang.String alternateSpelling,
java.lang.String standardSpelling)
Specified by: addMappedSpelling in interface SpellingStandardizer
Parameters: alternateSpelling - The alternate spelling.
standardSpelling - The corresponding standard spelling.
public void addStandardSpelling(java.lang.String standardSpelling)
Specified by: addStandardSpelling in interface SpellingStandardizer
Parameters: standardSpelling - A standard spelling.
public void addStandardSpellings(java.util.Collection<java.lang.String> standardSpellings)
Specified by: addStandardSpellings in interface SpellingStandardizer
Parameters: standardSpellings - A collection of standard spellings.
public void addCachedSpelling(java.lang.String alternateSpelling,
java.lang.String standardSpelling)
Parameters: alternateSpelling - The alternate spelling.
standardSpelling - The corresponding standard spelling.
public void setMappedSpellings(TaggedStrings mappedSpellings)
Specified by: setMappedSpellings in interface SpellingStandardizer
Parameters: mappedSpellings - Map with alternate spellings as keys and standard spellings as values.
public void setStandardSpellings(java.util.Set<java.lang.String> standardSpellings)
Specified by: setStandardSpellings in interface SpellingStandardizer
Parameters: standardSpellings - Set of standard spellings.
public java.lang.String[] standardizeSpelling(java.lang.String spelling)
Specified by: standardizeSpelling in interface SpellingStandardizer
Parameters: spelling - The spelling.
If no spelling map is defined, the spelling is returned unchanged.
public java.lang.String standardizeSpelling(java.lang.String spelling,
java.lang.String wordClass)
Specified by: standardizeSpelling in interface SpellingStandardizer
Parameters: spelling - The spelling.
wordClass - The major word class.
public int getNumberOfAlternateSpellings()
Specified by: getNumberOfAlternateSpellings in interface SpellingStandardizer
public int[] getNumberOfAlternateSpellingsByWordClass()
Specified by: getNumberOfAlternateSpellingsByWordClass in interface SpellingStandardizer
public int getNumberOfStandardSpellings()
Specified by: getNumberOfStandardSpellings in interface SpellingStandardizer
public TaggedStrings getMappedSpellings()
Specified by: getMappedSpellings in interface SpellingStandardizer
public java.util.Set<java.lang.String> getStandardSpellings()
Specified by: getStandardSpellings in interface SpellingStandardizer
public java.lang.String preprocessSpelling(java.lang.String spelling)
Specified by: preprocessSpelling in interface SpellingStandardizer
Parameters: spelling - Spelling to preprocess.
By default, no preprocessing is applied; the original spelling is returned unchanged.
public java.lang.String fixCapitalization(java.lang.String spelling,
java.lang.String standardSpelling)
Specified by: fixCapitalization in interface SpellingStandardizer
Parameters: spelling - The original spelling.
standardSpelling - The candidate standard spelling.
public Logger getLogger()
Specified by: getLogger in interface UsesLogger
public void setLogger(Logger logger)
Specified by: setLogger in interface UsesLogger
Parameters: logger - The logger.
public Lexicon getLexicon()
public void setLexicon(Lexicon lexicon)
lexicon - Lexicon used for tagging.