|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object
rita.RiObject
rita.RiLexicon
public class RiLexicon
RiLexicon represents the core 'dictionary' (or lexicon) for the RiTa tools. It contains ~35,000 words augmented with phonemic and syllabic data, as well as a list of valid parts-of-speech for each. The lexicon can be extended and/or customized for additional words, usages, or pronunciations.
Additionally the lexicon is equipped with implementations of a variety of matching algorithms (min-edit-distance, soundex, anagrams, alliteration, rhymes, looks-like, etc.) based on combinations of letters, syllables and phonemes. An example use:
RiLexicon lex = new RiLexicon(this);
String[] similars = lex.similarBySound("cat");
String[] rhymes = lex.getSimpleRhymes("cat");
// etc.
Note: If you wish to modify or customize the lexicon (e.g., add words, or change pronunciations) you can do so by editing the 'rita_addenda.txt' file, found in the $SKETCH_DIR/libraries/rita folder, and placing the modified version in the 'data' folder of your sketch.
| Field Summary | |
|---|---|
static char |
STRESSED
|
static char |
UNSTRESSED
|
static boolean |
VERBOSE_WARNINGS
|
| Fields inherited from interface processing.core.PConstants |
|---|
A, AB, ADD, AG, ALPHA, ALPHA_MASK, ALT, AMBIENT, AR, ARC, ARGB, ARROW, B, BACKSPACE, BASELINE, BEEN_LIT, BEVEL, BLEND, BLUE_MASK, BLUR, BOTTOM, BOX, BURN, CENTER, CENTER_DIAMETER, CENTER_RADIUS, CHATTER, CLOSE, CMYK, CODED, COMPLAINT, CONTROL, CORNER, CORNERS, CROSS, CUSTOM, DA, DARKEST, DB, DEG_TO_RAD, DELETE, DG, DIAMETER, DIFFERENCE, DILATE, DIRECTIONAL, DISABLE_ACCURATE_TEXTURES, DISABLE_DEPTH_SORT, DISABLE_DEPTH_TEST, DISABLE_OPENGL_2X_SMOOTH, DISABLE_OPENGL_ERROR_REPORT, DODGE, DOWN, DR, DXF, EB, EDGE, EG, ELLIPSE, ENABLE_ACCURATE_TEXTURES, ENABLE_DEPTH_SORT, ENABLE_DEPTH_TEST, ENABLE_NATIVE_FONTS, ENABLE_OPENGL_2X_SMOOTH, ENABLE_OPENGL_4X_SMOOTH, ENABLE_OPENGL_ERROR_REPORT, ENTER, EPSILON, ER, ERODE, ERROR_BACKGROUND_IMAGE_FORMAT, ERROR_BACKGROUND_IMAGE_SIZE, ERROR_PUSHMATRIX_OVERFLOW, ERROR_PUSHMATRIX_UNDERFLOW, ERROR_TEXTFONT_NULL_PFONT, ESC, EXCLUSION, G, GIF, GRAY, GREEN_MASK, HALF_PI, HAND, HARD_LIGHT, HINT_COUNT, HSB, IMAGE, INVERT, JAVA2D, JPEG, LEFT, LIGHTEST, LINE, LINES, LINUX, MACOSX, MAX_FLOAT, MAX_INT, MIN_FLOAT, MIN_INT, MITER, MODEL, MULTIPLY, NORMAL, NORMALIZED, NX, NY, NZ, OPAQUE, OPEN, OPENGL, ORTHOGRAPHIC, OTHER, OVERLAY, P2D, P3D, PATH, PDF, PERSPECTIVE, PI, platformNames, POINT, POINTS, POLYGON, POSTERIZE, PROBLEM, PROJECT, QUAD, QUAD_STRIP, QUADS, QUARTER_PI, R, RAD_TO_DEG, RADIUS, RECT, RED_MASK, REPLACE, RETURN, RGB, RIGHT, ROUND, SA, SB, SCREEN, SG, SHAPE, SHIFT, SHINE, SOFT_LIGHT, SPB, SPG, SPHERE, SPOT, SPR, SQUARE, SR, SUBTRACT, SW, TAB, TARGA, THIRD_PI, THRESHOLD, TIFF, TOP, TRIANGLE, TRIANGLE_FAN, TRIANGLE_STRIP, TRIANGLES, TWO_PI, TX, TY, TZ, U, UP, V, VERTEX_FIELD_COUNT, VW, VX, VY, VZ, WAIT, WHITESPACE, WINDOWS, X, Y, Z |
| Constructor Summary | |
|---|---|
RiLexicon()
|
|
RiLexicon(processing.core.PApplet pApplet)
|
|
RiLexicon(processing.core.PApplet pApplet,
java.lang.String lexiconFile)
|
|
| Method Summary | |
|---|---|
java.lang.String[] |
containingStringsByLetter(java.lang.String input)
Returns valid words (in lexicon) using both substring and superstring matching. |
boolean |
contains(java.lang.String word)
Returns true if the word exists in the lexicon |
java.lang.String[] |
getAlliterations(java.lang.String input)
Finds alliterations by comparing the phonemes of the input string to those of each word in the lexicon |
void |
getAlliterations(java.lang.String input,
java.util.Set result)
Finds alliterations by comparing the phonemes of the input string to those of each word in the lexicon |
void |
getAlliterations(java.lang.String input,
java.util.Set result,
int minLength)
Finds alliterations by comparing the phonemes of the input string to those of each word in the lexicon |
java.util.Map |
getFeatures(java.lang.String word)
|
java.util.Map |
getLexicalData()
Returns the raw data (as a Map) used in the lexicon, allowing for deletion or modification of existing lexical entries. |
java.lang.String[] |
getPosEntries(java.lang.String word)
Returns the list of possible parts-of-speech for the word, or null if not found. |
java.lang.String |
getPosStr(java.lang.String word)
Returns |
java.lang.String |
getRandomWord()
Returns a random word from the lexicon |
java.lang.String |
getRandomWord(int targetLength)
Returns a random word from the lexicon with the specified target-length (where length>0), or null if no such word exists. |
java.lang.String |
getRandomWord(java.lang.String pos)
Returns a random word from the lexicon with the specified part-of-speech |
java.lang.String |
getRandomWord(java.lang.String pos,
int targetLength)
Returns a random word from the lexicon with the specified part-of-speech and target-length, or null if no such word exists. |
java.lang.String |
getRandomWordWithSyllableCount(int syllableCount)
Returns a random word from the lexicon with the specified syllable-count or null if no such word exists. |
java.lang.String |
getRandomWordWithSyllableCount(java.lang.String pos,
int syllableCount)
Returns a random word from the lexicon with the specified part-of-speech and syllable-count, or null if no such word exists. |
java.lang.String[] |
getRhymes(java.lang.String input)
Returns the rhymes for a given word or null if none found |
void |
getRhymes(java.lang.String input,
java.util.Set result)
Returns the rhymes for a given word or null if none found. |
java.util.Set |
getWords()
Returns the full set of words in the lexicon (including those from user-addenda) |
java.util.Set |
getWords(java.lang.String regex)
Returns the set of words in the lexicon (including those from user-addenda) that match the supplied regular expression. |
boolean |
isAlliteration(java.lang.String wordA,
java.lang.String wordB)
Returns true if the first stressed consonant of the two words match, else false. |
boolean |
isContaining(java.lang.String orig,
java.lang.String toCheck)
Returns true if orig is a sub or super-string of
toCheck. |
boolean |
isRhyme(java.lang.String wordA,
java.lang.String wordB)
Returns true if the two words rhyme (that is, if their final stressed phoneme and all following phonemes are identical) else false. |
boolean |
isStopWord(java.lang.String word)
Returns true if the word is a 'stop' (or 'closed-class') word else false. |
boolean |
isSubstring(java.lang.String orig,
java.lang.String toCheck)
Returns true if orig is a substring of
toCheck. |
boolean |
isSuperstring(java.lang.String orig,
java.lang.String toCheck)
Returns true if orig is a superstring of
toCheck. |
java.util.Iterator |
iterator()
Returns an iterator over the words in lexicon |
java.util.Iterator |
iterator(java.lang.String regex)
Returns an iterator over the words in lexicon matching the supplied regular expression. |
static void |
main(java.lang.String[] args)
|
static void |
mainX(java.lang.String[] args)
|
java.util.Iterator |
posIterator(java.lang.String pos)
Returns an iterator over the words in lexicon, for the supplied part-of-speech |
void |
preloadFeatures()
Use this method to preload the Lexicon with feature data (stress, syllables, pos, phones, etc). |
java.util.Iterator |
randomIterator()
Returns an iterator over the words in lexicon beginning at a random offset. |
static java.util.Iterator |
randomIterator(java.util.Set s)
Utility method that returns a random-iterator over the specified set. |
java.util.Iterator |
randomIterator(java.lang.String regex)
Returns an iterator over the words in the lexicon matching the supplied regular expression beginning from a random offset. |
java.util.Iterator |
randomPosIterator(java.lang.String pos)
Returns an iterator over the words in lexicon, for the supplied part-of-speech beginning at a random offset. |
void |
setLexicalData(java.util.Map lexicalData)
Sets the raw data to be used in the lexicon, replacing all default words and features with those specified in the map. |
java.lang.String[] |
similarByLetter(java.lang.String input)
Compares the characters of the input string (using a version of the min-edit distance algorithm) to each word in the lexicon, returning the set of closest matches. |
int |
similarByLetter(java.lang.String input,
java.util.Set result)
Compares the characters of the input string (using a version of the min-edit distance algorithm) to each word in the lexicon, adding the set of closest matches to result. |
int |
similarByLetter(java.lang.String input,
java.util.Set result,
boolean preserveLength)
Compares the characters of the input string (using a version of the min-edit distance algorithm) to each word in the lexicon, adding the set of closest matches to result. |
int |
similarByLetter(java.lang.String input,
java.util.Set result,
int minMed)
Compares the characters of the input string (using a version of the min-edit distance algorithm) to each word in the lexicon, adding the set of closest matches to result,
considering all matches where the edit distance >= 'minMed'. |
int |
similarByLetter(java.lang.String input,
java.util.Set result,
int minMed,
boolean preserveLength)
Compares the characters of the input string (using a version of the min-edit distance algorithm) to each word in the lexicon, adding the set of closest matches to result,
considering all matches where the edit distance >= 'minMed'. |
java.lang.String[] |
similarBySound(java.lang.String input)
Compares the phonemes of the input String to those of each word in the lexicon, returning the set of closest matches as a String[]. |
int |
similarBySound(java.lang.String input,
java.util.Set result)
|
java.lang.String[] |
similarBySoundAndLetter(java.lang.String input)
First calls similarBySound(), then filters the result set by the algorithm used in similarByLetter(); (useful when similarBySound() returns too large a result set) |
java.util.Set |
singleLetterDeletes(java.lang.String input)
|
java.util.Set |
singleLetterInsertions(java.lang.String input)
|
java.util.Set |
singleLetterSubtitutions(java.lang.String input)
|
java.lang.String[] |
substringsByLetter(java.lang.String input)
Returns all valid substrings of the input word in the lexicon |
java.lang.String[] |
substringsByLetter(java.lang.String input,
int minLength)
Returns all valid substrings of the input word in the lexicon of length at least minLength |
void |
substringsByLetter(java.lang.String input,
java.util.Set result)
|
java.lang.String[] |
superstringsByLetter(java.lang.String input)
Returns all valid superstrings of the input word in the lexicon |
void |
superstringsByLetter(java.lang.String input,
java.util.Set result)
|
static void |
testAllits()
|
static void |
testRhymes()
|
static void |
tests(java.lang.String[] args)
|
| Methods inherited from class rita.RiObject |
|---|
dispose, getId, getPApplet, nextId |
| Methods inherited from class java.lang.Object |
|---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
public static final char STRESSED
public static final char UNSTRESSED
public static boolean VERBOSE_WARNINGS
| Constructor Detail |
|---|
public RiLexicon()
public RiLexicon(processing.core.PApplet pApplet)
public RiLexicon(processing.core.PApplet pApplet,
java.lang.String lexiconFile)
| Method Detail |
|---|
public boolean isStopWord(java.lang.String word)
public java.lang.String getRandomWordWithSyllableCount(int syllableCount)
public java.lang.String getRandomWordWithSyllableCount(java.lang.String pos,
int syllableCount)
RiPosTagger
public java.lang.String getRandomWord(java.lang.String pos,
int targetLength)
RiPosTagger
public java.lang.String getRandomWord()
public java.lang.String getRandomWord(java.lang.String pos)
RiPosTagger
public java.lang.String getRandomWord(int targetLength)
public boolean isAlliteration(java.lang.String wordA,
java.lang.String wordB)
Note: returns true if wordA.equals(wordB) and false if either (or both) are null.
public boolean isRhyme(java.lang.String wordA,
java.lang.String wordB)
Note: at present doesn't use letter-to-sound engine if either word is not found in the lexicon, but instead just returns false. TODO
public boolean isSubstring(java.lang.String orig,
java.lang.String toCheck)
Returns true if orig is a substring of toCheck.
public boolean isSuperstring(java.lang.String orig,
java.lang.String toCheck)
Returns true if orig is a superstring of toCheck.
public boolean isContaining(java.lang.String orig,
java.lang.String toCheck)
Returns true if orig is a sub or super-string of toCheck.
public java.util.Iterator randomIterator()
public static java.util.Iterator randomIterator(java.util.Set s)
public java.util.Iterator randomPosIterator(java.lang.String pos)
Note: this method will create a new iterator each time it is called
public java.util.Iterator posIterator(java.lang.String pos)
RiPos
public java.util.Iterator randomIterator(java.lang.String regex)
Note: this method will create a new iterator each time it is called
public java.util.Iterator iterator()
public java.util.Iterator iterator(java.lang.String regex)
public java.util.Set getWords(java.lang.String regex)
public java.util.Set getWords()
public java.lang.String[] getRhymes(java.lang.String input)
Two words rhyme if their final stressed vowel and all following phonemes are identical.
public void getRhymes(java.lang.String input,
java.util.Set result)
Two words rhyme if their final stressed vowel and all following phonemes are identical.
public java.lang.String[] getAlliterations(java.lang.String input)
public void getAlliterations(java.lang.String input,
java.util.Set result)
public void getAlliterations(java.lang.String input,
java.util.Set result,
int minLength)
public java.lang.String[] similarBySoundAndLetter(java.lang.String input)
similarByLetter(String),
similarBySound(String)
public java.lang.String[] similarBySound(java.lang.String input)
public int similarBySound(java.lang.String input,
java.util.Set result)
public java.lang.String[] similarByLetter(java.lang.String input)
public int similarByLetter(java.lang.String input,
java.util.Set result)
result.
public int similarByLetter(java.lang.String input,
java.util.Set result,
boolean preserveLength)
result.
If 'preserveLength' is true, the method will favor words of the same length as the input.
public int similarByLetter(java.lang.String input,
java.util.Set result,
int minMed)
result,
considering all matches where the edit distance >= 'minMed'.
public int similarByLetter(java.lang.String input,
java.util.Set result,
int minMed,
boolean preserveLength)
result,
considering all matches where the edit distance >= 'minMed'. If 'preserveLength' is true, the method will favor words of the same length as the input.
public java.util.Set singleLetterSubtitutions(java.lang.String input)
public java.util.Set singleLetterInsertions(java.lang.String input)
public java.util.Set singleLetterDeletes(java.lang.String input)
public java.lang.String[] containingStringsByLetter(java.lang.String input)
This method, CONTAINS(K), is equivalent to UNION( SUB(K), SUPER(K) ).
public java.lang.String[] substringsByLetter(java.lang.String input)
public java.lang.String[] substringsByLetter(java.lang.String input,
int minLength)
minLength
public void substringsByLetter(java.lang.String input,
java.util.Set result)
public java.lang.String[] superstringsByLetter(java.lang.String input)
public void superstringsByLetter(java.lang.String input,
java.util.Set result)
public java.lang.String[] getPosEntries(java.lang.String word)
Returns the list of possible parts-of-speech for the word, or null if not found.
public static void tests(java.lang.String[] args)
throws java.lang.InterruptedException
java.lang.InterruptedException
public java.util.Map getFeatures(java.lang.String word)
public void preloadFeatures()
RiLexicon lex = new RiLexicon();
lex.preloadFeatures();
// use the lexicon
public boolean contains(java.lang.String word)
public static void testRhymes()
public java.lang.String getPosStr(java.lang.String word)
word -
public java.util.Map getLexicalData()
public void setLexicalData(java.util.Map lexicalData)
##############################################################################
#### FORMAT##: ... | ...
##############################################################################
blog: b-l-ao-g | nn vbg
cepstral: k-eh1-p s-t-r-ax-l | nnp
freetts: f-r-iy1 t-iy t-iy eh-s | nnp
jsapi: jh-ey s-ae1-p iy | nnp
public static void testAllits()
public static void mainX(java.lang.String[] args)
public static void main(java.lang.String[] args)
|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||