|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object
rita.RiObject
rita.RiConcorder
public class RiConcorder
Maintains a simple word frequency table for a set of input data
RiConcorder ric = new RiConcorder(this);
ric.setIgnoreCase(false);
ric.setIgnoreStopWords(false);
ric.setIgnorePunctuation(false);
ric.loadFile("myTestFile.txt");
ric.dump();
String[] mostCommon = ric.getMostCommonTokens(5);
print(mostCommon);
| Field Summary |
|---|
| Fields inherited from interface processing.core.PConstants |
|---|
A, AB, ADD, AG, ALPHA, ALPHA_MASK, ALT, AMBIENT, AR, ARC, ARGB, ARROW, B, BACKSPACE, BASELINE, BEEN_LIT, BEVEL, BLEND, BLUE_MASK, BLUR, BOTTOM, BOX, BURN, CENTER, CENTER_DIAMETER, CENTER_RADIUS, CHATTER, CLOSE, CMYK, CODED, COMPLAINT, CONTROL, CORNER, CORNERS, CROSS, CUSTOM, DA, DARKEST, DB, DEG_TO_RAD, DELETE, DG, DIAMETER, DIFFERENCE, DILATE, DIRECTIONAL, DISABLE_ACCURATE_TEXTURES, DISABLE_DEPTH_SORT, DISABLE_DEPTH_TEST, DISABLE_OPENGL_2X_SMOOTH, DISABLE_OPENGL_ERROR_REPORT, DODGE, DOWN, DR, DXF, EB, EDGE, EG, ELLIPSE, ENABLE_ACCURATE_TEXTURES, ENABLE_DEPTH_SORT, ENABLE_DEPTH_TEST, ENABLE_NATIVE_FONTS, ENABLE_OPENGL_2X_SMOOTH, ENABLE_OPENGL_4X_SMOOTH, ENABLE_OPENGL_ERROR_REPORT, ENTER, EPSILON, ER, ERODE, ERROR_BACKGROUND_IMAGE_FORMAT, ERROR_BACKGROUND_IMAGE_SIZE, ERROR_PUSHMATRIX_OVERFLOW, ERROR_PUSHMATRIX_UNDERFLOW, ERROR_TEXTFONT_NULL_PFONT, ESC, EXCLUSION, G, GIF, GRAY, GREEN_MASK, HALF_PI, HAND, HARD_LIGHT, HINT_COUNT, HSB, IMAGE, INVERT, JAVA2D, JPEG, LEFT, LIGHTEST, LINE, LINES, LINUX, MACOSX, MAX_FLOAT, MAX_INT, MIN_FLOAT, MIN_INT, MITER, MODEL, MULTIPLY, NORMAL, NORMALIZED, NX, NY, NZ, OPAQUE, OPEN, OPENGL, ORTHOGRAPHIC, OTHER, OVERLAY, P2D, P3D, PATH, PDF, PERSPECTIVE, PI, platformNames, POINT, POINTS, POLYGON, POSTERIZE, PROBLEM, PROJECT, QUAD, QUAD_STRIP, QUADS, QUARTER_PI, R, RAD_TO_DEG, RADIUS, RECT, RED_MASK, REPLACE, RETURN, RGB, RIGHT, ROUND, SA, SB, SCREEN, SG, SHAPE, SHIFT, SHINE, SOFT_LIGHT, SPB, SPG, SPHERE, SPOT, SPR, SQUARE, SR, SUBTRACT, SW, TAB, TARGA, THIRD_PI, THRESHOLD, TIFF, TOP, TRIANGLE, TRIANGLE_FAN, TRIANGLE_STRIP, TRIANGLES, TWO_PI, TX, TY, TZ, U, UP, V, VERTEX_FIELD_COUNT, VW, VX, VY, VZ, WAIT, WHITESPACE, WINDOWS, X, Y, Z |
| Constructor Summary | |
|---|---|
RiConcorder()
Constructs a new RiConcorder |
|
RiConcorder(processing.core.PApplet pApplet)
Constructs a new RiConcorder |
|
RiConcorder(processing.core.PApplet pApplet,
RiTokenizer tokenizer)
Constructs a new RiConcorder using the specified tokenizer |
|
RiConcorder(processing.core.PApplet pApplet,
java.lang.String fileName)
Constructs a new RiConcorder and loads it with the data in fileName. |
|
RiConcorder(processing.core.PApplet pApplet,
java.lang.String[] fileNames)
Constructs a new RiConcorder and loads it with the data in fileNames. |
|
RiConcorder(processing.core.PApplet pApplet,
java.lang.String[] fileNames,
RiTokenizer tokenizer)
Constructs a new RiConcorder using the specified tokenizer and loads it with the data
in fileName(s). |
|
RiConcorder(RiTokenizer tokenizer)
Constructs a new RiConcorder using the specified tokenizer |
|
| Method Summary | |
|---|---|
void |
addLine(java.lang.String line)
Add the data from a single line into the frequency table |
void |
addWord(java.lang.String word)
Adds a single word to the model with a count of 1 if it does not yet exist, else increments its count by 1. |
void |
addWords(java.lang.String[] words)
Adds the words to the model, incrementing
their counts (and the total-count) for each. |
void |
clear()
Clears the model, resets variables, and prepares it for reloading with new data |
boolean |
contains(java.lang.String word)
True if the concordance contains word, else false |
void |
dump()
|
int |
getCount(java.lang.String word)
Returns the # of occurrences of word
or 0 if the word does not exist in the table. |
java.lang.String[] |
getLeastCommonTokens(int numberToReturn)
Returns the numberToReturn words with the lowest frequency. |
java.lang.String[] |
getMostCommonTokens(int numberToReturn)
Returns the numberToReturn words with the highest frequency. |
float |
getProbability(java.lang.String word)
Returns the normalized frequency (probability) of word,
1 if it is the only word in the model, 0 if it does not exist. |
boolean |
isIgnoringCase()
Returns whether the model is ignoring case by considering all words as lowerCase (default=false) |
boolean |
isIgnoringPunctuation()
Returns whether the model is ignoring punctuation (default = true) |
boolean |
isIgnoringStopWords()
Returns whether the model is ignoring stopWords (default = false) |
void |
loadFile(java.lang.String fileName)
Loads the data from the file into a frequency table |
void |
loadFiles(java.lang.String[] fileNames)
Loads the data from the files into a single frequency table |
static void |
main(java.lang.String[] args)
|
void |
setIgnoreCase(boolean ignoreCase)
Sets whether the model should ignore case (default=false), treating all tokens as lower-case |
void |
setIgnorePunctuation(boolean ignore)
Sets whether the model should ignore punctuation (default = true) |
void |
setIgnoreStopWords(boolean ignoreStopWords)
Sets whether the model should ignore stopWords (default = false) |
void |
setWordsToIgnore(java.lang.String[] wordsToIgnore)
Tells the model to ignore this set of words |
int |
totalCount()
Returns the total # of entries in the model. |
int |
uniqueCount()
Returns the # of unique words in the model. |
| Methods inherited from class rita.RiObject |
|---|
dispose, getId, getPApplet, nextId |
| Methods inherited from class java.lang.Object |
|---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
|---|
public RiConcorder(processing.core.PApplet pApplet,
java.lang.String[] fileNames,
RiTokenizer tokenizer)
Constructs a new RiConcorder using the specified tokenizer and loads it with the data
in fileName(s).
public RiConcorder(processing.core.PApplet pApplet,
java.lang.String fileName)
Constructs a new RiConcorder and loads it with the data in fileName.
public RiConcorder(processing.core.PApplet pApplet,
java.lang.String[] fileNames)
Constructs a new RiConcorder and loads it with the data in fileNames.
public RiConcorder(processing.core.PApplet pApplet,
RiTokenizer tokenizer)
Constructs a new RiConcorder using the specified tokenizer
public RiConcorder(processing.core.PApplet pApplet)
public RiConcorder()
public RiConcorder(RiTokenizer tokenizer)
Constructs a new RiConcorder using the specified tokenizer
| Method Detail |
|---|
public void loadFiles(java.lang.String[] fileNames)
public void setWordsToIgnore(java.lang.String[] wordsToIgnore)
public void addLine(java.lang.String line)
public int getCount(java.lang.String word)
Returns the # of occurrences of word
or 0 if the word does not exist in the table.
public float getProbability(java.lang.String word)
Returns the normalized frequency (probability) of word,
1 if it is the only word in the model, 0 if it does not exist.
public java.lang.String[] getMostCommonTokens(int numberToReturn)
Returns the numberToReturn words with the highest frequency.
If there are fewer than numberToReturn words then all items
are returned.
public java.lang.String[] getLeastCommonTokens(int numberToReturn)
Returns the numberToReturn words with the lowest frequency.
If there are fewer than numberToReturn words then all items
are returned.
public int totalCount()
public int uniqueCount()
public void addWords(java.lang.String[] words)
Adds the words to the model, incrementing
their counts (and the total-count) for each.
public void addWord(java.lang.String word)
public boolean contains(java.lang.String word)
True if the concordance contains word, else false
public void loadFile(java.lang.String fileName)
public void clear()
public void dump()
public boolean isIgnoringCase()
public void setIgnoreCase(boolean ignoreCase)
public boolean isIgnoringStopWords()
RiTa.STOP_WORDS
public void setIgnoreStopWords(boolean ignoreStopWords)
RiTa.STOP_WORDS
public boolean isIgnoringPunctuation()
RiTa.STOP_WORDS
public void setIgnorePunctuation(boolean ignore)
public static void main(java.lang.String[] args)
|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||