rita.support
Class RegexTokenizer

java.lang.Object
  extended by rita.RiObject
      extended by rita.support.RegexTokenizer
All Implemented Interfaces:
processing.core.PConstants, RiTokenizerIF, RiConstants

public final class RegexTokenizer
extends RiObject
implements RiTokenizerIF

Simple tokenizer for user-supplied regular expressions.

Note: defaults to splitting on white-space characters ('\s').


Field Summary
static java.lang.String DEFAULT_REGEX
           
 
Fields inherited from interface rita.support.RiConstants
BEHAVIOR_COMPLETED, BOUNDING_BOX_ALPHA, BRILL_POS_TAGGER, EASE_IN, EASE_IN_CUBIC, EASE_IN_EXPO, EASE_IN_OUT, EASE_IN_OUT_CUBIC, EASE_IN_OUT_EXPO, EASE_IN_OUT_QUARTIC, EASE_IN_OUT_SINE, EASE_IN_QUARTIC, EASE_IN_SINE, EASE_OUT, EASE_OUT_CUBIC, EASE_OUT_EXPO, EASE_OUT_QUARTIC, EASE_OUT_SINE, ESS, FADE_COLOR, FADE_IN, FADE_OUT, FADE_TO_TEXT, FIRST_PERSON, FUTURE_TENSE, ID, LERP, LINEAR, MAXENT_POS_TAGGER, MINIM, MOVE, MUTABLE, PAST_TENSE, PHONEME_BOUNDARY, PHONEMES, PLING_STEMMER, PLURAL, PORTER_STEMMER, POS, PRESENT_TENSE, SCALE_TO, SECOND_PERSON, SENTENCE_BOUNDARY, SINGULAR, SONIA, SPEECH_COMPLETED, STRESSES, SYLLABLE_BOUNDARY, SYLLABLES, TEXT, TEXT_ENTERED, THIRD_PERSON, TIMER, TIMER_COMPLETED, TIMER_TICK, TOKENS, UNKNOWN, WORD_BOUNDARY
 
Fields inherited from interface processing.core.PConstants
A, AB, ADD, AG, ALPHA, ALPHA_MASK, ALT, AMBIENT, AR, ARC, ARGB, ARROW, B, BACKSPACE, BASELINE, BEEN_LIT, BEVEL, BLEND, BLUE_MASK, BLUR, BOTTOM, BOX, BURN, CENTER, CENTER_DIAMETER, CENTER_RADIUS, CHATTER, CLOSE, CMYK, CODED, COMPLAINT, CONTROL, CORNER, CORNERS, CROSS, CUSTOM, DA, DARKEST, DB, DEG_TO_RAD, DELETE, DG, DIAMETER, DIFFERENCE, DILATE, DIRECTIONAL, DISABLE_ACCURATE_TEXTURES, DISABLE_DEPTH_SORT, DISABLE_DEPTH_TEST, DISABLE_OPENGL_2X_SMOOTH, DISABLE_OPENGL_ERROR_REPORT, DODGE, DOWN, DR, DXF, EB, EDGE, EG, ELLIPSE, ENABLE_ACCURATE_TEXTURES, ENABLE_DEPTH_SORT, ENABLE_DEPTH_TEST, ENABLE_NATIVE_FONTS, ENABLE_OPENGL_2X_SMOOTH, ENABLE_OPENGL_4X_SMOOTH, ENABLE_OPENGL_ERROR_REPORT, ENTER, EPSILON, ER, ERODE, ERROR_BACKGROUND_IMAGE_FORMAT, ERROR_BACKGROUND_IMAGE_SIZE, ERROR_PUSHMATRIX_OVERFLOW, ERROR_PUSHMATRIX_UNDERFLOW, ERROR_TEXTFONT_NULL_PFONT, ESC, EXCLUSION, G, GIF, GRAY, GREEN_MASK, HALF_PI, HAND, HARD_LIGHT, HINT_COUNT, HSB, IMAGE, INVERT, JAVA2D, JPEG, LEFT, LIGHTEST, LINE, LINES, LINUX, MACOSX, MAX_FLOAT, MAX_INT, MIN_FLOAT, MIN_INT, MITER, MODEL, MULTIPLY, NORMAL, NORMALIZED, NX, NY, NZ, OPAQUE, OPEN, OPENGL, ORTHOGRAPHIC, OTHER, OVERLAY, P2D, P3D, PATH, PDF, PERSPECTIVE, PI, platformNames, POINT, POINTS, POLYGON, POSTERIZE, PROBLEM, PROJECT, QUAD, QUAD_STRIP, QUADS, QUARTER_PI, R, RAD_TO_DEG, RADIUS, RECT, RED_MASK, REPLACE, RETURN, RGB, RIGHT, ROUND, SA, SB, SCREEN, SG, SHAPE, SHIFT, SHINE, SOFT_LIGHT, SPB, SPG, SPHERE, SPOT, SPR, SQUARE, SR, SUBTRACT, SW, TAB, TARGA, THIRD_PI, THRESHOLD, TIFF, TOP, TRIANGLE, TRIANGLE_FAN, TRIANGLE_STRIP, TRIANGLES, TWO_PI, TX, TY, TZ, U, UP, V, VERTEX_FIELD_COUNT, VW, VX, VY, VZ, WAIT, WHITESPACE, WINDOWS, X, Y, Z
 
Constructor Summary
RegexTokenizer()
           
RegexTokenizer(processing.core.PApplet p)
           
RegexTokenizer(processing.core.PApplet p, java.util.regex.Pattern regex)
           
RegexTokenizer(processing.core.PApplet p, java.lang.String regex)
           
RegexTokenizer(java.lang.String regex)
           
 
Method Summary
 java.lang.String getRegex()
          Returns the regular expression used for tokenizing
 boolean isReturningDelims()
          Whether delimiters are returned as tokens or ignored
 boolean isTrimmingSpaces()
          Whether spaces are trimmed from both ends of each token
static void main(java.lang.String[] args)
           
 void setRegex(java.util.regex.Pattern regex)
          Sets the regular expression to be used for tokenizing
 void setRegex(java.lang.String regex)
          Sets the regular expression to be used for tokenizing
 void setReturnDelims(boolean returnDelims)
          Sets whether delimiters should be returned as tokens or ignored
 void setTrimSpaces(boolean trimSpaces)
          Sets whether spaces are trimmed from both ends of each token
 java.lang.String[] split(java.lang.String text)
          Splits the String into sentences according to the regular expression
 java.lang.String[] tokenize(java.lang.String words)
          Tokenizes the String according to the supplied regular expression.
 void tokenize(java.lang.String words, java.util.List result)
          Tokenizes the String according to the supplied regular expression and stores the result as a List in result
 
Methods inherited from class rita.RiObject
dispose, getId, getPApplet, nextId
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

DEFAULT_REGEX

public static final java.lang.String DEFAULT_REGEX
See Also:
Constant Field Values
Constructor Detail

RegexTokenizer

public RegexTokenizer(processing.core.PApplet p,
                      java.lang.String regex)

RegexTokenizer

public RegexTokenizer(processing.core.PApplet p,
                      java.util.regex.Pattern regex)

RegexTokenizer

public RegexTokenizer(java.lang.String regex)

RegexTokenizer

public RegexTokenizer(processing.core.PApplet p)

RegexTokenizer

public RegexTokenizer()
Method Detail

split

public java.lang.String[] split(java.lang.String text)
Splits the String into sentences according to the regular expression


tokenize

public void tokenize(java.lang.String words,
                     java.util.List result)
Tokenizes the String according to the supplied regular expression and stores the result as a List in result

Specified by:
tokenize in interface RiTokenizerIF

tokenize

public java.lang.String[] tokenize(java.lang.String words)
Tokenizes the String according to the supplied regular expression.

Specified by:
tokenize in interface RiTokenizerIF

getRegex

public java.lang.String getRegex()
Returns the regular expression used for tokenizing


setRegex

public void setRegex(java.util.regex.Pattern regex)
Sets the regular expression to be used for tokenizing


setRegex

public void setRegex(java.lang.String regex)
Sets the regular expression to be used for tokenizing


isTrimmingSpaces

public boolean isTrimmingSpaces()
Whether spaces are trimmed from both ends of each token


setTrimSpaces

public void setTrimSpaces(boolean trimSpaces)
Sets whether spaces are trimmed from both ends of each token


isReturningDelims

public boolean isReturningDelims()
Whether delimiters are returned as tokens or ignored


setReturnDelims

public void setReturnDelims(boolean returnDelims)
Sets whether delimiters should be returned as tokens or ignored


main

public static void main(java.lang.String[] args)