rita.support
Class PennWordTokenizer

java.lang.Object
  extended by rita.RiObject
      extended by rita.support.PennWordTokenizer
All Implemented Interfaces:
processing.core.PConstants, RiTokenizerIF, RiConstants

public final class PennWordTokenizer
extends RiObject
implements RiTokenizerIF

Simple word tokenizer that tokenizes according to the Penn Treebank conventions.


Field Summary
 
Fields inherited from interface rita.support.RiConstants
BEHAVIOR_COMPLETED, BOUNDING_BOX_ALPHA, BRILL_POS_TAGGER, EASE_IN, EASE_IN_CUBIC, EASE_IN_EXPO, EASE_IN_OUT, EASE_IN_OUT_CUBIC, EASE_IN_OUT_EXPO, EASE_IN_OUT_QUARTIC, EASE_IN_OUT_SINE, EASE_IN_QUARTIC, EASE_IN_SINE, EASE_OUT, EASE_OUT_CUBIC, EASE_OUT_EXPO, EASE_OUT_QUARTIC, EASE_OUT_SINE, ESS, FADE_COLOR, FADE_IN, FADE_OUT, FADE_TO_TEXT, FIRST_PERSON, FUTURE_TENSE, ID, LERP, LINEAR, MAXENT_POS_TAGGER, MINIM, MOVE, MUTABLE, PAST_TENSE, PHONEME_BOUNDARY, PHONEMES, PLING_STEMMER, PLURAL, PORTER_STEMMER, POS, PRESENT_TENSE, SCALE_TO, SECOND_PERSON, SENTENCE_BOUNDARY, SINGULAR, SONIA, SPEECH_COMPLETED, STRESSES, SYLLABLE_BOUNDARY, SYLLABLES, TEXT, TEXT_ENTERED, THIRD_PERSON, TIMER, TIMER_COMPLETED, TIMER_TICK, TOKENS, UNKNOWN, WORD_BOUNDARY
 
Fields inherited from interface processing.core.PConstants
A, AB, ADD, AG, ALPHA, ALPHA_MASK, ALT, AMBIENT, AR, ARC, ARGB, ARROW, B, BACKSPACE, BASELINE, BEEN_LIT, BEVEL, BLEND, BLUE_MASK, BLUR, BOTTOM, BOX, BURN, CENTER, CENTER_DIAMETER, CENTER_RADIUS, CHATTER, CLOSE, CMYK, CODED, COMPLAINT, CONTROL, CORNER, CORNERS, CROSS, CUSTOM, DA, DARKEST, DB, DEG_TO_RAD, DELETE, DG, DIAMETER, DIFFERENCE, DILATE, DIRECTIONAL, DISABLE_ACCURATE_TEXTURES, DISABLE_DEPTH_SORT, DISABLE_DEPTH_TEST, DISABLE_OPENGL_2X_SMOOTH, DISABLE_OPENGL_ERROR_REPORT, DODGE, DOWN, DR, DXF, EB, EDGE, EG, ELLIPSE, ENABLE_ACCURATE_TEXTURES, ENABLE_DEPTH_SORT, ENABLE_DEPTH_TEST, ENABLE_NATIVE_FONTS, ENABLE_OPENGL_2X_SMOOTH, ENABLE_OPENGL_4X_SMOOTH, ENABLE_OPENGL_ERROR_REPORT, ENTER, EPSILON, ER, ERODE, ERROR_BACKGROUND_IMAGE_FORMAT, ERROR_BACKGROUND_IMAGE_SIZE, ERROR_PUSHMATRIX_OVERFLOW, ERROR_PUSHMATRIX_UNDERFLOW, ERROR_TEXTFONT_NULL_PFONT, ESC, EXCLUSION, G, GIF, GRAY, GREEN_MASK, HALF_PI, HAND, HARD_LIGHT, HINT_COUNT, HSB, IMAGE, INVERT, JAVA2D, JPEG, LEFT, LIGHTEST, LINE, LINES, LINUX, MACOSX, MAX_FLOAT, MAX_INT, MIN_FLOAT, MIN_INT, MITER, MODEL, MULTIPLY, NORMAL, NORMALIZED, NX, NY, NZ, OPAQUE, OPEN, OPENGL, ORTHOGRAPHIC, OTHER, OVERLAY, P2D, P3D, PATH, PDF, PERSPECTIVE, PI, platformNames, POINT, POINTS, POLYGON, POSTERIZE, PROBLEM, PROJECT, QUAD, QUAD_STRIP, QUADS, QUARTER_PI, R, RAD_TO_DEG, RADIUS, RECT, RED_MASK, REPLACE, RETURN, RGB, RIGHT, ROUND, SA, SB, SCREEN, SG, SHAPE, SHIFT, SHINE, SOFT_LIGHT, SPB, SPG, SPHERE, SPOT, SPR, SQUARE, SR, SUBTRACT, SW, TAB, TARGA, THIRD_PI, THRESHOLD, TIFF, TOP, TRIANGLE, TRIANGLE_FAN, TRIANGLE_STRIP, TRIANGLES, TWO_PI, TX, TY, TZ, U, UP, V, VERTEX_FIELD_COUNT, VW, VX, VY, VZ, WAIT, WHITESPACE, WINDOWS, X, Y, Z
 
Constructor Summary
PennWordTokenizer()
           
PennWordTokenizer(boolean splitContractions)
           
PennWordTokenizer(processing.core.PApplet pApplet)
           
PennWordTokenizer(processing.core.PApplet pApplet, boolean splitContractions)
           
 
Method Summary
 boolean isSplittingContractions()
           
static void main(java.lang.String[] args)
           
 void setSplitContractions(boolean splitContractions)
           
 java.lang.String[] tokenize(java.lang.String words)
          Tokenizes the String according to the Penn Treebank conventions.
 void tokenize(java.lang.String words, java.util.List result)
          Tokenizes the String according to the Penn Treebank conventions and stores the result as a List in result
 java.lang.String tokenizeInline(java.lang.String words)
           
 
Methods inherited from class rita.RiObject
dispose, getId, getPApplet, nextId
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

PennWordTokenizer

public PennWordTokenizer()

PennWordTokenizer

public PennWordTokenizer(processing.core.PApplet pApplet)

PennWordTokenizer

public PennWordTokenizer(boolean splitContractions)

PennWordTokenizer

public PennWordTokenizer(processing.core.PApplet pApplet,
                         boolean splitContractions)
Method Detail

tokenizeInline

public java.lang.String tokenizeInline(java.lang.String words)

tokenize

public void tokenize(java.lang.String words,
                     java.util.List result)
Tokenizes the String according to the Penn Treebank conventions and stores the result as a List in result

Specified by:
tokenize in interface RiTokenizerIF

tokenize

public java.lang.String[] tokenize(java.lang.String words)
Tokenizes the String according to the Penn Treebank conventions.

Specified by:
tokenize in interface RiTokenizerIF

isSplittingContractions

public boolean isSplittingContractions()

setSplitContractions

public void setSplitContractions(boolean splitContractions)

main

public static void main(java.lang.String[] args)