public abstract class JavaCustomTokenizer extends CustomDocumentProcessor
Modifier and Type | Class and Description |
---|---|
static class |
JavaCustomTokenizer.InvalidAnnotationException
Exception thrown when a malformed annotation is pushed to the semantic pipe.
|
static class |
JavaCustomTokenizer.InvalidTokenException
Exception thrown when a malformed token is pushed to the semantic pipe.
|
Modifier and Type | Field and Description |
---|---|
protected java.util.HashSet<java.lang.String> |
contexts |
protected java.util.HashSet<java.lang.Integer> |
languages |
Constructor and Description |
---|
JavaCustomTokenizer(CVComponentConfig config) |
Modifier and Type | Method and Description |
---|---|
protected void |
addDocumentAnnotation(Annotation annotation)
Attach an annotation to the currently processed document after checking its validity.
|
abstract java.lang.String[] |
declareAnnotations()
Called at initialization to retrieve the annotation tags that are planned to be produced during tokenization.
|
abstract void |
endDocument()
Called when there is no more input to process in the current document.
|
Referential |
getReferential() |
void |
init(DocumentProcessingContext ctx)
Called at the start of the analysis job.
|
protected Annotation |
newAnnotation(java.lang.String tag,
java.lang.String displayForm,
int nbTokens)
Allocate a new annotation with the provided tag, value and length.
|
abstract void |
newDocument()
Called when a new document is about to get processed.
|
protected AnnotatedToken |
newToken(java.lang.String form)
Allocate a new token of the provided form.
|
void |
process(DocumentProcessingContext dContext,
ProcessableDocument doc)
Process a document.
|
abstract void |
processChunk(java.lang.String text,
int language,
java.lang.String context)
Called when a new input chunk is to be processed.
|
protected void |
pushToken(AnnotatedToken token)
Send a token to the output stream.
|
void |
release()
Called at the end of the analysis job.
|
void |
setInputContexts(java.util.Collection<java.lang.String> c) |
getCondition, setCondition
protected java.util.HashSet<java.lang.Integer> languages
protected java.util.HashSet<java.lang.String> contexts
public JavaCustomTokenizer(CVComponentConfig config) throws java.lang.Exception
Throws: java.lang.Exception
public abstract void newDocument()
public abstract void endDocument()
public abstract void processChunk(java.lang.String text, int language, java.lang.String context) throws java.lang.Exception
Parameters:
text - the chunk text
language - the chunk language
context - the chunk context
Throws:
JavaCustomTokenizer.InvalidTokenException
java.lang.Exception
See Also:
newToken(), newAnnotation(), pushToken()
public abstract java.lang.String[] declareAnnotations()
protected void pushToken(AnnotatedToken token) throws JavaCustomTokenizer.InvalidTokenException
Parameters:
token - A token allocated through a call to newToken()
Throws:
JavaCustomTokenizer.InvalidTokenException
See Also:
newToken(), newAnnotation()
protected AnnotatedToken newToken(java.lang.String form) throws JavaCustomTokenizer.InvalidTokenException
Parameters:
form - the new token form
Throws:
JavaCustomTokenizer.InvalidTokenException
protected Annotation newAnnotation(java.lang.String tag, java.lang.String displayForm, int nbTokens) throws JavaCustomTokenizer.InvalidAnnotationException
Parameters:
tag - the new annotation tag
displayForm - the new annotation value
nbTokens - the new annotation length
Throws:
JavaCustomTokenizer.InvalidAnnotationException
protected void addDocumentAnnotation(Annotation annotation) throws JavaCustomTokenizer.InvalidAnnotationException
Parameters:
annotation - the annotation to attach
Throws:
JavaCustomTokenizer.InvalidAnnotationException
See Also:
newAnnotation()
public final void process(DocumentProcessingContext dContext, ProcessableDocument doc) throws java.lang.Exception
DocumentProcessor
Parameters:
dContext - Context to perform various operations related to processing
doc - The document being processed.
Throws:
java.lang.Exception
public final void setInputContexts(java.util.Collection<java.lang.String> c)
public final Referential getReferential()
public final void init(DocumentProcessingContext ctx)
Description copied from class: CustomDocumentProcessor
Specified by: init in interface DocumentProcessor
Overrides: init in class CustomDocumentProcessor
public final void release()
Description copied from class: CustomDocumentProcessor
Specified by: release in interface DocumentProcessor
Overrides: release in class CustomDocumentProcessor
Copyright © 2013 Dassault Systèmes, All Rights Reserved.