public class OntologyMatcher extends SemanticProcessor implements com.exalead.util.Checkable, java.io.Serializable
SemanticProcessor.FromDataModel, SemanticProcessor.Transformer<T>, SemanticProcessor.Visitor
Modifier and Type | Field and Description |
---|---|
java.lang.String |
annotationPrefix |
java.lang.String |
annotationsToIgnore |
static java.lang.String |
DEFAULT_ANNOTATION_PREFIX |
static boolean |
DEFAULT_ENABLE_APPROX_MATCHING |
static boolean |
DEFAULT_IGNORE_SPACES |
static boolean |
DEFAULT_KEEP_LONGEST_MATCH |
static boolean |
DEFAULT_KEEP_LONGEST_MATCH_INTER_TAG |
static int |
DEFAULT_MIN_WORD_SIZE_FOR_DIST1 |
static int |
DEFAULT_MIN_WORD_SIZE_FOR_DIST2 |
static boolean |
DEFAULT_RESTRICT_LANGUAGE |
static boolean |
DEFAULT_TOKENIZE_ANNOTATIONS |
static boolean |
DEFAULT_TRUST_LEVEL_BASED_DEDUP |
boolean |
enableApproxMatching |
boolean |
ignoreSpaces |
boolean |
keepLongestMatch |
boolean |
keepLongestMatchInterTag |
int |
minWordSizeForDist1 |
int |
minWordSizeForDist2 |
java.lang.String |
resourceDir |
boolean |
restrictLanguage |
boolean |
tokenizeAnnotations |
boolean |
trustLevelBasedDedup |
contexts, dataModelClass, dataModelProperty, dataModelState, DEFAULT_DISABLED, disabled, fromDataModel, name
Constructor and Description |
---|
OntologyMatcher() |
OntologyMatcher(OntologyMatcher o)
Copy constructor
|
Modifier and Type | Method and Description |
---|---|
<T> T |
accept(SemanticProcessor.Transformer<T> transformer,
T[] t) |
void |
accept(SemanticProcessor.Visitor visitor) |
void |
check(boolean deep,
java.lang.String errorContext)
Checks this OntologyMatcher.
|
static OntologyMatcher |
fromString(java.lang.String s)
Creates an OntologyMatcher from its string representation.
|
java.lang.String |
getAnnotationPrefix()
A prefix to add to each annotation tag.
For example, if the package of the entry matched in the ontology is "exalead.location.country" and the annotationPrefix is "myOntology_", an annotation will be added with the tag "myOntology_exalead.location.country". |
java.lang.String |
getAnnotationsToIgnore()
Returns the list of annotations to be ignored (comma-separated).
|
int |
getMinWordSizeForDist1()
Minimum number of chars in token to enable the Damerau-Levenshtein distance of 1.
|
int |
getMinWordSizeForDist2()
Minimum number of chars in token to enable the Damerau-Levenshtein distance of 2.
|
java.lang.String |
getResourceDir()
URL for the directory containing the ontology (data://, file:// or resource://).
|
boolean |
isEnableApproxMatching()
Enables approximate matching in the ontology.
|
boolean |
isIgnoreSpaces()
If your ontology was compiled with matchOnSeparators=false, this allows 'lemonde' to retrieve 'le monde' or 'le monde' to retrieve 'lemonde'.
If your ontology was compiled with matchOnSeparators=true, this allows 'le monde' to retrieve 'le monde'. |
boolean |
isKeepLongestMatch()
Keeps only the longest match.
For example, if you have 5 tokens ('a', 'b', 'c', 'd', 'e') and 4 annotations 'a', 'a-c', 'b-c-d' and 'd-e', this option will only keep 'b-c-d' and remove all other annotations. |
boolean |
isKeepLongestMatchInterTag()
Keeps only the longest match (tag independent).
For example, if you have 5 tokens ('a', 'b', 'c', 'd', 'e') and 4 annotations 'a', 'a-c', 'b-c-d' and 'd-e', this option will only keep 'b-c-d' and remove all other annotations. |
boolean |
isRestrictLanguage()
Keeps only the expression added with language == Language.XX or with the document language.
For example, if the Ontology contains an expression added with language=En, it will be extracted only for an English document if restrictLanguage is set to true. |
boolean |
isTokenizeAnnotations()
If you have multi-token annotations (such as a "super market" annotation on the token "supermarket"), this option automatically subtokenizes "supermarket" into "super" and "market" and keeps the original annotations.
If you enable this option, keepLongestMatch and keepLongestMatchInterTag will be set to true. |
boolean |
isTrustLevelBasedDedup()
Keeps only the annotation with the highest trust level when several entries from a package match the same text chunk.
|
OntologyMatcher |
makeCopy()
Creates and returns a deep copy of this OntologyMatcher.
|
static OntologyMatcher |
readFrom(java.io.InputStream is)
Reads an OntologyMatcher from an XML fragment.
|
void |
setAnnotationPrefix(java.lang.String annotationPrefix)
A prefix to add to each annotation tag.
For example, if the package of the entry matched in the ontology is "exalead.location.country" and the annotationPrefix is "myOntology_", an annotation will be added with the tag "myOntology_exalead.location.country". |
void |
setAnnotationsToIgnore(java.lang.String annotationsToIgnore)
Sets the list of annotations to be ignored (comma-separated).
|
void |
setEnableApproxMatching(boolean enableApproxMatching)
Enables approximate matching in the ontology.
|
void |
setIgnoreSpaces(boolean ignoreSpaces)
If your ontology was compiled with matchOnSeparators=false, this allows 'lemonde' to retrieve 'le monde' or 'le monde' to retrieve 'lemonde'.
If your ontology was compiled with matchOnSeparators=true, this allows 'le monde' to retrieve 'le monde'. |
void |
setKeepLongestMatch(boolean keepLongestMatch)
Keeps only the longest match.
For example, if you have 5 tokens ('a', 'b', 'c', 'd', 'e') and 4 annotations 'a', 'a-c', 'b-c-d' and 'd-e', this option will only keep 'b-c-d' and remove all other annotations. |
void |
setKeepLongestMatchInterTag(boolean keepLongestMatchInterTag)
Keeps only the longest match (tag independent).
For example, if you have 5 tokens ('a', 'b', 'c', 'd', 'e') and 4 annotations 'a', 'a-c', 'b-c-d' and 'd-e', this option will only keep 'b-c-d' and remove all other annotations. |
void |
setMinWordSizeForDist1(int minWordSizeForDist1)
Minimum number of chars in token to enable the Damerau-Levenshtein distance of 1.
|
void |
setMinWordSizeForDist2(int minWordSizeForDist2)
Minimum number of chars in token to enable the Damerau-Levenshtein distance of 2.
|
void |
setResourceDir(java.lang.String resourceDir)
URL for the directory containing the ontology (data://, file:// or resource://).
|
void |
setRestrictLanguage(boolean restrictLanguage)
Keeps only the expression added with language == Language.XX or with the document language.
For example, if the Ontology contains an expression added with language=En, it will be extracted only for an English document if restrictLanguage is set to true. |
void |
setTokenizeAnnotations(boolean tokenizeAnnotations)
If you have multi-token annotations (such as a "super market" annotation on the token "supermarket"), this option automatically subtokenizes "supermarket" into "super" and "market" and keeps the original annotations.
If you enable this option, keepLongestMatch and keepLongestMatchInterTag will be set to true. |
void |
setTrustLevelBasedDedup(boolean trustLevelBasedDedup)
Keeps only the annotation with the highest trust level when several entries from a package match the same text chunk.
|
java.lang.String |
toString()
String representation of this OntologyMatcher.
|
OntologyMatcher |
withAnnotationPrefix(java.lang.String annotationPrefix) |
OntologyMatcher |
withAnnotationsToIgnore(java.lang.String annotationsToIgnore) |
OntologyMatcher |
withContexts(java.lang.String contexts) |
OntologyMatcher |
withDataModelClass(java.lang.String dataModelClass) |
OntologyMatcher |
withDataModelProperty(java.lang.String dataModelProperty) |
OntologyMatcher |
withDataModelState(java.lang.String dataModelState) |
OntologyMatcher |
withDisabled(boolean disabled) |
OntologyMatcher |
withDisabled(java.lang.Boolean disabled) |
OntologyMatcher |
withEnableApproxMatching(boolean enableApproxMatching) |
OntologyMatcher |
withEnableApproxMatching(java.lang.Boolean enableApproxMatching) |
OntologyMatcher |
withFromDataModel(SemanticProcessor fromDataModel) |
OntologyMatcher |
withIgnoreSpaces(boolean ignoreSpaces) |
OntologyMatcher |
withIgnoreSpaces(java.lang.Boolean ignoreSpaces) |
OntologyMatcher |
withKeepLongestMatch(boolean keepLongestMatch) |
OntologyMatcher |
withKeepLongestMatch(java.lang.Boolean keepLongestMatch) |
OntologyMatcher |
withKeepLongestMatchInterTag(boolean keepLongestMatchInterTag) |
OntologyMatcher |
withKeepLongestMatchInterTag(java.lang.Boolean keepLongestMatchInterTag) |
OntologyMatcher |
withMinWordSizeForDist1(int minWordSizeForDist1) |
OntologyMatcher |
withMinWordSizeForDist1(java.lang.Integer minWordSizeForDist1) |
OntologyMatcher |
withMinWordSizeForDist2(int minWordSizeForDist2) |
OntologyMatcher |
withMinWordSizeForDist2(java.lang.Integer minWordSizeForDist2) |
OntologyMatcher |
withName(java.lang.String name) |
OntologyMatcher |
withResourceDir(java.lang.String resourceDir) |
OntologyMatcher |
withRestrictLanguage(boolean restrictLanguage) |
OntologyMatcher |
withRestrictLanguage(java.lang.Boolean restrictLanguage) |
OntologyMatcher |
withTokenizeAnnotations(boolean tokenizeAnnotations) |
OntologyMatcher |
withTokenizeAnnotations(java.lang.Boolean tokenizeAnnotations) |
OntologyMatcher |
withTrustLevelBasedDedup(boolean trustLevelBasedDedup) |
OntologyMatcher |
withTrustLevelBasedDedup(java.lang.Boolean trustLevelBasedDedup) |
void |
writeTo(java.io.OutputStream os)
Writes this OntologyMatcher as an XML fragment.
|
getContexts, getDataModelClass, getDataModelProperty, getDataModelState, getFromDataModel, getName, isDisabled, setContexts, setDataModelClass, setDataModelProperty, setDataModelState, setDisabled, setFromDataModel, setName
public boolean enableApproxMatching
public static final boolean DEFAULT_ENABLE_APPROX_MATCHING
public int minWordSizeForDist1
public static final int DEFAULT_MIN_WORD_SIZE_FOR_DIST1
public int minWordSizeForDist2
public static final int DEFAULT_MIN_WORD_SIZE_FOR_DIST2
public java.lang.String resourceDir
public boolean restrictLanguage
public static final boolean DEFAULT_RESTRICT_LANGUAGE
public boolean keepLongestMatch
public static final boolean DEFAULT_KEEP_LONGEST_MATCH
public boolean keepLongestMatchInterTag
public static final boolean DEFAULT_KEEP_LONGEST_MATCH_INTER_TAG
public boolean tokenizeAnnotations
public static final boolean DEFAULT_TOKENIZE_ANNOTATIONS
public java.lang.String annotationsToIgnore
public boolean ignoreSpaces
public static final boolean DEFAULT_IGNORE_SPACES
public java.lang.String annotationPrefix
public static final java.lang.String DEFAULT_ANNOTATION_PREFIX
public boolean trustLevelBasedDedup
public static final boolean DEFAULT_TRUST_LEVEL_BASED_DEDUP
public OntologyMatcher()
public OntologyMatcher(OntologyMatcher o)
public OntologyMatcher withName(java.lang.String name)
withName
in class SemanticProcessor
public OntologyMatcher withContexts(java.lang.String contexts)
withContexts
in class SemanticProcessor
public OntologyMatcher withFromDataModel(SemanticProcessor fromDataModel)
public OntologyMatcher withDataModelState(java.lang.String dataModelState)
withDataModelState
in class SemanticProcessor
public OntologyMatcher withDataModelClass(java.lang.String dataModelClass)
withDataModelClass
in class SemanticProcessor
public OntologyMatcher withDataModelProperty(java.lang.String dataModelProperty)
withDataModelProperty
in class SemanticProcessor
public OntologyMatcher withDisabled(boolean disabled)
withDisabled
in class SemanticProcessor
public OntologyMatcher withDisabled(java.lang.Boolean disabled)
withDisabled
in class SemanticProcessor
public void setEnableApproxMatching(boolean enableApproxMatching)
public boolean isEnableApproxMatching()
public OntologyMatcher withEnableApproxMatching(boolean enableApproxMatching)
public OntologyMatcher withEnableApproxMatching(java.lang.Boolean enableApproxMatching)
public void setMinWordSizeForDist1(int minWordSizeForDist1)
public int getMinWordSizeForDist1()
public OntologyMatcher withMinWordSizeForDist1(int minWordSizeForDist1)
public OntologyMatcher withMinWordSizeForDist1(java.lang.Integer minWordSizeForDist1)
public void setMinWordSizeForDist2(int minWordSizeForDist2)
public int getMinWordSizeForDist2()
public OntologyMatcher withMinWordSizeForDist2(int minWordSizeForDist2)
public OntologyMatcher withMinWordSizeForDist2(java.lang.Integer minWordSizeForDist2)
public void setResourceDir(java.lang.String resourceDir)
public java.lang.String getResourceDir()
public OntologyMatcher withResourceDir(java.lang.String resourceDir)
public void setRestrictLanguage(boolean restrictLanguage)
public boolean isRestrictLanguage()
public OntologyMatcher withRestrictLanguage(boolean restrictLanguage)
public OntologyMatcher withRestrictLanguage(java.lang.Boolean restrictLanguage)
public void setKeepLongestMatch(boolean keepLongestMatch)
public boolean isKeepLongestMatch()
public OntologyMatcher withKeepLongestMatch(boolean keepLongestMatch)
public OntologyMatcher withKeepLongestMatch(java.lang.Boolean keepLongestMatch)
public void setKeepLongestMatchInterTag(boolean keepLongestMatchInterTag)
public boolean isKeepLongestMatchInterTag()
public OntologyMatcher withKeepLongestMatchInterTag(boolean keepLongestMatchInterTag)
public OntologyMatcher withKeepLongestMatchInterTag(java.lang.Boolean keepLongestMatchInterTag)
public void setTokenizeAnnotations(boolean tokenizeAnnotations)
public boolean isTokenizeAnnotations()
public OntologyMatcher withTokenizeAnnotations(boolean tokenizeAnnotations)
public OntologyMatcher withTokenizeAnnotations(java.lang.Boolean tokenizeAnnotations)
public void setAnnotationsToIgnore(java.lang.String annotationsToIgnore)
public java.lang.String getAnnotationsToIgnore()
public OntologyMatcher withAnnotationsToIgnore(java.lang.String annotationsToIgnore)
public void setIgnoreSpaces(boolean ignoreSpaces)
public boolean isIgnoreSpaces()
public OntologyMatcher withIgnoreSpaces(boolean ignoreSpaces)
public OntologyMatcher withIgnoreSpaces(java.lang.Boolean ignoreSpaces)
public void setAnnotationPrefix(java.lang.String annotationPrefix)
public java.lang.String getAnnotationPrefix()
public OntologyMatcher withAnnotationPrefix(java.lang.String annotationPrefix)
public void setTrustLevelBasedDedup(boolean trustLevelBasedDedup)
public boolean isTrustLevelBasedDedup()
public OntologyMatcher withTrustLevelBasedDedup(boolean trustLevelBasedDedup)
public OntologyMatcher withTrustLevelBasedDedup(java.lang.Boolean trustLevelBasedDedup)
public OntologyMatcher makeCopy()
makeCopy
in class SemanticProcessor
public static OntologyMatcher readFrom(java.io.InputStream is) throws javax.xml.bind.JAXBException
javax.xml.bind.JAXBException
public void writeTo(java.io.OutputStream os) throws javax.xml.bind.JAXBException, java.io.IOException
writeTo
in class SemanticProcessor
javax.xml.bind.JAXBException
java.io.IOException
public static OntologyMatcher fromString(java.lang.String s) throws javax.xml.bind.JAXBException, java.io.UnsupportedEncodingException
javax.xml.bind.JAXBException
java.io.UnsupportedEncodingException
public java.lang.String toString()
toString
in class SemanticProcessor
public void check(boolean deep, java.lang.String errorContext) throws com.exalead.util.TypedException
check
in interface com.exalead.util.Checkable
check
in class SemanticProcessor
com.exalead.util.TypedException
public void accept(SemanticProcessor.Visitor visitor) throws com.exalead.util.TypedException
accept
in class SemanticProcessor
com.exalead.util.TypedException
public <T> T accept(SemanticProcessor.Transformer<T> transformer, T[] t) throws com.exalead.util.TypedException
accept
in class SemanticProcessor
com.exalead.util.TypedException
Copyright © 2021 Dassault Systèmes, All Rights Reserved.