public class OntologyMatcher extends SemanticProcessor implements com.exalead.util.Checkable, java.io.Serializable
SemanticProcessor.Transformer<T>, SemanticProcessor.Visitor
Modifier and Type | Field and Description |
---|---|
java.lang.String |
annotationPrefix |
java.lang.String |
annotationsToIgnore |
static java.lang.String |
DEFAULT_ANNOTATION_PREFIX |
static boolean |
DEFAULT_ENABLE_APPROX_MATCHING |
static boolean |
DEFAULT_IGNORE_SPACES |
static boolean |
DEFAULT_KEEP_LONGEST_MATCH |
static boolean |
DEFAULT_KEEP_LONGEST_MATCH_INTER_TAG |
static int |
DEFAULT_MIN_WORD_SIZE_FOR_DIST1 |
static int |
DEFAULT_MIN_WORD_SIZE_FOR_DIST2 |
static boolean |
DEFAULT_RESTRICT_LANGUAGE |
static boolean |
DEFAULT_TOKENIZE_ANNOTATIONS |
static boolean |
DEFAULT_TRUST_LEVEL_BASED_DEDUP |
boolean |
enableApproxMatching |
boolean |
ignoreSpaces |
boolean |
keepLongestMatch |
boolean |
keepLongestMatchInterTag |
int |
minWordSizeForDist1 |
int |
minWordSizeForDist2 |
java.lang.String |
resourceDir |
boolean |
restrictLanguage |
boolean |
tokenizeAnnotations |
boolean |
trustLevelBasedDedup |
contexts, name, src
Constructor and Description |
---|
OntologyMatcher() |
OntologyMatcher(OntologyMatcher o)
Copy constructor
|
Modifier and Type | Method and Description |
---|---|
<T> T |
accept(SemanticProcessor.Transformer<T> transformer,
T[] t) |
void |
accept(SemanticProcessor.Visitor visitor) |
void |
check(boolean deep,
java.lang.String errorContext)
Checks this OntologyMatcher.
|
static OntologyMatcher |
fromString(java.lang.String s)
Builds an OntologyMatcher from its string representation.
|
java.lang.String |
getAnnotationPrefix()
A prefix to add to the tag of every annotation.
For example, if the package of the entry matched in the ontology is "exalead.location.country" and the annotationPrefix is "myOntology_", an annotation will be added with the tag "myOntology_exalead.location.country". |
java.lang.String |
getAnnotationsToIgnore()
The list of annotations to be ignored (comma separated).
This feature lets you define a list of words/expressions to ignore when recognizing this ontology. For example, if you want to match "website of the embassy", "website of embassy" and "website embassy", then you must add the expressions "of" and "the" with the tag "toIgnore" in ontology A, and then add the expression "website embassy" in ontology B with tagsToIgnore="toIgnore". |
int |
getMinWordSizeForDist1()
Minimum number of characters in a token to enable a Damerau-Levenshtein distance of 1.
|
int |
getMinWordSizeForDist2()
Minimum number of characters in a token to enable a Damerau-Levenshtein distance of 2.
|
java.lang.String |
getResourceDir()
URL for the directory containing the ontology (data://, file:// or resource://).
|
boolean |
isEnableApproxMatching()
Enables approximate matching in the ontology.
Approximate matching uses Damerau-Levenshtein edit distance. |
boolean |
isIgnoreSpaces()
If your ontology was compiled with matchOnSeparators=false,
this allows 'lemonde' to retrieve 'le monde' or 'le monde' to retrieve 'lemonde'. If your ontology was compiled with matchOnSeparators=true, this allows 'le monde' to retrieve 'le monde'. |
boolean |
isKeepLongestMatch()
Keeps the longest match only.
|
boolean |
isKeepLongestMatchInterTag()
Keeps the longest match (tag independent) only.
For example, if you have 5 tokens ('a', 'b', 'c', 'd', 'e') and 4 annotations 'a', 'a-c', 'b-c-d' and 'd-e', this option will only keep 'b-c-d' and will remove all other annotations. |
boolean |
isRestrictLanguage()
Keeps expressions added with language == Language.XX or with document language only.
For example, if an ontology contains an expression added with language=En, it will be extracted only for English documents. |
boolean |
isTokenizeAnnotations()
If you have multi-token annotations (like "super marche") on a single token "supermarche", this option
will automatically subtokenize "supermarche" into "super" "marche" and keep the original annotations. If you enable this option, keepLongestMatch and keepLongestMatchInterTag will be set to true. |
boolean |
isTrustLevelBasedDedup()
Keeps only the annotations with the highest trust level when several overlap.
|
OntologyMatcher |
makeCopy()
Creates and returns a deep copy of this OntologyMatcher.
|
static OntologyMatcher |
readFrom(java.io.InputStream is)
Reads an OntologyMatcher from an XML fragment.
|
void |
setAnnotationPrefix(java.lang.String annotationPrefix)
A prefix to add to the tag of every annotation.
For example, if the package of the entry matched in the ontology is "exalead.location.country" and the annotationPrefix is "myOntology_", an annotation will be added with the tag "myOntology_exalead.location.country". |
void |
setAnnotationsToIgnore(java.lang.String annotationsToIgnore)
Sets the list of annotations to be ignored (comma separated).
This feature lets you define a list of words/expressions to ignore when recognizing this ontology. For example, if you want to match "website of the embassy", "website of embassy" and "website embassy", then you must add the expressions "of" and "the" with the tag "toIgnore" in ontology A, and then add the expression "website embassy" in ontology B with tagsToIgnore="toIgnore". |
void |
setEnableApproxMatching(boolean enableApproxMatching)
Enables approximate matching in the ontology.
Approximate matching uses Damerau-Levenshtein edit distance. |
void |
setIgnoreSpaces(boolean ignoreSpaces)
If your ontology was compiled with matchOnSeparators=false,
this allows 'lemonde' to retrieve 'le monde' or 'le monde' to retrieve 'lemonde'. If your ontology was compiled with matchOnSeparators=true, this allows 'le monde' to retrieve 'le monde'. |
void |
setKeepLongestMatch(boolean keepLongestMatch)
Keeps the longest match only.
|
void |
setKeepLongestMatchInterTag(boolean keepLongestMatchInterTag)
Keeps the longest match (tag independent) only.
For example, if you have 5 tokens ('a', 'b', 'c', 'd', 'e') and 4 annotations 'a', 'a-c', 'b-c-d' and 'd-e', this option will only keep 'b-c-d' and will remove all other annotations. |
void |
setMinWordSizeForDist1(int minWordSizeForDist1)
Minimum number of characters in a token to enable a Damerau-Levenshtein distance of 1.
|
void |
setMinWordSizeForDist2(int minWordSizeForDist2)
Minimum number of characters in a token to enable a Damerau-Levenshtein distance of 2.
|
void |
setResourceDir(java.lang.String resourceDir)
URL for the directory containing the ontology (data://, file:// or resource://).
|
void |
setRestrictLanguage(boolean restrictLanguage)
Keeps expressions added with language == Language.XX or with document language only.
For example, if an ontology contains an expression added with language=En, it will be extracted only for English documents. |
void |
setTokenizeAnnotations(boolean tokenizeAnnotations)
If you have multi-token annotations (like "super marche") on a single token "supermarche", this option
will automatically subtokenize "supermarche" into "super" "marche" and keep the original annotations. If you enable this option, keepLongestMatch and keepLongestMatchInterTag will be set to true. |
void |
setTrustLevelBasedDedup(boolean trustLevelBasedDedup)
Keeps only the annotations with the highest trust level when several overlap.
|
java.lang.String |
toString()
String representation of this OntologyMatcher.
|
OntologyMatcher |
withAnnotationPrefix(java.lang.String annotationPrefix) |
OntologyMatcher |
withAnnotationsToIgnore(java.lang.String annotationsToIgnore) |
OntologyMatcher |
withContexts(java.lang.String contexts) |
OntologyMatcher |
withEnableApproxMatching(boolean enableApproxMatching) |
OntologyMatcher |
withEnableApproxMatching(java.lang.Boolean enableApproxMatching) |
OntologyMatcher |
withIgnoreSpaces(boolean ignoreSpaces) |
OntologyMatcher |
withIgnoreSpaces(java.lang.Boolean ignoreSpaces) |
OntologyMatcher |
withKeepLongestMatch(boolean keepLongestMatch) |
OntologyMatcher |
withKeepLongestMatch(java.lang.Boolean keepLongestMatch) |
OntologyMatcher |
withKeepLongestMatchInterTag(boolean keepLongestMatchInterTag) |
OntologyMatcher |
withKeepLongestMatchInterTag(java.lang.Boolean keepLongestMatchInterTag) |
OntologyMatcher |
withMinWordSizeForDist1(int minWordSizeForDist1) |
OntologyMatcher |
withMinWordSizeForDist1(java.lang.Integer minWordSizeForDist1) |
OntologyMatcher |
withMinWordSizeForDist2(int minWordSizeForDist2) |
OntologyMatcher |
withMinWordSizeForDist2(java.lang.Integer minWordSizeForDist2) |
OntologyMatcher |
withName(java.lang.String name) |
OntologyMatcher |
withResourceDir(java.lang.String resourceDir) |
OntologyMatcher |
withRestrictLanguage(boolean restrictLanguage) |
OntologyMatcher |
withRestrictLanguage(java.lang.Boolean restrictLanguage) |
OntologyMatcher |
withSrc(java.lang.String src)
Deprecated.
|
OntologyMatcher |
withTokenizeAnnotations(boolean tokenizeAnnotations) |
OntologyMatcher |
withTokenizeAnnotations(java.lang.Boolean tokenizeAnnotations) |
OntologyMatcher |
withTrustLevelBasedDedup(boolean trustLevelBasedDedup) |
OntologyMatcher |
withTrustLevelBasedDedup(java.lang.Boolean trustLevelBasedDedup) |
void |
writeTo(java.io.OutputStream os)
Write this OntologyMatcher as an XML fragment
|
getContexts, getName, getSrc, setContexts, setName, setSrc
public boolean enableApproxMatching
public static final boolean DEFAULT_ENABLE_APPROX_MATCHING
public int minWordSizeForDist1
public static final int DEFAULT_MIN_WORD_SIZE_FOR_DIST1
public int minWordSizeForDist2
public static final int DEFAULT_MIN_WORD_SIZE_FOR_DIST2
public java.lang.String resourceDir
public boolean restrictLanguage
public static final boolean DEFAULT_RESTRICT_LANGUAGE
public boolean keepLongestMatch
public static final boolean DEFAULT_KEEP_LONGEST_MATCH
public boolean keepLongestMatchInterTag
public static final boolean DEFAULT_KEEP_LONGEST_MATCH_INTER_TAG
public boolean tokenizeAnnotations
public static final boolean DEFAULT_TOKENIZE_ANNOTATIONS
public java.lang.String annotationsToIgnore
public boolean ignoreSpaces
public static final boolean DEFAULT_IGNORE_SPACES
public java.lang.String annotationPrefix
public static final java.lang.String DEFAULT_ANNOTATION_PREFIX
public boolean trustLevelBasedDedup
public static final boolean DEFAULT_TRUST_LEVEL_BASED_DEDUP
public OntologyMatcher()
public OntologyMatcher(OntologyMatcher o)
public OntologyMatcher withName(java.lang.String name)
withName
in class SemanticProcessor
public OntologyMatcher withContexts(java.lang.String contexts)
withContexts
in class SemanticProcessor
@Deprecated public OntologyMatcher withSrc(java.lang.String src)
withSrc
in class SemanticProcessor
public void setEnableApproxMatching(boolean enableApproxMatching)
public boolean isEnableApproxMatching()
public OntologyMatcher withEnableApproxMatching(boolean enableApproxMatching)
public OntologyMatcher withEnableApproxMatching(java.lang.Boolean enableApproxMatching)
public void setMinWordSizeForDist1(int minWordSizeForDist1)
public int getMinWordSizeForDist1()
public OntologyMatcher withMinWordSizeForDist1(int minWordSizeForDist1)
public OntologyMatcher withMinWordSizeForDist1(java.lang.Integer minWordSizeForDist1)
public void setMinWordSizeForDist2(int minWordSizeForDist2)
public int getMinWordSizeForDist2()
public OntologyMatcher withMinWordSizeForDist2(int minWordSizeForDist2)
public OntologyMatcher withMinWordSizeForDist2(java.lang.Integer minWordSizeForDist2)
public void setResourceDir(java.lang.String resourceDir)
public java.lang.String getResourceDir()
public OntologyMatcher withResourceDir(java.lang.String resourceDir)
public void setRestrictLanguage(boolean restrictLanguage)
public boolean isRestrictLanguage()
public OntologyMatcher withRestrictLanguage(boolean restrictLanguage)
public OntologyMatcher withRestrictLanguage(java.lang.Boolean restrictLanguage)
public void setKeepLongestMatch(boolean keepLongestMatch)
public boolean isKeepLongestMatch()
public OntologyMatcher withKeepLongestMatch(boolean keepLongestMatch)
public OntologyMatcher withKeepLongestMatch(java.lang.Boolean keepLongestMatch)
public void setKeepLongestMatchInterTag(boolean keepLongestMatchInterTag)
public boolean isKeepLongestMatchInterTag()
public OntologyMatcher withKeepLongestMatchInterTag(boolean keepLongestMatchInterTag)
public OntologyMatcher withKeepLongestMatchInterTag(java.lang.Boolean keepLongestMatchInterTag)
public void setTokenizeAnnotations(boolean tokenizeAnnotations)
public boolean isTokenizeAnnotations()
public OntologyMatcher withTokenizeAnnotations(boolean tokenizeAnnotations)
public OntologyMatcher withTokenizeAnnotations(java.lang.Boolean tokenizeAnnotations)
public void setAnnotationsToIgnore(java.lang.String annotationsToIgnore)
public java.lang.String getAnnotationsToIgnore()
public OntologyMatcher withAnnotationsToIgnore(java.lang.String annotationsToIgnore)
public void setIgnoreSpaces(boolean ignoreSpaces)
public boolean isIgnoreSpaces()
public OntologyMatcher withIgnoreSpaces(boolean ignoreSpaces)
public OntologyMatcher withIgnoreSpaces(java.lang.Boolean ignoreSpaces)
public void setAnnotationPrefix(java.lang.String annotationPrefix)
public java.lang.String getAnnotationPrefix()
public OntologyMatcher withAnnotationPrefix(java.lang.String annotationPrefix)
public void setTrustLevelBasedDedup(boolean trustLevelBasedDedup)
public boolean isTrustLevelBasedDedup()
public OntologyMatcher withTrustLevelBasedDedup(boolean trustLevelBasedDedup)
public OntologyMatcher withTrustLevelBasedDedup(java.lang.Boolean trustLevelBasedDedup)
public OntologyMatcher makeCopy()
makeCopy
in class SemanticProcessor
public static OntologyMatcher readFrom(java.io.InputStream is) throws javax.xml.bind.JAXBException
javax.xml.bind.JAXBException
public void writeTo(java.io.OutputStream os) throws javax.xml.bind.JAXBException, java.io.IOException
writeTo
in class SemanticProcessor
javax.xml.bind.JAXBException
java.io.IOException
public static OntologyMatcher fromString(java.lang.String s) throws javax.xml.bind.JAXBException, java.io.UnsupportedEncodingException
javax.xml.bind.JAXBException
java.io.UnsupportedEncodingException
public java.lang.String toString()
toString
in class SemanticProcessor
public void check(boolean deep, java.lang.String errorContext) throws com.exalead.util.TypedException
check
in interface com.exalead.util.Checkable
check
in class SemanticProcessor
com.exalead.util.TypedException
public void accept(SemanticProcessor.Visitor visitor) throws com.exalead.util.TypedException
accept
in class SemanticProcessor
com.exalead.util.TypedException
public <T> T accept(SemanticProcessor.Transformer<T> transformer, T[] t) throws com.exalead.util.TypedException
accept
in class SemanticProcessor
com.exalead.util.TypedException
Copyright © 2013 Dassault Systèmes, All Rights Reserved.