public class LanguageDetector extends MultiContextDocumentProcessor implements com.exalead.util.Checkable, java.io.Serializable
MultiContextDocumentProcessor.InputContexts, MultiContextDocumentProcessor.Transformer<T>
DocumentProcessor.FromDataModel
Modifier and Type | Field and Description |
---|---|
static boolean |
DEFAULT_EXCLUDE |
static int |
DEFAULT_LANGUAGES_TO_KEEP |
static int |
DEFAULT_MIN_LANG_PERCENTAGE |
java.lang.String |
defaultLanguage |
boolean |
exclude |
java.lang.String |
languageContext |
java.lang.String |
languagesToDetect |
int |
languagesToKeep |
int |
minLangPercentage |
java.lang.String |
outputContext |
inputContexts
acceptCondition, dataModelClass, dataModelProperty, dataModelState, DEFAULT_DISABLED, disabled, fromDataModel, name
Constructor and Description |
---|
LanguageDetector() |
LanguageDetector(LanguageDetector o)
Copy constructor
|
Modifier and Type | Method and Description |
---|---|
<T> T |
accept(DocumentProcessor.Transformer<T> transformer,
T[] t) |
<T> T |
accept(MultiContextDocumentProcessor.Transformer<T> transformer,
T[] t) |
void |
check(boolean deep,
java.lang.String errorContext)
Checks this LanguageDetector.
|
static LanguageDetector |
fromString(java.lang.String s)
Creates a LanguageDetector from its String representation.
|
java.lang.String |
getDefaultLanguage()
If not null, 'defaultLanguage' will be used as the default language when automatic detection fails.
|
java.lang.String |
getLanguageContext()
If this is not null and if there is a DocumentChunk with a ContextName matching 'languageContext':
no automatic detection will be performed,
the language specified will be used as the language of the DocumentChunks associated with the ContextNames specified as input.
|
java.lang.String |
getLanguagesToDetect()
If not null, restrict the language detector to a set of languages.
If you only have a small set of languages to detect, you can restrict the language detector to this set to improve precision. |
int |
getLanguagesToKeep()
Keeps the n most represented languages in the document.
|
int |
getMinLangPercentage()
Minimum percentage ([0-100]) that a language must represent to be detected (0 = always keeps a detected language).
|
java.lang.String |
getOutputContext()
ContextName of the DocumentChunk to create.
It will contain the language detected in the processed DocumentChunks as defined in ISO 639-1. |
boolean |
isExclude()
If true, "inputContexts" is an exclude list instead of an include list.
Language detection is then performed on all DocumentChunks except those whose ContextName appears in 'inputContexts'. |
LanguageDetector |
makeCopy()
Creates and returns a deep copy of this LanguageDetector.
|
static LanguageDetector |
readFrom(java.io.InputStream is)
Reads a LanguageDetector from an XML fragment.
|
void |
setDefaultLanguage(java.lang.String defaultLanguage)
If not null, 'defaultLanguage' will be used as the default language when automatic detection fails.
|
void |
setExclude(boolean exclude)
If true, "inputContexts" is an exclude list instead of an include list.
Language detection is then performed on all DocumentChunks except those whose ContextName appears in 'inputContexts'. |
void |
setLanguageContext(java.lang.String languageContext)
If this is not null and if there is a DocumentChunk with a ContextName matching 'languageContext':
no automatic detection will be performed,
the language specified will be used as the language of the DocumentChunks associated with the ContextNames specified as input.
|
void |
setLanguagesToDetect(java.lang.String languagesToDetect)
If not null, restrict the language detector to a set of languages.
If you only have a small set of languages to detect, you can restrict the language detector to this set to improve precision. |
void |
setLanguagesToKeep(int languagesToKeep)
Keeps the n most represented languages in the document.
|
void |
setMinLangPercentage(int minLangPercentage)
Minimum percentage ([0-100]) that a language must represent to be detected (0 = always keeps a detected language).
|
void |
setOutputContext(java.lang.String outputContext)
ContextName of the DocumentChunk to create.
It will contain the language detected in the processed DocumentChunks as defined in ISO 639-1. |
java.lang.String |
toString()
String representation of this LanguageDetector.
|
LanguageDetector |
withAcceptCondition(AcceptCondition acceptCondition) |
LanguageDetector |
withDataModelClass(java.lang.String dataModelClass) |
LanguageDetector |
withDataModelProperty(java.lang.String dataModelProperty) |
LanguageDetector |
withDataModelState(java.lang.String dataModelState) |
LanguageDetector |
withDefaultLanguage(java.lang.String defaultLanguage) |
LanguageDetector |
withDisabled(boolean disabled) |
LanguageDetector |
withDisabled(java.lang.Boolean disabled) |
LanguageDetector |
withExclude(boolean exclude) |
LanguageDetector |
withExclude(java.lang.Boolean exclude) |
LanguageDetector |
withFromDataModel(DocumentProcessor fromDataModel) |
LanguageDetector |
withInputContexts(java.util.Collection<StringValue> __values) |
LanguageDetector |
withInputContexts(StringValue... __values)
The processor will only be applied to DocumentChunks with a ContextName specified in this list.
|
LanguageDetector |
withLanguageContext(java.lang.String languageContext) |
LanguageDetector |
withLanguagesToDetect(java.lang.String languagesToDetect) |
LanguageDetector |
withLanguagesToKeep(int languagesToKeep) |
LanguageDetector |
withLanguagesToKeep(java.lang.Integer languagesToKeep) |
LanguageDetector |
withMinLangPercentage(int minLangPercentage) |
LanguageDetector |
withMinLangPercentage(java.lang.Integer minLangPercentage) |
LanguageDetector |
withName(java.lang.String name) |
LanguageDetector |
withOutputContext(java.lang.String outputContext) |
void |
writeTo(java.io.OutputStream os)
Writes this LanguageDetector as an XML fragment.
|
getInputContexts, setInputContexts, withInputContexts
getAcceptCondition, getDataModelClass, getDataModelProperty, getDataModelState, getFromDataModel, getName, isDisabled, setAcceptCondition, setDataModelClass, setDataModelProperty, setDataModelState, setDisabled, setFromDataModel, setName
public java.lang.String languageContext
public java.lang.String languagesToDetect
public java.lang.String defaultLanguage
public boolean exclude
public static final boolean DEFAULT_EXCLUDE
public java.lang.String outputContext
public int minLangPercentage
public static final int DEFAULT_MIN_LANG_PERCENTAGE
public int languagesToKeep
public static final int DEFAULT_LANGUAGES_TO_KEEP
public LanguageDetector()
public LanguageDetector(LanguageDetector o)
public LanguageDetector withInputContexts(StringValue... __values)
withInputContexts
in class MultiContextDocumentProcessor
public LanguageDetector withInputContexts(java.util.Collection<StringValue> __values)
withInputContexts
in class MultiContextDocumentProcessor
public LanguageDetector withAcceptCondition(AcceptCondition acceptCondition)
withAcceptCondition
in class MultiContextDocumentProcessor
public LanguageDetector withName(java.lang.String name)
withName
in class MultiContextDocumentProcessor
public LanguageDetector withDataModelState(java.lang.String dataModelState)
withDataModelState
in class MultiContextDocumentProcessor
public LanguageDetector withFromDataModel(DocumentProcessor fromDataModel)
withFromDataModel
in class MultiContextDocumentProcessor
public LanguageDetector withDataModelClass(java.lang.String dataModelClass)
withDataModelClass
in class MultiContextDocumentProcessor
public LanguageDetector withDataModelProperty(java.lang.String dataModelProperty)
withDataModelProperty
in class MultiContextDocumentProcessor
public LanguageDetector withDisabled(boolean disabled)
withDisabled
in class MultiContextDocumentProcessor
public LanguageDetector withDisabled(java.lang.Boolean disabled)
withDisabled
in class MultiContextDocumentProcessor
public void setLanguageContext(java.lang.String languageContext)
public java.lang.String getLanguageContext()
public LanguageDetector withLanguageContext(java.lang.String languageContext)
public void setLanguagesToDetect(java.lang.String languagesToDetect)
public java.lang.String getLanguagesToDetect()
public LanguageDetector withLanguagesToDetect(java.lang.String languagesToDetect)
public void setDefaultLanguage(java.lang.String defaultLanguage)
public java.lang.String getDefaultLanguage()
public LanguageDetector withDefaultLanguage(java.lang.String defaultLanguage)
public void setExclude(boolean exclude)
public boolean isExclude()
public LanguageDetector withExclude(boolean exclude)
public LanguageDetector withExclude(java.lang.Boolean exclude)
public void setOutputContext(java.lang.String outputContext)
public java.lang.String getOutputContext()
public LanguageDetector withOutputContext(java.lang.String outputContext)
public void setMinLangPercentage(int minLangPercentage)
public int getMinLangPercentage()
public LanguageDetector withMinLangPercentage(int minLangPercentage)
public LanguageDetector withMinLangPercentage(java.lang.Integer minLangPercentage)
public void setLanguagesToKeep(int languagesToKeep)
public int getLanguagesToKeep()
public LanguageDetector withLanguagesToKeep(int languagesToKeep)
public LanguageDetector withLanguagesToKeep(java.lang.Integer languagesToKeep)
public LanguageDetector makeCopy()
makeCopy
in class MultiContextDocumentProcessor
public static LanguageDetector readFrom(java.io.InputStream is) throws javax.xml.bind.JAXBException
javax.xml.bind.JAXBException
public void writeTo(java.io.OutputStream os) throws javax.xml.bind.JAXBException, java.io.IOException
writeTo
in class MultiContextDocumentProcessor
javax.xml.bind.JAXBException
java.io.IOException
public static LanguageDetector fromString(java.lang.String s) throws javax.xml.bind.JAXBException, java.io.UnsupportedEncodingException
javax.xml.bind.JAXBException
java.io.UnsupportedEncodingException
public java.lang.String toString()
toString
in class MultiContextDocumentProcessor
public void check(boolean deep, java.lang.String errorContext) throws com.exalead.util.TypedException
check
in interface com.exalead.util.Checkable
check
in class MultiContextDocumentProcessor
com.exalead.util.TypedException
public <T> T accept(MultiContextDocumentProcessor.Transformer<T> transformer, T[] t) throws com.exalead.util.TypedException
accept
in class MultiContextDocumentProcessor
com.exalead.util.TypedException
public <T> T accept(DocumentProcessor.Transformer<T> transformer, T[] t) throws com.exalead.util.TypedException
accept
in class DocumentProcessor
com.exalead.util.TypedException
Copyright © 2021 Dassault Systèmes, All Rights Reserved.