public class Crawler extends ICrawler implements com.exalead.util.Checkable, java.io.Serializable, Cloneable
Modifier and Type | Class and Description |
---|---|
static class | Crawler.Roots |
static class | Crawler.Rootsets |
Nested classes inherited from class ICrawler: ICrawler.MimeTypes, ICrawler.PushAPIFilter, ICrawler.SessionIdBlacklist
Modifier and Type | Field and Description |
---|---|
protected Crawler.Roots | roots |
protected Crawler.Rootsets | rootsets |
Fields inherited from class ICrawler: aggressive, archiveDocuments, buildGroup, connectorServer, crawlerServer, crawlSchedulerConfig, crawlSitemaps, customCrawlConfig, dataModel, DEFAULT_AGGRESSIVE, DEFAULT_ARCHIVE_DOCUMENTS, DEFAULT_CRAWL_SITEMAPS, DEFAULT_DEFAULT_ACCEPT, DEFAULT_DEFAULT_FOLLOW, DEFAULT_DEFAULT_FOLLOW_ROOTS, DEFAULT_DEFAULT_INDEX, DEFAULT_DISABLE_CONDITIONAL_GET, DEFAULT_ENABLE_CONSOLIDATION, DEFAULT_ENABLE_CONVERT_PROCESSOR, DEFAULT_ENABLE_SIMPLE_SITE_COLLAPSING, DEFAULT_IGNORE_ROBOTS_TXT, DEFAULT_MIME_TYPES_MODE, DEFAULT_NEAR_DUPLICATE_DETECTOR, DEFAULT_NTHREADS, DEFAULT_PATTERNS_DETECTOR, DEFAULT_SIMPLE_SITE_COLLAPSING_DEPTH, DEFAULT_SMART_REFRESH, DEFAULT_SMART_REFRESH_MAX_AGE_S, DEFAULT_SMART_REFRESH_MIN_AGE_S, DEFAULT_STORE_TEXT_ONLY, DEFAULT_THROTTLE_TIME_M_S, defaultAccept, defaultFollow, defaultFollowRoots, defaultIndex, disableConditionalGet, documentsType, enableConsolidation, enableConvertProcessor, enableSimpleSiteCollapsing, fetcher, ignoreRobotsTxt, mimeTypes, mimeTypesMode, name, nearDuplicateDetector, nthreads, patternsDetector, PushAPIFilter, rules, sessionIdBlacklist, simpleSiteCollapsingDepth, smartRefresh, smartRefreshMaxAgeS, smartRefreshMinAgeS, storeTextOnly, throttleTimeMS, urlTesterData
Constructor and Description |
---|
Crawler() |
Crawler(Crawler o)
Copy constructor
|
Modifier and Type | Method and Description |
---|---|
void |
check(boolean deep,
java.lang.String errorContext)
Checks this Crawler.
|
Crawler |
clone()
Creates and returns a deep copy of this Crawler.
|
static Crawler |
fromString(java.lang.String s)
Creates a Crawler from its String representation.
|
Crawler.Roots |
getRoots() |
Crawler.Rootsets |
getRootsets() |
Crawler |
makeCopy()
Creates and returns a deep copy of this Crawler.
|
static Crawler |
readFrom(java.io.InputStream is)
Reads a Crawler from an XML fragment.
|
void |
setRoots(Crawler.Roots __value) |
void |
setRootsets(Crawler.Rootsets __value) |
java.lang.String |
toString()
String representation of this Crawler.
|
Crawler |
withAggressive(boolean aggressive) |
Crawler |
withAggressive(java.lang.Boolean aggressive) |
Crawler |
withArchiveDocuments(boolean archiveDocuments) |
Crawler |
withArchiveDocuments(java.lang.Boolean archiveDocuments) |
Crawler |
withBuildGroup(java.lang.String buildGroup) |
Crawler |
withConnectorServer(java.lang.String connectorServer) |
Crawler |
withCrawlerServer(java.lang.String crawlerServer) |
Crawler |
withCrawlSchedulerConfig(CrawlSchedulerConfig crawlSchedulerConfig) |
Crawler |
withCrawlSitemaps(boolean crawlSitemaps) |
Crawler |
withCrawlSitemaps(java.lang.Boolean crawlSitemaps) |
Crawler |
withCustomCrawlConfig(CustomCrawlConfig customCrawlConfig) |
Crawler |
withDataModel(java.lang.String dataModel) |
Crawler |
withDefaultAccept(boolean defaultAccept) |
Crawler |
withDefaultAccept(java.lang.Boolean defaultAccept) |
Crawler |
withDefaultFollow(boolean defaultFollow) |
Crawler |
withDefaultFollow(java.lang.Boolean defaultFollow) |
Crawler |
withDefaultFollowRoots(boolean defaultFollowRoots) |
Crawler |
withDefaultFollowRoots(java.lang.Boolean defaultFollowRoots) |
Crawler |
withDefaultIndex(boolean defaultIndex) |
Crawler |
withDefaultIndex(java.lang.Boolean defaultIndex) |
Crawler |
withDisableConditionalGet(boolean disableConditionalGet) |
Crawler |
withDisableConditionalGet(java.lang.Boolean disableConditionalGet) |
Crawler |
withDocumentsType(java.lang.String documentsType) |
Crawler |
withEnableConsolidation(boolean enableConsolidation) |
Crawler |
withEnableConsolidation(java.lang.Boolean enableConsolidation) |
Crawler |
withEnableConvertProcessor(boolean enableConvertProcessor) |
Crawler |
withEnableConvertProcessor(java.lang.Boolean enableConvertProcessor) |
Crawler |
withEnableSimpleSiteCollapsing(boolean enableSimpleSiteCollapsing) |
Crawler |
withEnableSimpleSiteCollapsing(java.lang.Boolean enableSimpleSiteCollapsing) |
Crawler |
withFetcher(java.lang.String fetcher) |
Crawler |
withIgnoreRobotsTxt(boolean ignoreRobotsTxt) |
Crawler |
withIgnoreRobotsTxt(java.lang.Boolean ignoreRobotsTxt) |
Crawler |
withMimeTypes(java.util.Collection<StringConstantValue> __values) |
Crawler |
withMimeTypes(StringConstantValue... __values) |
Crawler |
withMimeTypesMode(java.lang.String mimeTypesMode) |
Crawler |
withName(java.lang.String name) |
Crawler |
withNearDuplicateDetector(boolean nearDuplicateDetector) |
Crawler |
withNearDuplicateDetector(java.lang.Boolean nearDuplicateDetector) |
Crawler |
withNthreads(int nthreads) |
Crawler |
withNthreads(java.lang.Integer nthreads) |
Crawler |
withPatternsDetector(boolean patternsDetector) |
Crawler |
withPatternsDetector(java.lang.Boolean patternsDetector) |
Crawler |
withPushAPIFilter(java.util.Collection<KeyValue> __values) |
Crawler |
withPushAPIFilter(KeyValue... __values) |
Crawler |
withRoots(java.util.Collection<Root> __values) |
Crawler |
withRoots(Crawler.Roots __value) |
Crawler |
withRoots(Root... __values) |
Crawler |
withRootsets(java.util.Collection<RootSet> __values) |
Crawler |
withRootsets(Crawler.Rootsets __value) |
Crawler |
withRootsets(RootSet... __values) |
Crawler |
withRules(java.util.Collection<Rules> __values) |
Crawler |
withRules(Rules... __values) |
Crawler |
withSessionIdBlacklist(java.util.Collection<StringConstantValue> __values) |
Crawler |
withSessionIdBlacklist(StringConstantValue... __values)
SessionId blacklist.
|
Crawler |
withSimpleSiteCollapsingDepth(int simpleSiteCollapsingDepth) |
Crawler |
withSimpleSiteCollapsingDepth(java.lang.Integer simpleSiteCollapsingDepth) |
Crawler |
withSmartRefresh(boolean smartRefresh) |
Crawler |
withSmartRefresh(java.lang.Boolean smartRefresh) |
Crawler |
withSmartRefreshMaxAgeS(int smartRefreshMaxAgeS) |
Crawler |
withSmartRefreshMaxAgeS(java.lang.Integer smartRefreshMaxAgeS) |
Crawler |
withSmartRefreshMinAgeS(int smartRefreshMinAgeS) |
Crawler |
withSmartRefreshMinAgeS(java.lang.Integer smartRefreshMinAgeS) |
Crawler |
withStoreTextOnly(boolean storeTextOnly) |
Crawler |
withStoreTextOnly(java.lang.Boolean storeTextOnly) |
Crawler |
withThrottleTimeMS(int throttleTimeMS) |
Crawler |
withThrottleTimeMS(java.lang.Integer throttleTimeMS) |
Crawler |
withUrlTesterData(UrlTesterData urlTesterData) |
void |
writeTo(java.io.OutputStream os)
Writes this Crawler as an XML fragment.
|
Methods inherited from class ICrawler: getBuildGroup, getConnectorServer, getCrawlerServer, getCrawlSchedulerConfig, getCustomCrawlConfig, getDataModel, getDocumentsType, getFetcher, getMimeTypes, getMimeTypesMode, getName, getNthreads, getPushAPIFilter, getRules, getSessionIdBlacklist, getSimpleSiteCollapsingDepth, getSmartRefreshMaxAgeS, getSmartRefreshMinAgeS, getThrottleTimeMS, getUrlTesterData, isAggressive, isArchiveDocuments, isCrawlSitemaps, isDefaultAccept, isDefaultFollow, isDefaultFollowRoots, isDefaultIndex, isDisableConditionalGet, isEnableConsolidation, isEnableConvertProcessor, isEnableSimpleSiteCollapsing, isIgnoreRobotsTxt, isNearDuplicateDetector, isPatternsDetector, isSmartRefresh, isStoreTextOnly, setAggressive, setArchiveDocuments, setBuildGroup, setConnectorServer, setCrawlerServer, setCrawlSchedulerConfig, setCrawlSitemaps, setCustomCrawlConfig, setDataModel, setDefaultAccept, setDefaultFollow, setDefaultFollowRoots, setDefaultIndex, setDisableConditionalGet, setDocumentsType, setEnableConsolidation, setEnableConvertProcessor, setEnableSimpleSiteCollapsing, setFetcher, setIgnoreRobotsTxt, setMimeTypes, setMimeTypesMode, setName, setNearDuplicateDetector, setNthreads, setPatternsDetector, setPushAPIFilter, setSessionIdBlacklist, setSimpleSiteCollapsingDepth, setSmartRefresh, setSmartRefreshMaxAgeS, setSmartRefreshMinAgeS, setStoreTextOnly, setThrottleTimeMS, setUrlTesterData, withMimeTypes, withPushAPIFilter, withSessionIdBlacklist
protected Crawler.Roots roots
protected Crawler.Rootsets rootsets
public Crawler()
public Crawler(Crawler o)
public Crawler withCrawlSchedulerConfig(CrawlSchedulerConfig crawlSchedulerConfig)
withCrawlSchedulerConfig
in class ICrawler
public Crawler withCustomCrawlConfig(CustomCrawlConfig customCrawlConfig)
withCustomCrawlConfig
in class ICrawler
public Crawler withDocumentsType(java.lang.String documentsType)
withDocumentsType
in class ICrawler
public Crawler withFetcher(java.lang.String fetcher)
withFetcher
in class ICrawler
public Crawler withCrawlerServer(java.lang.String crawlerServer)
withCrawlerServer
in class ICrawler
public Crawler withConnectorServer(java.lang.String connectorServer)
withConnectorServer
in class ICrawler
public Crawler withBuildGroup(java.lang.String buildGroup)
withBuildGroup
in class ICrawler
public Crawler withDataModel(java.lang.String dataModel)
withDataModel
in class ICrawler
public Crawler withStoreTextOnly(boolean storeTextOnly)
withStoreTextOnly
in class ICrawler
public Crawler withStoreTextOnly(java.lang.Boolean storeTextOnly)
withStoreTextOnly
in class ICrawler
public Crawler withNthreads(int nthreads)
withNthreads
in class ICrawler
public Crawler withNthreads(java.lang.Integer nthreads)
withNthreads
in class ICrawler
public Crawler withAggressive(boolean aggressive)
withAggressive
in class ICrawler
public Crawler withAggressive(java.lang.Boolean aggressive)
withAggressive
in class ICrawler
public Crawler withThrottleTimeMS(int throttleTimeMS)
withThrottleTimeMS
in class ICrawler
public Crawler withThrottleTimeMS(java.lang.Integer throttleTimeMS)
withThrottleTimeMS
in class ICrawler
public Crawler withIgnoreRobotsTxt(boolean ignoreRobotsTxt)
withIgnoreRobotsTxt
in class ICrawler
public Crawler withIgnoreRobotsTxt(java.lang.Boolean ignoreRobotsTxt)
withIgnoreRobotsTxt
in class ICrawler
public Crawler withEnableConvertProcessor(boolean enableConvertProcessor)
withEnableConvertProcessor
in class ICrawler
public Crawler withEnableConvertProcessor(java.lang.Boolean enableConvertProcessor)
withEnableConvertProcessor
in class ICrawler
public Crawler withNearDuplicateDetector(boolean nearDuplicateDetector)
withNearDuplicateDetector
in class ICrawler
public Crawler withNearDuplicateDetector(java.lang.Boolean nearDuplicateDetector)
withNearDuplicateDetector
in class ICrawler
public Crawler withPatternsDetector(boolean patternsDetector)
withPatternsDetector
in class ICrawler
public Crawler withPatternsDetector(java.lang.Boolean patternsDetector)
withPatternsDetector
in class ICrawler
public Crawler withCrawlSitemaps(boolean crawlSitemaps)
withCrawlSitemaps
in class ICrawler
public Crawler withCrawlSitemaps(java.lang.Boolean crawlSitemaps)
withCrawlSitemaps
in class ICrawler
public Crawler withDisableConditionalGet(boolean disableConditionalGet)
withDisableConditionalGet
in class ICrawler
public Crawler withDisableConditionalGet(java.lang.Boolean disableConditionalGet)
withDisableConditionalGet
in class ICrawler
public Crawler withDefaultAccept(boolean defaultAccept)
withDefaultAccept
in class ICrawler
public Crawler withDefaultAccept(java.lang.Boolean defaultAccept)
withDefaultAccept
in class ICrawler
public Crawler withDefaultIndex(boolean defaultIndex)
withDefaultIndex
in class ICrawler
public Crawler withDefaultIndex(java.lang.Boolean defaultIndex)
withDefaultIndex
in class ICrawler
public Crawler withDefaultFollow(boolean defaultFollow)
withDefaultFollow
in class ICrawler
public Crawler withDefaultFollow(java.lang.Boolean defaultFollow)
withDefaultFollow
in class ICrawler
public Crawler withDefaultFollowRoots(boolean defaultFollowRoots)
withDefaultFollowRoots
in class ICrawler
public Crawler withDefaultFollowRoots(java.lang.Boolean defaultFollowRoots)
withDefaultFollowRoots
in class ICrawler
public Crawler withEnableSimpleSiteCollapsing(boolean enableSimpleSiteCollapsing)
withEnableSimpleSiteCollapsing
in class ICrawler
public Crawler withEnableSimpleSiteCollapsing(java.lang.Boolean enableSimpleSiteCollapsing)
withEnableSimpleSiteCollapsing
in class ICrawler
public Crawler withSimpleSiteCollapsingDepth(int simpleSiteCollapsingDepth)
withSimpleSiteCollapsingDepth
in class ICrawler
public Crawler withSimpleSiteCollapsingDepth(java.lang.Integer simpleSiteCollapsingDepth)
withSimpleSiteCollapsingDepth
in class ICrawler
public Crawler withMimeTypesMode(java.lang.String mimeTypesMode)
withMimeTypesMode
in class ICrawler
public Crawler withMimeTypes(StringConstantValue... __values)
withMimeTypes
in class ICrawler
public Crawler withMimeTypes(java.util.Collection<StringConstantValue> __values)
withMimeTypes
in class ICrawler
public Crawler withSessionIdBlacklist(StringConstantValue... __values)
withSessionIdBlacklist
in class ICrawler
public Crawler withSessionIdBlacklist(java.util.Collection<StringConstantValue> __values)
withSessionIdBlacklist
in class ICrawler
public Crawler withSmartRefresh(boolean smartRefresh)
withSmartRefresh
in class ICrawler
public Crawler withSmartRefresh(java.lang.Boolean smartRefresh)
withSmartRefresh
in class ICrawler
public Crawler withSmartRefreshMinAgeS(int smartRefreshMinAgeS)
withSmartRefreshMinAgeS
in class ICrawler
public Crawler withSmartRefreshMinAgeS(java.lang.Integer smartRefreshMinAgeS)
withSmartRefreshMinAgeS
in class ICrawler
public Crawler withSmartRefreshMaxAgeS(int smartRefreshMaxAgeS)
withSmartRefreshMaxAgeS
in class ICrawler
public Crawler withSmartRefreshMaxAgeS(java.lang.Integer smartRefreshMaxAgeS)
withSmartRefreshMaxAgeS
in class ICrawler
public Crawler withArchiveDocuments(boolean archiveDocuments)
withArchiveDocuments
in class ICrawler
public Crawler withArchiveDocuments(java.lang.Boolean archiveDocuments)
withArchiveDocuments
in class ICrawler
public Crawler withPushAPIFilter(KeyValue... __values)
withPushAPIFilter
in class ICrawler
public Crawler withPushAPIFilter(java.util.Collection<KeyValue> __values)
withPushAPIFilter
in class ICrawler
public Crawler withUrlTesterData(UrlTesterData urlTesterData)
withUrlTesterData
in class ICrawler
public Crawler withEnableConsolidation(boolean enableConsolidation)
withEnableConsolidation
in class ICrawler
public Crawler withEnableConsolidation(java.lang.Boolean enableConsolidation)
withEnableConsolidation
in class ICrawler
public Crawler.Roots getRoots()
public void setRoots(Crawler.Roots __value)
public Crawler withRoots(Crawler.Roots __value)
public Crawler.Rootsets getRootsets()
public void setRootsets(Crawler.Rootsets __value)
public Crawler withRootsets(Crawler.Rootsets __value)
public Crawler clone()
public Crawler makeCopy()
public static Crawler readFrom(java.io.InputStream is) throws javax.xml.bind.JAXBException
javax.xml.bind.JAXBException
public void writeTo(java.io.OutputStream os) throws javax.xml.bind.JAXBException, java.io.IOException
public static Crawler fromString(java.lang.String s) throws javax.xml.bind.JAXBException, java.io.UnsupportedEncodingException
javax.xml.bind.JAXBException
java.io.UnsupportedEncodingException
public java.lang.String toString()
Copyright © 2021 Dassault Systèmes, All Rights Reserved.