public abstract class ICrawler
extends java.lang.Object
implements java.io.Serializable, java.lang.Cloneable
Modifier and Type | Field and Description |
---|---|
protected java.lang.Boolean | aggressive |
protected java.lang.String | buildGroup |
protected java.lang.String | crawlerServer |
protected java.lang.Boolean | crawlSitemaps |
protected java.lang.String | defaultFetcher |
protected java.lang.Boolean | enableConvertProcessor |
protected java.lang.Boolean | ignoreRobotsTxt |
protected java.lang.String | name |
protected java.lang.Boolean | nearDuplicateDetector |
protected java.lang.Integer | nthreads |
protected java.lang.Boolean | patternsDetector |
protected java.lang.Integer | throttleTimeMS |
Constructor and Description |
---|
ICrawler() - Creates a new ICrawler instance. |
ICrawler(ICrawler o) - Creates a new ICrawler instance by deeply copying a given ICrawler instance. |
Modifier and Type | Method and Description |
---|---|
ICrawler | clone() - Creates and returns a deep copy of this object. |
java.lang.String | getBuildGroup() - Target build group. |
java.lang.String | getCrawlerServer() - Crawler server hosting this crawler. |
java.lang.String | getDefaultFetcher() - Which fetcher to use with URLs that belong to no source. |
java.lang.String | getName() - The crawler name. |
int | getNthreads() - The number of crawl threads, which must be strictly positive. |
int | getThrottleTimeMS() - In the case of a non-aggressive crawl, this defines the sleep interval between requests to the same host. |
boolean | isAggressive() - Whether to activate aggressive crawl, which never sleeps between two requests to the same host. |
boolean | isCrawlSitemaps() - Whether to crawl sitemaps. |
boolean | isEnableConvertProcessor() - Whether to enable the remoteconvert-based processor for extracting links from binary documents. |
boolean | isIgnoreRobotsTxt() - Whether to ignore robots.txt rules. |
boolean | isNearDuplicateDetector() - Whether to enable the near-duplicate content detector. |
boolean | isPatternsDetector() - Whether to enable patterns detection in pages. |
void | setAggressive(java.lang.Boolean value) - Sets the value of the aggressive property. |
void | setBuildGroup(java.lang.String value) - Sets the value of the buildGroup property. |
void | setCrawlerServer(java.lang.String value) - Sets the value of the crawlerServer property. |
void | setCrawlSitemaps(java.lang.Boolean value) - Sets the value of the crawlSitemaps property. |
void | setDefaultFetcher(java.lang.String value) - Sets the value of the defaultFetcher property. |
void | setEnableConvertProcessor(java.lang.Boolean value) - Sets the value of the enableConvertProcessor property. |
void | setIgnoreRobotsTxt(java.lang.Boolean value) - Sets the value of the ignoreRobotsTxt property. |
void | setName(java.lang.String value) - Sets the value of the name property. |
void | setNearDuplicateDetector(java.lang.Boolean value) - Sets the value of the nearDuplicateDetector property. |
void | setNthreads(java.lang.Integer value) - Sets the value of the nthreads property. |
void | setPatternsDetector(java.lang.Boolean value) - Sets the value of the patternsDetector property. |
void | setThrottleTimeMS(java.lang.Integer value) - Sets the value of the throttleTimeMS property. |
ICrawler | withAggressive(java.lang.Boolean value) |
ICrawler | withBuildGroup(java.lang.String value) |
ICrawler | withCrawlerServer(java.lang.String value) |
ICrawler | withCrawlSitemaps(java.lang.Boolean value) |
ICrawler | withDefaultFetcher(java.lang.String value) |
ICrawler | withEnableConvertProcessor(java.lang.Boolean value) |
ICrawler | withIgnoreRobotsTxt(java.lang.Boolean value) |
ICrawler | withName(java.lang.String value) |
ICrawler | withNearDuplicateDetector(java.lang.Boolean value) |
ICrawler | withNthreads(java.lang.Integer value) |
ICrawler | withPatternsDetector(java.lang.Boolean value) |
ICrawler | withThrottleTimeMS(java.lang.Integer value) |
protected java.lang.String name
protected java.lang.String crawlerServer
protected java.lang.String buildGroup
protected java.lang.Integer nthreads
protected java.lang.Boolean aggressive
protected java.lang.Integer throttleTimeMS
protected java.lang.Boolean ignoreRobotsTxt
protected java.lang.Boolean enableConvertProcessor
protected java.lang.Boolean nearDuplicateDetector
protected java.lang.Boolean patternsDetector
protected java.lang.String defaultFetcher
protected java.lang.Boolean crawlSitemaps
public ICrawler()
Creates a new ICrawler instance.
public ICrawler(ICrawler o)
Creates a new ICrawler instance by deeply copying a given ICrawler instance.
Parameters:
o - The instance to copy.
Throws:
java.lang.NullPointerException - if o is null.
public java.lang.String getName()
String
public void setName(java.lang.String value)
Parameters:
value - allowed object is String
public java.lang.String getCrawlerServer()
String
public void setCrawlerServer(java.lang.String value)
Parameters:
value - allowed object is String
public java.lang.String getBuildGroup()
String
public void setBuildGroup(java.lang.String value)
Parameters:
value - allowed object is String
public int getNthreads()
Integer
public void setNthreads(java.lang.Integer value)
Parameters:
value - allowed object is Integer
public boolean isAggressive()
Boolean
public void setAggressive(java.lang.Boolean value)
Parameters:
value - allowed object is Boolean
public int getThrottleTimeMS()
Integer
public void setThrottleTimeMS(java.lang.Integer value)
Parameters:
value - allowed object is Integer
public boolean isIgnoreRobotsTxt()
Boolean
public void setIgnoreRobotsTxt(java.lang.Boolean value)
Parameters:
value - allowed object is Boolean
public boolean isEnableConvertProcessor()
Boolean
public void setEnableConvertProcessor(java.lang.Boolean value)
Parameters:
value - allowed object is Boolean
public boolean isNearDuplicateDetector()
Boolean
public void setNearDuplicateDetector(java.lang.Boolean value)
Parameters:
value - allowed object is Boolean
public boolean isPatternsDetector()
Boolean
public void setPatternsDetector(java.lang.Boolean value)
Parameters:
value - allowed object is Boolean
public java.lang.String getDefaultFetcher()
String
public void setDefaultFetcher(java.lang.String value)
Parameters:
value - allowed object is String
public boolean isCrawlSitemaps()
Boolean
public void setCrawlSitemaps(java.lang.Boolean value)
Parameters:
value - allowed object is Boolean
public ICrawler withName(java.lang.String value)
public ICrawler withCrawlerServer(java.lang.String value)
public ICrawler withBuildGroup(java.lang.String value)
public ICrawler withNthreads(java.lang.Integer value)
public ICrawler withAggressive(java.lang.Boolean value)
public ICrawler withThrottleTimeMS(java.lang.Integer value)
public ICrawler withIgnoreRobotsTxt(java.lang.Boolean value)
public ICrawler withEnableConvertProcessor(java.lang.Boolean value)
public ICrawler withNearDuplicateDetector(java.lang.Boolean value)
public ICrawler withPatternsDetector(java.lang.Boolean value)
public ICrawler withDefaultFetcher(java.lang.String value)
public ICrawler withCrawlSitemaps(java.lang.Boolean value)
public ICrawler clone()
Creates and returns a deep copy of this object.
Overrides:
clone in class java.lang.Object
Copyright © 2021 Dassault Systèmes, All Rights Reserved.