Definition at line 434 of file EventObjects.py.
◆ __init__()
Definition at line 490 of file EventObjects.py.
490 def __init__(self, siteId, url, state=STATE_ENABLED, urlUpdate=None, normalizeMask=URL_NORMALIZE_MASK):
497 self.url = self.getURL(normalizeMask)
498 self.type = self.TYPE_REGULAR
500 self.status = self.STATUS_NEW
501 self.siteSelect = self.SITE_SELECT_TYPE_NONE
505 self.urlMd5 = hashlib.md5(self.url).hexdigest()
508 self.contentType = self.CONTENT_TYPE_UNDEFINED
509 self.requestDelay = 500
510 self.processingDelay = 500
511 self.httpTimeout = 30000
515 self.crawlingTime = 0
516 self.processingTime = 0
521 self.httpMethod = "get"
527 self.rawContentMd5 = ""
529 self.lastModified = None
532 self.mRateCounter = 0
534 self.maxURLsFromPage = 100
535 self.contentMask = self.CONTENT_EMPTY
539 self.contentURLMd5 = ""
541 self.urlUpdate = urlUpdate
544 self.classifierMask = 0
def __init__(self)
constructor
◆ getURL()
Definition at line 552 of file EventObjects.py.
552 def getURL(self, normalizeMask=URL_NORMALIZE_MASK):
554 if normalizeMask != UrlNormalizator.NORM_NONE:
555 url = UrlNormalizator.normalize(self.url, None, normalizeMask)
◆ attributes
dc.EventObjects.URL.attributes |
◆ batchId
dc.EventObjects.URL.batchId |
◆ CDate
dc.EventObjects.URL.CDate |
◆ chainId
dc.EventObjects.URL.chainId |
◆ charset
dc.EventObjects.URL.charset |
◆ classifierMask
dc.EventObjects.URL.classifierMask |
◆ CONTENT_EMPTY
int dc.EventObjects.URL.CONTENT_EMPTY = 0 |
|
static |
◆ CONTENT_STORED_ON_DISK
int dc.EventObjects.URL.CONTENT_STORED_ON_DISK = 1 << 0 |
|
static |
◆ CONTENT_TYPE_TEXT_HTML
string dc.EventObjects.URL.CONTENT_TYPE_TEXT_HTML = "text/html" |
|
static |
◆ CONTENT_TYPE_UNDEFINED
string dc.EventObjects.URL.CONTENT_TYPE_UNDEFINED = "" |
|
static |
◆ contentMask
dc.EventObjects.URL.contentMask |
◆ contentType
dc.EventObjects.URL.contentType |
◆ contentURLMd5
dc.EventObjects.URL.contentURLMd5 |
◆ crawled
dc.EventObjects.URL.crawled |
◆ crawlingTime
dc.EventObjects.URL.crawlingTime |
◆ depth
dc.EventObjects.URL.depth |
◆ errorMask
dc.EventObjects.URL.errorMask |
◆ eTag
◆ freq
◆ httpCode
dc.EventObjects.URL.httpCode |
◆ httpMethod
dc.EventObjects.URL.httpMethod |
◆ httpTimeout
dc.EventObjects.URL.httpTimeout |
◆ lastModified
dc.EventObjects.URL.lastModified |
◆ linksE
dc.EventObjects.URL.linksE |
◆ linksI
dc.EventObjects.URL.linksI |
◆ maxURLsFromPage
dc.EventObjects.URL.maxURLsFromPage |
◆ mRate
dc.EventObjects.URL.mRate |
◆ mRateCounter
dc.EventObjects.URL.mRateCounter |
◆ parentMd5
dc.EventObjects.URL.parentMd5 |
◆ pDate
dc.EventObjects.URL.pDate |
◆ priority
dc.EventObjects.URL.priority |
◆ processed
dc.EventObjects.URL.processed |
◆ processingDelay
dc.EventObjects.URL.processingDelay |
◆ processingTime
dc.EventObjects.URL.processingTime |
◆ rawContentMd5
dc.EventObjects.URL.rawContentMd5 |
◆ requestDelay
dc.EventObjects.URL.requestDelay |
◆ SITE_SELECT_TYPE_AUTO
int dc.EventObjects.URL.SITE_SELECT_TYPE_AUTO = 1 |
|
static |
◆ SITE_SELECT_TYPE_EXPLICIT
int dc.EventObjects.URL.SITE_SELECT_TYPE_EXPLICIT = 0 |
|
static |
◆ SITE_SELECT_TYPE_NONE
int dc.EventObjects.URL.SITE_SELECT_TYPE_NONE = 3 |
|
static |
◆ SITE_SELECT_TYPE_QUALIFY_URL
int dc.EventObjects.URL.SITE_SELECT_TYPE_QUALIFY_URL = 2 |
|
static |
◆ siteId
dc.EventObjects.URL.siteId |
◆ siteSelect
dc.EventObjects.URL.siteSelect |
◆ size
◆ state
dc.EventObjects.URL.state |
◆ STATE_DISABLED
int dc.EventObjects.URL.STATE_DISABLED = 1 |
|
static |
◆ STATE_ENABLED
int dc.EventObjects.URL.STATE_ENABLED = 0 |
|
static |
◆ STATE_ERROR
int dc.EventObjects.URL.STATE_ERROR = 2 |
|
static |
◆ status
dc.EventObjects.URL.status |
◆ STATUS_CRAWLED
int dc.EventObjects.URL.STATUS_CRAWLED = 4 |
|
static |
◆ STATUS_CRAWLING
int dc.EventObjects.URL.STATUS_CRAWLING = 3 |
|
static |
◆ STATUS_NEW
int dc.EventObjects.URL.STATUS_NEW = 1 |
|
static |
◆ STATUS_PROCESSED
int dc.EventObjects.URL.STATUS_PROCESSED = 7 |
|
static |
◆ STATUS_PROCESSING
int dc.EventObjects.URL.STATUS_PROCESSING = 6 |
|
static |
◆ STATUS_SELECTED_CRAWLING
int dc.EventObjects.URL.STATUS_SELECTED_CRAWLING = 2 |
|
static |
◆ STATUS_SELECTED_CRAWLING_INCREMENTAL
int dc.EventObjects.URL.STATUS_SELECTED_CRAWLING_INCREMENTAL = 8 |
|
static |
◆ STATUS_SELECTED_PROCESSING
int dc.EventObjects.URL.STATUS_SELECTED_PROCESSING = 5 |
|
static |
◆ STATUS_UNDEFINED
int dc.EventObjects.URL.STATUS_UNDEFINED = 0 |
|
static |
◆ tagsCount
dc.EventObjects.URL.tagsCount |
◆ tagsMask
dc.EventObjects.URL.tagsMask |
◆ tcDate
dc.EventObjects.URL.tcDate |
◆ totalTime
dc.EventObjects.URL.totalTime |
◆ type
◆ TYPE_CHAIN
int dc.EventObjects.URL.TYPE_CHAIN = 6 |
|
static |
◆ TYPE_FETCHED
int dc.EventObjects.URL.TYPE_FETCHED = 4 |
|
static |
◆ TYPE_NEW_SITE
int dc.EventObjects.URL.TYPE_NEW_SITE = 3 |
|
static |
◆ TYPE_REAL_TIME_CRAWLER
int dc.EventObjects.URL.TYPE_REAL_TIME_CRAWLER = 5 |
|
static |
◆ TYPE_REGULAR
int dc.EventObjects.URL.TYPE_REGULAR = 0 |
|
static |
◆ TYPE_REGULAR_EXT
int dc.EventObjects.URL.TYPE_REGULAR_EXT = 2 |
|
static |
◆ TYPE_SINGLE
int dc.EventObjects.URL.TYPE_SINGLE = 1 |
|
static |
◆ UDate
dc.EventObjects.URL.UDate |
◆ url
◆ URL_NORMALIZE_MASK
◆ urlMd5
dc.EventObjects.URL.urlMd5 |
◆ urlPut
dc.EventObjects.URL.urlPut |
◆ urlUpdate
dc.EventObjects.URL.urlUpdate |
The documentation for this class was generated from the following file: