HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.
2.0.0-chaika
Hierarchical Cluster Engine Python language binding
|
Public Member Functions | |
def | __init__ (self, siteId, urlString, urlType=URLStatus.URL_TYPE_URL, stateField=None, statusField=None, normalizeMask=URL.URL_NORMALIZE_MASK, urlObject=None) |
def | fillMD5 (self, urlString, urlType) |
Public Member Functions inherited from dc.EventObjects.URL | |
def | __init__ (self, siteId, url, state=STATE_ENABLED, urlUpdate=None, normalizeMask=URL_NORMALIZE_MASK) |
def | getURL (self, normalizeMask=URL_NORMALIZE_MASK) |
Public Member Functions inherited from app.Utils.JsonSerializable | |
def | __init__ (self) |
def | toJSON (self) |
Additional Inherited Members | |
Static Public Member Functions inherited from app.Utils.JsonSerializable | |
def | json_serial (obj) |
Static Public Attributes inherited from dc.EventObjects.URL | |
int | STATE_ENABLED = 0 |
int | STATE_DISABLED = 1 |
int | STATE_ERROR = 2 |
int | STATUS_UNDEFINED = 0 |
int | STATUS_NEW = 1 |
int | STATUS_SELECTED_CRAWLING = 2 |
int | STATUS_CRAWLING = 3 |
int | STATUS_CRAWLED = 4 |
int | STATUS_SELECTED_PROCESSING = 5 |
int | STATUS_PROCESSING = 6 |
int | STATUS_PROCESSED = 7 |
int | STATUS_SELECTED_CRAWLING_INCREMENTAL = 8 |
int | CONTENT_EMPTY = 0 |
int | CONTENT_STORED_ON_DISK = 1 << 0 |
int | TYPE_REGULAR = 0 |
int | TYPE_SINGLE = 1 |
int | TYPE_REGULAR_EXT = 2 |
int | TYPE_NEW_SITE = 3 |
int | TYPE_FETCHED = 4 |
int | TYPE_REAL_TIME_CRAWLER = 5 |
int | TYPE_CHAIN = 6 |
int | SITE_SELECT_TYPE_EXPLICIT = 0 |
int | SITE_SELECT_TYPE_AUTO = 1 |
int | SITE_SELECT_TYPE_QUALIFY_URL = 2 |
int | SITE_SELECT_TYPE_NONE = 3 |
string | CONTENT_TYPE_TEXT_HTML = "text/html" |
string | CONTENT_TYPE_UNDEFINED = "" |
URL_NORMALIZE_MASK = UrlNormalizator.NORM_DEFAULT | |
Definition at line 656 of file EventObjects.py.
def dc.EventObjects.URLUpdate.__init__(self, siteId, urlString, urlType=URLStatus.URL_TYPE_URL, stateField=None, statusField=None, normalizeMask=URL.URL_NORMALIZE_MASK, urlObject=None)
Definition at line 668 of file EventObjects.py.
def dc.EventObjects.URLUpdate.fillMD5(self, urlString, urlType)
Definition at line 734 of file EventObjects.py.
dc.EventObjects.URLUpdate.attributes |
Definition at line 718 of file EventObjects.py.
dc.EventObjects.URLUpdate.batchId |
Definition at line 690 of file EventObjects.py.
dc.EventObjects.URLUpdate.CDate |
Definition at line 697 of file EventObjects.py.
dc.EventObjects.URLUpdate.chainId |
Definition at line 716 of file EventObjects.py.
dc.EventObjects.URLUpdate.charset |
Definition at line 689 of file EventObjects.py.
dc.EventObjects.URLUpdate.classifierMask |
Definition at line 717 of file EventObjects.py.
dc.EventObjects.URLUpdate.contentType |
Definition at line 685 of file EventObjects.py.
dc.EventObjects.URLUpdate.contentURLMd5 |
Definition at line 714 of file EventObjects.py.
dc.EventObjects.URLUpdate.crawled |
Definition at line 682 of file EventObjects.py.
dc.EventObjects.URLUpdate.crawlingTime |
Definition at line 692 of file EventObjects.py.
dc.EventObjects.URLUpdate.criterions |
Definition at line 720 of file EventObjects.py.
dc.EventObjects.URLUpdate.depth |
Definition at line 703 of file EventObjects.py.
dc.EventObjects.URLUpdate.errorMask |
Definition at line 691 of file EventObjects.py.
dc.EventObjects.URLUpdate.eTag |
Definition at line 707 of file EventObjects.py.
dc.EventObjects.URLUpdate.freq |
Definition at line 702 of file EventObjects.py.
dc.EventObjects.URLUpdate.httpCode |
Definition at line 695 of file EventObjects.py.
dc.EventObjects.URLUpdate.httpMethod |
Definition at line 698 of file EventObjects.py.
dc.EventObjects.URLUpdate.httpTimeout |
Definition at line 688 of file EventObjects.py.
dc.EventObjects.URLUpdate.lastModified |
Definition at line 706 of file EventObjects.py.
dc.EventObjects.URLUpdate.linksE |
Definition at line 701 of file EventObjects.py.
dc.EventObjects.URLUpdate.linksI |
Definition at line 700 of file EventObjects.py.
dc.EventObjects.URLUpdate.maxURLsFromPage |
Definition at line 711 of file EventObjects.py.
dc.EventObjects.URLUpdate.mRate |
Definition at line 708 of file EventObjects.py.
dc.EventObjects.URLUpdate.mRateCounter |
Definition at line 709 of file EventObjects.py.
dc.EventObjects.URLUpdate.parentMd5 |
Definition at line 705 of file EventObjects.py.
dc.EventObjects.URLUpdate.priority |
Definition at line 712 of file EventObjects.py.
dc.EventObjects.URLUpdate.processed |
Definition at line 683 of file EventObjects.py.
dc.EventObjects.URLUpdate.processingDelay |
Definition at line 687 of file EventObjects.py.
dc.EventObjects.URLUpdate.processingTime |
Definition at line 693 of file EventObjects.py.
dc.EventObjects.URLUpdate.rawContentMd5 |
Definition at line 704 of file EventObjects.py.
dc.EventObjects.URLUpdate.requestDelay |
Definition at line 686 of file EventObjects.py.
dc.EventObjects.URLUpdate.siteId |
Definition at line 677 of file EventObjects.py.
dc.EventObjects.URLUpdate.siteSelect |
Definition at line 681 of file EventObjects.py.
dc.EventObjects.URLUpdate.size |
Definition at line 699 of file EventObjects.py.
dc.EventObjects.URLUpdate.state |
Definition at line 679 of file EventObjects.py.
dc.EventObjects.URLUpdate.status |
Definition at line 680 of file EventObjects.py.
dc.EventObjects.URLUpdate.tagsCount |
Definition at line 713 of file EventObjects.py.
dc.EventObjects.URLUpdate.tagsMask |
Definition at line 715 of file EventObjects.py.
dc.EventObjects.URLUpdate.tcDate |
Definition at line 710 of file EventObjects.py.
dc.EventObjects.URLUpdate.totalTime |
Definition at line 694 of file EventObjects.py.
dc.EventObjects.URLUpdate.type |
Definition at line 678 of file EventObjects.py.
dc.EventObjects.URLUpdate.UDate |
Definition at line 696 of file EventObjects.py.
dc.EventObjects.URLUpdate.url |
Definition at line 741 of file EventObjects.py.
dc.EventObjects.URLUpdate.urlMd5 |
Definition at line 742 of file EventObjects.py.