HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc.EventObjects.URLContentRequest Class Reference
Inheritance diagram for dc.EventObjects.URLContentRequest: URLContentRequest inherits from app.Utils.JsonSerializable.
Collaboration diagram for dc.EventObjects.URLContentRequest:

Public Member Functions

def __init__ (self, siteId, urlString, contentTypeMask=CONTENT_TYPE_PROCESSED+CONTENT_TYPE_RAW_LAST, urlType=URL_TYPE_STRING)
 
def fillMD5 (self, urlString)
 
- Public Member Functions inherited from app.Utils.JsonSerializable
def __init__ (self)
 
def toJSON (self)
 

Public Attributes

 siteId
 
 url
 
 urlMd5
 
 contentTypeMask
 
 urlFetch
 
 attributeNames
 
 dbFieldsList
 
 dbFieldsListDefaultValues
 

Static Public Attributes

int CONTENT_TYPE_PROCESSED = 1
 
int CONTENT_TYPE_RAW_LAST = 2
 
int CONTENT_TYPE_RAW_FIRST = 4
 
int CONTENT_TYPE_RAW_ALL = 8
 
int CONTENT_TYPE_HEADERS = 16
 
int CONTENT_TYPE_REQUESTS = 32
 
int CONTENT_TYPE_META = 64
 
int CONTENT_TYPE_COOKIES = 128
 
int CONTENT_TYPE_TIDY = 256
 
int CONTENT_TYPE_DYNAMIC = 512
 
int CONTENT_TYPE_RAW = 1024
 
int CONTENT_TYPE_CHAIN = 2048
 
int CONTENT_TYPE_PROCESSED_INTERNAL = 4096
 
int CONTENT_TYPE_PROCESSED_CUSTOM = 8192
 
int CONTENT_TYPE_PROCESSED_ALL = 16384
 
int CONTENT_TYPE_ATTRIBUTES = 32768
 
int URL_TYPE_STRING = 0
 
int URL_TYPE_MD5 = 1
 

Additional Inherited Members

- Static Public Member Functions inherited from app.Utils.JsonSerializable
def json_serial (obj)
 

Detailed Description

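URLContentRequest is a JsonSerializable event object. Its constructor stores the site ID and the URL string, sets urlMd5 (the MD5 hex digest of the URL when urlType is URL_TYPE_STRING, otherwise the passed value unchanged), stores the content-type bit mask built from the CONTENT_TYPE_* flags, and initializes urlFetch, attributeNames, dbFieldsList and dbFieldsListDefaultValues.

Definition at line 890 of file EventObjects.py.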

Constructor & Destructor Documentation

◆ __init__()

def dc.EventObjects.URLContentRequest.__init__ (   self,
  siteId,
  urlString,
  contentTypeMask = CONTENT_TYPE_PROCESSED + CONTENT_TYPE_RAW_LAST,
  urlType = URL_TYPE_STRING 
)

Definition at line 919 of file EventObjects.py.

919  urlType=URL_TYPE_STRING):
920  super(URLContentRequest, self).__init__()
921  self.siteId = siteId
922  self.url = urlString
923  if urlType == self.URL_TYPE_STRING:
924  self.urlMd5 = self.fillMD5(urlString)
925  else:
926  self.urlMd5 = urlString
927  self.contentTypeMask = contentTypeMask
928  self.urlFetch = None
929  self.attributeNames = ['*']
930  self.dbFieldsList = ["Status", "Crawled", "Processed", "ContentType", "Charset", "ErrorMask", "CrawlingTime",
931  "ProcessingTime", "HTTPCode", "Size", "LinksI", "LinksE", "RawContentMd5", "LastModified",
932  "CDate", "UDate", "TagsMask", "TagsCount", "PDate", "ContentURLMd5", "Batch_Id"]
933 
934  self.dbFieldsListDefaultValues = {"Status":1,
935  "Crawled":0,
936  "Processed":0,
937  "ContentType":"",
938  "Charset":"",
939  "ErrorMask":0,
940  "CrawlingTime":0,
941  "ProcessingTime":0,
942  "HTTPCode":0,
943  "Size":0,
944  "LinksI":0,
945  "LinksE":0,
946  "RawContentMd5":"",
947  "LastModified":None,
948  "CDate":int(time.time()),
949  "UDate":None,
950  "TagsMask":0,
951  "TagsCount":0,
952  "PDate":None,
953  "ContentURLMd5":"",
954  "Batch_Id":0}
955 
956 
Cross-reference linked from the listing above:
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ fillMD5()

def dc.EventObjects.URLContentRequest.fillMD5 (   self,
  urlString 
)

Definition at line 960 of file EventObjects.py.

960  def fillMD5(self, urlString):
961  return hashlib.md5(urlString).hexdigest()
962 
963 
964 
965 # #Content object
966 #
967 # The Content object represents content data for URLContentResponse event object.
968 #

Member Data Documentation

◆ attributeNames

dc.EventObjects.URLContentRequest.attributeNames

Definition at line 929 of file EventObjects.py.

◆ CONTENT_TYPE_ATTRIBUTES

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_ATTRIBUTES = 32768
static

Definition at line 907 of file EventObjects.py.

◆ CONTENT_TYPE_CHAIN

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_CHAIN = 2048
static

Definition at line 903 of file EventObjects.py.

◆ CONTENT_TYPE_COOKIES

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_COOKIES = 128
static

Definition at line 899 of file EventObjects.py.

◆ CONTENT_TYPE_DYNAMIC

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_DYNAMIC = 512
static

Definition at line 901 of file EventObjects.py.

◆ CONTENT_TYPE_HEADERS

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_HEADERS = 16
static

Definition at line 896 of file EventObjects.py.

◆ CONTENT_TYPE_META

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_META = 64
static

Definition at line 898 of file EventObjects.py.

◆ CONTENT_TYPE_PROCESSED

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_PROCESSED = 1
static

Definition at line 892 of file EventObjects.py.

◆ CONTENT_TYPE_PROCESSED_ALL

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_PROCESSED_ALL = 16384
static

Definition at line 906 of file EventObjects.py.

◆ CONTENT_TYPE_PROCESSED_CUSTOM

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_PROCESSED_CUSTOM = 8192
static

Definition at line 905 of file EventObjects.py.

◆ CONTENT_TYPE_PROCESSED_INTERNAL

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_PROCESSED_INTERNAL = 4096
static

Definition at line 904 of file EventObjects.py.

◆ CONTENT_TYPE_RAW

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_RAW = 1024
static

Definition at line 902 of file EventObjects.py.

◆ CONTENT_TYPE_RAW_ALL

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_RAW_ALL = 8
static

Definition at line 895 of file EventObjects.py.

◆ CONTENT_TYPE_RAW_FIRST

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_RAW_FIRST = 4
static

Definition at line 894 of file EventObjects.py.

◆ CONTENT_TYPE_RAW_LAST

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_RAW_LAST = 2
static

Definition at line 893 of file EventObjects.py.

◆ CONTENT_TYPE_REQUESTS

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_REQUESTS = 32
static

Definition at line 897 of file EventObjects.py.

◆ CONTENT_TYPE_TIDY

int dc.EventObjects.URLContentRequest.CONTENT_TYPE_TIDY = 256
static

Definition at line 900 of file EventObjects.py.

◆ contentTypeMask

dc.EventObjects.URLContentRequest.contentTypeMask

Definition at line 927 of file EventObjects.py.

◆ dbFieldsList

dc.EventObjects.URLContentRequest.dbFieldsList

Definition at line 930 of file EventObjects.py.

◆ dbFieldsListDefaultValues

dc.EventObjects.URLContentRequest.dbFieldsListDefaultValues

Definition at line 934 of file EventObjects.py.

◆ siteId

dc.EventObjects.URLContentRequest.siteId

Definition at line 921 of file EventObjects.py.

◆ url

dc.EventObjects.URLContentRequest.url

Definition at line 922 of file EventObjects.py.

◆ URL_TYPE_MD5

int dc.EventObjects.URLContentRequest.URL_TYPE_MD5 = 1
static

Definition at line 910 of file EventObjects.py.

◆ URL_TYPE_STRING

int dc.EventObjects.URLContentRequest.URL_TYPE_STRING = 0
static

Definition at line 909 of file EventObjects.py.

◆ urlFetch

dc.EventObjects.URLContentRequest.urlFetch

Definition at line 928 of file EventObjects.py.

◆ urlMd5

dc.EventObjects.URLContentRequest.urlMd5

Definition at line 924 of file EventObjects.py.


The documentation for this class was generated from the following file: