HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.
2.0.0-chaika
Hierarchical Cluster Engine Python language binding
CrawledResource.py
Go to the documentation of this file.
1
"""
2
HCE project, Python bindings, Distributed Tasks Manager application.
3
CrawledResource class definition.
4
5
@package: dc
6
@file CrawledResource.py
7
@author Oleksii <developers.hce@gmail.com>
8
@link: http://hierarchical-cluster-engine.com/
9
@copyright: Copyright © 2013-2014 IOIX Ukraine
10
@license: http://hierarchical-cluster-engine.com/license/
11
@since: 0.1
12
"""
13
14
15
from
dc.EventObjects
import
URL
16
# #The CrawledResource class
17
#
18
#
19
class CrawledResource(object):
    """Container for the data recorded about a single crawled resource.

    The class has no behaviour of its own: the constructor only creates
    every attribute with its default value, and callers are expected to
    fill the attributes in afterwards.
    """

    def __init__(self):
        """Create all attributes and set them to their defaults."""
        # Unicode content after rendering (used with the dynamic fetcher).
        self.html_content = ""
        self.binary_content = ""
        self.response_header = ""
        self.html_request = ""

        # Content type is undefined until something assigns a real value.
        self.content_type = URL.CONTENT_TYPE_UNDEFINED
        self.charset = ""

        # Numeric fields; note that http_code starts at 200, not 0.
        self.error_mask = 0
        self.crawling_time = 0
        self.http_code = 200
        self.bps = 0

        self.last_modified = ""
        self.etag = ""
        # A new instance is flagged as changed by default.
        self.resource_changed = True

        # Unicode content before rendering (used with the dynamic fetcher).
        self.meta_content = ""
        # Fresh dict per instance, so instances never share cookie state.
        self.cookies = {}

        # Not set until a value is assigned by the caller.
        self.dynamic_fetcher_type = None
        self.dynamic_fetcher_result_type = None
42
dc_crawler.CrawledResource.CrawledResource.charset
charset
Definition:
CrawledResource.py:29
dc_crawler.CrawledResource.CrawledResource.html_content
html_content
Definition:
CrawledResource.py:24
dc_crawler.CrawledResource.CrawledResource.resource_changed
resource_changed
Definition:
CrawledResource.py:36
dc_crawler.CrawledResource.CrawledResource.bps
bps
Definition:
CrawledResource.py:33
dc_crawler.CrawledResource.CrawledResource.dynamic_fetcher_result_type
dynamic_fetcher_result_type
Definition:
CrawledResource.py:41
dc_crawler.CrawledResource.CrawledResource.cookies
cookies
Definition:
CrawledResource.py:39
dc_crawler.CrawledResource.CrawledResource.error_mask
error_mask
Definition:
CrawledResource.py:30
dc_crawler.CrawledResource.CrawledResource.__init__
def __init__(self)
Definition:
CrawledResource.py:22
dc_crawler.CrawledResource.CrawledResource.etag
etag
Definition:
CrawledResource.py:35
dc_crawler.CrawledResource.CrawledResource.http_code
http_code
Definition:
CrawledResource.py:32
dc_crawler.CrawledResource.CrawledResource.response_header
response_header
Definition:
CrawledResource.py:26
dc.EventObjects
Definition:
EventObjects.py:1
dc_crawler.CrawledResource.CrawledResource.content_type
content_type
Definition:
CrawledResource.py:28
dc_crawler.CrawledResource.CrawledResource
Definition:
CrawledResource.py:19
dc_crawler.CrawledResource.CrawledResource.dynamic_fetcher_type
dynamic_fetcher_type
Definition:
CrawledResource.py:40
dc_crawler.CrawledResource.CrawledResource.meta_content
meta_content
Definition:
CrawledResource.py:38
dc_crawler.CrawledResource.CrawledResource.html_request
html_request
Definition:
CrawledResource.py:27
dc_crawler.CrawledResource.CrawledResource.binary_content
binary_content
Definition:
CrawledResource.py:25
dc_crawler.CrawledResource.CrawledResource.crawling_time
crawling_time
Definition:
CrawledResource.py:31
dc_crawler.CrawledResource.CrawledResource.last_modified
last_modified
Definition:
CrawledResource.py:34
sources
hce
dc_crawler
CrawledResource.py
Generated on Fri Nov 24 2017 18:53:55 for HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings by Doxygen 1.8.13.