HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
Constants.py
Go to the documentation of this file.
1 '''
2 HCE project, Python bindings, Distributed Crawler application.
3 Application level constants and enumerations.
4 
5 @package: dc
6 @author bgv bgv.hce@gmail.com
7 @link: http://hierarchical-cluster-engine.com/
8 @copyright: Copyright © 2013-2014 IOIX Ukraine
9 @license: http://hierarchical-cluster-engine.com/license/
10 @since: 0.1
11 '''
12 
13 
14 from collections import namedtuple
15 import app.Consts as APP_CONSTS
16 
17 
18 # #Event types definition, used to uniquely identify events in the inproc messaging transport
19 #
20 #
class EVENT_TYPES(object):
    """Numeric identifiers of the events exchanged over the inproc messaging transport.

    Request identifiers occupy 1..33; every request ``X`` has a matching
    ``X_RESPONSE`` identifier whose value is the request value plus 100.
    """

    # ---- Requests (ClientInterfaceService) ----

    # Site operations
    SITE_NEW = 1
    SITE_UPDATE = 2
    SITE_STATUS = 3
    SITE_DELETE = 4
    SITE_CLEANUP = 5

    # URL operations
    URL_NEW = 6
    URL_UPDATE = 7
    URL_STATUS = 8
    URL_DELETE = 9
    URL_FETCH = 10
    URL_CLEANUP = 11
    URL_CONTENT = 12

    # Resource operations
    RESOURCE_FETCH = 13
    RESOURCE_DELETE = 14

    SITE_FIND = 15
    SQL_CUSTOM = 16

    BATCH = 17
    URL_PURGE = 18
    FIELD_RECALCULATE = 19
    URL_VERIFY = 20
    URL_AGE = 21
    URL_PUT = 22
    URL_HISTORY = 23
    URL_STATS = 24

    # Proxy operations
    PROXY_NEW = 25
    PROXY_UPDATE = 26
    PROXY_DELETE = 27
    PROXY_STATUS = 28
    PROXY_FIND = 29

    # Attribute operations
    ATTR_SET = 30
    ATTR_UPDATE = 31
    ATTR_DELETE = 32
    ATTR_FETCH = 33

    # ---- Responses (ClientInterfaceService) ----

    # Site operations
    SITE_NEW_RESPONSE = 101
    SITE_UPDATE_RESPONSE = 102
    SITE_STATUS_RESPONSE = 103
    SITE_DELETE_RESPONSE = 104
    SITE_CLEANUP_RESPONSE = 105

    # URL operations
    URL_NEW_RESPONSE = 106
    URL_UPDATE_RESPONSE = 107
    URL_STATUS_RESPONSE = 108
    URL_DELETE_RESPONSE = 109
    URL_FETCH_RESPONSE = 110
    URL_CLEANUP_RESPONSE = 111
    URL_CONTENT_RESPONSE = 112

    # Resource operations
    RESOURCE_FETCH_RESPONSE = 113
    RESOURCE_DELETE_RESPONSE = 114

    SITE_FIND_RESPONSE = 115

    SQL_CUSTOM_RESPONSE = 116

    BATCH_RESPONSE = 117
    URL_PURGE_RESPONSE = 118
    FIELD_RECALCULATE_RESPONSE = 119
    URL_VERIFY_RESPONSE = 120
    URL_AGE_RESPONSE = 121

    URL_PUT_RESPONSE = 122
    URL_HISTORY_RESPONSE = 123
    URL_STATS_RESPONSE = 124

    # Proxy operations
    PROXY_NEW_RESPONSE = 125
    PROXY_UPDATE_RESPONSE = 126
    PROXY_DELETE_RESPONSE = 127
    PROXY_STATUS_RESPONSE = 128
    PROXY_FIND_RESPONSE = 129

    # Attribute operations
    ATTR_SET_RESPONSE = 130
    ATTR_UPDATE_RESPONSE = 131
    ATTR_DELETE_RESPONSE = 132
    ATTR_FETCH_RESPONSE = 133

    # #constructor
    #
    def __init__(self):
        """Constructor; the class holds only class-level constants, so nothing is initialized."""
109 
110 
# Named tuple covering a DRCE sync task request or response:
# the event type together with the event object it carries.
DRCESyncTasksCover = namedtuple('DRCESyncTasksCover', ['eventType', 'eventObject'])
113 
114 
# Logger name; taken from the application-level constants module
# rather than defined locally.
# LOGGER_NAME = "dc"
LOGGER_NAME = APP_CONSTS.LOGGER_NAME
# ---- Crawl batches: names of the stat variables ----

# Total number of crawl batches
BATCHES_CRAWL_COUNTER_TOTAL_NAME = "batches_crawl_total"
# Size of the crawl batches queue
BATCHES_CRAWL_COUNTER_QUEUE_NAME = "batches_crawl_queue"
# Crawl batches that ended with a fault
BATCHES_CRAWL_COUNTER_FAULT_NAME = "batches_crawl_fault"
# Crawl batches that were not empty
BATCHES_CRAWL_COUNTER_FILLED_NAME = "batches_crawl_filled"
# Total URLs across all crawl batches
BATCHES_CRAWL_COUNTER_URLS_NAME = "batches_crawl_urls"
# URLs contained in faulted crawl batches
BATCHES_CRAWL_COUNTER_URLS_FAULT_NAME = "batches_crawl_urls_fault"
# URL_FETCH requests issued for crawling
BATCHES_CRAWL_COUNTER_URL_FETCH_NAME = "batches_crawl_url_fetch"
# URL_FETCH requests that were cancelled
BATCHES_CRAWL_COUNTER_URL_FETCH_CANCELLED_NAME = "batches_crawl_url_fetch_cancelled"
# Delete-task requests that faulted
BATCHES_CRAWL_COUNTER_DELETE_FAULT_NAME = "batches_crawl_delete_fault"
# Crawl batches fault TTL counter
BATCHES_CRAWL_COUNTER_FAULT_TTL_NAME = "batches_crawl_fault_ttl"
# Check-state requests that faulted
BATCHES_CRAWL_COUNTER_CHECK_FAULT_NAME = "batches_crawl_check_fault"
# URLs returned from crawl batches
BATCHES_CRAWL_COUNTER_URLS_RET_NAME = "batches_crawl_urls_ret"
# Incremental URL_FETCH requests
BATCHES_CRAWL_COUNTER_URL_FETCH_INCR_NAME = "batches_crawl_url_fetch_incr"

# ---- Sites re-crawl: names of the stat variables ----

# Sites re-crawl counter
SITES_RECRAWL_COUNTER_NAME = "sites_recrawl_cnt"
# Sites updated during re-crawl
SITES_RECRAWL_UPDATED_COUNTER_NAME = "sites_recrawl_updated_cnt"
# Sites deleted during re-crawl
SITES_RECRAWL_DELETED_COUNTER_NAME = "sites_recrawl_deleted_cnt"
# Sites DRCE requests counter
SITES_DRCE_COUNTER_NAME = "sites_recrawl_drce_cnt"

# ---- Crawl batches: timing, real-time threads and fetcher kinds ----

# Average processing time
BATCHES_CRAWL_COUNTER_TIME_AVG_NAME = "batches_crawl_time_avg"
# Number of real-time crawl batch threads
BATCHES_REALTIME_THREADS_NAME = "batches_realtime_threads"
# Number of real-time crawl batch threads created
BATCHES_REALTIME_THREADS_CREATED_COUNTER_NAME = "batches_realtime_threads_created"
# Average number of URLs/items per crawl batch
BATCHES_CRAWL_COUNTER_ITEMS_AVG_NAME = "batches_crawl_items_avg"
# Crawl batches handled by the dynamic fetcher
BATCHES_CRAWL_COUNTER_FETCHER_DYNAMIC = "batches_crawl_fetcher_dynamic"
# Crawl batches handled by the static fetcher
BATCHES_CRAWL_COUNTER_FETCHER_STATIC = "batches_crawl_fetcher_static"
# Crawl batches handled by mixed fetchers
BATCHES_CRAWL_COUNTER_FETCHER_MIXED = "batches_crawl_fetcher_mixed"
166 
167 
# Stat variable name: sendURLFetchRequest calls made for crawl batches
BATCHES_CRAWL_COUNTER_URL_FETCH_REQUESTS_NAME = "batches_crawl_url_fetch_requests"

# ---- Recrawling: names of the stat variables ----

# Recrawling threads counter
RECRAWL_THREADS_COUNTER_QUEUE_NAME = "recrawl_threads"
# Size of the recrawling sites queue
RECRAWL_SITES_QUEUE_NAME = "recrawl_sites_queue"
# Total recrawling threads created
RECRAWL_THREADS_CREATED_COUNTER_NAME = "recrawl_threads_created"

# ---- Common operations: names of the stat variables ----

# Common threads counter
COMMON_THREADS_COUNTER_QUEUE_NAME = "common_threads"
# Common operations counter
COMMON_OPERATIONS_COUNTER_NAME = "common_operations_cnt"
# Total common threads created
COMMON_THREADS_CREATED_COUNTER_NAME = "common_threads_created"
184 
# ---- Purge batches: names of the stat variables ----

# Current purge batches
BATCHES_PURGE_COUNTER_NAME = "purge_batches"
# Cancelled purge batches (the value spells "canceled" with a single "l")
BATCHES_PURGE_COUNTER_CANCELLED_NAME = "purge_batches_canceled"
# Total purge batches
BATCHES_PURGE_COUNTER_TOTAL_NAME = "purge_batches_total"
# Purge batch creation errors
BATCHES_PURGE_COUNTER_ERROR_NAME = "purge_batches_error"
# Purge batch execution faults
BATCHES_PURGE_COUNTER_FAULT_NAME = "purge_batches_fault"
# Purge batch task delete faults
BATCHES_PURGE_COUNTER_DELETE_FAULT_NAME = "purge_batches_delete_fault"
# Purge batch task check-state faults
BATCHES_PURGE_COUNTER_CHECK_FAULT_NAME = "purge_batches_check_fault"
199 
# ---- Process batches: names of the stat variables ----

# Total process batches
BATCHES_PROCESS_COUNTER_TOTAL_NAME = "batches_process_total"
# Process batches currently in the queue
BATCHES_PROCESS_COUNTER_QUEUE_NAME = "batches_process_queue"
# Process batches whose processing faulted
BATCHES_PROCESS_COUNTER_FAULT_NAME = "batches_process_fault"
# Process batches that were not empty
BATCHES_PROCESS_COUNTER_FILLED_NAME = "batches_process_filled"
# Total URLs in process batches
BATCHES_PROCESS_COUNTER_URLS_NAME = "batches_process_urls"
# Total URLs in faulted process batches
BATCHES_PROCESS_COUNTER_URLS_FAULT_NAME = "batches_process_urls_fault"
# Delete-task requests that faulted
BATCHES_PROCESS_COUNTER_DELETE_FAULT_NAME = "batches_process_delete_fault"
# Check-task requests that faulted
BATCHES_PROCESS_COUNTER_CHECK_FAULT_NAME = "batches_process_check_fault"
# Process batches fault TTL counter
BATCHES_PROCESS_COUNTER_FAULT_TTL_NAME = "batches_process_fault_ttl"
# Cancelled process batches
BATCHES_PROCESS_COUNTER_CANCELLED_NAME = "batches_process_cancelled"
220 
# ---- Age batches: names of the stat variables ----

# Current age batches
BATCHES_AGE_COUNTER_NAME = "age_batches"
# Cancelled age batches (the value spells "canceled" with a single "l")
BATCHES_AGE_COUNTER_CANCELLED_NAME = "age_batches_canceled"
# Total age batches
BATCHES_AGE_COUNTER_TOTAL_NAME = "age_batches_total"
# Age batches with errors
BATCHES_AGE_COUNTER_ERROR_NAME = "age_batches_error"
# Age batch execution faults
BATCHES_AGE_COUNTER_FAULT_NAME = "age_batches_fault"
# Age batch task delete faults
BATCHES_AGE_COUNTER_DELETE_FAULT_NAME = "age_batches_delete_fault"
# Age batch task check-state faults
BATCHES_AGE_COUNTER_CHECK_FAULT_NAME = "age_batches_check_fault"
235 
# Names of the incremental crawling config variables
INCR_MIN_FREQ_CONFIG_VAR_NAME = "INCR_MIN_FREQ"
INCR_MAX_DEPTH_CONFIG_VAR_NAME = "INCR_MAX_DEPTH"
INCR_MAX_URLS_CONFIG_VAR_NAME = "INCR_MAX_URLS"

# Name of the merge parameter carried in event.cookie
MERGE_PARAM_NAME = "MERGE_RESULTS"

# File name suffixes of the RAW data files
RAW_DATA_SUFF = ".bin"
RAW_DATA_HEADERS_SUFF = ".headers.txt"
# NOTE: the identifier is spelled "REQESTS" (sic); the name is left unchanged
# because other modules may reference it.
RAW_DATA_REQESTS_SUFF = ".requests.txt"
RAW_DATA_META_SUFF = ".meta.txt"
RAW_DATA_COOKIES_SUFF = ".cookies.txt"
RAW_DATA_TIDY_SUFF = ".tidy"
RAW_DATA_DYNAMIC_SUFF = ".dyn"
RAW_DATA_CHAIN_SUFF = ".chain"
253 
# Key names used in sites_properties
SITE_PROP_AUTO_REMOVE_RESOURCES = "AUTO_REMOVE_RESOURCES"
SITE_PROP_AUTO_REMOVE_ORDER = "AUTO_REMOVE_ORDER"
SITE_PROP_AUTO_REMOVE_WHERE = "AUTO_REMOVE_WHERE"
SITE_PROP_AUTO_REMOVE_WHERE_ACTIVE = "AUTO_REMOVE_WHERE_ACTIVE"
SITE_PROP_RECRAWL_DELETE_WHERE = "RECRAWL_DELETE_WHERE"

# NOTE: the constant is named SAVE_COOKIES while the key itself is "STORE_COOKIES"
SITE_PROP_SAVE_COOKIES = "STORE_COOKIES"

# DRCE request routing values: JSON strings with a single "role" field
DRCE_REQUEST_ROUTING_ROUND_ROBIN = '{"role":1}'
DRCE_REQUEST_ROUTING_RESOURCE_USAGE = '{"role":5}'
DRCE_REQUEST_ROUTING_MULTICAST = '{"role":0}'
DRCE_REQUEST_ROUTING_RND = '{"role":4}'
267