"""
HCE project, Python bindings, Distributed Crawler application.
Application level constants and enumerations.

@author bgv bgv.hce@gmail.com
@link: http://hierarchical-cluster-engine.com/
@copyright: Copyright © 2013-2014 IOIX Ukraine
@license: http://hierarchical-cluster-engine.com/license/
"""

from collections import namedtuple
# Integer event type codes.
# NOTE(review): these may be members of an enumeration class whose header is
# outside this excerpt -- confirm the enclosing scope and indentation before
# merging.
FIELD_RECALCULATE = 19

# Response codes for site operations.
SITE_NEW_RESPONSE = 101
SITE_UPDATE_RESPONSE = 102
SITE_STATUS_RESPONSE = 103
SITE_DELETE_RESPONSE = 104
SITE_CLEANUP_RESPONSE = 105

# Response codes for URL operations.
URL_NEW_RESPONSE = 106
URL_UPDATE_RESPONSE = 107
URL_STATUS_RESPONSE = 108
URL_DELETE_RESPONSE = 109
URL_FETCH_RESPONSE = 110
URL_CLEANUP_RESPONSE = 111
URL_CONTENT_RESPONSE = 112

# Response codes for resource operations.
RESOURCE_FETCH_RESPONSE = 113
RESOURCE_DELETE_RESPONSE = 114

SITE_FIND_RESPONSE = 115

SQL_CUSTOM_RESPONSE = 116

# Code 117 is not assigned among the constants visible here.
URL_PURGE_RESPONSE = 118
# Equals FIELD_RECALCULATE (19) plus 100.
FIELD_RECALCULATE_RESPONSE = 119
URL_VERIFY_RESPONSE = 120
URL_AGE_RESPONSE = 121

URL_PUT_RESPONSE = 122
URL_HISTORY_RESPONSE = 123
URL_STATS_RESPONSE = 124

# Response codes for proxy operations.
PROXY_NEW_RESPONSE = 125
PROXY_UPDATE_RESPONSE = 126
PROXY_DELETE_RESPONSE = 127
PROXY_STATUS_RESPONSE = 128
PROXY_FIND_RESPONSE = 129

# Response codes for attribute operations.
ATTR_SET_RESPONSE = 130
ATTR_UPDATE_RESPONSE = 131
ATTR_DELETE_RESPONSE = 132
ATTR_FETCH_RESPONSE = 133
# Two-field record type: `eventType` first, `eventObject` second.
DRCESyncTasksCover = namedtuple(
    "DRCESyncTasksCover",
    "eventType eventObject",
)
# Re-export the logger name from APP_CONSTS so this module's users can read it
# from here. APP_CONSTS is expected to be imported elsewhere in this file.
LOGGER_NAME = APP_CONSTS.LOGGER_NAME
# String constants. Every value is reproduced exactly as in the original,
# including spelling differences between names and values.

# Counter names with the "batches_crawl_" prefix.
BATCHES_CRAWL_COUNTER_TOTAL_NAME = "batches_crawl_total"
BATCHES_CRAWL_COUNTER_QUEUE_NAME = "batches_crawl_queue"
BATCHES_CRAWL_COUNTER_FAULT_NAME = "batches_crawl_fault"
BATCHES_CRAWL_COUNTER_FILLED_NAME = "batches_crawl_filled"
BATCHES_CRAWL_COUNTER_URLS_NAME = "batches_crawl_urls"
BATCHES_CRAWL_COUNTER_URLS_FAULT_NAME = "batches_crawl_urls_fault"
BATCHES_CRAWL_COUNTER_URL_FETCH_NAME = "batches_crawl_url_fetch"
BATCHES_CRAWL_COUNTER_URL_FETCH_CANCELLED_NAME = "batches_crawl_url_fetch_cancelled"
BATCHES_CRAWL_COUNTER_DELETE_FAULT_NAME = "batches_crawl_delete_fault"
BATCHES_CRAWL_COUNTER_FAULT_TTL_NAME = "batches_crawl_fault_ttl"
BATCHES_CRAWL_COUNTER_CHECK_FAULT_NAME = "batches_crawl_check_fault"
BATCHES_CRAWL_COUNTER_URLS_RET_NAME = "batches_crawl_urls_ret"
BATCHES_CRAWL_COUNTER_URL_FETCH_INCR_NAME = "batches_crawl_url_fetch_incr"

# Counter names with the "sites_recrawl_" prefix.
SITES_RECRAWL_COUNTER_NAME = "sites_recrawl_cnt"
SITES_RECRAWL_UPDATED_COUNTER_NAME = "sites_recrawl_updated_cnt"
SITES_RECRAWL_DELETED_COUNTER_NAME = "sites_recrawl_deleted_cnt"
SITES_DRCE_COUNTER_NAME = "sites_recrawl_drce_cnt"

# Further "batches_crawl_" / "batches_realtime_" names.
BATCHES_CRAWL_COUNTER_TIME_AVG_NAME = "batches_crawl_time_avg"
BATCHES_REALTIME_THREADS_NAME = "batches_realtime_threads"
BATCHES_REALTIME_THREADS_CREATED_COUNTER_NAME = "batches_realtime_threads_created"
BATCHES_CRAWL_COUNTER_ITEMS_AVG_NAME = "batches_crawl_items_avg"
BATCHES_CRAWL_COUNTER_FETCHER_DYNAMIC = "batches_crawl_fetcher_dynamic"
BATCHES_CRAWL_COUNTER_FETCHER_STATIC = "batches_crawl_fetcher_static"
BATCHES_CRAWL_COUNTER_FETCHER_MIXED = "batches_crawl_fetcher_mixed"
BATCHES_CRAWL_COUNTER_URL_FETCH_REQUESTS_NAME = "batches_crawl_url_fetch_requests"

# Names with the "recrawl_" prefix.
RECRAWL_THREADS_COUNTER_QUEUE_NAME = "recrawl_threads"
RECRAWL_SITES_QUEUE_NAME = "recrawl_sites_queue"
RECRAWL_THREADS_CREATED_COUNTER_NAME = "recrawl_threads_created"

# Names with the "common_" prefix.
COMMON_THREADS_COUNTER_QUEUE_NAME = "common_threads"
COMMON_OPERATIONS_COUNTER_NAME = "common_operations_cnt"
COMMON_THREADS_CREATED_COUNTER_NAME = "common_threads_created"

# Counter names with the "purge_batches" prefix.
BATCHES_PURGE_COUNTER_NAME = "purge_batches"
# The value is spelled "canceled" (single "l") although the name says CANCELLED.
BATCHES_PURGE_COUNTER_CANCELLED_NAME = "purge_batches_canceled"
BATCHES_PURGE_COUNTER_TOTAL_NAME = "purge_batches_total"
BATCHES_PURGE_COUNTER_ERROR_NAME = "purge_batches_error"
BATCHES_PURGE_COUNTER_FAULT_NAME = "purge_batches_fault"
BATCHES_PURGE_COUNTER_DELETE_FAULT_NAME = "purge_batches_delete_fault"
BATCHES_PURGE_COUNTER_CHECK_FAULT_NAME = "purge_batches_check_fault"

# Counter names with the "batches_process_" prefix.
BATCHES_PROCESS_COUNTER_TOTAL_NAME = "batches_process_total"
BATCHES_PROCESS_COUNTER_QUEUE_NAME = "batches_process_queue"
BATCHES_PROCESS_COUNTER_FAULT_NAME = "batches_process_fault"
BATCHES_PROCESS_COUNTER_FILLED_NAME = "batches_process_filled"
BATCHES_PROCESS_COUNTER_URLS_NAME = "batches_process_urls"
BATCHES_PROCESS_COUNTER_URLS_FAULT_NAME = "batches_process_urls_fault"
BATCHES_PROCESS_COUNTER_DELETE_FAULT_NAME = "batches_process_delete_fault"
BATCHES_PROCESS_COUNTER_CHECK_FAULT_NAME = "batches_process_check_fault"
BATCHES_PROCESS_COUNTER_FAULT_TTL_NAME = "batches_process_fault_ttl"
BATCHES_PROCESS_COUNTER_CANCELLED_NAME = "batches_process_cancelled"

# Counter names with the "age_batches" prefix.
BATCHES_AGE_COUNTER_NAME = "age_batches"
# The value is spelled "canceled" (single "l") although the name says CANCELLED.
BATCHES_AGE_COUNTER_CANCELLED_NAME = "age_batches_canceled"
BATCHES_AGE_COUNTER_TOTAL_NAME = "age_batches_total"
BATCHES_AGE_COUNTER_ERROR_NAME = "age_batches_error"
BATCHES_AGE_COUNTER_FAULT_NAME = "age_batches_fault"
BATCHES_AGE_COUNTER_DELETE_FAULT_NAME = "age_batches_delete_fault"
BATCHES_AGE_COUNTER_CHECK_FAULT_NAME = "age_batches_check_fault"

# Configuration variable names with the "INCR_" prefix.
INCR_MIN_FREQ_CONFIG_VAR_NAME = "INCR_MIN_FREQ"
INCR_MAX_DEPTH_CONFIG_VAR_NAME = "INCR_MAX_DEPTH"
INCR_MAX_URLS_CONFIG_VAR_NAME = "INCR_MAX_URLS"

MERGE_PARAM_NAME = "MERGE_RESULTS"

# Suffix strings; each one starts with a dot.
RAW_DATA_SUFF = ".bin"
RAW_DATA_HEADERS_SUFF = ".headers.txt"
# The name is spelled REQESTS in the original; kept because it is public.
RAW_DATA_REQESTS_SUFF = ".requests.txt"
RAW_DATA_META_SUFF = ".meta.txt"
RAW_DATA_COOKIES_SUFF = ".cookies.txt"
RAW_DATA_TIDY_SUFF = ".tidy"
RAW_DATA_DYNAMIC_SUFF = ".dyn"
RAW_DATA_CHAIN_SUFF = ".chain"

# Site property names.
SITE_PROP_AUTO_REMOVE_RESOURCES = "AUTO_REMOVE_RESOURCES"
SITE_PROP_AUTO_REMOVE_ORDER = "AUTO_REMOVE_ORDER"
SITE_PROP_AUTO_REMOVE_WHERE = "AUTO_REMOVE_WHERE"
SITE_PROP_AUTO_REMOVE_WHERE_ACTIVE = "AUTO_REMOVE_WHERE_ACTIVE"
SITE_PROP_RECRAWL_DELETE_WHERE = "RECRAWL_DELETE_WHERE"
# The name says SAVE but the value is "STORE_COOKIES".
SITE_PROP_SAVE_COOKIES = "STORE_COOKIES"

# JSON-encoded strings, each carrying a single numeric "role" field.
DRCE_REQUEST_ROUTING_ROUND_ROBIN = '{"role":1}'
DRCE_REQUEST_ROUTING_RESOURCE_USAGE = '{"role":5}'
DRCE_REQUEST_ROUTING_MULTICAST = '{"role":0}'
DRCE_REQUEST_ROUTING_RND = '{"role":4}'