HCE Project: Python-language Distributed Tasks Manager Application, Distributed Crawler Application, and client API bindings (version 2.0.0-chaika)
Hierarchical Cluster Engine Python language binding
Constants.py File Reference

Go to the source code of this file.

Namespaces

 dc_crawler.Constants
 

Variables

int dc_crawler.Constants.FETCHER_TIME_LIMIT_MAX = 100
 
float dc_crawler.Constants.CONNECTION_TIMEOUT = 1.0
 
int dc_crawler.Constants.MAX_HTTP_REDIRECTS_LIMIT = 5
 
int dc_crawler.Constants.MAX_HTTP_SIZE_UNLIMIT = 0
 
int dc_crawler.Constants.MAX_HTML_REDIRECTS_LIMIT = 1
 
string dc_crawler.Constants.DB_SITES = "dc_sites"
 
string dc_crawler.Constants.DB_URLS = "dc_urls"
 
string dc_crawler.Constants.RTC_FINALIZER_APP_NAME = "rtc-finalizer"
 
string dc_crawler.Constants.RTC_PREPROCESSOR_APP_NAME = "rtc-preprocessor"
 
list dc_crawler.Constants.pubdateFeedNames = ["pubdate", "published", "pubDate", "published_parsed", "updated_parsed"]
 
string dc_crawler.Constants.pubdateRssFeedHeaderName = "X-pubdateRssFeed"
 
string dc_crawler.Constants.rssFeedUrlHeaderName = "X-feed_url"
 
string dc_crawler.Constants.baseUrlHeaderName = "X-base_url"
 
int dc_crawler.Constants.HTTP_CODE_200 = 200
 
int dc_crawler.Constants.HTTP_CODE_304 = 304
 
int dc_crawler.Constants.HTTP_CODE_400 = 400
 
int dc_crawler.Constants.HTTP_CODE_403 = 403
 
list dc_crawler.Constants.REDIRECT_HTTP_CODES = [301, 302, 303, 304]
 
list dc_crawler.Constants.REDIRECT_HEADER_FIELDS_FOR_REMOVE = ['referer', 'content-type', 'Location', 'cookie']
 
dictionary dc_crawler.Constants.charsetDetectorMap
 
dictionary dc_crawler.Constants.standardEncodings