|
int | dc_crawler.Constants.FETCHER_TIME_LIMIT_MAX = 100 |
|
float | dc_crawler.Constants.CONNECTION_TIMEOUT = 1.0 |
|
int | dc_crawler.Constants.MAX_HTTP_REDIRECTS_LIMIT = 5 |
|
int | dc_crawler.Constants.MAX_HTTP_SIZE_UNLIMIT = 0 |
|
int | dc_crawler.Constants.MAX_HTML_REDIRECTS_LIMIT = 1 |
|
string | dc_crawler.Constants.DB_SITES = "dc_sites" |
|
string | dc_crawler.Constants.DB_URLS = "dc_urls" |
|
string | dc_crawler.Constants.RTC_FINALIZER_APP_NAME = "rtc-finalizer" |
|
string | dc_crawler.Constants.RTC_PREPROCESSOR_APP_NAME = "rtc-preprocessor" |
|
list | dc_crawler.Constants.pubdateFeedNames = ["pubdate", "published", "pubDate", "published_parsed", "updated_parsed"] |
|
string | dc_crawler.Constants.pubdateRssFeedHeaderName = "X-pubdateRssFeed" |
|
string | dc_crawler.Constants.rssFeedUrlHeaderName = "X-feed_url" |
|
string | dc_crawler.Constants.baseUrlHeaderName = "X-base_url" |
|
int | dc_crawler.Constants.HTTP_CODE_200 = 200 |
|
int | dc_crawler.Constants.HTTP_CODE_304 = 304 |
|
int | dc_crawler.Constants.HTTP_CODE_400 = 400 |
|
int | dc_crawler.Constants.HTTP_CODE_403 = 403 |
|
list | dc_crawler.Constants.REDIRECT_HTTP_CODES = [301, 302, 303, 304] |
|
list | dc_crawler.Constants.REDIRECT_HEADER_FIELDS_FOR_REMOVE = ['referer', 'content-type', 'Location', 'cookie'] |
|
dictionary | dc_crawler.Constants.charsetDetectorMap |
|
dictionary | dc_crawler.Constants.standardEncodings |
|