|
string | dc_crawler.CrawlerTask.DB_SITES = "dc_sites" |
|
string | dc_crawler.CrawlerTask.DB_URLS = "dc_urls" |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_LOAD_CONFIG = "Error loading config file. Exiting. " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_LOAD_OPTIONS = "Error loading options. Exiting. " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Can't load logging config file. Exiting. " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_LOAD_SITE_DATA = "Can't load site data: " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_UPDATE_SITE_DATA = "Can't update site data: " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_LOAD_URL_DATA = "Can't load url data: " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_PROCESS_BATCH_ITEM = "Can't process batch item " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_WRITE_CRAWLED_DATA = "Can't write crawled data " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_COLLECT_URLS = "Can't collect urls " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_ADD_URL_TO_BATCH_ITEM = "Can't add url to batch item " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_LOAD_SITE_PROPERTIES = "Can't load site properties " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_CRAWL_SITE = "Can't crawl site " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_CHECK_SITE = "Site didn't pass the site check " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_GET_DIR = "Can't get dir " |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_READ_SITE_FROM_DB = "Can't read site data from db" |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_EMPTY_RESPONSE_SIZE = "Empty response" |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_NOT_EXIST_ANY_VALID_PROXY = "Not exist any valid proxy" |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_EMPTY_CONFIG_FILE_NAME = "Config file name is empty." |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_WRONG_CONFIG_FILE_NAME = "Config file name is wrong: %s" |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_LOAD_APP_CONFIG = "Error loading application config file. %s" |
|
string | dc_crawler.CrawlerTask.MSG_ERROR_EXTRACT_BASE_URL = "Extract base url failed. Error: %s" |
|
string | dc_crawler.CrawlerTask.MSG_INFO_PROCESS_BATCH = "ProcessBatch " |
|
string | dc_crawler.CrawlerTask.MSG_INFO_STORE_COOKIES_FILE = "Store cookies file on disk." |
|
string | dc_crawler.CrawlerTask.MSG_DEBUG_NON_PROCESSING = "ProcessorName is NONE. Exclude batch item from further processing." |
|
string | dc_crawler.CrawlerTask.SITE_MD5_EMPTY = "d41d8cd98f00b204e9800998ecf8427e" |
|
int | dc_crawler.CrawlerTask.DEFAULT_MAX_SIZE = 1000000 |
|
string | dc_crawler.CrawlerTask.EMPTY_RESPONSE_SIZE = "0" |
|
string | dc_crawler.CrawlerTask.APP_NAME = "crawler-task" |
|
string | dc_crawler.CrawlerTask.HTTP_COOKIE = "HTTP_COOKIE" |
|
string | dc_crawler.CrawlerTask.DEFAULT_HTTP_COOKIE = "" |
|
string | dc_crawler.CrawlerTask.HTTP_HEADERS = "HTTP_HEADERS" |
|
string | dc_crawler.CrawlerTask.DEFAULT_HTTP_HEADER = "" |
|
string | dc_crawler.CrawlerTask.DC_URLS_DB_NAME = "dc_urls" |
|
string | dc_crawler.CrawlerTask.DC_URLS_TABLE_PREFIX = "urls_" |
|
string | dc_crawler.CrawlerTask.DC_SITES_DB_NAME = "dc_sites" |
|
string | dc_crawler.CrawlerTask.DC_SITES_PROPERTIES_TABLE_NAME = "sites_properties" |
|
string | dc_crawler.CrawlerTask.DC_SITES_TABLE_NAME = "sites" |
|
string | dc_crawler.CrawlerTask.DC_URLS_TABLE_NAME = "urls" |
|
string | dc_crawler.CrawlerTask.COOKIES_FILE_POSTFIX = ".cookies.txt" |
|
string | dc_crawler.CrawlerTask.NON_PROCESSING = "NONE" |
|
string | dc_crawler.CrawlerTask.HTTP_REDIRECT = "<Response [301]>" |
|
string | dc_crawler.CrawlerTask.HTML_REDIRECT = "" |
|
int | dc_crawler.CrawlerTask.MAX_HTTP_REDIRECTS_UNLIMITED = 0 |
|
int | dc_crawler.CrawlerTask.MAX_HTML_REDIRECTS_UNLIMITED = 0 |
|
string | dc_crawler.CrawlerTask.META_XPATH = "//meta[contains(@content, 'url')]/@content" |
|
| dc_crawler.CrawlerTask.Results = namedtuple("Results", "exit_code, output, err") |
|
| dc_crawler.CrawlerTask.ROBOTS_PATTERN = re.compile(r'(https?://[^/]+).*', re.I) |
|
| dc_crawler.CrawlerTask.TEXT_CONTENT_TYPE_PATTERN = re.compile('text', re.I) |
|
string | dc_crawler.CrawlerTask.ENV_CRAWLER_STORE_PATH = "ENV_CRAWLER_STORE_PATH" |
|
string | dc_crawler.CrawlerTask.DETECT_MIME_MAIN_CONTENT = "1" |
|
string | dc_crawler.CrawlerTask.RECOVER_IF_FAILED = "2" |
|
int | dc_crawler.CrawlerTask.EXIT_SUCCESS = 0 |
|
int | dc_crawler.CrawlerTask.EXIT_FAILURE = 1 |
|