HCE Project: Python-language Distributed Tasks Manager application, Distributed Crawler application, and client API bindings.
2.0.0-chaika
Hierarchical Cluster Engine Python language binding
Classes
class | ProcessorTask |
Variables
string | APP_NAME = "processor-task" |
string | DC_URLS_DB_NAME = "dc_urls" |
string | DC_URLS_TABLE_PREFIX = "urls_" |
string | DC_SITES_DB_NAME = "dc_sites" |
string | DC_SITES_TABLE_NAME = "sites" |
string | DC_URLS_TABLE_NAME = "urls" |
string | DC_SITES_PROPERTIES_TABLE_NAME = "sites_properties" |
string | MSG_ERROR_PROCESS_BATCH_ITEM = "Error process batch item " |
string | MSG_ERROR_PROCESS_BATCH = "Error process batch. " |
string | MSG_ERROR_LOAD_CONFIG = "Error loading config file." |
string | MSG_ERROR_EMPTY_CONFIG_FILE_NAME = "Config file name is empty." |
string | MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Error loading logging config file. Exiting." |
string | MSG_ERROR_LOAD_URL_DATA = "Can't load url data: " |
string | MSG_ERROR_LOAD_SITE_DATA = "Error load site data: " |
string | MSG_ERROR_READ_SITE_FROM_DB = "Error read site data from db" |
string | MSG_ERROR_PROCESS_TASK = "Can't process task " |
string | MSG_ERROR_SERIALISE_RESULT = "Error serialize result " |
string | MSG_ERROR_GET_SITE_FILE_DB = "Error get site file db " |
string | MSG_ERROR_UPDATE_RECORD = "Error update record " |
string | MSG_ERROR_UPDATE_PROCESSED_URL = "Error update processed url " |
string | MSG_ERROR_UPDATE_URL_CHARSET = "Error update url charset " |
string | MSG_ERROR_GET_RAW_CONTENT_FROM_DB = "Error get raw content from disk " |
string | MSG_ERROR_PROCESS = "Error process " |
string | MSG_ERROR_LOAD_SITE_PROPERTIES = "Error load site properties " |
string | MSG_ERROR_CHECK_SITE = "Site check is not passed. " |
string | MSG_ERROR_LOAD_OPTIONS = "Error load options. " |
string | MSG_ERROR_CONVERT_RAW_CONTENT_CHARSET = "Cannot convert raw content charset. " |
string | MSG_ERROR_UPDATE_SITE_RESOURCES = "Error update site resources. " |
string | MSG_ERROR_EMPTY_BATCH = "Error read input pickle from stdin." |
string | MSG_ERROR_CHECK_CONTENT_HASH = "Fail to check content hash" |
string | MSG_ERROR_CALC_CONTENT_HASH = "Fail to calc content hash" |
string | MSG_ERROR_CHECK_CONTENT_HASH_DUPLICATE = "Can't check content hash duplicate" |
string | MSG_INFO_PROCESSOR_CMD = "Processor cmd: " |
string | MSG_INFO_LOAD_SITE_PROPERTIES = "Mismatch load site properties " |
string | MSG_INFO_PROCESS_BATCH = "Skipped process batch. " |
string | MSG_INFO_PROCESS_BATCH_ITEM = "Skipped process batch item " |
string | MSG_INFO_PROCESSOR_EXIT_CODE = "Scraper exit_code: " |
string | MSG_INFO_PROCESSOR_OUTPUT = "Scraper output: " |
string | MSG_INFO_PROCESSOR_ERROR = "Scraper err: " |
int | EXIT_SUCCESS = 0 |
int | EXIT_FAILURE = 1 |
int | ERROR_MASK_NO_ERRORS = 0 |
int | ERROR_MASK_SITE_OK = 0 |
int | URLS_OF_MEDIA_CONTENT = 1 |
string | ENV_PROCESSOR_STORE_PATH = "ENV_PROCESSOR_STORE_PATH" |
string | SCRAPER_RESPONSE_ATTR_NAME = 'scraperResponse' |
string | DEFSULT_CHAIN_DELIMITER = ' ' |
Results = namedtuple("Results", "exit_code, output, err, scraperResponse") | |
string dc_processor.ProcessorTask.APP_NAME = "processor-task" |
Definition at line 65 of file ProcessorTask.py.
string dc_processor.ProcessorTask.DC_SITES_DB_NAME = "dc_sites" |
Definition at line 69 of file ProcessorTask.py.
string dc_processor.ProcessorTask.DC_SITES_PROPERTIES_TABLE_NAME = "sites_properties" |
Definition at line 72 of file ProcessorTask.py.
string dc_processor.ProcessorTask.DC_SITES_TABLE_NAME = "sites" |
Definition at line 70 of file ProcessorTask.py.
string dc_processor.ProcessorTask.DC_URLS_DB_NAME = "dc_urls" |
Definition at line 67 of file ProcessorTask.py.
string dc_processor.ProcessorTask.DC_URLS_TABLE_NAME = "urls" |
Definition at line 71 of file ProcessorTask.py.
string dc_processor.ProcessorTask.DC_URLS_TABLE_PREFIX = "urls_" |
Definition at line 68 of file ProcessorTask.py.
string dc_processor.ProcessorTask.DEFSULT_CHAIN_DELIMITER = ' ' |
Definition at line 118 of file ProcessorTask.py.
string dc_processor.ProcessorTask.ENV_PROCESSOR_STORE_PATH = "ENV_PROCESSOR_STORE_PATH" |
Definition at line 115 of file ProcessorTask.py.
int dc_processor.ProcessorTask.ERROR_MASK_NO_ERRORS = 0 |
Definition at line 111 of file ProcessorTask.py.
int dc_processor.ProcessorTask.ERROR_MASK_SITE_OK = 0 |
Definition at line 112 of file ProcessorTask.py.
int dc_processor.ProcessorTask.EXIT_FAILURE = 1 |
Definition at line 109 of file ProcessorTask.py.
int dc_processor.ProcessorTask.EXIT_SUCCESS = 0 |
Definition at line 108 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_CALC_CONTENT_HASH = "Fail to calc content hash" |
Definition at line 97 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_CHECK_CONTENT_HASH = "Fail to check content hash" |
Definition at line 96 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_CHECK_CONTENT_HASH_DUPLICATE = "Can't check content hash duplicate" |
Definition at line 98 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_CHECK_SITE = "Site check is not passed. " |
Definition at line 91 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_CONVERT_RAW_CONTENT_CHARSET = "Cannot convert raw content charset. " |
Definition at line 93 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_EMPTY_BATCH = "Error read input pickle from stdin." |
Definition at line 95 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_EMPTY_CONFIG_FILE_NAME = "Config file name is empty." |
Definition at line 77 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_GET_RAW_CONTENT_FROM_DB = "Error get raw content from disk " |
Definition at line 88 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_GET_SITE_FILE_DB = "Error get site file db " |
Definition at line 84 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_LOAD_CONFIG = "Error loading config file." |
Definition at line 76 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Error loading logging config file. Exiting." |
Definition at line 78 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_LOAD_OPTIONS = "Error load options. " |
Definition at line 92 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_LOAD_SITE_DATA = "Error load site data: " |
Definition at line 80 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_LOAD_SITE_PROPERTIES = "Error load site properties " |
Definition at line 90 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_LOAD_URL_DATA = "Can't load url data: " |
Definition at line 79 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_PROCESS = "Error process " |
Definition at line 89 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_PROCESS_BATCH = "Error process batch. " |
Definition at line 75 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_PROCESS_BATCH_ITEM = "Error process batch item " |
Definition at line 74 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_PROCESS_TASK = "Can't process task " |
Definition at line 82 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_READ_SITE_FROM_DB = "Error read site data from db" |
Definition at line 81 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_SERIALISE_RESULT = "Error serialize result " |
Definition at line 83 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_UPDATE_PROCESSED_URL = "Error update processed url " |
Definition at line 86 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_UPDATE_RECORD = "Error update record " |
Definition at line 85 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_UPDATE_SITE_RESOURCES = "Error update site resources. " |
Definition at line 94 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_ERROR_UPDATE_URL_CHARSET = "Error update url charset " |
Definition at line 87 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_INFO_LOAD_SITE_PROPERTIES = "Mismatch load site properties " |
Definition at line 101 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_INFO_PROCESS_BATCH = "Skipped process batch. " |
Definition at line 102 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_INFO_PROCESS_BATCH_ITEM = "Skipped process batch item " |
Definition at line 103 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_INFO_PROCESSOR_CMD = "Processor cmd: " |
Definition at line 100 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_INFO_PROCESSOR_ERROR = "Scraper err: " |
Definition at line 106 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_INFO_PROCESSOR_EXIT_CODE = "Scraper exit_code: " |
Definition at line 104 of file ProcessorTask.py.
string dc_processor.ProcessorTask.MSG_INFO_PROCESSOR_OUTPUT = "Scraper output: " |
Definition at line 105 of file ProcessorTask.py.
dc_processor.ProcessorTask.Results = namedtuple("Results", "exit_code, output, err, scraperResponse") |
Definition at line 120 of file ProcessorTask.py.
string dc_processor.ProcessorTask.SCRAPER_RESPONSE_ATTR_NAME = 'scraperResponse' |
Definition at line 117 of file ProcessorTask.py.
int dc_processor.ProcessorTask.URLS_OF_MEDIA_CONTENT = 1 |
Definition at line 114 of file ProcessorTask.py.