HCE Project: Python-language Distributed Tasks Manager application, Distributed Crawler application, and client API bindings (version 2.0.0-chaika)
Hierarchical Cluster Engine Python language binding
dc_processor.ProcessorTask Namespace Reference

Classes

class  ProcessorTask
 

Variables

string APP_NAME = "processor-task"
 
string DC_URLS_DB_NAME = "dc_urls"
 
string DC_URLS_TABLE_PREFIX = "urls_"
 
string DC_SITES_DB_NAME = "dc_sites"
 
string DC_SITES_TABLE_NAME = "sites"
 
string DC_URLS_TABLE_NAME = "urls"
 
string DC_SITES_PROPERTIES_TABLE_NAME = "sites_properties"
 
string MSG_ERROR_PROCESS_BATCH_ITEM = "Error process batch item "
 
string MSG_ERROR_PROCESS_BATCH = "Error process batch. "
 
string MSG_ERROR_LOAD_CONFIG = "Error loading config file."
 
string MSG_ERROR_EMPTY_CONFIG_FILE_NAME = "Config file name is empty."
 
string MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Error loading logging config file. Exiting."
 
string MSG_ERROR_LOAD_URL_DATA = "Can't load url data: "
 
string MSG_ERROR_LOAD_SITE_DATA = "Error load site data: "
 
string MSG_ERROR_READ_SITE_FROM_DB = "Error read site data from db"
 
string MSG_ERROR_PROCESS_TASK = "Can't process task "
 
string MSG_ERROR_SERIALISE_RESULT = "Error serialize result "
 
string MSG_ERROR_GET_SITE_FILE_DB = "Error get site file db "
 
string MSG_ERROR_UPDATE_RECORD = "Error update record "
 
string MSG_ERROR_UPDATE_PROCESSED_URL = "Error update processed url "
 
string MSG_ERROR_UPDATE_URL_CHARSET = "Error update url charset "
 
string MSG_ERROR_GET_RAW_CONTENT_FROM_DB = "Error get raw content from disk "
 
string MSG_ERROR_PROCESS = "Error process "
 
string MSG_ERROR_LOAD_SITE_PROPERTIES = "Error load site properties "
 
string MSG_ERROR_CHECK_SITE = "Site check is not passed. "
 
string MSG_ERROR_LOAD_OPTIONS = "Error load options. "
 
string MSG_ERROR_CONVERT_RAW_CONTENT_CHARSET = "Cannot convert raw content charset. "
 
string MSG_ERROR_UPDATE_SITE_RESOURCES = "Error update site resources. "
 
string MSG_ERROR_EMPTY_BATCH = "Error read input pickle from stdin."
 
string MSG_ERROR_CHECK_CONTENT_HASH = "Fail to check content hash"
 
string MSG_ERROR_CALC_CONTENT_HASH = "Fail to calc content hash"
 
string MSG_ERROR_CHECK_CONTENT_HASH_DUPLICATE = "Can't check content hash duplicate"
 
string MSG_INFO_PROCESSOR_CMD = "Processor cmd: "
 
string MSG_INFO_LOAD_SITE_PROPERTIES = "Mismatch load site properties "
 
string MSG_INFO_PROCESS_BATCH = "Skipped process batch. "
 
string MSG_INFO_PROCESS_BATCH_ITEM = "Skipped process batch item "
 
string MSG_INFO_PROCESSOR_EXIT_CODE = "Scraper exit_code: "
 
string MSG_INFO_PROCESSOR_OUTPUT = "Scraper output: "
 
string MSG_INFO_PROCESSOR_ERROR = "Scraper err: "
 
int EXIT_SUCCESS = 0
 
int EXIT_FAILURE = 1
 
int ERROR_MASK_NO_ERRORS = 0
 
int ERROR_MASK_SITE_OK = 0
 
int URLS_OF_MEDIA_CONTENT = 1
 
string ENV_PROCESSOR_STORE_PATH = "ENV_PROCESSOR_STORE_PATH"
 
string SCRAPER_RESPONSE_ATTR_NAME = 'scraperResponse'
 
string DEFSULT_CHAIN_DELIMITER = ' '
 
 Results = namedtuple("Results", "exit_code, output, err, scraperResponse")
 

Variable Documentation

◆ APP_NAME

string dc_processor.ProcessorTask.APP_NAME = "processor-task"

Definition at line 65 of file ProcessorTask.py.

◆ DC_SITES_DB_NAME

string dc_processor.ProcessorTask.DC_SITES_DB_NAME = "dc_sites"

Definition at line 69 of file ProcessorTask.py.

◆ DC_SITES_PROPERTIES_TABLE_NAME

string dc_processor.ProcessorTask.DC_SITES_PROPERTIES_TABLE_NAME = "sites_properties"

Definition at line 72 of file ProcessorTask.py.

◆ DC_SITES_TABLE_NAME

string dc_processor.ProcessorTask.DC_SITES_TABLE_NAME = "sites"

Definition at line 70 of file ProcessorTask.py.

◆ DC_URLS_DB_NAME

string dc_processor.ProcessorTask.DC_URLS_DB_NAME = "dc_urls"

Definition at line 67 of file ProcessorTask.py.

◆ DC_URLS_TABLE_NAME

string dc_processor.ProcessorTask.DC_URLS_TABLE_NAME = "urls"

Definition at line 71 of file ProcessorTask.py.

◆ DC_URLS_TABLE_PREFIX

string dc_processor.ProcessorTask.DC_URLS_TABLE_PREFIX = "urls_"

Definition at line 68 of file ProcessorTask.py.

◆ DEFSULT_CHAIN_DELIMITER

string dc_processor.ProcessorTask.DEFSULT_CHAIN_DELIMITER = ' '

Definition at line 118 of file ProcessorTask.py.

◆ ENV_PROCESSOR_STORE_PATH

string dc_processor.ProcessorTask.ENV_PROCESSOR_STORE_PATH = "ENV_PROCESSOR_STORE_PATH"

Definition at line 115 of file ProcessorTask.py.

◆ ERROR_MASK_NO_ERRORS

int dc_processor.ProcessorTask.ERROR_MASK_NO_ERRORS = 0

Definition at line 111 of file ProcessorTask.py.

◆ ERROR_MASK_SITE_OK

int dc_processor.ProcessorTask.ERROR_MASK_SITE_OK = 0

Definition at line 112 of file ProcessorTask.py.

◆ EXIT_FAILURE

int dc_processor.ProcessorTask.EXIT_FAILURE = 1

Definition at line 109 of file ProcessorTask.py.

◆ EXIT_SUCCESS

int dc_processor.ProcessorTask.EXIT_SUCCESS = 0

Definition at line 108 of file ProcessorTask.py.

◆ MSG_ERROR_CALC_CONTENT_HASH

string dc_processor.ProcessorTask.MSG_ERROR_CALC_CONTENT_HASH = "Fail to calc content hash"

Definition at line 97 of file ProcessorTask.py.

◆ MSG_ERROR_CHECK_CONTENT_HASH

string dc_processor.ProcessorTask.MSG_ERROR_CHECK_CONTENT_HASH = "Fail to check content hash"

Definition at line 96 of file ProcessorTask.py.

◆ MSG_ERROR_CHECK_CONTENT_HASH_DUPLICATE

string dc_processor.ProcessorTask.MSG_ERROR_CHECK_CONTENT_HASH_DUPLICATE = "Can't check content hash duplicate"

Definition at line 98 of file ProcessorTask.py.

◆ MSG_ERROR_CHECK_SITE

string dc_processor.ProcessorTask.MSG_ERROR_CHECK_SITE = "Site check is not passed. "

Definition at line 91 of file ProcessorTask.py.

◆ MSG_ERROR_CONVERT_RAW_CONTENT_CHARSET

string dc_processor.ProcessorTask.MSG_ERROR_CONVERT_RAW_CONTENT_CHARSET = "Cannot convert raw content charset. "

Definition at line 93 of file ProcessorTask.py.

◆ MSG_ERROR_EMPTY_BATCH

string dc_processor.ProcessorTask.MSG_ERROR_EMPTY_BATCH = "Error read input pickle from stdin."

Definition at line 95 of file ProcessorTask.py.

◆ MSG_ERROR_EMPTY_CONFIG_FILE_NAME

string dc_processor.ProcessorTask.MSG_ERROR_EMPTY_CONFIG_FILE_NAME = "Config file name is empty."

Definition at line 77 of file ProcessorTask.py.

◆ MSG_ERROR_GET_RAW_CONTENT_FROM_DB

string dc_processor.ProcessorTask.MSG_ERROR_GET_RAW_CONTENT_FROM_DB = "Error get raw content from disk "

Definition at line 88 of file ProcessorTask.py.

◆ MSG_ERROR_GET_SITE_FILE_DB

string dc_processor.ProcessorTask.MSG_ERROR_GET_SITE_FILE_DB = "Error get site file db "

Definition at line 84 of file ProcessorTask.py.

◆ MSG_ERROR_LOAD_CONFIG

string dc_processor.ProcessorTask.MSG_ERROR_LOAD_CONFIG = "Error loading config file."

Definition at line 76 of file ProcessorTask.py.

◆ MSG_ERROR_LOAD_LOG_CONFIG_FILE

string dc_processor.ProcessorTask.MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Error loading logging config file. Exiting."

Definition at line 78 of file ProcessorTask.py.

◆ MSG_ERROR_LOAD_OPTIONS

string dc_processor.ProcessorTask.MSG_ERROR_LOAD_OPTIONS = "Error load options. "

Definition at line 92 of file ProcessorTask.py.

◆ MSG_ERROR_LOAD_SITE_DATA

string dc_processor.ProcessorTask.MSG_ERROR_LOAD_SITE_DATA = "Error load site data: "

Definition at line 80 of file ProcessorTask.py.

◆ MSG_ERROR_LOAD_SITE_PROPERTIES

string dc_processor.ProcessorTask.MSG_ERROR_LOAD_SITE_PROPERTIES = "Error load site properties "

Definition at line 90 of file ProcessorTask.py.

◆ MSG_ERROR_LOAD_URL_DATA

string dc_processor.ProcessorTask.MSG_ERROR_LOAD_URL_DATA = "Can't load url data: "

Definition at line 79 of file ProcessorTask.py.

◆ MSG_ERROR_PROCESS

string dc_processor.ProcessorTask.MSG_ERROR_PROCESS = "Error process "

Definition at line 89 of file ProcessorTask.py.

◆ MSG_ERROR_PROCESS_BATCH

string dc_processor.ProcessorTask.MSG_ERROR_PROCESS_BATCH = "Error process batch. "

Definition at line 75 of file ProcessorTask.py.

◆ MSG_ERROR_PROCESS_BATCH_ITEM

string dc_processor.ProcessorTask.MSG_ERROR_PROCESS_BATCH_ITEM = "Error process batch item "

Definition at line 74 of file ProcessorTask.py.

◆ MSG_ERROR_PROCESS_TASK

string dc_processor.ProcessorTask.MSG_ERROR_PROCESS_TASK = "Can't process task "

Definition at line 82 of file ProcessorTask.py.

◆ MSG_ERROR_READ_SITE_FROM_DB

string dc_processor.ProcessorTask.MSG_ERROR_READ_SITE_FROM_DB = "Error read site data from db"

Definition at line 81 of file ProcessorTask.py.

◆ MSG_ERROR_SERIALISE_RESULT

string dc_processor.ProcessorTask.MSG_ERROR_SERIALISE_RESULT = "Error serialize result "

Definition at line 83 of file ProcessorTask.py.

◆ MSG_ERROR_UPDATE_PROCESSED_URL

string dc_processor.ProcessorTask.MSG_ERROR_UPDATE_PROCESSED_URL = "Error update processed url "

Definition at line 86 of file ProcessorTask.py.

◆ MSG_ERROR_UPDATE_RECORD

string dc_processor.ProcessorTask.MSG_ERROR_UPDATE_RECORD = "Error update record "

Definition at line 85 of file ProcessorTask.py.

◆ MSG_ERROR_UPDATE_SITE_RESOURCES

string dc_processor.ProcessorTask.MSG_ERROR_UPDATE_SITE_RESOURCES = "Error update site resources. "

Definition at line 94 of file ProcessorTask.py.

◆ MSG_ERROR_UPDATE_URL_CHARSET

string dc_processor.ProcessorTask.MSG_ERROR_UPDATE_URL_CHARSET = "Error update url charset "

Definition at line 87 of file ProcessorTask.py.

◆ MSG_INFO_LOAD_SITE_PROPERTIES

string dc_processor.ProcessorTask.MSG_INFO_LOAD_SITE_PROPERTIES = "Mismatch load site properties "

Definition at line 101 of file ProcessorTask.py.

◆ MSG_INFO_PROCESS_BATCH

string dc_processor.ProcessorTask.MSG_INFO_PROCESS_BATCH = "Skipped process batch. "

Definition at line 102 of file ProcessorTask.py.

◆ MSG_INFO_PROCESS_BATCH_ITEM

string dc_processor.ProcessorTask.MSG_INFO_PROCESS_BATCH_ITEM = "Skipped process batch item "

Definition at line 103 of file ProcessorTask.py.

◆ MSG_INFO_PROCESSOR_CMD

string dc_processor.ProcessorTask.MSG_INFO_PROCESSOR_CMD = "Processor cmd: "

Definition at line 100 of file ProcessorTask.py.

◆ MSG_INFO_PROCESSOR_ERROR

string dc_processor.ProcessorTask.MSG_INFO_PROCESSOR_ERROR = "Scraper err: "

Definition at line 106 of file ProcessorTask.py.

◆ MSG_INFO_PROCESSOR_EXIT_CODE

string dc_processor.ProcessorTask.MSG_INFO_PROCESSOR_EXIT_CODE = "Scraper exit_code: "

Definition at line 104 of file ProcessorTask.py.

◆ MSG_INFO_PROCESSOR_OUTPUT

string dc_processor.ProcessorTask.MSG_INFO_PROCESSOR_OUTPUT = "Scraper output: "

Definition at line 105 of file ProcessorTask.py.

◆ Results

dc_processor.ProcessorTask.Results = namedtuple("Results", "exit_code, output, err, scraperResponse")

Definition at line 120 of file ProcessorTask.py.

◆ SCRAPER_RESPONSE_ATTR_NAME

string dc_processor.ProcessorTask.SCRAPER_RESPONSE_ATTR_NAME = 'scraperResponse'

Definition at line 117 of file ProcessorTask.py.

◆ URLS_OF_MEDIA_CONTENT

int dc_processor.ProcessorTask.URLS_OF_MEDIA_CONTENT = 1

Definition at line 114 of file ProcessorTask.py.