|
string | dc_processor.Scraper.APP_NAME = "scraper" |
|
string | dc_processor.Scraper.MSG_ERROR_LOAD_CONFIG = "Error loading config file. Exiting." |
|
string | dc_processor.Scraper.MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Error loading logging config file. Exiting." |
|
string | dc_processor.Scraper.MSG_ERROR_LOAD_EXTRACTORS = "Error load extractors " |
|
string | dc_processor.Scraper.MSG_ERROR_TEMPLATE_EXTRACTION = "Error template extraction " |
|
string | dc_processor.Scraper.MSG_ERROR_DYNAMIC_EXTRACTION = "Error dynamic extraction " |
|
string | dc_processor.Scraper.MSG_ERROR_LOAD_DB_BACKEND = "Error load db backend" |
|
string | dc_processor.Scraper.MSG_ERROR_LOAD_OPTIONS = "Error load options" |
|
string | dc_processor.Scraper.MSG_INFO_PREPARE_CONTENT = "Prepare content: " |
|
string | dc_processor.Scraper.MSG_ERROR_ADJUST_PR = "Error adjust partial references. " |
|
string | dc_processor.Scraper.MSG_ERROR_ADJUST_PUBDATE = "PUBDATE_ERROR " |
|
string | dc_processor.Scraper.MSG_ERROR_ADJUST_TITLE = "Can't adjust title. " |
|
int | dc_processor.Scraper.EXIT_SUCCESS = 0 |
|
int | dc_processor.Scraper.EXIT_FAILURE = 1 |
|
int | dc_processor.Scraper.SQLITE_TIMEOUT = 30 |
|
string | dc_processor.Scraper.ENV_SCRAPER_STORE_PATH = "ENV_SCRAPER_STORE_PATH" |
|
list | dc_processor.Scraper.CONTENT_REPLACEMENT_LIST = ['\n', '\r\n', '\t', ' ', '<br>', '<p>', '</p>'] |
|
int | dc_processor.Scraper.DEFAULT_TAG_REDUCE_MASK = 65535 |
|
dictionary | dc_processor.Scraper.EXTENDED_NEWS_TAGS = {"description": ["//meta[@name='description']//@content"]} |
|
list | dc_processor.Scraper.LINKS_NEWS_TAGS = [CONSTS.TAG_MEDIA, CONSTS.TAG_LINK, CONSTS.TAG_MEDIA_CONTENT, "links", "href"] |
|
list | dc_processor.Scraper.DATA_NEWS_TAGS = [] |
|
list | dc_processor.Scraper.TAGS_DATETIME_NEWS_NAMES = [CONSTS.TAG_PUB_DATE, CONSTS.TAG_DC_DATE] |
|
list | dc_processor.Scraper.TAGS_DATETIME_TEMPLATE_TYPES = [CONSTS.TAG_TYPE_DATETIME] |
|
string | dc_processor.Scraper.OPTION_SECTION_DATETIME_NEWS_NAMES = 'tags_datetime_news_names' |
|
string | dc_processor.Scraper.OPTION_SECTION_DATETIME_TEMPLATE_TYPES = 'tags_datetime_template_types' |
|
string | dc_processor.Scraper.OPTION_SECTION_TAGS_TYPE = 'tagsTypes' |
|
string | dc_processor.Scraper.OPTION_SECTION_URL_SOURCES_RULES = 'urlSourcesRules' |
|
string | dc_processor.Scraper.URL_SOURCES_RULE_DATA_URL = 'd_url' |
|
string | dc_processor.Scraper.URL_SOURCES_RULE_REDIRECT_URL = 'r_url ' |
|
string | dc_processor.Scraper.URL_SOURCES_RULE_FEED_URL = 'f_url' |
|