HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings, version 2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc_processor.Constants Namespace Reference

Classes

class  HTML5_SEMANTIC_TAGS
 

Variables

 LOGGER_NAME = APP_CONSTS.LOGGER_NAME
 
string TAG_MEDIA = "media"
 
string TAG_TITLE = "title"
 
string TAG_LINK = "link"
 
string TAG_DESCRIPTION = "description"
 
string TAG_PUB_DATE = "pubdate"
 
string TAG_DC_DATE = "dc_date"
 
string TAG_AUTHOR = "author"
 
string TAG_GUID = "guid"
 
string TAG_CONTENT_UTF8_ENCODED = "content_encoded"
 
string TAG_KEYWORDS = "keywords"
 
string TAG_MEDIA_THUMBNAIL = "media_thumbnail"
 
string TAG_MEDIA_CONTENT = "media_content"
 
string TAG_ENCLOSURE = "enclosure"
 
string TAG_GOOGLE = "google_search"
 
string TAG_GOOGLE_TOTAL = "google_search_total"
 
string TAG_SUMMARY_LANG = "summary_lang"
 
string HTML_LANG = "html_lang"
 
string PARENT_RSS_FEED = "parent_rss_feed"
 
string PARENT_RSS_FEED_URLMD5 = "parent_rss_feed_urlMd5"
 
string SUMMARY = "summary"
 
string SUMMARY_DETAIL = "summary_detail"
 
string COMMENTNS = "comments"
 
string TAGS = "tags"
 
string PUBLISHED = "published"
 
string CONTENT = "content"
 
string UPDATED = "updated"
 
string UPDATED_PARSED = "updated_parsed"
 
string TAG_ORDER_NUMBER = "order_number"
 
string TAG_SOURCE_URL = "source_url"
 
string TAG_FEED_URL = "feed_url"
 
string TAG_TYPE_DATETIME = 'datetime'
 
string TAG_PUBDATE_TZ = 'pubdate_tz'
 
int CONTENT_HASH_ALGORITHM_EMPTY = 0
 
int CONTENT_HASH_ALGORITHM_MD5 = 1
 
int CONTENT_HASH_ALGORITHM_CRC32 = 2
 
int CONTENT_HASH_ALGORITHM_SOUNDEX = 3
 
int CONTENT_HASH_ALGORITHM_SHA1 = 4
 
int CONTENT_HASH_ALGORITHM_SDHASH = 5
 
int CONTENT_HASH_ALGORITHM_BBHASH = 6
 
int CONTENT_HASH_ALGORITHM_MRSH_V2 = 7
 
int CONTENT_HASH_ALGORITHM_MVHASH_B = 8
 
int CONTENT_HASH_ALGORITHM_MD5_WITHOUT_HTML = 9
 
int CONTENT_HASH_ACTION_DELETE = 1
 
string PARENT_URL_MD5 = ""
 
int TAGS_RULES_MASK_DEFAULT_VALUE = 4
 
int TAGS_RULES_MASK_RULE_PRIORITY = 2
 
int TAGS_RULES_MASK_MANDATORY_FIELD = 1
 
string PROCESS_ALGORITHM_REGULAR = "regular"
 
string PROCESS_ALGORITHM_TRAINING = "training"
 
string PROCESS_ALGORITHM_PREDICTION = "prediction"
 
string PROCESS_ALGORITHM_CONCURRENCY = "concurrency"
 
string PROCESS_ALGORITHM_METRIC = "metric_based"
 
string PROCESS_ALGORITHM_FEED_PARSER = "feed_parser"
 
string PROCESS_ALGORITHM_ALCHEMY = "ALCHEMY"
 
string PROCESS_ALGORITHM_BOILERPIPE = "BOILERPIPE"
 
string PROCESS_ALGORITHM_NEWSPAPER = "NEWSPAPER"
 
string PROCESS_ALGORITHM_GOOSE = "GOOSE"
 
string PROCESS_ALGORITHM_SCRAPY = "SCRAPY"
 
string PROCESS_ALGORITHM_ML = "ML"
 
string TRAINING_QUEUE = "TRAINING_QUEUE"
 
string TRAINED_QUEUE = "TRAINED_QUEUE"
 
string CONCURRENCY_QUEUE = "CONCURRENCY_QUEUE"
 
string DB_SECTION = "mysql"
 
string DB_HOST = "db_host"
 
string DB_PORT = "db_port"
 
string DB_USER = "db_user"
 
string DB_PWD = "db_pwd"
 
string DB_SITES = "db_dc_sites"
 
string DB_URLS = "db_dc_urls"
 
string DB_SCRAPERS = "db_dc_scrapers"
 
string DC_CONTENTS_DB_NAME = "db_dc_contents"
 
string SQL_TMP_TABLE = "metrics"
 
string MYSQL_ENGINE = "mysql_engine"
 
string MSG_ERROR_OK = ""
 
string MSG_ERROR_LOAD_DB_BACKEND = "Error loading DB backend. "
 
string MSG_ERROR_LOAD_CONFIG = "Error loading config file."
 
string MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Error loading logging config file."
 
string MSG_ERROR_LOAD_EXTRACTORS = "Error load extractors "
 
string MSG_ERROR_TEMPLATE_EXTRACTION = "Error template extraction "
 
string MSG_ERROR_DYNAMIC_EXTRACTION = "Error dynamic extraction "
 
string MSG_ERROR_LOAD_OPTIONS = "Error load options"
 
string MSG_INFO_PREPARE_CONTENT = "Prepare content: "
 
string MSG_ERROR_ADJUST_PR = "Error adjust partial references. "
 
string MSG_ERROR_PROCESS = "Processor Storing Contents process batch error: "
 
string MSG_ERROR_CALC_METRICS = "Smth goes wrong. See traceback: "
 
int ERROR_OK = 0
 
int EXIT_SUCCESS = 0
 
int EXIT_FAILURE = 1
 
int SQLITE_TIMEOUT = 30
 
int TIME_EXECUTION_LIMIT = 20
 
string PYTHON_BINARY = "/usr/bin/python"
 
string PROCESSOR_EMPTY = ""
 
string SCRAPER_BINARY = "./scraper.py"
 
string SCRAPER_CFG = "--config=../ini/scraper.ini"
 
string PROCESSOR_STORE = "STORE"
 
string STORE_PROCESSOR_BINARY = "./processor_store_content_kvdb.py"
 
string STORE_PROCESSOR_CFG = "--config=../ini/processor-store-content-in-kvdb.ini"
 
string PROCESSOR_FEED_PARSER = "FEED_PARSER"
 
string PROCESSOR_RSS = "RSS"
 
string REPROCESS_KEY = "reprocess"
 
int REPROCESS_VALUE_NO = 0
 
string RECRAWL_KEY = "recrawl"
 
int RECRAWL_VALUE_NO = 0
 
string PROCESSOR_FEED_PARSER_BINARY = "./processor_feed_parser.py"
 
string PROCESSOR_FEED_PARSER_CFG = "--config=../ini/processor_feed_parser.ini"
 
string PROCESSOR_SCRAPER_MULTI_ITEMS = "SCRAPER_MULTI_ITEMS"
 
string SCRAPER_MULTI_ITEMS_BINARY = "./scraper_multi_items_task.py"
 
string SCRAPER_MULTI_ITEMS_CFG = "--config=../ini/scraper_multi_items_task.ini"
 
string PROCESSOR_SCRAPER_CUSTOM = "SCRAPER_CUSTOM"
 
string SCRAPER_CUSTOM_BINARY = "./scraper_custom_task.py"
 
string SCRAPER_CUSTOM_CFG = "--config=../ini/scraper_custom_task.ini"
 
string EXTRACTOR_NAME_ML = "ML extractor"
 
string EXTRACTOR_NAME_ALCHEMY = "Alchemy extractor"
 
string EXTRACTOR_NAME_BOILERPIPE = "Boilerpipe extractor"
 
string MODULES_KEY = "modules"
 
string ALGORITHM_KEY = "algorithm"
 
string ALGORITHM_NAME_KEY = "algorithm_name"
 
string PROPERTIES_KEY = "properties"
 
string TEMPLATE_KEY = "template"
 
string RANK_KEY = "rank"
 
string USE_HTML5_KEY = "html5"
 
int SCRAPER_RANK_INIT = 10
 
int USE_HTML5_YES = 1
 
int USE_HTML5_NO = 0
 
list TIMEZONE_LIST = ["JST"]
 
string COMMON_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
 
int DEFAULT_TRESHOLD_VALUE = 0
 
int DEFAULT_METRIC_VALUE = 0
 
string DEFAULT_COMPARATOR = ""
 
int WORDS_TRESHOLD_VALUE = 100
 
string WORDS_COMPARATOR = "round"
 
int SENTENCES_TRESHOLD_VALUE = 5
 
string SENTENCES_COMPARATOR = "round"
 
int ARI_TRESHOLD_VALUE = 1
 
string ARI_COMPARATOR = "round"
 
string ARTICLE_CORPUS = "content_encoded"
 
string GOOGLE_SEARCH_SITE_ID = "google_search"
 
string CABINET_SEARCH_SITE_ID = "cabinet_search"
 
string OLD_GOOGLE_SEARCH_SITE_ID = "d57f144e7b26c9976769ea94f18b9064"
 
string OLD_CABINET_SEARCH_SITE_ID = "1fe592caf03fd50c5f065c30f82b13bb"
 
string SCRAPER_APP_CLASS_NAME = "Scraper"
 
string SCRAPER_APP_CLASS_CFG = "../ini/scraper.ini"
 
string STORE_APP_CLASS_NAME = "???"
 
string STORE_APP_CLASS_CFG = "../ini/processor-store-content-in-kvdb.ini"
 
string PROCESSOR_FEED_PARSER_CLASS_NAME = "ProcessorFeedParser"
 
string PROCESSOR_FEED_PARSER_CLASS_CFG = "../ini/processor_feed_parser.ini"
 
string SCRAPER_MULTI_ITEMS_APP_CLASS_NAME = "ScraperMultiItemsTask"
 
string SCRAPER_MULTI_ITEMS_APP_CLASS_CFG = "../ini/scraper_multi_items_task.ini"
 
string SCRAPER_CUSTOM_JSON_APP_CLASS_NAME = "ScraperCustomJson"
 
string SCRAPER_CUSTOM_JSON_APP_CLASS_CFG = "../ini/scraper_custom_task.ini"
 
string TAG_REDUCE_MASK_PROP_NAME = "SCRAPER_TEXT_REDUCER_MASK"
 
string TAG_REDUCE_PROP_NAME = "SCRAPER_TEXT_REDUCER"
 
string TAG_MARKUP_PROP_NAME = "SCRAPER_TEXT_MARKUP"
 
string TAG_KEEP_ATTRIBUTES_PROP_NAME = "SCRAPER_KEEP_ATTRIBUTES"
 
string TAG_CLOSE_VOID_PROP_NAME = "CLOSE_VOID"
 
string TAGS_TYPES_NAME = "TAGS_TYPES"
 
string PDATE_TIMEZONES_NAME = "PDATE_TIMEZONES"
 
string PDATE_DAY_MONTH_ORDER_NAME = "PDATE_DAY_MONTH_ORDER"
 
string LANG_PROP_NAME = "SCRAPER_LANG_DETECT"
 
string MEDIA_LIMITS_NAME = "MEDIA_LIMITS"
 
string HTTP_REDIRECT_LINK_NAME = "HTTP_REDIRECT_LINK"
 
string LOCATION_NAME = "Location"
 
int HTTP_REDIRECT_LINK_VALUE_URL = 1
 
int HTTP_REDIRECT_LINK_VALUE_LOCATION = 2
 
int HTTP_REDIRECT_LINK_VALUE_REDIRECT_URL = 3
 
int HTTP_REDIRECT_LINK_VALUE_SOURCE_URL = 4
 
string HTTP_REDIRECT_LINK_LINK_TAG_NAME = 'link'
 
string REDIRECT_URL_NAME = 'redirect_url'
 
int TEMPLATE_CONDITION_TYPE_URL = 0
 

Detailed Description

@package docstring
 @file Constants.py
 @author Alexey, bgv <developers.hce@gmail.com>, Alexander Vybornyh <alexander.hce.cluster@gmail.com>
 @link http://hierarchical-cluster-engine.com/
 @copyright Copyright © 2013-2015 IOIX Ukraine
 @license http://hierarchical-cluster-engine.com/license/
 @package HCE project node API
 @since 0.1

Variable Documentation

◆ ALGORITHM_KEY

string dc_processor.Constants.ALGORITHM_KEY = "algorithm"

Definition at line 165 of file Constants.py.

◆ ALGORITHM_NAME_KEY

string dc_processor.Constants.ALGORITHM_NAME_KEY = "algorithm_name"

Definition at line 166 of file Constants.py.

◆ ARI_COMPARATOR

string dc_processor.Constants.ARI_COMPARATOR = "round"

Definition at line 201 of file Constants.py.

◆ ARI_TRESHOLD_VALUE

int dc_processor.Constants.ARI_TRESHOLD_VALUE = 1

Definition at line 199 of file Constants.py.

◆ ARTICLE_CORPUS

string dc_processor.Constants.ARTICLE_CORPUS = "content_encoded"

Definition at line 203 of file Constants.py.

◆ CABINET_SEARCH_SITE_ID

string dc_processor.Constants.CABINET_SEARCH_SITE_ID = "cabinet_search"

Definition at line 208 of file Constants.py.

◆ COMMENTNS

string dc_processor.Constants.COMMENTNS = "comments"

Definition at line 35 of file Constants.py.

◆ COMMON_DATE_FORMAT

string dc_processor.Constants.COMMON_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

Definition at line 177 of file Constants.py.

◆ CONCURRENCY_QUEUE

string dc_processor.Constants.CONCURRENCY_QUEUE = "CONCURRENCY_QUEUE"

Definition at line 81 of file Constants.py.

◆ CONTENT

string dc_processor.Constants.CONTENT = "content"

Definition at line 38 of file Constants.py.

◆ CONTENT_HASH_ACTION_DELETE

int dc_processor.Constants.CONTENT_HASH_ACTION_DELETE = 1

Definition at line 58 of file Constants.py.

◆ CONTENT_HASH_ALGORITHM_BBHASH

int dc_processor.Constants.CONTENT_HASH_ALGORITHM_BBHASH = 6

Definition at line 54 of file Constants.py.

◆ CONTENT_HASH_ALGORITHM_CRC32

int dc_processor.Constants.CONTENT_HASH_ALGORITHM_CRC32 = 2

Definition at line 50 of file Constants.py.

◆ CONTENT_HASH_ALGORITHM_EMPTY

int dc_processor.Constants.CONTENT_HASH_ALGORITHM_EMPTY = 0

Definition at line 48 of file Constants.py.

◆ CONTENT_HASH_ALGORITHM_MD5

int dc_processor.Constants.CONTENT_HASH_ALGORITHM_MD5 = 1

Definition at line 49 of file Constants.py.

◆ CONTENT_HASH_ALGORITHM_MD5_WITHOUT_HTML

int dc_processor.Constants.CONTENT_HASH_ALGORITHM_MD5_WITHOUT_HTML = 9

Definition at line 57 of file Constants.py.

◆ CONTENT_HASH_ALGORITHM_MRSH_V2

int dc_processor.Constants.CONTENT_HASH_ALGORITHM_MRSH_V2 = 7

Definition at line 55 of file Constants.py.

◆ CONTENT_HASH_ALGORITHM_MVHASH_B

int dc_processor.Constants.CONTENT_HASH_ALGORITHM_MVHASH_B = 8

Definition at line 56 of file Constants.py.

◆ CONTENT_HASH_ALGORITHM_SDHASH

int dc_processor.Constants.CONTENT_HASH_ALGORITHM_SDHASH = 5

Definition at line 53 of file Constants.py.

◆ CONTENT_HASH_ALGORITHM_SHA1

int dc_processor.Constants.CONTENT_HASH_ALGORITHM_SHA1 = 4

Definition at line 52 of file Constants.py.

◆ CONTENT_HASH_ALGORITHM_SOUNDEX

int dc_processor.Constants.CONTENT_HASH_ALGORITHM_SOUNDEX = 3

Definition at line 51 of file Constants.py.

◆ DB_HOST

string dc_processor.Constants.DB_HOST = "db_host"

Definition at line 84 of file Constants.py.

◆ DB_PORT

string dc_processor.Constants.DB_PORT = "db_port"

Definition at line 85 of file Constants.py.

◆ DB_PWD

string dc_processor.Constants.DB_PWD = "db_pwd"

Definition at line 87 of file Constants.py.

◆ DB_SCRAPERS

string dc_processor.Constants.DB_SCRAPERS = "db_dc_scrapers"

Definition at line 90 of file Constants.py.

◆ DB_SECTION

string dc_processor.Constants.DB_SECTION = "mysql"

Definition at line 83 of file Constants.py.

◆ DB_SITES

string dc_processor.Constants.DB_SITES = "db_dc_sites"

Definition at line 88 of file Constants.py.

◆ DB_URLS

string dc_processor.Constants.DB_URLS = "db_dc_urls"

Definition at line 89 of file Constants.py.

◆ DB_USER

string dc_processor.Constants.DB_USER = "db_user"

Definition at line 86 of file Constants.py.

◆ DC_CONTENTS_DB_NAME

string dc_processor.Constants.DC_CONTENTS_DB_NAME = "db_dc_contents"

Definition at line 91 of file Constants.py.

◆ DEFAULT_COMPARATOR

string dc_processor.Constants.DEFAULT_COMPARATOR = ""

Definition at line 184 of file Constants.py.

◆ DEFAULT_METRIC_VALUE

int dc_processor.Constants.DEFAULT_METRIC_VALUE = 0

Definition at line 183 of file Constants.py.

◆ DEFAULT_TRESHOLD_VALUE

int dc_processor.Constants.DEFAULT_TRESHOLD_VALUE = 0

Definition at line 182 of file Constants.py.

◆ ERROR_OK

int dc_processor.Constants.ERROR_OK = 0

Definition at line 112 of file Constants.py.

◆ EXIT_FAILURE

int dc_processor.Constants.EXIT_FAILURE = 1

Definition at line 116 of file Constants.py.

◆ EXIT_SUCCESS

int dc_processor.Constants.EXIT_SUCCESS = 0

Definition at line 115 of file Constants.py.

◆ EXTRACTOR_NAME_ALCHEMY

string dc_processor.Constants.EXTRACTOR_NAME_ALCHEMY = "Alchemy extractor"

Definition at line 161 of file Constants.py.

◆ EXTRACTOR_NAME_BOILERPIPE

string dc_processor.Constants.EXTRACTOR_NAME_BOILERPIPE = "Boilerpipe extractor"

Definition at line 162 of file Constants.py.

◆ EXTRACTOR_NAME_ML

string dc_processor.Constants.EXTRACTOR_NAME_ML = "ML extractor"

Definition at line 160 of file Constants.py.

◆ GOOGLE_SEARCH_SITE_ID

string dc_processor.Constants.GOOGLE_SEARCH_SITE_ID = "google_search"

Definition at line 207 of file Constants.py.

◆ HTML_LANG

string dc_processor.Constants.HTML_LANG = "html_lang"

Definition at line 30 of file Constants.py.

◆ HTTP_REDIRECT_LINK_LINK_TAG_NAME

string dc_processor.Constants.HTTP_REDIRECT_LINK_LINK_TAG_NAME = 'link'

Definition at line 250 of file Constants.py.

◆ HTTP_REDIRECT_LINK_NAME

string dc_processor.Constants.HTTP_REDIRECT_LINK_NAME = "HTTP_REDIRECT_LINK"

Definition at line 244 of file Constants.py.

◆ HTTP_REDIRECT_LINK_VALUE_LOCATION

int dc_processor.Constants.HTTP_REDIRECT_LINK_VALUE_LOCATION = 2

Definition at line 247 of file Constants.py.

◆ HTTP_REDIRECT_LINK_VALUE_REDIRECT_URL

int dc_processor.Constants.HTTP_REDIRECT_LINK_VALUE_REDIRECT_URL = 3

Definition at line 248 of file Constants.py.

◆ HTTP_REDIRECT_LINK_VALUE_SOURCE_URL

int dc_processor.Constants.HTTP_REDIRECT_LINK_VALUE_SOURCE_URL = 4

Definition at line 249 of file Constants.py.

◆ HTTP_REDIRECT_LINK_VALUE_URL

int dc_processor.Constants.HTTP_REDIRECT_LINK_VALUE_URL = 1

Definition at line 246 of file Constants.py.

◆ LANG_PROP_NAME

string dc_processor.Constants.LANG_PROP_NAME = "SCRAPER_LANG_DETECT"

Definition at line 239 of file Constants.py.

◆ LOCATION_NAME

string dc_processor.Constants.LOCATION_NAME = "Location"

Definition at line 245 of file Constants.py.

◆ LOGGER_NAME

dc_processor.Constants.LOGGER_NAME = APP_CONSTS.LOGGER_NAME

Definition at line 11 of file Constants.py.

◆ MEDIA_LIMITS_NAME

string dc_processor.Constants.MEDIA_LIMITS_NAME = "MEDIA_LIMITS"

Definition at line 241 of file Constants.py.

◆ MODULES_KEY

string dc_processor.Constants.MODULES_KEY = "modules"

Definition at line 164 of file Constants.py.

◆ MSG_ERROR_ADJUST_PR

string dc_processor.Constants.MSG_ERROR_ADJUST_PR = "Error adjust partial references. "

Definition at line 106 of file Constants.py.

◆ MSG_ERROR_CALC_METRICS

string dc_processor.Constants.MSG_ERROR_CALC_METRICS = "Smth goes wrong. See traceback: "

Definition at line 108 of file Constants.py.

◆ MSG_ERROR_DYNAMIC_EXTRACTION

string dc_processor.Constants.MSG_ERROR_DYNAMIC_EXTRACTION = "Error dynamic extraction "

Definition at line 103 of file Constants.py.

◆ MSG_ERROR_LOAD_CONFIG

string dc_processor.Constants.MSG_ERROR_LOAD_CONFIG = "Error loading config file."

Definition at line 99 of file Constants.py.

◆ MSG_ERROR_LOAD_DB_BACKEND

string dc_processor.Constants.MSG_ERROR_LOAD_DB_BACKEND = "Error loading DB backend. "

Definition at line 98 of file Constants.py.

◆ MSG_ERROR_LOAD_EXTRACTORS

string dc_processor.Constants.MSG_ERROR_LOAD_EXTRACTORS = "Error load extractors "

Definition at line 101 of file Constants.py.

◆ MSG_ERROR_LOAD_LOG_CONFIG_FILE

string dc_processor.Constants.MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Error loading logging config file."

Definition at line 100 of file Constants.py.

◆ MSG_ERROR_LOAD_OPTIONS

string dc_processor.Constants.MSG_ERROR_LOAD_OPTIONS = "Error load options"

Definition at line 104 of file Constants.py.

◆ MSG_ERROR_OK

string dc_processor.Constants.MSG_ERROR_OK = ""

Definition at line 97 of file Constants.py.

◆ MSG_ERROR_PROCESS

string dc_processor.Constants.MSG_ERROR_PROCESS = "Processor Storing Contents process batch error: "

Definition at line 107 of file Constants.py.

◆ MSG_ERROR_TEMPLATE_EXTRACTION

string dc_processor.Constants.MSG_ERROR_TEMPLATE_EXTRACTION = "Error template extraction "

Definition at line 102 of file Constants.py.

◆ MSG_INFO_PREPARE_CONTENT

string dc_processor.Constants.MSG_INFO_PREPARE_CONTENT = "Prepare content: "

Definition at line 105 of file Constants.py.

◆ MYSQL_ENGINE

string dc_processor.Constants.MYSQL_ENGINE = "mysql_engine"

Definition at line 94 of file Constants.py.

◆ OLD_CABINET_SEARCH_SITE_ID

string dc_processor.Constants.OLD_CABINET_SEARCH_SITE_ID = "1fe592caf03fd50c5f065c30f82b13bb"

Definition at line 211 of file Constants.py.

◆ OLD_GOOGLE_SEARCH_SITE_ID

string dc_processor.Constants.OLD_GOOGLE_SEARCH_SITE_ID = "d57f144e7b26c9976769ea94f18b9064"

Definition at line 210 of file Constants.py.

◆ PARENT_RSS_FEED

string dc_processor.Constants.PARENT_RSS_FEED = "parent_rss_feed"

Definition at line 31 of file Constants.py.

◆ PARENT_RSS_FEED_URLMD5

string dc_processor.Constants.PARENT_RSS_FEED_URLMD5 = "parent_rss_feed_urlMd5"

Definition at line 32 of file Constants.py.

◆ PARENT_URL_MD5

string dc_processor.Constants.PARENT_URL_MD5 = ""

Definition at line 59 of file Constants.py.

◆ PDATE_DAY_MONTH_ORDER_NAME

string dc_processor.Constants.PDATE_DAY_MONTH_ORDER_NAME = "PDATE_DAY_MONTH_ORDER"

Definition at line 237 of file Constants.py.

◆ PDATE_TIMEZONES_NAME

string dc_processor.Constants.PDATE_TIMEZONES_NAME = "PDATE_TIMEZONES"

Definition at line 236 of file Constants.py.

◆ PROCESS_ALGORITHM_ALCHEMY

string dc_processor.Constants.PROCESS_ALGORITHM_ALCHEMY = "ALCHEMY"

Definition at line 72 of file Constants.py.

◆ PROCESS_ALGORITHM_BOILERPIPE

string dc_processor.Constants.PROCESS_ALGORITHM_BOILERPIPE = "BOILERPIPE"

Definition at line 73 of file Constants.py.

◆ PROCESS_ALGORITHM_CONCURRENCY

string dc_processor.Constants.PROCESS_ALGORITHM_CONCURRENCY = "concurrency"

Definition at line 69 of file Constants.py.

◆ PROCESS_ALGORITHM_FEED_PARSER

string dc_processor.Constants.PROCESS_ALGORITHM_FEED_PARSER = "feed_parser"

Definition at line 71 of file Constants.py.

◆ PROCESS_ALGORITHM_GOOSE

string dc_processor.Constants.PROCESS_ALGORITHM_GOOSE = "GOOSE"

Definition at line 75 of file Constants.py.

◆ PROCESS_ALGORITHM_METRIC

string dc_processor.Constants.PROCESS_ALGORITHM_METRIC = "metric_based"

Definition at line 70 of file Constants.py.

◆ PROCESS_ALGORITHM_ML

string dc_processor.Constants.PROCESS_ALGORITHM_ML = "ML"

Definition at line 77 of file Constants.py.

◆ PROCESS_ALGORITHM_NEWSPAPER

string dc_processor.Constants.PROCESS_ALGORITHM_NEWSPAPER = "NEWSPAPER"

Definition at line 74 of file Constants.py.

◆ PROCESS_ALGORITHM_PREDICTION

string dc_processor.Constants.PROCESS_ALGORITHM_PREDICTION = "prediction"

Definition at line 68 of file Constants.py.

◆ PROCESS_ALGORITHM_REGULAR

string dc_processor.Constants.PROCESS_ALGORITHM_REGULAR = "regular"

Definition at line 66 of file Constants.py.

◆ PROCESS_ALGORITHM_SCRAPY

string dc_processor.Constants.PROCESS_ALGORITHM_SCRAPY = "SCRAPY"

Definition at line 76 of file Constants.py.

◆ PROCESS_ALGORITHM_TRAINING

string dc_processor.Constants.PROCESS_ALGORITHM_TRAINING = "training"

Definition at line 67 of file Constants.py.

◆ PROCESSOR_EMPTY

string dc_processor.Constants.PROCESSOR_EMPTY = ""

Definition at line 127 of file Constants.py.

◆ PROCESSOR_FEED_PARSER

string dc_processor.Constants.PROCESSOR_FEED_PARSER = "FEED_PARSER"

Definition at line 137 of file Constants.py.

◆ PROCESSOR_FEED_PARSER_BINARY

string dc_processor.Constants.PROCESSOR_FEED_PARSER_BINARY = "./processor_feed_parser.py"

Definition at line 146 of file Constants.py.

◆ PROCESSOR_FEED_PARSER_CFG

string dc_processor.Constants.PROCESSOR_FEED_PARSER_CFG = "--config=../ini/processor_feed_parser.ini"

Definition at line 147 of file Constants.py.

◆ PROCESSOR_FEED_PARSER_CLASS_CFG

string dc_processor.Constants.PROCESSOR_FEED_PARSER_CLASS_CFG = "../ini/processor_feed_parser.ini"

Definition at line 220 of file Constants.py.

◆ PROCESSOR_FEED_PARSER_CLASS_NAME

string dc_processor.Constants.PROCESSOR_FEED_PARSER_CLASS_NAME = "ProcessorFeedParser"

Definition at line 219 of file Constants.py.

◆ PROCESSOR_RSS

string dc_processor.Constants.PROCESSOR_RSS = "RSS"

Definition at line 138 of file Constants.py.

◆ PROCESSOR_SCRAPER_CUSTOM

string dc_processor.Constants.PROCESSOR_SCRAPER_CUSTOM = "SCRAPER_CUSTOM"

Definition at line 155 of file Constants.py.

◆ PROCESSOR_SCRAPER_MULTI_ITEMS

string dc_processor.Constants.PROCESSOR_SCRAPER_MULTI_ITEMS = "SCRAPER_MULTI_ITEMS"

Definition at line 150 of file Constants.py.

◆ PROCESSOR_STORE

string dc_processor.Constants.PROCESSOR_STORE = "STORE"

Definition at line 132 of file Constants.py.

◆ PROPERTIES_KEY

string dc_processor.Constants.PROPERTIES_KEY = "properties"

Definition at line 167 of file Constants.py.

◆ PUBLISHED

string dc_processor.Constants.PUBLISHED = "published"

Definition at line 37 of file Constants.py.

◆ PYTHON_BINARY

string dc_processor.Constants.PYTHON_BINARY = "/usr/bin/python"

Definition at line 124 of file Constants.py.

◆ RANK_KEY

string dc_processor.Constants.RANK_KEY = "rank"

Definition at line 169 of file Constants.py.

◆ RECRAWL_KEY

string dc_processor.Constants.RECRAWL_KEY = "recrawl"

Definition at line 143 of file Constants.py.

◆ RECRAWL_VALUE_NO

int dc_processor.Constants.RECRAWL_VALUE_NO = 0

Definition at line 144 of file Constants.py.

◆ REDIRECT_URL_NAME

string dc_processor.Constants.REDIRECT_URL_NAME = 'redirect_url'

Definition at line 251 of file Constants.py.

◆ REPROCESS_KEY

string dc_processor.Constants.REPROCESS_KEY = "reprocess"

Definition at line 141 of file Constants.py.

◆ REPROCESS_VALUE_NO

int dc_processor.Constants.REPROCESS_VALUE_NO = 0

Definition at line 142 of file Constants.py.

◆ SCRAPER_APP_CLASS_CFG

string dc_processor.Constants.SCRAPER_APP_CLASS_CFG = "../ini/scraper.ini"

Definition at line 216 of file Constants.py.

◆ SCRAPER_APP_CLASS_NAME

string dc_processor.Constants.SCRAPER_APP_CLASS_NAME = "Scraper"

Definition at line 215 of file Constants.py.

◆ SCRAPER_BINARY

string dc_processor.Constants.SCRAPER_BINARY = "./scraper.py"

Definition at line 128 of file Constants.py.

◆ SCRAPER_CFG

string dc_processor.Constants.SCRAPER_CFG = "--config=../ini/scraper.ini"

Definition at line 129 of file Constants.py.

◆ SCRAPER_CUSTOM_BINARY

string dc_processor.Constants.SCRAPER_CUSTOM_BINARY = "./scraper_custom_task.py"

Definition at line 156 of file Constants.py.

◆ SCRAPER_CUSTOM_CFG

string dc_processor.Constants.SCRAPER_CUSTOM_CFG = "--config=../ini/scraper_custom_task.ini"

Definition at line 157 of file Constants.py.

◆ SCRAPER_CUSTOM_JSON_APP_CLASS_CFG

string dc_processor.Constants.SCRAPER_CUSTOM_JSON_APP_CLASS_CFG = "../ini/scraper_custom_task.ini"

Definition at line 226 of file Constants.py.

◆ SCRAPER_CUSTOM_JSON_APP_CLASS_NAME

string dc_processor.Constants.SCRAPER_CUSTOM_JSON_APP_CLASS_NAME = "ScraperCustomJson"

Definition at line 225 of file Constants.py.

◆ SCRAPER_MULTI_ITEMS_APP_CLASS_CFG

string dc_processor.Constants.SCRAPER_MULTI_ITEMS_APP_CLASS_CFG = "../ini/scraper_multi_items_task.ini"

Definition at line 223 of file Constants.py.

◆ SCRAPER_MULTI_ITEMS_APP_CLASS_NAME

string dc_processor.Constants.SCRAPER_MULTI_ITEMS_APP_CLASS_NAME = "ScraperMultiItemsTask"

Definition at line 222 of file Constants.py.

◆ SCRAPER_MULTI_ITEMS_BINARY

string dc_processor.Constants.SCRAPER_MULTI_ITEMS_BINARY = "./scraper_multi_items_task.py"

Definition at line 151 of file Constants.py.

◆ SCRAPER_MULTI_ITEMS_CFG

string dc_processor.Constants.SCRAPER_MULTI_ITEMS_CFG = "--config=../ini/scraper_multi_items_task.ini"

Definition at line 152 of file Constants.py.

◆ SCRAPER_RANK_INIT

int dc_processor.Constants.SCRAPER_RANK_INIT = 10

Definition at line 172 of file Constants.py.

◆ SENTENCES_COMPARATOR

string dc_processor.Constants.SENTENCES_COMPARATOR = "round"

Definition at line 196 of file Constants.py.

◆ SENTENCES_TRESHOLD_VALUE

int dc_processor.Constants.SENTENCES_TRESHOLD_VALUE = 5

Definition at line 194 of file Constants.py.

◆ SQL_TMP_TABLE

string dc_processor.Constants.SQL_TMP_TABLE = "metrics"

Definition at line 92 of file Constants.py.

◆ SQLITE_TIMEOUT

int dc_processor.Constants.SQLITE_TIMEOUT = 30

Definition at line 119 of file Constants.py.

◆ STORE_APP_CLASS_CFG

string dc_processor.Constants.STORE_APP_CLASS_CFG = "../ini/processor-store-content-in-kvdb.ini"

Definition at line 218 of file Constants.py.

◆ STORE_APP_CLASS_NAME

string dc_processor.Constants.STORE_APP_CLASS_NAME = "???"

Definition at line 217 of file Constants.py.

◆ STORE_PROCESSOR_BINARY

string dc_processor.Constants.STORE_PROCESSOR_BINARY = "./processor_store_content_kvdb.py"

Definition at line 133 of file Constants.py.

◆ STORE_PROCESSOR_CFG

string dc_processor.Constants.STORE_PROCESSOR_CFG = "--config=../ini/processor-store-content-in-kvdb.ini"

Definition at line 134 of file Constants.py.

◆ SUMMARY

string dc_processor.Constants.SUMMARY = "summary"

Definition at line 33 of file Constants.py.

◆ SUMMARY_DETAIL

string dc_processor.Constants.SUMMARY_DETAIL = "summary_detail"

Definition at line 34 of file Constants.py.

◆ TAG_AUTHOR

string dc_processor.Constants.TAG_AUTHOR = "author"

Definition at line 20 of file Constants.py.

◆ TAG_CLOSE_VOID_PROP_NAME

string dc_processor.Constants.TAG_CLOSE_VOID_PROP_NAME = "CLOSE_VOID"

Definition at line 232 of file Constants.py.

◆ TAG_CONTENT_UTF8_ENCODED

string dc_processor.Constants.TAG_CONTENT_UTF8_ENCODED = "content_encoded"

Definition at line 22 of file Constants.py.

◆ TAG_DC_DATE

string dc_processor.Constants.TAG_DC_DATE = "dc_date"

Definition at line 19 of file Constants.py.

◆ TAG_DESCRIPTION

string dc_processor.Constants.TAG_DESCRIPTION = "description"

Definition at line 17 of file Constants.py.

◆ TAG_ENCLOSURE

string dc_processor.Constants.TAG_ENCLOSURE = "enclosure"

Definition at line 26 of file Constants.py.

◆ TAG_FEED_URL

string dc_processor.Constants.TAG_FEED_URL = "feed_url"

Definition at line 43 of file Constants.py.

◆ TAG_GOOGLE

string dc_processor.Constants.TAG_GOOGLE = "google_search"

Definition at line 27 of file Constants.py.

◆ TAG_GOOGLE_TOTAL

string dc_processor.Constants.TAG_GOOGLE_TOTAL = "google_search_total"

Definition at line 28 of file Constants.py.

◆ TAG_GUID

string dc_processor.Constants.TAG_GUID = "guid"

Definition at line 21 of file Constants.py.

◆ TAG_KEEP_ATTRIBUTES_PROP_NAME

string dc_processor.Constants.TAG_KEEP_ATTRIBUTES_PROP_NAME = "SCRAPER_KEEP_ATTRIBUTES"

Definition at line 231 of file Constants.py.

◆ TAG_KEYWORDS

string dc_processor.Constants.TAG_KEYWORDS = "keywords"

Definition at line 23 of file Constants.py.

◆ TAG_LINK

string dc_processor.Constants.TAG_LINK = "link"

Definition at line 16 of file Constants.py.

◆ TAG_MARKUP_PROP_NAME

string dc_processor.Constants.TAG_MARKUP_PROP_NAME = "SCRAPER_TEXT_MARKUP"

Definition at line 230 of file Constants.py.

◆ TAG_MEDIA

string dc_processor.Constants.TAG_MEDIA = "media"

Definition at line 14 of file Constants.py.

◆ TAG_MEDIA_CONTENT

string dc_processor.Constants.TAG_MEDIA_CONTENT = "media_content"

Definition at line 25 of file Constants.py.

◆ TAG_MEDIA_THUMBNAIL

string dc_processor.Constants.TAG_MEDIA_THUMBNAIL = "media_thumbnail"

Definition at line 24 of file Constants.py.

◆ TAG_ORDER_NUMBER

string dc_processor.Constants.TAG_ORDER_NUMBER = "order_number"

Definition at line 41 of file Constants.py.

◆ TAG_PUB_DATE

string dc_processor.Constants.TAG_PUB_DATE = "pubdate"

Definition at line 18 of file Constants.py.

◆ TAG_PUBDATE_TZ

string dc_processor.Constants.TAG_PUBDATE_TZ = 'pubdate_tz'

Definition at line 46 of file Constants.py.

◆ TAG_REDUCE_MASK_PROP_NAME

string dc_processor.Constants.TAG_REDUCE_MASK_PROP_NAME = "SCRAPER_TEXT_REDUCER_MASK"

Definition at line 228 of file Constants.py.

◆ TAG_REDUCE_PROP_NAME

string dc_processor.Constants.TAG_REDUCE_PROP_NAME = "SCRAPER_TEXT_REDUCER"

Definition at line 229 of file Constants.py.

◆ TAG_SOURCE_URL

string dc_processor.Constants.TAG_SOURCE_URL = "source_url"

Definition at line 42 of file Constants.py.

◆ TAG_SUMMARY_LANG

string dc_processor.Constants.TAG_SUMMARY_LANG = "summary_lang"

Definition at line 29 of file Constants.py.

◆ TAG_TITLE

string dc_processor.Constants.TAG_TITLE = "title"

Definition at line 15 of file Constants.py.

◆ TAG_TYPE_DATETIME

string dc_processor.Constants.TAG_TYPE_DATETIME = 'datetime'

Definition at line 45 of file Constants.py.

◆ TAGS

string dc_processor.Constants.TAGS = "tags"

Definition at line 36 of file Constants.py.

◆ TAGS_RULES_MASK_DEFAULT_VALUE

int dc_processor.Constants.TAGS_RULES_MASK_DEFAULT_VALUE = 4

Definition at line 61 of file Constants.py.

◆ TAGS_RULES_MASK_MANDATORY_FIELD

int dc_processor.Constants.TAGS_RULES_MASK_MANDATORY_FIELD = 1

Definition at line 63 of file Constants.py.

◆ TAGS_RULES_MASK_RULE_PRIORITY

int dc_processor.Constants.TAGS_RULES_MASK_RULE_PRIORITY = 2

Definition at line 62 of file Constants.py.

◆ TAGS_TYPES_NAME

string dc_processor.Constants.TAGS_TYPES_NAME = "TAGS_TYPES"

Definition at line 234 of file Constants.py.

◆ TEMPLATE_CONDITION_TYPE_URL

int dc_processor.Constants.TEMPLATE_CONDITION_TYPE_URL = 0

Definition at line 254 of file Constants.py.

◆ TEMPLATE_KEY

string dc_processor.Constants.TEMPLATE_KEY = "template"

Definition at line 168 of file Constants.py.

◆ TIME_EXECUTION_LIMIT

int dc_processor.Constants.TIME_EXECUTION_LIMIT = 20

Definition at line 122 of file Constants.py.

◆ TIMEZONE_LIST

list dc_processor.Constants.TIMEZONE_LIST = ["JST"]

Definition at line 176 of file Constants.py.

◆ TRAINED_QUEUE

string dc_processor.Constants.TRAINED_QUEUE = "TRAINED_QUEUE"

Definition at line 80 of file Constants.py.

◆ TRAINING_QUEUE

string dc_processor.Constants.TRAINING_QUEUE = "TRAINING_QUEUE"

Definition at line 79 of file Constants.py.

◆ UPDATED

string dc_processor.Constants.UPDATED = "updated"

Definition at line 39 of file Constants.py.

◆ UPDATED_PARSED

string dc_processor.Constants.UPDATED_PARSED = "updated_parsed"

Definition at line 40 of file Constants.py.

◆ USE_HTML5_KEY

string dc_processor.Constants.USE_HTML5_KEY = "html5"

Definition at line 170 of file Constants.py.

◆ USE_HTML5_NO

int dc_processor.Constants.USE_HTML5_NO = 0

Definition at line 174 of file Constants.py.

◆ USE_HTML5_YES

int dc_processor.Constants.USE_HTML5_YES = 1

Definition at line 173 of file Constants.py.

◆ WORDS_COMPARATOR

string dc_processor.Constants.WORDS_COMPARATOR = "round"

Definition at line 190 of file Constants.py.

◆ WORDS_TRESHOLD_VALUE

int dc_processor.Constants.WORDS_TRESHOLD_VALUE = 100

Definition at line 188 of file Constants.py.