HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.
2.0.0-chaika
Hierarchical Cluster Engine Python language binding
|
Classes | |
class | HTML5_SEMANTIC_TAGS |
Variables | |
LOGGER_NAME = APP_CONSTS.LOGGER_NAME | |
string | TAG_MEDIA = "media" |
string | TAG_TITLE = "title" |
string | TAG_LINK = "link" |
string | TAG_DESCRIPTION = "description" |
string | TAG_PUB_DATE = "pubdate" |
string | TAG_DC_DATE = "dc_date" |
string | TAG_AUTHOR = "author" |
string | TAG_GUID = "guid" |
string | TAG_CONTENT_UTF8_ENCODED = "content_encoded" |
string | TAG_KEYWORDS = "keywords" |
string | TAG_MEDIA_THUMBNAIL = "media_thumbnail" |
string | TAG_MEDIA_CONTENT = "media_content" |
string | TAG_ENCLOSURE = "enclosure" |
string | TAG_GOOGLE = "google_search" |
string | TAG_GOOGLE_TOTAL = "google_search_total" |
string | TAG_SUMMARY_LANG = "summary_lang" |
string | HTML_LANG = "html_lang" |
string | PARENT_RSS_FEED = "parent_rss_feed" |
string | PARENT_RSS_FEED_URLMD5 = "parent_rss_feed_urlMd5" |
string | SUMMARY = "summary" |
string | SUMMARY_DETAIL = "summary_detail" |
string | COMMENTNS = "comments" |
string | TAGS = "tags" |
string | PUBLISHED = "published" |
string | CONTENT = "content" |
string | UPDATED = "updated" |
string | UPDATED_PARSED = "updated_parsed" |
string | TAG_ORDER_NUMBER = "order_number" |
string | TAG_SOURCE_URL = "source_url" |
string | TAG_FEED_URL = "feed_url" |
string | TAG_TYPE_DATETIME = 'datetime' |
string | TAG_PUBDATE_TZ = 'pubdate_tz' |
int | CONTENT_HASH_ALGORITHM_EMPTY = 0 |
int | CONTENT_HASH_ALGORITHM_MD5 = 1 |
int | CONTENT_HASH_ALGORITHM_CRC32 = 2 |
int | CONTENT_HASH_ALGORITHM_SOUNDEX = 3 |
int | CONTENT_HASH_ALGORITHM_SHA1 = 4 |
int | CONTENT_HASH_ALGORITHM_SDHASH = 5 |
int | CONTENT_HASH_ALGORITHM_BBHASH = 6 |
int | CONTENT_HASH_ALGORITHM_MRSH_V2 = 7 |
int | CONTENT_HASH_ALGORITHM_MVHASH_B = 8 |
int | CONTENT_HASH_ALGORITHM_MD5_WITHOUT_HTML = 9 |
int | CONTENT_HASH_ACTION_DELETE = 1 |
string | PARENT_URL_MD5 = "" |
int | TAGS_RULES_MASK_DEFAULT_VALUE = 4 |
int | TAGS_RULES_MASK_RULE_PRIORITY = 2 |
int | TAGS_RULES_MASK_MANDATORY_FIELD = 1 |
string | PROCESS_ALGORITHM_REGULAR = "regular" |
string | PROCESS_ALGORITHM_TRAINING = "training" |
string | PROCESS_ALGORITHM_PREDICTION = "prediction" |
string | PROCESS_ALGORITHM_CONCURRENCY = "concurrency" |
string | PROCESS_ALGORITHM_METRIC = "metric_based" |
string | PROCESS_ALGORITHM_FEED_PARSER = "feed_parser" |
string | PROCESS_ALGORITHM_ALCHEMY = "ALCHEMY" |
string | PROCESS_ALGORITHM_BOILERPIPE = "BOILERPIPE" |
string | PROCESS_ALGORITHM_NEWSPAPER = "NEWSPAPER" |
string | PROCESS_ALGORITHM_GOOSE = "GOOSE" |
string | PROCESS_ALGORITHM_SCRAPY = "SCRAPY" |
string | PROCESS_ALGORITHM_ML = "ML" |
string | TRAINING_QUEUE = "TRAINING_QUEUE" |
string | TRAINED_QUEUE = "TRAINED_QUEUE" |
string | CONCURRENCY_QUEUE = "CONCURRENCY_QUEUE" |
string | DB_SECTION = "mysql" |
string | DB_HOST = "db_host" |
string | DB_PORT = "db_port" |
string | DB_USER = "db_user" |
string | DB_PWD = "db_pwd" |
string | DB_SITES = "db_dc_sites" |
string | DB_URLS = "db_dc_urls" |
string | DB_SCRAPERS = "db_dc_scrapers" |
string | DC_CONTENTS_DB_NAME = "db_dc_contents" |
string | SQL_TMP_TABLE = "metrics" |
string | MYSQL_ENGINE = "mysql_engine" |
string | MSG_ERROR_OK = "" |
string | MSG_ERROR_LOAD_DB_BACKEND = "Error loading DB backend. " |
string | MSG_ERROR_LOAD_CONFIG = "Error loading config file." |
string | MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Error loading logging config file." |
string | MSG_ERROR_LOAD_EXTRACTORS = "Error load extractors " |
string | MSG_ERROR_TEMPLATE_EXTRACTION = "Error template extraction " |
string | MSG_ERROR_DYNAMIC_EXTRACTION = "Error dynamic extraction " |
string | MSG_ERROR_LOAD_OPTIONS = "Error load options" |
string | MSG_INFO_PREPARE_CONTENT = "Prepare content: " |
string | MSG_ERROR_ADJUST_PR = "Error adjust partial references. " |
string | MSG_ERROR_PROCESS = "Processor Storing Contents process batch error: " |
string | MSG_ERROR_CALC_METRICS = "Smth goes wrong. See traceback: " |
int | ERROR_OK = 0 |
int | EXIT_SUCCESS = 0 |
int | EXIT_FAILURE = 1 |
int | SQLITE_TIMEOUT = 30 |
int | TIME_EXECUTION_LIMIT = 20 |
string | PYTHON_BINARY = "/usr/bin/python" |
string | PROCESSOR_EMPTY = "" |
string | SCRAPER_BINARY = "./scraper.py" |
string | SCRAPER_CFG = "--config=../ini/scraper.ini" |
string | PROCESSOR_STORE = "STORE" |
string | STORE_PROCESSOR_BINARY = "./processor_store_content_kvdb.py" |
string | STORE_PROCESSOR_CFG = "--config=../ini/processor-store-content-in-kvdb.ini" |
string | PROCESSOR_FEED_PARSER = "FEED_PARSER" |
string | PROCESSOR_RSS = "RSS" |
string | REPROCESS_KEY = "reprocess" |
int | REPROCESS_VALUE_NO = 0 |
string | RECRAWL_KEY = "recrawl" |
int | RECRAWL_VALUE_NO = 0 |
string | PROCESSOR_FEED_PARSER_BINARY = "./processor_feed_parser.py" |
string | PROCESSOR_FEED_PARSER_CFG = "--config=../ini/processor_feed_parser.ini" |
string | PROCESSOR_SCRAPER_MULTI_ITEMS = "SCRAPER_MULTI_ITEMS" |
string | SCRAPER_MULTI_ITEMS_BINARY = "./scraper_multi_items_task.py" |
string | SCRAPER_MULTI_ITEMS_CFG = "--config=../ini/scraper_multi_items_task.ini" |
string | PROCESSOR_SCRAPER_CUSTOM = "SCRAPER_CUSTOM" |
string | SCRAPER_CUSTOM_BINARY = "./scraper_custom_task.py" |
string | SCRAPER_CUSTOM_CFG = "--config=../ini/scraper_custom_task.ini" |
string | EXTRACTOR_NAME_ML = "ML extractor" |
string | EXTRACTOR_NAME_ALCHEMY = "Alchemy extractor" |
string | EXTRACTOR_NAME_BOILERPIPE = "Boilerpipe extractor" |
string | MODULES_KEY = "modules" |
string | ALGORITHM_KEY = "algorithm" |
string | ALGORITHM_NAME_KEY = "algorithm_name" |
string | PROPERTIES_KEY = "properties" |
string | TEMPLATE_KEY = "template" |
string | RANK_KEY = "rank" |
string | USE_HTML5_KEY = "html5" |
int | SCRAPER_RANK_INIT = 10 |
int | USE_HTML5_YES = 1 |
int | USE_HTML5_NO = 0 |
list | TIMEZONE_LIST = ["JST"] |
string | COMMON_DATE_FORMAT = '%Y-%m-%d %H:%M:%S' |
int | DEFAULT_TRESHOLD_VALUE = 0 |
int | DEFAULT_METRIC_VALUE = 0 |
string | DEFAULT_COMPARATOR = "" |
int | WORDS_TRESHOLD_VALUE = 100 |
string | WORDS_COMPARATOR = "round" |
int | SENTENCES_TRESHOLD_VALUE = 5 |
string | SENTENCES_COMPARATOR = "round" |
int | ARI_TRESHOLD_VALUE = 1 |
string | ARI_COMPARATOR = "round" |
string | ARTICLE_CORPUS = "content_encoded" |
string | GOOGLE_SEARCH_SITE_ID = "google_search" |
string | CABINET_SEARCH_SITE_ID = "cabinet_search" |
string | OLD_GOOGLE_SEARCH_SITE_ID = "d57f144e7b26c9976769ea94f18b9064" |
string | OLD_CABINET_SEARCH_SITE_ID = "1fe592caf03fd50c5f065c30f82b13bb" |
string | SCRAPER_APP_CLASS_NAME = "Scraper" |
string | SCRAPER_APP_CLASS_CFG = "../ini/scraper.ini" |
string | STORE_APP_CLASS_NAME = "???" |
string | STORE_APP_CLASS_CFG = "../ini/processor-store-content-in-kvdb.ini" |
string | PROCESSOR_FEED_PARSER_CLASS_NAME = "ProcessorFeedParser" |
string | PROCESSOR_FEED_PARSER_CLASS_CFG = "../ini/processor_feed_parser.ini" |
string | SCRAPER_MULTI_ITEMS_APP_CLASS_NAME = "ScraperMultiItemsTask" |
string | SCRAPER_MULTI_ITEMS_APP_CLASS_CFG = "../ini/scraper_multi_items_task.ini" |
string | SCRAPER_CUSTOM_JSON_APP_CLASS_NAME = "ScraperCustomJson" |
string | SCRAPER_CUSTOM_JSON_APP_CLASS_CFG = "../ini/scraper_custom_task.ini" |
string | TAG_REDUCE_MASK_PROP_NAME = "SCRAPER_TEXT_REDUCER_MASK" |
string | TAG_REDUCE_PROP_NAME = "SCRAPER_TEXT_REDUCER" |
string | TAG_MARKUP_PROP_NAME = "SCRAPER_TEXT_MARKUP" |
string | TAG_KEEP_ATTRIBUTES_PROP_NAME = "SCRAPER_KEEP_ATTRIBUTES" |
string | TAG_CLOSE_VOID_PROP_NAME = "CLOSE_VOID" |
string | TAGS_TYPES_NAME = "TAGS_TYPES" |
string | PDATE_TIMEZONES_NAME = "PDATE_TIMEZONES" |
string | PDATE_DAY_MONTH_ORDER_NAME = "PDATE_DAY_MONTH_ORDER" |
string | LANG_PROP_NAME = "SCRAPER_LANG_DETECT" |
string | MEDIA_LIMITS_NAME = "MEDIA_LIMITS" |
string | HTTP_REDIRECT_LINK_NAME = "HTTP_REDIRECT_LINK" |
string | LOCATION_NAME = "Location" |
int | HTTP_REDIRECT_LINK_VALUE_URL = 1 |
int | HTTP_REDIRECT_LINK_VALUE_LOCATION = 2 |
int | HTTP_REDIRECT_LINK_VALUE_REDIRECT_URL = 3 |
int | HTTP_REDIRECT_LINK_VALUE_SOURCE_URL = 4 |
string | HTTP_REDIRECT_LINK_LINK_TAG_NAME = 'link' |
string | REDIRECT_URL_NAME = 'redirect_url' |
int | TEMPLATE_CONDITION_TYPE_URL = 0 |
@package docstring
@file Constants.py
@author Alexey, bgv <developers.hce@gmail.com>, Alexander Vybornyh <alexander.hce.cluster@gmail.com>
@link http://hierarchical-cluster-engine.com/
@copyright Copyright © 2013-2015 IOIX Ukraine
@license http://hierarchical-cluster-engine.com/license/
@package HCE project node API
@since 0.1
string dc_processor.Constants.ALGORITHM_KEY = "algorithm" |
Definition at line 165 of file Constants.py.
string dc_processor.Constants.ALGORITHM_NAME_KEY = "algorithm_name" |
Definition at line 166 of file Constants.py.
string dc_processor.Constants.ARI_COMPARATOR = "round" |
Definition at line 201 of file Constants.py.
int dc_processor.Constants.ARI_TRESHOLD_VALUE = 1 |
Definition at line 199 of file Constants.py.
string dc_processor.Constants.ARTICLE_CORPUS = "content_encoded" |
Definition at line 203 of file Constants.py.
string dc_processor.Constants.CABINET_SEARCH_SITE_ID = "cabinet_search" |
Definition at line 208 of file Constants.py.
string dc_processor.Constants.COMMENTNS = "comments" |
Definition at line 35 of file Constants.py.
string dc_processor.Constants.COMMON_DATE_FORMAT = '%Y-%m-%d %H:%M:%S' |
Definition at line 177 of file Constants.py.
string dc_processor.Constants.CONCURRENCY_QUEUE = "CONCURRENCY_QUEUE" |
Definition at line 81 of file Constants.py.
string dc_processor.Constants.CONTENT = "content" |
Definition at line 38 of file Constants.py.
int dc_processor.Constants.CONTENT_HASH_ACTION_DELETE = 1 |
Definition at line 58 of file Constants.py.
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_BBHASH = 6 |
Definition at line 54 of file Constants.py.
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_CRC32 = 2 |
Definition at line 50 of file Constants.py.
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_EMPTY = 0 |
Definition at line 48 of file Constants.py.
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_MD5 = 1 |
Definition at line 49 of file Constants.py.
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_MD5_WITHOUT_HTML = 9 |
Definition at line 57 of file Constants.py.
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_MRSH_V2 = 7 |
Definition at line 55 of file Constants.py.
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_MVHASH_B = 8 |
Definition at line 56 of file Constants.py.
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_SDHASH = 5 |
Definition at line 53 of file Constants.py.
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_SHA1 = 4 |
Definition at line 52 of file Constants.py.
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_SOUNDEX = 3 |
Definition at line 51 of file Constants.py.
string dc_processor.Constants.DB_HOST = "db_host" |
Definition at line 84 of file Constants.py.
string dc_processor.Constants.DB_PORT = "db_port" |
Definition at line 85 of file Constants.py.
string dc_processor.Constants.DB_PWD = "db_pwd" |
Definition at line 87 of file Constants.py.
string dc_processor.Constants.DB_SCRAPERS = "db_dc_scrapers" |
Definition at line 90 of file Constants.py.
string dc_processor.Constants.DB_SECTION = "mysql" |
Definition at line 83 of file Constants.py.
string dc_processor.Constants.DB_SITES = "db_dc_sites" |
Definition at line 88 of file Constants.py.
string dc_processor.Constants.DB_URLS = "db_dc_urls" |
Definition at line 89 of file Constants.py.
string dc_processor.Constants.DB_USER = "db_user" |
Definition at line 86 of file Constants.py.
string dc_processor.Constants.DC_CONTENTS_DB_NAME = "db_dc_contents" |
Definition at line 91 of file Constants.py.
string dc_processor.Constants.DEFAULT_COMPARATOR = "" |
Definition at line 184 of file Constants.py.
int dc_processor.Constants.DEFAULT_METRIC_VALUE = 0 |
Definition at line 183 of file Constants.py.
int dc_processor.Constants.DEFAULT_TRESHOLD_VALUE = 0 |
Definition at line 182 of file Constants.py.
int dc_processor.Constants.ERROR_OK = 0 |
Definition at line 112 of file Constants.py.
int dc_processor.Constants.EXIT_FAILURE = 1 |
Definition at line 116 of file Constants.py.
int dc_processor.Constants.EXIT_SUCCESS = 0 |
Definition at line 115 of file Constants.py.
string dc_processor.Constants.EXTRACTOR_NAME_ALCHEMY = "Alchemy extractor" |
Definition at line 161 of file Constants.py.
string dc_processor.Constants.EXTRACTOR_NAME_BOILERPIPE = "Boilerpipe extractor" |
Definition at line 162 of file Constants.py.
string dc_processor.Constants.EXTRACTOR_NAME_ML = "ML extractor" |
Definition at line 160 of file Constants.py.
string dc_processor.Constants.GOOGLE_SEARCH_SITE_ID = "google_search" |
Definition at line 207 of file Constants.py.
string dc_processor.Constants.HTML_LANG = "html_lang" |
Definition at line 30 of file Constants.py.
string dc_processor.Constants.HTTP_REDIRECT_LINK_LINK_TAG_NAME = 'link' |
Definition at line 250 of file Constants.py.
string dc_processor.Constants.HTTP_REDIRECT_LINK_NAME = "HTTP_REDIRECT_LINK" |
Definition at line 244 of file Constants.py.
int dc_processor.Constants.HTTP_REDIRECT_LINK_VALUE_LOCATION = 2 |
Definition at line 247 of file Constants.py.
int dc_processor.Constants.HTTP_REDIRECT_LINK_VALUE_REDIRECT_URL = 3 |
Definition at line 248 of file Constants.py.
int dc_processor.Constants.HTTP_REDIRECT_LINK_VALUE_SOURCE_URL = 4 |
Definition at line 249 of file Constants.py.
int dc_processor.Constants.HTTP_REDIRECT_LINK_VALUE_URL = 1 |
Definition at line 246 of file Constants.py.
string dc_processor.Constants.LANG_PROP_NAME = "SCRAPER_LANG_DETECT" |
Definition at line 239 of file Constants.py.
string dc_processor.Constants.LOCATION_NAME = "Location" |
Definition at line 245 of file Constants.py.
dc_processor.Constants.LOGGER_NAME = APP_CONSTS.LOGGER_NAME |
Definition at line 11 of file Constants.py.
string dc_processor.Constants.MEDIA_LIMITS_NAME = "MEDIA_LIMITS" |
Definition at line 241 of file Constants.py.
string dc_processor.Constants.MODULES_KEY = "modules" |
Definition at line 164 of file Constants.py.
string dc_processor.Constants.MSG_ERROR_ADJUST_PR = "Error adjust partial references. " |
Definition at line 106 of file Constants.py.
string dc_processor.Constants.MSG_ERROR_CALC_METRICS = "Smth goes wrong. See traceback: " |
Definition at line 108 of file Constants.py.
string dc_processor.Constants.MSG_ERROR_DYNAMIC_EXTRACTION = "Error dynamic extraction " |
Definition at line 103 of file Constants.py.
string dc_processor.Constants.MSG_ERROR_LOAD_CONFIG = "Error loading config file." |
Definition at line 99 of file Constants.py.
string dc_processor.Constants.MSG_ERROR_LOAD_DB_BACKEND = "Error loading DB backend. " |
Definition at line 98 of file Constants.py.
string dc_processor.Constants.MSG_ERROR_LOAD_EXTRACTORS = "Error load extractors " |
Definition at line 101 of file Constants.py.
string dc_processor.Constants.MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Error loading logging config file." |
Definition at line 100 of file Constants.py.
string dc_processor.Constants.MSG_ERROR_LOAD_OPTIONS = "Error load options" |
Definition at line 104 of file Constants.py.
string dc_processor.Constants.MSG_ERROR_OK = "" |
Definition at line 97 of file Constants.py.
string dc_processor.Constants.MSG_ERROR_PROCESS = "Processor Storing Contents process batch error: " |
Definition at line 107 of file Constants.py.
string dc_processor.Constants.MSG_ERROR_TEMPLATE_EXTRACTION = "Error template extraction " |
Definition at line 102 of file Constants.py.
string dc_processor.Constants.MSG_INFO_PREPARE_CONTENT = "Prepare content: " |
Definition at line 105 of file Constants.py.
string dc_processor.Constants.MYSQL_ENGINE = "mysql_engine" |
Definition at line 94 of file Constants.py.
string dc_processor.Constants.OLD_CABINET_SEARCH_SITE_ID = "1fe592caf03fd50c5f065c30f82b13bb" |
Definition at line 211 of file Constants.py.
string dc_processor.Constants.OLD_GOOGLE_SEARCH_SITE_ID = "d57f144e7b26c9976769ea94f18b9064" |
Definition at line 210 of file Constants.py.
string dc_processor.Constants.PARENT_RSS_FEED = "parent_rss_feed" |
Definition at line 31 of file Constants.py.
string dc_processor.Constants.PARENT_RSS_FEED_URLMD5 = "parent_rss_feed_urlMd5" |
Definition at line 32 of file Constants.py.
string dc_processor.Constants.PARENT_URL_MD5 = "" |
Definition at line 59 of file Constants.py.
string dc_processor.Constants.PDATE_DAY_MONTH_ORDER_NAME = "PDATE_DAY_MONTH_ORDER" |
Definition at line 237 of file Constants.py.
string dc_processor.Constants.PDATE_TIMEZONES_NAME = "PDATE_TIMEZONES" |
Definition at line 236 of file Constants.py.
string dc_processor.Constants.PROCESS_ALGORITHM_ALCHEMY = "ALCHEMY" |
Definition at line 72 of file Constants.py.
string dc_processor.Constants.PROCESS_ALGORITHM_BOILERPIPE = "BOILERPIPE" |
Definition at line 73 of file Constants.py.
string dc_processor.Constants.PROCESS_ALGORITHM_CONCURRENCY = "concurrency" |
Definition at line 69 of file Constants.py.
string dc_processor.Constants.PROCESS_ALGORITHM_FEED_PARSER = "feed_parser" |
Definition at line 71 of file Constants.py.
string dc_processor.Constants.PROCESS_ALGORITHM_GOOSE = "GOOSE" |
Definition at line 75 of file Constants.py.
string dc_processor.Constants.PROCESS_ALGORITHM_METRIC = "metric_based" |
Definition at line 70 of file Constants.py.
string dc_processor.Constants.PROCESS_ALGORITHM_ML = "ML" |
Definition at line 77 of file Constants.py.
string dc_processor.Constants.PROCESS_ALGORITHM_NEWSPAPER = "NEWSPAPER" |
Definition at line 74 of file Constants.py.
string dc_processor.Constants.PROCESS_ALGORITHM_PREDICTION = "prediction" |
Definition at line 68 of file Constants.py.
string dc_processor.Constants.PROCESS_ALGORITHM_REGULAR = "regular" |
Definition at line 66 of file Constants.py.
string dc_processor.Constants.PROCESS_ALGORITHM_SCRAPY = "SCRAPY" |
Definition at line 76 of file Constants.py.
string dc_processor.Constants.PROCESS_ALGORITHM_TRAINING = "training" |
Definition at line 67 of file Constants.py.
string dc_processor.Constants.PROCESSOR_EMPTY = "" |
Definition at line 127 of file Constants.py.
string dc_processor.Constants.PROCESSOR_FEED_PARSER = "FEED_PARSER" |
Definition at line 137 of file Constants.py.
string dc_processor.Constants.PROCESSOR_FEED_PARSER_BINARY = "./processor_feed_parser.py" |
Definition at line 146 of file Constants.py.
string dc_processor.Constants.PROCESSOR_FEED_PARSER_CFG = "--config=../ini/processor_feed_parser.ini" |
Definition at line 147 of file Constants.py.
string dc_processor.Constants.PROCESSOR_FEED_PARSER_CLASS_CFG = "../ini/processor_feed_parser.ini" |
Definition at line 220 of file Constants.py.
string dc_processor.Constants.PROCESSOR_FEED_PARSER_CLASS_NAME = "ProcessorFeedParser" |
Definition at line 219 of file Constants.py.
string dc_processor.Constants.PROCESSOR_RSS = "RSS" |
Definition at line 138 of file Constants.py.
string dc_processor.Constants.PROCESSOR_SCRAPER_CUSTOM = "SCRAPER_CUSTOM" |
Definition at line 155 of file Constants.py.
string dc_processor.Constants.PROCESSOR_SCRAPER_MULTI_ITEMS = "SCRAPER_MULTI_ITEMS" |
Definition at line 150 of file Constants.py.
string dc_processor.Constants.PROCESSOR_STORE = "STORE" |
Definition at line 132 of file Constants.py.
string dc_processor.Constants.PROPERTIES_KEY = "properties" |
Definition at line 167 of file Constants.py.
string dc_processor.Constants.PUBLISHED = "published" |
Definition at line 37 of file Constants.py.
string dc_processor.Constants.PYTHON_BINARY = "/usr/bin/python" |
Definition at line 124 of file Constants.py.
string dc_processor.Constants.RANK_KEY = "rank" |
Definition at line 169 of file Constants.py.
string dc_processor.Constants.RECRAWL_KEY = "recrawl" |
Definition at line 143 of file Constants.py.
int dc_processor.Constants.RECRAWL_VALUE_NO = 0 |
Definition at line 144 of file Constants.py.
string dc_processor.Constants.REDIRECT_URL_NAME = 'redirect_url' |
Definition at line 251 of file Constants.py.
string dc_processor.Constants.REPROCESS_KEY = "reprocess" |
Definition at line 141 of file Constants.py.
int dc_processor.Constants.REPROCESS_VALUE_NO = 0 |
Definition at line 142 of file Constants.py.
string dc_processor.Constants.SCRAPER_APP_CLASS_CFG = "../ini/scraper.ini" |
Definition at line 216 of file Constants.py.
string dc_processor.Constants.SCRAPER_APP_CLASS_NAME = "Scraper" |
Definition at line 215 of file Constants.py.
string dc_processor.Constants.SCRAPER_BINARY = "./scraper.py" |
Definition at line 128 of file Constants.py.
string dc_processor.Constants.SCRAPER_CFG = "--config=../ini/scraper.ini" |
Definition at line 129 of file Constants.py.
string dc_processor.Constants.SCRAPER_CUSTOM_BINARY = "./scraper_custom_task.py" |
Definition at line 156 of file Constants.py.
string dc_processor.Constants.SCRAPER_CUSTOM_CFG = "--config=../ini/scraper_custom_task.ini" |
Definition at line 157 of file Constants.py.
string dc_processor.Constants.SCRAPER_CUSTOM_JSON_APP_CLASS_CFG = "../ini/scraper_custom_task.ini" |
Definition at line 226 of file Constants.py.
string dc_processor.Constants.SCRAPER_CUSTOM_JSON_APP_CLASS_NAME = "ScraperCustomJson" |
Definition at line 225 of file Constants.py.
string dc_processor.Constants.SCRAPER_MULTI_ITEMS_APP_CLASS_CFG = "../ini/scraper_multi_items_task.ini" |
Definition at line 223 of file Constants.py.
string dc_processor.Constants.SCRAPER_MULTI_ITEMS_APP_CLASS_NAME = "ScraperMultiItemsTask" |
Definition at line 222 of file Constants.py.
string dc_processor.Constants.SCRAPER_MULTI_ITEMS_BINARY = "./scraper_multi_items_task.py" |
Definition at line 151 of file Constants.py.
string dc_processor.Constants.SCRAPER_MULTI_ITEMS_CFG = "--config=../ini/scraper_multi_items_task.ini" |
Definition at line 152 of file Constants.py.
int dc_processor.Constants.SCRAPER_RANK_INIT = 10 |
Definition at line 172 of file Constants.py.
string dc_processor.Constants.SENTENCES_COMPARATOR = "round" |
Definition at line 196 of file Constants.py.
int dc_processor.Constants.SENTENCES_TRESHOLD_VALUE = 5 |
Definition at line 194 of file Constants.py.
string dc_processor.Constants.SQL_TMP_TABLE = "metrics" |
Definition at line 92 of file Constants.py.
int dc_processor.Constants.SQLITE_TIMEOUT = 30 |
Definition at line 119 of file Constants.py.
string dc_processor.Constants.STORE_APP_CLASS_CFG = "../ini/processor-store-content-in-kvdb.ini" |
Definition at line 218 of file Constants.py.
string dc_processor.Constants.STORE_APP_CLASS_NAME = "???" |
Definition at line 217 of file Constants.py.
string dc_processor.Constants.STORE_PROCESSOR_BINARY = "./processor_store_content_kvdb.py" |
Definition at line 133 of file Constants.py.
string dc_processor.Constants.STORE_PROCESSOR_CFG = "--config=../ini/processor-store-content-in-kvdb.ini" |
Definition at line 134 of file Constants.py.
string dc_processor.Constants.SUMMARY = "summary" |
Definition at line 33 of file Constants.py.
string dc_processor.Constants.SUMMARY_DETAIL = "summary_detail" |
Definition at line 34 of file Constants.py.
string dc_processor.Constants.TAG_AUTHOR = "author" |
Definition at line 20 of file Constants.py.
string dc_processor.Constants.TAG_CLOSE_VOID_PROP_NAME = "CLOSE_VOID" |
Definition at line 232 of file Constants.py.
string dc_processor.Constants.TAG_CONTENT_UTF8_ENCODED = "content_encoded" |
Definition at line 22 of file Constants.py.
string dc_processor.Constants.TAG_DC_DATE = "dc_date" |
Definition at line 19 of file Constants.py.
string dc_processor.Constants.TAG_DESCRIPTION = "description" |
Definition at line 17 of file Constants.py.
string dc_processor.Constants.TAG_ENCLOSURE = "enclosure" |
Definition at line 26 of file Constants.py.
string dc_processor.Constants.TAG_FEED_URL = "feed_url" |
Definition at line 43 of file Constants.py.
string dc_processor.Constants.TAG_GOOGLE = "google_search" |
Definition at line 27 of file Constants.py.
string dc_processor.Constants.TAG_GOOGLE_TOTAL = "google_search_total" |
Definition at line 28 of file Constants.py.
string dc_processor.Constants.TAG_GUID = "guid" |
Definition at line 21 of file Constants.py.
string dc_processor.Constants.TAG_KEEP_ATTRIBUTES_PROP_NAME = "SCRAPER_KEEP_ATTRIBUTES" |
Definition at line 231 of file Constants.py.
string dc_processor.Constants.TAG_KEYWORDS = "keywords" |
Definition at line 23 of file Constants.py.
string dc_processor.Constants.TAG_LINK = "link" |
Definition at line 16 of file Constants.py.
string dc_processor.Constants.TAG_MARKUP_PROP_NAME = "SCRAPER_TEXT_MARKUP" |
Definition at line 230 of file Constants.py.
string dc_processor.Constants.TAG_MEDIA = "media" |
Definition at line 14 of file Constants.py.
string dc_processor.Constants.TAG_MEDIA_CONTENT = "media_content" |
Definition at line 25 of file Constants.py.
string dc_processor.Constants.TAG_MEDIA_THUMBNAIL = "media_thumbnail" |
Definition at line 24 of file Constants.py.
string dc_processor.Constants.TAG_ORDER_NUMBER = "order_number" |
Definition at line 41 of file Constants.py.
string dc_processor.Constants.TAG_PUB_DATE = "pubdate" |
Definition at line 18 of file Constants.py.
string dc_processor.Constants.TAG_PUBDATE_TZ = 'pubdate_tz' |
Definition at line 46 of file Constants.py.
string dc_processor.Constants.TAG_REDUCE_MASK_PROP_NAME = "SCRAPER_TEXT_REDUCER_MASK" |
Definition at line 228 of file Constants.py.
string dc_processor.Constants.TAG_REDUCE_PROP_NAME = "SCRAPER_TEXT_REDUCER" |
Definition at line 229 of file Constants.py.
string dc_processor.Constants.TAG_SOURCE_URL = "source_url" |
Definition at line 42 of file Constants.py.
string dc_processor.Constants.TAG_SUMMARY_LANG = "summary_lang" |
Definition at line 29 of file Constants.py.
string dc_processor.Constants.TAG_TITLE = "title" |
Definition at line 15 of file Constants.py.
string dc_processor.Constants.TAG_TYPE_DATETIME = 'datetime' |
Definition at line 45 of file Constants.py.
string dc_processor.Constants.TAGS = "tags" |
Definition at line 36 of file Constants.py.
int dc_processor.Constants.TAGS_RULES_MASK_DEFAULT_VALUE = 4 |
Definition at line 61 of file Constants.py.
int dc_processor.Constants.TAGS_RULES_MASK_MANDATORY_FIELD = 1 |
Definition at line 63 of file Constants.py.
int dc_processor.Constants.TAGS_RULES_MASK_RULE_PRIORITY = 2 |
Definition at line 62 of file Constants.py.
string dc_processor.Constants.TAGS_TYPES_NAME = "TAGS_TYPES" |
Definition at line 234 of file Constants.py.
int dc_processor.Constants.TEMPLATE_CONDITION_TYPE_URL = 0 |
Definition at line 254 of file Constants.py.
string dc_processor.Constants.TEMPLATE_KEY = "template" |
Definition at line 168 of file Constants.py.
int dc_processor.Constants.TIME_EXECUTION_LIMIT = 20 |
Definition at line 122 of file Constants.py.
list dc_processor.Constants.TIMEZONE_LIST = ["JST"] |
Definition at line 176 of file Constants.py.
string dc_processor.Constants.TRAINED_QUEUE = "TRAINED_QUEUE" |
Definition at line 80 of file Constants.py.
string dc_processor.Constants.TRAINING_QUEUE = "TRAINING_QUEUE" |
Definition at line 79 of file Constants.py.
string dc_processor.Constants.UPDATED = "updated" |
Definition at line 39 of file Constants.py.
string dc_processor.Constants.UPDATED_PARSED = "updated_parsed" |
Definition at line 40 of file Constants.py.
string dc_processor.Constants.USE_HTML5_KEY = "html5" |
Definition at line 170 of file Constants.py.
int dc_processor.Constants.USE_HTML5_NO = 0 |
Definition at line 174 of file Constants.py.
int dc_processor.Constants.USE_HTML5_YES = 1 |
Definition at line 173 of file Constants.py.
string dc_processor.Constants.WORDS_COMPARATOR = "round" |
Definition at line 190 of file Constants.py.
int dc_processor.Constants.WORDS_TRESHOLD_VALUE = 100 |
Definition at line 188 of file Constants.py.