HCE Project: Python language Distributed Tasks Manager Application, Distributed Crawler Application, and client API bindings. Version 2.0.0-chaika
Hierarchical Cluster Engine Python language binding
Constants.py File Reference

Go to the source code of this file.

Classes

class dc_processor.Constants.HTML5_SEMANTIC_TAGS
 

Namespaces

 dc_processor.Constants
 

Variables

 dc_processor.Constants.LOGGER_NAME = APP_CONSTS.LOGGER_NAME
 
string dc_processor.Constants.TAG_MEDIA = "media"
 
string dc_processor.Constants.TAG_TITLE = "title"
 
string dc_processor.Constants.TAG_LINK = "link"
 
string dc_processor.Constants.TAG_DESCRIPTION = "description"
 
string dc_processor.Constants.TAG_PUB_DATE = "pubdate"
 
string dc_processor.Constants.TAG_DC_DATE = "dc_date"
 
string dc_processor.Constants.TAG_AUTHOR = "author"
 
string dc_processor.Constants.TAG_GUID = "guid"
 
string dc_processor.Constants.TAG_CONTENT_UTF8_ENCODED = "content_encoded"
 
string dc_processor.Constants.TAG_KEYWORDS = "keywords"
 
string dc_processor.Constants.TAG_MEDIA_THUMBNAIL = "media_thumbnail"
 
string dc_processor.Constants.TAG_MEDIA_CONTENT = "media_content"
 
string dc_processor.Constants.TAG_ENCLOSURE = "enclosure"
 
string dc_processor.Constants.TAG_GOOGLE = "google_search"
 
string dc_processor.Constants.TAG_GOOGLE_TOTAL = "google_search_total"
 
string dc_processor.Constants.TAG_SUMMARY_LANG = "summary_lang"
 
string dc_processor.Constants.HTML_LANG = "html_lang"
 
string dc_processor.Constants.PARENT_RSS_FEED = "parent_rss_feed"
 
string dc_processor.Constants.PARENT_RSS_FEED_URLMD5 = "parent_rss_feed_urlMd5"
 
string dc_processor.Constants.SUMMARY = "summary"
 
string dc_processor.Constants.SUMMARY_DETAIL = "summary_detail"
 
string dc_processor.Constants.COMMENTNS = "comments"
 
string dc_processor.Constants.TAGS = "tags"
 
string dc_processor.Constants.PUBLISHED = "published"
 
string dc_processor.Constants.CONTENT = "content"
 
string dc_processor.Constants.UPDATED = "updated"
 
string dc_processor.Constants.UPDATED_PARSED = "updated_parsed"
 
string dc_processor.Constants.TAG_ORDER_NUMBER = "order_number"
 
string dc_processor.Constants.TAG_SOURCE_URL = "source_url"
 
string dc_processor.Constants.TAG_FEED_URL = "feed_url"
 
string dc_processor.Constants.TAG_TYPE_DATETIME = 'datetime'
 
string dc_processor.Constants.TAG_PUBDATE_TZ = 'pubdate_tz'
 
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_EMPTY = 0
 
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_MD5 = 1
 
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_CRC32 = 2
 
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_SOUNDEX = 3
 
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_SHA1 = 4
 
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_SDHASH = 5
 
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_BBHASH = 6
 
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_MRSH_V2 = 7
 
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_MVHASH_B = 8
 
int dc_processor.Constants.CONTENT_HASH_ALGORITHM_MD5_WITHOUT_HTML = 9
 
int dc_processor.Constants.CONTENT_HASH_ACTION_DELETE = 1
 
string dc_processor.Constants.PARENT_URL_MD5 = ""
 
int dc_processor.Constants.TAGS_RULES_MASK_DEFAULT_VALUE = 4
 
int dc_processor.Constants.TAGS_RULES_MASK_RULE_PRIORITY = 2
 
int dc_processor.Constants.TAGS_RULES_MASK_MANDATORY_FIELD = 1
 
string dc_processor.Constants.PROCESS_ALGORITHM_REGULAR = "regular"
 
string dc_processor.Constants.PROCESS_ALGORITHM_TRAINING = "training"
 
string dc_processor.Constants.PROCESS_ALGORITHM_PREDICTION = "prediction"
 
string dc_processor.Constants.PROCESS_ALGORITHM_CONCURRENCY = "concurrency"
 
string dc_processor.Constants.PROCESS_ALGORITHM_METRIC = "metric_based"
 
string dc_processor.Constants.PROCESS_ALGORITHM_FEED_PARSER = "feed_parser"
 
string dc_processor.Constants.PROCESS_ALGORITHM_ALCHEMY = "ALCHEMY"
 
string dc_processor.Constants.PROCESS_ALGORITHM_BOILERPIPE = "BOILERPIPE"
 
string dc_processor.Constants.PROCESS_ALGORITHM_NEWSPAPER = "NEWSPAPER"
 
string dc_processor.Constants.PROCESS_ALGORITHM_GOOSE = "GOOSE"
 
string dc_processor.Constants.PROCESS_ALGORITHM_SCRAPY = "SCRAPY"
 
string dc_processor.Constants.PROCESS_ALGORITHM_ML = "ML"
 
string dc_processor.Constants.TRAINING_QUEUE = "TRAINING_QUEUE"
 
string dc_processor.Constants.TRAINED_QUEUE = "TRAINED_QUEUE"
 
string dc_processor.Constants.CONCURRENCY_QUEUE = "CONCURRENCY_QUEUE"
 
string dc_processor.Constants.DB_SECTION = "mysql"
 
string dc_processor.Constants.DB_HOST = "db_host"
 
string dc_processor.Constants.DB_PORT = "db_port"
 
string dc_processor.Constants.DB_USER = "db_user"
 
string dc_processor.Constants.DB_PWD = "db_pwd"
 
string dc_processor.Constants.DB_SITES = "db_dc_sites"
 
string dc_processor.Constants.DB_URLS = "db_dc_urls"
 
string dc_processor.Constants.DB_SCRAPERS = "db_dc_scrapers"
 
string dc_processor.Constants.DC_CONTENTS_DB_NAME = "db_dc_contents"
 
string dc_processor.Constants.SQL_TMP_TABLE = "metrics"
 
string dc_processor.Constants.MYSQL_ENGINE = "mysql_engine"
 
string dc_processor.Constants.MSG_ERROR_OK = ""
 
string dc_processor.Constants.MSG_ERROR_LOAD_DB_BACKEND = "Error loading DB backend. "
 
string dc_processor.Constants.MSG_ERROR_LOAD_CONFIG = "Error loading config file."
 
string dc_processor.Constants.MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Error loading logging config file."
 
string dc_processor.Constants.MSG_ERROR_LOAD_EXTRACTORS = "Error load extractors "
 
string dc_processor.Constants.MSG_ERROR_TEMPLATE_EXTRACTION = "Error template extraction "
 
string dc_processor.Constants.MSG_ERROR_DYNAMIC_EXTRACTION = "Error dynamic extraction "
 
string dc_processor.Constants.MSG_ERROR_LOAD_OPTIONS = "Error load options"
 
string dc_processor.Constants.MSG_INFO_PREPARE_CONTENT = "Prepare content: "
 
string dc_processor.Constants.MSG_ERROR_ADJUST_PR = "Error adjust partial references. "
 
string dc_processor.Constants.MSG_ERROR_PROCESS = "Processor Storing Contents process batch error: "
 
string dc_processor.Constants.MSG_ERROR_CALC_METRICS = "Smth goes wrong. See traceback: "
 
int dc_processor.Constants.ERROR_OK = 0
 
int dc_processor.Constants.EXIT_SUCCESS = 0
 
int dc_processor.Constants.EXIT_FAILURE = 1
 
int dc_processor.Constants.SQLITE_TIMEOUT = 30
 
int dc_processor.Constants.TIME_EXECUTION_LIMIT = 20
 
string dc_processor.Constants.PYTHON_BINARY = "/usr/bin/python"
 
string dc_processor.Constants.PROCESSOR_EMPTY = ""
 
string dc_processor.Constants.SCRAPER_BINARY = "./scraper.py"
 
string dc_processor.Constants.SCRAPER_CFG = "--config=../ini/scraper.ini"
 
string dc_processor.Constants.PROCESSOR_STORE = "STORE"
 
string dc_processor.Constants.STORE_PROCESSOR_BINARY = "./processor_store_content_kvdb.py"
 
string dc_processor.Constants.STORE_PROCESSOR_CFG = "--config=../ini/processor-store-content-in-kvdb.ini"
 
string dc_processor.Constants.PROCESSOR_FEED_PARSER = "FEED_PARSER"
 
string dc_processor.Constants.PROCESSOR_RSS = "RSS"
 
string dc_processor.Constants.REPROCESS_KEY = "reprocess"
 
int dc_processor.Constants.REPROCESS_VALUE_NO = 0
 
string dc_processor.Constants.RECRAWL_KEY = "recrawl"
 
int dc_processor.Constants.RECRAWL_VALUE_NO = 0
 
string dc_processor.Constants.PROCESSOR_FEED_PARSER_BINARY = "./processor_feed_parser.py"
 
string dc_processor.Constants.PROCESSOR_FEED_PARSER_CFG = "--config=../ini/processor_feed_parser.ini"
 
string dc_processor.Constants.PROCESSOR_SCRAPER_MULTI_ITEMS = "SCRAPER_MULTI_ITEMS"
 
string dc_processor.Constants.SCRAPER_MULTI_ITEMS_BINARY = "./scraper_multi_items_task.py"
 
string dc_processor.Constants.SCRAPER_MULTI_ITEMS_CFG = "--config=../ini/scraper_multi_items_task.ini"
 
string dc_processor.Constants.PROCESSOR_SCRAPER_CUSTOM = "SCRAPER_CUSTOM"
 
string dc_processor.Constants.SCRAPER_CUSTOM_BINARY = "./scraper_custom_task.py"
 
string dc_processor.Constants.SCRAPER_CUSTOM_CFG = "--config=../ini/scraper_custom_task.ini"
 
string dc_processor.Constants.EXTRACTOR_NAME_ML = "ML extractor"
 
string dc_processor.Constants.EXTRACTOR_NAME_ALCHEMY = "Alchemy extractor"
 
string dc_processor.Constants.EXTRACTOR_NAME_BOILERPIPE = "Boilerpipe extractor"
 
string dc_processor.Constants.MODULES_KEY = "modules"
 
string dc_processor.Constants.ALGORITHM_KEY = "algorithm"
 
string dc_processor.Constants.ALGORITHM_NAME_KEY = "algorithm_name"
 
string dc_processor.Constants.PROPERTIES_KEY = "properties"
 
string dc_processor.Constants.TEMPLATE_KEY = "template"
 
string dc_processor.Constants.RANK_KEY = "rank"
 
string dc_processor.Constants.USE_HTML5_KEY = "html5"
 
int dc_processor.Constants.SCRAPER_RANK_INIT = 10
 
int dc_processor.Constants.USE_HTML5_YES = 1
 
int dc_processor.Constants.USE_HTML5_NO = 0
 
list dc_processor.Constants.TIMEZONE_LIST = ["JST"]
 
string dc_processor.Constants.COMMON_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
 
int dc_processor.Constants.DEFAULT_TRESHOLD_VALUE = 0
 
int dc_processor.Constants.DEFAULT_METRIC_VALUE = 0
 
string dc_processor.Constants.DEFAULT_COMPARATOR = ""
 
int dc_processor.Constants.WORDS_TRESHOLD_VALUE = 100
 
string dc_processor.Constants.WORDS_COMPARATOR = "round"
 
int dc_processor.Constants.SENTENCES_TRESHOLD_VALUE = 5
 
string dc_processor.Constants.SENTENCES_COMPARATOR = "round"
 
int dc_processor.Constants.ARI_TRESHOLD_VALUE = 1
 
string dc_processor.Constants.ARI_COMPARATOR = "round"
 
string dc_processor.Constants.ARTICLE_CORPUS = "content_encoded"
 
string dc_processor.Constants.GOOGLE_SEARCH_SITE_ID = "google_search"
 
string dc_processor.Constants.CABINET_SEARCH_SITE_ID = "cabinet_search"
 
string dc_processor.Constants.OLD_GOOGLE_SEARCH_SITE_ID = "d57f144e7b26c9976769ea94f18b9064"
 
string dc_processor.Constants.OLD_CABINET_SEARCH_SITE_ID = "1fe592caf03fd50c5f065c30f82b13bb"
 
string dc_processor.Constants.SCRAPER_APP_CLASS_NAME = "Scraper"
 
string dc_processor.Constants.SCRAPER_APP_CLASS_CFG = "../ini/scraper.ini"
 
string dc_processor.Constants.STORE_APP_CLASS_NAME = "???"
 
string dc_processor.Constants.STORE_APP_CLASS_CFG = "../ini/processor-store-content-in-kvdb.ini"
 
string dc_processor.Constants.PROCESSOR_FEED_PARSER_CLASS_NAME = "ProcessorFeedParser"
 
string dc_processor.Constants.PROCESSOR_FEED_PARSER_CLASS_CFG = "../ini/processor_feed_parser.ini"
 
string dc_processor.Constants.SCRAPER_MULTI_ITEMS_APP_CLASS_NAME = "ScraperMultiItemsTask"
 
string dc_processor.Constants.SCRAPER_MULTI_ITEMS_APP_CLASS_CFG = "../ini/scraper_multi_items_task.ini"
 
string dc_processor.Constants.SCRAPER_CUSTOM_JSON_APP_CLASS_NAME = "ScraperCustomJson"
 
string dc_processor.Constants.SCRAPER_CUSTOM_JSON_APP_CLASS_CFG = "../ini/scraper_custom_task.ini"
 
string dc_processor.Constants.TAG_REDUCE_MASK_PROP_NAME = "SCRAPER_TEXT_REDUCER_MASK"
 
string dc_processor.Constants.TAG_REDUCE_PROP_NAME = "SCRAPER_TEXT_REDUCER"
 
string dc_processor.Constants.TAG_MARKUP_PROP_NAME = "SCRAPER_TEXT_MARKUP"
 
string dc_processor.Constants.TAG_KEEP_ATTRIBUTES_PROP_NAME = "SCRAPER_KEEP_ATTRIBUTES"
 
string dc_processor.Constants.TAG_CLOSE_VOID_PROP_NAME = "CLOSE_VOID"
 
string dc_processor.Constants.TAGS_TYPES_NAME = "TAGS_TYPES"
 
string dc_processor.Constants.PDATE_TIMEZONES_NAME = "PDATE_TIMEZONES"
 
string dc_processor.Constants.PDATE_DAY_MONTH_ORDER_NAME = "PDATE_DAY_MONTH_ORDER"
 
string dc_processor.Constants.LANG_PROP_NAME = "SCRAPER_LANG_DETECT"
 
string dc_processor.Constants.MEDIA_LIMITS_NAME = "MEDIA_LIMITS"
 
string dc_processor.Constants.HTTP_REDIRECT_LINK_NAME = "HTTP_REDIRECT_LINK"
 
string dc_processor.Constants.LOCATION_NAME = "Location"
 
int dc_processor.Constants.HTTP_REDIRECT_LINK_VALUE_URL = 1
 
int dc_processor.Constants.HTTP_REDIRECT_LINK_VALUE_LOCATION = 2
 
int dc_processor.Constants.HTTP_REDIRECT_LINK_VALUE_REDIRECT_URL = 3
 
int dc_processor.Constants.HTTP_REDIRECT_LINK_VALUE_SOURCE_URL = 4
 
string dc_processor.Constants.HTTP_REDIRECT_LINK_LINK_TAG_NAME = 'link'
 
string dc_processor.Constants.REDIRECT_URL_NAME = 'redirect_url'
 
int dc_processor.Constants.TEMPLATE_CONDITION_TYPE_URL = 0