# Created on Feb 26, 2014

# Version identifier string.
VERSION_STRING = "2.0.0-chaika"

# Names of the loggers used for profiling output and traceback output.
LOGGER_NAME_PROFILER = "profiler"
LOGGER_NAME_TRACEBACK = "traceback"

# Name under which a start date value is looked up.
# NOTE(review): the consumer of this key is not visible here - confirm usage.
START_DATE_NAME = "START_DATE"

# Error bit flags. Each flag is a single distinct bit; presumably several
# flags are OR-ed together into one error mask - confirm against callers.
# Bit 0 is not assigned anywhere in the visible source.
ERROR_REQUEST_TIMEOUT = 1 << 1
# Error bit flags, one distinct bit per condition (bits 2..52).
# Presumably combined with bitwise OR into a single error mask - confirm
# against callers. The values go beyond 32 bits, so any storage for the
# combined mask has to be at least 64 bits wide.

# Request / response level errors.
ERROR_HTTP_ERROR = 1 << 2
ERROR_EMPTY_RESPONSE = 1 << 3
ERROR_WRONG_MIME = 1 << 4
ERROR_CONNECTION_ERROR = 1 << 5
ERROR_PAGE_CONVERT_ERROR = 1 << 6
# NOTE(review): bit 7 (1 << 7) is not assigned in the visible source; it is
# deliberately left unassigned here rather than guessed.
ERROR_RESPONSE_SIZE_ERROR = 1 << 8
ERROR_AUTH_ERROR = 1 << 9
ERROR_WRITE_FILE_ERROR = 1 << 10
ERROR_ROBOTS_NOT_ALLOW = 1 << 11
ERROR_PARSE_ERROR = 1 << 12
ERROR_BAD_ENCODING = 1 << 13
ERROR_SITE_MAX_ERRORS = 1 << 14
ERROR_SYNCHRONIZE_URL_WITH_DB = 1 << 15
ERROR_CRAWLER_FILTERS_BREAK = 1 << 16
ERROR_MAX_ALLOW_HTTP_REDIRECTS = 1 << 17
ERROR_MAX_ALLOW_HTML_REDIRECTS = 1 << 18
ERROR_GENERAL_CRAWLER = 1 << 19
ERROR_DTD_INVALID = 1 << 20
ERROR_MACRO_DESERIALIZATION = 1 << 21

# Fetcher errors.
ERROR_FETCH_AMBIGUOUS_REQUEST = 1 << 22
ERROR_FETCH_CONNECTION_ERROR = 1 << 23
ERROR_FETCH_HTTP_ERROR = 1 << 24
ERROR_FETCH_INVALID_URL = 1 << 25
ERROR_FETCH_TOO_MANY_REDIRECTS = 1 << 26
ERROR_FETCH_CONNECTION_TIMEOUT = 1 << 27
ERROR_FETCH_READ_TIMEOUT = 1 << 28
ERROR_FETCH_TIMEOUT = 1 << 29
ERROR_FETCHER_INTERNAL = 1 << 30

# Site limits, database, scraper and processing errors.
ERROR_MASK_SITE_MAX_RESOURCES_NUMBER = 1 << 31
ERROR_DATABASE_ERROR = 1 << 32
ERROR_MASK_SITE_MAX_RESOURCES_SIZE = 1 << 33
ERROR_MASK_SITE_UNSUPPORTED_CONTENT_TYPE = 1 << 34
ERROR_MASK_URL_ENCODING_ERROR = 1 << 35
ERROR_MASK_SCRAPER_ERROR = 1 << 36
ERROR_MASK_MISSED_RAW_CONTENT_ON_DISK = 1 << 37
ERROR_RE_ERROR = 1 << 38
ERROR_MANDATORY_TEMPLATE = 1 << 39
ERROR_PROCESSOR_FILTERS_BREAK = 1 << 40
ERROR_MASK_SITE_STATE = 1 << 41
ERROR_MAX_ITEMS = 1 << 42
ERROR_MAX_URLS_FROM_PAGE = 1 << 43
ERROR_TEMPLATE_SOURCE = 1 << 44

ERROR_RSS_EMPTY = 1 << 45
ERROR_URLS_SCHEMA_EXTERNAL = 1 << 46
ERROR_NOT_EXIST_ANY_VALID_PROXY = 1 << 47
ERROR_FETCH_FORBIDDEN = 1 << 48
ERROR_NO_TIME_WINDOW = 1 << 49
ERROR_CRAWLER_FATAL_INITIALIZATION_PROJECT_ERROR = 1 << 50
ERROR_PROCESSOR_BATCH_ITEM_PROCESS = 1 << 51
ERROR_MAX_EXECUTION_TIME = 1 << 52
# Names of the configuration sections.
CONFIG_APPLICATION_SECTION_NAME = "Application"
CONFIG_PROFILER_SECTION_NAME = "Profiler"

# Separator line (a run of '=' followed by a newline) for log output.
LOGGER_DELIMITER_LINE = "============================================\n"

# Application usage models.
# NOTE(review): presumably "run as a standalone process" vs. "used as an
# imported module" - confirm against the code that reads these values.
APP_USAGE_MODEL_PROCESS = 0
APP_USAGE_MODEL_MODULE = 1
# Application names.
URLS_TO_BATCH_TASK_APP_NAME = "UrlsToBatchTask"
URLS_FETCH_JSON_TO_DBTASK_APP_NAME = "URLFetchToJsonDBTaskConvertor"
SOCIAL_TASK_APP_NAME = "SocialTask"
CONTENT_UPDATER_APP_NAME = "ContentUpdater"
POST_PROCESSOR_APP_NAME = "PostprocessorTask"
SOCIAL_PROFILES_VERIFIER_APP_NAME = "SocialProfilesVerifier"
SOCIAL_DATA_GET_API_APP_NAME = "SocialDataGetApi"
SOCIAL_DATA_UPDATER_APP_NAME = "SocialDataUpdater"

# Property names for the PDATE source settings.
PDATE_SOURCES_MASK_PROP_NAME = "PDATE_SOURCES_MASK"
PDATE_SOURCES_MASK_OVERWRITE_PROP_NAME = "PDATE_SOURCES_MASK_OVERWRITE"
PDATE_SOURCES_EXPRESSION_PROP_NAME = "PDATE_SOURCES_EXPRESSION"

# Names related to SQL-expression based field updates.
SQL_EXPRESSION_FIELDS_UPDATE_CRAWLER = 'SQL_EXPRESSION_FIELDS_UPDATE_CRAWLER'
SQL_EXPRESSION_FIELDS_UPDATE_PROCESSOR = 'SQL_EXPRESSION_FIELDS_UPDATE_PROCESSOR'
SQL_EXPRESSION_FIELDS_PDATE_TIME = 'PDATE_TIME'

REPLACEMENT_CONTENT_DATA = 'REPLACE'

URL_NORMALIZE = 'URL_NORMALIZE'

# Lowest bit (bit 0) of the PDATE sources mask; the remaining
# PDATE_SOURCES_MASK_* bits follow this constant in the file.
PDATE_SOURCES_MASK_URL_NAME = 1
# PDATE source bit flags (bits 1..7); bit 0 is PDATE_SOURCES_MASK_URL_NAME,
# defined immediately before this group.
PDATE_SOURCES_MASK_RSS_FEED = 1 << 1
PDATE_SOURCES_MASK_HTTP_DATE = 1 << 2
PDATE_SOURCES_MASK_HTTP_LAST_MODIFIED = 1 << 3

PDATE_SOURCES_MASK_DC_DATE = 1 << 4
PDATE_SOURCES_MASK_PUBDATE = 1 << 5
PDATE_SOURCES_MASK_NOW = 1 << 6
PDATE_SOURCES_MASK_SQL_EXPRESSION = 1 << 7

# 255 == 0b11111111: all eight source bits (0..7) set.
PDATE_SOURCES_MASK_BIT_DEFAULT = 255
# 192 == (1 << 6) | (1 << 7): the NOW and SQL_EXPRESSION bits.
PDATE_SOURCES_MASK_OVERWRITE_DEFAULT = 192