HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc_co.Constants Namespace Reference

Variables

int EXIT_SUCCESS = 0
 
int EXIT_FAILURE = 1
 
int SITE_ALL = 0
 
string DEFAULT_CFG_FILE = "../ini/crawling-optimizer.ini"
 
string APP_NAME = "crawling-optimizer"
 
string MSG_INFO_LOAD_DEFAULT_CONFIG_FILE = "Loading default config file: "
 
string MSG_INFO_LOAD_CONFIG_FILE = "Loading config file: "
 
string MSG_INFO_LOAD_DEFAULT_SITE_ID = "Load default site id: "
 
string MSG_INFO_LOAD_SITE_ID = "Load site id: "
 
string MSG_ERROR_LOAD_CONFIG = "Can't load config file"
 
string MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Can't load logging config file"
 
string MSG_ERROR_PROCESS_GENERAL = "Can't process query"
 
string MSG_ERROR_COLLECT_SITE_DATA = "Can't collect site's data"
 
string MSG_ERROR_STORE_SITE_DATA = "Can't store site's data"
 
string DB_URLS = "dc_urls"
 
string DB_CO = "dc_co"
 
string SQL_QUERY_NEW_URLS
 
string SQL_QUERY_RECRAWL_PERIOD_START
 
string SQL_QUERY_RECRAWL_END
 
string SQL_QUERY_RECRAWL_PERIOD_END = """SELECT `RecrawlDate` FROM dc_sites.`sites` WHERE `Id`='%s'"""
 
string SQL_QUERY_NEW_SITE_TABLE
 
string SQL_QUERY_INSERT_SITE_DATA = """INSERT INTO `%s` VALUES('%s', %s,'%s','%s','%s','%s','%s',NOW()) ON DUPLICATE KEY UPDATE `Contents`=%s, `LastAdded`='%s', `minPDate`='%s', `maxPDate`='%s', `CDate`=NOW()"""
 

Detailed Description

@package docstring
  @file Constants.py
  @author Alexey <developers.hce@gmail.com>
  @link http://hierarchical-cluster-engine.com/
  @copyright Copyright © 2013 IOIX Ukraine
  @license http://hierarchical-cluster-engine.com/license/
  @package HCE project node API
  @since 0.1

Variable Documentation

◆ APP_NAME

string dc_co.Constants.APP_NAME = "crawling-optimizer"

Definition at line 19 of file Constants.py.

◆ DB_CO

string dc_co.Constants.DB_CO = "dc_co"

Definition at line 37 of file Constants.py.

◆ DB_URLS

string dc_co.Constants.DB_URLS = "dc_urls"

Definition at line 36 of file Constants.py.

◆ DEFAULT_CFG_FILE

string dc_co.Constants.DEFAULT_CFG_FILE = "../ini/crawling-optimizer.ini"

Definition at line 18 of file Constants.py.

◆ EXIT_FAILURE

int dc_co.Constants.EXIT_FAILURE = 1

Definition at line 13 of file Constants.py.

◆ EXIT_SUCCESS

int dc_co.Constants.EXIT_SUCCESS = 0

Definition at line 12 of file Constants.py.

◆ MSG_ERROR_COLLECT_SITE_DATA

string dc_co.Constants.MSG_ERROR_COLLECT_SITE_DATA = "Can't collect site's data"

Definition at line 32 of file Constants.py.

◆ MSG_ERROR_LOAD_CONFIG

string dc_co.Constants.MSG_ERROR_LOAD_CONFIG = "Can't load config file"

Definition at line 29 of file Constants.py.

◆ MSG_ERROR_LOAD_LOG_CONFIG_FILE

string dc_co.Constants.MSG_ERROR_LOAD_LOG_CONFIG_FILE = "Can't load logging config file"

Definition at line 30 of file Constants.py.

◆ MSG_ERROR_PROCESS_GENERAL

string dc_co.Constants.MSG_ERROR_PROCESS_GENERAL = "Can't process query"

Definition at line 31 of file Constants.py.

◆ MSG_ERROR_STORE_SITE_DATA

string dc_co.Constants.MSG_ERROR_STORE_SITE_DATA = "Can't store site's data"

Definition at line 33 of file Constants.py.

◆ MSG_INFO_LOAD_CONFIG_FILE

string dc_co.Constants.MSG_INFO_LOAD_CONFIG_FILE = "Loading config file: "

Definition at line 24 of file Constants.py.

◆ MSG_INFO_LOAD_DEFAULT_CONFIG_FILE

string dc_co.Constants.MSG_INFO_LOAD_DEFAULT_CONFIG_FILE = "Loading default config file: "

Definition at line 23 of file Constants.py.

◆ MSG_INFO_LOAD_DEFAULT_SITE_ID

string dc_co.Constants.MSG_INFO_LOAD_DEFAULT_SITE_ID = "Load default site id: "

Definition at line 25 of file Constants.py.

◆ MSG_INFO_LOAD_SITE_ID

string dc_co.Constants.MSG_INFO_LOAD_SITE_ID = "Load site id: "

Definition at line 26 of file Constants.py.

◆ SITE_ALL

int dc_co.Constants.SITE_ALL = 0

Definition at line 15 of file Constants.py.

◆ SQL_QUERY_INSERT_SITE_DATA

string dc_co.Constants.SQL_QUERY_INSERT_SITE_DATA = """INSERT INTO `%s` VALUES('%s', %s,'%s','%s','%s','%s','%s',NOW()) ON DUPLICATE KEY UPDATE `Contents`=%s, `LastAdded`='%s', `minPDate`='%s', `maxPDate`='%s', `CDate`=NOW()"""

Definition at line 96 of file Constants.py.

◆ SQL_QUERY_NEW_SITE_TABLE

string dc_co.Constants.SQL_QUERY_NEW_SITE_TABLE
Initial value:
= """CREATE TABLE IF NOT EXISTS `%s` (
 `host` varchar(126) DEFAULT NULL,
 `Contents` bigint(20) unsigned NOT NULL DEFAULT '0',
 `RecrawlStart` datetime DEFAULT NULL COMMENT 'Start date of re-crawl',
 `RecrawlEnd` datetime DEFAULT NULL COMMENT 'End date of re-crawl',
 `minPDate` datetime DEFAULT NULL COMMENT 'When resource was appeared ',
 `maxPDate` datetime DEFAULT NULL,
 `LastAdded` datetime DEFAULT NULL COMMENT 'When content was inserted to the system',
 `CDate` datetime NOT NULL COMMENT 'Date insert row',
 UNIQUE KEY `RecrawlEnd` (`RecrawlEnd`)
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
"""

Definition at line 81 of file Constants.py.

◆ SQL_QUERY_NEW_URLS

string dc_co.Constants.SQL_QUERY_NEW_URLS
Initial value:
= """SELECT count(*), max(`TcDate`), min(`LastModified`), max(`LastModified`) FROM dc_urls.`urls_%s`
 WHERE
 `CDate`
 BETWEEN
 (SELECT DATE_SUB(`RecrawlDate`, INTERVAL `RecrawlPeriod` minute) FROM dc_sites.`sites` WHERE `Id`='%s')
 AND
 (SELECT `RecrawlDate` FROM dc_sites.`sites` WHERE `Id`='%s')
 AND
 `ParentMd5`<>''
 AND
 `Crawled`<>0
 AND
 `Processed`<>0
 AND
 `TagsCount`<>0
 AND
 `Status`=7"""

Definition at line 40 of file Constants.py.

◆ SQL_QUERY_RECRAWL_END

string dc_co.Constants.SQL_QUERY_RECRAWL_END
Initial value:
= """SELECT max(`TcDate`) FROM dc_urls.`urls_%s`
 WHERE
 `CDate`
 BETWEEN
 (SELECT DATE_SUB(`RecrawlDate`, INTERVAL `RecrawlPeriod` minute) FROM dc_sites.`sites` WHERE `Id`='%s')
 AND
 (SELECT `RecrawlDate` FROM dc_sites.`sites` WHERE `Id`='%s')"""

Definition at line 67 of file Constants.py.

◆ SQL_QUERY_RECRAWL_PERIOD_END

string dc_co.Constants.SQL_QUERY_RECRAWL_PERIOD_END = """SELECT `RecrawlDate` FROM dc_sites.`sites` WHERE `Id`='%s'"""

Definition at line 77 of file Constants.py.

◆ SQL_QUERY_RECRAWL_PERIOD_START

string dc_co.Constants.SQL_QUERY_RECRAWL_PERIOD_START
Initial value:
= """SELECT
 DATE_SUB(`RecrawlDate`, INTERVAL `RecrawlPeriod` minute)
 FROM dc_sites.`sites`
 WHERE `Id`='%s'"""

Definition at line 60 of file Constants.py.