HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
FieldRecalculatorDefaultCriterions.py
Go to the documentation of this file.
1 '''
2 @package: dc
3 @author scorp
4 @link: http://hierarchical-cluster-engine.com/
5 @copyright: Copyright © 2013-2014 IOIX Ukraine
6 @license: http://hierarchical-cluster-engine.com/license/
7 @since: 0.1
8 '''
9 
10 import dc.EventObjects
11 import dc_db.Constants as Constants
12 import app.SQLCriterions
13 import app.Utils as Utils # pylint: disable=F0401
14 
15 
# Module-level logger, obtained through the project's MPLogger wrapper.
logger = Utils.MPLogger().getLogger()


# Criterion names. Each one is a key of DefaultRecalculatorCriterions below;
# getDefaultCriterions() also uses the name it is given as the `Name` value
# when looking for a per-site override in `sites_properties`.
CRIT_RESOURCES = "COUNTER_CRIT_RESOURCES"
CRIT_CONTENTS = "COUNTER_CRIT_CONTENTS"
CRIT_CLURLS = "COUNTER_CRIT_CLURLS"
CRIT_NURLS = "COUNTER_CRIT_NURLS"
CRIT_DURLS = "COUNTER_CRIT_DURLS"
CRIT_CRURLS = "COUNTER_CRIT_CRURLS"
CRIT_PURLS = "COUNTER_CRIT_PURLS"
CRIT_ERRORS = "COUNTER_CRIT_ERRORS"

# Reusable SQL fragment: rejects rows whose `Status` equals STATUS_CRAWLED
# while `Crawled` is 0.
# NOTE(review): judging by the name, such rows were presumably crawled on a
# different node - confirm.
CRIT_CRAWLED_THIS_NODE = "".join((
    '( NOT (`Status`=',
    str(dc.EventObjects.URL.STATUS_CRAWLED),
    ' AND `Crawled`=0 ))',
))
29 
# Built-in criterions, keyed by criterion name. getDefaultCriterions() falls
# back to these when `sites_properties` has no row for the requested name.
# An empty dict means the criterion carries no WHERE clause of its own.
DefaultRecalculatorCriterions = {
    CRIT_RESOURCES: {
        "WHERE": "".join((
            "`Status`=",
            str(dc.EventObjects.URL.STATUS_CRAWLED),
            " AND `Crawled`>0 AND `Size`>0 ",
            "AND ((`ErrorMask` & 4198399) = 0) AND `ContentType`='text/html'",
        )),
    },
    # NOTE(review): the literal 7 is presumably a URL status code - confirm
    # against dc.EventObjects.URL and consider using the named constant.
    CRIT_CONTENTS: {
        "WHERE": " `Status`=7 AND `TagsCount`>0 AND `Processed`>0",
    },
    CRIT_CLURLS: {
        'WHERE': "".join(("`ParentMd5`<>'' AND ", CRIT_CRAWLED_THIS_NODE)),
    },
    CRIT_NURLS: {
        "WHERE": "".join(("`Status`=", str(dc.EventObjects.URL.STATUS_NEW))),
    },
    CRIT_DURLS: {},
    CRIT_CRURLS: {},
    CRIT_PURLS: {},
    CRIT_ERRORS: {
        'WHERE': "".join(("`ErrorMask`>0 AND ", CRIT_CRAWLED_THIS_NODE)),
    },
}
40 
41 
def getDefaultCriterions(criterionName, siteId, queryCallback):
    '''
    Build the SQL criterion string for one recalculation criterion of a site.

    The site-specific value is looked up first in `sites_properties` (row with
    `Name` == criterionName and `Site_Id` == siteId). When the query yields no
    usable row, the built-in entry from DefaultRecalculatorCriterions is used,
    if there is one; otherwise an empty criterion set is used.

    @param criterionName: name of the criterion (one of the CRIT_* constants
                          for the built-in defaults to apply)
    @param siteId: site identifier, used both in the lookup query and passed
                   on to app.SQLCriterions.generateCriterionSQL()
    @param queryCallback: callable invoked as queryCallback(sql, dbId); its
                          result is indexed as result[0][0], i.e. a sequence
                          of rows is expected
    @return: whatever app.SQLCriterions.generateCriterionSQL() returns for the
             selected criterion set
    '''
    SQL_SELECT_TEMPLATE = "SELECT `Value` FROM `sites_properties` WHERE `Name`='%s' AND `Site_Id`='%s'"

    # NOTE(review): both values are interpolated into the SQL text unescaped.
    # The callback only accepts a finished query string, so they cannot be
    # passed as parameters here - callers must supply trusted values.
    query = SQL_SELECT_TEMPLATE % (criterionName, siteId)
    res = queryCallback(query, Constants.PRIMARY_DB_ID)

    criterionDict = {}
    if res is not None and len(res) > 0 and res[0] is not None:
        # A per-site override exists: use its value as the WHERE part.
        criterionDict[app.SQLCriterions.CRITERION_WHERE] = res[0][0]
    elif criterionName in DefaultRecalculatorCriterions:
        # Work on a copy so that nothing done to the dict downstream can
        # alter the shared module-level defaults between calls.
        criterionDict = dict(DefaultRecalculatorCriterions[criterionName])

    ret = app.SQLCriterions.generateCriterionSQL(criterionDict, None, siteId)
    # str() keeps the debug line from raising if the generator ever returns
    # something other than a string (e.g. None).
    logger.debug(">>> Recalculate Def Ret = " + str(ret))
    return ret
def generateCriterionSQL(criterions, additionWhere=None, siteId=None)
def getDefaultCriterions(criterionName, siteId, queryCallback)