HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
FieldRecalculator.py
Go to the documentation of this file.
1 '''
2 @package: dc
3 @author scorp
4 @link: http://hierarchical-cluster-engine.com/
5 @copyright: Copyright © 2013-2014 IOIX Ukraine
6 @license: http://hierarchical-cluster-engine.com/license/
7 @since: 0.1
8 '''
9 
10 import dc_db.Constants as Constants
11 import dc_db.FieldRecalculatorDefaultCriterions as DefCriterions
12 import dc.EventObjects
13 import app.Utils as Utils # pylint: disable=F0401
14 
15 logger = Utils.MPLogger().getLogger()
16 
17 
18 # #FieldRecalculator class makes some common processing of database fields recalculation (used in Task classes)
class FieldRecalculator(object):

    def __init__(self):
        pass


    # #_urlsTableAndCriterions - prepares the two pieces every criterion-based COUNT(*) query needs
    #
    # The table name is built first and the criterions second, the same order the public methods
    # always used (getDefaultCriterions receives queryCallback and may use it).
    #
    # @param defaultCritName - criterions set name, passed to DefCriterions.getDefaultCriterions
    # @param siteId - site id
    # @param queryCallback - callback sql function, forwarded to DefCriterions.getDefaultCriterions
    # @return tuple (tableName, criterionsString)
    def _urlsTableAndCriterions(self, defaultCritName, siteId, queryCallback):
        tableName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % siteId
        criterionsString = DefCriterions.getDefaultCriterions(defaultCritName, siteId, queryCallback)
        return tableName, criterionsString


    # #_firstScalar - extracts the first column of the first row from a query result
    #
    # @param res - value returned by queryCallback (expected to be a sequence of rows or None)
    # @return res[0][0], or None when the result is None/empty or its first row is None/empty
    @staticmethod
    def _firstScalar(res):
        if res is not None and len(res) > 0 and res[0] is not None and len(res[0]) > 0:
            return res[0][0]
        return None


    # #_crawledAggregate - runs one aggregate SELECT restricted by CRIT_CRAWLED_THIS_NODE
    #
    # @param queryPrefix - SELECT text with one %s placeholder for the table name, ending right before the criterion
    # @param default - SQL fragment returned when the query yields no usable value
    # @param tableName - urls table name
    # @param queryCallback - callback sql function
    # @return str() of the selected scalar, or default
    def _crawledAggregate(self, queryPrefix, default, tableName, queryCallback):
        # Concatenate first and format afterwards, exactly as the original templates were built.
        query = (queryPrefix + DefCriterions.CRIT_CRAWLED_THIS_NODE) % tableName
        value = self._firstScalar(queryCallback(query, Constants.SECONDARY_DB_ID))
        return default if value is None else str(value)


    # #commonSiteRecalculate - common recalculate method
    #
    # Sets one field of the `sites` table to the number of rows of the site's urls table
    # that match the given default criterions.
    # NOTE(review): fieldName and siteId are interpolated into the SQL text as is - presumably
    # callers pass trusted values only; confirm.
    #
    # @param defaultCritName - criterions set name, passed to DefCriterions.getDefaultCriterions
    # @param fieldName - processing field name (of `sites` tables)
    # @param siteId - site id
    # @param queryCallback - function for queries execution
    def commonSiteRecalculate(self, defaultCritName, fieldName, siteId, queryCallback):
        UPDATE_SQL_TEMPLATE = "UPDATE `sites` SET `%s`=(SELECT COUNT(*) FROM dc_urls.%s %s) WHERE `id` = '%s'"
        tableName, criterionsString = self._urlsTableAndCriterions(defaultCritName, siteId, queryCallback)
        query = UPDATE_SQL_TEMPLATE % (fieldName, tableName, criterionsString, siteId)
        queryCallback(query, Constants.PRIMARY_DB_ID)


    # #siteResourcesRecalculate - recalculate sites.Resources field
    #
    # @param siteId - site id
    # @param queryCallback - callback sql function
    def siteResourcesRecalculate(self, siteId, queryCallback):
        self.commonSiteRecalculate(DefCriterions.CRIT_RESOURCES, "Resources", siteId, queryCallback)


    # #siteContentsRecalculate - recalculate sites.Contents field
    #
    # @param siteId - site id
    # @param queryCallback - callback sql function
    def siteContentsRecalculate(self, siteId, queryCallback):
        self.commonSiteRecalculate(DefCriterions.CRIT_CONTENTS, "Contents", siteId, queryCallback)


    # #updateCollectedURLs - updates sites.CollectedURLs field
    #
    # @param siteId - site id
    # @param queryCallback - callback sql function
    def updateCollectedURLs(self, siteId, queryCallback):
        QUERY_TEMPLATE = "UPDATE `sites` SET `CollectedURLs`=(SELECT count(*) FROM dc_urls.%s %s) WHERE `Id`='%s'"
        tableName, criterionsString = self._urlsTableAndCriterions(DefCriterions.CRIT_CLURLS, siteId, queryCallback)
        query = QUERY_TEMPLATE % (tableName, criterionsString, siteId)
        queryCallback(query, Constants.PRIMARY_DB_ID)


    # #updateNewURLs - updates sites.NewURLs field
    #
    # @param siteId - site id
    # @param queryCallback - callback sql function
    def updateNewURLs(self, siteId, queryCallback):
        QUERY_TEMPLATE = "UPDATE `sites` SET `NewURLs`=(SELECT count(*) FROM dc_urls.%s %s) WHERE `Id`='%s'"
        tableName, criterionsString = self._urlsTableAndCriterions(DefCriterions.CRIT_NURLS, siteId, queryCallback)
        query = QUERY_TEMPLATE % (tableName, criterionsString, siteId)
        queryCallback(query, Constants.PRIMARY_DB_ID)


    # #updateErrors - updates sites.Errors field
    #
    # @param siteId - site id
    # @param queryCallback - callback sql function
    def updateErrors(self, siteId, queryCallback):
        QUERY_TEMPLATE = "UPDATE `sites` SET `Errors`=(SELECT count(*) FROM dc_urls.%s %s) WHERE `Id`='%s'"
        tableName, criterionsString = self._urlsTableAndCriterions(DefCriterions.CRIT_ERRORS, siteId, queryCallback)
        query = QUERY_TEMPLATE % (tableName, criterionsString, siteId)
        queryCallback(query, Constants.PRIMARY_DB_ID)


    # #updateDeletedURLs - updates sites.DeletedURLs field
    #
    # The counter is selected through Constants.FOURTH_DB_ID and then written to `sites` through
    # Constants.PRIMARY_DB_ID, so unlike the other counters it takes two separate queries.
    # `sites` is left untouched when the SELECT gives no usable value (None/empty result,
    # None/empty first row or a None counter).
    #
    # @param siteId - site id
    # @param queryCallback - callback sql function
    def updateDeletedURLs(self, siteId, queryCallback):
        QUERY_TEMPLATE_SELECT = "SELECT count(*) FROM %s %s"
        QUERY_TEMPLATE_UPDATE = "UPDATE `sites` SET `DeletedURLs`=%s WHERE `Id`='%s'"
        tableName, criterionsString = self._urlsTableAndCriterions(DefCriterions.CRIT_DURLS, siteId, queryCallback)
        query = QUERY_TEMPLATE_SELECT % (tableName, criterionsString)
        count = self._firstScalar(queryCallback(query, Constants.FOURTH_DB_ID, Constants.EXEC_INDEX, True))
        if count is not None:
            query = QUERY_TEMPLATE_UPDATE % (str(count), siteId)
            queryCallback(query, Constants.PRIMARY_DB_ID)


    # #commonRecalc - makes all recalculations
    #
    # Resources, Contents and the cleanup fields are always recalculated; the CollectedURLs,
    # NewURLs and DeletedURLs counters only for a full recalculation. updateErrors is not called
    # here - `Errors` is written by updateSiteCleanupFields.
    #
    # @param siteId - site id
    # @param queryCallback - callback sql function
    # @param recalcType - full or partial recalculating
    def commonRecalc(self, siteId, queryCallback, recalcType=dc.EventObjects.FieldRecalculatorObj.FULL_RECALC):
        self.siteResourcesRecalculate(siteId, queryCallback)
        self.siteContentsRecalculate(siteId, queryCallback)
        if recalcType == dc.EventObjects.FieldRecalculatorObj.FULL_RECALC:
            self.updateCollectedURLs(siteId, queryCallback)
            self.updateNewURLs(siteId, queryCallback)
            self.updateDeletedURLs(siteId, queryCallback)
        self.updateSiteCleanupFields(siteId, queryCallback)


    # #updateSiteCleanupFields - recalculates some site's fields in SiteCleanUpTask operation
    #
    # Selects four aggregates (size sum, errors count, error mask, average speed) from the site's
    # urls table through Constants.SECONDARY_DB_ID and stores them in `sites` with one UPDATE.
    # Each default is the column's own quoted name, so an aggregate that gives no value turns
    # into `Column`=`Column` and keeps the stored value.
    #
    # @param siteId - site id
    # @param queryCallback - callback sql function
    def updateSiteCleanupFields(self, siteId, queryCallback):
        QUERY_TEMPLATE = "UPDATE `sites` SET `Size`=%s, `Errors`=%s, `ErrorMask`=%s, `AVGSpeed`=%s WHERE `Id`='%s'"
        tableName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % siteId
        # Queries are issued in the original order: Size, Errors, ErrorMask, AVGSpeed.
        localSize = self._crawledAggregate("SELECT SUM(`Size`) FROM %s WHERE ",
                                           "`Size`", tableName, queryCallback)
        localErrors = self._crawledAggregate("SELECT COUNT(*) FROM %s WHERE `errorMask` > 0 AND ",
                                             "`Errors`", tableName, queryCallback)
        localErrorMask = self._crawledAggregate("SELECT BIT_OR(`errorMask`) FROM %s WHERE ",
                                                "`ErrorMask`", tableName, queryCallback)
        localSpeed = self._crawledAggregate("SELECT AVG(`size`/`crawlingTime`*1000) FROM %s WHERE `crawlingTime` > 0 AND ",
                                            "`AVGSpeed`", tableName, queryCallback)
        query = QUERY_TEMPLATE % (localSize, localErrors, localErrorMask, localSpeed, siteId)
        queryCallback(query, Constants.PRIMARY_DB_ID)
def updateDeletedURLs(self, siteId, queryCallback)
def updateNewURLs(self, siteId, queryCallback)
def commonSiteRecalculate(self, defaultCritName, fieldName, siteId, queryCallback)
def updateSiteCleanupFields(self, siteId, queryCallback)
def updateErrors(self, siteId, queryCallback)
def siteContentsRecalculate(self, siteId, queryCallback)
def commonRecalc(self, siteId, queryCallback, recalcType=dc.EventObjects.FieldRecalculatorObj.FULL_RECALC)
def siteResourcesRecalculate(self, siteId, queryCallback)
def updateCollectedURLs(self, siteId, queryCallback)