HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
SiteStatusTask.py
Go to the documentation of this file.
1 '''
2 @package: dc
3 @author igor
4 @link: http://hierarchical-cluster-engine.com/
5 @copyright: Copyright © 2013-2014 IOIX Ukraine
6 @license: http://hierarchical-cluster-engine.com/license/
7 @since: 0.1
8 '''
9 
10 import dc_db.Constants as Constants
11 from dc_db.URLStatusTask import URLStatusTask
12 from dc_db.SiteTask import SiteTask
13 from dc_db import FieldRecalculator
14 import dc.EventObjects
15 import app.Utils as Utils # pylint: disable=F0401
16 
17 logger = Utils.MPLogger().getLogger()
18 
19 
# #process siteStatus event
#
# Loads one site row by id and, on request, the urls, properties and filters that belong to it.
#
# NOTE(review): this class was restored from a line-numbered documentation listing in which the
# indentation was flattened and two statements were missing (the constructor body and the
# creation of the recalculator). The nesting and both statements are reconstructed from how the
# names are used and from the module imports - confirm against the upstream file.
class SiteStatusTask(object):


    # #constructor
    #
    def __init__(self):
        # fillUrls() delegates row conversion to self.urlStatusTask.
        # NOTE(review): reconstructed statement, assumes a no-argument constructor - confirm.
        self.urlStatusTask = URLStatusTask()


    # #make all necessary actions to get site status
    #
    # Runs the common recalculation for the site, selects the site row from the "sites" table,
    # copies the mapped columns and the date fields into a new Site object and then attaches
    # urls, properties and filters, each one only when it is not named in siteStatus.excludeList
    # (an excluded part is set to None).
    #
    # @param siteStatus instance of SiteStatus object (its id and excludeList are read)
    # @param queryCallback function for queries execution
    # @return instance of Site object; state is Site.STATE_NOT_FOUND when no row was selected
    def process(self, siteStatus, queryCallback):
        # recalculation site values
        # NOTE(review): reconstructed statement, assumes a FieldRecalculator class inside the
        # FieldRecalculator module with a no-argument constructor - confirm.
        recalculator = FieldRecalculator.FieldRecalculator()
        recalculator.commonRecalc(siteStatus.id, queryCallback)

        site = dc.EventObjects.Site("")
        # NOTE(review): the id is interpolated into the SQL text unescaped; if it can carry
        # untrusted input it must be escaped or bound as a parameter by the query layer.
        whereClause = "`Id` = '%s'" % siteStatus.id
        query = Constants.SELECT_SQL_TEMPLATE % ("sites", whereClause)
        res = queryCallback(query, Constants.PRIMARY_DB_ID, Constants.EXEC_NAME)

        if not (hasattr(res, '__iter__') and len(res) > 0):
            site.state = dc.EventObjects.Site.STATE_NOT_FOUND
            return site

        for row in res:
            # copy only the columns that are both mapped in siteDict and known to the Site object
            for field, column in Constants.siteDict.items():
                if hasattr(site, field) and column in row:
                    setattr(site, field, row[column])
            site.uDate = Constants.readDataTimeField("UDate", row)
            site.tcDate = Constants.readDataTimeField("TcDate", row)
            site.tcDateProcess = Constants.readDataTimeField("TcDateProcess", row)
            site.cDate = Constants.readDataTimeField("CDate", row)
            site.recrawlDate = Constants.readDataTimeField("RecrawlDate", row)

        # add filling from other tables(filters, urls, ...)
        # NOTE(review): placed after the row loop; the flattened listing does not show whether
        # these fills ran once or once per selected row - confirm.
        if SiteTask.FIELD_NAME_URLS not in siteStatus.excludeList:
            site.urls = self.fillUrls(siteStatus, queryCallback)
        else:
            site.urls = None

        logger.debug('>>> siteStatus.excludeList: ' + str(siteStatus.excludeList))

        if SiteTask.FIELD_NAME_PROPERTIES not in siteStatus.excludeList:
            site.properties = self.fillProperties(siteStatus, queryCallback)
        else:
            site.properties = None

        if SiteTask.FIELD_NAME_FILTERS not in siteStatus.excludeList:
            site.filters = self.fillFilters(siteStatus, queryCallback)
        else:
            site.filters = None

        return site


    # #Extracts and returns urls, selected from database
    #
    # Selects all rows of `sites_urls` for the site and hands them to
    # self.urlStatusTask.fillUrlsList() for conversion.
    #
    # @param siteUpdate object whose id attribute identifies the site
    # @param queryCallback function for queries execution
    # @return whatever fillUrlsList() returns for the selected rows (urls list)
    def fillUrls(self, siteUpdate, queryCallback):
        # NOTE(review): id is interpolated unescaped, see process()
        query = "SELECT * FROM `sites_urls` WHERE `Site_Id` = '%s'" % siteUpdate.id
        res = queryCallback(query, Constants.PRIMARY_DB_ID, Constants.EXEC_NAME)
        return self.urlStatusTask.fillUrlsList(res, dc.EventObjects.SiteURL,
                                               Constants.SiteURLTableDitct)


    # #Extracts and returns filters, selected from database
    #
    # Builds one SiteFilter per selected `sites_filters` row that has both a site id and a
    # pattern; rows lacking either value are skipped. An error is logged when the query result
    # is not iterable.
    #
    # @param siteUpdate object whose id attribute identifies the site
    # @param queryCallback function for queries execution
    # @return filters list (empty when nothing usable was selected)
    def fillFilters(self, siteUpdate, queryCallback):
        filters = []
        # NOTE(review): id is interpolated unescaped, see process()
        query = "SELECT * FROM `sites_filters` WHERE Site_Id = '%s'" % siteUpdate.id
        res = queryCallback(query, Constants.PRIMARY_DB_ID, Constants.EXEC_NAME)
        if not hasattr(res, '__iter__'):
            logger.error("fillFilters return None")
            return filters

        siteIdColumn = Constants.filterDict["siteId"]
        patternColumn = Constants.filterDict["pattern"]
        for row in res:
            hasSiteId = siteIdColumn in row and row[siteIdColumn] is not None
            hasPattern = patternColumn in row and row[patternColumn] is not None
            if not (hasSiteId and hasPattern):
                continue
            siteFilter = dc.EventObjects.SiteFilter(row[siteIdColumn], row[patternColumn])
            # copy only the columns that are both mapped in filterDict and known to SiteFilter
            for field, column in Constants.filterDict.items():
                if column in row and hasattr(siteFilter, field):
                    setattr(siteFilter, field, row[column])
            siteFilter.uDate = Constants.readDataTimeField("UDate", row)
            siteFilter.cDate = Constants.readDataTimeField("CDate", row)
            filters.append(siteFilter)
        return filters


    # #Extracts and returns properties, selected from database
    #
    # Builds one dict per selected `sites_properties` row from the columns mapped in propDict;
    # rows that yield no mapped column are skipped. "uDate" and "cDate" are added only when
    # readDataTimeField() returns a value. An error is logged when the query result is not
    # iterable.
    #
    # @param siteUpdate object whose id attribute identifies the site
    # @param queryCallback function for queries execution
    # @return properties list of dicts (empty when nothing usable was selected)
    def fillProperties(self, siteUpdate, queryCallback):
        properties = []
        # NOTE(review): id is interpolated unescaped, see process()
        query = "SELECT * FROM `sites_properties` WHERE Site_Id = '%s'" % siteUpdate.id
        res = queryCallback(query, Constants.PRIMARY_DB_ID, Constants.EXEC_NAME)
        if not hasattr(res, '__iter__'):
            logger.error("fillProperties return None")
            return properties

        for row in res:
            prop = {}
            for field, column in Constants.propDict.items():
                if column in row:
                    prop[field] = row[column]
            if not prop:
                continue
            for key, column in (("uDate", "UDate"), ("cDate", "CDate")):
                dateValue = Constants.readDataTimeField(column, row)
                if dateValue is not None:
                    prop[key] = dateValue
            properties.append(prop)
        return properties
def fillProperties(self, siteUpdate, queryCallback)
def fillUrls(self, siteUpdate, queryCallback)
def process(self, siteStatus, queryCallback)
def fillFilters(self, siteUpdate, queryCallback)