HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
URLHistoryTask.py
Go to the documentation of this file.
'''
@package: dc
@author scorp
@link: http://hierarchical-cluster-engine.com/
@copyright: Copyright © 2013-2014 IOIX Ukraine
@license: http://hierarchical-cluster-engine.com/license/
@since: 0.1
'''
9 
10 import dc.EventObjects
11 import dc_db.Constants as Constants
12 from dc_db.BaseTask import BaseTask
13 from dc_db.URLCleanupTask import URLCleanUpTask
14 import app.Utils as Utils # pylint: disable=F0401
15 
# Module-level logger used by URLHistoryTask for its debug output
logger = Utils.MPLogger().getLogger()
17 
# #process URLHistoryTask task
#
# Fetches log rows for the URLs named by incoming urlHistory elements. A URL is given either
# directly by its URLMd5 or indirectly by URL selection criterions that are resolved through
# URLCleanUpTask.
class URLHistoryTask(BaseTask):

    # Query for the log rows of a single URL; filled with (table name, URLMd5)
    SQL_LOG_TEMPLATE = "SELECT * FROM %s WHERE `URLMd5`='%s'"
    # Query without a WHERE part; filled with the table name and then completed with the
    # text returned by generateCriterionSQL()
    SQL_LOG_TEMPLATE_SHORT = "SELECT * FROM %s"

    # #constructor
    #
    # @param keyValueStorageDir forwarded unchanged to the URLCleanUpTask constructor
    # @param rawDataDir forwarded unchanged to the URLCleanUpTask constructor
    # @param dBDataTask forwarded unchanged to the URLCleanUpTask constructor
    def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask):
        super(URLHistoryTask, self).__init__()
        # Helper task; process() uses its extractUrlByCriterions() to turn URL criterions
        # into the values that are then looked up as URLMd5
        self.uRLCleanUpTask = URLCleanUpTask(keyValueStorageDir, rawDataDir, dBDataTask)

    # #process - main class's execution point.
    #
    # Produces exactly one result entry per incoming element, in the same order. The entry is
    # None when the element itself is None, when no URLMd5 could be determined for it, or when
    # every fetch for it raised; otherwise it is an URLHistoryResponse whose logRows hold the
    # rows collected for all of the element's URLMd5 values.
    #
    # Side effect: urlHistory.urlMd5 is overwritten with each processed URLMd5, so after the call
    # it holds the last one processed.
    #
    # @param urlHistories incoming urlHistories element (list of urlHistory)
    # @param queryCallback function for queries execution
    # @return uRLHistoryResponses element
    def process(self, urlHistories, queryCallback):
        uRLHistoryResponses = []
        for urlHistory in urlHistories:
            uRLHistoryResponse = None
            if urlHistory is not None:
                if urlHistory.urlMd5 is not None:
                    # An explicit URLMd5 takes precedence over any criterions
                    localMd5s = [urlHistory.urlMd5]
                elif urlHistory.urlCriterions is not None:
                    # presumably returns a list of URLMd5 values - verify against URLCleanUpTask
                    localMd5s = self.uRLCleanUpTask.extractUrlByCriterions(urlHistory.siteId, False,
                                                                           urlHistory.urlCriterions,
                                                                           queryCallback)
                else:
                    localMd5s = []
                logger.debug(">>> [URLHistoryTask] localUrls size = " + str(len(localMd5s)))
                for localMd5 in localMd5s:
                    try:
                        # fetchLogsFromDB() reads the URLMd5 from the urlHistory object itself
                        urlHistory.urlMd5 = localMd5
                        res = self.fetchLogsFromDB(urlHistory, queryCallback, urlHistory.logCriterions)
                        # The response is created only after the first fetch that did not raise
                        if uRLHistoryResponse is None:
                            uRLHistoryResponse = dc.EventObjects.URLHistoryResponse([], urlHistory.siteId)
                        if res is not None and len(res) > 0:
                            uRLHistoryResponse.logRows.extend(res)
                    except Exception as ex:
                        # Best effort: a failure for one URLMd5 is logged and the remaining ones
                        # are still processed
                        logger.debug(">>> [URLHistoryTask] Some Type Exception = " + str(type(ex)) + " " + str(ex))
            uRLHistoryResponses.append(uRLHistoryResponse)
        return uRLHistoryResponses

    # #fetchLogsFromDB - method makes SQL response for fetching log data
    #
    # Builds a SELECT against the log table of urlHistory.siteId restricted to urlHistory.urlMd5,
    # executes it through queryCallback and converts the "CDate" and "ODate" values of every
    # returned row to strings in place.
    #
    # NOTE(review): siteId and urlMd5 are interpolated into the SQL text without escaping; confirm
    # that callers only pass trusted values or that queryCallback offers a parameterized form.
    #
    # @param urlHistory element of urlHistory object
    # @param queryCallback function for queries execution
    # @param logCriterions addition criterion for sql request
    # @return SQL response element (whatever queryCallback returned, possibly None)
    def fetchLogsFromDB(self, urlHistory, queryCallback, logCriterions=None):
        tableName = Constants.DC_LOG_TABLE_NAME_TEMPLATE % urlHistory.siteId
        if logCriterions is None:
            query = self.SQL_LOG_TEMPLATE % (tableName, urlHistory.urlMd5)
        else:
            # The URLMd5 restriction is handed to generateCriterionSQL() as the additional
            # WHERE condition so it is combined with the caller's criterions
            md5Condition = "`URLMd5` = '%s'" % urlHistory.urlMd5
            query = self.SQL_LOG_TEMPLATE_SHORT % tableName
            query += self.generateCriterionSQL(logCriterions, md5Condition)
        rows = queryCallback(query, Constants.LOG_DB_ID, Constants.EXEC_NAME)
        if rows is not None:
            for row in rows:
                # Date columns are replaced by their str() form when present
                for dateField in ("CDate", "ODate"):
                    if dateField in row:
                        row[dateField] = str(row[dateField])
        return rows
def process(self, urlHistories, queryCallback)
def fetchLogsFromDB(self, urlHistory, queryCallback, logCriterions=None)
def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask)
def generateCriterionSQL(self, criterions, additionWhere=None, siteId=None)
Definition: BaseTask.py:46