HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
URLVerifyTask.py
Go to the documentation of this file.
1 '''
2 @package: dc
3 @author igor
4 @link: http://hierarchical-cluster-engine.com/
5 @copyright: Copyright © 2013-2014 IOIX Ukraine
6 @license: http://hierarchical-cluster-engine.com/license/
7 @since: 0.1
8 '''
9 
10 import hashlib
11 import dc.EventObjects
12 from dc_db.BaseTask import BaseTask
13 from dc_db.URLFetchTask import URLFetchTask
14 import dc_db.Constants as Constants
15 import app.Utils as Utils # pylint: disable=F0401
16 
17 logger = Utils.MPLogger().getLogger()
18 
19 # #process urlVerify event
21 
22 
23  # #perform all actions necessary for url verification
24  #
25  # @param urlVerifies list of URLVerify objects
26  # @param queryCallback function for queries execution
27  # @param bdResolveFunc pointer to resolve DB external function
28  # @return list of URL objects
def process(self, urlVerifies, queryCallback, bdResolveFunc):
    """Verify every item of urlVerifies by delegating to fetchUrl.

    @param urlVerifies iterable of URLVerify objects
    @param queryCallback function for queries execution, forwarded to fetchUrl
    @param bdResolveFunc external function resolving a DB name, forwarded to fetchUrl
    @return list holding the fetchUrl result for each item, in input order
    """
    return [
        self.fetchUrl(item, queryCallback, bdResolveFunc)
        for item in urlVerifies
    ]
34 
35 
36  # #Verify current url in DB
37  #
38  # @param urlVerify instance of URLVerify object
39  # @param queryCallback function for queries execution
40  # @param bdResolveFunc pointer to resolve DB external function
41  # @return URL object built from the first matching row, or None if nothing was found
def fetchUrl(self, urlVerify, queryCallback, bdResolveFunc):
    """Look up one URL record in the site's URLs table.

    @param urlVerify URLVerify object; its dbName, siteId, url, urlType and
           criterions attributes are read
    @param queryCallback function for queries execution, called as
           queryCallback(query, dbIndex, Constants.EXEC_NAME)
    @param bdResolveFunc external function resolving a DB name to a DB index;
           a None result is treated as "no connection"
    @return object produced by URLFetchTask.fillUrlObj() from the first result
            row, or None when dbName is None, the DB cannot be resolved, or the
            query returns nothing
    """
    url = None
    if urlVerify.dbName is not None:
        dbIndex = bdResolveFunc(urlVerify.dbName)
        if dbIndex is not None:
            tableName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % urlVerify.siteId
            query = Constants.SELECT_SQL_TEMPLATE_SIMPLE % ("*", tableName)
            # For the plain URL type the lookup key is the MD5 hex digest of
            # the URL; for any other type the supplied value is used as the
            # key unchanged.
            if urlVerify.urlType == dc.EventObjects.URLStatus.URL_TYPE_URL:
                localUrlMd5 = hashlib.md5(urlVerify.url).hexdigest()
            else:
                localUrlMd5 = urlVerify.url
            # NOTE(review): the key is interpolated straight into the SQL text.
            # In the non-URL branch it is caller-supplied and unescaped, so
            # confirm that callers validate it before it reaches this method.
            additionWere = ("`UrlMd5`= '%s'" % localUrlMd5)
            # A missing criterions value is treated as an empty dict.
            criterions = urlVerify.criterions if urlVerify.criterions is not None else {}
            additionQueryStr = self.generateCriterionSQL(criterions, additionWere)
            if additionQueryStr is not None and len(additionQueryStr) > 0:
                query += " " + additionQueryStr
            res = queryCallback(query, dbIndex, Constants.EXEC_NAME)
            if res is not None and len(res) > 0:
                # Only the first matching row is used.
                url = URLFetchTask.fillUrlObj(res[0])
        else:
            # `url` is still None on this path, so the database name has to be
            # read from urlVerify; reading url.dbName raised AttributeError.
            logger.error("Error: there isn't %s database connection", str(urlVerify.dbName))
    return url
def fetchUrl(self, urlVerify, queryCallback, bdResolveFunc)
def process(self, urlVerifies, queryCallback, bdResolveFunc)
def generateCriterionSQL(self, criterions, additionWhere=None, siteId=None)
Definition: BaseTask.py:46