HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
URLPutTask.py
Go to the documentation of this file.
1 '''
2 @package: dc
3 @author scorp
4 @link: http://hierarchical-cluster-engine.com/
5 @copyright: Copyright © 2013-2014 IOIX Ukraine
6 @license: http://hierarchical-cluster-engine.com/license/
7 @since: 0.1
8 '''
9 
10 import base64
11 import dc.EventObjects
12 import dc_db.Constants as Constants
13 from dc_db.BaseTask import BaseTask
14 from dc_db.URLCleanupTask import URLCleanUpTask
15 from app.Utils import ExceptionLog
16 import app.Utils as Utils # pylint: disable=F0401
17 
18 logger = Utils.MPLogger().getLogger()
19 
20 
21 # #process URLPutTask event
23 
24 
25  # #constructor
26  #
27  # @param dBDataTask instance of DBDataTask module
28  def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask):
29  super(URLPutTask, self).__init__()
30  self.uRLCleanUpTask = URLCleanUpTask(keyValueStorageDir, rawDataDir, dBDataTask)
31  self.dBDataTask = dBDataTask
32 
33 
34  # #dispatch every URLPut object to the DBDataTask handler, once per resolved url md5
35  #
36  # @param urlPuts list of URLPut objects
37  # @param queryCallback function for queries execution
38  # @return list of urlPutResponses objects
39  def process(self, urlPuts, queryCallback): # pylint: disable=W0613
40  urlPutResponses = []
41  for urlPut in urlPuts:
42  urlsCount = 0
43  localMd5s = []
44  if urlPut.urlMd5 is None:
45  logger.debug(">>> urlPuts.urlMd5 is None, fetch by criterions")
46  localMd5s = self.uRLCleanUpTask.extractUrlByCriterions(urlPuts.siteId, False, urlPuts.criterions,
47  queryCallback, Constants.SECONDARY_DB_ID)
48  else:
49  localMd5s.append(urlPut.urlMd5)
50  logger.debug(">>> [URL_PUT] localUrls size = " + str(len(localMd5s)))
51 
52  if "data" in urlPut.putDict and urlPut.contentType != dc.EventObjects.Content.CONTENT_PROCESSOR_CONTENT:
53  try:
54  urlPut.putDict["data"] = base64.b64decode(urlPut.putDict["data"])
55  except TypeError:
56  pass
57  for localMd5 in localMd5s:
58  try:
59  urlPut.urlMd5 = localMd5
60  urlPutResponses.append(self.dBDataTask.process(urlPut, queryCallback))
61  urlsCount = urlsCount + 1
62  except Exception as err:
63  ExceptionLog.handler(logger, err, ">>> [URL_PUT] Exception:")
64  logger.debug(">>> [URL_PUT] Some Type Exception [LOOP] = " + str(type(err)))
65 
66  return urlPutResponses
def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask)
Definition: URLPutTask.py:28
def process(self, urlPuts, queryCallback)
Definition: URLPutTask.py:39