HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc_db.URLUpdateTask.URLUpdateTask Class Reference
Inheritance diagram for dc_db.URLUpdateTask.URLUpdateTask:
Collaboration diagram for dc_db.URLUpdateTask.URLUpdateTask:

Public Member Functions

def __init__ (self, keyValueStorageDir, rawDataDir, dBDataTask)
 
def process (self, urlUpdates, queryCallback)
 
def statisticUpdate (self, urlUpdate, queryCallback)
 
def updateURL (self, urlUpdate, queryCallback)
 
def urlPutOperation (self, urlObject, urlPutObject, queryCallback)
 
def attributesUpdate (self, attributes, queryCallback)
 
- Public Member Functions inherited from dc_db.BaseTask.BaseTask
def isSiteExist (self, siteId, queryCallback, userId=None)
 
def generateCriterionSQL (self, criterions, additionWhere=None, siteId=None)
 
def fetchByCriterions (self, criterions, queryCallback)
 
def dbLock (self, mutexName, queryCallback, sleepTime=1, mutexLockTTL=Constants.DEFAULT_LOCK_TTL)
 
def dbUnlock (self, mutexName, queryCallback)
 
def createUrlsInsertQuery (self, siteId, localKeys, localValues)
 
def copyUrlsToDcUrls (self, siteId, queryCallback)
 
def statisticLogUpdate (self, localObj, urlMd5, siteId, status, queryCallback, isInsert=False)
 
def calculateMd5FormUrl (self, url, urlType, useNormilize=False)
 

Public Attributes

 urlPutTask
 

Additional Inherited Members

- Static Public Member Functions inherited from dc_db.BaseTask.BaseTask
def readValueFromSiteProp (siteId, propName, queryCallback, urlMd5=None)
 

Detailed Description

Definition at line 21 of file URLUpdateTask.py.

Constructor & Destructor Documentation

◆ __init__()

def dc_db.URLUpdateTask.URLUpdateTask.__init__ (   self,
  keyValueStorageDir,
  rawDataDir,
  dBDataTask 
)

Definition at line 25 of file URLUpdateTask.py.

def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask):
    """Initialise the base task and build the nested URLPutTask.

    The three arguments are handed over unchanged, in the same order, to
    the URLPutTask constructor; the resulting object is kept in
    ``self.urlPutTask``.
    """
    super(URLUpdateTask, self).__init__()
    putTask = URLPutTask(keyValueStorageDir, rawDataDir, dBDataTask)
    self.urlPutTask = putTask
28 
29 
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ attributesUpdate()

def dc_db.URLUpdateTask.URLUpdateTask.attributesUpdate (   self,
  attributes,
  queryCallback 
)

Definition at line 120 of file URLUpdateTask.py.

def attributesUpdate(self, attributes, queryCallback):
    """Run ``attributes`` through a fresh AttrUpdateTask and log the outcome.

    The number of attributes is logged before the call and the value
    returned by ``AttrUpdateTask.process`` is logged after it. Nothing is
    returned to the caller.
    """
    attributesCount = len(attributes)
    logger.debug(">>> URLUpdateTask.attributesUpdate (len) = " + str(attributesCount))
    processingResult = AttrUpdateTask().process(attributes, queryCallback)
    logger.debug(">>> URLUpdateTask.attributesUpdate (res) == " + str(processingResult))
125 
Here is the caller graph for this function:

◆ process()

def dc_db.URLUpdateTask.URLUpdateTask.process (   self,
  urlUpdates,
  queryCallback 
)

Definition at line 35 of file URLUpdateTask.py.

# Apply every URL update request in urlUpdates and collect one status per request.
# NOTE(review): this listing lost its indentation when it was extracted, so the
# nesting implied by the comments below is the most plausible reading - confirm
# against URLUpdateTask.py before relying on it.
#
# urlUpdates    -- iterable of update objects; the code touches their siteId, url,
#                  type, urlMd5, attributes and (optionally) urlPut attributes.
# queryCallback -- passed through unchanged to every helper called here.
# Returns the GeneralResponse created on the first line; statuses are appended
# to its `statuses` list.
35  def process(self, urlUpdates, queryCallback):
36  ret = GeneralResponse()
37  status = False
38  for urlUpdate in urlUpdates:
# presumably reset for each item so a previous item's result cannot leak - verify
39  status = False
# An empty site id is replaced by the string "0".
40  if urlUpdate.siteId == "":
41  urlUpdate.siteId = "0"
# fillMD5() is called only when the urlMd5 attribute is missing altogether; an
# attribute that already exists is left untouched, whatever its value.
42  if not hasattr(urlUpdate, "urlMd5"):
43  urlUpdate.fillMD5(urlUpdate.url, urlUpdate.type)
# Statistics and the URL row are handled after isSiteExist() is consulted for
# this site id; status takes whatever updateURL() returns.
44  if self.isSiteExist(urlUpdate.siteId, queryCallback):
45  self.statisticUpdate(urlUpdate, queryCallback)
46  status = self.updateURL(urlUpdate, queryCallback)
# Attributes are forwarded only for a truthy status and a non-empty attribute list.
47  if status and urlUpdate.attributes is not None and len(urlUpdate.attributes) > 0:
48  self.attributesUpdate(urlUpdate.attributes, queryCallback)
49  ret.statuses.append(status)
# "urlPut" is looked up in the instance __dict__, so only an attribute set on
# the object itself is considered here.
50  if "urlPut" in urlUpdate.__dict__ and urlUpdate.urlPut is not None:
51  self.urlPutOperation(urlUpdate, urlUpdate.urlPut, queryCallback)
52  return ret
53 
54 
Here is the call graph for this function:
Here is the caller graph for this function:

◆ statisticUpdate()

def dc_db.URLUpdateTask.URLUpdateTask.statisticUpdate (   self,
  urlUpdate,
  queryCallback 
)

Definition at line 59 of file URLUpdateTask.py.

# Record statistics and log entries for one URL update.
# Reads the URL's currently stored Status, calls
# StatisticLogManager.statisticUpdate and StatisticLogManager.logUpdate, and
# calls statisticLogUpdate() when no previous status was found or it differs
# from urlUpdate.status.
# NOTE(review): the listing lost its indentation; the nesting of the statements
# that follow each `if` is inferred - confirm against URLUpdateTask.py.
#
# urlUpdate     -- update object; siteId, urlMd5 and status are read.
# queryCallback -- called as queryCallback(query, Constants.SECONDARY_DB_ID) and
#                  also handed to the StatisticLogManager / statisticLogUpdate calls.
# No value is returned.
59  def statisticUpdate(self, urlUpdate, queryCallback):
60  prevStatus = None
# NOTE(review): the table name and urlMd5 are interpolated straight into the SQL
# text with no escaping visible here - confirm both are validated upstream.
61  SQL_SELECT_STATUS_TEMPLATE = "SELECT `Status` FROM `%s` WHERE `URLMD5` = '%s'"
62  tableName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % urlUpdate.siteId
63  query = SQL_SELECT_STATUS_TEMPLATE % (tableName, urlUpdate.urlMd5)
64  ret = queryCallback(query, Constants.SECONDARY_DB_ID)
# Only the first column of the first row is used, and only when it is not None;
# otherwise prevStatus stays None.
65  if ret is not None and len(ret) > 0 and len(ret[0]) > 0 and ret[0][0] is not None:
66  prevStatus = int(ret[0][0])
67  StatisticLogManager.statisticUpdate(queryCallback, Constants.StatFreqConstants.FREQ_UPDATE, 1,
68  urlUpdate.siteId, urlUpdate.urlMd5)
69  StatisticLogManager.logUpdate(queryCallback, "LOG_UPDATE", urlUpdate, urlUpdate.siteId, urlUpdate.urlMd5)
# prevStatus is an int (or None) while urlUpdate.status is compared as-is.
70  if prevStatus is None or prevStatus != urlUpdate.status:
71  self.statisticLogUpdate(urlUpdate, urlUpdate.urlMd5, urlUpdate.siteId, urlUpdate.status, queryCallback)
72 
73 
Here is the call graph for this function:
Here is the caller graph for this function:

◆ updateURL()

def dc_db.URLUpdateTask.URLUpdateTask.updateURL (   self,
  urlUpdate,
  queryCallback 
)

Definition at line 78 of file URLUpdateTask.py.

# Build and run the "UPDATE IGNORE" statement for one URL in its per-site table.
# NOTE(review): the listing lost its indentation; the nesting described below is
# inferred - confirm against URLUpdateTask.py.
#
# urlUpdate     -- update object; eTag (rewritten in place), siteId, urlMd5 and
#                  criterions are read, and the object is passed to
#                  Constants.getFieldsValuesTuple to obtain the SET fields.
# queryCallback -- called as queryCallback(query, Constants.SECONDARY_DB_ID).
# Returns False by default; ret is set to True right after the queryCallback call.
78  def updateURL(self, urlUpdate, queryCallback):
79  ret = False
80  SQL_UPDATE_URLSITE_TEMPLATE = "UPDATE IGNORE `%s` SET %s"
# Leading and trailing single/double quote characters are removed from the eTag.
81  if urlUpdate.eTag is not None:
82  urlUpdate.eTag = urlUpdate.eTag.strip("\"'")
83  fields, values = Constants.getFieldsValuesTuple(urlUpdate, Constants.URLTableDict)
84  fieldValueString = Constants.createFieldsValuesString(fields, values, Constants.urlExcludeList)
# An empty (or None) field/value string means there is nothing to put after SET.
85  if fieldValueString and len(fieldValueString) > 0:
86  tableName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % urlUpdate.siteId
87  query = SQL_UPDATE_URLSITE_TEMPLATE % (tableName, fieldValueString)
88  additionWhere = None
# NOTE(review): urlMd5 is interpolated into the SQL text with no escaping visible here.
89  if urlUpdate.urlMd5 is not None:
90  additionWhere = ("`URLMD5` = '%s'" % urlUpdate.urlMd5)
91  additionQueryStr = self.generateCriterionSQL(urlUpdate.criterions, additionWhere)
# NOTE(review): when additionQueryStr is empty nothing is appended to the query.
# If queryCallback still runs in that case (not recoverable from this listing),
# the statement is sent exactly as built from the template above - confirm that
# this is intended.
92  if len(additionQueryStr) > 0:
93  query += " "
94  query += additionQueryStr
95  queryCallback(query, Constants.SECONDARY_DB_ID)
96  ret = True
97  return ret
98 
99 
def updateURL(input_url, site)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ urlPutOperation()

def dc_db.URLUpdateTask.URLUpdateTask.urlPutOperation (   self,
  urlObject,
  urlPutObject,
  queryCallback 
)

Definition at line 105 of file URLUpdateTask.py.

# Fill in missing identifiers on urlPutObject from urlObject and hand it to the
# nested URLPutTask.
# NOTE(review): the listing lost its indentation; the nesting described below is
# inferred - confirm against URLUpdateTask.py.
#
# urlObject     -- source of siteId / urlMd5 fallbacks.
# urlPutObject  -- object to process; siteId and urlMd5 may be overwritten here.
# queryCallback -- passed unchanged to self.urlPutTask.process.
# No value is returned.
105  def urlPutOperation(self, urlObject, urlPutObject, queryCallback):
# siteId is copied only when the put object has none and the URL object has one.
106  if urlPutObject.siteId is None and urlObject.siteId is not None:
107  urlPutObject.siteId = urlObject.siteId
# The value is concatenated to the message with "+", without str().
108  logger.debug(">>> URLPut.siteId is None and set to the = " + urlPutObject.siteId)
# Same fallback rule for urlMd5.
109  if urlPutObject.urlMd5 is None and urlObject.urlMd5 is not None:
110  urlPutObject.urlMd5 = urlObject.urlMd5
111  logger.debug(">>> URLPut.urlMd5 is None and set to the = " + urlPutObject.urlMd5)
112  logger.debug(">>> Call internal URLPut")
# The put object is wrapped in a one-element list for URLPutTask.process.
113  self.urlPutTask.process([urlPutObject], queryCallback)
114 
115 
Here is the call graph for this function:
Here is the caller graph for this function:

Member Data Documentation

◆ urlPutTask

dc_db.URLUpdateTask.URLUpdateTask.urlPutTask

Definition at line 27 of file URLUpdateTask.py.


The documentation for this class was generated from the following file: