HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc_db.URLStatsTask.URLStatsTask Class Reference
Inheritance diagram for dc_db.URLStatsTask.URLStatsTask:
Collaboration diagram for dc_db.URLStatsTask.URLStatsTask:

Public Member Functions

def __init__ (self, keyValueStorageDir, rawDataDir, dBDataTask)
 
def process (self, urlStatses, queryCallback)
 
def fetchStatsFromDB (self, urlStats, queryCallback)
 
- Public Member Functions inherited from dc_db.BaseTask.BaseTask
def isSiteExist (self, siteId, queryCallback, userId=None)
 
def generateCriterionSQL (self, criterions, additionWhere=None, siteId=None)
 
def fetchByCriterions (self, criterions, queryCallback)
 
def dbLock (self, mutexName, queryCallback, sleepTime=1, mutexLockTTL=Constants.DEFAULT_LOCK_TTL)
 
def dbUnlock (self, mutexName, queryCallback)
 
def createUrlsInsertQuery (self, siteId, localKeys, localValues)
 
def copyUrlsToDcUrls (self, siteId, queryCallback)
 
def statisticLogUpdate (self, localObj, urlMd5, siteId, status, queryCallback, isInsert=False)
 
def calculateMd5FormUrl (self, url, urlType, useNormilize=False)
 

Public Attributes

 uRLCleanUpTask
 

Static Public Attributes

string SQL_STATS_TEMPLATE = "SELECT * FROM %s WHERE `URLMd5`='%s'"
 

Additional Inherited Members

- Static Public Member Functions inherited from dc_db.BaseTask.BaseTask
def readValueFromSiteProp (siteId, propName, queryCallback, urlMd5=None)
 

Detailed Description

Definition at line 20 of file URLStatsTask.py.

Constructor & Destructor Documentation

◆ __init__()

def dc_db.URLStatsTask.URLStatsTask.__init__ (   self,
  keyValueStorageDir,
  rawDataDir,
  dBDataTask 
)

Definition at line 26 of file URLStatsTask.py.

def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask):
  """Initialise the base task and build the URL clean-up helper.

  The three arguments are passed through unchanged to the URLCleanUpTask
  constructor; the resulting object is kept in self.uRLCleanUpTask.
  """
  super(URLStatsTask, self).__init__()
  cleanUpTask = URLCleanUpTask(keyValueStorageDir, rawDataDir, dBDataTask)
  self.uRLCleanUpTask = cleanUpTask
29 
30 
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ fetchStatsFromDB()

def dc_db.URLStatsTask.URLStatsTask.fetchStatsFromDB (   self,
  urlStats,
  queryCallback 
)

Definition at line 112 of file URLStatsTask.py.

def fetchStatsFromDB(self, urlStats, queryCallback):
  """Fetch the statistics rows stored for one URL MD5.

  The table name is built from Constants.DC_FREQ_TABLE_NAME_TEMPLATE and
  urlStats.siteId, and the query from SQL_STATS_TEMPLATE and urlStats.urlMd5.
  The query is executed through queryCallback against Constants.STAT_DB_ID.

  Returns whatever queryCallback returns (possibly None). Any "CDate" and
  "MDate" values present in the returned rows are converted to str in place.
  """
  # NOTE(review): siteId and urlMd5 are interpolated straight into the SQL
  # text; confirm both are validated upstream.
  freqTable = Constants.DC_FREQ_TABLE_NAME_TEMPLATE % urlStats.siteId
  sql = self.SQL_STATS_TEMPLATE % (freqTable, urlStats.urlMd5)
  rows = queryCallback(sql, Constants.STAT_DB_ID, Constants.EXEC_NAME)
  if rows is None:
    return rows
  for row in rows:
    # Rows are assumed to be dict-like (membership test + item assignment).
    for dateField in ("CDate", "MDate"):
      if dateField in row:
        row[dateField] = str(row[dateField])
  return rows
123 
Here is the caller graph for this function:

◆ process()

def dc_db.URLStatsTask.URLStatsTask.process (   self,
  urlStatses,
  queryCallback 
)

Definition at line 36 of file URLStatsTask.py.

def process(self, urlStatses, queryCallback):
  """Build one URLStatsResponse per URL-stats request.

  For every request in urlStatses the set of URL MD5s is resolved first:
    * urlMd5 set            -> that single MD5 is used;
    * only urlCriterions    -> MD5s selected by uRLCleanUpTask.extractUrlByCriterions;
    * only statsCriterions  -> MD5s selected from the statistics DB
                               (Constants.STAT_DB_ID, DC_FREQ_TABLE_NAME_TEMPLATE);
    * both criterions       -> each MD5 found by urlCriterions is re-checked
                               against statsCriterions restricted to that MD5.
  Then fetchStatsFromDB() is called for each MD5 and the rows are accumulated
  in the response's freqRows.

  Parameters:
    urlStatses    - iterable of request objects (None entries are allowed).
    queryCallback - callable used to execute SQL; forwarded unchanged.

  Returns a list aligned with urlStatses; an entry is None when the request
  was None or no MD5 was resolved for it.

  Side effect: urlStats.urlMd5 is overwritten with each resolved MD5 while
  fetching (fetchStatsFromDB reads it from the request object).
  """
  whereKey = dc.EventObjects.URLFetch.CRITERION_WHERE
  md5WhereTemplate = "`UrlMd5` = '%s'"
  uRLStatsResponses = []
  for urlStats in urlStatses:
    uRLStatsResponse = None
    localMd5s = []
    if urlStats is not None:
      if urlStats.urlMd5 is None:
        hasUrlCriterions = urlStats.urlCriterions is not None and len(urlStats.urlCriterions) > 0
        if hasUrlCriterions:
          localMd5s = self.uRLCleanUpTask.extractUrlByCriterions(urlStats.siteId, False,
                                                                 urlStats.urlCriterions, queryCallback)
        if urlStats.statsCriterions is not None:
          statsMd5s = []
          if not hasUrlCriterions:
            statsMd5s = self.uRLCleanUpTask.extractUrlByCriterions(urlStats.siteId, False,
                                                                   urlStats.statsCriterions, queryCallback,
                                                                   Constants.STAT_DB_ID,
                                                                   Constants.DC_FREQ_TABLE_NAME_TEMPLATE)
          else:
            for localMd5 in localMd5s:
              # Work on a private copy so the caller's criterions are never modified.
              perUrlCriterions = copy.deepcopy(urlStats.statsCriterions)
              md5Clause = md5WhereTemplate % localMd5
              baseWhere = perUrlCriterions[whereKey] if whereKey in perUrlCriterions else None
              if baseWhere:
                # Keep the caller's condition and narrow it to this MD5. The
                # previous code replaced it with a clause starting with " AND ".
                perUrlCriterions[whereKey] = "(" + str(baseWhere) + ") AND " + md5Clause
              else:
                perUrlCriterions[whereKey] = md5Clause
              statsMd5s += self.uRLCleanUpTask.extractUrlByCriterions(urlStats.siteId, False,
                                                                      perUrlCriterions, queryCallback,
                                                                      Constants.STAT_DB_ID,
                                                                      Constants.DC_FREQ_TABLE_NAME_TEMPLATE)
          localMd5s = statsMd5s
      else:
        localMd5s.append(urlStats.urlMd5)
      logger.debug(">>> [URLStatsTask] localUrls size = " + str(len(localMd5s)))
      for localMd5 in localMd5s:
        try:
          urlStats.urlMd5 = localMd5
          res = self.fetchStatsFromDB(urlStats, queryCallback)
          if uRLStatsResponse is None:
            uRLStatsResponse = dc.EventObjects.URLStatsResponse([], urlStats.siteId)
          if res is not None and len(res) > 0:
            uRLStatsResponse.freqRows.extend(res)
        except Exception as ex:
          # Best effort: a failure for one MD5 must not abort the remaining ones.
          logger.debug(">>> [URLStatsTask] Some Type Exception = " + str(type(ex)) + " " + str(ex))
    uRLStatsResponses.append(uRLStatsResponse)

  return uRLStatsResponses
105 
106 
Here is the call graph for this function:
Here is the caller graph for this function:

Member Data Documentation

◆ SQL_STATS_TEMPLATE

string dc_db.URLStatsTask.URLStatsTask.SQL_STATS_TEMPLATE = "SELECT * FROM %s WHERE `URLMd5`='%s'"
static

Definition at line 22 of file URLStatsTask.py.

◆ uRLCleanUpTask

dc_db.URLStatsTask.URLStatsTask.uRLCleanUpTask

Definition at line 28 of file URLStatsTask.py.


The documentation for this class was generated from the following file: