HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc_db.URLPurgeTask.URLPurgeTask Class Reference
Inheritance diagram for dc_db.URLPurgeTask.URLPurgeTask:
Collaboration diagram for dc_db.URLPurgeTask.URLPurgeTask:

Public Member Functions

def __init__ (self, keyValueStorageDir, rawDataDir, dBDataTask)
 
def isAvailableUrls (self, urlPurge, tbName, queryCallback)
 
def getAdditionPurges (self, urlPurge, siteLimits, queryCallback)
 
def isDeleteTableExist (self, siteId, queryCallback)
 
def process (self, urlPurges, queryCallback)
 
def deleteUrlDBField (self, urlPurge, queryCallback)
 
def checkUrlInDcUrls (self, urlPurge, queryCallback)
 
- Public Member Functions inherited from dc_db.BaseTask.BaseTask
def isSiteExist (self, siteId, queryCallback, userId=None)
 
def generateCriterionSQL (self, criterions, additionWhere=None, siteId=None)
 
def fetchByCriterions (self, criterions, queryCallback)
 
def dbLock (self, mutexName, queryCallback, sleepTime=1, mutexLockTTL=Constants.DEFAULT_LOCK_TTL)
 
def dbUnlock (self, mutexName, queryCallback)
 
def createUrlsInsertQuery (self, siteId, localKeys, localValues)
 
def copyUrlsToDcUrls (self, siteId, queryCallback)
 
def statisticLogUpdate (self, localObj, urlMd5, siteId, status, queryCallback, isInsert=False)
 
def calculateMd5FormUrl (self, url, urlType, useNormilize=False)
 

Public Attributes

 uRLCleanUpTask
 
 dBDataTask
 
 urlMd5
 

Additional Inherited Members

- Static Public Member Functions inherited from dc_db.BaseTask.BaseTask
def readValueFromSiteProp (siteId, propName, queryCallback, urlMd5=None)
 

Detailed Description

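Task class that purges URL records. `process()` takes a list of purge requests, expands requests that carry no site id into per-site requests via `getAdditionPurges()`, and for each candidate URL can remove stored data through the wrapped `URLCleanUpTask` and delete the matching `UrlMd5` row via `deleteUrlDBField()`. Database helpers shared with other tasks are inherited from `dc_db.BaseTask.BaseTask`.

Definition at line 24 of file URLPurgeTask.py.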

Constructor & Destructor Documentation

◆ __init__()

def dc_db.URLPurgeTask.URLPurgeTask.__init__ (   self,
  keyValueStorageDir,
  rawDataDir,
  dBDataTask 
)

Definition at line 30 of file URLPurgeTask.py.

def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask):
    """Set up the purge task and the clean-up task it delegates to.

    Args:
        keyValueStorageDir: forwarded unchanged to ``URLCleanUpTask``.
        rawDataDir: forwarded unchanged to ``URLCleanUpTask``.
        dBDataTask: kept on the instance and also forwarded to
            ``URLCleanUpTask``.
    """
    super(URLPurgeTask, self).__init__()
    # Filled in by deleteUrlDBField(); stays None until that method runs.
    self.urlMd5 = None
    self.dBDataTask = dBDataTask
    self.uRLCleanUpTask = URLCleanUpTask(keyValueStorageDir, rawDataDir, dBDataTask)
35 
36 
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ checkUrlInDcUrls()

def dc_db.URLPurgeTask.URLPurgeTask.checkUrlInDcUrls (   self,
  urlPurge,
  queryCallback 
)

Definition at line 185 of file URLPurgeTask.py.

def checkUrlInDcUrls(self, urlPurge, queryCallback):
    """Report whether the site's URL table still holds a matching row.

    A row matches when its ``UrlMd5`` equals the request's md5 and its
    ``tcDate`` is not among the ``tcDate`` values stored for the same md5 in
    the ``dc_urls_deleted`` copy of the table.

    Args:
        urlPurge: purge request; ``siteId``, ``urlType`` and ``url`` are read.
            For ``URL_TYPE_URL`` requests ``url`` is hashed with md5,
            otherwise it is used as the md5 value as-is.
        queryCallback: callable invoked as ``queryCallback(query, dbId)``;
            its result is only tested for ``None`` and for its length.

    Returns:
        True when the query yields at least one row, False otherwise.
    """
    tableName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % urlPurge.siteId
    urlMd5 = hashlib.md5(urlPurge.url).hexdigest() \
        if urlPurge.urlType == dc.EventObjects.URLStatus.URL_TYPE_URL else urlPurge.url
    # NOTE(review): the md5 value is interpolated straight into the SQL text;
    # for non-URL requests it comes from the request unchanged.
    lookupTemplate = ("SELECT url FROM %s WHERE `UrlMd5` = '%s' AND `tcDate` NOT IN "
                      "(SELECT `tcDate` FROM dc_urls_deleted.%s WHERE `UrlMd5` = '%s') LIMIT 1")
    rows = queryCallback(lookupTemplate % (tableName, urlMd5, tableName, urlMd5),
                         Constants.SECONDARY_DB_ID)
    found = rows is not None and len(rows) > 0
    logger.debug(">>> [PURGE] checkUrlInDcUrls 'UrlMd5' = " + urlMd5)
    logger.debug(" has record in dc_urls" if found else " DOESN'T has record in dc_urls")
    return found
204 
Here is the caller graph for this function:

◆ deleteUrlDBField()

def dc_db.URLPurgeTask.URLPurgeTask.deleteUrlDBField (   self,
  urlPurge,
  queryCallback 
)

Definition at line 170 of file URLPurgeTask.py.

def deleteUrlDBField(self, urlPurge, queryCallback):
    """Delete the request's ``UrlMd5`` row from the site's URL table.

    The md5 used for the delete is also stored in ``self.urlMd5``.

    Args:
        urlPurge: purge request; ``siteId``, ``urlType`` and ``url`` are read.
            For ``URL_TYPE_URL`` requests ``url`` is hashed with md5,
            otherwise it is used as the md5 value as-is.
        queryCallback: callable invoked as ``queryCallback(query, dbId)`` with
            ``Constants.FOURTH_DB_ID``; its result is ignored.
    """
    tableName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % urlPurge.siteId
    self.urlMd5 = hashlib.md5(urlPurge.url).hexdigest() \
        if urlPurge.urlType == dc.EventObjects.URLStatus.URL_TYPE_URL else urlPurge.url
    queryCallback("DELETE FROM %s WHERE `UrlMd5` = '%s'" % (tableName, self.urlMd5),
                  Constants.FOURTH_DB_ID)
179 
180 
Here is the caller graph for this function:

◆ getAdditionPurges()

def dc_db.URLPurgeTask.URLPurgeTask.getAdditionPurges (   self,
  urlPurge,
  siteLimits,
  queryCallback 
)

Definition at line 63 of file URLPurgeTask.py.

def getAdditionPurges(self, urlPurge, siteLimits, queryCallback):
    """Expand a site-less purge request into one request per eligible table.

    Tables are listed with ``SHOW TABLES``; starting at index
    ``siteLimits[0]``, each table that ``isAvailableUrls()`` accepts yields a
    deep copy of ``urlPurge`` with ``url`` cleared and ``siteId`` set to the
    table name minus its first five characters.

    Args:
        urlPurge: template purge request (never modified; copies are made).
        siteLimits: ``[start, count]``; ``start`` must convert to an int >= 0.
            A ``count`` equal to ``URLPurge.ALL_SITES`` means "no cap beyond
            the number of tables".
        queryCallback: callable invoked as ``queryCallback(query, dbId)``.

    Returns:
        List of generated purge requests; empty when ``siteLimits`` is
        invalid (an error is logged) or the table listing is ``None``.
    """
    limitsValid = siteLimits is not None and hasattr(siteLimits, '__iter__') and \
        len(siteLimits) >= 2 and int(siteLimits[0]) >= 0
    if not limitsValid:
        logger.error(">>> siteLimits field must be type of [x, x] and not None")
        return []

    purges = []
    tables = queryCallback("SHOW TABLES", Constants.FOURTH_DB_ID)
    if tables is None:
        return purges

    firstIndex = int(siteLimits[0])
    maxCount = int(siteLimits[1])
    if maxCount == dc.EventObjects.URLPurge.ALL_SITES:
        maxCount = len(tables)

    for index in xrange(firstIndex, len(tables)):
        if len(purges) >= maxCount:
            break
        row = tables[index]
        if row is None or row[0] is None:
            continue
        if not self.isAvailableUrls(urlPurge, row[0], queryCallback):
            continue
        sitePurge = copy.deepcopy(urlPurge)
        # presumably strips a 5-character table-name prefix -- TODO confirm
        sitePurge.siteId = row[0][5:]
        sitePurge.url = None
        purges.append(sitePurge)
    return purges
86 
87 
Here is the call graph for this function:
Here is the caller graph for this function:

◆ isAvailableUrls()

def dc_db.URLPurgeTask.URLPurgeTask.isAvailableUrls (   self,
  urlPurge,
  tbName,
  queryCallback 
)

Definition at line 43 of file URLPurgeTask.py.

def isAvailableUrls(self, urlPurge, tbName, queryCallback):
    """Tell whether a table holds any URL matching the request's criterions.

    Args:
        urlPurge: purge request; only ``criterions`` is read.
        tbName: table name; its first five characters are dropped before it
            is handed to ``extractUrlByCriterions`` (presumably a fixed
            prefix -- TODO confirm).
        queryCallback: forwarded to ``extractUrlByCriterions``.

    Returns:
        True when the extraction returns a non-empty result, False otherwise.
    """
    matchedUrls = self.uRLCleanUpTask.extractUrlByCriterions(tbName[5:], True, urlPurge.criterions,
                                                             queryCallback, Constants.FOURTH_DB_ID)
    hasUrls = matchedUrls is not None and len(matchedUrls) > 0
    if hasUrls:
        prefix = ">>> Has urls by criterions, bdName = "
    else:
        prefix = ">>> Not content urls by criterions, bdName = "
    logger.debug(prefix + tbName)
    return hasUrls
54 
55 
56 
Here is the caller graph for this function:

◆ isDeleteTableExist()

def dc_db.URLPurgeTask.URLPurgeTask.isDeleteTableExist (   self,
  siteId,
  queryCallback 
)

Definition at line 93 of file URLPurgeTask.py.

def isDeleteTableExist(self, siteId, queryCallback):
    """Check that the site's URL table shows up in ``SHOW TABLES``.

    Args:
        siteId: value substituted into
            ``Constants.DC_URLS_TABLE_NAME_TEMPLATE`` to build the table name.
        queryCallback: callable invoked as ``queryCallback(query, dbId)`` with
            ``Constants.FOURTH_DB_ID``.

    Returns:
        True when any iterable row of the result contains the table name,
        False otherwise (including a ``None`` or non-iterable result).
    """
    tableName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % siteId
    rows = queryCallback("SHOW TABLES", Constants.FOURTH_DB_ID)
    logger.debug(">>> Delete tables = " + str(rows))
    if rows is None or not hasattr(rows, '__iter__'):
        return False
    # any() stops at the first matching row, like the explicit break it replaces.
    return any(row is not None and hasattr(row, '__iter__') and tableName in row
               for row in rows)
105 
106 
Here is the caller graph for this function:

◆ process()

def dc_db.URLPurgeTask.URLPurgeTask.process (   self,
  urlPurges,
  queryCallback 
)

Definition at line 112 of file URLPurgeTask.py.

112  def process(self, urlPurges, queryCallback):
# Run every purge request in urlPurges and return a GeneralResponse whose
# `statuses` list receives [siteId, urlsCount] pairs.
#   urlPurges     -- sequence of purge request objects; this method reads or writes
#                    their siteId, siteLimits, url, urlType and criterions attributes.
#   queryCallback -- callable handed to every helper that queries the database.
# NOTE(review): this listing has lost its indentation, so block nesting is not
# visible here -- confirm against URLPurgeTask.py before relying on the notes below.
113  generalResponse = GeneralResponse()
114 
# Start from a deep copy and append per-site requests generated for every
# request that arrives without a siteId.
115  newPurges = copy.deepcopy(urlPurges)
116  for urlPurge in urlPurges:
117  if urlPurge.siteId is None:
118  logger.debug(">>> Site Limits = " + str(urlPurge.siteLimits))
119  newPurges = newPurges + self.getAdditionPurges(urlPurge, urlPurge.siteLimits, queryCallback)
120 
# A length difference means extra requests were appended above.
# NOTE(review): whether the reassignment on line 123 sits inside this `if` cannot
# be told from the flattened listing -- confirm.
121  if len(urlPurges) != len(newPurges):
122  logger.debug(">>> Purges reassign")
123  urlPurges = newPurges
124 
125  for urlPurge in urlPurges:
126  # @todo add more complex case
127  urlsCount = 0
# An empty-string siteId is replaced by "0".
128  if urlPurge.siteId == "":
129  urlPurge.siteId = "0"
# Requests whose table is not reported by isDeleteTableExist are only logged.
130  if self.isDeleteTableExist(urlPurge.siteId, queryCallback):
131  try:
132  localUrls = []
# Without an explicit url the candidates come from extractUrlByCriterions;
# isUrlExtract is True only for URL_TYPE_URL requests.
133  if urlPurge.url is None:
134  isUrlExtract = False
135  logger.debug(">>> UrlType = " + str(urlPurge.urlType))
136  if urlPurge.urlType == dc.EventObjects.URLStatus.URL_TYPE_URL:
137  isUrlExtract = True
138  localUrls = self.uRLCleanUpTask.extractUrlByCriterions(urlPurge.siteId, isUrlExtract, urlPurge.criterions,
139  queryCallback, Constants.FOURTH_DB_ID)
140  else:
141  localUrls.append(urlPurge.url)
142  logger.debug(">>> [PURGE] localUrls size = " + str(len(localUrls)))
143  for localUrl in localUrls:
144  try:
# The request object is reused: its url is overwritten with each candidate in turn.
145  urlPurge.url = localUrl
# Storage clean-up is attempted only when checkUrlInDcUrls reports False.
# NOTE(review): it is not visible here whether lines 149-153 are also nested
# under this check or run for every candidate -- confirm.
146  if not self.checkUrlInDcUrls(urlPurge, queryCallback):
147  self.uRLCleanUpTask.deleteFromDataStorage(urlPurge, queryCallback)
148  self.uRLCleanUpTask.deleteFromRawStorage(urlPurge)
149  self.deleteUrlDBField(urlPurge, queryCallback)
# self.urlMd5 is assigned by deleteUrlDBField.
150  if self.urlMd5 is not None:
151  StatisticLogManager.statisticUpdate(queryCallback, Constants.StatFreqConstants.FREQ_PURGED_STATE, 1,
152  urlPurge.siteId, self.urlMd5)
153  urlsCount = urlsCount + 1
# Failures are logged at debug level only and are not re-raised.
154  except Exception as ex:
155  logger.debug(">>> [PURGE] Some Type Exception [LOOP] = " + str(type(ex)) + " " + str(ex))
156  except Exception as ex:
157  logger.debug(">>> [PURGE] Some Type Exception = " + str(type(ex)) + " " + str(ex))
158  else:
159  logger.debug(">>> [PURGE] Table not found, SiteId = " + str(urlPurge.siteId))
160 
# Records the site id together with the counter accumulated above.
# NOTE(review): presumably executed once per request at loop level -- confirm nesting.
161  generalResponse.statuses.append([urlPurge.siteId, urlsCount])
162  logger.debug(">>> [PURGE] Rsult = " + str([urlPurge.siteId, urlsCount]))
163  return generalResponse
164 
165 
Here is the call graph for this function:
Here is the caller graph for this function:

Member Data Documentation

◆ dBDataTask

dc_db.URLPurgeTask.URLPurgeTask.dBDataTask

Definition at line 33 of file URLPurgeTask.py.

◆ uRLCleanUpTask

dc_db.URLPurgeTask.URLPurgeTask.uRLCleanUpTask

Definition at line 32 of file URLPurgeTask.py.

◆ urlMd5

dc_db.URLPurgeTask.URLPurgeTask.urlMd5

Definition at line 34 of file URLPurgeTask.py.


The documentation for this class was generated from the following file: