4 @link: http://hierarchical-cluster-engine.com/ 5 @copyright: Copyright © 2013-2014 IOIX Ukraine 6 @license: http://hierarchical-cluster-engine.com/license/ 30 def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask):
45 localUrls = self.
uRLCleanUpTask.extractUrlByCriterions(tbName[5:],
True, urlPurge.criterions,
46 queryCallback, Constants.FOURTH_DB_ID)
47 if localUrls
is not None and len(localUrls) > 0:
50 logger.debug(
">>> Has urls by criterions, bdName = " + tbName)
52 logger.debug(
">>> Not content urls by criterions, bdName = " + tbName)
65 if siteLimits
is not None and hasattr(siteLimits,
'__iter__')
and len(siteLimits) >= 2
and int(siteLimits[0]) >= 0:
67 res = queryCallback(query, Constants.FOURTH_DB_ID)
69 startLimit = int(siteLimits[0])
70 countLimit = int(siteLimits[1])
71 if countLimit == dc.EventObjects.URLPurge.ALL_SITES:
74 for num
in xrange(i, len(res)):
75 if len(ret) >= countLimit:
77 if res[num]
is not None and res[num][0]
is not None and \
79 localPurge = copy.deepcopy(urlPurge)
80 localPurge.siteId = res[num][0][5:]
82 ret.append(localPurge)
84 logger.error(
">>> siteLimits field must be type of [x, x] and not None")
96 dbName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % siteId
97 res = queryCallback(query, Constants.FOURTH_DB_ID)
98 logger.debug(
">>> Delete tables = " + str(res))
99 if res
is not None and hasattr(res,
'__iter__'):
101 if table
is not None and hasattr(table,
'__iter__')
and dbName
in table:
115 newPurges = copy.deepcopy(urlPurges)
116 for urlPurge
in urlPurges:
117 if urlPurge.siteId
is None:
118 logger.debug(
">>> Site Limits = " + str(urlPurge.siteLimits))
119 newPurges = newPurges + self.
getAdditionPurges(urlPurge, urlPurge.siteLimits, queryCallback)
121 if len(urlPurges) != len(newPurges):
122 logger.debug(
">>> Purges reassign")
123 urlPurges = newPurges
125 for urlPurge
in urlPurges:
128 if urlPurge.siteId ==
"":
129 urlPurge.siteId =
"0" 133 if urlPurge.url
is None:
135 logger.debug(
">>> UrlType = " + str(urlPurge.urlType))
136 if urlPurge.urlType == dc.EventObjects.URLStatus.URL_TYPE_URL:
138 localUrls = self.
uRLCleanUpTask.extractUrlByCriterions(urlPurge.siteId, isUrlExtract, urlPurge.criterions,
139 queryCallback, Constants.FOURTH_DB_ID)
141 localUrls.append(urlPurge.url)
142 logger.debug(
">>> [PURGE] localUrls size = " + str(len(localUrls)))
143 for localUrl
in localUrls:
145 urlPurge.url = localUrl
147 self.
uRLCleanUpTask.deleteFromDataStorage(urlPurge, queryCallback)
150 if self.
urlMd5 is not None:
151 StatisticLogManager.statisticUpdate(queryCallback, Constants.StatFreqConstants.FREQ_PURGED_STATE, 1,
152 urlPurge.siteId, self.
urlMd5)
153 urlsCount = urlsCount + 1
154 except Exception
as ex:
155 logger.debug(
">>> [PURGE] Some Type Exception [LOOP] = " + str(
type(ex)) +
" " + str(ex))
156 except Exception
as ex:
157 logger.debug(
">>> [PURGE] Some Type Exception = " + str(
type(ex)) +
" " + str(ex))
159 logger.debug(
">>> [PURGE] Table not found, SiteId = " + str(urlPurge.siteId))
161 generalResponse.statuses.append([urlPurge.siteId, urlsCount])
162 logger.debug(
">>> [PURGE] Rsult = " + str([urlPurge.siteId, urlsCount]))
163 return generalResponse
171 SQL_DELETE_TEMPLATE =
"DELETE FROM %s WHERE `UrlMd5` = '%s'" 172 dbName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % urlPurge.siteId
173 if urlPurge.urlType == dc.EventObjects.URLStatus.URL_TYPE_URL:
174 self.
urlMd5 = hashlib.md5(urlPurge.url).hexdigest()
176 self.
urlMd5 = urlPurge.url
177 query = SQL_DELETE_TEMPLATE % (dbName, self.
urlMd5)
178 queryCallback(query, Constants.FOURTH_DB_ID)
187 SQL_DELETE_TEMPLATE =
"SELECT url FROM %s WHERE `UrlMd5` = '%s' AND `tcDate` NOT IN " + \
188 "(SELECT `tcDate` FROM dc_urls_deleted.%s WHERE `UrlMd5` = '%s') LIMIT 1" 189 dbName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % urlPurge.siteId
190 if urlPurge.urlType == dc.EventObjects.URLStatus.URL_TYPE_URL:
191 urlMd5 = hashlib.md5(urlPurge.url).hexdigest()
193 urlMd5 = urlPurge.url
194 query = SQL_DELETE_TEMPLATE % (dbName, urlMd5, dbName, urlMd5)
195 res = queryCallback(query, Constants.SECONDARY_DB_ID)
196 if res
is not None and len(res) > 0:
198 logger.debug(
">>> [PURGE] checkUrlInDcUrls 'UrlMd5' = " + urlMd5)
200 logger.debug(
" has record in dc_urls")
202 logger.debug(
" DOESN'T has record in dc_urls")
def isAvailableUrls(self, urlPurge, tbName, queryCallback)
def deleteUrlDBField(self, urlPurge, queryCallback)
GeneralResponse event object, represents general state response for multipurpose usage.
def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask)
def isDeleteTableExist(self, siteId, queryCallback)
def checkUrlInDcUrls(self, urlPurge, queryCallback)
def process(self, urlPurges, queryCallback)
def getAdditionPurges(self, urlPurge, siteLimits, queryCallback)