4 @link: http://hierarchical-cluster-engine.com/ 5 @copyright: Copyright © 2013-2014 IOIX Ukraine 6 @license: http://hierarchical-cluster-engine.com/license/ 14 from dc_db
import FieldRecalculator
15 from dc_db
import Constants
36 def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask):
37 super(URLCleanUpTask, self).
__init__()
50 def process(self, urlCleanups, queryCallback):
52 for urlCleanup
in urlCleanups:
54 if urlCleanup.siteId ==
"":
55 urlCleanup.siteId =
"0" 56 if self.
isSiteExist(urlCleanup.siteId, queryCallback):
59 if urlCleanup.url
is None:
61 dc.EventObjects.URLStatus.URL_TYPE_URL), urlCleanup.criterions,
64 localUrls.append(urlCleanup.url)
65 for localUrl
in localUrls:
66 urlCleanup.url = localUrl
68 StatisticLogManager.logUpdate(queryCallback,
"LOG_URL_CLEANUP", urlCleanup, urlCleanup.siteId, self.
urlMd5)
69 if urlCleanup.delayedType == dc.EventObjects.NOT_DELAYED_OPERATION:
73 self.
updateMysqlDB(urlCleanup, queryCallback, urlCleanup.siteId)
74 if urlCleanup.delayedType == dc.EventObjects.DELAYED_OPERATION:
78 AttrDeleteTask.deleteUrlsAttributes(urlCleanup.siteId, self.
urlMd5, queryCallback)
80 generalResponse.statuses.append(
True)
81 self.
recalculator.commonRecalc(urlCleanup.siteId, queryCallback, \
82 dc.EventObjects.FieldRecalculatorObj.PARTITION_RECALC)
84 generalResponse.statuses.append(
False)
85 type_, value_, traceback_ = sys.exc_info()
86 stack = traceback.format_tb(traceback_)
87 logger.error(str(stack.pop()))
89 generalResponse.statuses.append(
False)
90 return generalResponse
98 def extractUrlByCriterions(self, siteId, isUrlExtract, criterions, queryCallback, dbName=Constants.SECONDARY_DB_ID,
99 tablePrefix=Constants.DC_URLS_TABLE_NAME_TEMPLATE):
101 tableName = tablePrefix % siteId
103 SQLUrlExtractor =
"SELECT `URL` FROM `%s`" % tableName
105 SQLUrlExtractor =
"SELECT `URLMd5` FROM `%s`" % tableName
107 res = queryCallback(query, dbName)
108 if hasattr(res,
'__iter__'):
109 logger.debug(
">>> Select URL len(res) = " + str(len(res)))
111 retUrls.append(row[0])
120 def getSiteFields(self, siteId, queryCallback, dbName=Constants.PRIMARY_DB_ID):
123 query =
"SELECT * FROM `dc_sites`.`sites` WHERE `Id` = '%s' LIMIT 1" % siteId
124 res = queryCallback(query, dbName, Constants.EXEC_NAME)
152 dataDir = self.
rawDataDir +
'/' + urlCleanup.siteId +
'/' +
PathMaker(localUrlMd5).getDir()
153 logger.debug(
">>> CLEANUP DIR = " + str(dataDir))
154 if os.path.isdir(dataDir):
156 shutil.rmtree(dataDir)
157 hiLevelDir = dataDir[0: dataDir.rfind(
'/')
if dataDir.rfind(
'/') >= 0
else len(dataDir)]
158 if len(os.listdir(hiLevelDir)) == 0:
159 shutil.rmtree(hiLevelDir)
160 except OSError
as ex:
161 logger.debug(
">>> [%s] Dir delete error - MSG [%s]", dataDir, str(ex.message))
170 localState = urlCleanup.state
if urlCleanup.state
is not None else "state" 171 localStatus = urlCleanup.status
if urlCleanup.status
is not None else "status" 173 if localStatus == dc.EventObjects.URL.STATUS_NEW:
175 if sf
is not None and 'RecrawlPeriod' in sf:
176 uDate =
", `UDate`=DATE_SUB(`UDate`, INTERVAL %s MINUTE)" % sf[
'RecrawlPeriod']
178 uDate =
", `UDate`=NOW()" 179 sqlt =
"UPDATE `%s` SET `TcDate`=NOW()%s, `state` = '%s', `status` = '%s' WHERE `URLMD5` = '%s' LIMIT 1" 181 tableName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % urlCleanup.siteId
182 query = sqlt % (tableName, uDate, localState, localStatus, localUrlMd5)
183 queryCallback(query, Constants.SECONDARY_DB_ID)
191 SQL_COPY_QUERY_TEMPLATE =
"INSERT INTO %s SELECT * FROM `dc_urls`.`%s` WHERE `URLMD5` = '%s'" 192 tbName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % urlCleanup.siteId
193 query = Constants.SQL_CREATE_QUERY_TEMPLATE % (tbName, tbName)
194 queryCallback(query, Constants.FOURTH_DB_ID)
195 query = SQL_COPY_QUERY_TEMPLATE % (tbName, tbName, self.
urlMd5)
196 queryCallback(query, Constants.FOURTH_DB_ID)
def process(self, urlCleanups, queryCallback)
def isSiteExist(self, siteId, queryCallback, userId=None)
def deleteFromRawStorage(self, urlCleanup)
def calculateMd5FormUrl(self, url, urlType, useNormilize=False)
def copyUrlToDeleteDB(self, urlCleanup, queryCallback)
GeneralResponse event object: represents a general-purpose state response.
def getSiteFields(self, siteId, queryCallback, dbName=Constants.PRIMARY_DB_ID)
def deleteFromDataStorage(self, urlCleanup, queryCallback)
def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask)
def extractUrlByCriterions(self, siteId, isUrlExtract, criterions, queryCallback, dbName=Constants.SECONDARY_DB_ID, tablePrefix=Constants.DC_URLS_TABLE_NAME_TEMPLATE)
def generateCriterionSQL(self, criterions, additionWhere=None, siteId=None)
def updateMysqlDB(self, urlCleanup, queryCallback, siteId)