4 @link: http://hierarchical-cluster-engine.com/ 5 @copyright: Copyright © 2013-2014 IOIX Ukraine 6 @license: http://hierarchical-cluster-engine.com/license/ 31 if siteId
is not None:
32 query = Constants.CHECK_TABLE_SQL_TEMPLATE % siteId
33 if userId
is not None:
34 query += (Constants.CHECK_TABLE_SQL_ADDITION % str(userId))
35 res = queryCallback(query, Constants.PRIMARY_DB_ID)
36 if res
is not None and len(res) > 0
and len(res[0]) > 0
and res[0][0] > 0:
58 if additionWhere
is not None and additionWhere !=
"":
59 query =
"SELECT `id` FROM `sites`" + additionWhere
60 res = queryCallback(query, Constants.PRIMARY_DB_ID)
61 if hasattr(res,
'__iter__'):
73 def dbLock(self, mutexName, queryCallback, sleepTime=1, mutexLockTTL=Constants.DEFAULT_LOCK_TTL):
74 logger.debug(
">>> BaseTask Class. Lock start name=" + mutexName)
75 LOCK_QUERY_TEMPLATE =
"SELECT mutexLock('%s', %s, %s)" 76 query = LOCK_QUERY_TEMPLATE % (mutexName, str(Constants.DB_LOCK_APPLICATION_ID), str(mutexLockTTL))
77 res = queryCallback(query, Constants.PRIMARY_DB_ID)
78 while res
is not None and len(res) > 0
and res[0][0] == 0:
80 res = queryCallback(query, Constants.PRIMARY_DB_ID)
81 logger.debug(
">>> BaseTask Class. Lock finish name=" + mutexName)
89 logger.debug(
">>> BaseTask Class. Unlock start name=" + mutexName)
90 LOCK_QUERY_TEMPLATE =
"SELECT mutexUnlock('%s', %s)" 91 query = LOCK_QUERY_TEMPLATE % (mutexName, str(Constants.DB_LOCK_APPLICATION_ID))
92 queryCallback(query, Constants.PRIMARY_DB_ID)
93 logger.debug(
">>> BaseTask Class. Unlock finish name=" + mutexName)
102 logger.debug(
">>> Create Url Insert request")
104 tbName = Constants.DC_URLS_TABLE_NAME_TEMPLATE % siteId
105 fieldValueString = Constants.createFieldsValuesString(localKeys, localValues)
106 if fieldValueString
is not None and fieldValueString !=
"":
107 query = Constants.INSERT_COMMON_TEMPLATE % (tbName, fieldValueString)
116 logger.debug(
">>> Urls copy operation")
117 COPY_SELECT_SQL_TEMPLATE =
"SELECT * FROM `sites_urls` WHERE `Site_Id`='%s'" 118 query = COPY_SELECT_SQL_TEMPLATE % siteId
119 res = queryCallback(query, Constants.PRIMARY_DB_ID, Constants.EXEC_NAME)
122 for urlRecord
in res:
123 logger.debug(
">>> Urls copy operation KEY - " + str(urlRecord))
126 for keyRecord
in urlRecord:
127 for keySample
in Constants.URLTableDict:
128 if keyRecord == Constants.URLTableDict[keySample]
and urlRecord[keyRecord]
is not None:
129 localKeys.append(keyRecord)
130 if isinstance(urlRecord[keyRecord], basestring)
or \
131 isinstance(urlRecord[keyRecord], datetime.datetime):
133 escapingStr = Utils.escape(str(urlRecord[keyRecord]))
134 localValues.append((
"'" + escapingStr +
"'"))
136 localValues.append(str(urlRecord[keyRecord]))
138 logger.debug(
">>> Urls copy operation LEN - " + str(len(localKeys)))
139 if len(localKeys) > 0:
141 if query
is not None:
142 res = queryCallback(query, Constants.SECONDARY_DB_ID)
144 if 'URLMd5' in urlRecord
and urlRecord[
'URLMd5']
is not None:
145 StatisticLogManager.addNewRecord(queryCallback, siteId, urlRecord[
'URLMd5'])
146 if 'Status' in urlRecord
and urlRecord[
'Status']
is not None:
147 self.
statisticLogUpdate(
None, urlRecord[
'URLMd5'], siteId, urlRecord[
'Status'], queryCallback,
True)
155 if urlMd5
is not None:
156 StatisticLogManager.addNewRecord(queryCallback, siteId, urlMd5)
158 StatisticLogManager.statisticUpdate(queryCallback, Constants.StatFreqConstants.FREQ_INSERT, 1, siteId,
160 StatisticLogManager.logUpdate(queryCallback,
"LOG_INSERT", localObj, siteId, urlMd5)
161 if status == dc.EventObjects.URL.STATUS_NEW:
162 StatisticLogManager.statisticUpdate(queryCallback, Constants.StatFreqConstants.FREQ_NEW_STATUS, 1,
164 StatisticLogManager.logUpdate(queryCallback,
"LOG_NEW", localObj, siteId, urlMd5)
165 elif status == dc.EventObjects.URL.STATUS_SELECTED_CRAWLING:
166 StatisticLogManager.logUpdate(queryCallback,
"LOG_SELECTED_CRAWLING", localObj, siteId, urlMd5)
167 elif status == dc.EventObjects.URL.STATUS_CRAWLING:
168 StatisticLogManager.logUpdate(queryCallback,
"LOG_CRAWLING", localObj, siteId, urlMd5)
169 elif status == dc.EventObjects.URL.STATUS_CRAWLED:
170 StatisticLogManager.statisticUpdate(queryCallback, Constants.StatFreqConstants.FREQ_CRAWLED_STATUS, 1,
172 StatisticLogManager.logUpdate(queryCallback,
"LOG_CRAWLED", localObj, siteId, urlMd5)
173 elif status == dc.EventObjects.URL.STATUS_SELECTED_PROCESSING:
174 StatisticLogManager.logUpdate(queryCallback,
"LOG_SELECTED_PROCESSING", localObj, siteId, urlMd5)
175 elif status == dc.EventObjects.URL.STATUS_PROCESSING:
176 StatisticLogManager.logUpdate(queryCallback,
"LOG_PROCESSING", localObj, siteId, urlMd5)
177 elif status == dc.EventObjects.URL.STATUS_PROCESSED:
178 StatisticLogManager.statisticUpdate(queryCallback, Constants.StatFreqConstants.FREQ_PROCESSED_STATS, 1,
180 StatisticLogManager.logUpdate(queryCallback,
"LOG_PROCESSED", localObj, siteId, urlMd5)
190 if urlType == dc.EventObjects.URLStatus.URL_TYPE_URL:
191 logger.debug(
"calculateMd5FormUrl url: %s", str(url))
199 ret = hashlib.md5(url).hexdigest()
213 query =
"SELECT `Value` FROM `sites_properties` WHERE `Site_Id`='%s' AND `Name`='%s'" 214 query = (query % (siteId, propName))
215 if urlMd5
is not None:
216 query += (
" AND `URLMd5`='%s'" % urlMd5)
218 res = queryCallback(query, Constants.PRIMARY_DB_ID)
219 if res
is not None and len(res) > 0
and len(res[0]) > 0:
def isSiteExist(self, siteId, queryCallback, userId=None)
def readValueFromSiteProp(siteId, propName, queryCallback, urlMd5=None)
def calculateMd5FormUrl(self, url, urlType, useNormilize=False)
def generateCriterionSQL(criterions, additionWhere=None, siteId=None)
def dbLock(self, mutexName, queryCallback, sleepTime=1, mutexLockTTL=Constants.DEFAULT_LOCK_TTL)
def createUrlsInsertQuery(self, siteId, localKeys, localValues)
def statisticLogUpdate(self, localObj, urlMd5, siteId, status, queryCallback, isInsert=False)
def dbUnlock(self, mutexName, queryCallback)
def generateCriterionSQL(self, criterions, additionWhere=None, siteId=None)
def copyUrlsToDcUrls(self, siteId, queryCallback)
def fetchByCriterions(self, criterions, queryCallback)