4 @author Scorp <developers.hce@gmail.com> 5 @link: http://hierarchical-cluster-engine.com/ 6 @copyright: Copyright © 2013-2014 IOIX Ukraine 7 @license: http://hierarchical-cluster-engine.com/license/ 35 PROXY_SQL_QUERY = (
"SELECT * FROM `sites_proxy` WHERE `State` = 1 AND (`Site_Id` = '%s' OR `Site_Id` = '*')" +
36 " ORDER BY `Priority`")
38 PROXY_SQL_UPDATE_FAULTS_QUERY =
"UPDATE `sites_proxy` SET `Faults`= %s WHERE `Site_Id` = '%s' AND `Host` = '%s'" 39 PROXY_SQL_DISABLE_QUERY =
"UPDATE `sites_proxy` SET `State`= 0 WHERE `Site_Id` = '%s' AND `Host` = '%s'" 41 PROXY_SQL_DB =
"dc_sites" 43 LIMITS = [
"MIN",
"HOUR",
"DAY",
"MONTH",
"YEAR"]
44 SECONDS_MULTI = [60, 3600, 86400, 2590000, 31100000]
49 STATUS_UPDATE_MIN_ALLOWED_VALUE = 1
50 STATUS_UPDATE_MAX_ALLOWED_VALUE = 7
52 RAW_CONTENT_CHECK_ROTATE_DEFAULT = 1
53 RAW_CONTENT_CHECK_FAULTS_DEFAULT = 1
61 def __init__(self, siteProperties, dbWrapper, siteId, url=None):
68 self.
domain = UrlParser.getDomain(url)
if url
is not None else None 81 if siteProperties
is not None:
82 if "HTTP_PROXY_HOST" in siteProperties
and "HTTP_PROXY_PORT" in siteProperties:
83 self.
proxyTuple = (siteProperties[
"HTTP_PROXY_HOST"], siteProperties[
"HTTP_PROXY_PORT"])
84 elif "USER_PROXY" in siteProperties:
86 proxyJson = json.loads(siteProperties[
"USER_PROXY"])
87 except Exception
as excp:
88 ExceptionLog.handler(logger, excp,
">>> Bad json in USER_PROXY property: " + \
89 str(siteProperties[
"USER_PROXY"]))
92 if proxyJson
is not None and "source" in proxyJson:
93 self.
source = int(proxyJson[
"source"])
96 if "proxies" in proxyJson:
101 logger.debug(
'>>> self.proxyStruct: ' + str(self.
proxyStruct))
104 if proxyJson
is not None and "status_update_empty_proxy_list" in proxyJson
and \
109 if proxyJson
is not None and "status_update_no_available_proxy" in proxyJson
and \
114 if proxyJson
is not None and "status_update_tries_limit" in proxyJson
and \
120 if proxyJson
is not None and "raw_content_check" in proxyJson:
121 rawContentCheck = proxyJson[
"raw_content_check"]
122 if "patterns" in rawContentCheck:
124 if "rotate" in rawContentCheck:
126 if "faults" in rawContentCheck:
129 logger.error(
"Mandatory parameter 'patterns' for 'raw_content_check' not found")
131 if self.
proxyTuple is None and proxyJson
is not None:
132 if "file_path" in proxyJson:
158 except Exception
as err:
159 ExceptionLog.handler(logger, err,
">>> ProxyResolver exception", (), \
160 {ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_DEBUG})
170 if os.path.isfile(fileName):
173 fd = open(fileName,
"r") 176 logger.debug(
'>>> readIndexFile fileData: ' + str(fileData) +
' length: ' + str(len(fileData)) +
' bytes.')
177 ret = json.loads(str(fileData))
181 except Exception
as excp:
182 ExceptionLog.handler(logger, excp,
">>> readIndexFile error, file name = " + str(fileName))
196 if jsonData
is not None and len(jsonData) > 0:
200 fileData = json.dumps(jsonData)
201 fd = open(fileName,
"w")
205 except Exception
as excp:
206 ExceptionLog.handler(logger, excp,
">>> saveIndexInFile error, file name = " + str(fileName))
227 for fieldName
in inDict:
228 for key, value
in DB_CONSTS.ProxyTableDict.items():
229 if value == fieldName:
230 ret[key] = inDict[fieldName]
243 saveDBMode = dbWrapper.affect_db
244 dbWrapper.affect_db =
True 246 dbi.EventObjects.CustomRequest.SQL_BY_NAME)
247 dbWrapper.affect_db = saveDBMode
248 if result
is not None and len(result) > 0:
251 ret[elemInLower[
"host"]] = elemInLower
256 if "limits" in ret[elemInLower[
"host"]]
and ret[elemInLower[
"host"]][
"limits"]
is not None:
257 if not ret[elemInLower[
"host"]][
"limits"]:
258 ret[elemInLower[
"host"]][
"limits"] = []
260 ret[elemInLower[
"host"]][
"limits"] = json.loads(ret[elemInLower[
"host"]][
"limits"])
261 except Exception, err:
262 ExceptionLog.handler(logger, err,
">>> Wrong json in 'limits': " + \
263 varDump(ret[elemInLower[
"host"]][
"limits"]) + \
264 ", host = " + (elem[
"host"]
if "host" in elem
else "None"))
266 if "domains" in ret[elemInLower[
"host"]]
and ret[elemInLower[
"host"]][
"domains"]
is not None:
267 if ret[elemInLower[
"host"]][
"domains"] ==
"":
268 ret[elemInLower[
"host"]][
"domains"] = [
'*']
270 ret[elemInLower[
"host"]][
"domains"] = json.loads(ret[elemInLower[
"host"]][
"domains"])
271 except Exception
as err:
272 ExceptionLog.handler(logger, err,
">>> Wrong json in 'domains': " + \
273 varDump(ret[elemInLower[
"host"]][
"domains"]) +
", host = " +
274 (elem[
"host"]
if "host" in elem
else "None"))
276 if "cDate" in ret[elemInLower[
"host"]]
and ret[elemInLower[
"host"]][
"cDate"]
is not None:
277 ret[elemInLower[
"host"]][
"cDate"] = str(ret[elemInLower[
"host"]][
"cDate"])
278 except Exception
as err:
279 ExceptionLog.handler(logger, err,
">>> Wrong json in 'cDate': " + \
280 varDump(ret[elemInLower[
"host"]][
"cDate"]) +
", host = " +
281 (elem[
"host"]
if "host" in elem
else "None"))
283 if "uDate" in ret[elemInLower[
"host"]]
and ret[elemInLower[
"host"]][
"uDate"]
is not None:
284 ret[elemInLower[
"host"]][
"uDate"] = str(ret[elemInLower[
"host"]][
"uDate"])
285 except Exception
as err:
286 ExceptionLog.handler(logger, err,
">>> Wrong json in 'uDate': " + \
287 varDump(ret[elemInLower[
"host"]][
"uDate"]) +
", host = " +
288 (elem[
"host"]
if "host" in elem
else "None"))
343 isinstance(self.
proxyStruct[key][
"limits_stat"], dict)
and \
346 curTimeStamp = int(time.time())
348 if index >= len(self.
LIMITS):
351 if curTimeStamp - self.
proxyStruct[key][
"limits_stat"][self.
LIMITS[index] +
"_START_POINT"] >= \
353 self.
proxyStruct[key][
"limits_stat"][self.
LIMITS[index] +
"_START_POINT"] = curTimeStamp
355 if limit > 0
and self.
LIMITS[index] +
"_FREQ" in self.
proxyStruct[key][
"limits_stat"]
and \
381 logger.debug(
'>>> commonIncrementLimits enter...')
383 if "freq" in container[key]:
384 logger.debug(
'>>> container[key]["freq"] += 1')
385 container[key][
"freq"] += 1
387 logger.debug(
'>>> container[key].update({"freq":1})')
388 container[key].update({
"freq":1})
390 if "limits_stat" in container[key]
and len(container[key][
"limits_stat"]) > 0:
392 if elem +
"_FREQ" in container[key][
"limits_stat"]:
393 container[key][
"limits_stat"][elem +
"_FREQ"] += 1
395 logger.debug(
'>>> container[key].update({"limits_stat":{}})')
396 container[key].update({
"limits_stat":{}})
399 container.update({key:{
"host":key,
"freq":1,
"limits_stat":{}}})
415 if len(elem[
"host"].split(
':')) > 1:
416 ret = (elem[
"host"].split(
':')[0], elem[
"host"].split(
':')[1])
427 logger.debug(
'>>> getProxy enter...')
429 saveIndexFile =
False 433 logger.debug(
'>>> elif self.proxyStruct is not None')
434 if previousProxy
is None and "priority" in self.
proxyStruct:
435 logger.debug(
'>>> previousProxy is None')
436 for elem
in sorted(self.
proxyStruct.values(), key=
lambda x: x[
"freq"] + \
437 x[
"priority"] * sys.maxint
if "freq" in x
else x[
"priority"]):
439 logger.debug(
'>>> if self.checkLimits(elem["host"]) and self.checkDomains(elem["host"])')
445 logger.debug(
'>>> else')
447 logger.debug(
'>>> self.checkLimits: ' + str(bool(self.
checkLimits(elem[
"host"]))))
448 logger.debug(
'>>> self.checkDomains: ' + str(bool(self.
checkDomains(elem[
"host"]))))
459 logger.debug(
'>>> self.indexFileName: ' + str(self.
indexFileName))
460 logger.debug(
'>>> saveIndexFile: ' + str(saveIndexFile))
465 logger.debug(
'>>> getProxy leave... ret: ' + str(ret))
476 if "USER_PROXY" in siteProperties:
478 proxyJson = json.loads(siteProperties[
"USER_PROXY"])
479 if "tries_count" in proxyJson:
480 ret = int(proxyJson[
"tries_count"])
481 except Exception
as excp:
482 ExceptionLog.handler(logger, excp,
">>> Bad json in USER_PROXY property: " + str(siteProperties[
"USER_PROXY"]))
492 logger.debug(
'addFault enter ... proxyName: ' + str(proxyName))
495 faultsMax = int(self.
proxyStruct[proxyName][
"faultsMax"])
496 faults = int(self.
proxyStruct[proxyName][
"faults"])
497 faults += incrementSize
498 self.
proxyStruct[proxyName].update({
"faults":faults})
504 dbi.EventObjects.CustomRequest.SQL_BY_NAME)
506 logger.debug(
'customRequest result: ' +
varDump(result))
508 if faultsMax > 0
and faults >= faultsMax:
516 dbi.EventObjects.CustomRequest.SQL_BY_NAME)
518 logger.debug(
'customRequest result: ' +
varDump(result))
546 if re.search(pattern, rawContent, re.M | re.U)
is not None:
statusUpdateNoAvailableProxy
def checkPattern(self, rawContent)
int RAW_CONTENT_CHECK_FAULTS_DEFAULT
string PROXY_SQL_UPDATE_FAULTS_QUERY
def incrementLimits(self, key)
def getTriesCount(siteProperties)
def checkDomains(self, key)
def readSQLProxy(self, dbWrapper, siteId)
int STATUS_UPDATE_MIN_ALLOWED_VALUE
def addFault(self, proxyName, incrementSize=1)
statusUpdateEmptyProxyList
def saveIndexInFile(fileName, jsonData)
def fieldsToObjectName(self, inDict)
int RAW_CONTENT_CHECK_ROTATE_DEFAULT
def fillProxyTuple(self, elem)
def __init__(self, siteProperties, dbWrapper, siteId, url=None)
def readIndexFile(fileName)
def checkLimits(self, key)
def getProxy(self, previousProxy=None)
def isEmptyProxiesList(self)
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
int STATUS_UPDATE_MAX_ALLOWED_VALUE
string PROXY_SQL_DISABLE_QUERY
def commonIncrementLimits(self, container, key)