4 HCE project, Python bindings, Distributed Tasks Manager application. 5 HTTPProxyResolver is the class that contains the main HTTP proxy functionality. 8 @file HTTPProxyResolver.py 9 @author Alexander Vybornyh <alexander.hce.cluster@gmail.com> 10 @link: http://hierarchical-cluster-engine.com/ 11 @copyright: Copyright © 2013-2017 IOIX Ukraine 12 @license: http://hierarchical-cluster-engine.com/license/ 34 USER_PROXY_PROPERTY_NAME =
'USER_PROXY' 35 HTTP_PROXY_HOST_NAME =
'HTTP_PROXY_HOST' 36 HTTP_PROXY_PORT_NAME =
'HTTP_PROXY_PORT' 38 INDEX_FILE_EXTENTION =
'json' 40 USAGE_ALGORITM_FREQUENCY = 0
41 DEFAULT_USAGE_ALGORITM = USAGE_ALGORITM_FREQUENCY
43 DEFAULT_VALUE_INCREMENT_FAULTS = ProxyJsonWrapper.DEFAULT_VALUE_INCREMENT_FAULTS
46 ERROR_MSG_LOAD_SITE_PTOPERTIES =
"Load site properties failed. Error: %s" 47 ERROR_MSG_NOT_SUPPORT_ALGORITHM =
"Not support algorithm type = %s" 48 ERROR_MSG_CHECK_ALLOWED_DOMAINS =
"Check is allowed domains '%s' has error: %s" 49 ERROR_MSG_CHECK_ALLOWED_LIMITS =
"Check is allowed limits '%s' has error: %s" 51 ERROR_MSG_EMPTY_PROXIES_LIST =
"Empty proxies list." 52 ERROR_MSG_NOT_EXIST_ANY_VALID_PROXY =
"No available proxy in proxies list." 53 ERROR_MSG_TRIES_LIMIT_EXCEEDED =
"Tries usage proxies limit was exceeded." 67 userProxyJsonWrapper =
None 69 if HTTPProxyResolver.USER_PROXY_PROPERTY_NAME
in siteProperties:
71 USER_PROXY_PROPERTY_NAME]))
72 elif HTTPProxyResolver.USER_PROXY_PROPERTY_NAME.lower()
in siteProperties:
74 USER_PROXY_PROPERTY_NAME.lower()]))
75 except Exception, err:
76 logger.error(HTTPProxyResolver.ERROR_MSG_LOAD_SITE_PTOPERTIES, str(err))
78 return userProxyJsonWrapper
90 logger.debug(
"filePath: %s", str(filePath))
92 if isinstance(filePath, basestring)
and filePath !=
"":
93 dirName, fileName = os.path.split(filePath)
94 if os.extsep
in fileName:
99 if siteId
is not None:
100 ret = (dirName
if fileName ==
'' else filePath) + os.sep + str(siteId) + os.extsep + \
101 HTTPProxyResolver.INDEX_FILE_EXTENTION
115 if fileName
is not None:
117 fullName = os.path.expanduser(fileName)
118 if os.path.exists(fullName):
119 f = open(fullName,
'r') 122 except Exception, err:
123 logger.error(str(err))
135 if fileName
is not None and jsonData
is not None and len(jsonData) > 0:
138 fullName = os.path.expanduser(fileName)
140 dirName = os.path.dirname(fullName)
141 if not os.path.exists(dirName):
144 f = open(fullName,
'w')
145 json.dump(jsonData, f)
147 except (IOError, Exception), err:
148 logger.error(str(err))
162 if isinstance(proxyList, list):
167 for proxy
in proxyList:
168 if isinstance(proxy, Proxy)
and isinstance(url, basestring):
171 if isinstance(proxy.domains, basestring):
172 domains = json.loads(proxy.domains)
174 domains = proxy.domains
176 if isinstance(domains, list):
177 for domain
in domains:
178 if domain ==
'*' or (url !=
"" and re.search(domain, url, re.I + re.U)
is not None):
179 resList.append(proxy)
181 allowedDomains.append(domain)
184 except Exception, err:
185 logger.error(HTTPProxyResolver.ERROR_MSG_CHECK_ALLOWED_DOMAINS, str(proxy.domains), str(err))
187 logger.debug(
"Found allowed domains: %s",
varDump(list(set(allowedDomains))))
200 if isinstance(proxyList, list):
201 for proxy
in proxyList:
202 if isinstance(proxy, Proxy):
205 if isinstance(proxy.limits, basestring):
206 limits = json.loads(proxy.limits)
208 limits = proxy.limits
214 elif isinstance(limits, list):
225 logger.debug(
"!!! Default case checking limits")
226 resList.append(proxy)
228 except Exception, err:
229 logger.error(HTTPProxyResolver.ERROR_MSG_CHECK_ALLOWED_LIMITS, str(proxy.limits), str(err))
248 def __debugPrint(proxyList, msg):
250 if isinstance(proxyList, list):
251 for proxy
in proxyList:
252 if hasattr(proxy, ProxyJsonWrapper.PROXIES_HOST_NAME)
and hasattr(proxy, ProxyJsonWrapper.PROXIES_FREQ_NAME) \
253 and hasattr(proxy, ProxyJsonWrapper.PROXIES_PRIORITY_NAME):
254 out.append(
"priority: %s, freq: %s, host: %s" % (getattr(proxy, ProxyJsonWrapper.PROXIES_PRIORITY_NAME),
255 getattr(proxy, ProxyJsonWrapper.PROXIES_FREQ_NAME),
256 getattr(proxy, ProxyJsonWrapper.PROXIES_HOST_NAME)))
261 __debugPrint(proxyList,
"Before sort")
263 proxyList.sort(key=
lambda obj: getattr(obj, ProxyJsonWrapper.PROXIES_FREQ_NAME) * \
264 getattr(obj, ProxyJsonWrapper.PROXIES_PRIORITY_NAME) \
265 if hasattr(obj, ProxyJsonWrapper.PROXIES_FREQ_NAME)
and \
266 hasattr(obj, ProxyJsonWrapper.PROXIES_PRIORITY_NAME)
else sys.maxint)
269 __debugPrint(proxyList,
"After sort")
271 if len(proxyList) > 0
and hasattr(proxyList[0], ProxyJsonWrapper.PROXIES_HOST_NAME):
272 proxyName = getattr(proxyList[0], ProxyJsonWrapper.PROXIES_HOST_NAME)
287 if algorithmType == HTTPProxyResolver.USAGE_ALGORITM_FREQUENCY:
288 ret = HTTPProxyResolver.__usageAlgorithmFrequency(proxyList)
290 logger.error(HTTPProxyResolver.ERROR_MSG_NOT_SUPPORT_ALGORITHM, str(HTTPProxyResolver))
303 if HTTPProxyResolver.HTTP_PROXY_HOST_NAME
in siteProperties
and \
304 HTTPProxyResolver.HTTP_PROXY_PORT_NAME
in siteProperties:
305 proxyName =
"%s:%s" % (str(siteProperties[HTTPProxyResolver.HTTP_PROXY_HOST_NAME]),
306 str(siteProperties[HTTPProxyResolver.HTTP_PROXY_PORT_NAME]))
319 def getProxy(siteProperties, siteId, url, dbProxyWrapper=None):
321 proxyName = HTTPProxyResolver.__getDefaultProxyName(siteProperties)
323 userProxyJsonWrapper = HTTPProxyResolver.__getUserProxyJsonWrapper(siteProperties)
324 if userProxyJsonWrapper
is not None:
326 fileName = HTTPProxyResolver.__makFileName(userProxyJsonWrapper.getFilePath(), siteId)
327 logger.debug(
"Usage file name: %s", str(fileName))
330 jsonData = HTTPProxyResolver.__readJsonFile(fileName)
332 logger.debug(
"Read json from index file: %s",
varDump(proxyJsonWrapper.getData()))
335 proxyList = userProxyJsonWrapper.getProxyList()
336 logger.debug(
"Extract proxies list from site property: %s",
varDump(proxyList))
338 proxyJsonWrapper.addProxyList(proxyList)
341 if dbProxyWrapper
is not None and userProxyJsonWrapper.getSource() == UserProxyJsonWrapper.SOURCE_DATABASE:
342 enaibledProxiesList = dbProxyWrapper.getEnaibledProxies(siteId)
343 logger.debug(
"Extract enabled proxies list from DB: %s",
varDump(enaibledProxiesList))
344 proxyJsonWrapper.addProxyList(enaibledProxiesList)
347 fullProxiesList = proxyJsonWrapper.getProxyList()
348 logger.debug(
"Full proxies list: %s",
varDump(fullProxiesList))
349 if len(fullProxiesList) == 0:
350 raise ProxyException(message=HTTPProxyResolver.ERROR_MSG_EMPTY_PROXIES_LIST,
351 statusUpdate=userProxyJsonWrapper.getStatusUpdateEmptyProxyList())
354 proxyList = proxyJsonWrapper.getProxyList(ProxyJsonWrapper.PROXY_STATE_ENABLED)
355 logger.debug(
"Only enabled proxies: %s",
varDump(proxyList))
358 if len(proxyList) == 0:
359 raise ProxyException(message=HTTPProxyResolver.ERROR_MSG_NOT_EXIST_ANY_VALID_PROXY,
360 statusUpdate=userProxyJsonWrapper.getStatusUpdateNoAvailableProxy())
363 proxyList = HTTPProxyResolver.__getProxyListAllowedDomains(proxyList, url)
364 logger.debug(
"Only allowed domains: %s",
varDump(proxyList))
367 proxyList = HTTPProxyResolver.__getProxyListAllowedLimits(proxyList)
368 logger.debug(
"Only allowed limits: %s",
varDump(proxyList))
371 proxyName = HTTPProxyResolver.__usageAlgorithm(proxyList, algorithmType=HTTPProxyResolver.DEFAULT_USAGE_ALGORITM)
373 logger.debug(
"Result proxy name: %s",
varDump(proxyName))
374 if proxyName
is not None:
376 proxyJsonWrapper.addFrequency(proxyName)
379 HTTPProxyResolver.__saveJsonFile(fileName, jsonData)
393 def addFaults(siteProperties, siteId, proxyName, dbProxyWrapper=None, incrementSize=DEFAULT_VALUE_INCREMENT_FAULTS):
395 userProxyJsonWrapper = HTTPProxyResolver.__getUserProxyJsonWrapper(siteProperties)
396 if userProxyJsonWrapper
is not None:
398 fileName = HTTPProxyResolver.__makFileName(userProxyJsonWrapper.getFilePath(), siteId)
399 logger.debug(
"Usage file name: %s", str(fileName))
402 jsonData = HTTPProxyResolver.__readJsonFile(fileName)
404 logger.debug(
"Read json from index file: %s",
varDump(proxyJsonWrapper.getData()))
407 proxyJsonWrapper.addFaults(proxyName, incrementSize)
410 HTTPProxyResolver.__saveJsonFile(fileName, jsonData)
411 logger.debug(
"Save json to file: %s",
varDump(jsonData))
414 if dbProxyWrapper
is not None:
415 dbProxyWrapper.addFaults(proxyName, incrementSize)
425 userProxyJsonWrapper = HTTPProxyResolver.__getUserProxyJsonWrapper(siteProperties)
426 if userProxyJsonWrapper
is not None:
427 triesCount = userProxyJsonWrapper.getTriesCount()
428 if triesCount
is not None and int(currentTriesCount) >= int(triesCount):
429 raise ProxyException(message=HTTPProxyResolver.ERROR_MSG_TRIES_LIMIT_EXCEEDED,
430 statusUpdate=userProxyJsonWrapper.getStatusUpdateTriesLimits())
440 triesCount = UserProxyJsonWrapper.DEFAULT_VALUE_TRIES_COUNT
441 userProxyJsonWrapper = HTTPProxyResolver.__getUserProxyJsonWrapper(siteProperties)
442 if userProxyJsonWrapper
is not None:
443 triesCount = userProxyJsonWrapper.getTriesCount()
461 if rawContent
is not None:
462 userProxyJsonWrapper = HTTPProxyResolver.__getUserProxyJsonWrapper(siteProperties)
463 if userProxyJsonWrapper
is not None:
464 patterns = userProxyJsonWrapper.getRawContentCheckPatterns()
465 if isinstance(patterns, list):
466 for pattern
in patterns:
467 if re.search(pattern, rawContent, re.M | re.U)
is not None:
470 if int(userProxyJsonWrapper.getRawContentCheckFaults()) > 0:
471 HTTPProxyResolver.addFaults(siteProperties, siteId, proxyName, dbProxyWrapper)
474 ret = bool(int(userProxyJsonWrapper.getRawContentCheckRotate()) > 0)
def __usageAlgorithm(proxyList, algorithmType=DEFAULT_USAGE_ALGORITM)
def __usageAlgorithmFrequency(proxyList)
def isNeedRotateProxy(siteProperties, siteId, proxyName, dbProxyWrapper, rawContent)
def __getDefaultProxyName(siteProperties)
def __getUserProxyJsonWrapper(siteProperties)
def checkTriesCount(siteProperties, currentTriesCount)
def getProxy(siteProperties, siteId, url, dbProxyWrapper=None)
def __saveJsonFile(fileName, jsonData)
def __makFileName(filePath, siteId)
def __readJsonFile(fileName)
def getTriesCount(siteProperties)
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
def __getProxyListAllowedLimits(proxyList)
def __getProxyListAllowedDomains(proxyList, url)
def addFaults(siteProperties, siteId, proxyName, dbProxyWrapper=None, incrementSize=DEFAULT_VALUE_INCREMENT_FAULTS)