52 def checkHost(self, path, url, siteId):
53 ret = self.ITEM_PROCESS
54 domain = Utils.UrlParser.getDomain(url)
55 if path
is None and self.httpFreqLimits
is not None and self.PATH_CONFIG_KEY
in self.httpFreqLimits:
56 path = self.httpFreqLimits[self.PATH_CONFIG_KEY]
58 if self.httpFreqLimits
is not None and path
is not None and domain
is not None and siteId
is not None:
60 for elem
in self.httpFreqLimits:
61 if elem != self.PATH_CONFIG_KEY:
62 if re.compile(elem).match(domain)
is not None:
63 httpFreqLimit = self.httpFreqLimits[elem]
65 if httpFreqLimit
is not None:
66 fpath = path + self.TAIL_SLASH_LAMBDA(path) + domain +
'/' + \
67 self.HTTP_FREQ_LIMITS_FILE_NAME_PREFIX + str(siteId) + self.JSON_SUFF
70 with open(fpath,
'r') as fd: 72 freqJson = json.loads(freqBuf) 73 except Exception
as excp:
74 logger.debug(
">>> wrong in HostRequestStorage.checkHost method; " + str(excp))
92 if freqJson
is not None and "start" in freqJson
and "count" in freqJson:
93 nowTime = int(time.time())
94 start = int(freqJson[
"start"])
95 count = int(freqJson[
"count"])
99 delta = int(nowTime - start)
101 freqAverage = float(count) / delta
103 delayTime = delta / count
105 logger.debug(
'freqAverage = ' + str(freqAverage) +
' compare ' + str(float(httpFreqLimit[
"max_freq"])))
106 logger.debug(
'delayTime = ' + str(delayTime) +
' compare ' + str(int(httpFreqLimit[
"max_delay"])))
107 logger.debug(
'count = ' + str(count))
109 if float(freqAverage) > float(httpFreqLimit[
"max_freq"]):
110 logger.debug(
"Checking 'max_freq' passed: " + str(freqAverage) +
" > " + \
111 str(float(httpFreqLimit[
"max_freq"])))
112 ret = self.ITEM_BREAK
118 if delayTime <= int(httpFreqLimit[
"max_delay"]):
119 delayTime = int(httpFreqLimit[
"max_delay"]) - delayTime
121 if "randomized" in httpFreqLimit
and int(httpFreqLimit[
"randomized"]) > 0:
122 delayTime += random.randint(1, int(httpFreqLimit[
"max_delay"]))
123 logger.debug(
">>> Use 'randomized' for delay")
125 if delayTime > 0
and delayTime <= int(httpFreqLimit[
"max_delay"]):
126 logger.debug(
">>> start max_delay = " + str(delayTime))
127 time.sleep(delayTime)
128 logger.debug(
">>> finish max_delay = " + str(delayTime))
130 freqJson[
"count"] = count + 1
132 freqJson = {
"start": int(time.time()),
"count":0}
135 if os.path.exists(path):
136 if not os.path.exists(path + self.TAIL_SLASH_LAMBDA(path) + domain):
138 os.mkdir(path + self.TAIL_SLASH_LAMBDA(path) + domain)
139 except Exception
as excp:
140 logger.debug(
">>> makedir exception " + str(excp))
141 with open(fpath,
'w')
as fd:
142 freqBuf = json.dumps(freqJson)
145 logger.debug(
">>> path not exists !!! path: " + path)