HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
app.HostRequestStorage.HostRequestStorage Class Reference
Inheritance diagram for app.HostRequestStorage.HostRequestStorage:
Collaboration diagram for app.HostRequestStorage.HostRequestStorage:

Public Member Functions

def __init__ (self, httpFreqLimits)
 
def checkHost (self, path, url, siteId)
 

Public Attributes

 httpFreqLimits
 

Static Public Attributes

int ITEM_PROCESS = 0
 
int ITEM_BREAK = 1
 
string JSON_SUFF = ".json"
 
string PATH_CONFIG_KEY = "PATH_CONFIG"
 
function TAIL_SLASH_LAMBDA = lambda self, path: '' if (path is None or len(path) == 0 or path[-1] == '/') else '/'
 
int FREQ_AVERAGE_LIST_SIZE = 5
 
string HTTP_FREQ_LIMITS_FILE_NAME_PREFIX = "http_freq_limits_"
 

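Detailed Description

Per-host HTTP request frequency limiter. checkHost() matches the URL's domain against the regular-expression keys of httpFreqLimits, keeps a per-domain, per-site JSON counter file (fields "start" and "count") under the configured path, and returns ITEM_BREAK when the average request frequency exceeds the matching limit's "max_freq"; otherwise it returns ITEM_PROCESS, possibly after sleeping for up to "max_delay" seconds.

Definition at line 25 of file HostRequestStorage.py.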

Constructor & Destructor Documentation

◆ __init__()

def app.HostRequestStorage.HostRequestStorage.__init__ (   self,
  httpFreqLimits 
)

Definition at line 38 of file HostRequestStorage.py.

38  def __init__(self, httpFreqLimits):
39  self.httpFreqLimits = None
40  try:
41  self.httpFreqLimits = json.loads(httpFreqLimits)
42  except Exception as excp:
43  logger.debug(">>> wrong with httpFreqLimits json.loads; " + str(excp))
44 
45 
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ checkHost()

def app.HostRequestStorage.HostRequestStorage.checkHost (   self,
  path,
  url,
  siteId 
)

Definition at line 52 of file HostRequestStorage.py.

52  def checkHost(self, path, url, siteId):
53  ret = self.ITEM_PROCESS
54  domain = Utils.UrlParser.getDomain(url)
55  if path is None and self.httpFreqLimits is not None and self.PATH_CONFIG_KEY in self.httpFreqLimits:
56  path = self.httpFreqLimits[self.PATH_CONFIG_KEY]
57 
58  if self.httpFreqLimits is not None and path is not None and domain is not None and siteId is not None:
59  httpFreqLimit = None
60  for elem in self.httpFreqLimits:
61  if elem != self.PATH_CONFIG_KEY:
62  if re.compile(elem).match(domain) is not None:
63  httpFreqLimit = self.httpFreqLimits[elem]
64  break
65  if httpFreqLimit is not None:
66  fpath = path + self.TAIL_SLASH_LAMBDA(path) + domain + '/' + \
67  self.HTTP_FREQ_LIMITS_FILE_NAME_PREFIX + str(siteId) + self.JSON_SUFF
68  freqJson = None
69  try:
70  with open(fpath, 'r') as fd:
71  freqBuf = fd.read()
72  freqJson = json.loads(freqBuf)
73  except Exception as excp:
74  logger.debug(">>> wrong in HostRequestStorage.checkHost method; " + str(excp))
75 # if freqJson is not None and "requestData" in freqJson:
76 # nowTime = int(time.time())
77 # if (freqJson["requestData"] + httpFreqLimit["max_freq"]) > nowTime:
78 # delayTime = (freqJson["requestData"] + httpFreqLimit["max_freq"]) - nowTime
79 # if delayTime > httpFreqLimit["max_delay"]:
80 # ret = self.ITEM_BREAK
81 # else:
82 # if "randomized" in httpFreqLimit and int(httpFreqLimit["randomized"]) > 0:
83 # delayTime += random.randint(1, int(httpFreqLimit["max_delay"]))
84 # logger.debug(">>> Use 'randomized' for delay")
85 #
86 # logger.debug(">>> start delay = " + str(delayTime))
87 # time.sleep(delayTime)
88 # logger.debug(">>> finish delay = " + str(delayTime))
89 # freqJson["requestData"] = nowTime
90 # else:
91 # freqJson = {"requestData": int(time.time())}
92  if freqJson is not None and "start" in freqJson and "count" in freqJson:
93  nowTime = int(time.time())
94  start = int(freqJson["start"])
95  count = int(freqJson["count"])
96 
97  freqAverage = 0.0
98  delayTime = 0
99  delta = int(nowTime - start)
100  if delta > 0:
101  freqAverage = float(count) / delta
102  if count > 0:
103  delayTime = delta / count
104 
105  logger.debug('freqAverage = ' + str(freqAverage) + ' compare ' + str(float(httpFreqLimit["max_freq"])))
106  logger.debug('delayTime = ' + str(delayTime) + ' compare ' + str(int(httpFreqLimit["max_delay"])))
107  logger.debug('count = ' + str(count))
108 
109  if float(freqAverage) > float(httpFreqLimit["max_freq"]):
110  logger.debug("Checking 'max_freq' passed: " + str(freqAverage) + " > " + \
111  str(float(httpFreqLimit["max_freq"])))
112  ret = self.ITEM_BREAK
113  # elif delayTime > 2 * int(httpFreqLimit["max_delay"]):
114  # logger.debug("Checking 'max_delay' passed: " + str(delayTime) + " > " + \
115  # str(float(httpFreqLimit["max_delay"])))
116  # ret = self.ITEM_BREAK
117  else:
118  if delayTime <= int(httpFreqLimit["max_delay"]):
119  delayTime = int(httpFreqLimit["max_delay"]) - delayTime
120 
121  if "randomized" in httpFreqLimit and int(httpFreqLimit["randomized"]) > 0:
122  delayTime += random.randint(1, int(httpFreqLimit["max_delay"]))
123  logger.debug(">>> Use 'randomized' for delay")
124 
125  if delayTime > 0 and delayTime <= int(httpFreqLimit["max_delay"]):
126  logger.debug(">>> start max_delay = " + str(delayTime))
127  time.sleep(delayTime)
128  logger.debug(">>> finish max_delay = " + str(delayTime))
129 
130  freqJson["count"] = count + 1
131  else:
132  freqJson = {"start": int(time.time()), "count":0}
133 
134 
135  if os.path.exists(path):
136  if not os.path.exists(path + self.TAIL_SLASH_LAMBDA(path) + domain):
137  try:
138  os.mkdir(path + self.TAIL_SLASH_LAMBDA(path) + domain)
139  except Exception as excp:
140  logger.debug(">>> makedir exception " + str(excp))
141  with open(fpath, 'w') as fd:
142  freqBuf = json.dumps(freqJson)
143  fd.write(freqBuf)
144  else:
145  logger.debug(">>> path not exists !!! path: " + path)
146  return ret
147 

Member Data Documentation

◆ FREQ_AVERAGE_LIST_SIZE

int app.HostRequestStorage.HostRequestStorage.FREQ_AVERAGE_LIST_SIZE = 5
static

Definition at line 32 of file HostRequestStorage.py.

◆ HTTP_FREQ_LIMITS_FILE_NAME_PREFIX

string app.HostRequestStorage.HostRequestStorage.HTTP_FREQ_LIMITS_FILE_NAME_PREFIX = "http_freq_limits_"
static

Definition at line 33 of file HostRequestStorage.py.

◆ httpFreqLimits

app.HostRequestStorage.HostRequestStorage.httpFreqLimits

Definition at line 39 of file HostRequestStorage.py.

◆ ITEM_BREAK

int app.HostRequestStorage.HostRequestStorage.ITEM_BREAK = 1
static

Definition at line 28 of file HostRequestStorage.py.

◆ ITEM_PROCESS

int app.HostRequestStorage.HostRequestStorage.ITEM_PROCESS = 0
static

Definition at line 27 of file HostRequestStorage.py.

◆ JSON_SUFF

string app.HostRequestStorage.HostRequestStorage.JSON_SUFF = ".json"
static

Definition at line 29 of file HostRequestStorage.py.

◆ PATH_CONFIG_KEY

string app.HostRequestStorage.HostRequestStorage.PATH_CONFIG_KEY = "PATH_CONFIG"
static

Definition at line 30 of file HostRequestStorage.py.

◆ TAIL_SLASH_LAMBDA

function app.HostRequestStorage.HostRequestStorage.TAIL_SLASH_LAMBDA = lambda self, path: '' if (path is None or len(path) == 0 or path[-1] == '/') else '/'
static

Definition at line 31 of file HostRequestStorage.py.


The documentation for this class was generated from the following file: