6 @author Scorp <developers.hce@gmail.com> 7 @link: http://hierarchical-cluster-engine.com/ 8 @copyright: Copyright © 2013-2014 IOIX Ukraine 9 @license: http://hierarchical-cluster-engine.com/license/ 18 from datetime
import datetime
36 SCHEMA_INCREMENTAL_INT = 2
48 BATCH_INSERT_NO_ONE_ITEMS = 0
49 BATCH_INSERT_ALL_NEW_ITEMS = 1
50 BATCH_INSERT_ONLY_FIRST_ITEM = 2
51 BATCH_INSERT_DEFAULT = BATCH_INSERT_NO_ONE_ITEMS
52 BATCH_INSERT_MIN_ALLOWED_VALUE = BATCH_INSERT_NO_ONE_ITEMS
53 BATCH_INSERT_MAX_ALLOWED_VALUE = BATCH_INSERT_ONLY_FIRST_ITEM
56 URL_SCHEMA_DATA_FILE_NAME_PREFIX =
"url_schema_data_" 62 def __init__(self, schema=None, siteId=None, urlSchemaDataDir=None):
71 if isinstance(urlSchemaDataDir, basestring):
72 if not os.path.isdir(urlSchemaDataDir):
73 logger.debug(
"Create urlSchemaDataDir: %s", str(urlSchemaDataDir))
75 os.makedirs(urlSchemaDataDir)
77 logger.debug(
"Creation of %s return error: %s", str(urlSchemaDataDir), str(err))
79 if urlSchemaDataDir[-1] !=
'/':
80 urlSchemaDataDir +=
'/' 84 logger.debug(
">>> readJsonFile '" + str(self.
indexFileName) +
"' - SUCCESS")
88 except Exception
as excp:
89 ExceptionLog.handler(logger, excp,
">>> UrlSchema wrong json loads")
101 fd = open(fileName,
"r") 103 ret = json.loads(fd.read())
105 except Exception, err:
106 logger.debug(
">>> readJsonFile error, file name = " + str(fileName) +
" | " + str(err))
119 for paramKey
in parametrs:
120 macroName =
'%' + paramKey +
'%' 121 if inUrl.find(macroName) >= 0:
131 for val
in parametrs[paramKey]:
133 elements.update({val:{
"frequency":0,
"time":0}})
137 for val
in parametrs[paramKey]:
138 elements.update({val:{
"frequency":0,
"time":0}})
142 for key, element
in elements.items():
143 if "frequency" in element
and "time" in element:
144 paramList.append(key)
145 frequencyList.append(int(element[
"frequency"]))
146 timeList.append(int(element[
"time"]))
151 mixIndex = frequencyList.index(min(frequencyList))
152 logger.debug(
'mixIndex: ' + str(mixIndex))
153 logger.debug(
'paramList[mixIndex]: ' + str(paramList[mixIndex]))
155 logger.debug(
'>>> inUrl 1: ' + str(inUrl))
156 inUrl = unicode(inUrl.replace(macroName, paramList[mixIndex]))
157 logger.debug(
'>>> inUrl 2: ' + str(inUrl))
158 frequencyList[mixIndex] += 1
159 timeList[mixIndex] = int((datetime.now() - datetime.fromtimestamp(0)).total_seconds())
163 self.
indexStruct[paramKey].update({paramList[mixIndex]:{
"frequency":frequencyList[mixIndex], \
164 "time":timeList[mixIndex]}})
177 logger.debug(
"schemaIncrementalInt() enter ... parameters: " + str(parameters) +
"\ninUrl: " + str(inUrl) + \
178 "\nmaxItems: " + str(maxItems))
182 for paramKey
in parameters:
183 macroName =
'%' + paramKey +
'%' 184 if inUrl.find(macroName) >= 0:
208 if maxItems > int(parameters[paramKey][
"max"]):
211 int(parameters[paramKey][
"min"]),
212 int(parameters[paramKey][
"max"]),
213 int(parameters[paramKey][
"step"]))
219 logger.debug(
"minPos from structure = " + str(minPos))
221 minPos = int(parameters[paramKey][
"min"])
223 nextPos = maxItems * int(parameters[paramKey][
"step"]) + minPos
224 if nextPos >= int(parameters[paramKey][
"max"]):
225 nextPos = int(parameters[paramKey][
"max"])
231 int(parameters[paramKey][
"step"]))
236 logger.debug(
"nextPos = " + str(nextPos))
237 if nextPos >= int(parameters[paramKey][
"max"]):
240 logger.debug(
"nextIndex after truncate = " + str(nextPos))
260 for x
in range(minPos, maxPos, step):
261 localUrl = copy.copy(inUrl)
262 logger.debug(
"Before replace inUrl = " + str(localUrl))
263 localUrl = localUrl.replace(macroName, str(x))
264 logger.debug(
"After replace inUrl = " + str(localUrl))
266 if localRet != localUrl
and localUrl
not in ret:
278 for paramKey
in parametrs:
279 macroName =
'%' + paramKey +
'%' 280 if inUrl.find(macroName) >= 0:
281 inUrl = inUrl.replace(macroName, str(random.randint(parametrs[paramKey][
"min"], parametrs[paramKey][
"max"])))
291 lowAsciiSet = string.ascii_lowercase
292 hexdigitsSet =
''.
join([ch
for ch
in string.hexdigits
if not ch.isupper()])
293 for paramKey
in parametrs:
294 macroName =
'%' + paramKey +
'%' 295 if inUrl.find(macroName) >= 0:
296 valueLen = random.randint(parametrs[paramKey][
"min"], parametrs[paramKey][
"max"])
298 for _
in xrange(0, valueLen):
300 valueStr += lowAsciiSet[random.randint(0, len(lowAsciiSet) - 1)]
302 valueStr += hexdigitsSet[random.randint(0, len(hexdigitsSet) - 1)]
303 if parametrs[paramKey][
"case"] == self.
CHAR_LOWER:
304 valueStr = valueStr.lower()
305 elif parametrs[paramKey][
"case"] == self.
CHAR_UPPER:
306 valueStr = valueStr.upper()
307 inUrl = inUrl.replace(macroName, valueStr)
318 fd = open(fileName,
"w")
320 fd.write(json.dumps(self.
indexStruct, ensure_ascii=
False))
322 except Exception, err:
323 ExceptionLog.handler(logger, err,
">>> saveJsonInFile error, file name = " + str(fileName))
334 if defaultValue
is None:
342 result = requests.get(url)
343 except Exception
as excp:
345 logger.debug(
">>> bad url request; url=" + url +
";err= " + str(excp))
346 if result
is not None and result.status_code == 200
and result.text
is not None:
348 newParams = json.loads(result.text)
349 except Exception
as excp:
351 logger.debug(
">>> bad external parameters json" + str(excp))
352 if newParams
is not None:
369 logger.debug(
'!!! parameters: ' + str(parameters))
371 if formatValue ==
'plain-text':
372 for paramName
in parameters:
373 logger.debug(
"paramName: '" + str(paramName) +
"' type: " + str(
type(paramName)))
374 logger.debug(
"paramValue: '" + str(parameters[paramName]) +
"' type: " + str(
type(parameters[paramName])))
376 if isinstance(parameters[paramName], basestring):
379 parameters[paramName] = unicode(parameters[paramName]).splitlines()
382 parameters[paramName] = unicode(parameters[paramName]).split(delimiter)
385 parameters[paramName] = [elem
for elem
in parameters[paramName]
if elem]
388 logger.debug(
'!!! ret: ' + str(ret))
390 elif formatValue ==
'json':
393 logger.error(
"Unsupported format value: '" + str(formatValue) +
"'")
408 if urlEncode
is not None and int(urlEncode) > 0:
409 for paramName
in parameters:
410 if isinstance(parameters[paramName], list)
or isinstance(parameters[paramName], unicode):
412 for elem
in parameters[paramName]:
413 if isinstance(elem, str)
or isinstance(elem, unicode):
415 encodedStr = urllib.urlencode({
'':elem})
416 if len(encodedStr) > 0
and encodedStr[0] ==
'=':
417 encodedStr = encodedStr[1:]
418 paramsList.append(encodedStr)
419 except Exception, err:
420 logger.debug(
"urlencode '" + str(elem) +
"' has error: " + str(err))
421 paramsList.append(unicode(elem))
423 parameters[paramName] = paramsList
437 for values
in parameters.values():
439 if isinstance(values, list):
440 countsList.append(len(values))
442 return max(countsList)
451 logger.debug(
">>> resolveParametersFromFile enter fileName: " + str(fileName))
460 fd = open(fileName,
"r") 463 if len(buff) > 0
and buff[0] ==
'{':
464 parameters = json.loads(buff)
466 parameters = {
"":buff}
468 except Exception, err:
469 logger.debug(
">>> resolveParametersFromFile error, file name = " + str(fileName) +
" | " + str(err))
474 if len(parameters) > 0:
477 logger.debug(
"Wrong file name: '" + str(fileName) +
"', expected '<file_name>.json'")
489 if self.
schema is not None:
494 if "file_path" in self.
schema:
496 self.
schema[
"parameters"])
500 if "format" in self.
schema:
502 if "delimiter" in self.
schema:
503 delimiter = self.
schema[
"delimiter"]
506 self.
schema[
"parameters"])
508 if "url_encode" in self.
schema:
511 if "batch_insert" in self.
schema and \
518 logger.debug(
'maxCountParameters: ' + str(maxCountParameters))
521 itemsLen = int(self.
schema[
"max_items"])
522 for _
in xrange(0, itemsLen):
538 if localRet != inUrl
and localRet
not in ret:
540 if len(ret) >= int(maxCountParameters):
541 logger.debug(
'>>> break len(ret) = ' + str(len(ret)))
545 except Exception
as excp:
546 ExceptionLog.handler(logger, excp,
">>> generateUrlSchema has some error")
548 logger.debug(
">>> urlSchema len = " + str(len(ret)))
def saveJsonInFile(self, fileName)
def schemaRandomStr(self, inUrl, parametrs)
int BATCH_INSERT_MAX_ALLOWED_VALUE
def resolveParametersFromFile(self, fileName, defaultValue=None)
def __init__(self, schema=None, siteId=None, urlSchemaDataDir=None)
def resolveParametersByHTTP(self, urls, defaultValue=None)
def schemaIncrementalInt(self, inUrl, parameters, maxItems)
def urlEncodeToParameters(self, parameters, urlEncode)
def generateUrlSchema(self, inUrl)
int BATCH_INSERT_MIN_ALLOWED_VALUE
def getMaxCountParameters(self, parameters)
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
def readJsonFile(self, fileName)
def resolveParametersByFormat(self, parameters, delimiter=' ', formatValue='json', defaultValue=None)
def schemaPredefined(self, inUrl, parametrs)
int SCHEMA_INCREMENTAL_INT
string URL_SCHEMA_DATA_FILE_NAME_PREFIX
def replaceSchemaIncrementalInt(self, inUrl, macroName, minPos, maxPos, step)
def schemaRandomInt(self, inUrl, parametrs)