2 Created on Mar 28, 2014 6 @link: http://hierarchical-cluster-engine.com/ 7 @copyright: Copyright © 2013-2014 IOIX Ukraine 8 @license: http://hierarchical-cluster-engine.com/license/ 14 import cPickle
as pickle
31 from subprocess
import Popen
32 from subprocess
import PIPE
33 from datetime
import datetime
34 from decimal
import Decimal
37 from stat
import ST_MTIME
38 from HTMLParser
import HTMLParser
49 logger = logging.getLogger(APP_CONSTS.LOGGER_NAME)
51 lock = threading.Lock()
58 META_REDIRECT =
r"http-equiv\W*refresh.+?url\W+?(.+?)\"" 60 SEARCH_COMMENT_SIMPLE_PATTERN =
r"<!--(.|\n)*?-->" 61 SEARCH_COMMENT_PATTERN =
r"<![ \r\n\t]*(--([^\-]|[\r\n]|-[^\-])*--[ \r\n\t]*)>" 62 SEARCH_NOSCRIPT_PATTERN =
r"<noscript>(.|\n)*?</noscript>" 77 for localValue
in classType.__dict__:
78 if str(localValue).find(prefix) == 0:
79 if value == getattr(classType, localValue,
None):
93 def getPath(dictionary, jsonString, path):
94 if jsonString !=
None:
95 dictionary = json.loads(jsonString)
96 for i, p
in re.findall(
r'(\d+)|(\w+)', path):
97 dictionary = dictionary[p
or int(i)]
114 if isinstance(obj, datetime):
115 return obj.isoformat()
117 if isinstance(obj, Decimal):
121 if isinstance(obj, types.DictProxyType):
130 return json.dumps(self.__dict__, default=JsonSerializable.json_serial, sort_keys=
True, indent=4)
135 super(SQLExpression, self).
__init__()
136 if stringExpression
is None:
139 self.
str =
str(stringExpression)
154 SUBDIR_LEVEL1_LEN = 2
157 def __init__(self, string, subdirLen=SUBDIR_LEVEL1_LEN):
163 if "CONTENT_STORE_PATH" in os.environ
and os.environ[
"CONTENT_STORE_PATH"] !=
"":
164 logger.debug(
"os.environ[CONTENT_STORE_PATH]: set to %s", os.environ[
"CONTENT_STORE_PATH"])
166 self.
string += os.environ[
"CONTENT_STORE_PATH"]
168 logger.debug(
"os.environ[CONTENT_STORE_PATH]: not set.")
185 for dictionary
in initial_data:
186 for key
in dictionary:
187 setattr(self, key, dictionary[key])
189 setattr(self, key, kwargs[key])
203 if parser
and parser.has_option(section, option):
205 ret = parser.get(section, option, defValue)
222 type_, value_, traceback_ = sys.exc_info()
223 stack = traceback.format_tb(traceback_)
227 ret = ret +
"\n" + (str(item))
229 if linesNumberMax !=
None and n == linesNumberMax:
238 tracebackTimeQueue = []
239 tracebackIdent =
False 240 tracebackIdentFiller =
"-" 241 tracebackMessageCall =
"call" 242 tracebackMessageExit =
"exit" 243 tracebackmessageDelimiter =
":" 244 tracebackTimeMark =
True 245 tracebackTimeMarkFormat =
"%Y-%m-%d %H:%M:%S.%f" 246 tracebackTimeMarkDelimiter =
" " 247 tracebackIncludeInternalCalls =
False 248 tracebackIncludeLineNumber =
True 249 tracebackIncludeLineNumberDelimiter =
":" 250 tracebackIncludeFileNumber =
True 251 tracebackIncludeFileNumberDelimiter =
":" 252 tracebackFunctionNameDelimiter =
":" 253 tracebackExcludeModulePath = [
"/usr/lib/",
"/usr/local/lib/"]
254 tracebackExcludeFunctionName = [
"varDump"]
255 tracebackExcludeFunctionNameStarts = [
"<"]
256 tracebackIncludeExitCalls =
True 257 tracebackRecursionlimit = 0
258 tracebackRecursionlimitErrorMsg =
"RECURSION STACK LIMIT REACHED " 259 tracebackIncludeLocals =
False 260 tracebackIncludeArg =
False 261 tracebackIncludeLocalsPrefix =
"\nLOCALS:\n" 262 tracebackIncludeArgPrefix =
"\nARG:\n" 263 tracebackLogger =
None 264 tracebackElapsedTimeDelimiter =
"" 265 tracebackElapsedTimeFormat =
"{:.6f}" 266 tracebackUnknownExceptionMsg =
"Unknown exception!" 277 if event ==
"call" or event ==
"return":
284 idents = tracebackIdentFiller * indent[0]
287 message = tracebackMessageCall
288 tracebackTimeQueue.append(time.time())
290 elif event ==
"return":
292 idents = tracebackIdentFiller * indent[0]
296 message = tracebackMessageExit
297 te =
"{:.6f}".
format(time.time() - tracebackTimeQueue.pop())
299 if tracebackTimeMark:
301 t = datetime.now().strftime(
'%Y-%m-%d %H:%M:%S.%f')[:-3]
305 if tracebackIncludeLineNumber:
306 ln = str(frame.f_lineno)
310 if tracebackIncludeFileNumber:
311 fn = str(frame.f_code.co_filename)
316 for item
in tracebackExcludeModulePath:
317 if item
in frame.f_code.co_filename:
322 for item
in tracebackExcludeFunctionName:
323 if frame.f_code.co_name == item:
328 for item
in tracebackExcludeFunctionNameStarts:
329 if frame.f_code.co_name.startswith(item):
333 if tracebackIncludeLocals
or tracebackIncludeArg:
334 oldRL = sys.getrecursionlimit()
335 if oldRL < tracebackRecursionlimit:
336 sys.setrecursionlimit(tracebackRecursionlimit)
340 if tracebackIncludeLocals:
344 localsDump = str(frame.f_locals)
345 localsDump = tracebackIncludeLocalsPrefix + localsDump
347 localsDump = tracebackRecursionlimitErrorMsg + str(tracebackRecursionlimit)
352 if tracebackIncludeArg:
357 argDump = tracebackIncludeArgPrefix + argDump
359 argDump = tracebackRecursionlimitErrorMsg + str(tracebackRecursionlimit)
364 if oldRL
is not None:
365 sys.setrecursionlimit(oldRL)
370 if (
not (tracebackIncludeInternalCalls
is False and frame.f_code.co_name.startswith(
"__")))
and\
371 (
not (tracebackIncludeExitCalls
is False and event ==
"return"))
and\
372 (
not excludedP)
and (
not excludedF)
and (
not excludedF2):
373 tmsg = idents + message + tracebackmessageDelimiter + \
374 fn + tracebackIncludeFileNumberDelimiter + \
375 ln + tracebackIncludeLineNumberDelimiter + \
376 frame.f_code.co_name +
"()" + tracebackFunctionNameDelimiter + \
377 tracebackElapsedTimeDelimiter + te + localsDump + argDump
378 if tracebackLogger
is None:
379 tracebackList.append(t + tracebackTimeMarkDelimiter + tmsg)
381 tracebackLogger.debug(
"%s", tmsg)
382 if len(tracebackTimeQueue) == 0:
383 tracebackLogger.debug(
"%s", APP_CONSTS.LOGGER_DELIMITER_LINE)
385 except Exception
as e:
386 if tracebackLogger
is None:
387 tracebackList.append(
"Exception: " + str(e))
389 tracebackLogger.error(
"%s", str(e))
391 if tracebackLogger
is None:
392 tracebackList.append(tracebackUnknownExceptionMsg)
394 tracebackLogger.error(
"%s", tracebackUnknownExceptionMsg)
409 def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False,
410 objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10):
411 if objectsHash
is None:
418 if isinstance(obj, list):
421 newobj.append(
varDump(item,
False, strTypeMaxLen, strTypeCutSuffix, stringifyType, ignoreErrors,
422 objectsHash, depth, indent, ensure_ascii, maxDepth))
423 elif isinstance(obj, tuple):
426 temp.append(
varDump(item,
False, strTypeMaxLen, strTypeCutSuffix, stringifyType, ignoreErrors,
427 objectsHash, depth, indent, ensure_ascii, maxDepth))
429 elif isinstance(obj, set):
432 temp.append(str(
varDump(item,
False, strTypeMaxLen, strTypeCutSuffix, stringifyType, ignoreErrors,
433 objectsHash, depth, indent, ensure_ascii, maxDepth)))
435 elif isinstance(obj, dict):
437 for key, value
in obj.items():
438 newobj[str(
varDump(key,
False, strTypeMaxLen, strTypeCutSuffix))] = \
439 varDump(value,
False, strTypeMaxLen, strTypeCutSuffix, stringifyType, ignoreErrors,
440 objectsHash, depth, indent, ensure_ascii, maxDepth)
443 elif '__dict__' in dir(obj):
445 for k
in obj.__dict__.keys():
448 if isinstance(obj.__dict__[k], basestring):
449 newobj[k] = obj.__dict__[k]
450 if strTypeMaxLen > 0
and len(newobj[k]) > strTypeMaxLen:
451 newobj[k] = newobj[k][:strTypeMaxLen] + strTypeCutSuffix
453 if '__dict__' in dir(obj.__dict__[k]):
454 sobj = str(obj.__dict__[k])
455 if sobj
in objectsHash:
456 newobj[k] =
'OBJECT RECURSION: ' + sobj
458 objectsHash.append(sobj)
459 newobj[k] =
varDump(obj.__dict__[k],
False, strTypeMaxLen, strTypeCutSuffix, stringifyType,
460 ignoreErrors, objectsHash, depth, indent, ensure_ascii, maxDepth)
462 newobj[k] =
varDump(obj.__dict__[k],
False, strTypeMaxLen, strTypeCutSuffix, stringifyType,
463 ignoreErrors, objectsHash, depth, indent, ensure_ascii, maxDepth)
465 if ' object at ' in sobj
and '__type__' not in newobj:
466 newobj[
'__type__'] = sobj.replace(
" object at ",
" #").replace(
"__main__.",
"")
468 if stringifyType == 0:
470 s = json.dumps(newobj, indent=indent, ensure_ascii=ensure_ascii)
472 except Exception
as err:
474 except Exception
as err:
480 newobj =
'MAX OBJECTS EMBED DEPTH ' + str(maxDepth) +
' REACHED!' 483 if stringifyType == 0:
485 newobj = json.dumps(newobj, indent=indent, ensure_ascii=ensure_ascii)
486 except Exception
as err:
490 newobj =
'To json error: ' + str(err)
501 return '''%s: mem=%s mb 502 ''' % (point, resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000)
522 parseUrl = urlparse.urlparse(url)
523 if parseUrl.scheme !=
None and parseUrl.netloc !=
None:
535 parseUrl = urlparse.urlparse(url)
536 if UrlParser.isValidURL(url):
537 ret = parseUrl.scheme +
"://" + parseUrl.netloc
549 auth = urlparse.urlsplit(url.strip())[1]
550 ret = (re.search(
'([^@]*@)?([^:]*):?(.*)', auth).groups())[1]
566 def prepareNormalization(path):
569 replaceSimbolDict = {
'\a':
'/a',
578 replaceStartSimbolDict = {
'://':
''}
580 for src, dest
in replaceStartSimbolDict.items():
581 if pathStr.startswith(src):
582 pathStr = pathStr.replace(src, dest)
584 for src, dest
in replaceSimbolDict.items():
585 pathStr = pathStr.replace(src, dest)
587 for i
in range(0, 32):
588 pathStr = pathStr.replace(str(chr(i)), str(
'/%o' % i))
590 for s
in pathStr.split(
"\\"):
593 out = [elem
for elem
in out
if elem !=
'']
598 if isinstance(url, basestring):
606 resUrl = prepareNormalization(url)
607 if isinstance(base, basestring):
609 baseUrl = prepareNormalization(base)
611 if baseUrl != resUrl:
612 resUrl = urlparse.urljoin(baseUrl, resUrl)
614 if url != resUrl
and log
is not None:
615 log.debug(
'==== Urls different ====')
616 log.debug(
"base: %s", str(baseUrl))
617 log.debug(
"url: %s", str(url))
618 log.debug(
"res: %s", str(resUrl))
623 if isinstance(supportProtocols, list):
625 log.debug(
"supportProtocols: %s, res: %s", str(supportProtocols), str(res))
627 if isinstance(res, basestring):
628 v = urlparse.urlsplit(res)
629 if v.scheme
not in supportProtocols:
631 log.debug(
"Not support protocol: %s", str(v.scheme))
635 log.debug(
"before normalization res: %s", str(res))
639 localUrls = res.split()
642 log.debug(
"localUrls: %s", str(localUrls))
644 for localUrl
in localUrls:
649 log.debug(
"resUrls: %s",
varDump(resUrls))
650 res =
','.
join(resUrls)
652 log.debug(
"res: %s", str(res))
663 NORM_USE_VALIDATOR = 2
665 NORM_DEFAULT = NORM_MAIN
666 BAD_URL_PREFIX =
"normalization-error://?" 678 def normalize(url, supportProtocols=None, normMask=NORM_DEFAULT):
679 norm_url = url.strip()
681 logger.debug(
"None zero normMask: %s", str(normMask))
683 if supportProtocols
is not None and isinstance(supportProtocols, list):
684 colonPos = norm_url.find(
':')
685 slashPos = norm_url.find(
'/')
686 if colonPos != -1
and (slashPos == -1
or slashPos > colonPos):
687 if len(norm_url.split(
':')) > 1:
688 protocol = norm_url.split(
':')[0]
689 if protocol
not in supportProtocols:
691 norm_url = UrlNormalizator.BAD_URL_PREFIX + urllib.quote(norm_url)
692 except Exception
as err:
693 logger.debug(
">>> urllib.quote error = " + str(err))
694 norm_url = UrlNormalizator.BAD_URL_PREFIX + norm_url
698 stripWWW =
True if normMask & UrlNormalizator.NORM_SKIP_WWW
else False 699 useValidator =
True if normMask & UrlNormalizator.NORM_USE_VALIDATOR
else False 700 enableAdditionNormalize =
True if normMask & UrlNormalizator.NORM_MAIN
else False 701 norm_url = str(
urinormpath(url.strip(), stripWWW, useValidator, enableAdditionNormalize))
707 except Exception
as e:
708 logger.error(
"Normalization error: " + str(e) +
"\nURL: [" + url +
"]\n" + str(
getTracebackInfo()))
719 return False if url.find(UrlNormalizator.BAD_URL_PREFIX) == 0
else True 731 entities = {
"&":
"&"}
734 le = len(entities[k])
738 p = ret.find(k, p + 1)
742 if (p + le - 1 > l)
or ((p + le - 1 <= l)
and (ret[p:p + le] != entities[k])):
743 ret = ret[:p] + entities[k] + ret[p + 1:]
755 if env_path
in os.environ
and os.environ[env_path] !=
"":
756 logger.debug(
"os.environ[%s]: set to %s", env_path, os.environ[env_path])
757 open(os.environ[env_path] + file_name,
"wb").write(input_pickled_object)
759 logger.debug(
"os.environ[%s]: not set.", env_path)
764 def urinormpath(path, stripWWW=False, useValidator=False, enableAdditionNormalize=True):
771 if path
is None or path ==
"":
775 for s
in path.split(
'/'):
783 if path.startswith(
'/')
and (
not out
or out[0]):
785 if path.endswith(
'/.')
or path.endswith(
'/..'):
790 splitPath = path.split(
"?")
791 if len(splitPath) > 0:
792 splitPath[0] = splitPath[0].replace(
"://www.",
"://")
793 localPath = splitPath[0]
794 for elem
in splitPath[1:]:
800 if enableAdditionNormalize:
801 resultUrlDict =
Url(localPath)
802 if useValidator
and not Url.GetStats([resultUrlDict])[0][
"valid"]:
803 raise Exception(path +
" NOT VALIDATE!")
804 ret1 = Url.GetStats([resultUrlDict])[0][
"canonicalized"]
808 if ret
is not None and ret1
is not None and ret != ret1:
809 logger.debug(
"--->>>> URLS DIFFERTNT <<<<---")
812 except Exception
as e:
813 logger.error(
"Normalization error: " + str(e) +
"\npath: [" + path +
"]\n" + str(
getTracebackInfo()))
832 for i
in range(min_number, max_number):
837 lock_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
840 lock_socket.bind(
'\0' +
"dc_process_lock_" + sock_prefix + str(i))
854 lg = logging.getLogger(
'')
857 for h
in lg.__dict__[
'handlers']:
858 if h.__class__.__name__ ==
'FileHandler':
859 log_file = h.baseFilename
860 if newFile
is not None:
861 h.baseFilename = newFile
866 if os.path.exists(newFile):
867 t = os.stat(newFile)[ST_MTIME]
870 h.rolloverAt = h.computeRollover(t)
871 if rollover
and h.shouldRollover(
''):
874 elif h.__class__.__name__ ==
'TimedRotatingFileHandler':
875 log_file = h.baseFilename
876 if newFile
is not None:
877 h.baseFilename = newFile
882 if rollover
and h.shouldRollover(
''):
894 for h
in loggerObj.handlers:
895 if h.__class__.__name__ ==
'FileHandler' or h.__class__.__name__ ==
'TimedRotatingFileHandler':
907 if substrList
is None or not isinstance(substrList, list):
908 raise Exception(
">>> error substrList is None or not List type")
909 if prefixes
is None or not isinstance(prefixes, list):
910 raise Exception(
">>> error prefixes is None or not List type")
911 if len(substrList) != len(prefixes):
912 raise Exception(
">>> error substrList and prefixes lists have different lengths")
914 for substr
in substrList:
915 if isinstance(substr, str)
or isinstance(substr, unicode):
916 if isinstance(prefixes[i], str)
or isinstance(prefixes[i], unicode):
917 ret += str(prefixes[i])
926 CUR_YEAR_FULL =
"@CUR_YEAR_FULL" 927 CUR_YEAR_SHORT =
"@CUR_YEAR_SHORT" 928 CUR_MONTH =
"@CUR_MONTH" 936 ret[DataReplacementConstants.CUR_YEAR_FULL] = datetime.now().strftime(
"%Y")
937 ret[DataReplacementConstants.CUR_YEAR_SHORT] = datetime.now().strftime(
"%y")
938 ret[DataReplacementConstants.CUR_MONTH] = datetime.now().strftime(
"%m")
939 ret[DataReplacementConstants.CUR_DAY] = datetime.now().strftime(
"%d")
949 if urlparse.urlparse(url).hostname:
950 host =
'.'.
join(urlparse.urlparse(url).hostname.split(
'.')[-2:])
959 stamp = time.mktime(date_str.timetuple())
961 return time.strftime(
'%a, %d %b %Y %H:%M:%S GMT', time.gmtime(stamp))
972 log.debug(
"set siteId = '0' from 'autoFillSiteId'")
983 from bs4
import Comment
986 if soup
is not None and hType == 0:
987 for elem
in soup.findAll(text=
lambda text: isinstance(text, Comment)):
989 elif htmlBuf
is not None and hType == 1:
990 ret = re.sub(SEARCH_COMMENT_PATTERN,
"", htmlBuf)
991 logger.debug(
"!!! use pattern: %s", str(SEARCH_COMMENT_PATTERN))
992 elif htmlBuf
is not None and hType == 2:
993 ret = re.sub(SEARCH_COMMENT_SIMPLE_PATTERN,
"", htmlBuf)
994 logger.debug(
"!!! use pattern: %s", str(SEARCH_COMMENT_SIMPLE_PATTERN))
995 elif htmlBuf
is not None and hType == 3:
1017 if ret.find(startStr) != -1:
1018 p = ret.index(startStr)
1021 if ret.find(finishStr, p) != -1:
1022 p1 = ret.index(finishStr, p) + len(finishStr)
1025 if ret.find(finishDefault, p) != -1:
1026 p1 = ret.index(finishDefault, p) + len(finishDefault)
1032 ret = ret[0:p] + ret[p1:]
1035 if greediness > 0
and i == greediness:
1048 if htmlBuf
is not None:
1067 if htmlTxt
is not None and htmlTxt.strip() !=
'':
1069 from bs4
import BeautifulSoup
1070 ret = joinGlue.join(BeautifulSoup(htmlTxt,
'lxml').findAll(text=
True))
1071 elif method == 1
or method == 2:
1072 if regExp
is not None:
1078 r =
r'(<!--.*?-->|<[^>]*>)' 1079 ret = re.sub(r, joinGlue, htmlTxt)
1083 ret = ret.get_data()
1088 if c ==
'<' and not quote:
1090 elif c ==
'>' and not quote:
1092 elif (c ==
'"' or c ==
"'")
and tag:
1095 ret = ret + joinGlue + c
1098 ret = joinGlue.join(xml.etree.ElementTree.fromstring(htmlTxt).itertext())
1100 if method == 1
or method == 2:
1102 ret = cgi.escape(ret)
1103 ret = re.sub(
'[<>]',
'', ret)
1131 from bs4
import BeautifulSoup
1133 soup = BeautifulSoup(htmlBuf,
'lxml')
1136 for elem
in soup.findAll(name=
'script'):
1141 return ''.
join(soup.findAll(text=
True))
1147 def innerText(selectorList, delimiter=' ', innerDelimiter=' ', tagReplacers=None, REconditions=None,
1148 attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None):
1149 extendInnerText =
ExtendInnerText(tagReplacers, delimiter, innerDelimiter, REconditions, attrConditions,
1150 keepAttributes, baseUrl, closeVoid, excludeNodes)
1151 extendInnerText.innerText(
None, selectorList,
None)
1152 ret = extendInnerText.stripHtml
1159 def innerTextToList(selectorList, delimiter=' ', innerDelimiter=' ', tagReplacers=None, REconditions=None,
1160 attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None):
1161 extendInnerText =
ExtendInnerText(tagReplacers, delimiter, innerDelimiter, REconditions, attrConditions,
1162 keepAttributes, baseUrl, closeVoid, excludeNodes)
1163 extendInnerText.innerTextToList(
None, selectorList,
None)
1164 ret = extendInnerText.stripHtmlList
1176 retXPathValue =
None 1177 for subXPath
in subXPathes:
1178 retXPath = xpath + (subXPathPattern % subXPath)
1180 retXPathValue = sel.xpath(retXPath).extract()
1181 except Exception
as excp:
1182 logger.info(
">>> Common xPath extractor exception, = " + retXPath +
" excp=" + str(excp))
1183 retXPathValue =
None 1185 if len(retXPathValue) > 0
and ''.
join(retXPathValue).strip() !=
'':
1187 return retXPath, retXPathValue
1196 if isinstance(incomeDict, dict):
1197 for key
in incomeDict:
1198 if isinstance(incomeDict[key], str)
or isinstance(incomeDict[key], unicode):
1199 ret[key] =
splitPairs(incomeDict[key], splitters)
1209 splitStr = buf.split(splitters)
1210 for elem
in splitStr:
1211 localStr = elem.split(
'=')
1212 if isinstance(localStr, list)
and len(localStr) >= 2:
1213 ret[localStr[0]] = localStr[1]
1223 if str1.find(str2) > 0
and ((len(str1) - str1.find(str2)) == len(str2)):
1235 replaceValue = localValue.replace(replaceFrom, replaceTo)
1236 while len(replaceValue) != len(localValue):
1237 localValue = replaceValue
1238 replaceValue = localValue.replace(replaceFrom, replaceTo)
1284 if match
is not None:
1285 resUrl = match.groups()[0].strip()
1287 log.debug(
'resUrl: ' + str(resUrl))
1288 urlObj =
Url(resUrl)
1289 if urlObj.isValid():
1292 log.debug(
'ret: ' + str(ret))
1304 splitHref = href.split(
'?')
1305 if splitHref
is not None and len(splitHref) > 0:
1306 adresses = splitHref[0]
1307 adresses = adresses.split(
',')
1310 for adress
in adresses:
1311 adress = adress.split(
'@')
1312 if adress
is not None and len(adress) > 0:
1313 names.append(adress[0])
1316 for adress
in adresses:
1328 ROTATED_ATTRIBUTE_NAME =
'__rotated' 1341 def getLogger(self, loggerName=None, fileNameSuffix='', restore=False):
1342 if loggerName
is None:
1343 ln = APP_CONSTS.LOGGER_NAME
1347 if fileNameSuffix !=
'' or restore
is True:
1354 lg = logging.getLogger(ln)
1357 fname = lfn.findReplace()
1358 if fname
is not None and fname !=
'':
1359 if restore
is False:
1362 fname += fileNameSuffix
1363 pin = lfn.getFreeProcInstanceNumber(os.path.basename(fname))
1364 if pin !=
'' and ((pin !=
'0' and fileNameSuffix ==
'')
or (pin ==
'0' and fileNameSuffix !=
'')):
1365 pin =
'.' + pin +
'.log' 1366 fname = lfn.findReplace(fname + pin, rollover=rollover)
1368 lg = logging.getLogger(ln)
1372 fname = lfn.findReplace(self.
fnameOld, rollover=rollover)
1373 lg = logging.getLogger(ln)
1375 except Exception, err:
1376 raise Exception(
'Logger initialization error:' + str(err) +
"\n" +
getTracebackInfo())
1382 if isinstance(inputStr, str):
1383 ret = inputStr.decode(
'utf-8')
1395 ret = inStr.split(delimiter)
1397 if joingGlue
is None:
1407 item = item.lstrip()
1409 item = item.rstrip()
1412 if skipEmpty
and item ==
'':
1419 return glue.join(ret).strip()
1427 return len(re.findall(
r'\{.+?\}', content))
1434 LEVEL_NAME_ERROR =
'error' 1435 LEVEL_NAME_INFO =
'info' 1436 LEVEL_NAME_DEBUG =
'debug' 1438 LEVEL_VALUE_ERROR = logging.ERROR
1439 LEVEL_VALUE_INFO = logging.INFO
1440 LEVEL_VALUE_DEBUG = logging.DEBUG
1449 super(ExceptionLog, self).
__init__()
1463 def handler(log, error, message, objects=(), levels={}):
1466 ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_ERROR, \
1467 ExceptionLog.LEVEL_NAME_INFO:ExceptionLog.LEVEL_VALUE_INFO, \
1468 ExceptionLog.LEVEL_NAME_DEBUG:ExceptionLog.LEVEL_VALUE_DEBUG \
1472 for name, level
in levels.items():
1473 if levelsDict.has_key(name):
1474 levelsDict[name] = level
1478 if isinstance(str(error), str)
or isinstance(str(error), unicode):
1479 errorMsg = str(error)
1480 except Exception, err:
1481 log.log(levelsDict[ExceptionLog.LEVEL_NAME_DEBUG],
'Try make str(err) return error: ' + str(err))
1484 log.log(levelsDict[ExceptionLog.LEVEL_NAME_ERROR], message +
' ' + errorMsg)
1490 if isinstance(objects, tuple):
1492 log.log(levelsDict[ExceptionLog.LEVEL_NAME_DEBUG],
varDump(obj))
1505 ERROR_CODE_GENERAL_EXCEPTION = 1
1506 ERROR_CODE_APPLIED_EXCEPTION = 2
1509 threading.Thread.__init__(self)
1521 except Exception, err:
1522 if self.
logger is not None:
1523 self.
logger.
error(
"Error of execution of thread class InterruptableThread(): %s\nargs: %s",
1524 str(err), str(self.
args))
1531 self.
errorMessage =
'Undefined error of execution of thread class InterruptableThread(), args: ' + str(self.
args)
1532 if self.
logger is not None:
1560 log.error(
"an not stop thread with _Thread__stop()!")
1567 log.error(
"Can not stop thread with __stop()!")
1570 it._Thread__delete()
1574 log.error(
"Can not stop thread with _Thread__delete()!")
1576 if it.errorCode == it.ERROR_CODE_APPLIED_EXCEPTION:
1578 log.error(
"Error1 code %s, exception: %s", str(it.errorCode), str(it.errorException))
1579 raise it.errorException
1582 if it.errorCode == it.ERROR_CODE_APPLIED_EXCEPTION:
1584 log.error(
"Error2 code %s, exception: %s", str(it.errorCode), str(it.errorException))
1585 raise it.errorException
1598 if fileReference.startswith(protocolPrefix):
1600 f = fileReference[len(protocolPrefix):]
1602 except Exception, err:
1603 if loggerObj
is not None:
1604 loggerObj.error(
"Error load from file `%s` by reference: %s", f, str(err))
1615 with open(inFile,
'r') as f: 1619 ret = ret.decode(
'utf8')
1629 return string.replace(
"\\",
"\\\\").replace(
'"',
'\\\"').replace(
"'",
"\\\'").replace(
"\n",
"\\n").\
1630 replace(
"\r",
"\\r").replace("\0", "\\0")
1638 return False if isinstance(validators.url(url), validators.ValidationFailure)
else True 1649 def getHash(strBuf, binSize=32, digestType=0, fixedMode=0, valLimit=18446744073709552000L):
1653 d = hashlib.md5(strBuf)
1655 d = hashlib.sha1(strBuf)
1665 for i
in xrange(1, s - 1):
1666 v = int(h[:s - i], 16)
1669 elif fixedMode == 1:
1670 v = ctypes.c_uint32(zlib.crc32(strBuf, int(time.time()))).value
1672 v = ctypes.c_ulong(zlib.crc32(strBuf, int(time.time()))).value
1683 def strToFloat(val, defaultValue=0.0, log=None, positivePrefixes=None):
1686 if positivePrefixes
is None:
1687 posPrefixes = {
'K':
'1E3',
'M':
'1E6',
'G':
'1E9',
'T':
'1E12',
'P':
'1E15',
'E':
'1E18',
'Z':
'1E21',
'Y':
'1E24'}
1689 posPrefixes = positivePrefixes
1693 if val[-1]
in posPrefixes.keys():
1694 v = Decimal(val[:-1])
1695 ret = float(v * Decimal(posPrefixes[val[-1]]))
1698 except Exception, err:
1713 proxy_type = proxy_host = proxy_port = proxy_user = proxy_passwd =
None 1714 if isinstance(proxyString, basestring)
and proxyString !=
"":
1716 pattern =
'(.*)://(.*):(.*)@(.*):(.*)' 1717 match = re.search(pattern, proxyString, re.I + re.U)
1718 if match
is not None:
1719 proxy_type, proxy_user, proxy_passwd, proxy_host, proxy_port = match.groups()
1722 pattern =
'(.*)://(.*):(.*)' 1723 match = re.search(pattern, proxyString, re.I + re.U)
1724 if match
is not None:
1725 proxy_type, proxy_host, proxy_port = match.groups()
1727 pattern =
'(.*):(.*)' 1728 match = re.search(pattern, proxyString, re.I + re.U)
1729 if match
is not None:
1730 proxy_host, proxy_port = match.groups()
1731 proxy_type = defaultProxyType
1733 ret = (proxy_type, proxy_host, proxy_port, proxy_user, proxy_passwd)
1734 except Exception, err:
1736 log.error(
"Error: %s", str(err))
1751 exitCode = APP_CONSTS.EXIT_FAILURE
1754 log.debug(
"Popen: %s", str(cmd))
1756 process = Popen(cmd, stdout=PIPE, stdin=PIPE, stderr=PIPE, shell=
True, close_fds=
True, executable=
'/bin/bash')
1758 log.debug(
"len(inputStream)= %s", str(len(inputStream)))
1760 (output, errMsg) = process.communicate(input=inputStream)
1761 exitCode = process.wait()
1764 log.debug(
"Process response has exitCode = %s, stdout len = %s, stderr: %s",
1765 str(exitCode), str(len(output)), str(errMsg))
1767 except Exception, err:
1769 log.error(
"Popen execution error: %s", str(err))
1772 PopenResult = collections.namedtuple(
'PopenResult', [
'stdout',
'stderr',
'exitCode'])
1773 popenResult = PopenResult(stdout=output, stderr=errMsg, exitCode=exitCode)
1787 if jsonString
is not None and jsonString !=
'':
1788 if isinstance(jsonString, basestring):
1789 ret = json.loads(jsonString)
1793 log.debug(
"Input object type is: %s",
type(jsonString))
1794 except Exception, err:
1796 log.error(
"Error pars json: %s; source string:\n%s", str(err), jsonString)
1810 if isinstance(word, basestring)
and isinstance(buff, basestring):
1812 if word.startswith(
u'/'):
1814 if re.search(pattern=word, string=buff, flags=re.U + re.I + re.M)
is not None:
1817 ret = (word.upper() == buff.upper())
1819 except Exception, err:
1821 log.error(
"Expression: %s, Error: %s", str(word), str(err))
def generateDomainUrl(url)
def getContentCSSMarkupEntrancesNumber(content)
def loadFromFileByReference(fileReference, initString=None, protocolPrefix='file://', loggerObj=None)
def isTailSubstr(str1, str2)
def removeDuplicated(inStr, delimiter=",", joingGlue=None, trimMode=1, skipEmpty=False)
def getFreeProcInstanceNumber(self, sock_prefix="module", min_number=0, max_number=32)
def executeCommand(cmd, inputStream='', log=None)
def __init__(self, loggerInst=None)
def __init__(self, stringExpression="")
int ERROR_CODE_GENERAL_EXCEPTION
def autoFillSiteId(siteId, log)
def getConfigParameter(parser, section, option, defValue)
def getPairsDicts(incomeDict, splitters=',')
def cutSubstringEntrances(buf, startStr='<!--', finishStr='-->', behaveMask=0, greediness=0, finishDefault='\n')
def isValueIn(classType, prefix, value)
def reMatch(word, buff, log=None)
def url_normalize(url, charset='utf-8')
def innerHTMLText(htmlBuf, stripComment=True, stripScript=True)
def getHTMLRedirectUrl(buff, log)
def getFirstNotEmptySubXPath(xpath, sel, subXPathPattern, subXPathes)
def getPath(dictionary, jsonString, path)
def loggerFlush(loggerObj)
def eraseNoScript(htmlBuf=None)
def emailParse(href, onlyName=False, defaultSeparator=' ')
def urinormpath(path, stripWWW=False, useValidator=False, enableAdditionNormalize=True)
def jsonLoadsSafe(jsonString, default=None, log=None)
def innerText(selectorList, delimiter=' ', innerDelimiter=' ', tagReplacers=None, REconditions=None, attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None)
def stripHTMLComments(htmlBuf=None, soup=None, hType=3)
def readFile(inFile, decodeUTF8=True)
def splitPairs(buf, splitters=',')
def strToUnicode(inputStr)
def __init__(self, joinGlue=' ')
def __init__(self, initial_data, kwargs)
def executeWithTimeout(func, args=None, kwargs=None, timeout=1, default=None, log=None)
def getLogger(self, loggerName=None, fileNameSuffix='', restore=False)
def handler(log, error, message, objects=(), levels={})
def __init__(self, string, subdirLen=SUBDIR_LEVEL1_LEN)
int ERROR_CODE_APPLIED_EXCEPTION
def entitiesEncode(url, entities=None)
def findReplace(self, newFile=None, rollover=True)
def normalize(url, supportProtocols=None, normMask=NORM_DEFAULT)
def __init__(self, func, args, kwargs, default, log)
def replaceLoopValue(buf, replaceFrom, replaceTo)
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
def urlNormalization(base, url, supportProtocols=None, log=None)
def innerTextToList(selectorList, delimiter=' ', innerDelimiter=' ', tagReplacers=None, REconditions=None, attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None)
def generateReplacementDict()
def accumulateSubstrings(substrList, prefixes)
string ROTATED_ATTRIBUTE_NAME
def strToFloat(val, defaultValue=0.0, log=None, positivePrefixes=None)
def tracefunc(frame, event, arg, indent=None)
def getHash(strBuf, binSize=32, digestType=0, fixedMode=0, valLimit=18446744073709552000L)
def stripHTMLTags(htmlTxt, method=0, joinGlue=' ', regExp=None)
def strToProxy(proxyString, log=None, defaultProxyType='http')
def storePickleOnDisk(input_pickled_object, env_path, file_name)
def __init__(self, log, error, message, objects)
def convertToHttpDateFmt(date_str)
def getTracebackInfo(linesNumberMax=None)