HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
app.Utils Namespace Reference

Classes

class  ConfigParamsList
 
class  DataReplacementConstants
 
class  ExceptionLog
 
class  InterruptableThread
 
class  JsonSerializable
 
class  LoggerFileName
 
class  MLStripper
 
class  MPLogger
 
class  PathMaker
 
class  PropertiesValidator
 
class  SQLExpression
 
class  UrlNormalizator
 
class  UrlParser
 

Functions

def getPath (dictionary, jsonString, path)
 
def getConfigParameter (parser, section, option, defValue)
 
def getTracebackInfo (linesNumberMax=None)
 
def tracefunc (frame, event, arg, indent=None)
 
def varDump (obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
 
def memUsage (point="")
 
def urlNormalization (base, url, supportProtocols=None, log=None)
 
def storePickleOnDisk (input_pickled_object, env_path, file_name)
 
def urinormpath (path, stripWWW=False, useValidator=False, enableAdditionNormalize=True)
 
def loggerFlush (loggerObj)
 
def accumulateSubstrings (substrList, prefixes)
 
def generateReplacementDict ()
 
def parseHost (url)
 
def convertToHttpDateFmt (date_str)
 
def autoFillSiteId (siteId, log)
 
def stripHTMLComments (htmlBuf=None, soup=None, hType=3)
 
def cutSubstringEntrances (buf, startStr='<!--', finishStr='-->', behaveMask=0, greediness=0, finishDefault='\n')
 
def eraseNoScript (htmlBuf=None)
 
def stripHTMLTags (htmlTxt, method=0, joinGlue=' ', regExp=None)
 
def innerHTMLText (htmlBuf, stripComment=True, stripScript=True)
 
def innerText (selectorList, delimiter=' ', innerDelimiter=' ', tagReplacers=None, REconditions=None, attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None)
 
def innerTextToList (selectorList, delimiter=' ', innerDelimiter=' ', tagReplacers=None, REconditions=None, attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None)
 
def getFirstNotEmptySubXPath (xpath, sel, subXPathPattern, subXPathes)
 
def getPairsDicts (incomeDict, splitters=',')
 
def splitPairs (buf, splitters=',')
 
def isTailSubstr (str1, str2)
 
def replaceLoopValue (buf, replaceFrom, replaceTo)
 
def getHTMLRedirectUrl (buff, log)
 
def emailParse (href, onlyName=False, defaultSeparator=' ')
 
def strToUnicode (inputStr)
 
def removeDuplicated (inStr, delimiter="\n", joingGlue=None, trimMode=1, skipEmpty=False)
 
def getContentCSSMarkupEntrancesNumber (content)
 
def executeWithTimeout (func, args=None, kwargs=None, timeout=1, default=None, log=None)
 
def loadFromFileByReference (fileReference, initString=None, protocolPrefix='file://', loggerObj=None)
 
def readFile (inFile, decodeUTF8=True)
 
def escape (string)
 
def isValidURL (url)
 
def getHash (strBuf, binSize=32, digestType=0, fixedMode=0, valLimit=18446744073709552000L)
 
def strToFloat (val, defaultValue=0.0, log=None, positivePrefixes=None)
 
def strToProxy (proxyString, log=None, defaultProxyType='http')
 
def executeCommand (cmd, inputStream='', log=None)
 
def jsonLoadsSafe (jsonString, default=None, log=None)
 
def reMatch (word, buff, log=None)
 

Variables

 logger = logging.getLogger(APP_CONSTS.LOGGER_NAME)
 
 lock = threading.Lock()
 
string META_REDIRECT = r"http-equiv\W*refresh.+?url\W+?(.+?)\""
 
string SEARCH_COMMENT_SIMPLE_PATTERN = r"<!--(.|\n)*?-->"
 
string SEARCH_COMMENT_PATTERN = r"<![ \r\n\t]*(--([^\-]|[\r\n]|-[^\-])*--[ \r\n\t]*)>"
 
string SEARCH_NOSCRIPT_PATTERN = r"<noscript>(.|\n)*?</noscript>"
 
list tracebackList = []
 
list tracebackTimeQueue = []
 
bool tracebackIdent = False
 
string tracebackIdentFiller = "-"
 
string tracebackMessageCall = "call"
 
string tracebackMessageExit = "exit"
 
string tracebackmessageDelimiter = ":"
 
bool tracebackTimeMark = True
 
string tracebackTimeMarkFormat = "%Y-%m-%d %H:%M:%S.%f"
 
string tracebackTimeMarkDelimiter = " "
 
bool tracebackIncludeInternalCalls = False
 
bool tracebackIncludeLineNumber = True
 
string tracebackIncludeLineNumberDelimiter = ":"
 
bool tracebackIncludeFileNumber = True
 
string tracebackIncludeFileNumberDelimiter = ":"
 
string tracebackFunctionNameDelimiter = ":"
 
list tracebackExcludeModulePath = ["/usr/lib/", "/usr/local/lib/"]
 
list tracebackExcludeFunctionName = ["varDump"]
 
list tracebackExcludeFunctionNameStarts = ["<"]
 
bool tracebackIncludeExitCalls = True
 
int tracebackRecursionlimit = 0
 
string tracebackRecursionlimitErrorMsg = "RECURSION STACK LIMIT REACHED "
 
bool tracebackIncludeLocals = False
 
bool tracebackIncludeArg = False
 
string tracebackIncludeLocalsPrefix = "\nLOCALS:\n"
 
string tracebackIncludeArgPrefix = "\nARG:\n"
 
 tracebackLogger = None
 
string tracebackElapsedTimeDelimiter = ""
 
string tracebackElapsedTimeFormat = "{:.6f}"
 
string tracebackUnknownExceptionMsg = "Unknown exception!"
 

Detailed Description

Created on Mar 28, 2014

@package: app
@author: scorp
@link: http://hierarchical-cluster-engine.com/
@copyright: Copyright &copy; 2013-2014 IOIX Ukraine
@license: http://hierarchical-cluster-engine.com/license/
@since: 0.1

Function Documentation

◆ accumulateSubstrings()

def app.Utils.accumulateSubstrings (   substrList,
  prefixes 
)

Definition at line 905 of file Utils.py.

905 def accumulateSubstrings(substrList, prefixes):
906  ret = ""
907  if substrList is None or not isinstance(substrList, list): # # type(substrList) is not types.ListType:
908  raise Exception(">>> error substrList is None or not List type")
909  if prefixes is None or not isinstance(prefixes, list): # # type(prefixes) is not types.ListType:
910  raise Exception(">>> error prefixes is None or not List type")
911  if len(substrList) != len(prefixes):
912  raise Exception(">>> error substrList and prefixes lists have different lengths")
913  i = 0
914  for substr in substrList:
915  if isinstance(substr, str) or isinstance(substr, unicode):
916  if isinstance(prefixes[i], str) or isinstance(prefixes[i], unicode):
917  ret += str(prefixes[i])
918  ret += str(substr)
919  i += 1
920  return ret
921 
922 
923 
def accumulateSubstrings(substrList, prefixes)
Definition: Utils.py:905

◆ autoFillSiteId()

def app.Utils.autoFillSiteId (   siteId,
  log 
)

Definition at line 967 of file Utils.py.

967 def autoFillSiteId(siteId, log):
968  ret = siteId
969  if siteId is None:
970  ret = "0"
971  if log is not None:
972  log.debug("set siteId = '0' from 'autoFillSiteId'")
973 
974  return ret
975 
976 
977 # # method strips incoming html from html comments
978 # @param htmlBuf incoming content in string format
979 # @param soup incoming content as bs object
980 # @param hType -hType of handler
981 # @return clean html buff
def autoFillSiteId(siteId, log)
Definition: Utils.py:967

◆ convertToHttpDateFmt()

def app.Utils.convertToHttpDateFmt (   date_str)

Definition at line 958 of file Utils.py.

958 def convertToHttpDateFmt(date_str):
959  stamp = time.mktime(date_str.timetuple())
960  # stamp = time.mktime(time.strptime(date_str, '%Y-%m-%d %H:%M:%S'))
961  return time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(stamp))
962 
963 
964 # # method returns siteId, substitutes to "0" value if incoming siteId is None
965 # @param siteId - ID of site
965 # @param log - logger instance for log usage
def convertToHttpDateFmt(date_str)
Definition: Utils.py:958

◆ cutSubstringEntrances()

def app.Utils.cutSubstringEntrances (   buf,
  startStr = '<!--',
  finishStr = '-->',
  behaveMask = 0,
  greediness = 0,
  finishDefault = '\n' 
)

Definition at line 1011 of file Utils.py.

1011 def cutSubstringEntrances(buf, startStr='<!--', finishStr='-->', behaveMask=0, greediness=0, finishDefault='\n'):
1012  ret = buf
1013  i = 0
1014  while True:
1015  i += 1
1016  replaced = False
1017  if ret.find(startStr) != -1:
1018  p = ret.index(startStr)
1019  if p is not None:
1020  p1 = None
1021  if ret.find(finishStr, p) != -1:
1022  p1 = ret.index(finishStr, p) + len(finishStr)
1023  else:
1024  if behaveMask == 1:
1025  if ret.find(finishDefault, p) != -1:
1026  p1 = ret.index(finishDefault, p) + len(finishDefault)
1027  else:
1028  p1 = len(ret)
1029  if behaveMask == 2:
1030  p1 = len(ret)
1031  if p1 is not None:
1032  ret = ret[0:p] + ret[p1:]
1033  # print ret
1034  replaced = True
1035  if greediness > 0 and i == greediness:
1036  break
1037  if not replaced:
1038  break
1039 
1040  return ret
1041 
1042 
1043 # # method erase incoming html from noscript blocks
1044 # @param htmlBuf - incoming content in string format
1045 # @return clean html buff
def cutSubstringEntrances(buf, startStr='<!--', finishStr='-->', behaveMask=0, greediness=0, finishDefault='\n')
Definition: Utils.py:1011
Here is the caller graph for this function:

◆ emailParse()

def app.Utils.emailParse (   href,
  onlyName = False,
  defaultSeparator = ' ' 
)

Definition at line 1302 of file Utils.py.

1302 def emailParse(href, onlyName=False, defaultSeparator=' '): # pylint: disable=W0613
1303  ret = href
1304  splitHref = href.split('?')
1305  if splitHref is not None and len(splitHref) > 0:
1306  adresses = splitHref[0]
1307  adresses = adresses.split(',')
1308  if onlyName:
1309  names = []
1310  for adress in adresses:
1311  adress = adress.split('@')
1312  if adress is not None and len(adress) > 0:
1313  names.append(adress[0])
1314  adresses = names
1315  ret = ''
1316  for adress in adresses:
1317  ret += adress
1318  ret += ' '
1319  ret = ret.strip()
1320  return ret
1321 
1322 
1323 
1324 # #Multi process logger
1325 #
def emailParse(href, onlyName=False, defaultSeparator=' ')
Definition: Utils.py:1302

◆ eraseNoScript()

def app.Utils.eraseNoScript (   htmlBuf = None)

Definition at line 1046 of file Utils.py.

1046 def eraseNoScript(htmlBuf=None):
1047  ret = htmlBuf
1048  if htmlBuf is not None:
1049 # ret = re.sub(SEARCH_NOSCRIPT_PATTERN, "", htmlBuf)
1050 # logger.debug("!!! use pattern: %s", str(SEARCH_NOSCRIPT_PATTERN))
1051  ret = cutSubstringEntrances(htmlBuf, startStr='<noscript>', finishStr='</noscript>', behaveMask=2)
1052 # logger.debug("!!! htmlBuf: %s", varDump(htmlBuf, strTypeMaxLen=10))
1053 # logger.debug("!!! ret: %s", varDump(ret, strTypeMaxLen=10))
1054 
1055  return ret
1056 
1057 
1058 # Strips from all HTML tags with set of different methods
1059 # @param htmlTxt input content
1060 # @param method 0 - by BeautifulSoup, 1 - with RE 1, 2 - RE 2, 3 - HTML parser, 4 - clear Python w/o lib, 5 - xml lib
1061 # @param joinGlue - the glue string to joing parts
1062 # @param regExp - the custom re for the method 1 or 2
1063 # @return cleared content
def cutSubstringEntrances(buf, startStr='<!--', finishStr='-->', behaveMask=0, greediness=0, finishDefault='\n')
Definition: Utils.py:1011
def eraseNoScript(htmlBuf=None)
Definition: Utils.py:1046
Here is the call graph for this function:

◆ escape()

def app.Utils.escape (   string)

Definition at line 1628 of file Utils.py.

1628 def escape(string):
1629  return string.replace("\\", "\\\\").replace('"', '\\\"').replace("'", "\\\'").replace("\n", "\\n").\
1630  replace("\r", "\\r").replace("\0", "\\0")
1631 
1632 # #Validate URL string
1633 #
1634 # @param url - url string
1635 # @return True if valid or otherwise False
1636 
def escape(string)
Definition: Utils.py:1628

◆ executeCommand()

def app.Utils.executeCommand (   cmd,
  inputStream = '',
  log = None 
)

Definition at line 1747 of file Utils.py.

1747 def executeCommand(cmd, inputStream='', log=None):
1748  # variables for result tuple
1749  output = ''
1750  errMsg = ''
1751  exitCode = APP_CONSTS.EXIT_FAILURE
1752  try:
1753  if log is not None:
1754  log.debug("Popen: %s", str(cmd))
1755 
1756  process = Popen(cmd, stdout=PIPE, stdin=PIPE, stderr=PIPE, shell=True, close_fds=True, executable='/bin/bash')
1757  if log is not None:
1758  log.debug("len(inputStream)= %s", str(len(inputStream)))
1759 
1760  (output, errMsg) = process.communicate(input=inputStream)
1761  exitCode = process.wait()
1762 
1763  if log is not None:
1764  log.debug("Process response has exitCode = %s, stdout len = %s, stderr: %s",
1765  str(exitCode), str(len(output)), str(errMsg))
1766 
1767  except Exception, err:
1768  if log is not None:
1769  log.error("Popen execution error: %s", str(err))
1770 
1771  # make result tuple
1772  PopenResult = collections.namedtuple('PopenResult', ['stdout', 'stderr', 'exitCode'])
1773  popenResult = PopenResult(stdout=output, stderr=errMsg, exitCode=exitCode)
1774 
1775  return popenResult
1776 
1777 
1778 # # Parse json and return dict if okay or None if not
1779 #
1780 # @param jsonString json to pars
1781 # @param log - logger instance
1782 # @return resulted dict
def executeCommand(cmd, inputStream='', log=None)
Definition: Utils.py:1747

◆ executeWithTimeout()

def app.Utils.executeWithTimeout (   func,
  args = None,
  kwargs = None,
  timeout = 1,
  default = None,
  log = None 
)

Definition at line 1544 of file Utils.py.

1544 def executeWithTimeout(func, args=None, kwargs=None, timeout=1, default=None, log=None):
1545  if args is None:
1546  args = ()
1547  # import threading
1548  if kwargs is None:
1549  kwargs = {}
1550 
1551  it = InterruptableThread(func, args, kwargs, default, log)
1552  it.start()
1553  it.join(timeout)
1554  if it.isAlive():
1555  try:
1556  it._Thread__stop() # pylint: disable=W0212
1557  time.sleep(1)
1558  except:
1559  if log is not None:
1560  log.error("an not stop thread with _Thread__stop()!")
1561  if it.isAlive():
1562  try:
1563  it.__stop() # pylint: disable=W0212
1564  time.sleep(1)
1565  except:
1566  if log is not None:
1567  log.error("Can not stop thread with __stop()!")
1568  if it.isAlive():
1569  try:
1570  it._Thread__delete() # pylint: disable=W0212
1571  time.sleep(1)
1572  except:
1573  if log is not None:
1574  log.error("Can not stop thread with _Thread__delete()!")
1575 
1576  if it.errorCode == it.ERROR_CODE_APPLIED_EXCEPTION:
1577  if log is not None:
1578  log.error("Error1 code %s, exception: %s", str(it.errorCode), str(it.errorException))
1579  raise it.errorException
1580  return default
1581  else:
1582  if it.errorCode == it.ERROR_CODE_APPLIED_EXCEPTION:
1583  if log is not None:
1584  log.error("Error2 code %s, exception: %s", str(it.errorCode), str(it.errorException))
1585  raise it.errorException
1586  return it.result
1587 
1588 
1589 # #Load file data by protocoled reference
1590 #
1591 # @param initString string in json format or @file:// reference
1592 # @param protocolPrefix
1593 # @param loggerObj
1594 # @return initString unchanged, value from file loaded by link or empty string if load error
def executeWithTimeout(func, args=None, kwargs=None, timeout=1, default=None, log=None)
Definition: Utils.py:1544
Here is the caller graph for this function:

◆ generateReplacementDict()

def app.Utils.generateReplacementDict ( )

Definition at line 934 of file Utils.py.

935  ret = {}
936  ret[DataReplacementConstants.CUR_YEAR_FULL] = datetime.now().strftime("%Y")
937  ret[DataReplacementConstants.CUR_YEAR_SHORT] = datetime.now().strftime("%y")
938  ret[DataReplacementConstants.CUR_MONTH] = datetime.now().strftime("%m")
939  ret[DataReplacementConstants.CUR_DAY] = datetime.now().strftime("%d")
940  return ret
941 
942 
943 # #parseHost parse the root host name from url
944 # for example: the result of http://s1.y1.example.com/path/to is example.com
945 # @param url the full url
946 # @return host of the url, eg: example.com
def generateReplacementDict()
Definition: Utils.py:934

◆ getConfigParameter()

def app.Utils.getConfigParameter (   parser,
  section,
  option,
  defValue 
)

Definition at line 200 of file Utils.py.

200 def getConfigParameter(parser, section, option, defValue):
201  ret = defValue
202 
203  if parser and parser.has_option(section, option):
204  try:
205  ret = parser.get(section, option, defValue)
206  except Exception:
207  ret = defValue
208 
209  return ret
210 
211 
212 # #The function to get traceback information string prepared for logging
213 #
214 # This function collects traceback information and creates string representation ready to log it
215 # @param linesNumberMax max number of traceback lines to include in to the collection, None - signs all
216 # @ret return string
217 #
def getConfigParameter(parser, section, option, defValue)
Definition: Utils.py:200

◆ getContentCSSMarkupEntrancesNumber()

def app.Utils.getContentCSSMarkupEntrancesNumber (   content)

Definition at line 1426 of file Utils.py.

1427  return len(re.findall(r'\{.+?\}', content))
1428 
1429 
1430 # Class ExceptionLog for logging of the exception common way
def getContentCSSMarkupEntrancesNumber(content)
Definition: Utils.py:1426

◆ getFirstNotEmptySubXPath()

def app.Utils.getFirstNotEmptySubXPath (   xpath,
  sel,
  subXPathPattern,
  subXPathes 
)

Definition at line 1174 of file Utils.py.

1174 def getFirstNotEmptySubXPath(xpath, sel, subXPathPattern, subXPathes):
1175  retXPath = None
1176  retXPathValue = None
1177  for subXPath in subXPathes:
1178  retXPath = xpath + (subXPathPattern % subXPath)
1179  try:
1180  retXPathValue = sel.xpath(retXPath).extract()
1181  except Exception as excp:
1182  logger.info(">>> Common xPath extractor exception, = " + retXPath + " excp=" + str(excp))
1183  retXPathValue = None
1184  continue
1185  if len(retXPathValue) > 0 and ''.join(retXPathValue).strip() != '':
1186  break
1187  return retXPath, retXPathValue
1188 
1189 
1190 # # function call splitPairs for each element in incomeDict and fills return dict
1191 # @param incomeDict incoming dict
1192 # @param splitters incoming splitters
1193 # @return result dict
def getFirstNotEmptySubXPath(xpath, sel, subXPathPattern, subXPathes)
Definition: Utils.py:1174
Definition: join.py:1

◆ getHash()

def app.Utils.getHash (   strBuf,
  binSize = 32,
  digestType = 0,
  fixedMode = 0,
  valLimit = 18446744073709552000L 
)

Definition at line 1649 of file Utils.py.

1649 def getHash(strBuf, binSize=32, digestType=0, fixedMode=0, valLimit=18446744073709552000L):
1650 
1651  if fixedMode == 0:
1652  if digestType == 0:
1653  d = hashlib.md5(strBuf)
1654  else:
1655  d = hashlib.sha1(strBuf) # pylint: disable=R0204
1656  if binSize == 32:
1657  s = 8
1658  elif binSize == 64:
1659  s = 16
1660  else:
1661  s = 32
1662  h = d.hexdigest()
1663  v = int(h[:s], 16)
1664  if v > valLimit:
1665  for i in xrange(1, s - 1):
1666  v = int(h[:s - i], 16)
1667  if v < valLimit:
1668  break
1669  elif fixedMode == 1:
1670  v = ctypes.c_uint32(zlib.crc32(strBuf, int(time.time()))).value
1671  else:
1672  v = ctypes.c_ulong(zlib.crc32(strBuf, int(time.time()))).value
1673 
1674  return v
1675 
1676 
1677 # # Convert string to float
1678 # @param val - input value as string
1679 # @param defaultValue - default value for result
1680 # @param log - logger instance
1681 # @param positivePrefixes - positive prefixes dictionary
1682 # @return result float value
def getHash(strBuf, binSize=32, digestType=0, fixedMode=0, valLimit=18446744073709552000L)
Definition: Utils.py:1649

◆ getHTMLRedirectUrl()

def app.Utils.getHTMLRedirectUrl (   buff,
  log 
)

Definition at line 1278 of file Utils.py.

1278 def getHTMLRedirectUrl(buff, log):
1279  # variable for result
1280  ret = None
1281  resUrl = ''
1282 
1283  match = re.search(META_REDIRECT, stripHTMLComments(buff), re.I | re.U)
1284  if match is not None:
1285  resUrl = match.groups()[0].strip()
1286 
1287  log.debug('resUrl: ' + str(resUrl))
1288  urlObj = Url(resUrl)
1289  if urlObj.isValid():
1290  ret = resUrl
1291 
1292  log.debug('ret: ' + str(ret))
1293 
1294  return ret
1295 
1296 
1297 # # function parses incoming email address
1298 # @param href - incoming email href
1299 # @param onlyName - extract email names instead full email names
1300 # @param defaultSeparator - default separator between email elements
1301 # @return parsed email
def getHTMLRedirectUrl(buff, log)
Definition: Utils.py:1278
def stripHTMLComments(htmlBuf=None, soup=None, hType=3)
Definition: Utils.py:982
Here is the call graph for this function:

◆ getPairsDicts()

def app.Utils.getPairsDicts (   incomeDict,
  splitters = ',' 
)

Definition at line 1194 of file Utils.py.

1194 def getPairsDicts(incomeDict, splitters=','):
1195  ret = {}
1196  if isinstance(incomeDict, dict):
1197  for key in incomeDict:
1198  if isinstance(incomeDict[key], str) or isinstance(incomeDict[key], unicode):
1199  ret[key] = splitPairs(incomeDict[key], splitters)
1200  return ret
1201 
1202 
1203 # # function extracts splits incoming string by splitters into dict of name=value pairs
1204 # @param buf incoming text buf
1205 # @param splitters incoming splitters
1206 # @return result dict
def getPairsDicts(incomeDict, splitters=',')
Definition: Utils.py:1194
def splitPairs(buf, splitters=',')
Definition: Utils.py:1207
Here is the call graph for this function:

◆ getPath()

def app.Utils.getPath (   dictionary,
  jsonString,
  path 
)

Definition at line 93 of file Utils.py.

93 def getPath(dictionary, jsonString, path):
94  if jsonString != None:
95  dictionary = json.loads(jsonString)
96  for i, p in re.findall(r'(\d+)|(\w+)', path):
97  dictionary = dictionary[p or int(i)]
98  return dictionary
99 
100 
101 
102 # #Json serialization
103 #
def getPath(dictionary, jsonString, path)
Definition: Utils.py:93

◆ getTracebackInfo()

def app.Utils.getTracebackInfo (   linesNumberMax = None)

Definition at line 218 of file Utils.py.

218 def getTracebackInfo(linesNumberMax=None):
219  ret = ""
220  n = 0
221 
222  type_, value_, traceback_ = sys.exc_info()
223  stack = traceback.format_tb(traceback_)
224  del type_
225  del value_
226  for item in stack:
227  ret = ret + "\n" + (str(item))
228  n = n + 1
229  if linesNumberMax != None and n == linesNumberMax:
230  break
231 
232  return ret
233 
234 
235 
236 # #The function to get accumulate the traceback information in global variable __tracebackList
def getTracebackInfo(linesNumberMax=None)
Definition: Utils.py:218
Here is the caller graph for this function:

◆ innerHTMLText()

def app.Utils.innerHTMLText (   htmlBuf,
  stripComment = True,
  stripScript = True 
)

Definition at line 1130 of file Utils.py.

1130 def innerHTMLText(htmlBuf, stripComment=True, stripScript=True):
1131  from bs4 import BeautifulSoup
1132 
1133  soup = BeautifulSoup(htmlBuf, 'lxml')
1134 
1135  if stripScript:
1136  for elem in soup.findAll(name='script'):
1137  elem.extract()
1138  if stripComment:
1139  stripHTMLComments(htmlBuf=None, soup=soup)
1140 
1141  return ''.join(soup.findAll(text=True))
1142 
1143 
1144 # # function concatenates all HTMLTags from extractor also strips elements
1145 # @param selectorList incoming Selector
1146 # @return inner text from incoming selector
def innerHTMLText(htmlBuf, stripComment=True, stripScript=True)
Definition: Utils.py:1130
def stripHTMLComments(htmlBuf=None, soup=None, hType=3)
Definition: Utils.py:982
Definition: join.py:1
Here is the call graph for this function:

◆ innerText()

def app.Utils.innerText (   selectorList,
  delimiter = ' ',
  innerDelimiter = ' ',
  tagReplacers = None,
  REconditions = None,
  attrConditions = None,
  keepAttributes = None,
  baseUrl = None,
  closeVoid = None,
  excludeNodes = None 
)

Definition at line 1148 of file Utils.py.

1148  attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None):
1149  extendInnerText = ExtendInnerText(tagReplacers, delimiter, innerDelimiter, REconditions, attrConditions,
1150  keepAttributes, baseUrl, closeVoid, excludeNodes)
1151  extendInnerText.innerText(None, selectorList, None)
1152  ret = extendInnerText.stripHtml
1153  return ret
1154 
1155 
1156 # # function concatenates all HTMLTags from extractor also strips elements
1157 # @param selectorList incoming Selector
1158 # @return list of inner text from incoming selector
Here is the call graph for this function:
Here is the caller graph for this function:

◆ innerTextToList()

def app.Utils.innerTextToList (   selectorList,
  delimiter = ' ',
  innerDelimiter = ' ',
  tagReplacers = None,
  REconditions = None,
  attrConditions = None,
  keepAttributes = None,
  baseUrl = None,
  closeVoid = None,
  excludeNodes = None 
)

Definition at line 1160 of file Utils.py.

1160  attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None):
1161  extendInnerText = ExtendInnerText(tagReplacers, delimiter, innerDelimiter, REconditions, attrConditions,
1162  keepAttributes, baseUrl, closeVoid, excludeNodes)
1163  extendInnerText.innerTextToList(None, selectorList, None)
1164  ret = extendInnerText.stripHtmlList
1165  return ret
1166 
1167 
1168 # # function looks for the first not empty extracted XPath from subXPathes, using subXPathPattern for real xpath creation
1169 # @param xpath - incoming root xpath
1170 # @param sel - incoming selector
1171 # @param subXPathPattern - subXPath creation pattern
1172 # @param subXPathes - list of subXPathes
1173 # @return retXPath and retXPathValue values
Here is the caller graph for this function:

◆ isTailSubstr()

def app.Utils.isTailSubstr (   str1,
  str2 
)

Definition at line 1221 of file Utils.py.

1221 def isTailSubstr(str1, str2):
1222  ret = False
1223  if str1.find(str2) > 0 and ((len(str1) - str1.find(str2)) == len(str2)):
1224  ret = True
1225  return ret
1226 
1227 
1228 # # function makes string replacement repeatedly until no further change occurs
1229 # @param buf incoming text buf
1230 # @param replaceFrom substring for replacement from
1231 # @param replaceTo substring for replacement to
1232 # @return replacement string
def isTailSubstr(str1, str2)
Definition: Utils.py:1221

◆ isValidURL()

def app.Utils.isValidURL (   url)

Definition at line 1637 of file Utils.py.

1637 def isValidURL(url):
1638  return False if isinstance(validators.url(url), validators.ValidationFailure) else True
1639 
1640 
1641 # #Get some hash of a string limited bit size
1642 #
1643 # @param strBuf - string buffer
1644 # @param binSize - binary value size bits, supported values 32, 64 and 128
1645 # @param digestType - 0 - md5, 1 - sha1
1646 # @param fixedMode - 0 digests play, 1 - crc32 to uint32, 2 - crc32 to ulong
1647 # @param valLimit - limit of a value useful to fix a DB type size (MySQL 8 bytes BIGINT(20))
1648 # @return True if valid or otherwise False
def isValidURL(url)
Definition: Utils.py:1637
Here is the caller graph for this function:

◆ jsonLoadsSafe()

def app.Utils.jsonLoadsSafe (   jsonString,
  default = None,
  log = None 
)

Definition at line 1783 of file Utils.py.

1783 def jsonLoadsSafe(jsonString, default=None, log=None):
1784  # variable for result
1785  ret = default
1786  try:
1787  if jsonString is not None and jsonString != '':
1788  if isinstance(jsonString, basestring):
1789  ret = json.loads(jsonString)
1790  else:
1791  ret = jsonString
1792  if log is not None:
1793  log.debug("Input object type is: %s", type(jsonString))
1794  except Exception, err:
1795  if log is not None:
1796  log.error("Error pars json: %s; source string:\n%s", str(err), jsonString)
1797 
1798  return ret
1799 
1800 
1801 # simple re match check for search word definition
1802 #
1803 # @param word - word for search
1804 # @param buff - buffer where is search
1805 # @param log - logger instance
1806 # @return True if match exist or False otherwise
def jsonLoadsSafe(jsonString, default=None, log=None)
Definition: Utils.py:1783

◆ loadFromFileByReference()

def app.Utils.loadFromFileByReference (   fileReference,
  initString = None,
  protocolPrefix = 'file://',
  loggerObj = None 
)

Definition at line 1595 of file Utils.py.

1595 def loadFromFileByReference(fileReference, initString=None, protocolPrefix='file://', loggerObj=None):
1596  ret = initString
1597 
1598  if fileReference.startswith(protocolPrefix):
1599  try:
1600  f = fileReference[len(protocolPrefix):]
1601  ret = readFile(f)
1602  except Exception, err:
1603  if loggerObj is not None:
1604  loggerObj.error("Error load from file `%s` by reference: %s", f, str(err))
1605 
1606  return ret
1607 
1608 
1609 # #Read file
1610 #
1611 # @param inFile - name of file to read
1612 # @param decodeUTF8 - decode utf8 or not after read from file
1613 # @return - the buffer
def loadFromFileByReference(fileReference, initString=None, protocolPrefix='file://', loggerObj=None)
Definition: Utils.py:1595
def readFile(inFile, decodeUTF8=True)
Definition: Utils.py:1614
Here is the call graph for this function:

◆ loggerFlush()

def app.Utils.loggerFlush (   loggerObj)

Definition at line 893 of file Utils.py.

893 def loggerFlush(loggerObj):
894  for h in loggerObj.handlers:
895  if h.__class__.__name__ == 'FileHandler' or h.__class__.__name__ == 'TimedRotatingFileHandler':
896  h.flush()
897 
898 
899 
900 # #accumulateSubstrings accumulates substr list in one string and returns it, also adds prefixes between
901 # substrings in resulting string. substrList and prefixes must be List[str] type with equal length
902 # @param substrList - substrings list
903 # @param prefixes - prefixes list
904 # @returns - accumulate string
def loggerFlush(loggerObj)
Definition: Utils.py:893

◆ memUsage()

def app.Utils.memUsage (   point = "")

Definition at line 498 of file Utils.py.

498 def memUsage(point=""):
499  import resource
500  # usage = resource.getrusage(resource.RUSAGE_SELF)
501  return '''%s: mem=%s mb
502  ''' % (point, resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000)
503 
504 
505 
506 # #class UrlParser makes URL operation
507 #
def memUsage(point="")
Definition: Utils.py:498

◆ parseHost()

def app.Utils.parseHost (   url)

Definition at line 947 of file Utils.py.

947 def parseHost(url):
948  host = None
949  if urlparse.urlparse(url).hostname:
950  host = '.'.join(urlparse.urlparse(url).hostname.split('.')[-2:])
951  return host
952 
953 
954 # # convert date str to HTTP header format
955 # 2014-07-29 20:31:50 (GMT+8) to Tue, 29 Jul 2014 12:31:50 GMT
956 # @param date_str date str, 2014-07-29 20:31:50
957 # @return HTTP header formated date str : Tue, 29 Jul 2014 12:31:50 GMT
def parseHost(url)
Definition: Utils.py:947
Definition: join.py:1
Here is the caller graph for this function:

◆ readFile()

def app.Utils.readFile (   inFile,
  decodeUTF8 = True 
)

Definition at line 1614 of file Utils.py.

1614 def readFile(inFile, decodeUTF8=True):
1615  with open(inFile, 'r') as f:
1616  ret = f.read()
1617 
1618  if decodeUTF8:
1619  ret = ret.decode('utf8')
1620 
1621  return ret
1622 
1623 
1624 # #Escape string value
1625 #
1626 # @param string
1627 # @return escaped string
def readFile(inFile, decodeUTF8=True)
Definition: Utils.py:1614
Here is the caller graph for this function:

◆ reMatch()

def app.Utils.reMatch (   word,
  buff,
  log = None 
)

Definition at line 1807 of file Utils.py.

1807 def reMatch(word, buff, log=None):
1808  # variable for result
1809  ret = False
1810  if isinstance(word, basestring) and isinstance(buff, basestring):
1811  try:
1812  if word.startswith(u'/'):
1813  word = word[1:]
1814  if re.search(pattern=word, string=buff, flags=re.U + re.I + re.M) is not None:
1815  ret = True
1816  else:
1817  ret = (word.upper() == buff.upper())
1818 
1819  except Exception, err:
1820  if log is not None:
1821  log.error("Expression: %s, Error: %s", str(word), str(err))
1822 
1823  return ret
1824 
def reMatch(word, buff, log=None)
Definition: Utils.py:1807

◆ removeDuplicated()

def app.Utils.removeDuplicated (   inStr,
  delimiter = "\n",
  joingGlue = None,
  trimMode = 1,
  skipEmpty = False 
)

Definition at line 1394 of file Utils.py.

1394 def removeDuplicated(inStr, delimiter="\n", joingGlue=None, trimMode=1, skipEmpty=False):
1395  ret = inStr.split(delimiter)
1396 
1397  if joingGlue is None:
1398  glue = delimiter
1399  else:
1400  glue = joingGlue
1401 
1402  prev = None
1403  new = []
1404  for item in ret:
1405  if trimMode > 0:
1406  if trimMode == 1:
1407  item = item.lstrip()
1408  elif trimMode == 2:
1409  item = item.rstrip()
1410  else:
1411  item = item.strip()
1412  if skipEmpty and item == '':
1413  continue
1414  if item != prev:
1415  new.append(item)
1416  prev = item
1417  ret = new
1418 
1419  return glue.join(ret).strip()
1420 
1421 
1422 # Checks is the input content possible contains an CSS markup, possible is an in-line STYLE tag innerHTML
1423 #
1424 # @param content - to analyse
1425 # @return zero if presence of the CSS markup is not detected or number of the detected fragments
def removeDuplicated(inStr, delimiter="\n", joingGlue=None, trimMode=1, skipEmpty=False)
Definition: Utils.py:1394

◆ replaceLoopValue()

def app.Utils.replaceLoopValue (   buf,
  replaceFrom,
  replaceTo 
)

Definition at line 1233 of file Utils.py.

# #Repeatedly apply a substring replacement until the value stabilizes.
#
# Useful for collapsing runs (e.g. double spaces to single) where a single
# pass can leave new entrances behind.
#
# @param buf - source string
# @param replaceFrom - substring to search for
# @param replaceTo - replacement substring
# @return string with the replacement applied until no further change occurs
def replaceLoopValue(buf, replaceFrom, replaceTo):
  localValue = buf
  replaceValue = localValue.replace(replaceFrom, replaceTo)
  # Compare values, not lengths: the old length-based condition silently
  # discarded any equal-length replacement and returned the original buffer.
  while replaceValue != localValue:
    localValue = replaceValue
    replaceValue = localValue.replace(replaceFrom, replaceTo)
  return localValue
1240 
1241 
1242 # # # function extract html redirect link from meta
1243 # # @param utf8Buff incoming buff of html page
1244 # # @param log - logger instance
1245 # # @return html redirect link
1246 # def extractHTMLRedirectFromMeta(utf8Buff, log):
1247 # # variable for result
1248 # ret = None
1249 #
1250 # localREList = re.findall(META_RE_0, utf8Buff, re.I)
1251 # if len(localREList) > 0:
1252 # log.debug("!!! Found pattern: '%s' - HTML redirect is exist...", str(META_RE_0))
1253 # match = re.search(META_RE_1, utf8Buff, re.I | re.U)
1254 # if match is not None:
1255 # log.debug("!!! Found pattern: '%s' - HTML redirect blocked by comment...", str(META_RE_1))
1256 # else:
1257 # for bodyStr in localREList:
1258 # match = re.search(META_RE_2, bodyStr, re.I | re.U)
1259 # log.debug("!!! bodyStr: %s, pattern: '%s', match: %s", str(bodyStr), str(META_RE_2), varDump(match))
1260 # if match is not None:
1261 # ret = match.group(1)
1262 # else:
1263 # match = re.search(META_RE_3, bodyStr, re.I | re.U)
1264 # log.debug("!!! bodyStr: %s, pattern: '%s', match: %s", str(bodyStr), str(META_RE_3), varDump(match))
1265 # if match is not None:
1266 # ret = match.group(1)
1267 #
1268 # if ret is not None:
1269 # break
1270 #
1271 # return ret
1272 
1273 
1274 # # extract html redirect link from meta
1275 # @param buff - raw contant of html page
1276 # @param log - logger instance
1277 # @return - html redirect link
def replaceLoopValue(buf, replaceFrom, replaceTo)
Definition: Utils.py:1233

◆ splitPairs()

def app.Utils.splitPairs (   buf,
  splitters = ',' 
)

Definition at line 1207 of file Utils.py.

# #Split a buffer of 'key=value' pairs into a dictionary.
#
# Elements without an '=' are ignored; for elements with several '=' signs
# only the first value part is kept.
#
# @param buf - input string of pairs
# @param splitters - pair separator string
# @return dictionary of parsed key/value pairs
def splitPairs(buf, splitters=','):
  pairs = {}
  for chunk in buf.split(splitters):
    parts = chunk.split('=')
    if len(parts) >= 2:
      pairs[parts[0]] = parts[1]
  return pairs
1215 
1216 
1217 # # function looks is str2 an a tail of str1
1218 # @param str1 main string
1219 # @param str2 searching tail substring
1220 # @return False or True
def splitPairs(buf, splitters=',')
Definition: Utils.py:1207
Here is the caller graph for this function:

◆ storePickleOnDisk()

def app.Utils.storePickleOnDisk (   input_pickled_object,
  env_path,
  file_name 
)

Definition at line 754 of file Utils.py.

# #Store an already pickled object on disk.
#
# The target directory is taken from the environment variable named by
# env_path; when the variable is unset or empty nothing is written.
#
# @param input_pickled_object - pickled payload (bytes) to write
# @param env_path - name of the environment variable holding the directory prefix
# @param file_name - file name appended directly to the directory value
def storePickleOnDisk(input_pickled_object, env_path, file_name):
  if env_path in os.environ and os.environ[env_path] != "":
    logger.debug("os.environ[%s]: set to %s", env_path, os.environ[env_path])
    # context manager ensures the handle is closed even if the write fails
    # (the previous open(...).write(...) leaked the file object)
    with open(os.environ[env_path] + file_name, "wb") as outFile:
      outFile.write(input_pickled_object)
  else:
    logger.debug("os.environ[%s]: not set.", env_path)
760 
761 
762 
763 # This function taken from uritools module as it was removed from module
def storePickleOnDisk(input_pickled_object, env_path, file_name)
Definition: Utils.py:754

◆ stripHTMLComments()

def app.Utils.stripHTMLComments (   htmlBuf = None,
  soup = None,
  hType = 3 
)

Definition at line 982 of file Utils.py.

# #Strip HTML comments from a buffer or a parsed soup tree.
#
# @param htmlBuf - raw HTML text (used by hType 1..3)
# @param soup - parsed BeautifulSoup tree (used by hType 0, modified in place)
# @param hType - 0 - extract comment nodes from soup, 1 - full regex pattern,
#                2 - simple regex pattern, 3 - cutSubstringEntrances based
# @return buffer with comments removed (or the input unchanged when no branch applies)
def stripHTMLComments(htmlBuf=None, soup=None, hType=3):
  # local import keeps bs4 a call-time dependency only
  from bs4 import Comment

  ret = htmlBuf
  if soup is not None and hType == 0:
    # drop comment nodes from the parsed tree in place
    for node in soup.findAll(text=lambda t: isinstance(t, Comment)):
      node.extract()
  elif htmlBuf is not None:
    if hType == 1:
      ret = re.sub(SEARCH_COMMENT_PATTERN, "", htmlBuf)
      logger.debug("!!! use pattern: %s", str(SEARCH_COMMENT_PATTERN))
    elif hType == 2:
      ret = re.sub(SEARCH_COMMENT_SIMPLE_PATTERN, "", htmlBuf)
      logger.debug("!!! use pattern: %s", str(SEARCH_COMMENT_SIMPLE_PATTERN))
    elif hType == 3:
      ret = cutSubstringEntrances(htmlBuf, behaveMask=2)

  return ret
999 
1000 
1001 # Cuts substring entrances in source buffer started and finished with strings
1002 #
1003 # @param buf - source buffer
1004 # @param startStr - start string
1005 # @param finishStr - finish string
1006 # @param behaveMask - bit set mask defines a behavior in case of finishStr not found, 0 - do nothing,
1007 # 1 - cut up to finishDefault or end of buffer if no end of line found, 2 - cut up to end of buffer
1008 # @param greediness - max cutting number, 0 - means unlimited
1009 # @param finishDefault - default finish string used if behaveMask == 1 and finishStr is not found
1010 # @return resulted string
def cutSubstringEntrances(buf, startStr='<!--', finishStr='-->', behaveMask=0, greediness=0, finishDefault='\n')
Definition: Utils.py:1011
def stripHTMLComments(htmlBuf=None, soup=None, hType=3)
Definition: Utils.py:982
Here is the call graph for this function:
Here is the caller graph for this function:

◆ stripHTMLTags()

def app.Utils.stripHTMLTags (   htmlTxt,
  method = 0,
  joinGlue = ' ',
  regExp = None 
)

Definition at line 1064 of file Utils.py.

# #Strip HTML tags from a text buffer using one of several methods.
#
# @param htmlTxt - HTML text buffer (None or blank yields '')
# @param method - 0 - BeautifulSoup text nodes, 1/2 - regular expression
#                 (2 also removes comments), 3 - MLStripper HTMLParser,
#                 4 - manual character scanner, 5 - xml.etree itertext
# @param joinGlue - glue string inserted between extracted text fragments
# @param regExp - optional custom regular expression for methods 1 and 2
# @return stripped plain text with outer whitespace removed
def stripHTMLTags(htmlTxt, method=0, joinGlue=' ', regExp=None):
  ret = ''

  if htmlTxt is not None and htmlTxt.strip() != '':
    if method == 0:
      from bs4 import BeautifulSoup
      ret = joinGlue.join(BeautifulSoup(htmlTxt, 'lxml').findAll(text=True))
    elif method == 1 or method == 2:
      if regExp is not None:
        r = regExp
      else:
        if method == 1:
          r = r'<[^<]+?>'
        else:
          r = r'(<!--.*?-->|<[^>]*>)'
      ret = re.sub(r, joinGlue, htmlTxt)
    elif method == 3:
      ret = MLStripper()  # pylint: disable=R0204
      ret.feed(htmlTxt)
      ret = ret.get_data()
    elif method == 4:
      # manual scanner: copy characters outside tags, tracking quoted attributes
      tag = False
      quote = False
      for c in htmlTxt:
        if c == '<' and not quote:
          tag = True
        elif c == '>' and not quote:
          tag = False
        elif (c == '"' or c == "'") and tag:
          quote = not quote
        elif not tag:
          ret = ret + joinGlue + c
    elif method == 5:
      # import the submodule explicitly: a bare 'import xml' does not load
      # xml.etree.ElementTree and raised AttributeError on the next line
      import xml.etree.ElementTree
      ret = joinGlue.join(xml.etree.ElementTree.fromstring(htmlTxt).itertext())

    if method == 1 or method == 2:
      # NOTE(review): the cgi module is removed in Python 3.13; html.escape
      # is the modern replacement — kept as-is for Python 2 compatibility
      import cgi
      ret = cgi.escape(ret)
      ret = re.sub('[<>]', '', ret)

  return ret.strip()
1106 
1107 
1108 
def stripHTMLTags(htmlTxt, method=0, joinGlue=' ', regExp=None)
Definition: Utils.py:1064

◆ strToFloat()

def app.Utils.strToFloat (   val,
  defaultValue = 0.0,
  log = None,
  positivePrefixes = None 
)

Definition at line 1683 of file Utils.py.

# #Convert a string to float with optional SI-style magnitude suffix support.
#
# Examples: '2K' -> 2000.0, '1.5' -> 1.5; unparsable input yields defaultValue.
#
# @param val - value to convert (expected string; anything else yields default)
# @param defaultValue - value returned when conversion fails
# @param log - optional logger instance (failures are logged at debug level)
# @param positivePrefixes - optional custom suffix->multiplier map; the default
#        covers K, M, G, T, P, E, Z, Y
# @return converted float or defaultValue
def strToFloat(val, defaultValue=0.0, log=None, positivePrefixes=None):
  prefixes = positivePrefixes
  if prefixes is None:
    prefixes = {'K':'1E3', 'M':'1E6', 'G':'1E9', 'T':'1E12', 'P':'1E15', 'E':'1E18', 'Z':'1E21', 'Y':'1E24'}

  result = defaultValue
  try:
    upperVal = val.upper()
    suffix = upperVal[-1]
    if suffix in prefixes:
      # Decimal arithmetic avoids float rounding before the final conversion
      result = float(Decimal(upperVal[:-1]) * Decimal(prefixes[suffix]))
    else:
      result = float(upperVal)
  except Exception as err:
    if log is not None:
      log.debug(str(err))

  return result
1703 
1704 
1705 # #Convert string to proxy tuple (proxy_type, proxy_host, proxy_port, proxy_user, proxy_passwd)
1706 #
1707 # @param proxyString - proxy string
1708 # @param log - logger instance
1709 # @return proxy tuple if success or None otherwise
def strToFloat(val, defaultValue=0.0, log=None, positivePrefixes=None)
Definition: Utils.py:1683

◆ strToProxy()

def app.Utils.strToProxy (   proxyString,
  log = None,
  defaultProxyType = 'http' 
)

Definition at line 1710 of file Utils.py.

# #Convert a proxy definition string to a tuple
# (proxy_type, proxy_host, proxy_port, proxy_user, proxy_passwd).
#
# Supported forms: 'type://user:passwd@host:port', 'type://host:port'
# and 'host:port' (the last one gets defaultProxyType as schema).
#
# @param proxyString - proxy string
# @param log - optional logger instance
# @param defaultProxyType - schema used when the string carries none
# @return proxy tuple on success or None for non-string/empty input
def strToProxy(proxyString, log=None, defaultProxyType='http'):
  ret = None
  proxy_type = proxy_host = proxy_port = proxy_user = proxy_passwd = None
  if isinstance(proxyString, basestring) and proxyString != "":
    try:
      # try the most specific form first: credentials included
      match = re.search('(.*)://(.*):(.*)@(.*):(.*)', proxyString, re.I + re.U)
      if match is not None:
        proxy_type, proxy_user, proxy_passwd, proxy_host, proxy_port = match.groups()
      else:
        match = re.search('(.*)://(.*):(.*)', proxyString, re.I + re.U)
        if match is not None:
          proxy_type, proxy_host, proxy_port = match.groups()
        else:
          match = re.search('(.*):(.*)', proxyString, re.I + re.U)
          if match is not None:
            proxy_host, proxy_port = match.groups()
            proxy_type = defaultProxyType

      # a tuple is returned even when nothing matched (all fields None)
      ret = (proxy_type, proxy_host, proxy_port, proxy_user, proxy_passwd)
    except Exception as err:
      if log is not None:
        log.error("Error: %s", str(err))

  return ret
1739 
1740 
1741 # # execute command line command
1742 #
1743 # @param cmd - command line string
1744 # @param inputStream - input stream to popen
1745 # @param log - logger instance
1746 # @return result named tuple with support names: 'stdout', 'stderr', 'exitCode'
def strToProxy(proxyString, log=None, defaultProxyType='http')
Definition: Utils.py:1710
Here is the caller graph for this function:

◆ strToUnicode()

def app.Utils.strToUnicode (   inputStr)

Definition at line 1379 of file Utils.py.

# #Convert a byte string to unicode using UTF-8; pass any other value through.
#
# @param inputStr - input value
# @return decoded unicode object for str input, the input unchanged otherwise
def strToUnicode(inputStr):
  if isinstance(inputStr, str):
    return inputStr.decode('utf-8')
  return inputStr
1386 
1387 
1388 # Split string removes duplicated peaces and joing back
1389 # @param inStr - input string
1390 # @param delimiter - splitter delimiter
1391 # @param joingGlue - optional glue string to joing with, if None or omitted - the delimiter used
1392 # @param trimMode - peaces trim mode: 0 - not trimmed, 1 - trimmed left, 2 - trimmed right, 3 - trimmed both
1393 # @return string with duplicated peaces removed
def strToUnicode(inputStr)
Definition: Utils.py:1379

◆ tracefunc()

def app.Utils.tracefunc (   frame,
  event,
  arg,
  indent = None 
)

Definition at line 273 of file Utils.py.

# #Trace hook for sys.settrace()-style instrumentation.
#
# Handles only "call" and "return" events: formats a message with optional
# indentation, timestamp, file/line info, locals and argument dumps, then
# appends it to tracebackList or emits it via tracebackLogger. All behavior
# is driven by the module-level traceback* configuration variables.
#
# @param frame - current stack frame
# @param event - trace event name ("call", "return", ...; others are ignored)
# @param arg - event argument (return value for "return" events)
# @param indent - indent accumulator list; a fresh [0] is used when omitted
# @return tracefunc itself, so the interpreter keeps tracing nested scopes
def tracefunc(frame, event, arg, indent=None):  # pylint: disable=W0613
  if indent is None:
    indent = [0]

  # the lock serializes output produced by concurrently traced threads;
  # it is acquired only for the event types actually processed below
  if event == "call" or event == "return":
    lock.acquire()

    try:
      if event == "call":
        indent[0] += 2
        if tracebackIdent:
          idents = tracebackIdentFiller * indent[0]
        else:
          idents = ""
        message = tracebackMessageCall
        # remember the entry time; popped on the matching "return"
        tracebackTimeQueue.append(time.time())
        te = ""
      elif event == "return":
        if tracebackIdent:
          idents = tracebackIdentFiller * indent[0]
        else:
          idents = ""
        indent[0] -= 2
        message = tracebackMessageExit
        # elapsed time since the matching "call" event
        te = "{:.6f}".format(time.time() - tracebackTimeQueue.pop())

      if tracebackTimeMark:
        # t = time.strftime(tracebackTimeMarkFormat)
        t = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
      else:
        t = ""

      if tracebackIncludeLineNumber:
        ln = str(frame.f_lineno)
      else:
        ln = ""

      if tracebackIncludeFileNumber:
        fn = str(frame.f_code.co_filename)
      else:
        fn = ""

      # exclusion filters: by module path substring ...
      excludedP = False
      for item in tracebackExcludeModulePath:
        if item in frame.f_code.co_filename:
          excludedP = True
          break

      # ... by exact function name ...
      excludedF = False
      for item in tracebackExcludeFunctionName:
        if frame.f_code.co_name == item:
          excludedF = True
          break

      # ... and by function name prefix
      excludedF2 = False
      for item in tracebackExcludeFunctionNameStarts:
        if frame.f_code.co_name.startswith(item):
          excludedF2 = True
          break

      if tracebackIncludeLocals or tracebackIncludeArg:
        # temporarily raise the recursion limit while stringifying
        # potentially deep structures; oldRL is None when no change was made
        oldRL = sys.getrecursionlimit()
        if oldRL < tracebackRecursionlimit:
          sys.setrecursionlimit(tracebackRecursionlimit)
        else:
          oldRL = None

        if tracebackIncludeLocals:
          localsDump = ""
          try:
            # localsDump = varDump(frame.f_locals)
            localsDump = str(frame.f_locals)
            localsDump = tracebackIncludeLocalsPrefix + localsDump
          except:  # pylint:disable=W0702
            localsDump = tracebackRecursionlimitErrorMsg + str(tracebackRecursionlimit)
            # pass
        else:
          localsDump = ""

        if tracebackIncludeArg:
          argDump = ""
          try:
            # argDump = varDump(arg)
            argDump = str(arg)
            argDump = tracebackIncludeArgPrefix + argDump
          except:  # pylint:disable=W0702
            argDump = tracebackRecursionlimitErrorMsg + str(tracebackRecursionlimit)
            # pass
        else:
          argDump = ""

        if oldRL is not None:
          sys.setrecursionlimit(oldRL)
      else:
        localsDump = ""
        argDump = ""

      # emit only when not filtered out by the internal/exit/exclusion rules
      if (not (tracebackIncludeInternalCalls is False and frame.f_code.co_name.startswith("__"))) and\
         (not (tracebackIncludeExitCalls is False and event == "return")) and\
         (not excludedP) and (not excludedF) and (not excludedF2):
        tmsg = idents + message + tracebackmessageDelimiter + \
               fn + tracebackIncludeFileNumberDelimiter + \
               ln + tracebackIncludeLineNumberDelimiter + \
               frame.f_code.co_name + "()" + tracebackFunctionNameDelimiter + \
               tracebackElapsedTimeDelimiter + te + localsDump + argDump
        if tracebackLogger is None:
          tracebackList.append(t + tracebackTimeMarkDelimiter + tmsg)
        else:
          tracebackLogger.debug("%s", tmsg)
          # empty time queue means the outermost traced call returned
          if len(tracebackTimeQueue) == 0:
            tracebackLogger.debug("%s", APP_CONSTS.LOGGER_DELIMITER_LINE)

    except Exception as e:
      if tracebackLogger is None:
        tracebackList.append("Exception: " + str(e))
      else:
        tracebackLogger.error("%s", str(e))
    except:  # pylint: disable=W0702
      if tracebackLogger is None:
        tracebackList.append(tracebackUnknownExceptionMsg)
      else:
        tracebackLogger.error("%s", tracebackUnknownExceptionMsg)

    lock.release()

  return tracefunc
399 
400 
401 
402 # #The function to get a printable representation of an object for debugging
403 #
404 #
405 # @param obj The object to print
406 # @param stringifyType - 0 - json, 1 - str
407 # @ret return string dump
408 #
def tracefunc(frame, event, arg, indent=None)
Definition: Utils.py:273
Here is the call graph for this function:

◆ urinormpath()

def app.Utils.urinormpath (   path,
  stripWWW = False,
  useValidator = False,
  enableAdditionNormalize = True 
)

Definition at line 764 of file Utils.py.

# #Normalize a URI: remove '.' and '..' path segments (RFC 3986 5.2.4)
# and optionally canonicalize the result via the Url helper class.
#
# @param path - source URI string
# @param stripWWW - when True, replace "://www." with "://" in the part before the query
# @param useValidator - when True, raise if the Url helper reports the URL as invalid
# @param enableAdditionNormalize - when True, return the Url-canonicalized form,
#        otherwise return the (optionally www-stripped) input
# @return normalized URI, the input unchanged when empty/None, or None on error
def urinormpath(path, stripWWW=False, useValidator=False, enableAdditionNormalize=True):  # pylint: disable=W0613
  # Remove '.' and '..' path segments from a URI path.
  # RFC 3986 5.2.4. Remove Dot Segments
  ret = None   # dot-segment-free variant, computed for comparison logging only
  ret1 = None  # the value actually returned

  try:
    if path is None or path == "":
      ret1 = path
    else:
      out = []
      for s in path.split('/'):
        if s == '.':
          continue
        elif s != '..':
          out.append(s)
        elif out:
          # '..' removes the previously collected segment
          out.pop()
      # Fix leading/trailing slashes
      if path.startswith('/') and (not out or out[0]):
        out.insert(0, '')
      if path.endswith('/.') or path.endswith('/..'):
        out.append('')
      ret = '/'.join(out)

      if stripWWW:
        # strip "www." only from the part before the first '?'; the query
        # string (possibly containing more '?') is re-assembled unchanged
        splitPath = path.split("?")
        if len(splitPath) > 0:
          splitPath[0] = splitPath[0].replace("://www.", "://")
        localPath = splitPath[0]
        for elem in splitPath[1:]:
          localPath += "?"
          localPath += elem
      else:
        localPath = path

      if enableAdditionNormalize:
        resultUrlDict = Url(localPath)
        if useValidator and not Url.GetStats([resultUrlDict])[0]["valid"]:
          raise Exception(path + " NOT VALIDATE!")
        ret1 = Url.GetStats([resultUrlDict])[0]["canonicalized"]
      else:
        ret1 = localPath

      # diagnostic only: compare the RFC dot-segment result with the
      # canonicalized one (NOTE(review): "DIFFERTNT" typo kept, log-only text)
      if ret is not None and ret1 is not None and ret != ret1:
        logger.debug("--->>>> URLS DIFFERTNT <<<<---")
        logger.debug(ret)
        logger.debug(ret1)
  except Exception as e:
    logger.error("Normalization error: " + str(e) + "\npath: [" + path + "]\n" + str(getTracebackInfo()))

  return ret1
816 
817 
818 
819 # #Logger file name generator
820 #
821 #
def urinormpath(path, stripWWW=False, useValidator=False, enableAdditionNormalize=True)
Definition: Utils.py:764
Definition: join.py:1
def getTracebackInfo(linesNumberMax=None)
Definition: Utils.py:218
Here is the call graph for this function:
Here is the caller graph for this function:

◆ urlNormalization()

def app.Utils.urlNormalization (   base,
  url,
  supportProtocols = None,
  log = None 
)

Definition at line 561 of file Utils.py.

# #Normalize an URL, optionally resolving it against a base URL and
# filtering by a list of supported protocol schemas.
#
# @param base - base URL used for urljoin resolution (ignored when not a string)
# @param url - URL (or whitespace-separated URLs) to normalize
# @param supportProtocols - optional list of allowed schemas; others yield None
# @param log - optional logger instance for verbose tracing
# @return comma-joined normalized URL(s), or None for non-string/unsupported input
def urlNormalization(base, url, supportProtocols=None, log=None):
  # variable for result
  res = None

  # Internal function for prepare before normalization:
  # undoes backslash-escape artifacts and control characters so that
  # urljoin/url_normalize receive a clean slash-separated value
  def prepareNormalization(path):
    out = []
    pathStr = path
    # NOTE(review): maps control-character escapes ('\a', '\n', ...) to
    # '/'+letter — presumably recovering backslash-mangled URL separators;
    # confirm against the producers of these URLs
    replaceSimbolDict = {'\a':'/a',
                         '\b':'/b',
                         '\f':'/f',
                         '\n':'/n',
                         '\r':'/r',
                         '\t':'/t',
                         '\v':'/v',
                         '\\':'\\\\'}

    replaceStartSimbolDict = {'://': ''}

    # drop a bare leading '://' artifact
    for src, dest in replaceStartSimbolDict.items():
      if pathStr.startswith(src):
        pathStr = pathStr.replace(src, dest)

    for src, dest in replaceSimbolDict.items():
      pathStr = pathStr.replace(src, dest)

    # replace any remaining raw control characters with '/<octal>' markers
    for i in range(0, 32):
      pathStr = pathStr.replace(str(chr(i)), str('/%o' % i))

    for s in pathStr.split("\\"):
      out.append(s)

    out = [elem for elem in out if elem != '']

    return '/'.join(out)


  if isinstance(url, basestring):
    # validate
#    if Url(url).isValid():
#      if log is not None:
#        log.debug("return as valid url: %s", str(url))
#      res = url
#    else:
    # set default result
    resUrl = prepareNormalization(url)
    if isinstance(base, basestring):
      # normalization url
      baseUrl = prepareNormalization(base)

      # resolve relative URLs against the prepared base
      if baseUrl != resUrl:
        resUrl = urlparse.urljoin(baseUrl, resUrl)

      if url != resUrl and log is not None:
        log.debug('==== Urls different ====')
        log.debug("base: %s", str(baseUrl))
        log.debug("url: %s", str(url))
        log.debug("res: %s", str(resUrl))

    res = resUrl

    # check support protocols
    if isinstance(supportProtocols, list):
      if log is not None:
        log.debug("supportProtocols: %s, res: %s", str(supportProtocols), str(res))
      # extract protocol schema from url
      if isinstance(res, basestring):
        v = urlparse.urlsplit(res)
        if v.scheme not in supportProtocols:
          if log is not None:
            log.debug("Not support protocol: %s", str(v.scheme))
          res = None

    if log is not None:
      log.debug("before normalization res: %s", str(res))

    # normalization: whitespace-separated URLs are normalized one by one
    # and re-joined with commas
    if res is not None:
      localUrls = res.split()
      resUrls = []
      if log is not None:
        log.debug("localUrls: %s", str(localUrls))

      for localUrl in localUrls:
        if localUrl != "":
          resUrls.append(url_normalize(localUrl))

      if log is not None:
        log.debug("resUrls: %s", varDump(resUrls))
      res = ','.join(resUrls)
      if log is not None:
        log.debug("res: %s", str(res))

  return res
655 
656 
657 # #class UrlNormalizator makes URL normalization
658 #
def url_normalize(url, charset='utf-8')
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
def urlNormalization(base, url, supportProtocols=None, log=None)
Definition: Utils.py:561
Definition: join.py:1
Here is the call graph for this function:
Here is the caller graph for this function:

◆ varDump()

def app.Utils.varDump (   obj,
  stringify = True,
  strTypeMaxLen = 256,
  strTypeCutSuffix = '...',
  stringifyType = 1,
  ignoreErrors = False,
  objectsHash = None,
  depth = 0,
  indent = 2,
  ensure_ascii = False,
  maxDepth = 10 
)

Definition at line 410 of file Utils.py.

410  objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10):
411  if objectsHash is None:
412  objectsHash = []
413  # print 'depth: ' + str(depth)
414  depth += 1
415  if depth < maxDepth:
416  newobj = obj
417  try:
418  if isinstance(obj, list):
419  newobj = []
420  for item in obj:
421  newobj.append(varDump(item, False, strTypeMaxLen, strTypeCutSuffix, stringifyType, ignoreErrors,
422  objectsHash, depth, indent, ensure_ascii, maxDepth))
423  elif isinstance(obj, tuple):
424  temp = []
425  for item in obj:
426  temp.append(varDump(item, False, strTypeMaxLen, strTypeCutSuffix, stringifyType, ignoreErrors,
427  objectsHash, depth, indent, ensure_ascii, maxDepth))
428  newobj = tuple(temp) # pylint: disable=R0204
429  elif isinstance(obj, set):
430  temp = []
431  for item in obj:
432  temp.append(str(varDump(item, False, strTypeMaxLen, strTypeCutSuffix, stringifyType, ignoreErrors,
433  objectsHash, depth, indent, ensure_ascii, maxDepth)))
434  newobj = set(temp)
435  elif isinstance(obj, dict):
436  newobj = {}
437  for key, value in obj.items():
438  newobj[str(varDump(key, False, strTypeMaxLen, strTypeCutSuffix))] = \
439  varDump(value, False, strTypeMaxLen, strTypeCutSuffix, stringifyType, ignoreErrors,
440  objectsHash, depth, indent, ensure_ascii, maxDepth)
441  # elif isinstance(obj, types.FunctionType):
442  # newobj = repr(obj)
443  elif '__dict__' in dir(obj):
444  newobj = {}
445  for k in obj.__dict__.keys():
446  # print 'k:' + str(k)
447  # print 'v:' + str(obj.__dict__[k])
448  if isinstance(obj.__dict__[k], basestring):
449  newobj[k] = obj.__dict__[k]
450  if strTypeMaxLen > 0 and len(newobj[k]) > strTypeMaxLen:
451  newobj[k] = newobj[k][:strTypeMaxLen] + strTypeCutSuffix
452  else:
453  if '__dict__' in dir(obj.__dict__[k]):
454  sobj = str(obj.__dict__[k])
455  if sobj in objectsHash:
456  newobj[k] = 'OBJECT RECURSION: ' + sobj
457  else:
458  objectsHash.append(sobj)
459  newobj[k] = varDump(obj.__dict__[k], False, strTypeMaxLen, strTypeCutSuffix, stringifyType,
460  ignoreErrors, objectsHash, depth, indent, ensure_ascii, maxDepth)
461  else:
462  newobj[k] = varDump(obj.__dict__[k], False, strTypeMaxLen, strTypeCutSuffix, stringifyType,
463  ignoreErrors, objectsHash, depth, indent, ensure_ascii, maxDepth)
464  sobj = str(obj)
465  if ' object at ' in sobj and '__type__' not in newobj:
466  newobj['__type__'] = sobj.replace(" object at ", " #").replace("__main__.", "")
467  else:
468  if stringifyType == 0:
469  try:
470  s = json.dumps(newobj, indent=indent, ensure_ascii=ensure_ascii)
471  del s
472  except Exception as err:
473  newobj = str(newobj)
474  except Exception as err:
475  if ignoreErrors:
476  newobj = ''
477  else:
478  newobj = 'General error: ' + str(err) + "\n" + getTracebackInfo()
479  else:
480  newobj = 'MAX OBJECTS EMBED DEPTH ' + str(maxDepth) + ' REACHED!'
481 
482  if stringify:
483  if stringifyType == 0:
484  try:
485  newobj = json.dumps(newobj, indent=indent, ensure_ascii=ensure_ascii)
486  except Exception as err:
487  if ignoreErrors:
488  newobj = ''
489  else:
490  newobj = 'To json error: ' + str(err)
491  else:
492  newobj = str(newobj)
493 
494  return newobj
495 
496 
497 # pylint: disable=W0702
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
def getTracebackInfo(linesNumberMax=None)
Definition: Utils.py:218
Here is the call graph for this function:

Variable Documentation

◆ lock

app.Utils.lock = threading.Lock()

Definition at line 51 of file Utils.py.

◆ logger

app.Utils.logger = logging.getLogger(APP_CONSTS.LOGGER_NAME)

Definition at line 49 of file Utils.py.

◆ META_REDIRECT

string app.Utils.META_REDIRECT = r"http-equiv\W*refresh.+?url\W+?(.+?)\""

Definition at line 58 of file Utils.py.

◆ SEARCH_COMMENT_PATTERN

string app.Utils.SEARCH_COMMENT_PATTERN = r"<![ \r\n\t]*(--([^\-]|[\r\n]|-[^\-])*--[ \r\n\t]*)>"

Definition at line 61 of file Utils.py.

◆ SEARCH_COMMENT_SIMPLE_PATTERN

string app.Utils.SEARCH_COMMENT_SIMPLE_PATTERN = r"<!--(.|\n)*?-->"

Definition at line 60 of file Utils.py.

◆ SEARCH_NOSCRIPT_PATTERN

string app.Utils.SEARCH_NOSCRIPT_PATTERN = r"<noscript>(.|\n)*?</noscript>"

Definition at line 62 of file Utils.py.

◆ tracebackElapsedTimeDelimiter

string app.Utils.tracebackElapsedTimeDelimiter = ""

Definition at line 264 of file Utils.py.

◆ tracebackElapsedTimeFormat

string app.Utils.tracebackElapsedTimeFormat = "{:.6f}"

Definition at line 265 of file Utils.py.

◆ tracebackExcludeFunctionName

list app.Utils.tracebackExcludeFunctionName = ["varDump"]

Definition at line 254 of file Utils.py.

◆ tracebackExcludeFunctionNameStarts

list app.Utils.tracebackExcludeFunctionNameStarts = ["<"]

Definition at line 255 of file Utils.py.

◆ tracebackExcludeModulePath

list app.Utils.tracebackExcludeModulePath = ["/usr/lib/", "/usr/local/lib/"]

Definition at line 253 of file Utils.py.

◆ tracebackFunctionNameDelimiter

string app.Utils.tracebackFunctionNameDelimiter = ":"

Definition at line 252 of file Utils.py.

◆ tracebackIdent

bool app.Utils.tracebackIdent = False

Definition at line 239 of file Utils.py.

◆ tracebackIdentFiller

string app.Utils.tracebackIdentFiller = "-"

Definition at line 240 of file Utils.py.

◆ tracebackIncludeArg

bool app.Utils.tracebackIncludeArg = False

Definition at line 260 of file Utils.py.

◆ tracebackIncludeArgPrefix

string app.Utils.tracebackIncludeArgPrefix = "\nARG:\n"

Definition at line 262 of file Utils.py.

◆ tracebackIncludeExitCalls

bool app.Utils.tracebackIncludeExitCalls = True

Definition at line 256 of file Utils.py.

◆ tracebackIncludeFileNumber

bool app.Utils.tracebackIncludeFileNumber = True

Definition at line 250 of file Utils.py.

◆ tracebackIncludeFileNumberDelimiter

string app.Utils.tracebackIncludeFileNumberDelimiter = ":"

Definition at line 251 of file Utils.py.

◆ tracebackIncludeInternalCalls

bool app.Utils.tracebackIncludeInternalCalls = False

Definition at line 247 of file Utils.py.

◆ tracebackIncludeLineNumber

bool app.Utils.tracebackIncludeLineNumber = True

Definition at line 248 of file Utils.py.

◆ tracebackIncludeLineNumberDelimiter

string app.Utils.tracebackIncludeLineNumberDelimiter = ":"

Definition at line 249 of file Utils.py.

◆ tracebackIncludeLocals

bool app.Utils.tracebackIncludeLocals = False

Definition at line 259 of file Utils.py.

◆ tracebackIncludeLocalsPrefix

string app.Utils.tracebackIncludeLocalsPrefix = "\nLOCALS:\n"

Definition at line 261 of file Utils.py.

◆ tracebackList

list app.Utils.tracebackList = []

Definition at line 237 of file Utils.py.

◆ tracebackLogger

app.Utils.tracebackLogger = None

Definition at line 263 of file Utils.py.

◆ tracebackMessageCall

string app.Utils.tracebackMessageCall = "call"

Definition at line 241 of file Utils.py.

◆ tracebackmessageDelimiter

string app.Utils.tracebackmessageDelimiter = ":"

Definition at line 243 of file Utils.py.

◆ tracebackMessageExit

string app.Utils.tracebackMessageExit = "exit"

Definition at line 242 of file Utils.py.

◆ tracebackRecursionlimit

int app.Utils.tracebackRecursionlimit = 0

Definition at line 257 of file Utils.py.

◆ tracebackRecursionlimitErrorMsg

string app.Utils.tracebackRecursionlimitErrorMsg = "RECURSION STACK LIMIT REACHED "

Definition at line 258 of file Utils.py.

◆ tracebackTimeMark

bool app.Utils.tracebackTimeMark = True

Definition at line 244 of file Utils.py.

◆ tracebackTimeMarkDelimiter

string app.Utils.tracebackTimeMarkDelimiter = " "

Definition at line 246 of file Utils.py.

◆ tracebackTimeMarkFormat

string app.Utils.tracebackTimeMarkFormat = "%Y-%m-%d %H:%M:%S.%f"

Definition at line 245 of file Utils.py.

◆ tracebackTimeQueue

list app.Utils.tracebackTimeQueue = []

Definition at line 238 of file Utils.py.

◆ tracebackUnknownExceptionMsg

string app.Utils.tracebackUnknownExceptionMsg = "Unknown exception!"

Definition at line 266 of file Utils.py.