HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
app.Utils Namespace Reference

Classes

class  ConfigParamsList
 
class  DataReplacementConstants
 
class  ExceptionLog
 
class  InterruptableThread
 
class  JsonSerializable
 
class  LoggerFileName
 
class  MLStripper
 
class  MPLogger
 
class  PathMaker
 
class  PropertiesValidator
 
class  SQLExpression
 
class  UrlNormalizator
 
class  UrlParser
 

Functions

def getPath (dictionary, jsonString, path)
 
def getConfigParameter (parser, section, option, defValue)
 
def getTracebackInfo (linesNumberMax=None)
 
def tracefunc (frame, event, arg, indent=None)
 
def varDump (obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
 
def memUsage (point="")
 
def urlNormalization (base, url, supportProtocols=None, log=None)
 
def storePickleOnDisk (input_pickled_object, env_path, file_name)
 
def urinormpath (path, stripWWW=False, useValidator=False, enableAdditionNormalize=True)
 
def loggerFlush (loggerObj)
 
def accumulateSubstrings (substrList, prefixes)
 
def generateReplacementDict ()
 
def parseHost (url)
 
def convertToHttpDateFmt (date_str)
 
def autoFillSiteId (siteId, log)
 
def stripHTMLComments (htmlBuf=None, soup=None, hType=3)
 
def cutSubstringEntrances (buf, startStr='<!--', finishStr='-->', behaveMask=0, greediness=0, finishDefault='\n')
 
def eraseNoScript (htmlBuf=None)
 
def stripHTMLTags (htmlTxt, method=0, joinGlue=' ', regExp=None)
 
def innerHTMLText (htmlBuf, stripComment=True, stripScript=True)
 
def innerText (selectorList, delimiter=' ', innerDelimiter=' ', tagReplacers=None, REconditions=None, attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None)
 
def innerTextToList (selectorList, delimiter=' ', innerDelimiter=' ', tagReplacers=None, REconditions=None, attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None)
 
def getFirstNotEmptySubXPath (xpath, sel, subXPathPattern, subXPathes)
 
def getPairsDicts (incomeDict, splitters=',')
 
def splitPairs (buf, splitters=',')
 
def isTailSubstr (str1, str2)
 
def replaceLoopValue (buf, replaceFrom, replaceTo)
 
def getHTMLRedirectUrl (buff, log)
 
def emailParse (href, onlyName=False, defaultSeparator=' ')
 
def strToUnicode (inputStr)
 
def removeDuplicated (inStr, delimiter="\n", joingGlue=None, trimMode=1, skipEmpty=False)
 
def getContentCSSMarkupEntrancesNumber (content)
 
def executeWithTimeout (func, args=None, kwargs=None, timeout=1, default=None, log=None)
 
def loadFromFileByReference (fileReference, initString=None, protocolPrefix='file://', loggerObj=None)
 
def readFile (inFile, decodeUTF8=True)
 
def escape (string)
 
def isValidURL (url)
 
def getHash (strBuf, binSize=32, digestType=0, fixedMode=0, valLimit=18446744073709552000L)
 
def strToFloat (val, defaultValue=0.0, log=None, positivePrefixes=None)
 
def strToProxy (proxyString, log=None, defaultProxyType='http')
 
def executeCommand (cmd, inputStream='', log=None)
 
def jsonLoadsSafe (jsonString, default=None, log=None)
 
def reMatch (word, buff, log=None)
 

Variables

 logger = logging.getLogger(APP_CONSTS.LOGGER_NAME)
 
 lock = threading.Lock()
 
string META_REDIRECT = r"http-equiv\W*refresh.+?url\W+?(.+?)\""
 
string SEARCH_COMMENT_SIMPLE_PATTERN = r"<!--(.|\n)*?-->"
 
string SEARCH_COMMENT_PATTERN = r"<![ \r\n\t]*(--([^\-]|[\r\n]|-[^\-])*--[ \r\n\t]*)>"
 
string SEARCH_NOSCRIPT_PATTERN = r"<noscript>(.|\n)*?</noscript>"
 
list tracebackList = []
 
list tracebackTimeQueue = []
 
bool tracebackIdent = False
 
string tracebackIdentFiller = "-"
 
string tracebackMessageCall = "call"
 
string tracebackMessageExit = "exit"
 
string tracebackmessageDelimiter = ":"
 
bool tracebackTimeMark = True
 
string tracebackTimeMarkFormat = "%Y-%m-%d %H:%M:%S.%f"
 
string tracebackTimeMarkDelimiter = " "
 
bool tracebackIncludeInternalCalls = False
 
bool tracebackIncludeLineNumber = True
 
string tracebackIncludeLineNumberDelimiter = ":"
 
bool tracebackIncludeFileNumber = True
 
string tracebackIncludeFileNumberDelimiter = ":"
 
string tracebackFunctionNameDelimiter = ":"
 
list tracebackExcludeModulePath = ["/usr/lib/", "/usr/local/lib/"]
 
list tracebackExcludeFunctionName = ["varDump"]
 
list tracebackExcludeFunctionNameStarts = ["<"]
 
bool tracebackIncludeExitCalls = True
 
int tracebackRecursionlimit = 0
 
string tracebackRecursionlimitErrorMsg = "RECURSION STACK LIMIT REACHED "
 
bool tracebackIncludeLocals = False
 
bool tracebackIncludeArg = False
 
string tracebackIncludeLocalsPrefix = "\nLOCALS:\n"
 
string tracebackIncludeArgPrefix = "\nARG:\n"
 
 tracebackLogger = None
 
string tracebackElapsedTimeDelimiter = ""
 
string tracebackElapsedTimeFormat = "{:.6f}"
 
string tracebackUnknownExceptionMsg = "Unknown exception!"
 

Detailed Description

Created on Mar 28, 2014

@package: app
@author: scorp
@link: http://hierarchical-cluster-engine.com/
@copyright: Copyright &copy; 2013-2014 IOIX Ukraine
@license: http://hierarchical-cluster-engine.com/license/
@since: 0.1

Function Documentation

◆ accumulateSubstrings()

def app.Utils.accumulateSubstrings (   substrList,
  prefixes 
)

Definition at line 905 of file Utils.py.

905 def accumulateSubstrings(substrList, prefixes):
906  ret = ""
907  if substrList is None or not isinstance(substrList, list): # # type(substrList) is not types.ListType:
908  raise Exception(">>> error substrList is None or not List type")
909  if prefixes is None or not isinstance(prefixes, list): # # type(prefixes) is not types.ListType:
910  raise Exception(">>> error prefixes is None or not List type")
911  if len(substrList) != len(prefixes):
912  raise Exception(">>> error substrList and prefixes lists have different lengths")
913  i = 0
914  for substr in substrList:
915  if isinstance(substr, str) or isinstance(substr, unicode):
916  if isinstance(prefixes[i], str) or isinstance(prefixes[i], unicode):
917  ret += str(prefixes[i])
918  ret += str(substr)
919  i += 1
920  return ret
921 
922 
923 
def accumulateSubstrings(substrList, prefixes)
Definition: Utils.py:905

◆ autoFillSiteId()

def app.Utils.autoFillSiteId (   siteId,
  log 
)

Definition at line 967 of file Utils.py.

967 def autoFillSiteId(siteId, log):
968  ret = siteId
969  if siteId is None:
970  ret = "0"
971  if log is not None:
972  log.debug("set siteId = '0' from 'autoFillSiteId'")
973 
974  return ret
975 
976 
977 # # method strips incoming html from html comments
978 # @param htmlBuf incoming content in string format
979 # @param soup incoming content as bs object
980 # @param hType -hType of handler
981 # @return clean html buff
def autoFillSiteId(siteId, log)
Definition: Utils.py:967

◆ convertToHttpDateFmt()

def app.Utils.convertToHttpDateFmt (   date_str)

Definition at line 958 of file Utils.py.

958 def convertToHttpDateFmt(date_str):
959  stamp = time.mktime(date_str.timetuple())
960  # stamp = time.mktime(time.strptime(date_str, '%Y-%m-%d %H:%M:%S'))
961  return time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(stamp))
962 
963 
964 # # method returns siteId, substitutes to "0" value if incoming siteId is None
965 # @param siteId - ID of site
965 # @param log - logger instance for log usage
def convertToHttpDateFmt(date_str)
Definition: Utils.py:958

◆ cutSubstringEntrances()

def app.Utils.cutSubstringEntrances (   buf,
  startStr = '<!--',
  finishStr = '-->',
  behaveMask = 0,
  greediness = 0,
  finishDefault = '\n' 
)

Definition at line 1011 of file Utils.py.

1011 def cutSubstringEntrances(buf, startStr='<!--', finishStr='-->', behaveMask=0, greediness=0, finishDefault='\n'):
1012  ret = buf
1013  i = 0
1014  while True:
1015  i += 1
1016  replaced = False
1017  if ret.find(startStr) != -1:
1018  p = ret.index(startStr)
1019  if p is not None:
1020  p1 = None
1021  if ret.find(finishStr, p) != -1:
1022  p1 = ret.index(finishStr, p) + len(finishStr)
1023  else:
1024  if behaveMask == 1:
1025  if ret.find(finishDefault, p) != -1:
1026  p1 = ret.index(finishDefault, p) + len(finishDefault)
1027  else:
1028  p1 = len(ret)
1029  if behaveMask == 2:
1030  p1 = len(ret)
1031  if p1 is not None:
1032  ret = ret[0:p] + ret[p1:]
1033  # print ret
1034  replaced = True
1035  if greediness > 0 and i == greediness:
1036  break
1037  if not replaced:
1038  break
1039 
1040  return ret
1041 
1042 
1043 # # method erase incoming html from noscript blocks
1044 # @param htmlBuf - incoming content in string format
1045 # @return clean html buff
def cutSubstringEntrances(buf, startStr='<!--', finishStr='-->', behaveMask=0, greediness=0, finishDefault='\n')
Definition: Utils.py:1011
Here is the caller graph for this function:

◆ emailParse()

def app.Utils.emailParse (   href,
  onlyName = False,
  defaultSeparator = ' ' 
)

Definition at line 1302 of file Utils.py.

1302 def emailParse(href, onlyName=False, defaultSeparator=' '): # pylint: disable=W0613
1303  ret = href
1304  splitHref = href.split('?')
1305  if splitHref is not None and len(splitHref) > 0:
1306  adresses = splitHref[0]
1307  adresses = adresses.split(',')
1308  if onlyName:
1309  names = []
1310  for adress in adresses:
1311  adress = adress.split('@')
1312  if adress is not None and len(adress) > 0:
1313  names.append(adress[0])
1314  adresses = names
1315  ret = ''
1316  for adress in adresses:
1317  ret += adress
1318  ret += ' '
1319  ret = ret.strip()
1320  return ret
1321 
1322 
1323 
1324 # #Multi process logger
1325 #
def emailParse(href, onlyName=False, defaultSeparator=' ')
Definition: Utils.py:1302

◆ eraseNoScript()

def app.Utils.eraseNoScript (   htmlBuf = None)

Definition at line 1046 of file Utils.py.

1046 def eraseNoScript(htmlBuf=None):
1047  ret = htmlBuf
1048  if htmlBuf is not None:
1049 # ret = re.sub(SEARCH_NOSCRIPT_PATTERN, "", htmlBuf)
1050 # logger.debug("!!! use pattern: %s", str(SEARCH_NOSCRIPT_PATTERN))
1051  ret = cutSubstringEntrances(htmlBuf, startStr='<noscript>', finishStr='</noscript>', behaveMask=2)
1052 # logger.debug("!!! htmlBuf: %s", varDump(htmlBuf, strTypeMaxLen=10))
1053 # logger.debug("!!! ret: %s", varDump(ret, strTypeMaxLen=10))
1054 
1055  return ret
1056 
1057 
1058 # Strips from all HTML tags with set of different methods
1059 # @param htmlTxt input content
1060 # @param method 0 - by BeautifulSoup, 1 - with RE 1, 2 - RE 2, 3 - HTML parser, 4 - clear Python w/o lib, 5 - xml lib
1061 # @param joinGlue - the glue string to joing parts
1062 # @param regExp - the custom re for the method 1 or 2
1063 # @return cleared content
def cutSubstringEntrances(buf, startStr='<!--', finishStr='-->', behaveMask=0, greediness=0, finishDefault='\n')
Definition: Utils.py:1011
def eraseNoScript(htmlBuf=None)
Definition: Utils.py:1046
Here is the call graph for this function:

◆ escape()

def app.Utils.escape (   string)

Definition at line 1628 of file Utils.py.

1628 def escape(string):
1629  return string.replace("\\", "\\\\").replace('"', '\\\"').replace("'", "\\\'").replace("\n", "\\n").\
1630  replace("\r", "\\r").replace("\0", "\\0")
1631 
1632 # #Validate URL string
1633 #
1634 # @param url - url string
1635 # @return True if valid or otherwise False
1636 
def escape(string)
Definition: Utils.py:1628

◆ executeCommand()

def app.Utils.executeCommand (   cmd,
  inputStream = '',
  log = None 
)

Definition at line 1747 of file Utils.py.

1747 def executeCommand(cmd, inputStream='', log=None):
1748  # variables for result tuple
1749  output = ''
1750  errMsg = ''
1751  exitCode = APP_CONSTS.EXIT_FAILURE
1752  try:
1753  if log is not None:
1754  log.debug("Popen: %s", str(cmd))
1755 
1756  process = Popen(cmd, stdout=PIPE, stdin=PIPE, stderr=PIPE, shell=True, close_fds=True, executable='/bin/bash')
1757  if log is not None:
1758  log.debug("len(inputStream)= %s", str(len(inputStream)))
1759 
1760  (output, errMsg) = process.communicate(input=inputStream)
1761  exitCode = process.wait()
1762 
1763  if log is not None:
1764  log.debug("Process response has exitCode = %s, stdout len = %s, stderr: %s",
1765  str(exitCode), str(len(output)), str(errMsg))
1766 
1767  except Exception, err:
1768  if log is not None:
1769  log.error("Popen execution error: %s", str(err))
1770 
1771  # make result tuple
1772  PopenResult = collections.namedtuple('PopenResult', ['stdout', 'stderr', 'exitCode'])
1773  popenResult = PopenResult(stdout=output, stderr=errMsg, exitCode=exitCode)
1774 
1775  return popenResult
1776 
1777 
1778 # # Parse json and return dict if okay or None if not
1779 #
1780 # @param jsonString json to pars
1781 # @param log - logger instance
1782 # @return resulted dict
def executeCommand(cmd, inputStream='', log=None)
Definition: Utils.py:1747

◆ executeWithTimeout()

def app.Utils.executeWithTimeout (   func,
  args = None,
  kwargs = None,
  timeout = 1,
  default = None,
  log = None 
)

Definition at line 1544 of file Utils.py.

1544 def executeWithTimeout(func, args=None, kwargs=None, timeout=1, default=None, log=None):
1545  if args is None:
1546  args = ()
1547  # import threading
1548  if kwargs is None:
1549  kwargs = {}
1550 
1551  it = InterruptableThread(func, args, kwargs, default, log)
1552  it.start()
1553  it.join(timeout)
1554  if it.isAlive():
1555  try:
1556  it._Thread__stop() # pylint: disable=W0212
1557  time.sleep(1)
1558  except:
1559  if log is not None:
1560  log.error("an not stop thread with _Thread__stop()!")
1561  if it.isAlive():
1562  try:
1563  it.__stop() # pylint: disable=W0212
1564  time.sleep(1)
1565  except:
1566  if log is not None:
1567  log.error("Can not stop thread with __stop()!")
1568  if it.isAlive():
1569  try:
1570  it._Thread__delete() # pylint: disable=W0212
1571  time.sleep(1)
1572  except:
1573  if log is not None:
1574  log.error("Can not stop thread with _Thread__delete()!")
1575 
1576  if it.errorCode == it.ERROR_CODE_APPLIED_EXCEPTION:
1577  if log is not None:
1578  log.error("Error1 code %s, exception: %s", str(it.errorCode), str(it.errorException))
1579  raise it.errorException
1580  return default
1581  else:
1582  if it.errorCode == it.ERROR_CODE_APPLIED_EXCEPTION:
1583  if log is not None:
1584  log.error("Error2 code %s, exception: %s", str(it.errorCode), str(it.errorException))
1585  raise it.errorException
1586  return it.result
1587 
1588 
1589 # #Load file data by protocoled reference
1590 #
1591 # @param initString string in json format or @file:// reference
1592 # @param protocolPrefix
1593 # @param loggerObj
1594 # @return initString unchanged, value from file loaded by link or empty string if load error
def executeWithTimeout(func, args=None, kwargs=None, timeout=1, default=None, log=None)
Definition: Utils.py:1544
Here is the caller graph for this function:

◆ generateReplacementDict()

def app.Utils.generateReplacementDict ( )

Definition at line 934 of file Utils.py.

935  ret = {}
936  ret[DataReplacementConstants.CUR_YEAR_FULL] = datetime.now().strftime("%Y")
937  ret[DataReplacementConstants.CUR_YEAR_SHORT] = datetime.now().strftime("%y")
938  ret[DataReplacementConstants.CUR_MONTH] = datetime.now().strftime("%m")
939  ret[DataReplacementConstants.CUR_DAY] = datetime.now().strftime("%d")
940  return ret
941 
942 
943 # #parseHost parse the root host name from url
944 # for example: the result of http://s1.y1.example.com/path/to is example.com
945 # @param url the full url
946 # @return host of the url, eg: example.com
def generateReplacementDict()
Definition: Utils.py:934

◆ getConfigParameter()

def app.Utils.getConfigParameter (   parser,
  section,
  option,
  defValue 
)

Definition at line 200 of file Utils.py.

200 def getConfigParameter(parser, section, option, defValue):
201  ret = defValue
202 
203  if parser and parser.has_option(section, option):
204  try:
205  ret = parser.get(section, option, defValue)
206  except Exception:
207  ret = defValue
208 
209  return ret
210 
211 
212 # #The function to get traceback information string prepared for logging
213 #
214 # This function collects traceback information and creates string representation ready to log it
215 # @param linesNumberMax max number of traceback lines to include in to the collection, None - signs all
216 # @ret return string
217 #
def getConfigParameter(parser, section, option, defValue)
Definition: Utils.py:200

◆ getContentCSSMarkupEntrancesNumber()

def app.Utils.getContentCSSMarkupEntrancesNumber (   content)

Definition at line 1426 of file Utils.py.

1427  return len(re.findall(r'\{.+?\}', content))
1428 
1429 
1430 # Class ExceptionLog for logging of the exception common way
def getContentCSSMarkupEntrancesNumber(content)
Definition: Utils.py:1426

◆ getFirstNotEmptySubXPath()

def app.Utils.getFirstNotEmptySubXPath (   xpath,
  sel,
  subXPathPattern,
  subXPathes 
)

Definition at line 1174 of file Utils.py.

1174 def getFirstNotEmptySubXPath(xpath, sel, subXPathPattern, subXPathes):
1175  retXPath = None
1176  retXPathValue = None
1177  for subXPath in subXPathes:
1178  retXPath = xpath + (subXPathPattern % subXPath)
1179  try:
1180  retXPathValue = sel.xpath(retXPath).extract()
1181  except Exception as excp:
1182  logger.info(">>> Common xPath extractor exception, = " + retXPath + " excp=" + str(excp))
1183  retXPathValue = None
1184  continue
1185  if len(retXPathValue) > 0 and ''.join(retXPathValue).strip() != '':
1186  break
1187  return retXPath, retXPathValue
1188 
1189 
1190 # # function call splitPairs for each element in incomeDict and fills return dict
1191 # @param incomeDict incoming dict
1192 # @param splitters incoming splitters
1193 # @return result dict
def getFirstNotEmptySubXPath(xpath, sel, subXPathPattern, subXPathes)
Definition: Utils.py:1174
Definition: join.py:1

◆ getHash()

def app.Utils.getHash (   strBuf,
  binSize = 32,
  digestType = 0,
  fixedMode = 0,
  valLimit = 18446744073709552000L 
)

Definition at line 1649 of file Utils.py.

1649 def getHash(strBuf, binSize=32, digestType=0, fixedMode=0, valLimit=18446744073709552000L):
1650 
1651  if fixedMode == 0:
1652  if digestType == 0:
1653  d = hashlib.md5(strBuf)
1654  else:
1655  d = hashlib.sha1(strBuf) # pylint: disable=R0204
1656  if binSize == 32:
1657  s = 8
1658  elif binSize == 64:
1659  s = 16
1660  else:
1661  s = 32
1662  h = d.hexdigest()
1663  v = int(h[:s], 16)
1664  if v > valLimit:
1665  for i in xrange(1, s - 1):
1666  v = int(h[:s - i], 16)
1667  if v < valLimit:
1668  break
1669  elif fixedMode == 1:
1670  v = ctypes.c_uint32(zlib.crc32(strBuf, int(time.time()))).value
1671  else:
1672  v = ctypes.c_ulong(zlib.crc32(strBuf, int(time.time()))).value
1673 
1674  return v
1675 
1676 
1677 # # Convert string to float
1678 # @param val - input value as string
1679 # @param defaultValue - default value for result
1680 # @param log - logger instance
1681 # @param positivePrefixes - positive prefixes dictionary
1682 # @return result float value
def getHash(strBuf, binSize=32, digestType=0, fixedMode=0, valLimit=18446744073709552000L)
Definition: Utils.py:1649

◆ getHTMLRedirectUrl()

def app.Utils.getHTMLRedirectUrl (   buff,
  log 
)

Definition at line 1278 of file Utils.py.

1278 def getHTMLRedirectUrl(buff, log):
1279  # variable for result
1280  ret = None
1281  resUrl = ''
1282 
1283  match = re.search(META_REDIRECT, stripHTMLComments(buff), re.I | re.U)
1284  if match is not None:
1285  resUrl = match.groups()[0].strip()
1286 
1287  log.debug('resUrl: ' + str(resUrl))
1288  urlObj = Url(resUrl)
1289  if urlObj.isValid():
1290  ret = resUrl
1291 
1292  log.debug('ret: ' + str(ret))
1293 
1294  return ret
1295 
1296 
1297 # # function parses incoming email address
1298 # @param href - incoming email href
1299 # @param onlyName - extract email names instead full email names
1300 # @param defaultSeparator - default separator between email elements
1301 # @return parsed email
def getHTMLRedirectUrl(buff, log)
Definition: Utils.py:1278
def stripHTMLComments(htmlBuf=None, soup=None, hType=3)
Definition: Utils.py:982
Here is the call graph for this function:

◆ getPairsDicts()

def app.Utils.getPairsDicts (   incomeDict,
  splitters = ',' 
)

Definition at line 1194 of file Utils.py.

1194 def getPairsDicts(incomeDict, splitters=','):
1195  ret = {}
1196  if isinstance(incomeDict, dict):
1197  for key in incomeDict:
1198  if isinstance(incomeDict[key], str) or isinstance(incomeDict[key], unicode):
1199  ret[key] = splitPairs(incomeDict[key], splitters)
1200  return ret
1201 
1202 
1203 # # function extracts splits incoming string by splitters into dict of name=value pairs
1204 # @param buf incoming text buf
1205 # @param splitters incoming splitters
1206 # @return result dict
def getPairsDicts(incomeDict, splitters=',')
Definition: Utils.py:1194
def splitPairs(buf, splitters=',')
Definition: Utils.py:1207
Here is the call graph for this function:

◆ getPath()

def app.Utils.getPath (   dictionary,
  jsonString,
  path 
)

Definition at line 93 of file Utils.py.

93 def getPath(dictionary, jsonString, path):
94  if jsonString != None:
95  dictionary = json.loads(jsonString)
96  for i, p in re.findall(r'(\d+)|(\w+)', path):
97  dictionary = dictionary[p or int(i)]
98  return dictionary
99 
100 
101 
102 # #Json serialization
103 #
def getPath(dictionary, jsonString, path)
Definition: Utils.py:93

◆ getTracebackInfo()

def app.Utils.getTracebackInfo (   linesNumberMax = None)

Definition at line 218 of file Utils.py.

218 def getTracebackInfo(linesNumberMax=None):
219  ret = ""
220  n = 0
221 
222  type_, value_, traceback_ = sys.exc_info()
223  stack = traceback.format_tb(traceback_)
224  del type_
225  del value_
226  for item in stack:
227  ret = ret + "\n" + (str(item))
228  n = n + 1
229  if linesNumberMax != None and n == linesNumberMax:
230  break
231 
232  return ret
233 
234 
235 
236 # #The function to get accumulate the traceback information in global variable __tracebackList
def getTracebackInfo(linesNumberMax=None)
Definition: Utils.py:218
Here is the caller graph for this function:

◆ innerHTMLText()

def app.Utils.innerHTMLText (   htmlBuf,
  stripComment = True,
  stripScript = True 
)

Definition at line 1130 of file Utils.py.

1130 def innerHTMLText(htmlBuf, stripComment=True, stripScript=True):
1131  from bs4 import BeautifulSoup
1132 
1133  soup = BeautifulSoup(htmlBuf, 'lxml')
1134 
1135  if stripScript:
1136  for elem in soup.findAll(name='script'):
1137  elem.extract()
1138  if stripComment:
1139  stripHTMLComments(htmlBuf=None, soup=soup)
1140 
1141  return ''.join(soup.findAll(text=True))
1142 
1143 
1144 # # function concatenates all HTMLTags from extractor also strips elements
1145 # @param selectorList incoming Selector
1146 # @return inner text from incoming selector
def innerHTMLText(htmlBuf, stripComment=True, stripScript=True)
Definition: Utils.py:1130
def stripHTMLComments(htmlBuf=None, soup=None, hType=3)
Definition: Utils.py:982
Definition: join.py:1
Here is the call graph for this function:

◆ innerText()

def app.Utils.innerText (   selectorList,
  delimiter = ' ',
  innerDelimiter = ' ',
  tagReplacers = None,
  REconditions = None,
  attrConditions = None,
  keepAttributes = None,
  baseUrl = None,
  closeVoid = None,
  excludeNodes = None 
)

Definition at line 1148 of file Utils.py.

1148  attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None):
1149  extendInnerText = ExtendInnerText(tagReplacers, delimiter, innerDelimiter, REconditions, attrConditions,
1150  keepAttributes, baseUrl, closeVoid, excludeNodes)
1151  extendInnerText.innerText(None, selectorList, None)
1152  ret = extendInnerText.stripHtml
1153  return ret
1154 
1155 
1156 # # function concatenates all HTMLTags from extractor also strips elements
1157 # @param selectorList incoming Selector
1158 # @return list of inner text from incoming selector
Here is the call graph for this function:
Here is the caller graph for this function:

◆ innerTextToList()

def app.Utils.innerTextToList (   selectorList,
  delimiter = ' ',
  innerDelimiter = ' ',
  tagReplacers = None,
  REconditions = None,
  attrConditions = None,
  keepAttributes = None,
  baseUrl = None,
  closeVoid = None,
  excludeNodes = None 
)

Definition at line 1160 of file Utils.py.

1160  attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None):
1161  extendInnerText = ExtendInnerText(tagReplacers, delimiter, innerDelimiter, REconditions, attrConditions,
1162  keepAttributes, baseUrl, closeVoid, excludeNodes)
1163  extendInnerText.innerTextToList(None, selectorList, None)
1164  ret = extendInnerText.stripHtmlList
1165  return ret
1166 
1167 
1168 # # function looks for the first not empty extracted XPath from subXPathes, using subXPathPattern for real xpath creation
1169 # @param xpath - incoming root xpath
1170 # @param sel - incoming selector
1171 # @param subXPathPattern - subXPath creation pattern
1172 # @param subXPathes - list of subXPathes
1173 # @return retXPath and retXPathValue values
Here is the caller graph for this function:

◆ isTailSubstr()

def app.Utils.isTailSubstr (   str1,
  str2 
)

Definition at line 1221 of file Utils.py.

1221 def isTailSubstr(str1, str2):
1222  ret = False
1223  if str1.find(str2) > 0 and ((len(str1) - str1.find(str2)) == len(str2)):
1224  ret = True
1225  return ret
1226 
1227 
1228 # # function makes string replacement repeatedly until no further change occurs
1229 # @param buf incoming text buf
1230 # @param replaceFrom substring for replacement from
1231 # @param replaceTo substring for replacement to
1232 # @return replacement string
def isTailSubstr(str1, str2)
Definition: Utils.py:1221

◆ isValidURL()

def app.Utils.isValidURL (   url)

Definition at line 1637 of file Utils.py.

1637 def isValidURL(url):
1638  return False if isinstance(validators.url(url), validators.ValidationFailure) else True
1639 
1640 
1641 # #Get some hash of a string limited bit size
1642 #
1643 # @param strBuf - string buffer
1644 # @param binSize - binary value size bits, supported values 32, 64 and 128
1645 # @param digestType - 0 - md5, 1 - sha1
1646 # @param fixedMode - 0 digests play, 1 - crc32 to uint32, 2 - crc32 to ulong
1647 # @param valLimit - limit of a value useful to fix a DB type size (MySQL 8 bytes BIGINT(20))
1648 # @return True if valid or otherwise False
def isValidURL(url)
Definition: Utils.py:1637
Here is the caller graph for this function:

◆ jsonLoadsSafe()

def app.Utils.jsonLoadsSafe (   jsonString,
  default = None,
  log = None 
)

Definition at line 1783 of file Utils.py.

1783 def jsonLoadsSafe(jsonString, default=None, log=None):
1784  # variable for result
1785  ret = default
1786  try:
1787  if jsonString is not None and jsonString != '':
1788  if isinstance(jsonString, basestring):
1789  ret = json.loads(jsonString)
1790  else:
1791  ret = jsonString
1792  if log is not None:
1793  log.debug("Input object type is: %s", type(jsonString))
1794  except Exception, err:
1795  if log is not None:
1796  log.error("Error pars json: %s; source string:\n%s", str(err), jsonString)
1797 
1798  return ret
1799 
1800 
1801 # simple re match check for search word definition
1802 #
1803 # @param word - word for search
1804 # @param buff - buffer where is search
1805 # @param log - logger instance
1806 # @return True if match exist or False otherwise
def jsonLoadsSafe(jsonString, default=None, log=None)
Definition: Utils.py:1783

◆ loadFromFileByReference()

def app.Utils.loadFromFileByReference (   fileReference,
  initString = None,
  protocolPrefix = 'file://',
  loggerObj = None 
)

Definition at line 1595 of file Utils.py.

1595 def loadFromFileByReference(fileReference, initString=None, protocolPrefix='file://', loggerObj=None):
1596  ret = initString
1597 
1598  if fileReference.startswith(protocolPrefix):
1599  try:
1600  f = fileReference[len(protocolPrefix):]
1601  ret = readFile(f)
1602  except Exception, err:
1603  if loggerObj is not None:
1604  loggerObj.error("Error load from file `%s` by reference: %s", f, str(err))
1605 
1606  return ret
1607 
1608 
1609 # #Read file
1610 #
1611 # @param inFile - name of file to read
1612 # @param decodeUTF8 - decode utf8 or not after read from file
1613 # @return - the buffer
def loadFromFileByReference(fileReference, initString=None, protocolPrefix='file://', loggerObj=None)
Definition: Utils.py:1595
def readFile(inFile, decodeUTF8=True)
Definition: Utils.py:1614
Here is the call graph for this function:

◆ loggerFlush()

def app.Utils.loggerFlush (   loggerObj)

Definition at line 893 of file Utils.py.

893 def loggerFlush(loggerObj):
894  for h in loggerObj.handlers:
895  if h.__class__.__name__ == 'FileHandler' or h.__class__.__name__ == 'TimedRotatingFileHandler':
896  h.flush()
897 
898 
899 
900 # #accumulateSubstrings accumulates substr list in one string and returns it, also adds prefixes between
901 # substrings in resulting string. substrList and prefixes must be List[str] type with equal length
902 # @param substrList - substrings list
903 # @param prefixes - prefixes list
904 # @returns - accumulate string
def loggerFlush(loggerObj)
Definition: Utils.py:893

◆ memUsage()

def app.Utils.memUsage (   point = "")

Definition at line 498 of file Utils.py.

498 def memUsage(point=""):
499  import resource
500  # usage = resource.getrusage(resource.RUSAGE_SELF)
501  return '''%s: mem=%s mb
502  ''' % (point, resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000)
503 
504 
505 
506 # #class UrlParser makes URL operation
507 #
def memUsage(point="")
Definition: Utils.py:498

◆ parseHost()

def app.Utils.parseHost (   url)

Definition at line 947 of file Utils.py.

947 def parseHost(url):
948  host = None
949  if urlparse.urlparse(url).hostname:
950  host = '.'.join(urlparse.urlparse(url).hostname.split('.')[-2:])
951  return host
952 
953 
954 # # convert date str to HTTP header format
955 # 2014-07-29 20:31:50 (GMT+8) to Tue, 29 Jul 2014 12:31:50 GMT
956 # @param date_str date str, 2014-07-29 20:31:50
957 # @return HTTP header formated date str : Tue, 29 Jul 2014 12:31:50 GMT
def parseHost(url)
Definition: Utils.py:947
Definition: join.py:1
Here is the caller graph for this function:

◆ readFile()

def app.Utils.readFile (   inFile,
  decodeUTF8 = True 
)

Definition at line 1614 of file Utils.py.

1614 def readFile(inFile, decodeUTF8=True):
1615  with open(inFile, 'r') as f:
1616  ret = f.read()
1617 
1618  if decodeUTF8:
1619  ret = ret.decode('utf8')
1620 
1621  return ret
1622 
1623 
1624 # #Escape string value
1625 #
1626 # @param string
1627 # @return escaped string
def readFile(inFile, decodeUTF8=True)
Definition: Utils.py:1614
Here is the caller graph for this function:

◆ reMatch()

def app.Utils.reMatch (   word,
  buff,
  log = None 
)

Definition at line 1807 of file Utils.py.

1807 def reMatch(word, buff, log=None):
1808  # variable for result
1809  ret = False
1810  if isinstance(word, basestring) and isinstance(buff, basestring):
1811  try:
1812  if word.startswith(u'/'):
1813  word = word[1:]
1814  if re.search(pattern=word, string=buff, flags=re.U + re.I + re.M) is not None:
1815  ret = True
1816  else:
1817  ret = (word.upper() == buff.upper())
1818 
1819  except Exception, err:
1820  if log is not None:
1821  log.error("Expression: %s, Error: %s", str(word), str(err))
1822 
1823  return ret
1824 
def reMatch(word, buff, log=None)
Definition: Utils.py:1807

◆ removeDuplicated()

def app.Utils.removeDuplicated (   inStr,
  delimiter = "\n",
  joingGlue = None,
  trimMode = 1,
  skipEmpty = False 
)

Definition at line 1394 of file Utils.py.

1394 def removeDuplicated(inStr, delimiter="\n", joingGlue=None, trimMode=1, skipEmpty=False):
1395  ret = inStr.split(delimiter)
1396 
1397  if joingGlue is None:
1398  glue = delimiter
1399  else:
1400  glue = joingGlue
1401 
1402  prev = None
1403  new = []
1404  for item in ret:
1405  if trimMode > 0:
1406  if trimMode == 1:
1407  item = item.lstrip()
1408  elif trimMode == 2:
1409  item = item.rstrip()
1410  else:
1411  item = item.strip()
1412  if skipEmpty and item == '':
1413  continue
1414  if item != prev:
1415  new.append(item)
1416  prev = item
1417  ret = new
1418 
1419  return glue.join(ret).strip()
1420 
1421 
1422 # Checks is the input content possible contains an CSS markup, possible is an in-line STYLE tag innerHTML
1423 #
1424 # @param content - to analyse
1425 # @return zero if presence of the CSS markup is not detected or number of the detected fragments
def removeDuplicated(inStr, delimiter="\n", joingGlue=None, trimMode=1, skipEmpty=False)
Definition: Utils.py:1394

◆ replaceLoopValue()

def app.Utils.replaceLoopValue (   buf,
  replaceFrom,
  replaceTo 
)

Definition at line 1233 of file Utils.py.

# #Repeatedly apply a substring replacement until the value stabilizes.
#
# Useful for collapsing runs (e.g. double spaces to single) where a single
# pass can leave new entrances behind.
#
# @param buf - source string
# @param replaceFrom - substring to search for
# @param replaceTo - replacement substring
# @return string with the replacement applied until no further change occurs
def replaceLoopValue(buf, replaceFrom, replaceTo):
  localValue = buf
  replaceValue = localValue.replace(replaceFrom, replaceTo)
  # Compare values, not lengths: the old length-based condition silently
  # discarded any equal-length replacement and returned the original buffer.
  while replaceValue != localValue:
    localValue = replaceValue
    replaceValue = localValue.replace(replaceFrom, replaceTo)
  return localValue
1240 
1241 
1242 # # # function extract html redirect link from meta
1243 # # @param utf8Buff incoming buff of html page
1244 # # @param log - logger instance
1245 # # @return html redirect link
1246 # def extractHTMLRedirectFromMeta(utf8Buff, log):
1247 # # variable for result
1248 # ret = None
1249 #
1250 # localREList = re.findall(META_RE_0, utf8Buff, re.I)
1251 # if len(localREList) > 0:
1252 # log.debug("!!! Found pattern: '%s' - HTML redirect is exist...", str(META_RE_0))
1253 # match = re.search(META_RE_1, utf8Buff, re.I | re.U)
1254 # if match is not None:
1255 # log.debug("!!! Found pattern: '%s' - HTML redirect blocked by comment...", str(META_RE_1))
1256 # else:
1257 # for bodyStr in localREList:
1258 # match = re.search(META_RE_2, bodyStr, re.I | re.U)
1259 # log.debug("!!! bodyStr: %s, pattern: '%s', match: %s", str(bodyStr), str(META_RE_2), varDump(match))
1260 # if match is not None:
1261 # ret = match.group(1)
1262 # else:
1263 # match = re.search(META_RE_3, bodyStr, re.I | re.U)
1264 # log.debug("!!! bodyStr: %s, pattern: '%s', match: %s", str(bodyStr), str(META_RE_3), varDump(match))
1265 # if match is not None:
1266 # ret = match.group(1)
1267 #
1268 # if ret is not None:
1269 # break
1270 #
1271 # return ret
1272 
1273 
1274 # # extract html redirect link from meta
1275 # @param buff - raw contant of html page
1276 # @param log - logger instance
1277 # @return - html redirect link
def replaceLoopValue(buf, replaceFrom, replaceTo)
Definition: Utils.py:1233

◆ splitPairs()

def app.Utils.splitPairs (   buf,
  splitters = ',' 
)

Definition at line 1207 of file Utils.py.

# #Split a buffer of 'key=value' pairs into a dictionary.
#
# Elements without an '=' are ignored; for elements with several '=' signs
# only the first value part is kept.
#
# @param buf - input string of pairs
# @param splitters - pair separator string
# @return dictionary of parsed key/value pairs
def splitPairs(buf, splitters=','):
  pairs = {}
  for chunk in buf.split(splitters):
    parts = chunk.split('=')
    if len(parts) >= 2:
      pairs[parts[0]] = parts[1]
  return pairs
1215 
1216 
1217 # # function looks is str2 an a tail of str1
1218 # @param str1 main string
1219 # @param str2 searching tail substring
1220 # @return False or True
def splitPairs(buf, splitters=',')
Definition: Utils.py:1207
Here is the caller graph for this function:

◆ storePickleOnDisk()

def app.Utils.storePickleOnDisk (   input_pickled_object,
  env_path,
  file_name 
)

Definition at line 754 of file Utils.py.

# #Store an already pickled object on disk.
#
# The target directory is taken from the environment variable named by
# env_path; when the variable is unset or empty nothing is written.
#
# @param input_pickled_object - pickled payload (bytes) to write
# @param env_path - name of the environment variable holding the directory prefix
# @param file_name - file name appended directly to the directory value
def storePickleOnDisk(input_pickled_object, env_path, file_name):
  if env_path in os.environ and os.environ[env_path] != "":
    logger.debug("os.environ[%s]: set to %s", env_path, os.environ[env_path])
    # context manager ensures the handle is closed even if the write fails
    # (the previous open(...).write(...) leaked the file object)
    with open(os.environ[env_path] + file_name, "wb") as outFile:
      outFile.write(input_pickled_object)
  else:
    logger.debug("os.environ[%s]: not set.", env_path)
760 
761 
762 
763 # This function taken from uritools module as it was removed from module
def storePickleOnDisk(input_pickled_object, env_path, file_name)
Definition: Utils.py:754

◆ stripHTMLComments()

def app.Utils.stripHTMLComments (   htmlBuf = None,
  soup = None,
  hType = 3 
)

Definition at line 982 of file Utils.py.

# #Strip HTML comments from a buffer or a parsed soup tree.
#
# @param htmlBuf - raw HTML text (used by hType 1..3)
# @param soup - parsed BeautifulSoup tree (used by hType 0, modified in place)
# @param hType - 0 - extract comment nodes from soup, 1 - full regex pattern,
#                2 - simple regex pattern, 3 - cutSubstringEntrances based
# @return buffer with comments removed (or the input unchanged when no branch applies)
def stripHTMLComments(htmlBuf=None, soup=None, hType=3):
  # local import keeps bs4 a call-time dependency only
  from bs4 import Comment

  ret = htmlBuf
  if soup is not None and hType == 0:
    # drop comment nodes from the parsed tree in place
    for node in soup.findAll(text=lambda t: isinstance(t, Comment)):
      node.extract()
  elif htmlBuf is not None:
    if hType == 1:
      ret = re.sub(SEARCH_COMMENT_PATTERN, "", htmlBuf)
      logger.debug("!!! use pattern: %s", str(SEARCH_COMMENT_PATTERN))
    elif hType == 2:
      ret = re.sub(SEARCH_COMMENT_SIMPLE_PATTERN, "", htmlBuf)
      logger.debug("!!! use pattern: %s", str(SEARCH_COMMENT_SIMPLE_PATTERN))
    elif hType == 3:
      ret = cutSubstringEntrances(htmlBuf, behaveMask=2)

  return ret
999 
1000 
1001 # Cuts substring entrances in source buffer started and finished with strings
1002 #
1003 # @param buf - source buffer
1004 # @param startStr - start string
1005 # @param finishStr - finish string
1006 # @param behaveMask - bit set mask defines a behavior in case of finishStr not found, 0 - do nothing,
1007 # 1 - cut up to finishDefault or end of buffer if no end of line found, 2 - cut up to end of buffer
1008 # @param greediness - max cutting number, 0 - means unlimited
1009 # @param finishDefault - default finish string used if behaveMask == 1 and finishStr is not found
1010 # @return resulted string
def cutSubstringEntrances(buf, startStr='<!--', finishStr='-->', behaveMask=0, greediness=0, finishDefault='\n')
Definition: Utils.py:1011
def stripHTMLComments(htmlBuf=None, soup=None, hType=3)
Definition: Utils.py:982
Here is the call graph for this function:
Here is the caller graph for this function:

◆ stripHTMLTags()

def app.Utils.stripHTMLTags (   htmlTxt,
  method = 0,
  joinGlue = ' ',
  regExp = None 
)

Definition at line 1064 of file Utils.py.

# #Strip HTML tags from a text buffer using one of several methods.
#
# @param htmlTxt - HTML text buffer (None or blank yields '')
# @param method - 0 - BeautifulSoup text nodes, 1/2 - regular expression
#                 (2 also removes comments), 3 - MLStripper HTMLParser,
#                 4 - manual character scanner, 5 - xml.etree itertext
# @param joinGlue - glue string inserted between extracted text fragments
# @param regExp - optional custom regular expression for methods 1 and 2
# @return stripped plain text with outer whitespace removed
def stripHTMLTags(htmlTxt, method=0, joinGlue=' ', regExp=None):
  ret = ''

  if htmlTxt is not None and htmlTxt.strip() != '':
    if method == 0:
      from bs4 import BeautifulSoup
      ret = joinGlue.join(BeautifulSoup(htmlTxt, 'lxml').findAll(text=True))
    elif method == 1 or method == 2:
      if regExp is not None:
        r = regExp
      else:
        if method == 1:
          r = r'<[^<]+?>'
        else:
          r = r'(<!--.*?-->|<[^>]*>)'
      ret = re.sub(r, joinGlue, htmlTxt)
    elif method == 3:
      ret = MLStripper()  # pylint: disable=R0204
      ret.feed(htmlTxt)
      ret = ret.get_data()
    elif method == 4:
      # manual scanner: copy characters outside tags, tracking quoted attributes
      tag = False
      quote = False
      for c in htmlTxt:
        if c == '<' and not quote:
          tag = True
        elif c == '>' and not quote:
          tag = False
        elif (c == '"' or c == "'") and tag:
          quote = not quote
        elif not tag:
          ret = ret + joinGlue + c
    elif method == 5:
      # import the submodule explicitly: a bare 'import xml' does not load
      # xml.etree.ElementTree and raised AttributeError on the next line
      import xml.etree.ElementTree
      ret = joinGlue.join(xml.etree.ElementTree.fromstring(htmlTxt).itertext())

    if method == 1 or method == 2:
      # NOTE(review): the cgi module is removed in Python 3.13; html.escape
      # is the modern replacement — kept as-is for Python 2 compatibility
      import cgi
      ret = cgi.escape(ret)
      ret = re.sub('[<>]', '', ret)

  return ret.strip()
1106 
1107 
1108 
def stripHTMLTags(htmlTxt, method=0, joinGlue=' ', regExp=None)
Definition: Utils.py:1064

◆ strToFloat()

def app.Utils.strToFloat (   val,
  defaultValue = 0.0,
  log = None,
  positivePrefixes = None 
)

Definition at line 1683 of file Utils.py.

# #Convert a string to float with optional SI-style magnitude suffix support.
#
# Examples: '2K' -> 2000.0, '1.5' -> 1.5; unparsable input yields defaultValue.
#
# @param val - value to convert (expected string; anything else yields default)
# @param defaultValue - value returned when conversion fails
# @param log - optional logger instance (failures are logged at debug level)
# @param positivePrefixes - optional custom suffix->multiplier map; the default
#        covers K, M, G, T, P, E, Z, Y
# @return converted float or defaultValue
def strToFloat(val, defaultValue=0.0, log=None, positivePrefixes=None):
  prefixes = positivePrefixes
  if prefixes is None:
    prefixes = {'K':'1E3', 'M':'1E6', 'G':'1E9', 'T':'1E12', 'P':'1E15', 'E':'1E18', 'Z':'1E21', 'Y':'1E24'}

  result = defaultValue
  try:
    upperVal = val.upper()
    suffix = upperVal[-1]
    if suffix in prefixes:
      # Decimal arithmetic avoids float rounding before the final conversion
      result = float(Decimal(upperVal[:-1]) * Decimal(prefixes[suffix]))
    else:
      result = float(upperVal)
  except Exception as err:
    if log is not None:
      log.debug(str(err))

  return result
1703 
1704 
1705 # #Convert string to proxy tuple (proxy_type, proxy_host, proxy_port, proxy_user, proxy_passwd)
1706 #
1707 # @param proxyString - proxy string
1708 # @param log - logger instance
1709 # @return proxy tuple if success or None otherwise
def strToFloat(val, defaultValue=0.0, log=None, positivePrefixes=None)
Definition: Utils.py:1683

◆ strToProxy()

def app.Utils.strToProxy (   proxyString,
  log = None,
  defaultProxyType = 'http' 
)

Definition at line 1710 of file Utils.py.

# #Convert a proxy definition string to a tuple
# (proxy_type, proxy_host, proxy_port, proxy_user, proxy_passwd).
#
# Supported forms: 'type://user:passwd@host:port', 'type://host:port'
# and 'host:port' (the last one gets defaultProxyType as schema).
#
# @param proxyString - proxy string
# @param log - optional logger instance
# @param defaultProxyType - schema used when the string carries none
# @return proxy tuple on success or None for non-string/empty input
def strToProxy(proxyString, log=None, defaultProxyType='http'):
  ret = None
  proxy_type = proxy_host = proxy_port = proxy_user = proxy_passwd = None
  if isinstance(proxyString, basestring) and proxyString != "":
    try:
      # try the most specific form first: credentials included
      match = re.search('(.*)://(.*):(.*)@(.*):(.*)', proxyString, re.I + re.U)
      if match is not None:
        proxy_type, proxy_user, proxy_passwd, proxy_host, proxy_port = match.groups()
      else:
        match = re.search('(.*)://(.*):(.*)', proxyString, re.I + re.U)
        if match is not None:
          proxy_type, proxy_host, proxy_port = match.groups()
        else:
          match = re.search('(.*):(.*)', proxyString, re.I + re.U)
          if match is not None:
            proxy_host, proxy_port = match.groups()
            proxy_type = defaultProxyType

      # a tuple is returned even when nothing matched (all fields None)
      ret = (proxy_type, proxy_host, proxy_port, proxy_user, proxy_passwd)
    except Exception as err:
      if log is not None:
        log.error("Error: %s", str(err))

  return ret
1739 
1740 
1741 # # execute command line command
1742 #
1743 # @param cmd - command line string
1744 # @param inputStream - input stream to popen
1745 # @param log - logger instance
1746 # @return result named tuple with support names: 'stdout', 'stderr', 'exitCode'
def strToProxy(proxyString, log=None, defaultProxyType='http')
Definition: Utils.py:1710
Here is the caller graph for this function:

◆ strToUnicode()

def app.Utils.strToUnicode (   inputStr)

Definition at line 1379 of file Utils.py.

# #Convert a byte string to unicode using UTF-8; pass any other value through.
#
# @param inputStr - input value
# @return decoded unicode object for str input, the input unchanged otherwise
def strToUnicode(inputStr):
  if isinstance(inputStr, str):
    return inputStr.decode('utf-8')
  return inputStr
1386 
1387 
1388 # Split string removes duplicated peaces and joing back
1389 # @param inStr - input string
1390 # @param delimiter - splitter delimiter
1391 # @param joingGlue - optional glue string to joing with, if None or omitted - the delimiter used
1392 # @param trimMode - peaces trim mode: 0 - not trimmed, 1 - trimmed left, 2 - trimmed right, 3 - trimmed both
1393 # @return string with duplicated peaces removed
def strToUnicode(inputStr)
Definition: Utils.py:1379

◆ tracefunc()

def app.Utils.tracefunc (   frame,
  event,
  arg,
  indent = None 
)

Definition at line 273 of file Utils.py.

# #Trace hook for sys.settrace()-style instrumentation.
#
# Handles only "call" and "return" events: formats a message with optional
# indentation, timestamp, file/line info, locals and argument dumps, then
# appends it to tracebackList or emits it via tracebackLogger. All behavior
# is driven by the module-level traceback* configuration variables.
#
# @param frame - current stack frame
# @param event - trace event name ("call", "return", ...; others are ignored)
# @param arg - event argument (return value for "return" events)
# @param indent - indent accumulator list; a fresh [0] is used when omitted
# @return tracefunc itself, so the interpreter keeps tracing nested scopes
def tracefunc(frame, event, arg, indent=None):  # pylint: disable=W0613
  if indent is None:
    indent = [0]

  # the lock serializes output produced by concurrently traced threads;
  # it is acquired only for the event types actually processed below
  if event == "call" or event == "return":
    lock.acquire()

    try:
      if event == "call":
        indent[0] += 2
        if tracebackIdent:
          idents = tracebackIdentFiller * indent[0]
        else:
          idents = ""
        message = tracebackMessageCall
        # remember the entry time; popped on the matching "return"
        tracebackTimeQueue.append(time.time())
        te = ""
      elif event == "return":
        if tracebackIdent:
          idents = tracebackIdentFiller * indent[0]
        else:
          idents = ""
        indent[0] -= 2
        message = tracebackMessageExit
        # elapsed time since the matching "call" event
        te = "{:.6f}".format(time.time() - tracebackTimeQueue.pop())

      if tracebackTimeMark:
        # t = time.strftime(tracebackTimeMarkFormat)
        t = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
      else:
        t = ""

      if tracebackIncludeLineNumber:
        ln = str(frame.f_lineno)
      else:
        ln = ""

      if tracebackIncludeFileNumber:
        fn = str(frame.f_code.co_filename)
      else:
        fn = ""

      # exclusion filters: by module path substring ...
      excludedP = False
      for item in tracebackExcludeModulePath:
        if item in frame.f_code.co_filename:
          excludedP = True
          break

      # ... by exact function name ...
      excludedF = False
      for item in tracebackExcludeFunctionName:
        if frame.f_code.co_name == item:
          excludedF = True
          break

      # ... and by function name prefix
      excludedF2 = False
      for item in tracebackExcludeFunctionNameStarts:
        if frame.f_code.co_name.startswith(item):
          excludedF2 = True
          break

      if tracebackIncludeLocals or tracebackIncludeArg:
        # temporarily raise the recursion limit while stringifying
        # potentially deep structures; oldRL is None when no change was made
        oldRL = sys.getrecursionlimit()
        if oldRL < tracebackRecursionlimit:
          sys.setrecursionlimit(tracebackRecursionlimit)
        else:
          oldRL = None

        if tracebackIncludeLocals:
          localsDump = ""
          try:
            # localsDump = varDump(frame.f_locals)
            localsDump = str(frame.f_locals)
            localsDump = tracebackIncludeLocalsPrefix + localsDump
          except:  # pylint:disable=W0702
            localsDump = tracebackRecursionlimitErrorMsg + str(tracebackRecursionlimit)
            # pass
        else:
          localsDump = ""

        if tracebackIncludeArg:
          argDump = ""
          try:
            # argDump = varDump(arg)
            argDump = str(arg)
            argDump = tracebackIncludeArgPrefix + argDump
          except:  # pylint:disable=W0702
            argDump = tracebackRecursionlimitErrorMsg + str(tracebackRecursionlimit)
            # pass
        else:
          argDump = ""

        if oldRL is not None:
          sys.setrecursionlimit(oldRL)
      else:
        localsDump = ""
        argDump = ""

      # emit only when not filtered out by the internal/exit/exclusion rules
      if (not (tracebackIncludeInternalCalls is False and frame.f_code.co_name.startswith("__"))) and\
         (not (tracebackIncludeExitCalls is False and event == "return")) and\
         (not excludedP) and (not excludedF) and (not excludedF2):
        tmsg = idents + message + tracebackmessageDelimiter + \
               fn + tracebackIncludeFileNumberDelimiter + \
               ln + tracebackIncludeLineNumberDelimiter + \
               frame.f_code.co_name + "()" + tracebackFunctionNameDelimiter + \
               tracebackElapsedTimeDelimiter + te + localsDump + argDump
        if tracebackLogger is None:
          tracebackList.append(t + tracebackTimeMarkDelimiter + tmsg)
        else:
          tracebackLogger.debug("%s", tmsg)
          # empty time queue means the outermost traced call returned
          if len(tracebackTimeQueue) == 0:
            tracebackLogger.debug("%s", APP_CONSTS.LOGGER_DELIMITER_LINE)

    except Exception as e:
      if tracebackLogger is None:
        tracebackList.append("Exception: " + str(e))
      else:
        tracebackLogger.error("%s", str(e))
    except:  # pylint: disable=W0702
      if tracebackLogger is None:
        tracebackList.append(tracebackUnknownExceptionMsg)
      else:
        tracebackLogger.error("%s", tracebackUnknownExceptionMsg)

    lock.release()

  return tracefunc
399 
400 
401 
402 # #The function to get a printable representation of an object for debugging
403 #
404 #
405 # @param obj The object to print
406 # @param stringifyType - 0 - json, 1 - str
407 # @ret return string dump
408 #
def tracefunc(frame, event, arg, indent=None)
Definition: Utils.py:273
Here is the call graph for this function:

◆ urinormpath()

def app.Utils.urinormpath (   path,
  stripWWW = False,
  useValidator = False,
  enableAdditionNormalize = True 
)

Definition at line 764 of file Utils.py.

# #Normalize a URI: remove '.' and '..' path segments (RFC 3986 5.2.4)
# and optionally canonicalize the result via the Url helper class.
#
# @param path - source URI string
# @param stripWWW - when True, replace "://www." with "://" in the part before the query
# @param useValidator - when True, raise if the Url helper reports the URL as invalid
# @param enableAdditionNormalize - when True, return the Url-canonicalized form,
#        otherwise return the (optionally www-stripped) input
# @return normalized URI, the input unchanged when empty/None, or None on error
def urinormpath(path, stripWWW=False, useValidator=False, enableAdditionNormalize=True):  # pylint: disable=W0613
  # Remove '.' and '..' path segments from a URI path.
  # RFC 3986 5.2.4. Remove Dot Segments
  ret = None   # dot-segment-free variant, computed for comparison logging only
  ret1 = None  # the value actually returned

  try:
    if path is None or path == "":
      ret1 = path
    else:
      out = []
      for s in path.split('/'):
        if s == '.':
          continue
        elif s != '..':
          out.append(s)
        elif out:
          # '..' removes the previously collected segment
          out.pop()
      # Fix leading/trailing slashes
      if path.startswith('/') and (not out or out[0]):
        out.insert(0, '')
      if path.endswith('/.') or path.endswith('/..'):
        out.append('')
      ret = '/'.join(out)

      if stripWWW:
        # strip "www." only from the part before the first '?'; the query
        # string (possibly containing more '?') is re-assembled unchanged
        splitPath = path.split("?")
        if len(splitPath) > 0:
          splitPath[0] = splitPath[0].replace("://www.", "://")
        localPath = splitPath[0]
        for elem in splitPath[1:]:
          localPath += "?"
          localPath += elem
      else:
        localPath = path

      if enableAdditionNormalize:
        resultUrlDict = Url(localPath)
        if useValidator and not Url.GetStats([resultUrlDict])[0]["valid"]:
          raise Exception(path + " NOT VALIDATE!")
        ret1 = Url.GetStats([resultUrlDict])[0]["canonicalized"]
      else:
        ret1 = localPath

      # diagnostic only: compare the RFC dot-segment result with the
      # canonicalized one (NOTE(review): "DIFFERTNT" typo kept, log-only text)
      if ret is not None and ret1 is not None and ret != ret1:
        logger.debug("--->>>> URLS DIFFERTNT <<<<---")
        logger.debug(ret)
        logger.debug(ret1)
  except Exception as e:
    logger.error("Normalization error: " + str(e) + "\npath: [" + path + "]\n" + str(getTracebackInfo()))

  return ret1
816 
817 
818 
819 # #Logger file name generator
820 #
821 #
def urinormpath(path, stripWWW=False, useValidator=False, enableAdditionNormalize=True)
Definition: Utils.py:764
Definition: join.py:1
def getTracebackInfo(linesNumberMax=None)
Definition: Utils.py:218
Here is the call graph for this function:
Here is the caller graph for this function:

◆ urlNormalization()

def app.Utils.urlNormalization (   base,
  url,
  supportProtocols = None,
  log = None 
)

Definition at line 561 of file Utils.py.

# #Normalize an URL, optionally resolving it against a base URL and
# filtering by a list of supported protocol schemas.
#
# @param base - base URL used for urljoin resolution (ignored when not a string)
# @param url - URL (or whitespace-separated URLs) to normalize
# @param supportProtocols - optional list of allowed schemas; others yield None
# @param log - optional logger instance for verbose tracing
# @return comma-joined normalized URL(s), or None for non-string/unsupported input
def urlNormalization(base, url, supportProtocols=None, log=None):
  # variable for result
  res = None

  # Internal function for prepare before normalization:
  # undoes backslash-escape artifacts and control characters so that
  # urljoin/url_normalize receive a clean slash-separated value
  def prepareNormalization(path):
    out = []
    pathStr = path
    # NOTE(review): maps control-character escapes ('\a', '\n', ...) to
    # '/'+letter — presumably recovering backslash-mangled URL separators;
    # confirm against the producers of these URLs
    replaceSimbolDict = {'\a':'/a',
                         '\b':'/b',
                         '\f':'/f',
                         '\n':'/n',
                         '\r':'/r',
                         '\t':'/t',
                         '\v':'/v',
                         '\\':'\\\\'}

    replaceStartSimbolDict = {'://': ''}

    # drop a bare leading '://' artifact
    for src, dest in replaceStartSimbolDict.items():
      if pathStr.startswith(src):
        pathStr = pathStr.replace(src, dest)

    for src, dest in replaceSimbolDict.items():
      pathStr = pathStr.replace(src, dest)

    # replace any remaining raw control characters with '/<octal>' markers
    for i in range(0, 32):
      pathStr = pathStr.replace(str(chr(i)), str('/%o' % i))

    for s in pathStr.split("\\"):
      out.append(s)

    out = [elem for elem in out if elem != '']

    return '/'.join(out)


  if isinstance(url, basestring):
    # validate
#    if Url(url).isValid():
#      if log is not None:
#        log.debug("return as valid url: %s", str(url))
#      res = url
#    else:
    # set default result
    resUrl = prepareNormalization(url)
    if isinstance(base, basestring):
      # normalization url
      baseUrl = prepareNormalization(base)

      # resolve relative URLs against the prepared base
      if baseUrl != resUrl:
        resUrl = urlparse.urljoin(baseUrl, resUrl)

      if url != resUrl and log is not None:
        log.debug('==== Urls different ====')
        log.debug("base: %s", str(baseUrl))
        log.debug("url: %s", str(url))
        log.debug("res: %s", str(resUrl))

    res = resUrl

    # check support protocols
    if isinstance(supportProtocols, list):
      if log is not None:
        log.debug("supportProtocols: %s, res: %s", str(supportProtocols), str(res))
      # extract protocol schema from url
      if isinstance(res, basestring):
        v = urlparse.urlsplit(res)
        if v.scheme not in supportProtocols:
          if log is not None:
            log.debug("Not support protocol: %s", str(v.scheme))
          res = None

    if log is not None:
      log.debug("before normalization res: %s", str(res))

    # normalization: whitespace-separated URLs are normalized one by one
    # and re-joined with commas
    if res is not None:
      localUrls = res.split()
      resUrls = []
      if log is not None:
        log.debug("localUrls: %s", str(localUrls))

      for localUrl in localUrls:
        if localUrl != "":
          resUrls.append(url_normalize(localUrl))

      if log is not None:
        log.debug("resUrls: %s", varDump(resUrls))
      res = ','.join(resUrls)
      if log is not None:
        log.debug("res: %s", str(res))

  return res
655 
656 
657 # #class UrlNormalizator makes URL normalization
658 #
def url_normalize(url, charset='utf-8')
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
def urlNormalization(base, url, supportProtocols=None, log=None)
Definition: Utils.py:561
Definition: join.py:1
Here is the call graph for this function:
Here is the caller graph for this function:

◆ varDump()

def app.Utils.varDump (   obj,
  stringify = True,
  strTypeMaxLen = 256,
  strTypeCutSuffix = '...',
  stringifyType = 1,
  ignoreErrors = False,
  objectsHash = None,
  depth = 0,
  indent = 2,
  ensure_ascii = False,
  maxDepth = 10 
)

Definition at line 410 of file Utils.py.

410  objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10):
411  if objectsHash is None:
412  objectsHash = []
413  # print 'depth: ' + str(depth)
414  depth += 1
415  if depth < maxDepth:
416  newobj = obj
417  try:
418  if isinstance(obj, list):
419  newobj = []
420  for item in obj:
421  newobj.append(varDump(item, False, strTypeMaxLen, strTypeCutSuffix, stringifyType, ignoreErrors,
422  objectsHash, depth, indent, ensure_ascii, maxDepth))
423  elif isinstance(obj, tuple):
424  temp = []
425  for item in obj:
426  temp.append(varDump(item, False, strTypeMaxLen, strTypeCutSuffix, stringifyType, ignoreErrors,
427  objectsHash, depth, indent, ensure_ascii, maxDepth))
428  newobj = tuple(temp) # pylint: disable=R0204
429  elif isinstance(obj, set):
430  temp = []
431  for item in obj:
432  temp.append(str(varDump(item, False, strTypeMaxLen, strTypeCutSuffix, stringifyType, ignoreErrors,
433  objectsHash, depth, indent, ensure_ascii, maxDepth)))
434  newobj = set(temp)
435  elif isinstance(obj, dict):
436  newobj = {}
437  for key, value in obj.items():
438  newobj[str(varDump(key, False, strTypeMaxLen, strTypeCutSuffix))] = \
439  varDump(value, False, strTypeMaxLen, strTypeCutSuffix, stringifyType, ignoreErrors,
440  objectsHash, depth, indent, ensure_ascii, maxDepth)
441  # elif isinstance(obj, types.FunctionType):
442  # newobj = repr(obj)
443  elif '__dict__' in dir(obj):
444  newobj = {}
445  for k in obj.__dict__.keys():
446  # print 'k:' + str(k)
447  # print 'v:' + str(obj.__dict__[k])
448  if isinstance(obj.__dict__[k], basestring):
449  newobj[k] = obj.__dict__[k]
450  if strTypeMaxLen > 0 and len(newobj[k]) > strTypeMaxLen:
451  newobj[k] = newobj[k][:strTypeMaxLen] + strTypeCutSuffix
452  else:
453  if '__dict__' in dir(obj.__dict__[k]):
454  sobj = str(obj.__dict__[k])
455  if sobj in objectsHash:
456  newobj[k] = 'OBJECT RECURSION: ' + sobj
457  else:
458  objectsHash.append(sobj)
459  newobj[k] = varDump(obj.__dict__[k], False, strTypeMaxLen, strTypeCutSuffix, stringifyType,
460  ignoreErrors, objectsHash, depth, indent, ensure_ascii, maxDepth)
461  else:
462  newobj[k] = varDump(obj.__dict__[k], False, strTypeMaxLen, strTypeCutSuffix, stringifyType,
463  ignoreErrors, objectsHash, depth, indent, ensure_ascii, maxDepth)
464  sobj = str(obj)
465  if ' object at ' in sobj and '__type__' not in newobj:
466  newobj['__type__'] = sobj.replace(" object at ", " #").replace("__main__.", "")
467  else:
468  if stringifyType == 0:
469  try:
470  s = json.dumps(newobj, indent=indent, ensure_ascii=ensure_ascii)
471  del s
472  except Exception as err:
473  newobj = str(newobj)
474  except Exception as err:
475  if ignoreErrors:
476  newobj = ''
477  else:
478  newobj = 'General error: ' + str(err) + "\n" + getTracebackInfo()
479  else:
480  newobj = 'MAX OBJECTS EMBED DEPTH ' + str(maxDepth) + ' REACHED!'
481 
482  if stringify:
483  if stringifyType == 0:
484  try:
485  newobj = json.dumps(newobj, indent=indent, ensure_ascii=ensure_ascii)
486  except Exception as err:
487  if ignoreErrors:
488  newobj = ''
489  else:
490  newobj = 'To json error: ' + str(err)
491  else:
492  newobj = str(newobj)
493 
494  return newobj
495 
496 
497 # pylint: disable=W0702
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
def getTracebackInfo(linesNumberMax=None)
Definition: Utils.py:218
Here is the call graph for this function:

Variable Documentation

◆ lock

app.Utils.lock = threading.Lock()

Definition at line 51 of file Utils.py.

◆ logger

app.Utils.logger = logging.getLogger(APP_CONSTS.LOGGER_NAME)

Definition at line 49 of file Utils.py.

◆ META_REDIRECT

string app.Utils.META_REDIRECT = r"http-equiv\W*refresh.+?url\W+?(.+?)\""

Definition at line 58 of file Utils.py.

◆ SEARCH_COMMENT_PATTERN

string app.Utils.SEARCH_COMMENT_PATTERN = r"<![ \r\n\t]*(--([^\-]|[\r\n]|-[^\-])*--[ \r\n\t]*)>"

Definition at line 61 of file Utils.py.

◆ SEARCH_COMMENT_SIMPLE_PATTERN

string app.Utils.SEARCH_COMMENT_SIMPLE_PATTERN = r"<!--(.|\n)*?-->"

Definition at line 60 of file Utils.py.

◆ SEARCH_NOSCRIPT_PATTERN

string app.Utils.SEARCH_NOSCRIPT_PATTERN = r"<noscript>(.|\n)*?</noscript>"

Definition at line 62 of file Utils.py.

◆ tracebackElapsedTimeDelimiter

string app.Utils.tracebackElapsedTimeDelimiter = ""

Definition at line 264 of file Utils.py.

◆ tracebackElapsedTimeFormat

string app.Utils.tracebackElapsedTimeFormat = "{:.6f}"

Definition at line 265 of file Utils.py.

◆ tracebackExcludeFunctionName

list app.Utils.tracebackExcludeFunctionName = ["varDump"]

Definition at line 254 of file Utils.py.

◆ tracebackExcludeFunctionNameStarts

list app.Utils.tracebackExcludeFunctionNameStarts = ["<"]

Definition at line 255 of file Utils.py.

◆ tracebackExcludeModulePath

list app.Utils.tracebackExcludeModulePath = ["/usr/lib/", "/usr/local/lib/"]

Definition at line 253 of file Utils.py.

◆ tracebackFunctionNameDelimiter

string app.Utils.tracebackFunctionNameDelimiter = ":"

Definition at line 252 of file Utils.py.

◆ tracebackIdent

bool app.Utils.tracebackIdent = False

Definition at line 239 of file Utils.py.

◆ tracebackIdentFiller

string app.Utils.tracebackIdentFiller = "-"

Definition at line 240 of file Utils.py.

◆ tracebackIncludeArg

bool app.Utils.tracebackIncludeArg = False

Definition at line 260 of file Utils.py.

◆ tracebackIncludeArgPrefix

string app.Utils.tracebackIncludeArgPrefix = "\nARG:\n"

Definition at line 262 of file Utils.py.

◆ tracebackIncludeExitCalls

bool app.Utils.tracebackIncludeExitCalls = True

Definition at line 256 of file Utils.py.

◆ tracebackIncludeFileNumber

bool app.Utils.tracebackIncludeFileNumber = True

Definition at line 250 of file Utils.py.

◆ tracebackIncludeFileNumberDelimiter

string app.Utils.tracebackIncludeFileNumberDelimiter = ":"

Definition at line 251 of file Utils.py.

◆ tracebackIncludeInternalCalls

bool app.Utils.tracebackIncludeInternalCalls = False

Definition at line 247 of file Utils.py.

◆ tracebackIncludeLineNumber

bool app.Utils.tracebackIncludeLineNumber = True

Definition at line 248 of file Utils.py.

◆ tracebackIncludeLineNumberDelimiter

string app.Utils.tracebackIncludeLineNumberDelimiter = ":"

Definition at line 249 of file Utils.py.

◆ tracebackIncludeLocals

bool app.Utils.tracebackIncludeLocals = False

Definition at line 259 of file Utils.py.

◆ tracebackIncludeLocalsPrefix

string app.Utils.tracebackIncludeLocalsPrefix = "\nLOCALS:\n"

Definition at line 261 of file Utils.py.

◆ tracebackList

list app.Utils.tracebackList = []

Definition at line 237 of file Utils.py.

◆ tracebackLogger

app.Utils.tracebackLogger = None

Definition at line 263 of file Utils.py.

◆ tracebackMessageCall

string app.Utils.tracebackMessageCall = "call"

Definition at line 241 of file Utils.py.

◆ tracebackmessageDelimiter

string app.Utils.tracebackmessageDelimiter = ":"

Definition at line 243 of file Utils.py.

◆ tracebackMessageExit

string app.Utils.tracebackMessageExit = "exit"

Definition at line 242 of file Utils.py.

◆ tracebackRecursionlimit

int app.Utils.tracebackRecursionlimit = 0

Definition at line 257 of file Utils.py.

◆ tracebackRecursionlimitErrorMsg

string app.Utils.tracebackRecursionlimitErrorMsg = "RECURSION STACK LIMIT REACHED "

Definition at line 258 of file Utils.py.

◆ tracebackTimeMark

bool app.Utils.tracebackTimeMark = True

Definition at line 244 of file Utils.py.

◆ tracebackTimeMarkDelimiter

string app.Utils.tracebackTimeMarkDelimiter = " "

Definition at line 246 of file Utils.py.

◆ tracebackTimeMarkFormat

string app.Utils.tracebackTimeMarkFormat = "%Y-%m-%d %H:%M:%S.%f"

Definition at line 245 of file Utils.py.

◆ tracebackTimeQueue

list app.Utils.tracebackTimeQueue = []

Definition at line 238 of file Utils.py.

◆ tracebackUnknownExceptionMsg

string app.Utils.tracebackUnknownExceptionMsg = "Unknown exception!"

Definition at line 266 of file Utils.py.