HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
Utils.py File Reference

Go to the source code of this file.

Classes

class  app.Utils.PropertiesValidator
 
class  app.Utils.JsonSerializable
 
class  app.Utils.SQLExpression
 
class  app.Utils.PathMaker
 
class  app.Utils.ConfigParamsList
 
class  app.Utils.UrlParser
 
class  app.Utils.UrlNormalizator
 
class  app.Utils.LoggerFileName
 
class  app.Utils.DataReplacementConstants
 
class  app.Utils.MLStripper
 
class  app.Utils.MPLogger
 
class  app.Utils.ExceptionLog
 
class  app.Utils.InterruptableThread
 

Namespaces

 app.Utils
 

Functions

def app.Utils.getPath (dictionary, jsonString, path)
 
def app.Utils.getConfigParameter (parser, section, option, defValue)
 
def app.Utils.getTracebackInfo (linesNumberMax=None)
 
def app.Utils.tracefunc (frame, event, arg, indent=None)
 
def app.Utils.varDump (obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
 
def app.Utils.memUsage (point="")
 
def app.Utils.urlNormalization (base, url, supportProtocols=None, log=None)
 
def app.Utils.storePickleOnDisk (input_pickled_object, env_path, file_name)
 
def app.Utils.urinormpath (path, stripWWW=False, useValidator=False, enableAdditionNormalize=True)
 
def app.Utils.loggerFlush (loggerObj)
 
def app.Utils.accumulateSubstrings (substrList, prefixes)
 
def app.Utils.generateReplacementDict ()
 
def app.Utils.parseHost (url)
 
def app.Utils.convertToHttpDateFmt (date_str)
 
def app.Utils.autoFillSiteId (siteId, log)
 
def app.Utils.stripHTMLComments (htmlBuf=None, soup=None, hType=3)
 
def app.Utils.cutSubstringEntrances (buf, startStr='<!--', finishStr='-->', behaveMask=0, greediness=0, finishDefault='\n')
 
def app.Utils.eraseNoScript (htmlBuf=None)
 
def app.Utils.stripHTMLTags (htmlTxt, method=0, joinGlue=' ', regExp=None)
 
def app.Utils.innerHTMLText (htmlBuf, stripComment=True, stripScript=True)
 
def app.Utils.innerText (selectorList, delimiter=' ', innerDelimiter=' ', tagReplacers=None, REconditions=None, attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None)
 
def app.Utils.innerTextToList (selectorList, delimiter=' ', innerDelimiter=' ', tagReplacers=None, REconditions=None, attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None)
 
def app.Utils.getFirstNotEmptySubXPath (xpath, sel, subXPathPattern, subXPathes)
 
def app.Utils.getPairsDicts (incomeDict, splitters=',')
 
def app.Utils.splitPairs (buf, splitters=',')
 
def app.Utils.isTailSubstr (str1, str2)
 
def app.Utils.replaceLoopValue (buf, replaceFrom, replaceTo)
 
def app.Utils.getHTMLRedirectUrl (buff, log)
 
def app.Utils.emailParse (href, onlyName=False, defaultSeparator=' ')
 
def app.Utils.strToUnicode (inputStr)
 
def app.Utils.removeDuplicated (inStr, delimiter="\n", joingGlue=None, trimMode=1, skipEmpty=False)
 
def app.Utils.getContentCSSMarkupEntrancesNumber (content)
 
def app.Utils.executeWithTimeout (func, args=None, kwargs=None, timeout=1, default=None, log=None)
 
def app.Utils.loadFromFileByReference (fileReference, initString=None, protocolPrefix='file://', loggerObj=None)
 
def app.Utils.readFile (inFile, decodeUTF8=True)
 
def app.Utils.escape (string)
 
def app.Utils.isValidURL (url)
 
def app.Utils.getHash (strBuf, binSize=32, digestType=0, fixedMode=0, valLimit=18446744073709552000L)
 
def app.Utils.strToFloat (val, defaultValue=0.0, log=None, positivePrefixes=None)
 
def app.Utils.strToProxy (proxyString, log=None, defaultProxyType='http')
 
def app.Utils.executeCommand (cmd, inputStream='', log=None)
 
def app.Utils.jsonLoadsSafe (jsonString, default=None, log=None)
 
def app.Utils.reMatch (word, buff, log=None)
 

Variables

 app.Utils.logger = logging.getLogger(APP_CONSTS.LOGGER_NAME)
 
 app.Utils.lock = threading.Lock()
 
string app.Utils.META_REDIRECT = r"http-equiv\W*refresh.+?url\W+?(.+?)\""
 
string app.Utils.SEARCH_COMMENT_SIMPLE_PATTERN = r"<!--(.|\n)*?-->"
 
string app.Utils.SEARCH_COMMENT_PATTERN = r"<![ \r\n\t]*(--([^\-]|[\r\n]|-[^\-])*--[ \r\n\t]*)>"
 
string app.Utils.SEARCH_NOSCRIPT_PATTERN = r"<noscript>(.|\n)*?</noscript>"
 
list app.Utils.tracebackList = []
 
list app.Utils.tracebackTimeQueue = []
 
bool app.Utils.tracebackIdent = False
 
string app.Utils.tracebackIdentFiller = "-"
 
string app.Utils.tracebackMessageCall = "call"
 
string app.Utils.tracebackMessageExit = "exit"
 
string app.Utils.tracebackmessageDelimiter = ":"
 
bool app.Utils.tracebackTimeMark = True
 
string app.Utils.tracebackTimeMarkFormat = "%Y-%m-%d %H:%M:%S.%f"
 
string app.Utils.tracebackTimeMarkDelimiter = " "
 
bool app.Utils.tracebackIncludeInternalCalls = False
 
bool app.Utils.tracebackIncludeLineNumber = True
 
string app.Utils.tracebackIncludeLineNumberDelimiter = ":"
 
bool app.Utils.tracebackIncludeFileNumber = True
 
string app.Utils.tracebackIncludeFileNumberDelimiter = ":"
 
string app.Utils.tracebackFunctionNameDelimiter = ":"
 
list app.Utils.tracebackExcludeModulePath = ["/usr/lib/", "/usr/local/lib/"]
 
list app.Utils.tracebackExcludeFunctionName = ["varDump"]
 
list app.Utils.tracebackExcludeFunctionNameStarts = ["<"]
 
bool app.Utils.tracebackIncludeExitCalls = True
 
int app.Utils.tracebackRecursionlimit = 0
 
string app.Utils.tracebackRecursionlimitErrorMsg = "RECURSION STACK LIMIT REACHED "
 
bool app.Utils.tracebackIncludeLocals = False
 
bool app.Utils.tracebackIncludeArg = False
 
string app.Utils.tracebackIncludeLocalsPrefix = "\nLOCALS:\n"
 
string app.Utils.tracebackIncludeArgPrefix = "\nARG:\n"
 
 app.Utils.tracebackLogger = None
 
string app.Utils.tracebackElapsedTimeDelimiter = ""
 
string app.Utils.tracebackElapsedTimeFormat = "{:.6f}"
 
string app.Utils.tracebackUnknownExceptionMsg = "Unknown exception!"