HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.
2.0.0-chaika
Hierarchical Cluster Engine Python language binding
|
Public Member Functions | |
def | __init__ (self, tagReplacers=None, delimiter=' ', innerDelimiter=' ', REconditions=None, attrConditions=None, keepAttributes=None, baseUrl=None, closeVoid=None, excludeNodes=None) |
def | nodeCallbackOpenHandler (self, nodeElem, level) |
def | nodeCallbackCloseHandler (self, nodeElem, level) |
def | textCallbackHandler (self, nodeElem, level, excludeTags) |
def | innerText (self, contentBuf, xPath, tagRemoves=None) |
def | innerTextToList (self, contentBuf, xPath, tagRemoves=None) |
def | extractAttributes (self, nodeElem, tagName, keepAttributes, baseUrl) |
def | applyCloseVoid (self, nodeElem, tagName) |
Static Public Member Functions | |
def | checkElemAttributes (attrConditions, elem) |
def | traversalNodes (elemList, level=0, nodeCallbackOpen=None, nodeCallbackClose=None, textCallback=None, excludeTags=None, attrConditions=None, excludeNodes=None) |
def | isExcludeNode (excludeNodes, elem) |
Check whether a node should be excluded. More... | |
Public Attributes | |
stripHtml | |
stripHtmlList | |
errorString | |
delimiter | |
innerDelimiter | |
REconditions | |
attrConditions | |
tagReplacers | |
keepAttributes | |
baseUrl | |
closeVoid | |
excludeNodes | |
Static Public Attributes | |
list | NONE_CLOSED_HTML_TAGS |
list | CANONIZATION_TAGS = ['href', 'src'] |
string | MACRO_ATTRIBUTES = '%ATTRIBUTES%' |
string | PATTERN_CLOSE_VOID = r"<%s.*?(/)>" |
int | CLOSE_VOID_NOT_CLOSE = 0 |
int | CLOSE_VOID_CLOSE = 1 |
int | CLOSE_VOID_AUTO = 2 |
values = elem.xpath('@' + attrName).extract() | |
bool | found = False |
bool | ret = True |
Definition at line 20 of file ExtendInnerText.py.
def app.ExtendInnerText.ExtendInnerText.__init__ | ( | self, | |
tagReplacers = None , |
|||
delimiter = ' ' , |
|||
innerDelimiter = ' ' , |
|||
REconditions = None , |
|||
attrConditions = None , |
|||
keepAttributes = None , |
|||
baseUrl = None , |
|||
closeVoid = None , |
|||
excludeNodes = None |
|||
) |
Definition at line 36 of file ExtendInnerText.py.
def app.ExtendInnerText.ExtendInnerText.applyCloseVoid | ( | self, | |
nodeElem, | |||
tagName | |||
) |
|
static |
def app.ExtendInnerText.ExtendInnerText.extractAttributes | ( | self, | |
nodeElem, | |||
tagName, | |||
keepAttributes, | |||
baseUrl | |||
) |
Definition at line 236 of file ExtendInnerText.py.
def app.ExtendInnerText.ExtendInnerText.innerText | ( | self, | |
contentBuf, | |||
xPath, | |||
tagRemoves = None |
|||
) |
Definition at line 109 of file ExtendInnerText.py.
def app.ExtendInnerText.ExtendInnerText.innerTextToList | ( | self, | |
contentBuf, | |||
xPath, | |||
tagRemoves = None |
|||
) |
Definition at line 198 of file ExtendInnerText.py.
|
static |
Check whether a node should be excluded.
excludeNodes | - dictionary with the criteria for exclusion |
elem | - element to check |
Definition at line 301 of file ExtendInnerText.py.
def app.ExtendInnerText.ExtendInnerText.nodeCallbackCloseHandler | ( | self, | |
nodeElem, | |||
level | |||
) |
Definition at line 78 of file ExtendInnerText.py.
def app.ExtendInnerText.ExtendInnerText.nodeCallbackOpenHandler | ( | self, | |
nodeElem, | |||
level | |||
) |
Definition at line 51 of file ExtendInnerText.py.
def app.ExtendInnerText.ExtendInnerText.textCallbackHandler | ( | self, | |
nodeElem, | |||
level, | |||
excludeTags | |||
) |
|
static |
Definition at line 168 of file ExtendInnerText.py.
app.ExtendInnerText.ExtendInnerText.attrConditions |
Definition at line 43 of file ExtendInnerText.py.
app.ExtendInnerText.ExtendInnerText.baseUrl |
Definition at line 46 of file ExtendInnerText.py.
|
static |
Definition at line 25 of file ExtendInnerText.py.
|
static |
Definition at line 33 of file ExtendInnerText.py.
|
static |
Definition at line 32 of file ExtendInnerText.py.
|
static |
Definition at line 31 of file ExtendInnerText.py.
app.ExtendInnerText.ExtendInnerText.closeVoid |
Definition at line 47 of file ExtendInnerText.py.
app.ExtendInnerText.ExtendInnerText.delimiter |
Definition at line 40 of file ExtendInnerText.py.
app.ExtendInnerText.ExtendInnerText.errorString |
Definition at line 39 of file ExtendInnerText.py.
app.ExtendInnerText.ExtendInnerText.excludeNodes |
Definition at line 48 of file ExtendInnerText.py.
|
static |
Definition at line 330 of file ExtendInnerText.py.
app.ExtendInnerText.ExtendInnerText.innerDelimiter |
Definition at line 41 of file ExtendInnerText.py.
app.ExtendInnerText.ExtendInnerText.keepAttributes |
Definition at line 45 of file ExtendInnerText.py.
|
static |
Definition at line 27 of file ExtendInnerText.py.
|
static |
Definition at line 22 of file ExtendInnerText.py.
|
static |
Definition at line 29 of file ExtendInnerText.py.
app.ExtendInnerText.ExtendInnerText.REconditions |
Definition at line 42 of file ExtendInnerText.py.
|
static |
Definition at line 338 of file ExtendInnerText.py.
app.ExtendInnerText.ExtendInnerText.stripHtml |
Definition at line 37 of file ExtendInnerText.py.
app.ExtendInnerText.ExtendInnerText.stripHtmlList |
Definition at line 38 of file ExtendInnerText.py.
app.ExtendInnerText.ExtendInnerText.tagReplacers |
Definition at line 44 of file ExtendInnerText.py.
|
static |
Definition at line 327 of file ExtendInnerText.py.