3 HCE project, Python bindings, DC dependencies 4 The scrapy xpath nodes walking research tests. 7 @author bgv bgv.hce@gmail.com 8 @link: http://hierarchical-cluster-engine.com/ 9 @copyright: Copyright © 2015 IOIX Ukraine 10 @license: http://hierarchical-cluster-engine.com/license/ 13 http://effbot.org/zone/element-index.htm 14 http://doc.scrapy.org/en/latest/topics/selectors.html#working-with-relative-xpaths 15 http://doc.scrapy.org/en/0.7/topics/selectors.html 21 sys.setdefaultencoding(
'utf8')
23 from scrapy.selector
import Selector
25 <div class='content'>\ 42 sel = Selector(text=r)
43 print "-->" + str(sel._root) +
"<--" 47 print "-->" + str(c[0]._root) +
"<--" 48 print "-->>" + str(dir(c[0])) +
"<<--" 50 d = c[0].xpath(
'../../*')
52 print "-->" + str(d[0]._root) +
"<--" 53 print "-->>" + str(dir(d[0]._root)) +
"<<--" 54 print "prefix -->>" + str(d[0]._root.prefix) +
"<<--" 55 print "tag -->>" + str(d[0]._root.tag) +
"<<--" 56 print "text -->>" + str(d[0]._root.text) +
"<<--" 58 d = c[0]._root.getparent()
60 print "-->>" + str(dir(d)) +
"<<--" 61 print "prefix -->>" + str(d.prefix) +
"<<--" 62 print "tag -->>" + str(d.tag) +
"<<--" 63 print "text -->>" + str(d.text) +
"<<--" 72 p = etreeElement.getparent()
74 index = p.index(etreeElement) + 1
75 rpath.insert(0, (etreeElement.tag, str(index)))
78 rpath.insert(0, (etreeElement.tag, 0))
def get_path(etreeElement, path=None)