HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ftest_scrapy_xpath_parent_node.py
Go to the documentation of this file.
1 #coding: utf-8
2 '''
3 HCE project, Python bindings, DC dependencies
4 Research tests for walking between nodes with Scrapy XPath selectors.
5 
6 @package: DC
7 @author bgv bgv.hce@gmail.com
8 @link: http://hierarchical-cluster-engine.com/
9 @copyright: Copyright © 2015 IOIX Ukraine
10 @license: http://hierarchical-cluster-engine.com/license/
11 @since: 0.1
12 
13 http://effbot.org/zone/element-index.htm
14 http://doc.scrapy.org/en/latest/topics/selectors.html#working-with-relative-xpaths
15 http://doc.scrapy.org/en/0.7/topics/selectors.html
16 '''
17 
18 
19 import sys
# Python 2 only: re-import the sys module and then switch the interpreter-wide
# default string encoding to UTF-8 before the rest of the script runs.
# NOTE(review): the reload() is presumably required because
# setdefaultencoding is not reachable on the initially imported sys module —
# confirm; the order of these two statements must be kept.
reload(sys)
sys.setdefaultencoding('utf8')
22 
23 from scrapy.selector import Selector
# Sample markup: a single <div class='content'> that wraps three <ul> lists,
# each holding three <li> items (suffixes a, b and c).
r = (
    "<div class='content'>"
    " <ul>"
    " <li>Item 1a</li>"
    " <li>Item 2a</li>"
    " <li>Item 3a</li>"
    " </ul>"
    " <ul>"
    " <li>Item 1b</li>"
    " <li>Item 2b</li>"
    " <li>Item 3b</li>"
    " </ul>"
    " <ul>"
    " <li>Item 1c</li>"
    " <li>Item 2c</li>"
    " <li>Item 3c</li>"
    " </ul>"
    "</div>"
)

# Parse the markup and show the element object behind the top-level selector.
sel = Selector(text=r)
print("-->" + str(sel._root) + "<--")

# Select every <li> in the document; a narrower alternative would be
# '//div[@class="content"]/ul/li'.
c = sel.xpath('//li')
print(str(c))
print("-->" + str(c[0]._root) + "<--")
print("-->>" + str(dir(c[0])) + "<<--")

# Way 1: walk upwards with a relative XPath. From the first <li>, '../../*'
# selects all element children of its grandparent.
d = c[0].xpath('../../*')
print(str(d))
print("-->" + str(d[0]._root) + "<--")
print("-->>" + str(dir(d[0]._root)) + "<<--")
for attr in ("prefix", "tag", "text"):
    print(attr + " -->>" + str(getattr(d[0]._root, attr)) + "<<--")

# Way 2: bypass XPath and ask the underlying element object for its parent.
d = c[0]._root.getparent()
print(str(d))
print("-->>" + str(dir(d)) + "<<--")
for attr in ("prefix", "tag", "text"):
    print(attr + " -->>" + str(getattr(d, attr)) + "<<--")
64 
65 
def get_path(etreeElement, path=None):
    '''
    Build the chain of (tag, position) pairs leading from the tree root down
    to the given element.

    @param etreeElement: node exposing getparent(), index(child) and a tag attribute
    @param path: optional list that receives the entries; when given it is
                 modified in place (entries are prepended) and returned
    @return: list of tuples ordered root first. The root entry is (tag, 0)
             with an integer zero; every other entry is (tag, position) where
             position is the 1-based index of the node inside its parent,
             rendered as a string
    '''
    steps = [] if path is None else path

    # Climb towards the root, prepending one entry per level.
    node = etreeElement
    parent = node.getparent()
    while parent is not None:
        position = parent.index(node) + 1
        steps.insert(0, (node.tag, str(position)))
        node = parent
        parent = node.getparent()

    # The node without a parent is the root; it is marked with integer 0.
    steps.insert(0, (node.tag, 0))
    return steps
80 
# Print the root-to-node path of every selected <li>, one list per line.
for ci in c:
    node_path = get_path(ci._root)
    print(node_path)
83 
84