HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ftest_lxml_xpath_parent_node.py
Go to the documentation of this file.
1 #coding: utf-8
2 '''
3 HCE project, Python bindings, DC dependencies
4 The scrapy xpath nodes walking research tests.
5 
6 @package: DC
7 @author bgv bgv.hce@gmail.com
8 @link: http://hierarchical-cluster-engine.com/
9 @copyright: Copyright © 2015 IOIX Ukraine
10 @license: http://hierarchical-cluster-engine.com/license/
11 @since: 0.1
12 
13 http://effbot.org/zone/element-index.htm
14 http://doc.scrapy.org/en/latest/topics/selectors.html#working-with-relative-xpaths
15 http://doc.scrapy.org/en/0.7/topics/selectors.html
16 http://lxml.de/api/lxml.etree._Element-class.html
17 http://lxml.de/extensions.html
18 
19 '''
20 
21 
22 import sys
# Python 2 only: force the interpreter-wide implicit str<->unicode codec to
# UTF-8. reload(sys) is needed because the site module removes
# sys.setdefaultencoding during startup. On Python 3 neither the `reload`
# builtin nor sys.setdefaultencoding exists and the default encoding is
# already UTF-8, so the hack is skipped there instead of raising NameError.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf8')
25 
26 
27 '''
28 import feedparser
29 f = feedparser.parse("http://www.spiegel.de/schlagzeilen/tops/index.rss")
30 print str(f)
31 
32 import lxml
33 parser = lxml.etree.HTMLParser(encoding='utf-8')
34 ret = lxml.html.fromstring(rendered_unicode_content.encode("utf-8"), parser=parser)
35 '''
36 
37 
38 from lxml import etree
39 
# Sample document: a <div> wrapping a three-item unordered list.
# Adjacent literals are joined into one single-line string (no newlines),
# so the only whitespace between elements is the single leading space
# written in each fragment.
r = (
    "<div class='content'>"
    " <ul>"
    " <li>Item 1</li>"
    " <li>Item 2</li>"
    " <li>Item 3</li>"
    " </ul>"
    "</div>"
)
root = etree.XML(r)

# Select every <li> element in the document and dump the result list.
nodes = root.xpath('//li')
print(str(nodes))

# Inspect the first match: its attribute/method names, text and tag.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(dir(nodes[0]))
print(nodes[0].text)
print(nodes[0].tag)

# Walk up from the first <li> to its parent element and show the parent's
# own leading text (the whitespace before its first child) and its tag.
p = nodes[0].getparent()
print(p.text)
print(p.tag)

# XPath string() returns the string-value of the first <ul>: the
# concatenated text of all its descendants.
text = root.xpath('string(//ul)')
print(str(text))
60