HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ftest_feedparser.py
Go to the documentation of this file.
1 #coding: utf-8
2 '''
3 HCE project, Python bindings, DC dependencies
4 The feedparser research tests.
5 
6 @package: drce
7 @author bgv bgv.hce@gmail.com
8 @link: http://hierarchical-cluster-engine.com/
9 @copyright: Copyright © 2015 IOIX Ukraine
10 @license: http://hierarchical-cluster-engine.com/license/
11 @since: 0.1
12 '''
13 
14 
15 import feedparser
16 import requests
17 
18 def _parse_date_fixes(aDateString):
19  ret = None
20  ds = aDateString
21 
22  # Assumes that date format broken and contains the semicolon ":" in TZ like: "Wed, 19 Aug 2015 08:45:53 +01:00"
23  parts = ds.split(' ')
24  if ("+" in parts[len(parts) - 1] or "-" in parts[len(parts) - 1]) and ":" in parts[len(parts) - 1]:
25  parts[len(parts) - 1] = parts[len(parts) - 1].replace(":", "")
26  ds = " ".join(parts)
27  #ret = feedparser._parse_date_rfc822(ds)
28  ret = feedparser._parse_date(ds)
29 
30  return ret
31 
32 
# Register the colon-in-timezone workaround with feedparser's date handlers.
feedparser.registerDateHandler(_parse_date_fixes)


# Sample date strings for manual checks of the date parsing:
#a = "Mon, 17 Aug 2015 17:29:47 +0000"
a = "Wed, 19 Aug 2015 08:45:53 +01:00"
#print(feedparser._parse_date(a))

# Single-argument print(...) calls produce the same output on Python 2 and 3.
print(feedparser._FeedParserMixin._start_pubdate)

# Alternative feeds used during the research:
#url = 'http://www.dailyfinance.com/rss.xml'
#url = 'http://washingtonmonthly.com/ten-miles-square/atom.xml'
url = 'http://www.politico.com/rss/politicopicks.xml'
# Bound the wait: without a timeout requests may block indefinitely on an
# unresponsive server.
r = requests.get(url, timeout=30)
d = feedparser.parse(r.content)
print(dict(d))
print("\nentries=" + str(len(d.entries)))

# Report the link of every parsed entry, flagging entries without one.
for e in d.entries:
    if hasattr(e, 'link'):
        print(e.link)
    else:
        print('item ' + str(e) + ' has no link field')
55 
def _parse_date_fixes(aDateString)
Definition: join.py:1