HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
scraper_json_viewer.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 
3 
4 """
5 HCE project, Python bindings, Distributed Tasks Manager application.
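6 Scraper JSON viewer: reads a pickled batch from stdin and prints the decoded `data` column of the newest matching `articles` row for the batch's first item.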
7 
8 @package: dc
9 @file scraper_json_viewer.py
10 @author Oleksii <developers.hce@gmail.com>
11 @link: http://hierarchical-cluster-engine.com/
12 @copyright: Copyright &copy; 2013-2014 IOIX Ukraine
13 @license: http://hierarchical-cluster-engine.com/license/
14 @since: 0.1
15 """
16 
17 import ppath
18 from ppath import sys
19 
20 import pickle
21 import sqlite3 as lite
22 from dc_processor.scraper_utils import decode
23 
24 
25 config_db_dir = "../data/dc_dbdata"
26 
def processBatch():
    """Print the stored scraper output for the first item of a pickled batch.

    Reads a pickled batch object from stdin, takes its first item, opens the
    per-site SQLite database ``<config_db_dir>/<siteId>.db`` (``0.db`` when
    the item has no siteId), selects the newest ``articles`` row whose ``id``
    equals the item's ``urlId`` and prints the decoded ``data`` column.

    @raise LookupError: if no ``articles`` row matches the item's urlId.
    """
    # NOTE(security): pickle.loads() can execute arbitrary code while
    # unpickling; only feed this tool batches produced by trusted components.
    batch = pickle.loads(sys.stdin.read())
    input_data = batch.items[0]

    # Items without a site id are stored in the default "0" database.
    site_id = input_data.siteId if input_data.siteId else "0"
    db_name = config_db_dir + "/" + site_id + ".db"

    con = lite.connect(db_name)
    try:
        cur = con.cursor()
        # Bind urlId as a parameter instead of formatting it into the SQL text,
        # so quotes inside the value cannot alter the statement.
        cur.execute(
            "SELECT `data` FROM `articles` WHERE `id`=? "
            "order by `CDate` DESC LIMIT 1",
            (input_data.urlId,),
        )
        row = cur.fetchone()
    finally:
        # A sqlite3 connection used as a context manager only commits or
        # rolls back; it has to be closed explicitly.
        con.close()

    if row is None:
        raise LookupError(
            "no article found for urlId %r in %s" % (input_data.urlId, db_name)
        )
    print(decode(row[0]))
45 
46 
47 if __name__ == "__main__":
48  processBatch()