HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ftest_dc_full_batch_processing.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 
3 
4 """
5 HCE project, Python bindings, Distributed Crawler application.
6 Functional test of full batch processing: a pickled Batch is piped through the crawler task and the processor task.
7 
8 @package: dc
9 @file ftest_dc_full_batch_processing.py
10 @author Oleksii <developers.hce@gmail.com>
11 @link: http://hierarchical-cluster-engine.com/
12 @copyright: Copyright &copy; 2013-2014 IOIX Ukraine
13 @license: http://hierarchical-cluster-engine.com/license/
14 @since: 0.1
15 """
16 
17 
import hashlib
import md5
#import pickle
try:
    import cPickle as pickle
except ImportError:
    import pickle
from collections import namedtuple
from subprocess import PIPE
from subprocess import Popen

from dc.EventObjects import Batch
from dc.EventObjects import BatchItem
from dtm.EventObjects import GeneralResponse
30 
31 
def _md5_hex(text):
    """Return the hexadecimal MD5 digest of *text*.

    Uses ``hashlib`` instead of the long-deprecated ``md5`` module; the digest
    is the same.  The text is encoded as UTF-8 first so the helper accepts the
    ASCII URL literals used below on both Python 2 and Python 3.
    """
    return hashlib.md5(text.encode("utf-8")).hexdigest()


# Item 1: site and URL ids derived from the public yomiuri.co.jp addresses.
siteId1 = _md5_hex("http://www.yomiuri.co.jp")
urlId1 = _md5_hex("http://www.yomiuri.co.jp/sports/mlb/20140407-OYT1T50015.html?from=ytop_top")
bItem1 = BatchItem(siteId1, urlId1)

# Item 2: site and URL ids derived from the copy served from localhost.
siteId2 = _md5_hex("http://localhost/www.yomiuri.co.jp")
urlId2 = _md5_hex("http://localhost/www.yomiuri.co.jp/template1.html")
bItem2 = BatchItem(siteId2, urlId2)

# Item 3: same URL as item 2, but with an empty site id.
siteId3 = ""
urlId3 = _md5_hex("http://localhost/www.yomiuri.co.jp/template1.html")
bItem3 = BatchItem(siteId3, urlId3)


# Items submitted in the batch; bItem1 is defined above but currently left out.
url_list = [
    bItem2,
    bItem3,
]
52 
53 
# Shell fragment that changes into the directory containing the task scripts.
PWD = "cd ../../bin"

# Interpreter used to launch both task scripts.
PYTHON_BINARY = "/usr/bin/python"

# Crawler task script and the command-line option naming its config file.
CRAWLER_TASK_BINARY = "./crawler-task.py"
CRAWLER_TASK_CFG = "--config=../ini/crawler-task.ini"

# Processor task script and the command-line option naming its config file.
PROCESSOR_TASK_BINARY = "./processor-task.py"
PROCESSOR_TASK_CFG = "--config=../ini/processor-task.ini"


# Outcome of one pipeline run: shell exit status plus the captured streams.
Results = namedtuple("Results", ["exit_code", "output", "err"])
63 
64 
def processFullBatch(input_object):
    """Run the crawler task piped into the processor task on a pickled object.

    *input_object* is pickled and written to the standard input of a shell
    pipeline that first executes the ``PWD`` fragment, then runs the crawler
    task script and pipes its output into the processor task script.

    Returns a ``Results`` tuple ``(exit_code, output, err)`` where
    ``exit_code`` is the exit status reported for the shell, ``output`` is
    everything the pipeline wrote to standard output, and ``err`` is always
    ``None`` because standard error is not redirected to a pipe.
    """
    payload = pickle.dumps(input_object)

    crawler_cmd = " ".join((PYTHON_BINARY, CRAWLER_TASK_BINARY, CRAWLER_TASK_CFG))
    processor_cmd = " ".join((PYTHON_BINARY, PROCESSOR_TASK_BINARY, PROCESSOR_TASK_CFG))
    pipeline = PWD + " && " + crawler_cmd + " | " + processor_cmd

    child = Popen(pipeline, shell=True, stdin=PIPE, stdout=PIPE)
    # communicate() sends the payload, drains stdout and waits for the shell
    # to terminate, so the return code is already available afterwards.
    captured_out, captured_err = child.communicate(input=payload)
    return Results(child.returncode, captured_out, captured_err)
73 
74 
if __name__ == "__main__":
    # Wrap the module-level batch items in a Batch object.
    # NOTE(review): the first argument (1) is presumably the batch id -
    # confirm against dc.EventObjects.Batch.
    input_object = Batch(1, url_list)

    # Feed the batch through the crawler | processor pipeline.
    result = processFullBatch(input_object)

    # An empty stdout means the pipeline produced no response; fail with an
    # explicit message (including the shell exit code) rather than letting
    # pickle.loads() raise an opaque EOFError.
    if not result.output:
        raise SystemExit("Batch pipeline produced no output (exit code %s)"
                         % (result.exit_code,))

    # The output is written by this project's own task scripts; pickle must
    # never be used to load data from an untrusted source.
    response = pickle.loads(result.output)

    # TODO: assert on the response once its type is confirmed; the original
    # draft intended to compare its errorCode with GeneralResponse.ERROR_OK.