HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ftest_dc_CrawlerTask_batch_processing.py
Go to the documentation of this file.
1 """
2 HCE project, Python bindings, Distributed Crawler application.
3 Functional test: CrawlerTask batch processing.
4 
5 @package: dc
6 @file ftest_dc_CrawlerTask_batch_processing.py
7 @author Oleksii <developers.hce@gmail.com>
8 @link: http://hierarchical-cluster-engine.com/
9 @copyright: Copyright &copy; 2013-2014 IOIX Ukraine
10 @license: http://hierarchical-cluster-engine.com/license/
11 @since: 0.1
12 """
13 
14 import pickle
15 from collections import namedtuple
16 from subprocess import Popen
17 from subprocess import PIPE
18 from dc.EventObjects import Batch
19 
20 
# URLs that make up the test batch handed to the crawler.
url_list = [
    "http://www.yomiuri.co.jp/sports/mlb/20140407-OYT1T50015.html?from=ytop_top",
]


# Command-line pieces used to launch the crawler as a child process.
PYTHON_BINARY = "/usr/bin/python"
CRAWLER_BINARY = "../../bin/crawler.py"
CFG = "--config=../../ini/crawler.ini"


# Outcome of one crawler run: exit status plus the two stream results.
Results = namedtuple("Results", ["exit_code", "output", "err"])
32 
33 
def processFullBatch(input_object):
    """
    Run the crawler on a pickled batch and collect the outcome.

    The object is pickled, written to the crawler's stdin, and the child
    process is left to run to completion.

    @param input_object: picklable object sent to the crawler through stdin
    @return: Results(exit_code, output, err) holding the child's exit status
             and everything it wrote to stdout and stderr
    """
    input_pickled_object = pickle.dumps(input_object)
    # stderr has to be piped too; without it communicate() returns None for
    # the error stream and Results.err never carries any information.
    process = Popen([PYTHON_BINARY, CRAWLER_BINARY, CFG],
                    stdin=PIPE, stdout=PIPE, stderr=PIPE)
    (output, err) = process.communicate(input=input_pickled_object)
    # communicate() has already waited for the child, so returncode is set.
    return Results(process.returncode, output, err)
40 
41 
if __name__ == "__main__":
    # Create the batch object from the list of urls.
    input_object = Batch(url_list)
    # TODO main work
    result = processFullBatch(input_object)
    # Get the response object from the crawler's stdout. Unpickling is only
    # acceptable here because the bytes come from the child process this
    # script started itself; never do this with untrusted data.
    generalResponse = pickle.loads(result.output)
    # Check if all OK. GeneralResponse is not imported by this file, so the
    # constant is read through the response object instead of the class name
    # (which raised NameError).
    # NOTE(review): assumes the unpickled object is a GeneralResponse
    # instance exposing ERROR_OK -- confirm against the crawler's output type.
    assert generalResponse.errorCode == generalResponse.ERROR_OK