# -*- coding: utf-8 -*-
"""
HCE project, Python bindings, Distributed Tasks Manager application.
Event objects definitions.

Functional test script: pickles a batch object, feeds it to
crawler-task.py and pipes the crawler output into processor-task.py.

@file ftest_dc_full_batch_processing.py
@author Oleksii <developers.hce@gmail.com>
@link: http://hierarchical-cluster-engine.com/
@copyright: Copyright © 2013-2014 IOIX Ukraine
@license: http://hierarchical-cluster-engine.com/license/
"""

import cPickle as pickle
# md5.new() is used by the site/url id fixtures in this module; no import of
# it survives in the visible part of the file, so it is stated explicitly.
import md5
from collections import namedtuple
from subprocess import Popen
from subprocess import PIPE
# Test fixtures: each id is the MD5 hex digest of the corresponding URL
# string (site root URL for siteIdN, resource URL for urlIdN).

# Remote site fixture.
siteId1 = str(md5.new("http://www.yomiuri.co.jp").hexdigest())
urlId1 = str(md5.new("http://www.yomiuri.co.jp/sports/mlb/20140407-OYT1T50015.html?from=ytop_top").hexdigest())

# Local mirror fixture.
siteId2 = str(md5.new("http://localhost/www.yomiuri.co.jp").hexdigest())
urlId2 = str(md5.new("http://localhost/www.yomiuri.co.jp/template1.html").hexdigest())

# NOTE(review): this hashes exactly the same URL as urlId2, so
# urlId3 == urlId2 -- confirm that this is intended.
urlId3 = str(md5.new("http://localhost/www.yomiuri.co.jp/template1.html").hexdigest())

# Interpreter, task scripts and their config arguments used to assemble the
# shell pipeline. The script/config paths are relative, so they depend on the
# working directory the shell command runs in.
PYTHON_BINARY = "/usr/bin/python"
CRAWLER_TASK_BINARY = "./crawler-task.py"
CRAWLER_TASK_CFG = "--config=../ini/crawler-task.ini"
PROCESSOR_TASK_BINARY = "./processor-task.py"
PROCESSOR_TASK_CFG = "--config=../ini/processor-task.ini"

# Outcome of one pipeline run: exit status plus the captured streams.
Results = namedtuple("Results", "exit_code, output, err")
def processFullBatch(input_object):
    """Run the crawler -> processor shell pipeline on one batch object.

    ``input_object`` is pickled and written to the stdin of a shell command
    that runs the crawler task and pipes its stdout into the processor task.

    Returns a ``Results(exit_code, output, err)`` tuple where ``exit_code``
    is the exit status of the shell command and ``output`` is the raw stdout
    of the pipeline.
    """
    input_pickled_object = pickle.dumps(input_object)

    # NOTE(review): PWD is not defined in the visible part of this file;
    # presumably a shell prefix (e.g. a "cd ...") defined at module level --
    # verify before running.
    # shell=True is required here: the command relies on "&&" and "|".
    command = (PWD + " && " +
               PYTHON_BINARY + " " + CRAWLER_TASK_BINARY + " " + CRAWLER_TASK_CFG +
               " | " +
               PYTHON_BINARY + " " + PROCESSOR_TASK_BINARY + " " + PROCESSOR_TASK_CFG)
    process = Popen(command, stdout=PIPE, stdin=PIPE, shell=True)

    # stderr is not redirected to a pipe, so ``err`` is always None and the
    # child processes' stderr goes to this process's stderr.
    (output, err) = process.communicate(input=input_pickled_object)

    # communicate() has already waited for termination; wait() just returns
    # the recorded exit status.
    exit_code = process.wait()
    return Results(exit_code, output, err)


if __name__ == "__main__":
    # NOTE(review): Batch and url_list are not defined in the visible part of
    # this file -- they must be imported / built before this point.
    input_object = Batch(1, url_list)

    # ``result`` was read below without ever being assigned; the call is
    # restored here from the Results fields it is accessed with.
    result = processFullBatch(input_object)

    # The pipeline's stdout is expected to be a pickled response object.
    # pickle.loads must only ever be applied to trusted output.
    response = pickle.loads(result.output)