5 HCE project, Python bindings, Distributed Tasks Manager application. 6 Functional test: ProcessorTask batch processing. 9 @file ftest_dc_ProcessorTask_batch_processing.py 10 @author Oleksii <developers.hce@gmail.com> 11 @link: http://hierarchical-cluster-engine.com/ 12 @copyright: Copyright © 2013-2014 IOIX Ukraine 13 @license: http://hierarchical-cluster-engine.com/license/ 21 import cPickle
as pickle
24 from collections
import namedtuple
25 from subprocess
import Popen
26 from subprocess
import PIPE
# Test fixtures: MD5 ids for ten Japanese news sites and one article URL
# each, matching the URLMd5 keys used by the `urls_N` tables.
# NOTE(review): switched from the `md5` module (deprecated since Python 2.5)
# to `hashlib`; the hex digests are byte-identical, and the redundant
# str() wrapper around hexdigest() (which already returns str) is dropped.
import hashlib


def _url_md5(url):
    """Return the hex MD5 digest of *url*, used as a site/url id."""
    return hashlib.md5(url.encode("utf-8")).hexdigest()


#url = "http://www.yomiuri.co.jp/economy/20140424-OYT1T50032.html?from=ycont_top_txt"
#urlId = "fb04cc869245f17a34e1691054e6b5ea"
#insert into `urls_0`(`URLMd5`, `URL`) values("fb04cc869245f17a34e1691054e6b5ea", "http://www.yomiuri.co.jp/economy/20140424-OYT1T50032.html?from=ycont_top_txt")

siteId1 = _url_md5("http://www.yomiuri.co.jp")
urlId1 = _url_md5("http://www.yomiuri.co.jp/politics/20140422-OYT1T50105.html?from=ycont_top_txt")

siteId2 = _url_md5("http://www.asahi.com")
urlId2 = _url_md5("http://www.asahi.com/articles/ASG4Q4D0DG4QUTQP00Z.html")

siteId3 = _url_md5("http://mainichi.jp")
urlId3 = _url_md5("http://mainichi.jp/opinion/news/20140422k0000e070249000c.html")

siteId4 = _url_md5("http://sankei.jp.msn.com")
urlId4 = _url_md5("http://sankei.jp.msn.com/politics/news/140401/stt14040112470002-n1.htm")

siteId5 = _url_md5("http://www.jiji.com")
urlId5 = _url_md5("http://www.jiji.com/jc/zc?k=201403/2014033000221&rel=j&g=int&relid=1_1")

siteId6 = _url_md5("http://www.kyodo.co.jp")
urlId6 = _url_md5("http://www.kyodo.co.jp/release-news/2014-04-07_522084/")

siteId7 = _url_md5("http://www3.nhk.or.jp")
urlId7 = _url_md5("http://www3.nhk.or.jp/chihouhatsu/")

siteId8 = _url_md5("http://jp.reuters.com")
urlId8 = _url_md5("http://jp.reuters.com/article/topNews/idJPTYEA3700120140408")

siteId9 = _url_md5("http://www.nikkei.com")
urlId9 = _url_md5("http://www.nikkei.com/article/DGXNASGM0703U_Y4A400C1MM0000/?dg=1")

siteId10 = _url_md5("http://www.tokyo-np.co.jp")
urlId10 = _url_md5("http://www.tokyo-np.co.jp/s/article/2014042290135558.html")
# Interpreter and helper-script locations used to spawn the child
# processes under test (paths are relative to the test's working
# directory).
PYTHON_BINARY = "/usr/bin/python"

# Crawler task: script plus the config flag handed to it verbatim.
CRAWLER_TASK_BINARY = "./crawler-task.py"
CRAWLER_TASK_CFG = "--config=../ini/crawler-task.ini"

# Processor task: script plus its config flag.
PROCESSOR_TASK_BINARY = "./processor-task.py"
PROCESSOR_TASK_CFG = "--config=../ini/processor-task.ini"

# Auxiliary tools: batch preparation and scraper-output pretty-printer.
PREPAIRER = "./prepairer.py"
JSON_VIEWER = "./scraper_json_viewer.py"

# Outcome of one child-process run: exit status plus the captured
# stdout/stderr streams.
Results = namedtuple("Results", "exit_code, output, err")
def processFullBatch(input_object):
  """Run processor-task.py once over *input_object* in batch mode.

  The object is pickled and fed to the child process on stdin, mirroring
  how the DTM daemon hands a batch to the processor task.

  @param input_object: batch object understood by processor-task.py
                       (serialized with cPickle before being written
                       to the child's stdin)
  @return: Results(exit_code, output, err) - the child's exit status
           and its captured stdout/stderr
  """
  input_pickled_object = pickle.dumps(input_object)
  # PWD is a "cd <dir>" shell prefix defined earlier in this file -
  # TODO confirm; the command is a single string run by the shell.
  cmd = PWD + " && " + PYTHON_BINARY + " " + PROCESSOR_TASK_BINARY + " " + PROCESSOR_TASK_CFG
  # shell=True with a string command is acceptable only because every
  # component above is a hard-coded constant; never interpolate
  # user-controlled input into `cmd`.
  # FIX(review): stderr was not piped before, so Results.err was always
  # None even though the field exists - capture it as intended.
  process = Popen(cmd, stdout=PIPE, stdin=PIPE, stderr=PIPE, shell=True)
  (output, err) = process.communicate(input=input_pickled_object)
  # communicate() already waited for the child, so its exit status is
  # available directly; the original extra process.wait() was redundant.
  exit_code = process.returncode
  return Results(exit_code, output, err)


if __name__ == "__main__":
  # NOTE(review): the statements that build `result` (original lines
  # 179-185) are missing from this chunk of the file; only the final
  # unpickle of the child's stdout is visible here.
  # HACK: pickle.loads on subprocess output is acceptable only because
  # the child is a trusted local script - never unpickle untrusted data.
  generalResponse = pickle.loads(result.output)