5 HCE project, Python bindings, Distributed Tasks Manager application. 6 Event objects definitions. 10 @author Oleksii <developers.hce@gmail.com> 11 @link: http://hierarchical-cluster-engine.com/ 12 @copyright: Copyright © 2013-2014 IOIX Ukraine 13 @license: http://hierarchical-cluster-engine.com/license/ 24 from subprocess
import Popen
25 from subprocess
import PIPE
# Module-level logging setup: the root logger writes to a fixed file that is
# truncated on every start (filemode "w"); this module logs through the
# named "search_engine" logger at DEBUG verbosity.
logging.basicConfig(filemode="w", filename="/tmp/search_engine.log")
logger = logging.getLogger("search_engine")
logger.setLevel(logging.DEBUG)
# Scraper step: split one comma-separated input record, wrap the URL in a
# ScraperInData object and pipe the pickled object to scraper.py on stdin.
# NOTE(review): the enclosing `def` and the assignment of `content` are not
# visible in this excerpt; every local name is kept unchanged so that code
# outside this span keeps working.
logger.debug("input: %s", input_data)  # lazy %-args, like the calls below
splitted_data = input_data.split(',')
url = splitted_data[0]
# The site id is selected by a plain substring check on the URL.
site_id = (
    "d57f144e7b26c9976769ea94f18b9064"
    if "google" in url
    else "1fe592caf03fd50c5f065c30f82b13bb"
)
logger.debug("site_id: %s", site_id)

# `template` is bound only for records with exactly two fields.
# NOTE(review): confirm nothing reads it for other record shapes.
if len(splitted_data) == 2:
    template = splitted_data[1]

lastModified = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# NOTE(review): `input` shadows the builtin of the same name; not renamed
# here because lines outside this excerpt may reference it.
input = ScraperInData(url, None, site_id, content, "", None, lastModified, None)
input_pickled_object = pickle.dumps(input)

# Fixed command line (no user data is interpolated); the pickled request is
# written to the child's stdin and both output streams are captured.
cmd = "./scraper.py --config=../ini/scraper_search_engine.ini"
process = Popen(cmd, stdout=PIPE, stdin=PIPE, stderr=PIPE, shell=True, close_fds=True)
(output, err) = process.communicate(input=input_pickled_object)
logger.debug("scraper response output: %s", str(output))
logger.debug("scraper response error: %s", str(err))
# communicate() has already waited for the child; wait() just returns its code.
exit_code = process.wait()
# Fetch `url` with wget and capture both of its output streams.
# The URL comes from external input, so the command is passed as an argv
# list and run WITHOUT a shell: quotes, `;`, `$()` etc. in the URL can no
# longer be interpreted as shell syntax (the original concatenated the URL
# into a shell=True command string).
# NOTE(review): the enclosing `def` is not visible in this excerpt; the
# local names cmd/process/output/err/exit_code are kept unchanged.
cmd = [
    "wget",
    "-qO-",
    "-S",
    "--no-check-certificate",
    "-U",
    ("Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.2.3) "
     "Gecko/20100401 Firefox/3.6.3"),
    "--",  # end of options: a URL starting with "-" is not parsed as a flag
    url,
]
try:
    process = Popen(cmd, stdout=PIPE, stdin=PIPE, stderr=PIPE, close_fds=True)
except OSError as exc:
    # Without a shell, a missing/unrunnable wget raises instead of producing
    # the shell's "command not found" status; keep the old non-raising
    # contract by reporting it through err/exit_code.
    output = b""
    err = str(exc).encode()
    exit_code = 127
else:
    (output, err) = process.communicate()
    # communicate() has already reaped the child; wait() returns its code.
    exit_code = process.wait()
84 if __name__ ==
"__main__":
85 for input_url
in sys.stdin: