5 HCE project, Python bindings, Distributed Tasks Manager application. 6 Event objects definitions. 10 @author Oleksii <developers.hce@gmail.com> 11 @link: http://hierarchical-cluster-engine.com/ 12 @copyright: Copyright © 2013-2014 IOIX Ukraine 13 @license: http://hierarchical-cluster-engine.com/license/ 23 from subprocess
import Popen
24 from subprocess
import PIPE
26 import MySQLdb.cursors
27 from contextlib
import closing
31 from urlparse
import urlparse
40 logging.basicConfig(filename=
"prepairer.log", filemode=
"w")
41 logger = logging.getLogger(
"Prepairer")
42 logger.setLevel(
"DEBUG")
45 dc_sites_db_connect =
None 46 dc_urls_db_connect =
None 51 with closing(db_connector.cursor(MySQLdb.cursors.DictCursor))
as cursor:
54 return cursor.fetchall()
55 except mdb.Error
as err:
56 db_connector.rollback()
62 global dc_sites_db_connect
63 global dc_urls_db_connect
70 db_dc_sites =
"dc_sites" 71 db_dc_urls =
"dc_urls" 73 dc_sites_db_connect = mdb.connect(dbHost, dbUser, dbPWD, db_dc_sites, dbPort)
74 dc_urls_db_connect = mdb.connect(dbHost, dbUser, dbPWD, db_dc_urls, dbPort)
78 site =
Site(input_url)
88 url =
URL(site.id, input_url)
89 url.status = URL.STATUS_SELECTED_CRAWLING
90 url.type = URL.TYPE_SINGLE
95 file_name =
"site_" + str(site.id) +
".json" 96 open(file_name,
"w").write(site.toJSON())
97 cmd =
"./dc-client.py --config=../ini/dc-client.ini --command=SITE_NEW --file=./%s" % file_name
98 process = Popen(cmd, stdout=PIPE, stdin=PIPE, shell=
True, close_fds=
True)
99 (output, err) = process.communicate()
100 exit_code = process.wait()
101 open(
"dc-client_new_site_output.txt",
"w").write(output)
106 file_name =
"url_" + str(site.id) +
".json" 107 open(file_name,
"w").write(
"[" + url.toJSON() +
"]")
108 cmd =
"./dc-client.py --config=../ini/dc-client.ini --command=URL_NEW --file=./%s" % file_name
109 process = Popen(cmd, stdout=PIPE, stdin=PIPE, shell=
True, close_fds=
True)
110 (output, err) = process.communicate()
111 exit_code = process.wait()
112 open(
"dc-client_new_url_output.txt",
"w").write(output)
117 url_updated =
URLUpdate(site.id, input_url)
118 url_updated.status = URL.STATUS_SELECTED_CRAWLING
119 url_updated.type = URL.TYPE_SINGLE
120 file_name =
"url_" + str(url_updated.urlMd5) +
".json" 121 open(file_name,
"w").write(
"[" + url_updated.toJSON() +
"]")
122 cmd =
"./dc-client.py --config=../ini/dc-client.ini --command=URL_UPDATE --file=./%s" % file_name
123 process = Popen(cmd, stdout=PIPE, stdin=PIPE, shell=
True, close_fds=
True)
124 (output, err) = process.communicate()
125 exit_code = process.wait()
126 open(
"dc-client_update_url_output.txt",
"w").write(output)
132 site_updated.state = Site.STATE_ACTIVE
133 file_name =
"updated_site_" + str(site_updated.id) +
".json" 134 open(file_name,
"w").write(site_updated.toJSON())
135 cmd =
"./dc-client.py --config=../ini/dc-client.ini --command=SITE_UPDATE --file=./%s" % file_name
136 process = Popen(cmd, stdout=PIPE, stdin=PIPE, shell=
True, close_fds=
True)
137 (output, err) = process.communicate()
138 exit_code = process.wait()
139 open(
"dc-client_update_site_output.txt",
"w").write(output)
144 for input_url
in sys.stdin:
146 logger.debug(input_url)
150 open(site.id,
"w").write(input_url)
160 input_object =
Batch(11, url_list)
161 input_pickled_object = pickle.dumps(input_object)
162 print input_pickled_object
165 if __name__ ==
"__main__":
def updateURL(input_url, site)
def createURLObj(site, input_url)
def createSiteObj(input_url)
def executeQuery(db_connector, query)