5 HCE project, Python bindings, Distributed Tasks Manager application. 6 Event objects definitions. 10 @author Oleksii <developers.hce@gmail.com> 11 @link: http://hierarchical-cluster-engine.com/ 12 @copyright: Copyright © 2013-2014 IOIX Ukraine 13 @license: http://hierarchical-cluster-engine.com/license/ 25 from subprocess
import Popen
26 from subprocess
import PIPE
28 import MySQLdb.cursors
29 from contextlib
import closing
33 from urlparse
import urlparse
42 logging.basicConfig(filename=
"prepairer.log", filemode=
"w")
43 logger = logging.getLogger(
"Prepairer")
44 logger.setLevel(
"DEBUG")
47 dc_sites_db_connect =
None 48 dc_urls_db_connect =
None 51 site_templates_dic = {}
56 query =
"SELECT sites_urls.URL, sites_properties.`Value` FROM `sites_properties` INNER JOIN sites_urls ON sites_urls.Site_Id = sites_properties.Site_Id AND sites_properties.Name = 'template'" 65 a = urlparse(url).netloc.split(
":")[0].split(
".")
66 if len(a) > 2
and a[-3] !=
"www":
68 b = str(arr[-3] +
"." + arr[-2] +
"." + arr[-1])
71 b = str(arr[-2] +
"." + arr[-1])
78 for template
in templates:
80 site_templates_dic[template[
"URL"]] = template[
"Value"]
81 with open(
"sites_templates_dic",
"w")
as f:
82 f.write(json.dumps(site_templates_dic))
86 global site_templates_dic
87 print site_templates_dic
88 for (key, value)
in site_templates_dic.items():
90 md5 = hashlib.md5(url).hexdigest()
91 templates_dic[md5] = MySQLdb.escape_string(value)
95 global site_templates_dic
96 with open(
"sites_templates_dic",
"r") as f: 97 site_templates_dic = json.loads(f.read()) 98 print site_templates_dic
103 with closing(db_connector.cursor(MySQLdb.cursors.DictCursor))
as cursor:
104 cursor.execute(query)
105 db_connector.commit()
106 return cursor.fetchall()
107 except mdb.Error
as err:
108 db_connector.rollback()
114 global dc_sites_db_connect
115 global dc_urls_db_connect
122 db_dc_sites =
"dc_sites" 123 db_dc_urls =
"dc_urls" 125 dc_sites_db_connect = mdb.connect(dbHost, dbUser, dbPWD, db_dc_sites, dbPort)
126 dc_urls_db_connect = mdb.connect(dbHost, dbUser, dbPWD, db_dc_urls, dbPort)
132 input_url = input_url.strip()
138 norm_url =
cutURL(input_url)
142 site =
Site(norm_url)
143 site.urls = [input_url]
146 site_filter_pattern =
".*" +
cutURL(input_url) +
".*" 147 site_filters =
SiteFilter(site.id, site_filter_pattern)
153 if site.id
in templates_dic:
154 site.properties[
"template"] = templates_dic[site.id]
159 site.filters = [site_filters]
160 site.maxResources = 100000
161 site.maxURLs = 100000
162 site.maxErrors = 100000
163 site.maxResourceSize = 1000000
170 file_name =
"site_" + str(site.id) +
".json" 171 open(file_name,
"w").write(site.toJSON())
175 if __name__ ==
"__main__":
180 for input_url
in sys.stdin:
def executeQuery(db_connector, query)
def readTemplatesFromMySQL()
def createSiteObj(input_url)
def readTemplatesFromFile()