HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
generate_sites_jsons Namespace Reference

Functions

def readTemplatesFromMySQL ()
 
def cutURL (url)
 
def generateTemplates ()
 
def fillTemplates ()
 
def readTemplatesFromFile ()
 
def executeQuery (db_connector, query)
 
def loadDBBackend ()
 
def createSiteObj (input_url)
 
def addSite (site)
 

Variables

 filename
 
 filemode
 
 logger = logging.getLogger("Prepairer")
 
 db_connector = None
 
 dc_sites_db_connect = None
 
 dc_urls_db_connect = None
 
dictionary site_templates_dic = {}
 
dictionary templates_dic = {}
 
 site = createSiteObj(input_url)
 

Detailed Description

HCE project, Python bindings, Distributed Tasks Manager application.
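Generates per-site JSON files (site_<id>.json) from input URLs, attaching site templates read from MySQL or from a local file.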

@package: dc
@file generate_sites_jsons.py
@author Oleksii <developers.hce@gmail.com>
@link: http://hierarchical-cluster-engine.com/
@copyright: Copyright &copy; 2013-2014 IOIX Ukraine
@license: http://hierarchical-cluster-engine.com/license/
@since: 0.1

Function Documentation

◆ addSite()

def generate_sites_jsons.addSite (   site)

Definition at line 169 of file generate_sites_jsons.py.

169 def addSite(site):
170  file_name = "site_" + str(site.id) + ".json"
171  open(file_name, "w").write(site.toJSON())
172 
173 
174 
Here is the call graph for this function:

◆ createSiteObj()

def generate_sites_jsons.createSiteObj (   input_url)

Definition at line 129 of file generate_sites_jsons.py.

129 def createSiteObj(input_url):
130 
131  # strip input url
132  input_url = input_url.strip()
133 
134  # root url
135  root_url = input_url
136 
137  # get url for md5
138  norm_url = cutURL(input_url)
139  # norm_url = input_url
140 
141  # create site
142  site = Site(norm_url)
143  site.urls = [input_url]
144  # create site filters
145  # site_filter_pattern = ".*" + norm_url + ".*"
146  site_filter_pattern = ".*" + cutURL(input_url) + ".*"
147  site_filters = SiteFilter(site.id, site_filter_pattern)
148 
149 
150  # create site properties templates
151  print templates_dic
152  print site.id
153  if site.id in templates_dic:
154  site.properties["template"] = templates_dic[site.id]
155 
156  # fill site
157  # site.urls = [root_url]
158  # site.urls = []
159  site.filters = [site_filters]
160  site.maxResources = 100000
161  site.maxURLs = 100000
162  site.maxErrors = 100000
163  site.maxResourceSize = 1000000
164  # site.state = Site.STATE_SUSPENDED
165  # site.filters = [SiteFilter(site.id, "(.*)")]
166  return site
167 
168 
Here is the call graph for this function:

◆ cutURL()

def generate_sites_jsons.cutURL (   url)

Definition at line 62 of file generate_sites_jsons.py.

62 def cutURL(url):
63  b = url
64  arr = None
65  a = urlparse(url).netloc.split(":")[0].split(".")
66  if len(a) > 2 and a[-3] != "www":
67  arr = a[-3:]
68  b = str(arr[-3] + "." + arr[-2] + "." + arr[-1])
69  else:
70  arr = a[-2:]
71  b = str(arr[-2] + "." + arr[-1])
72  return b
73 
74 
Here is the caller graph for this function:

◆ executeQuery()

def generate_sites_jsons.executeQuery (   db_connector,
  query 
)

Definition at line 101 of file generate_sites_jsons.py.

101 def executeQuery(db_connector, query):
102  try:
103  with closing(db_connector.cursor(MySQLdb.cursors.DictCursor)) as cursor:
104  cursor.execute(query)
105  db_connector.commit()
106  return cursor.fetchall()
107  except mdb.Error as err: # @todo logging in db_task
108  db_connector.rollback()
109  raise
110 
111 
def executeQuery(db_connector, query)
Here is the caller graph for this function:

◆ fillTemplates()

def generate_sites_jsons.fillTemplates ( )

Definition at line 85 of file generate_sites_jsons.py.

85 def fillTemplates():
86  global site_templates_dic
87  print site_templates_dic
88  for (key, value) in site_templates_dic.items():
89  url = cutURL(key)
90  md5 = hashlib.md5(url).hexdigest()
91  templates_dic[md5] = MySQLdb.escape_string(value)
92 
93 
Here is the call graph for this function:
Here is the caller graph for this function:

◆ generateTemplates()

def generate_sites_jsons.generateTemplates ( )

Definition at line 75 of file generate_sites_jsons.py.

75 def generateTemplates():
76  templates = readTemplatesFromMySQL()
77  print templates
78  for template in templates:
79  # print template
80  site_templates_dic[template["URL"]] = template["Value"]
81  with open("sites_templates_dic", "w") as f:
82  f.write(json.dumps(site_templates_dic))
83 
84 
Here is the call graph for this function:

◆ loadDBBackend()

def generate_sites_jsons.loadDBBackend ( )

Definition at line 112 of file generate_sites_jsons.py.

112 def loadDBBackend():
113  global db_connector
114  global dc_sites_db_connect
115  global dc_urls_db_connect
116 
117  dbHost = "127.0.0.1"
118  dbPort = 3306
119  dbUser = "hce"
120  dbPWD = "hce12345"
121 
122  db_dc_sites = "dc_sites"
123  db_dc_urls = "dc_urls"
124 
125  dc_sites_db_connect = mdb.connect(dbHost, dbUser, dbPWD, db_dc_sites, dbPort)
126  dc_urls_db_connect = mdb.connect(dbHost, dbUser, dbPWD, db_dc_urls, dbPort)
127 
128 
Here is the caller graph for this function:

◆ readTemplatesFromFile()

def generate_sites_jsons.readTemplatesFromFile ( )

Definition at line 94 of file generate_sites_jsons.py.

94 def readTemplatesFromFile():
95  global site_templates_dic
96  with open("sites_templates_dic", "r") as f:
97  site_templates_dic = json.loads(f.read())
98  print site_templates_dic
99 
100 
Here is the caller graph for this function:

◆ readTemplatesFromMySQL()

def generate_sites_jsons.readTemplatesFromMySQL ( )

Definition at line 55 of file generate_sites_jsons.py.

55 def readTemplatesFromMySQL():
56  query = "SELECT sites_urls.URL, sites_properties.`Value` FROM `sites_properties` INNER JOIN sites_urls ON sites_urls.Site_Id = sites_properties.Site_Id AND sites_properties.Name = 'template'"
57  print query
58  rows = executeQuery(dc_sites_db_connect, query)
59  return rows
60 
61 
def executeQuery(db_connector, query)
Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ db_connector

generate_sites_jsons.db_connector = None

Definition at line 46 of file generate_sites_jsons.py.

◆ dc_sites_db_connect

generate_sites_jsons.dc_sites_db_connect = None

Definition at line 47 of file generate_sites_jsons.py.

◆ dc_urls_db_connect

generate_sites_jsons.dc_urls_db_connect = None

Definition at line 48 of file generate_sites_jsons.py.

◆ filemode

generate_sites_jsons.filemode

Definition at line 42 of file generate_sites_jsons.py.

◆ filename

generate_sites_jsons.filename

Definition at line 42 of file generate_sites_jsons.py.

◆ logger

generate_sites_jsons.logger = logging.getLogger("Prepairer")

Definition at line 43 of file generate_sites_jsons.py.

◆ site

generate_sites_jsons.site = createSiteObj(input_url)

Definition at line 181 of file generate_sites_jsons.py.

◆ site_templates_dic

dictionary generate_sites_jsons.site_templates_dic = {}

Definition at line 51 of file generate_sites_jsons.py.

◆ templates_dic

dictionary generate_sites_jsons.templates_dic = {}

Definition at line 52 of file generate_sites_jsons.py.