HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc_db.SiteFindTask.SiteFindTask Class Reference
Inheritance diagram for dc_db.SiteFindTask.SiteFindTask:
Collaboration diagram for dc_db.SiteFindTask.SiteFindTask:

Public Member Functions

def __init__ (self, dcSiteTemplate, keyValueDefaultFile, keyValueStorageDir, dBDataTask, dcStatTemplates, dcLogTemplate, dcAttrTemplate)
 
def process (self, siteFind, queryCallback)
 
def loadSiteFromDB (self, siteFind, site_id, site, queryCallback)
 
def loadListOfSitesFromDB (self, siteFind, queryCallback)
 
def siteFind (self, siteFind, queryCallback)
 
- Public Member Functions inherited from dc_db.SiteTask.SiteTask
def __init__ (self, dcSiteTemplate, keyValueDefaultFile, keyValueStorageDir, dBDataTask, dcStatTemplates, dcLogTemplate, dcAttrTemplate, tasksManager=None)
 
def process (self, site, queryCallback)
 
def addSite (self, site, queryCallback)
 
def addSitesFilter (self, site, queryCallback)
 
def updateSitesFilter (self, site, queryCallback)
 
def createPropDict (self, field, site)
 
def isIsoFormatDate (self, dateValue)
 
def addSiteProperties (self, site, queryCallback)
 
def updateSiteProperties (self, site, queryCallback)
 
def addSiteURLSites (self, site, queryCallback)
 
def updateSiteURLSites (self, site, queryCallback)
 
def addSiteURLURLs (self, site, queryCallback)
 
def createTableFromTemplate (self, site, template, dbId, queryCallback, replaceDic=None)
 
def addSiteInKVDB (self, site, queryCallback)
 
def siteDelete (self, site, queryCallback)
 
- Public Member Functions inherited from dc_db.BaseTask.BaseTask
def isSiteExist (self, siteId, queryCallback, userId=None)
 
def generateCriterionSQL (self, criterions, additionWhere=None, siteId=None)
 
def fetchByCriterions (self, criterions, queryCallback)
 
def dbLock (self, mutexName, queryCallback, sleepTime=1, mutexLockTTL=Constants.DEFAULT_LOCK_TTL)
 
def dbUnlock (self, mutexName, queryCallback)
 
def createUrlsInsertQuery (self, siteId, localKeys, localValues)
 
def copyUrlsToDcUrls (self, siteId, queryCallback)
 
def statisticLogUpdate (self, localObj, urlMd5, siteId, status, queryCallback, isInsert=False)
 
def calculateMd5FormUrl (self, url, urlType, useNormilize=False)
 

Public Attributes

 siteStatusTask
 
- Public Attributes inherited from dc_db.SiteTask.SiteTask
 dcSiteTemplate
 
 keyValueDefaultFile
 
 keyValueStorageDir
 
 dBDataTask
 
 dcStatTemplates
 
 dcLogTemplate
 
 dcAttrTemplate
 
 siteDeleteTask
 
 tasksManager
 

Additional Inherited Members

- Static Public Member Functions inherited from dc_db.SiteTask.SiteTask
def execSiteCriterions (criterions, queryCallback)
 
- Static Public Member Functions inherited from dc_db.BaseTask.BaseTask
def readValueFromSiteProp (siteId, propName, queryCallback, urlMd5=None)
 
- Static Public Attributes inherited from dc_db.SiteTask.SiteTask
string FIELD_NAME_URLS = "urls"
 
string FIELD_NAME_PROPERTIES = "properties"
 
string FIELD_NAME_FILTERS = "filters"
 

Detailed Description

Definition at line 32 of file SiteFindTask.py.

Constructor & Destructor Documentation

◆ __init__()

def dc_db.SiteFindTask.SiteFindTask.__init__ (   self,
  dcSiteTemplate,
  keyValueDefaultFile,
  keyValueStorageDir,
  dBDataTask,
  dcStatTemplates,
  dcLogTemplate,
  dcAttrTemplate 
)

Definition at line 39 of file SiteFindTask.py.

39  dcLogTemplate, dcAttrTemplate):
40  super(SiteFindTask, self).__init__(dcSiteTemplate, keyValueDefaultFile, keyValueStorageDir, dBDataTask,
41  dcStatTemplates, dcLogTemplate, dcAttrTemplate)
42  self.siteStatusTask = None
43 
44 
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ loadListOfSitesFromDB()

def dc_db.SiteFindTask.SiteFindTask.loadListOfSitesFromDB (   self,
  siteFind,
  queryCallback 
)

Definition at line 96 of file SiteFindTask.py.

def loadListOfSitesFromDB(self, siteFind, queryCallback):
    """Build and run the query that selects the ids of the sites matching a SiteFind request.

    Three query shapes are produced:
      * no URL given: select from `sites` (optionally joined with extra tables) using the
        SQL generated from the criterions;
      * URL given and criterions empty: select from `sites_urls` by URL prefix, grouped by site id;
      * URL given and criterions present: select from `sites_urls` using the criterions SQL
        extended with the URL prefix condition.

    NOTE(review): when `siteFind.url` is None the criterions are used as a container
    (`"WHERE" in ...`); this looks like it would fail if criterions is None - confirm with callers.

    @param siteFind request object; its `url` and `criterions` attributes are read
    @param queryCallback callable invoked as queryCallback(query, Constants.PRIMARY_DB_ID, Constants.EXEC_NAME)
    @return whatever queryCallback returns for the built query
    """
    if siteFind.url is None:
        # Fix for tables list to use both "sites" and "sites_urls" tables
        urlsTableName = "sites_urls"
        sitesTableName = 'sites'
        criterions = siteFind.criterions

        extraTables = ""
        if "WHERE" in criterions:
            whereClause = criterions["WHERE"]
            if whereClause is not None and urlsTableName in whereClause:
                extraTables = ", " + urlsTableName

        tablesKey = dc.EventObjects.SiteFind.CRITERION_TABLES
        if tablesKey in criterions:
            requestedTables = criterions[tablesKey]
            # An explicit tables list replaces the implicit one unless it already names `sites`.
            if requestedTables is not None and requestedTables != "" and sitesTableName not in requestedTables:
                extraTables = ", " + requestedTables

        query = "SELECT `Id` AS Site_Id FROM " + sitesTableName + extraTables + \
            self.generateCriterionSQL(siteFind.criterions)
    elif not siteFind.criterions:
        escapedUrl = MySQLdb.escape_string(siteFind.url)  # pylint: disable=E1101
        query = "SELECT `Site_Id` FROM sites_urls WHERE `URL` LIKE '" + escapedUrl + "%' GROUP BY `Site_Id`"
    else:
        escapedUrl = MySQLdb.escape_string(siteFind.url)  # pylint: disable=E1101
        urlCondition = " `URL` LIKE '" + escapedUrl + "%' "
        query = "SELECT `Site_Id` FROM sites_urls " + self.generateCriterionSQL(siteFind.criterions, urlCondition)

    foundIds = queryCallback(query, Constants.PRIMARY_DB_ID, Constants.EXEC_NAME)
    logger.debug("List of Site_Id: %s", str(foundIds))

    return foundIds
125 
126 
Here is the call graph for this function:
Here is the caller graph for this function:

◆ loadSiteFromDB()

def dc_db.SiteFindTask.SiteFindTask.loadSiteFromDB (   self,
  siteFind,
  site_id,
  site,
  queryCallback 
)

Definition at line 58 of file SiteFindTask.py.

def loadSiteFromDB(self, siteFind, site_id, site, queryCallback):
    """Overwrite the fields of *site* with the values of its database row.

    The table list of the query starts as 'sites'. When the criterions hold a non-empty
    CRITERION_TABLES entry, that entry is appended to 'sites' if it does not mention 'sites',
    otherwise it is used as the whole table list.

    Every key of Constants.siteDict names an entry of site.__dict__; the mapped value is the
    column name, optionally wrapped in backticks. Date-like fields are stored as str().

    @param siteFind request object whose `criterions` mapping is consulted
    @param site_id mapping that provides the site id under the "Site_Id" key
    @param site object whose __dict__ entries are overwritten in place
    @param queryCallback callable invoked as queryCallback(query, Constants.PRIMARY_DB_ID, Constants.EXEC_NAME)
    @return the same *site* object
    """
    baseTable = 'sites'
    tablesKey = dc.EventObjects.SiteFind.CRITERION_TABLES
    tables = baseTable
    if tablesKey in siteFind.criterions:
        requestedTables = siteFind.criterions[tablesKey]
        if requestedTables is not None and requestedTables != "":
            if baseTable in requestedTables:
                tables = requestedTables
            else:
                tables = baseTable + ", " + requestedTables

    query = GET_SITE_SQL_TEMPLATE % (tables, site_id["Site_Id"])
    rows = queryCallback(query, Constants.PRIMARY_DB_ID, Constants.EXEC_NAME)
    logger.debug("Get site from sites: %s", str(rows))

    # Fields that are always stored in their string form.
    dateKeys = ("uDate", "tcDate", "tcDateProcess", "cDate", "recrawlDate")
    for key, column in Constants.siteDict.items():
        # Strip the surrounding backticks from quoted column names.
        if str(column)[:1] == "`":
            column = str(column)[1:-1]
        currentText = str(site.__dict__[key])
        dbValue = rows[0].get(column, None)
        logger.debug("site field: %s; table field: %s", currentText, str(dbValue))
        if key in dateKeys:
            site.__dict__[key] = str(dbValue)
        else:
            site.__dict__[key] = rows[0].get(column, "a")
    return site
92 
93 
Here is the caller graph for this function:

◆ process()

def dc_db.SiteFindTask.SiteFindTask.process (   self,
  siteFind,
  queryCallback 
)

Definition at line 50 of file SiteFindTask.py.

def process(self, siteFind, queryCallback):
    """Entry point of the task: create a fresh SiteStatusTask helper and run the site search.

    @param siteFind request object passed through to siteFind()
    @param queryCallback callable passed through to siteFind()
    @return the list returned by siteFind()
    """
    self.siteStatusTask = SiteStatusTask()
    return self.siteFind(siteFind, queryCallback)
54 
55 
Here is the call graph for this function:
Here is the caller graph for this function:

◆ siteFind()

def dc_db.SiteFindTask.SiteFindTask.siteFind (   self,
  siteFind,
  queryCallback 
)

Definition at line 132 of file SiteFindTask.py.

def siteFind(self, siteFind, queryCallback):
    """Collect Site objects for every site id matched by the request.

    Each site is loaded through loadSiteFromDB(); its `urls`, `properties` and `filters`
    are filled through self.siteStatusTask unless the corresponding field name is listed
    in siteFind.excludeList, in which case the attribute is set to None.

    @param siteFind request object; its `excludeList` attribute is read here
    @param queryCallback callable forwarded to the loaders and fill helpers
    @return list of populated sites; empty when the id lookup result has no `__iter__`
    """
    foundSites = []
    # get all UNIQ site id's with urls for given url
    site_ids = self.loadListOfSitesFromDB(siteFind, queryCallback)
    if not hasattr(site_ids, "__iter__"):
        return foundSites

    # for each site fill it fields
    for site_id in site_ids:
        site = dc.EventObjects.Site("")
        # load site from sites table
        self.loadSiteFromDB(siteFind, site_id, site, queryCallback)
        site.urls = None if SiteTask.FIELD_NAME_URLS in siteFind.excludeList \
            else self.siteStatusTask.fillUrls(site, queryCallback)
        site.properties = None if SiteTask.FIELD_NAME_PROPERTIES in siteFind.excludeList \
            else self.siteStatusTask.fillProperties(site, queryCallback)
        site.filters = None if SiteTask.FIELD_NAME_FILTERS in siteFind.excludeList \
            else self.siteStatusTask.fillFilters(site, queryCallback)
        foundSites.append(site)

    return foundSites
Here is the call graph for this function:
Here is the caller graph for this function:

Member Data Documentation

◆ siteStatusTask

dc_db.SiteFindTask.SiteFindTask.siteStatusTask

Definition at line 42 of file SiteFindTask.py.


The documentation for this class was generated from the following file: