HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc_db.SiteCleanUpTask.SiteCleanUpTask Class Reference
Inheritance diagram for dc_db.SiteCleanUpTask.SiteCleanUpTask:
Collaboration diagram for dc_db.SiteCleanUpTask.SiteCleanUpTask:

Public Member Functions

def __init__ (self, keyValueStorageDir, rawDataDir, dBDataTask)
 
def process (self, siteCleanups, queryCallback)
 
def staticUpdate (self, sqlTemplate, siteCleanup, queryCallback)
 
def cleanUpMysqlStorage (self, siteCleanup, queryCallback)
 
def trancateArbitraryTable (self, tablePrefix, siteCleanup, dbId, queryCallback)
 
def cleanUpMysqlSiteTable (self, siteCleanup, queryCallback)
 
def cleanUpDBStorage (self, siteCleanup, filesSuffix, queryCallback)
 
def cleanUpRawDataStorage (self, siteCleanup)
 
- Public Member Functions inherited from dc_db.BaseTask.BaseTask
def isSiteExist (self, siteId, queryCallback, userId=None)
 
def generateCriterionSQL (self, criterions, additionWhere=None, siteId=None)
 
def fetchByCriterions (self, criterions, queryCallback)
 
def dbLock (self, mutexName, queryCallback, sleepTime=1, mutexLockTTL=Constants.DEFAULT_LOCK_TTL)
 
def dbUnlock (self, mutexName, queryCallback)
 
def createUrlsInsertQuery (self, siteId, localKeys, localValues)
 
def copyUrlsToDcUrls (self, siteId, queryCallback)
 
def statisticLogUpdate (self, localObj, urlMd5, siteId, status, queryCallback, isInsert=False)
 
def calculateMd5FormUrl (self, url, urlType, useNormilize=False)
 

Public Attributes

 keyValueStorageDir
 
 rawDataDir
 
 errorCode
 
 errorMessage
 
 dBDataTask
 
 fieldRecalculator
 

Additional Inherited Members

- Static Public Member Functions inherited from dc_db.BaseTask.BaseTask
def readValueFromSiteProp (siteId, propName, queryCallback, urlMd5=None)
 

Detailed Description

Definition at line 33 of file SiteCleanUpTask.py.

Constructor & Destructor Documentation

◆ __init__()

def dc_db.SiteCleanUpTask.SiteCleanUpTask.__init__ (   self,
  keyValueStorageDir,
  rawDataDir,
  dBDataTask 
)

Definition at line 40 of file SiteCleanUpTask.py.

def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask):
    """Store the locations and collaborators used by the clean-up steps.

    keyValueStorageDir -- kept on the instance unchanged
    rawDataDir         -- base directory; cleanUpRawDataStorage() removes the
                          "<rawDataDir>/<site id>" sub-directory
    dBDataTask         -- object whose process() receives data-delete requests
                          from cleanUpDBStorage(); None disables that step
    """
    # Result state; process() resets both values for every request it handles.
    self.errorCode, self.errorMessage = 0, "OK"

    self.keyValueStorageDir = keyValueStorageDir
    self.rawDataDir = rawDataDir
    self.dBDataTask = dBDataTask

    # Created last, exactly as before, so the plain attributes above are already
    # in place if the constructor below fails.
    self.fieldRecalculator = FieldRecalculator()
47 
48 
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ cleanUpDBStorage()

def dc_db.SiteCleanUpTask.SiteCleanUpTask.cleanUpDBStorage (   self,
  siteCleanup,
  filesSuffix,
  queryCallback 
)

Definition at line 168 of file SiteCleanUpTask.py.

def cleanUpDBStorage(self, siteCleanup, filesSuffix, queryCallback):
    """Ask the configured data task to delete the stored data of a site.

    siteCleanup   -- request object; only its ``id`` is used here
    filesSuffix   -- passed through to the DataDeleteRequest
    queryCallback -- passed through to dBDataTask.process()

    Returns whatever dBDataTask.process() returns, or None when no data task
    was supplied to the constructor.
    """
    dataTask = self.dBDataTask
    if dataTask is None:
        # Nothing to delegate to.
        return None

    deleteRequest = dc.EventObjects.DataDeleteRequest(siteCleanup.id, None, filesSuffix)
    return dataTask.process(deleteRequest, queryCallback)
174 
175 
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cleanUpMysqlSiteTable()

def dc_db.SiteCleanUpTask.SiteCleanUpTask.cleanUpMysqlSiteTable (   self,
  siteCleanup,
  queryCallback 
)

Definition at line 158 of file SiteCleanUpTask.py.

def cleanUpMysqlSiteTable(self, siteCleanup, queryCallback):
    """Reset the site's row in the `sites` table.

    Issues one UPDATE on Constants.PRIMARY_DB_ID that sets TcDate to NOW(),
    zeroes Resources, Iterations, ErrorMask, Errors, Contents and
    CollectedURLs, and writes ``siteCleanup.state`` into State for the row
    whose id equals ``siteCleanup.id``.

    NOTE(review): the statement is assembled with plain string formatting, so
    ``siteCleanup.id`` and ``siteCleanup.state`` must already be trusted values.
    """
    resetTemplate = ("UPDATE `sites` SET TcDate=NOW(), Resources=0, Iterations=0, State=%s, "
                     "ErrorMask=0, Errors=0, Contents=0, CollectedURLs=0 WHERE id = '%s'")
    formatArgs = (str(siteCleanup.state), siteCleanup.id)
    queryCallback(resetTemplate % formatArgs, Constants.PRIMARY_DB_ID)
163 
164 
Here is the caller graph for this function:

◆ cleanUpMysqlStorage()

def dc_db.SiteCleanUpTask.SiteCleanUpTask.cleanUpMysqlStorage (   self,
  siteCleanup,
  queryCallback 
)

Definition at line 117 of file SiteCleanUpTask.py.

def cleanUpMysqlStorage(self, siteCleanup, queryCallback):
    """Remove the site's rows from its URLs table, optionally archiving them first.

    siteCleanup   -- request object; ``id``, ``saveRootUrls`` and ``delayedType``
                     are read
    queryCallback -- callable invoked as queryCallback(sql, dbId)

    When ``saveRootUrls`` is set, only rows with a non-empty ParentMd5 are
    selected, copied and deleted; otherwise every row is affected and the table
    is truncated.  Before anything is removed, staticUpdate() is run over the
    affected URLMd5 values.

    For a non-delayed operation the delete statement is executed directly.  For
    a delayed operation a table is first created on Constants.FOURTH_DB_ID from
    Constants.SQL_CREATE_QUERY_TEMPLATE, the affected rows are copied into it
    from ``dc_urls``, and only then the delete statement is executed.  Any other
    ``delayedType`` value results in no delete at all.
    """
    if siteCleanup.saveRootUrls:
        selectTemplate = "SELECT `URLMd5` FROM %s WHERE ParentMd5 != ''"
        copyTemplate = "INSERT INTO %s SELECT * FROM dc_urls.%s WHERE dc_urls.%s.ParentMd5 != ''"
        deleteTemplate = "DELETE FROM `%s` WHERE ParentMd5 != ''"
        copyArgCount = 3
    else:
        selectTemplate = "SELECT `URLMd5` FROM %s"
        copyTemplate = "INSERT INTO %s SELECT * FROM dc_urls.%s"
        deleteTemplate = "TRUNCATE TABLE `%s`"
        copyArgCount = 2

    # Statistics are updated before the table name is even formatted, matching
    # the original statement order.
    self.staticUpdate(selectTemplate, siteCleanup, queryCallback)

    urlsTable = Constants.DC_URLS_TABLE_NAME_TEMPLATE % siteCleanup.id
    deleteQuery = deleteTemplate % urlsTable

    if siteCleanup.delayedType == dc.EventObjects.NOT_DELAYED_OPERATION:
        queryCallback(deleteQuery, Constants.SECONDARY_DB_ID)
    elif siteCleanup.delayedType == dc.EventObjects.DELAYED_OPERATION:
        createQuery = Constants.SQL_CREATE_QUERY_TEMPLATE % (urlsTable, urlsTable)
        queryCallback(createQuery, Constants.FOURTH_DB_ID)

        # Every placeholder of the copy statement receives the same table name.
        copyQuery = copyTemplate % ((urlsTable,) * copyArgCount)
        queryCallback(copyQuery, Constants.FOURTH_DB_ID)

        queryCallback(deleteQuery, Constants.SECONDARY_DB_ID)
140 
141 
Here is the call graph for this function:
Here is the caller graph for this function:

◆ cleanUpRawDataStorage()

def dc_db.SiteCleanUpTask.SiteCleanUpTask.cleanUpRawDataStorage (   self,
  siteCleanup 
)

Definition at line 179 of file SiteCleanUpTask.py.

def cleanUpRawDataStorage(self, siteCleanup):
    """Delete the raw-data directory of a site; failures are logged, not raised.

    siteCleanup -- request object; only its ``id`` is used, as the name of the
                   sub-directory of ``self.rawDataDir`` to remove

    The directory "<rawDataDir>/<id>" is first renamed to a uniquely named
    sibling inside ``rawDataDir`` and the renamed tree is then removed, so the
    original path disappears in a single rename step.

    Returns None.  Any exception (for example a missing directory) is caught
    and written to the debug log together with the current working directory.
    """
    try:
        # NamedTemporaryFile serves only as a source of a unique base name.  The
        # with-block closes (and so deletes) the placeholder file immediately
        # instead of leaving the open handle to the garbage collector.
        with tempfile.NamedTemporaryFile() as placeholder:
            tmpBaseName = os.path.basename(placeholder.name)
        tmpDirName = self.rawDataDir + "/" + tmpBaseName
        originDirName = self.rawDataDir + "/" + siteCleanup.id
        logger.debug(">>> originDir = %s", str(originDirName))
        os.rename(originDirName, tmpDirName)
        shutil.rmtree(tmpDirName)
    except Exception as err:
        type_, value_, traceback_ = sys.exc_info()
        logger.debug("type_ = %s, value_ = %s", str(type_), str(value_))
        stack = traceback.format_tb(traceback_)
        # Only the innermost frame of the traceback is logged.
        logger.debug("Error: %s\n%s", str(err), str(stack.pop()))
        logger.debug(">>> [cleanUpRawDataStorage] CURRENT DIR %s", str(os.getcwd()))
        # NOTE: failures are only logged; the assignments that would report them
        # through errorCode/errorMessage are disabled:
#       self.errorCode = 2
#       self.errorMessage = (">>> cleanUpRawDataStorage Error")
194 
Here is the caller graph for this function:

◆ process()

def dc_db.SiteCleanUpTask.SiteCleanUpTask.process (   self,
  siteCleanups,
  queryCallback 
)

Definition at line 54 of file SiteCleanUpTask.py.

# Entry point: runs the clean-up sequence for one site clean-up request (or a
# list of them) and returns a single GeneralResponse describing the outcome.
# NOTE(review): this listing has lost its indentation, so the exact nesting of
# the conditional steps below cannot be read from it -- confirm against
# SiteCleanUpTask.py before relying on it.
54  def process(self, siteCleanups, queryCallback):
55  ret = GeneralResponse()
# A single request object is wrapped in a list so both call forms share one loop.
56  if not isinstance(siteCleanups, list):
57  siteCleanups = [siteCleanups]
58 
59  for siteCleanup in siteCleanups:
# Reset the result state before handling each request.
60  self.errorCode = 0
61  self.errorMessage = "OK"
# Clean-up is attempted only when isSiteExist() reports the site id as present.
62  if self.isSiteExist(siteCleanup.id, queryCallback):
63  self.cleanUpMysqlStorage(siteCleanup, queryCallback)
# The DC_LOG table is truncated for both the LOG and the FULL history modes.
64  if siteCleanup.historyCleanUp == dc.EventObjects.SiteCleanup.HISTORY_CLEANUP_LOG or \
65  siteCleanup.historyCleanUp == dc.EventObjects.SiteCleanup.HISTORY_CLEANUP_FULL:
66  self.trancateArbitraryTable(Constants.DC_LOG_TABLE_NAME_TEMPLATE, siteCleanup, Constants.LOG_DB_ID,
67  queryCallback)
# The DC_FREQ table is truncated for the FULL history mode.
68  if siteCleanup.historyCleanUp == dc.EventObjects.SiteCleanup.HISTORY_CLEANUP_FULL:
69  self.trancateArbitraryTable(Constants.DC_FREQ_TABLE_NAME_TEMPLATE, siteCleanup, Constants.STAT_DB_ID,
70  queryCallback)
# Storage clean-up for non-delayed operations.
# NOTE(review): which of the following calls belong to this branch is not
# recoverable from the flattened listing -- TODO confirm.
71  if siteCleanup.delayedType == dc.EventObjects.NOT_DELAYED_OPERATION:
72  self.cleanUpDBStorage(siteCleanup, KEY_VALUE_FILE_NAME_TEMPLATE, queryCallback)
73  self.cleanUpDBStorage(siteCleanup, KEY_VALUE_FIELDS_FILE_NAME_TEMPLATE, queryCallback)
74  self.cleanUpRawDataStorage(siteCleanup)
75  self.cleanUpMysqlSiteTable(siteCleanup, queryCallback)
# Optional copy of the site's URLs via the inherited copyUrlsToDcUrls().
76  if siteCleanup.moveURLs:
77  self.copyUrlsToDcUrls(siteCleanup.id, queryCallback)
78  self.fieldRecalculator.updateSiteCleanupFields(siteCleanup.id, queryCallback)
79 
80  # clean up attributes
81  self.trancateArbitraryTable(Constants.DC_ATT_TABLE_NAME_TEMPLATE, siteCleanup, Constants.ATT_DB_ID,
82  queryCallback)
# Unknown site: report a global error for this request.
83  else:
84  self.errorCode = Constants.EXIT_CODE_GLOBAL_ERROR
85  self.errorMessage = (">>> Site id [%s] not found" % siteCleanup.id)
86 
# Publish the current error code on the response, record it in ret.statuses and
# join the message onto any message already present, separated by "-".
87  ret.errorCode = self.errorCode
88  ret.statuses.append(ret.errorCode)
89  if ret.errorMessage is None or ret.errorMessage == "":
90  ret.errorMessage = self.errorMessage
91  else:
92  ret.errorMessage += ("-" + self.errorMessage)
93 
94  return ret
95 
96 
Here is the call graph for this function:
Here is the caller graph for this function:

◆ staticUpdate()

def dc_db.SiteCleanUpTask.SiteCleanUpTask.staticUpdate (   self,
  sqlTemplate,
  siteCleanup,
  queryCallback 
)

Definition at line 102 of file SiteCleanUpTask.py.

def staticUpdate(self, sqlTemplate, siteCleanup, queryCallback):
    """Mark every selected URL as deleted in the statistics.

    sqlTemplate   -- SELECT statement with one ``%s`` placeholder for the
                     site's URLs table name
    siteCleanup   -- request object; only its ``id`` is used
    queryCallback -- callable invoked as queryCallback(sql, dbId); also handed
                     on to StatisticLogManager.statisticUpdate()

    The query runs on Constants.SECONDARY_DB_ID.  For each returned row whose
    first column is not None, StatisticLogManager.statisticUpdate() is called
    with FREQ_DELETED_STATE, an increment of 1, the site id and that column
    value (URLMd5 in the queries issued by cleanUpMysqlStorage()).
    """
    selectQuery = sqlTemplate % (Constants.DC_URLS_TABLE_NAME_TEMPLATE % siteCleanup.id)
    rows = queryCallback(selectQuery, Constants.SECONDARY_DB_ID)
    if rows is None:
        return

    for row in rows:
        urlMd5 = row[0]
        if urlMd5 is None:
            continue
        StatisticLogManager.statisticUpdate(queryCallback, Constants.StatFreqConstants.FREQ_DELETED_STATE, 1,
                                            siteCleanup.id, urlMd5)
111 
112 
Here is the caller graph for this function:

◆ trancateArbitraryTable()

def dc_db.SiteCleanUpTask.SiteCleanUpTask.trancateArbitraryTable (   self,
  tablePrefix,
  siteCleanup,
  dbId,
  queryCallback 
)

Definition at line 147 of file SiteCleanUpTask.py.

def trancateArbitraryTable(self, tablePrefix, siteCleanup, dbId, queryCallback):
    """Truncate the per-site table built from ``tablePrefix``.

    tablePrefix   -- format string with one placeholder for the site id
    siteCleanup   -- request object; only its ``id`` is used
    dbId          -- database identifier forwarded to queryCallback
    queryCallback -- callable invoked as queryCallback(sql, dbId)

    Returns None; the callback's result is discarded.
    """
    # The table name is formatted first, then wrapped in back-ticks by the
    # TRUNCATE template.
    queryCallback("TRUNCATE TABLE `%s`" % (tablePrefix % siteCleanup.id), dbId)
152 
153 
Here is the caller graph for this function:

Member Data Documentation

◆ dBDataTask

dc_db.SiteCleanUpTask.SiteCleanUpTask.dBDataTask

Definition at line 45 of file SiteCleanUpTask.py.

◆ errorCode

dc_db.SiteCleanUpTask.SiteCleanUpTask.errorCode

Definition at line 43 of file SiteCleanUpTask.py.

◆ errorMessage

dc_db.SiteCleanUpTask.SiteCleanUpTask.errorMessage

Definition at line 44 of file SiteCleanUpTask.py.

◆ fieldRecalculator

dc_db.SiteCleanUpTask.SiteCleanUpTask.fieldRecalculator

Definition at line 46 of file SiteCleanUpTask.py.

◆ keyValueStorageDir

dc_db.SiteCleanUpTask.SiteCleanUpTask.keyValueStorageDir

Definition at line 41 of file SiteCleanUpTask.py.

◆ rawDataDir

dc_db.SiteCleanUpTask.SiteCleanUpTask.rawDataDir

Definition at line 42 of file SiteCleanUpTask.py.


The documentation for this class was generated from the following file: