HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
app.ContentUpdater.ContentUpdater Class Reference
Inheritance diagram for app.ContentUpdater.ContentUpdater:
Collaboration diagram for app.ContentUpdater.ContentUpdater:

Classes

class  ConfigOptions
 
class  Meta
 

Public Member Functions

def __init__ (self)
 
def setup (self)
 
def run (self)
 
def getInputPickle (self)
 
def unpickleInput (self, inputPickle)
 
def createOutputPickle (self, outputBatch)
 
def sendPickle (self, outputPickle)
 
def process (self)
 
def updateProcessedContents (self, inputBatch)
 
def updateAttributesOnly (self, inputBatch)
 

Public Attributes

 exitCode
 
 logger
 
 dbWrapper
 
 errorMsg
 

Static Public Attributes

string MSG_ERROR_EMPTY_CONFIG_FILE_NAME = "Config file name is empty."
 
string MSG_ERROR_WRONG_CONFIG_FILE_NAME = "Config file name is wrong"
 
string MSG_ERROR_LOAD_APP_CONFIG = "Error loading application config file."
 
string MSG_ERROR_READ_LOG_CONFIG = "Error read log config file."
 
string MSG_ERROR_MISSED_SECTION = "Missed mandatory section '%s'"
 
string MSG_ERROR_DATABASE_OPERATION = "Database operation has error: %s"
 
string MSG_ERROR_UPDATE_PROCESSED_CONTENTS = "Update processed contents has error: %s"
 
string MSG_DEBUG_INPUT_PICKLE = "Input pickle: "
 
string MSG_DEBUG_INPUT_UNPICKLED = "input unpickled: "
 
string MSG_DEBUG_OUTPUT_BATCH = "Output batch: "
 
string MSG_DEBUG_OUTPUT_PICKLE = "Output pickle: "
 
string MSG_DEBUG_SEND_PICKLE = "Send pickle. Done."
 
string ATTRIBUTE_ERROR_MESSAGE_NAME = 'errorMessage'
 

Private Member Functions

def __initApp (self)
 
def __loadAppConfig (self, configName)
 
def __loadLogConfig (self, configName)
 
def __createDBTasksWrapper (self, configName)
 

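Detailed Description

Command-line application (its constructor, setup() and run() call into foundation.CementApp) that reads a pickled batch from standard input, stores the batch items' processed contents and attributes in the database through a DBTasksWrapper, and writes the batch back to standard output as a pickle. When an error message is supplied through the command-line arguments (self.pargs.error), only an 'errorMessage' attribute is stored for each batch item instead.

Definition at line 40 of file ContentUpdater.py.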

Constructor & Destructor Documentation

◆ __init__()

def app.ContentUpdater.ContentUpdater.__init__ (   self)

Definition at line 77 of file ContentUpdater.py.

77  def __init__(self):
78  # call base class __init__ method
79  foundation.CementApp.__init__(self)
80  self.exitCode = APP_CONSTS.EXIT_SUCCESS
81  self.logger = None
82  self.dbWrapper = None
83  self.errorMsg = None
84 
85 
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ __createDBTasksWrapper()

def app.ContentUpdater.ContentUpdater.__createDBTasksWrapper (   self,
  configName 
)
private

Definition at line 182 of file ContentUpdater.py.

182  def __createDBTasksWrapper(self, configName):
183  # variable for result
184  dbTasksWrapper = None
185  try:
186  if configName == "":
187  raise Exception(self.MSG_ERROR_EMPTY_CONFIG_FILE_NAME)
188 
189  config = ConfigParser.ConfigParser()
190  config.optionxform = str
191 
192  readOk = config.read(configName)
193 
194  if len(readOk) == 0:
195  raise Exception(self.MSG_ERROR_WRONG_CONFIG_FILE_NAME + ": " + configName)
196 
197  dbTasksWrapper = DBTasksWrapper(config)
198 
199  except Exception, err:
200  raise Exception(self.MSG_ERROR_LOAD_APP_CONFIG + ' ' + str(err))
201 
202  return dbTasksWrapper
203 
204 
Here is the caller graph for this function:

◆ __initApp()

def app.ContentUpdater.ContentUpdater.__initApp (   self)
private

Definition at line 111 of file ContentUpdater.py.

111  def __initApp(self):
112  if self.pargs.config:
113  # load app config
114  configOptions = self.__loadAppConfig(self.pargs.config)
115 
116  # load log config
117  self.__loadLogConfig(configOptions.confLogFileName)
118 
119  # set attribute values of application
120  self.dbWrapper = self.__createDBTasksWrapper(configOptions.dbTaskIniFile)
121 
122  else:
123  raise Exception(self.MSG_ERROR_LOAD_APP_CONFIG)
124 
125  if self.pargs.error:
126  self.errorMsg = str(self.pargs.error)
127 
128 
def __initApp(self, configName=None)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ __loadAppConfig()

def app.ContentUpdater.ContentUpdater.__loadAppConfig (   self,
  configName 
)
private

Definition at line 133 of file ContentUpdater.py.

133  def __loadAppConfig(self, configName):
134  # variable for result
135  configOptions = None
136  try:
137  config = ConfigParser.ConfigParser()
138  config.optionxform = str
139 
140  readOk = config.read(configName)
141 
142  if len(readOk) == 0:
143  raise Exception(self.MSG_ERROR_WRONG_CONFIG_FILE_NAME + ": " + configName)
144 
145  if not config.has_section(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME):
146  raise Exception(self.MSG_ERROR_MISSED_SECTION % str(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME))
147 
148  configOptions = ContentUpdater.ConfigOptions(
149  str(config.get(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME,
150  ContentUpdater.ConfigOptions.CONTENT_UPDATER_OPTION_LOG)),
151  str(config.get(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME,
152  ContentUpdater.ConfigOptions.CONTENT_UPDATER_OPTION_DB_TASK_INI)))
153 
154  except Exception, err:
155  raise Exception(self.MSG_ERROR_LOAD_APP_CONFIG + ' ' + str(err))
156 
157  return configOptions
158 
159 
Here is the caller graph for this function:

◆ __loadLogConfig()

def app.ContentUpdater.ContentUpdater.__loadLogConfig (   self,
  configName 
)
private

Definition at line 164 of file ContentUpdater.py.

164  def __loadLogConfig(self, configName):
165  try:
166  if isinstance(configName, str) and len(configName) == 0:
167  raise Exception(self.MSG_ERROR_EMPTY_CONFIG_FILE_NAME)
168 
169  logging.config.fileConfig(configName)
170 
171  # rotate log files and initialize the logger
172  self.logger = Utils.MPLogger().getLogger()
173 
174  except Exception, err:
175  raise Exception(self.MSG_ERROR_READ_LOG_CONFIG + ' ' + str(err))
176 
177 
Here is the call graph for this function:
Here is the caller graph for this function:

◆ createOutputPickle()

def app.ContentUpdater.ContentUpdater.createOutputPickle (   self,
  outputBatch 
)

Definition at line 230 of file ContentUpdater.py.

230  def createOutputPickle(self, outputBatch):
231  # self.logger.debug(self.MSG_DEBUG_OUTPUT_BATCH + varDump(outputBatch))
232  outputPickle = pickle.dumps(outputBatch)
233  # self.logger.debug(self.MSG_DEBUG_OUTPUT_PICKLE + str(outputPickle))
234 
235  return outputPickle
236 
237 
Here is the caller graph for this function:

◆ getInputPickle()

def app.ContentUpdater.ContentUpdater.getInputPickle (   self)

Definition at line 209 of file ContentUpdater.py.

209  def getInputPickle(self):
210  inputPickle = sys.stdin.read()
211  # self.logger.debug(self.MSG_DEBUG_INPUT_PICKLE + '\n' + str(inputPickle))
212 
213  return inputPickle
214 
215 
Here is the caller graph for this function:

◆ process()

def app.ContentUpdater.ContentUpdater.process (   self)

Definition at line 251 of file ContentUpdater.py.

251  def process(self):
252  try:
253  inputBatchObj = self.unpickleInput(self.getInputPickle())
254 
255  if self.errorMsg is None:
256  self.updateProcessedContents(inputBatchObj)
257  else:
258  self.updateAttributesOnly(inputBatchObj)
259 
260  self.sendPickle(self.createOutputPickle(inputBatchObj))
261  except Exception, err:
262  self.logger.error(str(err))
263  self.exitCode = APP_CONSTS.EXIT_FAILURE
264 
265 
-mask-info
Here is the call graph for this function:
Here is the caller graph for this function:

◆ run()

def app.ContentUpdater.ContentUpdater.run (   self)

Definition at line 93 of file ContentUpdater.py.

93  def run(self):
94  # call base class run method
95  foundation.CementApp.run(self)
96 
97  # initialize the application
98  self.__initApp()
99 
100  # call internal processing
101  self.process()
102 
103  # Finish logging
104  self.logger.info(APP_CONSTS.LOGGER_DELIMITER_LINE)
105 
106 
Here is the call graph for this function:

◆ sendPickle()

def app.ContentUpdater.ContentUpdater.sendPickle (   self,
  outputPickle 
)

Definition at line 242 of file ContentUpdater.py.

242  def sendPickle(self, outputPickle):
243  sys.stdout.write(outputPickle)
244  self.logger.debug(self.MSG_DEBUG_SEND_PICKLE)
245 
246 
Here is the caller graph for this function:

◆ setup()

def app.ContentUpdater.ContentUpdater.setup (   self)

Definition at line 87 of file ContentUpdater.py.

87  def setup(self):
88  # call base class setup method
89  foundation.CementApp.setup(self)
90 
91 

◆ unpickleInput()

def app.ContentUpdater.ContentUpdater.unpickleInput (   self,
  inputPickle 
)

Definition at line 219 of file ContentUpdater.py.

219  def unpickleInput(self, inputPickle):
220  inputUnpickled = pickle.loads(inputPickle)
221  # self.logger.debug(self.MSG_DEBUG_INPUT_UNPICKLED + varDump(inputUnpickled))
222 
223  return inputUnpickled
224 
225 
Here is the caller graph for this function:

◆ updateAttributesOnly()

def app.ContentUpdater.ContentUpdater.updateAttributesOnly (   self,
  inputBatch 
)

Definition at line 357 of file ContentUpdater.py.

357  def updateAttributesOnly(self, inputBatch):
358  attributes = []
359 
360  for batchItem in inputBatch.items:
361  self.logger.debug("batchItem: %s", varDump(batchItem))
362  self.logger.debug("batchItem.urlContentResponse: %s", varDump(batchItem.urlContentResponse))
363 
364  try:
365  # accumulate attributes
366  attributes.append(Attribute(siteId=batchItem.siteId,
367  name=self.ATTRIBUTE_ERROR_MESSAGE_NAME,
368  urlMd5=batchItem.urlId,
369  value=self.dbWrapper.dbTask.dbConnections[DB_CONSTS.PRIMARY_DB_ID].\
370  escape_string(str(self.errorMsg))))
371 
372  self.logger.debug("Made attributes: %s", varDump(attributes))
373  except Exception, err:
374  self.logger.error("Make attributes error: %s", str(err))
375  self.logger.debug(getTracebackInfo())
376 
377  try:
378  # execute database operations
379  affectDB = self.dbWrapper.affect_db
380  self.dbWrapper.affect_db = True
381  self.dbWrapper.putAttributes(attributes)
382  self.dbWrapper.affect_db = affectDB
383 
384  self.logger.debug('Database operations executed...')
385 
386  except DatabaseException, err:
387  self.logger.error(self.MSG_ERROR_DATABASE_OPERATION, str(err))
388  self.logger.debug(getTracebackInfo())
389 
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
-mask-info
def getTracebackInfo(linesNumberMax=None)
Definition: Utils.py:218
Here is the call graph for this function:
Here is the caller graph for this function:

◆ updateProcessedContents()

def app.ContentUpdater.ContentUpdater.updateProcessedContents (   self,
  inputBatch 
)

Definition at line 270 of file ContentUpdater.py.

270  def updateProcessedContents(self, inputBatch):
271 
272  urlPuts = []
273  attributes = []
274 
275  self.logger.debug("The processing of batch Id = %s started", str(inputBatch.id))
276 
277  for batchItem in inputBatch.items:
278 # self.logger.debug("batchItem: %s", varDump(batchItem))
279 # self.logger.debug("batchItem.urlContentResponse: %s", varDump(batchItem.urlContentResponse))
280 
281  if batchItem.urlContentResponse is not None:
282  for processedContent in batchItem.urlContentResponse.processedContents:
283 
284  self.logger.debug("!!! processedContent: %s", varDump(processedContent, stringifyType=0))
285  try:
286  # create URLPut object
287  putDict = {}
288  putDict["id"] = batchItem.urlId
289  putDict["data"] = processedContent
290  putDict["cDate"] = SQLExpression("NOW()")
291 
292  urlPut = dc_event.URLPut(batchItem.siteId,
293  batchItem.urlId,
294  dc_event.Content.CONTENT_PROCESSOR_CONTENT,
295  putDict)
296 
297  # accumulate URLPut objects
298  urlPuts.append(urlPut)
299 
300  except Exception, err:
301  self.logger.error(self.MSG_ERROR_UPDATE_PROCESSED_CONTENTS, str(err))
302  self.logger.debug(getTracebackInfo())
303 
304  try:
305 # self.logger.debug("type: %s, batchItem.urlContentResponse.attributes: %s",
306 # str(type(batchItem.urlContentResponse.attributes)),
307 # varDump(batchItem.urlContentResponse.attributes,
308 # maxDepth=15))
309 
310 
311  # accumulate attributes
312  for attrJson in batchItem.urlContentResponse.attributes:
313  attrDict = json.loads(attrJson)
314 
315 # self.logger.debug("type: %s, attrDict: %s", str(type(attrDict)), str(attrDict))
316 
317  attrValue = json.dumps(attrDict['value'], ensure_ascii=False, encoding='utf-8')
318 
319  attribute = Attribute(siteId=attrDict['siteId'],
320  name=attrDict['name'],
321  urlMd5=attrDict['urlMd5'],
322  value=attrValue)
323 
324  attributes.append(attribute)
325 
326  if len(attributes) > 0:
327  self.logger.debug("Made attributes: %s", varDump(attributes))
328 
329  except Exception, err:
330  self.logger.error("Make attributes error: %s", str(err))
331  self.logger.debug(getTracebackInfo())
332 
333  try:
334  # execute database operations
335  affectDB = self.dbWrapper.affect_db
336  self.dbWrapper.affect_db = True
337  self.dbWrapper.putURLContent(urlPuts)
338  self.dbWrapper.putAttributes(attributes)
339  self.dbWrapper.affect_db = affectDB
340 
341  self.logger.debug('Database operations executed...')
342 
343  except DatabaseException, err:
344  self.logger.error(self.MSG_ERROR_DATABASE_OPERATION, str(err))
345  self.logger.debug(getTracebackInfo())
346  except Exception, err:
347  self.logger.error(self.MSG_ERROR_DATABASE_OPERATION, str(err))
348  self.logger.debug(getTracebackInfo())
349 
350  self.logger.debug("The processing of batch Id = %s finished", str(inputBatch.id))
351 
352 
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
-mask-info
def getTracebackInfo(linesNumberMax=None)
Definition: Utils.py:218
Here is the call graph for this function:
Here is the caller graph for this function:

Member Data Documentation

◆ ATTRIBUTE_ERROR_MESSAGE_NAME

string app.ContentUpdater.ContentUpdater.ATTRIBUTE_ERROR_MESSAGE_NAME = 'errorMessage'
static

Definition at line 56 of file ContentUpdater.py.

◆ dbWrapper

app.ContentUpdater.ContentUpdater.dbWrapper

Definition at line 82 of file ContentUpdater.py.

◆ errorMsg

app.ContentUpdater.ContentUpdater.errorMsg

Definition at line 83 of file ContentUpdater.py.

◆ exitCode

app.ContentUpdater.ContentUpdater.exitCode

Definition at line 80 of file ContentUpdater.py.

◆ logger

app.ContentUpdater.ContentUpdater.logger

Definition at line 81 of file ContentUpdater.py.

◆ MSG_DEBUG_INPUT_PICKLE

string app.ContentUpdater.ContentUpdater.MSG_DEBUG_INPUT_PICKLE = "Input pickle: "
static

Definition at line 50 of file ContentUpdater.py.

◆ MSG_DEBUG_INPUT_UNPICKLED

string app.ContentUpdater.ContentUpdater.MSG_DEBUG_INPUT_UNPICKLED = "input unpickled: "
static

Definition at line 51 of file ContentUpdater.py.

◆ MSG_DEBUG_OUTPUT_BATCH

string app.ContentUpdater.ContentUpdater.MSG_DEBUG_OUTPUT_BATCH = "Output batch: "
static

Definition at line 52 of file ContentUpdater.py.

◆ MSG_DEBUG_OUTPUT_PICKLE

string app.ContentUpdater.ContentUpdater.MSG_DEBUG_OUTPUT_PICKLE = "Output pickle: "
static

Definition at line 53 of file ContentUpdater.py.

◆ MSG_DEBUG_SEND_PICKLE

string app.ContentUpdater.ContentUpdater.MSG_DEBUG_SEND_PICKLE = "Send pickle. Done."
static

Definition at line 54 of file ContentUpdater.py.

◆ MSG_ERROR_DATABASE_OPERATION

string app.ContentUpdater.ContentUpdater.MSG_ERROR_DATABASE_OPERATION = "Database operation has error: %s"
static

Definition at line 47 of file ContentUpdater.py.

◆ MSG_ERROR_EMPTY_CONFIG_FILE_NAME

string app.ContentUpdater.ContentUpdater.MSG_ERROR_EMPTY_CONFIG_FILE_NAME = "Config file name is empty."
static

Definition at line 42 of file ContentUpdater.py.

◆ MSG_ERROR_LOAD_APP_CONFIG

string app.ContentUpdater.ContentUpdater.MSG_ERROR_LOAD_APP_CONFIG = "Error loading application config file."
static

Definition at line 44 of file ContentUpdater.py.

◆ MSG_ERROR_MISSED_SECTION

string app.ContentUpdater.ContentUpdater.MSG_ERROR_MISSED_SECTION = "Missed mandatory section '%s'"
static

Definition at line 46 of file ContentUpdater.py.

◆ MSG_ERROR_READ_LOG_CONFIG

string app.ContentUpdater.ContentUpdater.MSG_ERROR_READ_LOG_CONFIG = "Error read log config file."
static

Definition at line 45 of file ContentUpdater.py.

◆ MSG_ERROR_UPDATE_PROCESSED_CONTENTS

string app.ContentUpdater.ContentUpdater.MSG_ERROR_UPDATE_PROCESSED_CONTENTS = "Update processed contents has error: %s"
static

Definition at line 48 of file ContentUpdater.py.

◆ MSG_ERROR_WRONG_CONFIG_FILE_NAME

string app.ContentUpdater.ContentUpdater.MSG_ERROR_WRONG_CONFIG_FILE_NAME = "Config file name is wrong"
static

Definition at line 43 of file ContentUpdater.py.


The documentation for this class was generated from the following file: