HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc_postprocessor.SocialModule.SocialModule Class Reference
Inheritance diagram for dc_postprocessor.SocialModule.SocialModule:
Collaboration diagram for dc_postprocessor.SocialModule.SocialModule:

Public Member Functions

def __init__ (self, getConfigOption=None, log=None)
 
def init (self)
 
def executeCommand (self, cmd, inputStream='')
 
def processBatch (self, batch)
 
def executeSocialTask (self, inputBatch)
 
- Public Member Functions inherited from dc_postprocessor.PostProcessingModuleClass.PostProcessingModuleClass
def __init__ (self, getConfigOption=None, log=None)
 
def init (self)
 
def processBatch (self, batchObj)
 
def processBatchItem (self, batchItemObj)
 

Public Attributes

 cmd
 
 dbWrapper
 
- Public Attributes inherited from dc_postprocessor.PostProcessingModuleClass.PostProcessingModuleClass
 getConfigOption
 
 logger
 

Static Public Attributes

string SOCIAL_RATE_PROPERTY_NAME = 'SOCIAL_RATE'
 
string USER_PROXY_PROPERTY_NAME = 'USER_PROXY'
 
string PARAM_USER_PROXY = 'user_proxy'
 
string OPTION_EXECUTION_LOCAL = 'executionLocal'
 
string OPTION_EXECUTION_REMOTE = 'executionRemote'
 
string OPTION_EXECUTION_TYPE = 'executionType'
 
string OPTION_DB_TASK_INI = 'db_task_ini'
 
int EXECUTION_TYPE_LOCAL = 0
 
int EXECUTION_TYPE_REMOTE = 1
 
int EXECUTION_TYPE_DEFAULT = EXECUTION_TYPE_LOCAL
 
string TMP_INPUT_FILE_NAME = 'in'
 
string TMP_OUTPUT_FILE_NAME = 'out'
 
list TMP_FILE_NAMES_LIST = [TMP_INPUT_FILE_NAME, TMP_OUTPUT_FILE_NAME]
 
string MACRO_INPUT_FILE = '%INPUT_FILE%'
 
string MACRO_OUTPUT_FILE = '%OUTPUT_FILE%'
 
string MACRO_USER_NAME = '%USER_NAME%'
 
string ERROR_MSG_INITIALIZATION_CALLBACK = "Error initialization of callback function for get config options."
 
string ERROR_MSG_INITIALIZATION_LOGGER = "Error initialization of self.logger."
 
string ERROR_MSG_EXECUTION_TYPE = "Wrong execution type ( %s ) was got from config file."
 
string ERROR_MSG_EXECUTION_CMD_EMPTY = "Execution command line is empty."
 
string ERROR_MSG_CREATION_DBTASK_WRAPPER = "Creation DBTaskWrapper failed. Error: %s"
 
string ERROR_MSG_LOAD_USER_PROXY = "Load parameter '" + PARAM_USER_PROXY + "' from site property failed. Error: %s"
 

Private Member Functions

def __getDBWrapper (self)
 
def __getCmd (self)
 
def __createTemporaryFiles (self)
 
def __removeTemporaryFiles (self, tempFiles)
 
def __makeInputFile (self, tempFiles, inputBatch)
 
def __readOutputFile (self, tempFiles)
 
def __makeCmdLine (self, tempFiles, templateCmdLine)
 
def __fillUserProxyData (self, batchItem)
 

Detailed Description

Definition at line 37 of file SocialModule.py.

Constructor & Destructor Documentation

◆ __init__()

def dc_postprocessor.SocialModule.SocialModule.__init__ (   self,
  getConfigOption = None,
  log = None 
)

Definition at line 74 of file SocialModule.py.

def __init__(self, getConfigOption=None, log=None):
    """Initialise the base post-processing module and reset module state.

    Both arguments are forwarded unchanged to
    PostProcessingModuleClass.__init__. The ``cmd`` and ``dbWrapper``
    attributes start as None; ``init()`` assigns their real values.
    """
    PostProcessingModuleClass.__init__(self, getConfigOption, log)

    # Placeholders until init() resolves the command line and DB wrapper.
    self.dbWrapper = None
    self.cmd = None
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ __createTemporaryFiles()

def dc_postprocessor.SocialModule.SocialModule.__createTemporaryFiles (   self)
private

Definition at line 165 of file SocialModule.py.

def __createTemporaryFiles(self):
    """Create one temporary file for every name in TMP_FILE_NAMES_LIST.

    Returns:
        dict mapping each logical name to its open NamedTemporaryFile.
        The files are created with delete=False, so closing a handle
        does not remove the file from disk.
    """
    return dict((logicalName, tempfile.NamedTemporaryFile(delete=False))
                for logicalName in self.TMP_FILE_NAMES_LIST)
Here is the caller graph for this function:

◆ __fillUserProxyData()

def dc_postprocessor.SocialModule.SocialModule.__fillUserProxyData (   self,
  batchItem 
)
private

Definition at line 248 of file SocialModule.py.

# Purpose: when the SOCIAL_RATE property of batchItem mentions 'user_proxy',
# parse that property as JSON and, if the proxy source is the database, load
# the proxies for batchItem.siteId through DBProxyWrapper, add them to the
# user-proxy wrapper and switch its source to SOURCE_PROPERTY. The property
# is then re-serialised with json.dumps. Any exception is logged with
# ERROR_MSG_LOAD_USER_PROXY and not re-raised; batchItem is always returned.
#
# NOTE(review): this listing has lost its indentation, so the nesting of the
# json.dumps write-back (line 274) and of the final debug log (line 279)
# cannot be confirmed from here -- check against SocialModule.py.
248  def __fillUserProxyData(self, batchItem):
249 
# This is a membership test on the raw property value, before any JSON
# parsing. Presumably the value is a JSON string (it is passed to json.loads
# below), which would make this a substring test -- TODO confirm.
# Indexing raises KeyError if the SOCIAL_RATE property is absent.
250  if self.PARAM_USER_PROXY in batchItem.properties[self.SOCIAL_RATE_PROPERTY_NAME]:
251  try:
252  socialRateProperties = json.loads(batchItem.properties[self.SOCIAL_RATE_PROPERTY_NAME])
253 
# Second check, this time on the parsed object rather than the raw value.
254  if self.PARAM_USER_PROXY in socialRateProperties:
255  self.logger.debug("!!! user_proxy: %s", str(socialRateProperties[self.PARAM_USER_PROXY]))
256  userProxyJsonWrapper = UserProxyJsonWrapper(socialRateProperties[self.PARAM_USER_PROXY])
257  self.logger.debug("!!! source: %s", str(userProxyJsonWrapper.getSource()))
258  self.logger.debug("!!! proxies: %s", str(userProxyJsonWrapper.getProxies()))
259 
# Only a database-sourced configuration triggers a proxy lookup.
260  if userProxyJsonWrapper.getSource() == UserProxyJsonWrapper.SOURCE_DATABASE:
261  self.logger.debug("Getting proxies list from DB.")
262 
263  self.logger.debug("!!! batchItem.siteId: %s", str(batchItem.siteId))
264 
# self.dbWrapper is assigned by init(); it is None before that.
265  proxyWrapper = DBProxyWrapper(self.dbWrapper)
# Method name spelling ("Enaibled") is as called here; presumably it matches
# the DBProxyWrapper API -- verify before renaming.
266  proxiesList = proxyWrapper.getEnaibledProxies(batchItem.siteId)
267  self.logger.debug("!!! type: %s, proxiesList: %s", str(type(proxiesList)), str(proxiesList))
268 
# After the proxies are added, the source is switched to SOURCE_PROPERTY.
269  userProxyJsonWrapper.addProxyList(proxiesList)
270  userProxyJsonWrapper.setSource(UserProxyJsonWrapper.SOURCE_PROPERTY)
271  self.logger.debug("!!! userProxyJsonWrapper.getProxies(): %s", str(userProxyJsonWrapper.getProxies()))
272  self.logger.debug("!!! userProxyJsonWrapper.getSource(): %s", str(userProxyJsonWrapper.getSource()))
273 
# The parsed dict, not the wrapper, is serialised. Presumably the wrapper
# mutates the nested 'user_proxy' object in place -- TODO confirm.
274  batchItem.properties[self.SOCIAL_RATE_PROPERTY_NAME] = json.dumps(socialRateProperties)
275 
# Python 2 "except X, name" syntax; the error is logged and swallowed.
276  except Exception, err:
277  self.logger.error(self.ERROR_MSG_LOAD_USER_PROXY, str(err))
278 
279  self.logger.debug("!!! batchItem.properties: %s", str(batchItem.properties))
280 
281 
282  return batchItem
283 
284 
-mask-info
Here is the caller graph for this function:

◆ __getCmd()

def dc_postprocessor.SocialModule.SocialModule.__getCmd (   self)
private

Definition at line 117 of file SocialModule.py.

def __getCmd(self):
    """Return the command-line template for the configured execution type.

    The execution type is read from the config section named after this
    class (default EXECUTION_TYPE_DEFAULT) and converted with int(). It
    selects which option holds the template: OPTION_EXECUTION_LOCAL or
    OPTION_EXECUTION_REMOTE.

    Raises:
        Exception: with ERROR_MSG_EXECUTION_TYPE for an unknown execution
            type, or ERROR_MSG_EXECUTION_CMD_EMPTY when the selected
            template is the empty string.
    """
    sectionName = self.__class__.__name__
    executionType = int(self.getConfigOption(sectionName=sectionName,
                                             optionName=self.OPTION_EXECUTION_TYPE,
                                             defaultValue=self.EXECUTION_TYPE_DEFAULT))

    # LOCAL is listed last so that it wins if both constants were ever equal,
    # matching the if/elif precedence of a sequential check.
    optionByType = {
        self.EXECUTION_TYPE_REMOTE: self.OPTION_EXECUTION_REMOTE,
        self.EXECUTION_TYPE_LOCAL: self.OPTION_EXECUTION_LOCAL,
    }
    if executionType not in optionByType:
        raise Exception(self.ERROR_MSG_EXECUTION_TYPE % str(executionType))

    cmdTemplate = self.getConfigOption(sectionName=sectionName,
                                       optionName=optionByType[executionType],
                                       defaultValue='')
    if cmdTemplate == "":
        raise Exception(self.ERROR_MSG_EXECUTION_CMD_EMPTY)

    return cmdTemplate
Here is the caller graph for this function:

◆ __getDBWrapper()

def dc_postprocessor.SocialModule.SocialModule.__getDBWrapper (   self)
private

Definition at line 100 of file SocialModule.py.

def __getDBWrapper(self):
    """Create a DBTasksWrapper from the ini file named by OPTION_DB_TASK_INI.

    The ini file name is read from the config section named after this
    class and loaded into a ConfigParser, which is handed to DBTasksWrapper.

    Raises:
        Exception: with ERROR_MSG_CREATION_DBTASK_WRAPPER wrapping the text
            of any error raised while reading the config or building the
            wrapper.
    """
    try:
        parser = ConfigParser.ConfigParser()
        iniFileName = self.getConfigOption(sectionName=self.__class__.__name__,
                                           optionName=self.OPTION_DB_TASK_INI)
        parser.read(iniFileName)
        return DBTasksWrapper(parser)
    except Exception as err:
        raise Exception(self.ERROR_MSG_CREATION_DBTASK_WRAPPER % str(err))
Here is the caller graph for this function:

◆ __makeCmdLine()

def dc_postprocessor.SocialModule.SocialModule.__makeCmdLine (   self,
  tempFiles,
  templateCmdLine 
)
private

Definition at line 216 of file SocialModule.py.

def __makeCmdLine(self, tempFiles, templateCmdLine):
    """Expand the macros of a command-line template.

    MACRO_INPUT_FILE and MACRO_OUTPUT_FILE are replaced by the ``name`` of
    the matching temporary file, and MACRO_USER_NAME by getpass.getuser().
    Each macro is recognised in its all-upper-case and all-lower-case
    spelling only.

    Args:
        tempFiles: dict of logical name -> temporary file object. File
            macros are expanded only when this is a dict and the matching
            entry is present and not None; otherwise they are left as-is
            (a missing entry no longer raises KeyError).
        templateCmdLine: command-line template string.

    Returns:
        The command line with every resolvable macro expanded.

    NOTE(review): the listing this was taken from had lost its indentation.
    The user-name macro is expanded here regardless of the type of
    ``tempFiles`` -- confirm against SocialModule.py.
    """
    ret = templateCmdLine

    # set temporary file names
    if isinstance(tempFiles, dict):
        fileMacros = ((self.MACRO_INPUT_FILE, self.TMP_INPUT_FILE_NAME),
                      (self.MACRO_OUTPUT_FILE, self.TMP_OUTPUT_FILE_NAME))
        for macro, fileKey in fileMacros:
            tempFile = tempFiles.get(fileKey)
            if tempFile is None:
                continue
            for spelling in (macro.upper(), macro.lower()):
                ret = ret.replace(spelling, tempFile.name)

    # getuser() is called only when the macro is present, because it can
    # fail on hosts where the current user cannot be determined.
    for spelling in (self.MACRO_USER_NAME.upper(), self.MACRO_USER_NAME.lower()):
        if spelling in ret:
            ret = ret.replace(spelling, getpass.getuser())

    return ret
Here is the caller graph for this function:

◆ __makeInputFile()

def dc_postprocessor.SocialModule.SocialModule.__makeInputFile (   self,
  tempFiles,
  inputBatch 
)
private

Definition at line 190 of file SocialModule.py.

def __makeInputFile(self, tempFiles, inputBatch):
    """Pickle inputBatch into the temporary input file and close the file.

    Nothing happens when tempFiles is not a dict or has no entry for
    TMP_INPUT_FILE_NAME.
    """
    if not isinstance(tempFiles, dict) or self.TMP_INPUT_FILE_NAME not in tempFiles:
        return

    inputFile = tempFiles[self.TMP_INPUT_FILE_NAME]
    inputFile.write(pickle.dumps(inputBatch))
    # Closing flushes the data so it can be read back through the file name.
    inputFile.close()
Here is the caller graph for this function:

◆ __readOutputFile()

def dc_postprocessor.SocialModule.SocialModule.__readOutputFile (   self,
  tempFiles 
)
private

Definition at line 201 of file SocialModule.py.

def __readOutputFile(self, tempFiles):
    """Load the pickled result from the temporary output file.

    Args:
        tempFiles: dict holding the output file object under
            TMP_OUTPUT_FILE_NAME (KeyError if the entry is missing).

    Returns:
        The unpickled object, or None when the output entry is None.

    Raises:
        Whatever read() or pickle.loads() raises (for example EOFError on
        an empty file). The file handle is closed in that case too.
    """
    ret = None
    outputFile = tempFiles[self.TMP_OUTPUT_FILE_NAME]
    if outputFile is not None:
        try:
            # NOTE(review): pickle.loads can execute arbitrary code encoded
            # in the data, so the producer of this file must be trusted.
            ret = pickle.loads(outputFile.read())
        finally:
            # Close even when unpickling fails so the handle does not leak.
            outputFile.close()

    return ret
Here is the caller graph for this function:

◆ __removeTemporaryFiles()

def dc_postprocessor.SocialModule.SocialModule.__removeTemporaryFiles (   self,
  tempFiles 
)
private

Definition at line 178 of file SocialModule.py.

def __removeTemporaryFiles(self, tempFiles):
    """Close and delete every temporary file held in tempFiles.

    Args:
        tempFiles: dict of logical name -> temporary file object (or None).
            Anything that is not a dict is ignored.

    Each handle is closed before its file is unlinked. Otherwise a handle
    that was never closed elsewhere (for example the output file when the
    command fails and the result is never read) would stay open after the
    file is gone.
    """
    if not isinstance(tempFiles, dict):
        return

    for tempFile in tempFiles.values():
        if tempFile is None:
            continue
        # The hasattr guard keeps objects that only expose ``name`` working.
        if hasattr(tempFile, "close"):
            tempFile.close()
        if os.path.isfile(tempFile.name):
            os.unlink(tempFile.name)
Here is the caller graph for this function:

◆ executeCommand()

def dc_postprocessor.SocialModule.SocialModule.executeCommand (   self,
  cmd,
  inputStream = '' 
)

Definition at line 148 of file SocialModule.py.

def executeCommand(self, cmd, inputStream=''):
    """Run cmd through /bin/bash and return its stdout and exit code.

    Args:
        cmd: shell command line, executed with shell=True.
        inputStream: data sent to the process's stdin.

    Returns:
        Tuple (output, exitCode). stderr is captured but only logged.
    """
    log = self.logger
    log.debug("Popen: %s", str(cmd))
    process = Popen(cmd, shell=True, executable='/bin/bash', close_fds=True,
                    stdin=PIPE, stdout=PIPE, stderr=PIPE)

    log.debug("process.communicate(), len(inputStream)=" + str(len(inputStream)))
    output, err = process.communicate(input=inputStream)
    log.debug("Process std_error=: %s", str(err))
    log.debug("Process output len=:" + str(len(output)))

    # communicate() has already waited for the process; wait() just
    # returns the exit code.
    exitCode = process.wait()
    log.debug("Process response exitCode: %s", str(exitCode))

    return output, exitCode
def executeCommand(cmd, inputStream='', log=None)
Definition: Utils.py:1747
Here is the caller graph for this function:

◆ executeSocialTask()

def dc_postprocessor.SocialModule.SocialModule.executeSocialTask (   self,
  inputBatch 
)

Definition at line 329 of file SocialModule.py.

def executeSocialTask(self, inputBatch):
    """Run the configured social-task command on inputBatch.

    The batch is pickled into a temporary input file, the command template
    in self.cmd is expanded and executed, and on exit code 0 the result is
    unpickled from the temporary output file.

    Returns:
        The object read from the output file on success. On any exception
        (which is logged) or a non-zero exit code, inputBatch is returned
        unchanged. The temporary files are removed in every case.
    """
    tempFiles = self.__createTemporaryFiles()
    self.logger.debug("!!! tempFiles: %s", str(tempFiles))

    # Fall back to the untouched input unless the command succeeds.
    result = inputBatch
    try:
        if self.cmd is None or self.cmd == "":
            raise Exception(self.ERROR_MSG_EXECUTION_CMD_EMPTY)

        self.__makeInputFile(tempFiles, inputBatch)

        self.logger.debug("!!! template cmd: %s", str(self.cmd))
        cmdLine = self.__makeCmdLine(tempFiles, self.cmd)
        self.logger.debug("!!! execute cmd: %s", str(cmdLine))

        output, exitCode = self.executeCommand(cmdLine)
        self.logger.debug("!!! output: %s", str(output))

        if int(exitCode) == 0:
            result = self.__readOutputFile(tempFiles)
    except Exception as err:
        self.logger.error(str(err))
    finally:
        self.__removeTemporaryFiles(tempFiles)

    return result
-mask-info
Here is the call graph for this function:
Here is the caller graph for this function:

◆ init()

def dc_postprocessor.SocialModule.SocialModule.init (   self)

Definition at line 85 of file SocialModule.py.

def init(self):
    """Validate the injected dependencies and resolve runtime settings.

    Raises:
        Exception: with ERROR_MSG_INITIALIZATION_CALLBACK when
            getConfigOption is None, or ERROR_MSG_INITIALIZATION_LOGGER
            when logger is None. Errors from resolving the command line
            or the DB wrapper propagate unchanged.
    """
    # The config callback is checked first, then the logger.
    requiredAttributes = (("getConfigOption", self.ERROR_MSG_INITIALIZATION_CALLBACK),
                          ("logger", self.ERROR_MSG_INITIALIZATION_LOGGER))
    for attributeName, errorMessage in requiredAttributes:
        if getattr(self, attributeName) is None:
            raise Exception(errorMessage)

    self.cmd = self.__getCmd()
    self.dbWrapper = self.__getDBWrapper()
Here is the call graph for this function:
Here is the caller graph for this function:

◆ processBatch()

def dc_postprocessor.SocialModule.SocialModule.processBatch (   self,
  batch 
)

Definition at line 289 of file SocialModule.py.

def processBatch(self, batch):
    """Send the SOCIAL_RATE items of batch through the social task.

    Items whose properties contain SOCIAL_RATE_PROPERTY_NAME are passed
    through __fillUserProxyData and collected into a new Batch with the
    same id, which is handed to executeSocialTask. Every item of the
    original batch that matches a returned item on both urlId and siteId
    is replaced by that returned item (first match wins).

    Returns:
        The batch object that was passed in, updated in place. A non-Batch
        argument is logged as an error and returned untouched.
    """
    if not isinstance(batch, Batch):
        self.logger.error("Input object has type: %s", str(type(batch)))
        return batch

    # accumulate batch items for send to social task processing
    localBatchItems = [self.__fillUserProxyData(item) for item in batch.items
                       if self.SOCIAL_RATE_PROPERTY_NAME in item.properties]
    if not localBatchItems:
        return batch

    localBatch = Batch(batchId=batch.id)
    localBatch.items = localBatchItems
    self.logger.debug("Accumulated %s items from %s total for send to SocialTask",
                      str(len(localBatchItems)), str(len(batch.items)))

    localBatch = self.executeSocialTask(localBatch)
    self.logger.debug("Recived %s items from SocialTask", str(len(localBatch.items)))

    # update batch items after processing of the social task
    foundCount = 0
    for index, sourceItem in enumerate(batch.items):
        for processedItem in localBatch.items:
            if sourceItem.urlId == processedItem.urlId and sourceItem.siteId == processedItem.siteId:
                batch.items[index] = processedItem
                self.logger.debug("Found result for %s", str(batch.items[index].urlId))
                foundCount += 1
                break

    self.logger.debug("Found results for %s items", str(foundCount))

    return batch
-mask-info
Here is the call graph for this function:
Here is the caller graph for this function:

Member Data Documentation

◆ cmd

dc_postprocessor.SocialModule.SocialModule.cmd

Definition at line 77 of file SocialModule.py.

◆ dbWrapper

dc_postprocessor.SocialModule.SocialModule.dbWrapper

Definition at line 78 of file SocialModule.py.

◆ ERROR_MSG_CREATION_DBTASK_WRAPPER

string dc_postprocessor.SocialModule.SocialModule.ERROR_MSG_CREATION_DBTASK_WRAPPER = "Creation DBTaskWrapper failed. Error: %s"
static

Definition at line 69 of file SocialModule.py.

◆ ERROR_MSG_EXECUTION_CMD_EMPTY

string dc_postprocessor.SocialModule.SocialModule.ERROR_MSG_EXECUTION_CMD_EMPTY = "Execution command line is empty."
static

Definition at line 68 of file SocialModule.py.

◆ ERROR_MSG_EXECUTION_TYPE

string dc_postprocessor.SocialModule.SocialModule.ERROR_MSG_EXECUTION_TYPE = "Wrong execution type ( %s ) was got from config file."
static

Definition at line 67 of file SocialModule.py.

◆ ERROR_MSG_INITIALIZATION_CALLBACK

string dc_postprocessor.SocialModule.SocialModule.ERROR_MSG_INITIALIZATION_CALLBACK = "Error initialization of callback function for get config options."
static

Definition at line 65 of file SocialModule.py.

◆ ERROR_MSG_INITIALIZATION_LOGGER

string dc_postprocessor.SocialModule.SocialModule.ERROR_MSG_INITIALIZATION_LOGGER = "Error initialization of self.logger."
static

Definition at line 66 of file SocialModule.py.

◆ ERROR_MSG_LOAD_USER_PROXY

string dc_postprocessor.SocialModule.SocialModule.ERROR_MSG_LOAD_USER_PROXY = "Load parameter '" + PARAM_USER_PROXY + "' from site property failed. Error: %s"
static

Definition at line 70 of file SocialModule.py.

◆ EXECUTION_TYPE_DEFAULT

int dc_postprocessor.SocialModule.SocialModule.EXECUTION_TYPE_DEFAULT = EXECUTION_TYPE_LOCAL
static

Definition at line 52 of file SocialModule.py.

◆ EXECUTION_TYPE_LOCAL

int dc_postprocessor.SocialModule.SocialModule.EXECUTION_TYPE_LOCAL = 0
static

Definition at line 50 of file SocialModule.py.

◆ EXECUTION_TYPE_REMOTE

int dc_postprocessor.SocialModule.SocialModule.EXECUTION_TYPE_REMOTE = 1
static

Definition at line 51 of file SocialModule.py.

◆ MACRO_INPUT_FILE

string dc_postprocessor.SocialModule.SocialModule.MACRO_INPUT_FILE = '%INPUT_FILE%'
static

Definition at line 60 of file SocialModule.py.

◆ MACRO_OUTPUT_FILE

string dc_postprocessor.SocialModule.SocialModule.MACRO_OUTPUT_FILE = '%OUTPUT_FILE%'
static

Definition at line 61 of file SocialModule.py.

◆ MACRO_USER_NAME

string dc_postprocessor.SocialModule.SocialModule.MACRO_USER_NAME = '%USER_NAME%'
static

Definition at line 62 of file SocialModule.py.

◆ OPTION_DB_TASK_INI

string dc_postprocessor.SocialModule.SocialModule.OPTION_DB_TASK_INI = 'db_task_ini'
static

Definition at line 48 of file SocialModule.py.

◆ OPTION_EXECUTION_LOCAL

string dc_postprocessor.SocialModule.SocialModule.OPTION_EXECUTION_LOCAL = 'executionLocal'
static

Definition at line 45 of file SocialModule.py.

◆ OPTION_EXECUTION_REMOTE

string dc_postprocessor.SocialModule.SocialModule.OPTION_EXECUTION_REMOTE = 'executionRemote'
static

Definition at line 46 of file SocialModule.py.

◆ OPTION_EXECUTION_TYPE

string dc_postprocessor.SocialModule.SocialModule.OPTION_EXECUTION_TYPE = 'executionType'
static

Definition at line 47 of file SocialModule.py.

◆ PARAM_USER_PROXY

string dc_postprocessor.SocialModule.SocialModule.PARAM_USER_PROXY = 'user_proxy'
static

Definition at line 42 of file SocialModule.py.

◆ SOCIAL_RATE_PROPERTY_NAME

string dc_postprocessor.SocialModule.SocialModule.SOCIAL_RATE_PROPERTY_NAME = 'SOCIAL_RATE'
static

Definition at line 40 of file SocialModule.py.

◆ TMP_FILE_NAMES_LIST

list dc_postprocessor.SocialModule.SocialModule.TMP_FILE_NAMES_LIST = [TMP_INPUT_FILE_NAME, TMP_OUTPUT_FILE_NAME]
static

Definition at line 57 of file SocialModule.py.

◆ TMP_INPUT_FILE_NAME

string dc_postprocessor.SocialModule.SocialModule.TMP_INPUT_FILE_NAME = 'in'
static

Definition at line 55 of file SocialModule.py.

◆ TMP_OUTPUT_FILE_NAME

string dc_postprocessor.SocialModule.SocialModule.TMP_OUTPUT_FILE_NAME = 'out'
static

Definition at line 56 of file SocialModule.py.

◆ USER_PROXY_PROPERTY_NAME

string dc_postprocessor.SocialModule.SocialModule.USER_PROXY_PROPERTY_NAME = 'USER_PROXY'
static

Definition at line 41 of file SocialModule.py.


The documentation for this class was generated from the following file: