HCE Project: Python-language Distributed Tasks Manager Application, Distributed Crawler Application, and client API bindings. Version 2.0.0-chaika
Hierarchical Cluster Engine Python language binding
app.UrlsToBatchTask.UrlsToBatchTask Class Reference
Inheritance diagram for app.UrlsToBatchTask.UrlsToBatchTask (image not available in this text rendering; the class derives from foundation.CementApp):
Collaboration diagram for app.UrlsToBatchTask.UrlsToBatchTask:

Classes

class  Meta
 

Public Member Functions

def __init__ (self)
 
def setup (self)
 
def run (self)
 
def getInputPickle (self)
 
def unpickleInput (self, input_pickle)
 
def loadListOfURLs (self, input_unpickled_obj)
 
def getListOfUniqueURLs (self, list_of_url_obj)
 
def createBatchId (self)
 
def createBatchItems (self, list_of_uniq_urls)
 
def createOutputBatch (self, batch_id, list_of_batch_items)
 
def createOutputPickle (self, output_batch)
 
def sendPickle (self, output_pickle)
 
def process (self)
 

Public Attributes

 logger
 
 exitCode
 
 id
 

Static Public Attributes

int STATUS_EMPTY_BATCH = 2
 
string MSG_ERROR_EMPTY_CONFIG_FILE_NAME = "Config file name is empty."
 
string MSG_ERROR_WRONG_CONFIG_FILE_NAME = "Config file name is wrong"
 
string MSG_ERROR_LOAD_APP_CONFIG = "Error loading application config file."
 
string MSG_ERROR_READ_LOG_CONFIG = "Error read log config file."
 
string MSG_ERROR_EXIT_STATUS = "Execution"
 
string MSG_DEBUG_INPUT_PICKLE = "Input pickle: "
 
string MSG_DEBUG_INPUT_UNPICKLE = "Input unpickle: "
 
string MSG_DEBUG_LEN_URL_LIST = "Input url list count: "
 
string MSG_DEBUG_INPUT_URL_LIST = "Append url: "
 
string MSG_DEBUG_UNIQ_URL_LIST = "Append uniq url: "
 
string MSG_DEBUG_OUTPUT_BATCH_ITEM = "Output batch item: "
 
string MSG_DEBUG_OUTPUT_BATCH = "Output batch: "
 
string MSG_DEBUG_OUTPUT_PICKLE = "Output pickle: "
 
string MSG_DEBUG_SEND_PICKLE = "Send pickle. Done."
 
string MSG_ERROR_UNKNOWN_EXCEPTION = "Unknown exception!"
 
string MSG_DEBUG_EMPTY_BATCH = "Empty Batch, exit code " + str(STATUS_EMPTY_BATCH)
 
string URLS_TO_BATCH_TASK_OPTION_LOG = "log"
 

Private Member Functions

def __initApp (self)
 
def __loadAppConfig (self, configName)
 
def __loadLogConfig (self, configName)
 

Detailed Description

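Cement application that converts a list of URL objects into a batch. It reads a pickled event from standard input, takes the URL list from the event's eventObject, wraps each URL in a BatchItem, and writes the resulting pickled Batch to standard output. The exit code is set to STATUS_EMPTY_BATCH (2) when the output batch has no items, and to APP_CONSTS.EXIT_FAILURE when processing raises an exception. Note that URL de-duplication via getListOfUniqueURLs() is currently commented out in process(), so the input list is used as-is.

Definition at line 42 of file UrlsToBatchTask.py.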

Constructor & Destructor Documentation

◆ __init__()

def app.UrlsToBatchTask.UrlsToBatchTask.__init__ (   self)

Definition at line 78 of file UrlsToBatchTask.py.

78  def __init__(self):
79  # call base class __init__ method
80  foundation.CementApp.__init__(self)
81 
82  self.logger = None
83  self.exitCode = APP_CONSTS.EXIT_SUCCESS
84 
85 
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ __initApp()

def app.UrlsToBatchTask.UrlsToBatchTask.__initApp (   self)
private

Definition at line 111 of file UrlsToBatchTask.py.

111  def __initApp(self):
112  if self.pargs.config:
113  self.__loadLogConfig(self.__loadAppConfig(self.pargs.config))
114  else:
115  raise Exception(self.MSG_ERROR_LOAD_APP_CONFIG)
116 
117 
def __initApp(self, configName=None)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ __loadAppConfig()

def app.UrlsToBatchTask.UrlsToBatchTask.__loadAppConfig (   self,
  configName 
)
private

Definition at line 122 of file UrlsToBatchTask.py.

122  def __loadAppConfig(self, configName):
123  # variable for result
124  confLogFileName = ""
125 
126  try:
127  config = ConfigParser.ConfigParser()
128  config.optionxform = str
129 
130  readOk = config.read(configName)
131 
132  if len(readOk) == 0:
133  raise Exception(self.MSG_ERROR_WRONG_CONFIG_FILE_NAME + ": " + configName)
134 
135  if config.has_section(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME):
136  confLogFileName = str(config.get(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME,
137  self.URLS_TO_BATCH_TASK_OPTION_LOG))
138 
139  except Exception, err:
140  raise Exception(self.MSG_ERROR_LOAD_APP_CONFIG + ' ' + str(err))
141 
142  return confLogFileName
143 
144 
Here is the caller graph for this function:

◆ __loadLogConfig()

def app.UrlsToBatchTask.UrlsToBatchTask.__loadLogConfig (   self,
  configName 
)
private

Definition at line 149 of file UrlsToBatchTask.py.

149  def __loadLogConfig(self, configName):
150  try:
151  if isinstance(configName, str) and len(configName) == 0:
152  raise Exception(self.MSG_ERROR_EMPTY_CONFIG_FILE_NAME)
153 
154  logging.config.fileConfig(configName)
155 
156  # call rotation log files and initialization logger
157  self.logger = Utils.MPLogger().getLogger()
158 
159  except Exception, err:
160  raise Exception(self.MSG_ERROR_READ_LOG_CONFIG + ' ' + str(err))
161 
162 
163 
Here is the call graph for this function:
Here is the caller graph for this function:

◆ createBatchId()

def app.UrlsToBatchTask.UrlsToBatchTask.createBatchId (   self)

Definition at line 199 of file UrlsToBatchTask.py.

199  def createBatchId(self):
200  idGenerator = IDGenerator()
201  #batch_id = ctypes.c_uint32(zlib.crc32(idGenerator.get_connection_uid(), int(time.time()))).value
202  batch_id = self.id = getHash(idGenerator.get_connection_uid())
203 
204  return batch_id
205 
206 
def getHash(strBuf, binSize=32, digestType=0, fixedMode=0, valLimit=18446744073709552000L)
Definition: Utils.py:1649
Here is the caller graph for this function:

◆ createBatchItems()

def app.UrlsToBatchTask.UrlsToBatchTask.createBatchItems (   self,
  list_of_uniq_urls 
)

Definition at line 207 of file UrlsToBatchTask.py.

207  def createBatchItems(self, list_of_uniq_urls):
208  list_of_batch_items = []
209  for url_obj in list_of_uniq_urls:
210  url_obj.contentMask = dc_event.URL.CONTENT_STORED_ON_DISK
211  site_id = url_obj.siteId
212  url_id = url_obj.urlMd5
213  batch_item = BatchItem(site_id, url_id, url_obj)
214  self.logger.debug(self.MSG_DEBUG_OUTPUT_BATCH_ITEM + Utils.varDump(batch_item))
215  list_of_batch_items.append(batch_item)
216 
217  return list_of_batch_items
218 
219 
Here is the caller graph for this function:

◆ createOutputBatch()

def app.UrlsToBatchTask.UrlsToBatchTask.createOutputBatch (   self,
  batch_id,
  list_of_batch_items 
)

Definition at line 220 of file UrlsToBatchTask.py.

220  def createOutputBatch(self, batch_id, list_of_batch_items):
221  output_batch = Batch(batch_id, list_of_batch_items)
222  self.logger.info("Output batch id: %s, items: %s", str(output_batch.id), str(len(output_batch.items)))
223  self.logger.debug(self.MSG_DEBUG_OUTPUT_BATCH + varDump(output_batch))
224 
225  return output_batch
226 
227 
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
Here is the call graph for this function:
Here is the caller graph for this function:

◆ createOutputPickle()

def app.UrlsToBatchTask.UrlsToBatchTask.createOutputPickle (   self,
  output_batch 
)

Definition at line 228 of file UrlsToBatchTask.py.

228  def createOutputPickle(self, output_batch):
229  output_pickle = pickle.dumps(output_batch)
230  #self.logger.debug(self.MSG_DEBUG_OUTPUT_PICKLE + str(output_pickle))
231 
232  return output_pickle
233 
234 
Here is the caller graph for this function:

◆ getInputPickle()

def app.UrlsToBatchTask.UrlsToBatchTask.getInputPickle (   self)

Definition at line 164 of file UrlsToBatchTask.py.

164  def getInputPickle(self):
165  input_pickle = sys.stdin.read()
166  #self.logger.debug(self.MSG_DEBUG_INPUT_PICKLE + '\n' + str(input_pickle))
167 
168  return input_pickle
169 
170 
Here is the caller graph for this function:

◆ getListOfUniqueURLs()

def app.UrlsToBatchTask.UrlsToBatchTask.getListOfUniqueURLs (   self,
  list_of_url_obj 
)

Definition at line 190 of file UrlsToBatchTask.py.

190  def getListOfUniqueURLs(self, list_of_url_obj):
191  seen = set()
192  list_of_uniq_urls = [url_obj for url_obj in list_of_url_obj if url_obj.urlMd5 not in seen and
193  not seen.add(url_obj.urlMd5)]
194  self.logger.debug(self.MSG_DEBUG_UNIQ_URL_LIST + Utils.varDump(list_of_uniq_urls))
195 
196  return list_of_uniq_urls
197 
198 

◆ loadListOfURLs()

def app.UrlsToBatchTask.UrlsToBatchTask.loadListOfURLs (   self,
  input_unpickled_obj 
)

Definition at line 182 of file UrlsToBatchTask.py.

182  def loadListOfURLs(self, input_unpickled_obj):
183  list_of_url_obj = input_unpickled_obj
184  self.logger.info(self.MSG_DEBUG_LEN_URL_LIST + str(len(list_of_url_obj)))
185  self.logger.debug(self.MSG_DEBUG_INPUT_URL_LIST + varDump(list_of_url_obj))
186 
187  return list_of_url_obj
188 
189 
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
Here is the call graph for this function:
Here is the caller graph for this function:

◆ process()

def app.UrlsToBatchTask.UrlsToBatchTask.process (   self)

Definition at line 240 of file UrlsToBatchTask.py.

240  def process(self):
241  try:
242  input_pickle = self.getInputPickle()
243  input_unpickled_obj = self.unpickleInput(input_pickle)
244  list_of_url_obj = self.loadListOfURLs(input_unpickled_obj)
245 # list_of_uniq_urls = self.getListOfUniqueURLs(list_of_url_obj)
246  list_of_uniq_urls = list_of_url_obj
247  batch_id = self.createBatchId()
248 
249  self.logger.debug('>>> list_of_uniq_urls: ' + varDump(list_of_uniq_urls))
250 
251  list_of_batch_items = self.createBatchItems(list_of_uniq_urls)
252  output_batch = self.createOutputBatch(batch_id, list_of_batch_items)
253  output_pickle = self.createOutputPickle(output_batch)
254  self.sendPickle(output_pickle)
255 
256  if len(output_batch.items) == 0:
257  self.logger.debug(self.MSG_DEBUG_EMPTY_BATCH)
258  self.exitCode = self.STATUS_EMPTY_BATCH
259 
260  except Exception:
261  self.exitCode = APP_CONSTS.EXIT_FAILURE
262 
263 
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
Here is the call graph for this function:
Here is the caller graph for this function:

◆ run()

def app.UrlsToBatchTask.UrlsToBatchTask.run (   self)

Definition at line 93 of file UrlsToBatchTask.py.

93  def run(self):
94  # call base class run method
95  foundation.CementApp.run(self)
96 
97  # call initialization application
98  self.__initApp()
99 
100  # call internal processing
101  self.process()
102 
103  # Finish logging
104  self.logger.info(APP_CONSTS.LOGGER_DELIMITER_LINE)
105 
106 
Here is the call graph for this function:

◆ sendPickle()

def app.UrlsToBatchTask.UrlsToBatchTask.sendPickle (   self,
  output_pickle 
)

Definition at line 235 of file UrlsToBatchTask.py.

235  def sendPickle(self, output_pickle):
236  sys.stdout.write(output_pickle)
237  self.logger.debug(self.MSG_DEBUG_SEND_PICKLE)
238 
239 
Here is the caller graph for this function:

◆ setup()

def app.UrlsToBatchTask.UrlsToBatchTask.setup (   self)

Definition at line 87 of file UrlsToBatchTask.py.

87  def setup(self):
88  # call base class setup method
89  foundation.CementApp.setup(self)
90 
91 

◆ unpickleInput()

def app.UrlsToBatchTask.UrlsToBatchTask.unpickleInput (   self,
  input_pickle 
)

Definition at line 171 of file UrlsToBatchTask.py.

171  def unpickleInput(self, input_pickle):
172  #input_unpickled_obj = pickle.loads(input_pickle).eventObject
173  input_unpickled = pickle.loads(input_pickle)
174  self.logger.debug('>>> input_unpickled: ' + Utils.varDump(input_unpickled))
175 
176  input_unpickled_obj = input_unpickled.eventObject
177  self.logger.debug(self.MSG_DEBUG_INPUT_UNPICKLE + '\n' + Utils.varDump(input_unpickled_obj))
178 
179  return input_unpickled_obj
180 
181 
Here is the caller graph for this function:

Member Data Documentation

◆ exitCode

app.UrlsToBatchTask.UrlsToBatchTask.exitCode

Definition at line 83 of file UrlsToBatchTask.py.

◆ id

app.UrlsToBatchTask.UrlsToBatchTask.id

Definition at line 202 of file UrlsToBatchTask.py.

◆ logger

app.UrlsToBatchTask.UrlsToBatchTask.logger

Definition at line 82 of file UrlsToBatchTask.py.

◆ MSG_DEBUG_EMPTY_BATCH

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_EMPTY_BATCH = "Empty Batch, exit code " + str(STATUS_EMPTY_BATCH)
static

Definition at line 64 of file UrlsToBatchTask.py.

◆ MSG_DEBUG_INPUT_PICKLE

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_INPUT_PICKLE = "Input pickle: "
static

Definition at line 54 of file UrlsToBatchTask.py.

◆ MSG_DEBUG_INPUT_UNPICKLE

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_INPUT_UNPICKLE = "Input unpickle: "
static

Definition at line 55 of file UrlsToBatchTask.py.

◆ MSG_DEBUG_INPUT_URL_LIST

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_INPUT_URL_LIST = "Append url: "
static

Definition at line 57 of file UrlsToBatchTask.py.

◆ MSG_DEBUG_LEN_URL_LIST

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_LEN_URL_LIST = "Input url list count: "
static

Definition at line 56 of file UrlsToBatchTask.py.

◆ MSG_DEBUG_OUTPUT_BATCH

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_OUTPUT_BATCH = "Output batch: "
static

Definition at line 60 of file UrlsToBatchTask.py.

◆ MSG_DEBUG_OUTPUT_BATCH_ITEM

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_OUTPUT_BATCH_ITEM = "Output batch item: "
static

Definition at line 59 of file UrlsToBatchTask.py.

◆ MSG_DEBUG_OUTPUT_PICKLE

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_OUTPUT_PICKLE = "Output pickle: "
static

Definition at line 61 of file UrlsToBatchTask.py.

◆ MSG_DEBUG_SEND_PICKLE

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_SEND_PICKLE = "Send pickle. Done."
static

Definition at line 62 of file UrlsToBatchTask.py.

◆ MSG_DEBUG_UNIQ_URL_LIST

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_UNIQ_URL_LIST = "Append uniq url: "
static

Definition at line 58 of file UrlsToBatchTask.py.

◆ MSG_ERROR_EMPTY_CONFIG_FILE_NAME

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_ERROR_EMPTY_CONFIG_FILE_NAME = "Config file name is empty."
static

Definition at line 48 of file UrlsToBatchTask.py.

◆ MSG_ERROR_EXIT_STATUS

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_ERROR_EXIT_STATUS = "Execution"
static

Definition at line 53 of file UrlsToBatchTask.py.

◆ MSG_ERROR_LOAD_APP_CONFIG

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_ERROR_LOAD_APP_CONFIG = "Error loading application config file."
static

Definition at line 50 of file UrlsToBatchTask.py.

◆ MSG_ERROR_READ_LOG_CONFIG

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_ERROR_READ_LOG_CONFIG = "Error read log config file."
static

Definition at line 51 of file UrlsToBatchTask.py.

◆ MSG_ERROR_UNKNOWN_EXCEPTION

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_ERROR_UNKNOWN_EXCEPTION = "Unknown exception!"
static

Definition at line 63 of file UrlsToBatchTask.py.

◆ MSG_ERROR_WRONG_CONFIG_FILE_NAME

string app.UrlsToBatchTask.UrlsToBatchTask.MSG_ERROR_WRONG_CONFIG_FILE_NAME = "Config file name is wrong"
static

Definition at line 49 of file UrlsToBatchTask.py.

◆ STATUS_EMPTY_BATCH

int app.UrlsToBatchTask.UrlsToBatchTask.STATUS_EMPTY_BATCH = 2
static

Definition at line 45 of file UrlsToBatchTask.py.

◆ URLS_TO_BATCH_TASK_OPTION_LOG

string app.UrlsToBatchTask.UrlsToBatchTask.URLS_TO_BATCH_TASK_OPTION_LOG = "log"
static

Definition at line 67 of file UrlsToBatchTask.py.


The documentation for this class was generated from the following file: