Definition at line 42 of file UrlsToBatchTask.py.
◆ __init__()
def app.UrlsToBatchTask.UrlsToBatchTask.__init__ |
( |
|
self | ) |
|
Definition at line 78 of file UrlsToBatchTask.py.
80 foundation.CementApp.__init__(self)
83 self.exitCode = APP_CONSTS.EXIT_SUCCESS
def __init__(self)
constructor
◆ __initApp()
def app.UrlsToBatchTask.UrlsToBatchTask.__initApp |
( |
|
self | ) |
|
|
private |
Definition at line 111 of file UrlsToBatchTask.py.
112 if self.pargs.config:
113 self.__loadLogConfig(self.__loadAppConfig(self.pargs.config))
115 raise Exception(self.MSG_ERROR_LOAD_APP_CONFIG)
def __initApp(self, configName=None)
◆ __loadAppConfig()
def app.UrlsToBatchTask.UrlsToBatchTask.__loadAppConfig |
( |
|
self, |
|
|
|
configName |
|
) |
| |
|
private |
Definition at line 122 of file UrlsToBatchTask.py.
127 config = ConfigParser.ConfigParser()
128 config.optionxform = str
130 readOk = config.read(configName)
133 raise Exception(self.MSG_ERROR_WRONG_CONFIG_FILE_NAME + ": " + configName)
135 if config.has_section(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME):
136 confLogFileName = str(config.get(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME,
137 self.URLS_TO_BATCH_TASK_OPTION_LOG))
139 except Exception, err:
140 raise Exception(self.MSG_ERROR_LOAD_APP_CONFIG + ' ' + str(err))
142 return confLogFileName
def __loadAppConfig(self, configName)
◆ __loadLogConfig()
def app.UrlsToBatchTask.UrlsToBatchTask.__loadLogConfig |
( |
|
self, |
|
|
|
configName |
|
) |
| |
|
private |
Definition at line 149 of file UrlsToBatchTask.py.
151 if isinstance(configName, str) and len(configName) == 0:
152 raise Exception(self.MSG_ERROR_EMPTY_CONFIG_FILE_NAME)
154 logging.config.fileConfig(configName)
157 self.logger = Utils.MPLogger().getLogger()
159 except Exception, err:
160 raise Exception(self.MSG_ERROR_READ_LOG_CONFIG + ' ' + str(err))
def __loadLogConfig(self, configName)
◆ createBatchId()
def app.UrlsToBatchTask.UrlsToBatchTask.createBatchId |
( |
|
self | ) |
|
Definition at line 199 of file UrlsToBatchTask.py.
199 def createBatchId(self):
200 idGenerator = IDGenerator()
202 batch_id = self.id = getHash(idGenerator.get_connection_uid())
def getHash(strBuf, binSize=32, digestType=0, fixedMode=0, valLimit=18446744073709552000L)
◆ createBatchItems()
def app.UrlsToBatchTask.UrlsToBatchTask.createBatchItems |
( |
|
self, |
|
|
|
list_of_uniq_urls |
|
) |
| |
Definition at line 207 of file UrlsToBatchTask.py.
207 def createBatchItems(self, list_of_uniq_urls):
208 list_of_batch_items = []
209 for url_obj in list_of_uniq_urls:
210 url_obj.contentMask = dc_event.URL.CONTENT_STORED_ON_DISK
211 site_id = url_obj.siteId
212 url_id = url_obj.urlMd5
213 batch_item = BatchItem(site_id, url_id, url_obj)
214 self.logger.debug(self.MSG_DEBUG_OUTPUT_BATCH_ITEM + Utils.varDump(batch_item))
215 list_of_batch_items.append(batch_item)
217 return list_of_batch_items
◆ createOutputBatch()
def app.UrlsToBatchTask.UrlsToBatchTask.createOutputBatch |
( |
|
self, |
|
|
|
batch_id, |
|
|
|
list_of_batch_items |
|
) |
| |
Definition at line 220 of file UrlsToBatchTask.py.
220 def createOutputBatch(self, batch_id, list_of_batch_items):
221 output_batch = Batch(batch_id, list_of_batch_items)
222 self.logger.info("Output batch id: %s, items: %s", str(output_batch.id), str(len(output_batch.items)))
223 self.logger.debug(self.MSG_DEBUG_OUTPUT_BATCH + varDump(output_batch))
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
◆ createOutputPickle()
def app.UrlsToBatchTask.UrlsToBatchTask.createOutputPickle |
( |
|
self, |
|
|
|
output_batch |
|
) |
| |
Definition at line 228 of file UrlsToBatchTask.py.
228 def createOutputPickle(self, output_batch):
229 output_pickle = pickle.dumps(output_batch)
◆ getInputPickle()
def app.UrlsToBatchTask.UrlsToBatchTask.getInputPickle |
( |
|
self | ) |
|
Definition at line 164 of file UrlsToBatchTask.py.
164 def getInputPickle(self):
165 input_pickle = sys.stdin.read()
◆ getListOfUniqueURLs()
def app.UrlsToBatchTask.UrlsToBatchTask.getListOfUniqueURLs |
( |
|
self, |
|
|
|
list_of_url_obj |
|
) |
| |
Definition at line 190 of file UrlsToBatchTask.py.
190 def getListOfUniqueURLs(self, list_of_url_obj):
192 list_of_uniq_urls = [url_obj for url_obj in list_of_url_obj if url_obj.urlMd5 not in seen and
193 not seen.add(url_obj.urlMd5)]
194 self.logger.debug(self.MSG_DEBUG_UNIQ_URL_LIST + Utils.varDump(list_of_uniq_urls))
196 return list_of_uniq_urls
◆ loadListOfURLs()
def app.UrlsToBatchTask.UrlsToBatchTask.loadListOfURLs |
( |
|
self, |
|
|
|
input_unpickled_obj |
|
) |
| |
Definition at line 182 of file UrlsToBatchTask.py.
182 def loadListOfURLs(self, input_unpickled_obj):
183 list_of_url_obj = input_unpickled_obj
184 self.logger.info(self.MSG_DEBUG_LEN_URL_LIST + str(len(list_of_url_obj)))
185 self.logger.debug(self.MSG_DEBUG_INPUT_URL_LIST + varDump(list_of_url_obj))
187 return list_of_url_obj
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
◆ process()
def app.UrlsToBatchTask.UrlsToBatchTask.process |
( |
|
self | ) |
|
Definition at line 240 of file UrlsToBatchTask.py.
242 input_pickle = self.getInputPickle()
243 input_unpickled_obj = self.unpickleInput(input_pickle)
244 list_of_url_obj = self.loadListOfURLs(input_unpickled_obj)
246 list_of_uniq_urls = list_of_url_obj
247 batch_id = self.createBatchId()
249 self.logger.debug('>>> list_of_uniq_urls: ' + varDump(list_of_uniq_urls))
251 list_of_batch_items = self.createBatchItems(list_of_uniq_urls)
252 output_batch = self.createOutputBatch(batch_id, list_of_batch_items)
253 output_pickle = self.createOutputPickle(output_batch)
254 self.sendPickle(output_pickle)
256 if len(output_batch.items) == 0:
257 self.logger.debug(self.MSG_DEBUG_EMPTY_BATCH)
258 self.exitCode = self.STATUS_EMPTY_BATCH
261 self.exitCode = APP_CONSTS.EXIT_FAILURE
263 def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
◆ run()
def app.UrlsToBatchTask.UrlsToBatchTask.run |
( |
|
self | ) |
|
Definition at line 93 of file UrlsToBatchTask.py.
95 foundation.CementApp.run(self)
104 self.logger.info(APP_CONSTS.LOGGER_DELIMITER_LINE)
◆ sendPickle()
def app.UrlsToBatchTask.UrlsToBatchTask.sendPickle |
( |
|
self, |
|
|
|
output_pickle |
|
) |
| |
Definition at line 235 of file UrlsToBatchTask.py.
235 def sendPickle(self, output_pickle):
236 sys.stdout.write(output_pickle)
237 self.logger.debug(self.MSG_DEBUG_SEND_PICKLE)
◆ setup()
def app.UrlsToBatchTask.UrlsToBatchTask.setup |
( |
|
self | ) |
|
◆ unpickleInput()
def app.UrlsToBatchTask.UrlsToBatchTask.unpickleInput |
( |
|
self, |
|
|
|
input_pickle |
|
) |
| |
Definition at line 171 of file UrlsToBatchTask.py.
171 def unpickleInput(self, input_pickle):
173 input_unpickled = pickle.loads(input_pickle)
174 self.logger.debug('>>> input_unpickled: ' + Utils.varDump(input_unpickled))
176 input_unpickled_obj = input_unpickled.eventObject
177 self.logger.debug(self.MSG_DEBUG_INPUT_UNPICKLE + '\n' + Utils.varDump(input_unpickled_obj))
179 return input_unpickled_obj
◆ exitCode
app.UrlsToBatchTask.UrlsToBatchTask.exitCode |
◆ id
app.UrlsToBatchTask.UrlsToBatchTask.id |
◆ logger
app.UrlsToBatchTask.UrlsToBatchTask.logger |
◆ MSG_DEBUG_EMPTY_BATCH
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_EMPTY_BATCH = "Empty Batch, exit code " + str(STATUS_EMPTY_BATCH) |
|
static |
◆ MSG_DEBUG_INPUT_PICKLE
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_INPUT_PICKLE = "Input pickle: " |
|
static |
◆ MSG_DEBUG_INPUT_UNPICKLE
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_INPUT_UNPICKLE = "Input unpickle: " |
|
static |
◆ MSG_DEBUG_INPUT_URL_LIST
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_INPUT_URL_LIST = "Append url: " |
|
static |
◆ MSG_DEBUG_LEN_URL_LIST
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_LEN_URL_LIST = "Input url list count: " |
|
static |
◆ MSG_DEBUG_OUTPUT_BATCH
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_OUTPUT_BATCH = "Output batch: " |
|
static |
◆ MSG_DEBUG_OUTPUT_BATCH_ITEM
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_OUTPUT_BATCH_ITEM = "Output batch item: " |
|
static |
◆ MSG_DEBUG_OUTPUT_PICKLE
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_OUTPUT_PICKLE = "Output pickle: " |
|
static |
◆ MSG_DEBUG_SEND_PICKLE
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_SEND_PICKLE = "Send pickle. Done." |
|
static |
◆ MSG_DEBUG_UNIQ_URL_LIST
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_DEBUG_UNIQ_URL_LIST = "Append uniq url: " |
|
static |
◆ MSG_ERROR_EMPTY_CONFIG_FILE_NAME
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_ERROR_EMPTY_CONFIG_FILE_NAME = "Config file name is empty." |
|
static |
◆ MSG_ERROR_EXIT_STATUS
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_ERROR_EXIT_STATUS = "Execution" |
|
static |
◆ MSG_ERROR_LOAD_APP_CONFIG
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_ERROR_LOAD_APP_CONFIG = "Error loading application config file." |
|
static |
◆ MSG_ERROR_READ_LOG_CONFIG
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_ERROR_READ_LOG_CONFIG = "Error read log config file." |
|
static |
◆ MSG_ERROR_UNKNOWN_EXCEPTION
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_ERROR_UNKNOWN_EXCEPTION = "Unknown exception!" |
|
static |
◆ MSG_ERROR_WRONG_CONFIG_FILE_NAME
string app.UrlsToBatchTask.UrlsToBatchTask.MSG_ERROR_WRONG_CONFIG_FILE_NAME = "Config file name is wrong" |
|
static |
◆ STATUS_EMPTY_BATCH
int app.UrlsToBatchTask.UrlsToBatchTask.STATUS_EMPTY_BATCH = 2 |
|
static |
◆ URLS_TO_BATCH_TASK_OPTION_LOG
string app.UrlsToBatchTask.UrlsToBatchTask.URLS_TO_BATCH_TASK_OPTION_LOG = "log" |
|
static |
The documentation for this class was generated from the following file: