# Console logging for this script: every record from DEBUG upwards is sent to
# a stream handler using a "time - logger name - level - message" layout.
formatter = logging.Formatter(
  '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Stream handler created with no explicit stream argument.
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)

# The logger name is taken from the application constants.
logger = logging.getLogger(APP_CONSTS.LOGGER_NAME)
logger.setLevel(logging.DEBUG)
if __name__ == '__main__':
  # Manual smoke run: pushes a single batch item through LinkResolver and logs
  # the batch before and after, followed by the resolved link.
  #
  # NOTE(review): siteId, urlContentResponse, batch and
  # postProcessingApplicationClass are used below but are not defined in the
  # visible part of this script -- presumably they are set up elsewhere in the
  # file; confirm before running.

  configName = '../../ini/postprocessor_task.ini'
  headerFileName = '../../ini/crawler-task_headers.txt'

  # Build the parser, keep option names case-sensitive (optionxform = str),
  # then hand it to the application class before reading the ini file.
  configParser = ConfigParser.ConfigParser()
  configParser.optionxform = str
  postProcessingApplicationClass.configParser = configParser
  readOk = configParser.read(configName)
  logger.debug("Read config: %s", str(readOk))

  # Override the headers file used by the LinkResolver section.
  configParser.set('LinkResolver', 'headers_file', headerFileName)

  url = 'http://127.0.0.1/test.html,https://retrip.jp/external-link/?article_content_id=482406'
  urlObj = URL(siteId, url)

  # Processed content is stored as base64-encoded JSON.
  processedContent = {'link':url}
  serializedContent = json.dumps(processedContent)
  processedContents = [base64.b64encode(serializedContent)]

  batchItem = BatchItem(siteId=siteId,
                        urlId=urlObj.urlMd5,
                        urlObj=urlObj,
                        urlContentResponse=urlContentResponse)
  resolveMethods = {"retrip.jp/external-link":"GET"}
  batchItem.properties = {"LINK_RESOLVE":{"method":resolveMethods}}

  logger.debug("Input batch: %s", varDump(batch))

  linkResolver = LinkResolver(logger,
                              postProcessingApplicationClass.getConfigOption)

  # Replace every batch item in place with its processed counterpart.
  for idx, item in enumerate(batch.items):
    batch.items[idx] = linkResolver.processBatchItem(item)

  logger.debug("Output batch: %s", varDump(batch))

  # Decode the first processed content back from base64/JSON to show the link.
  firstResponse = batch.items[0].urlContentResponse
  decodedContent = json.loads(base64.b64decode(firstResponse.processedContents[0]))
  logger.debug("Resolved url: %s", str(decodedContent['link']))
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)