3 HCE project, Python bindings, DC service utility. 4 ResponseExtractor utility main application class. 7 @file ResponseExtractor.py 8 @author bgv <developers.hce@gmail.com> 9 @link: http://hierarchical-cluster-engine.com/ 10 @copyright: Copyright © 2015 IOIX Ukraine 11 @license: http://hierarchical-cluster-engine.com/license/ 22 from cement.core
import foundation
36 MSG_ERROR_PARSE_CMD_PARAMS =
"Error parse command line parameters." 37 MSG_ERROR_EMPTY_CONFIG_FILE_NAME =
"Config file name is empty." 38 MSG_ERROR_WRONG_CONFIG_FILE_NAME =
"Config file name is wrong" 39 MSG_ERROR_LOAD_APP_CONFIG =
"Error loading application config file." 40 MSG_ERROR_READ_LOG_CONFIG =
"Error read log config file." 41 MSG_ERROR_PROCESSING_REQUEST =
"Error processing input data." 43 CONFIG_OPTION_ITEM_DELIMITER =
"itemDelimiter" 50 RESULTS_FORMAT_CSV_LINE = 0
51 RESULTS_FORMAT_FIELD_LINE = 1
52 RESULTS_FORMAT_JSON = 2
55 CONFIG_OPTION_LOG =
'log' 60 label = DC_CRAWLER_CONSTS.RTC_PREPROCESSOR_APP_NAME
68 foundation.CementApp.__init__(self)
82 foundation.CementApp.setup(self)
# Register the command line options of the utility. Every option uses
# action='store' without a type, and the parsed values are read back through
# self.pargs and converted with int() where a number is expected.
self.args.add_argument('-c', '--config', action='store', metavar='config_file',
                       help='config ini-file')
self.args.add_argument('-i', '--input', action='store', metavar='input_json_file',
                       help='input json file of the URL_CONTENT response, if omitted the stdin read used')
# The metavar is only the placeholder name shown in the usage line, so the
# explanatory sentence belongs in the help text; the help text itself has to
# describe the output file, not the input one.
self.args.add_argument('-o', '--output', action='store', metavar='output_file',
                       help='output file, if omitted the stdout write used')
self.args.add_argument('-f', '--format', action='store', metavar='input_json_file_format',
                       help='input json file buffer format: -1 - auto (default if omitted) 0 - internal,'
                            ' 1 - news, 2 - rss-feed')
self.args.add_argument('-m', '--maxitems', action='store', metavar='max_items',
                       help='max items number to read')
self.args.add_argument('-s', '--start', action='store', metavar='start_from',
                       help='start from item')
self.args.add_argument('-e', '--extended', action='store', metavar='extended',
                       help='extended log with additional debug information')
self.args.add_argument('-t', '--tags', action='store', metavar='tags',
                       help='csv tags fields names list, all fields from response if omitted')
self.args.add_argument('-r', '--results', action='store', metavar='results',
                       help='results format: 0 - csv fields names list one item per line, '
                            '1 - fields list one field per line, 2 - json (default if omitted)')
113 config = ConfigParser.ConfigParser()
114 config.optionxform = str
116 readOk = config.read(configName)
121 if config.has_section(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME):
122 confLogFileName = str(config.get(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME, self.
CONFIG_OPTION_LOG))
124 self.
itemDelimiter = str(config.get(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME,
126 except Exception, err:
129 return confLogFileName
138 if isinstance(configName, str)
and len(configName) == 0:
141 logging.config.fileConfig(configName)
146 except Exception, err:
155 with open(templateFile,
'r') as f: 166 with open(fileName,
'w')
as f:
173 foundation.CementApp.run(self)
175 startTime = time.time()
177 if self.pargs.config:
183 inputBuffer = self.
readFile(self.pargs.input)
185 inputBuffer = sys.stdin.read()
187 if self.pargs.format:
188 inputFormat = int(self.pargs.format)
192 if self.pargs.maxitems:
193 maxItems = int(self.pargs.maxitems)
197 startFrom = int(self.pargs.start)
204 tags = self.pargs.tags.split(
',')
208 if self.pargs.results:
209 self.
results = int(self.pargs.results)
212 outputBuffer = self.
process(inputBuffer, inputFormat, maxItems, startFrom, tags)
214 self.
logger.info(
"Total time: %s", str(time.time() - startTime))
216 if self.pargs.output:
217 self.
writeFile(self.pargs.output, outputBuffer)
223 self.
logger.info(APP_CONSTS.LOGGER_DELIMITER_LINE)
235 self.
logger.debug(
"News format item processing:\n%s", str(contentObj))
241 for tagName
in contentObj:
256 if 'default' in contentObj
and 'data' in contentObj[
'default']
and 'tagList' in contentObj[
'default'][
'data']
and\
257 isinstance(contentObj[
'default'][
'data'][
'tagList'], list)
and len(contentObj[
'default'][
'data'][
'tagList']) > 0:
258 contentObj = contentObj[
'default'][
'data'][
'tagList'][0]
260 raise Exception(
'Wrong format of the contentObj, structure checks not passed!')
263 self.
logger.debug(
"Internal format item processing:\n%s", str(contentObj))
266 for tagItem
in contentObj:
267 tagName = tagItem[
'name']
270 ret[tagName] = tagValue
286 ret = item[tagName].
decode(
'string_escape')
289 self.
logger.debug(
"Tag `%s` not found as News format, empty value assumed", tagName)
293 if tag[
'name'] == tagName:
295 if len(tag[
'data']) > 0:
296 ret = tag[
'data'][0].
decode(
'string_escape')
299 self.
logger.debug(
"Tag `%s` not found as internal format, empty value assumed", tagName)
302 self.
logger.debug(
"Format %s not supported", str(responseFormat))
315 def parse(self, inputObject, inputFormat, maxItems=-1, startFrom=0, tags=None):
322 for item
in inputObject[
"itemsList"][0][
"itemObject"]:
323 if maxItems > -1
and i == maxItems:
326 if startFrom > 0
and s < startFrom:
330 if len(item[
"processedContents"]) > 0:
332 contentObj = json.loads(base64.b64decode(item[
"processedContents"][0][
"buffer"]))
333 except Exception, err:
334 self.
logger.
error(
"Error get contentObj or cDate: %s, possible wrong json in buffer:\n%s", str(err),
335 str(item[
"processedContents"][0][
"buffer"]))
340 if inputFormatLocal
is None:
341 self.
logger.info(
"Unsupported item object format or empty list:\n%s", str(contentObj))
355 except (KeyboardInterrupt, SystemExit):
357 except Exception, err:
358 self.
logger.
error(
"Error process item: %s, contentObj:\n%s", str(err), str(contentObj))
359 self.
logger.debug(
"%s", Utils.getTracebackInfo())
372 self.
logger.debug(
"Items detected %s, output: %s", str(len(inputObject[
"itemsList"][0][
"itemObject"])), str(items))
382 if isinstance(contentObj, dict):
385 elif isinstance(contentObj, list)
and len(contentObj) > 0:
403 if jsonString
is not None:
404 ret = json.loads(jsonString)
405 except Exception, err:
406 self.
logger.
error(
"Error pars json: %s\n%s", str(err), jsonString)
419 def process(self, inputBuffer, inputFormat=FROMAT_AUTO, maxItems=-1, startFrom=0, tags=None):
423 self.
logger.debug(
"Processing started, tags: %s", str(tags))
425 inputObject = json.loads(inputBuffer)
427 items = self.
parse(inputObject, inputFormat, maxItems, startFrom, tags)
431 ret = json.dumps(items, indent=2, ensure_ascii=
False)
436 buf += tagName +
'=' + item[tagName] +
',' 437 if buf[:-1].strip() !=
'':
443 buf += tagName +
'=' + item[tagName] +
"\n" 444 if buf[:-1].strip() !=
'':
449 except Exception, err:
450 ExceptionLog.handler(self.
logger, err,
"Error:")