HCE Project: Python-language Distributed Tasks Manager application, Distributed Crawler application, and client API bindings (version 2.0.0-chaika)
Hierarchical Cluster Engine Python language binding
ScraperCustomJson.py File Reference

Go to the source code of this file.

Classes

class  dc_processor.ScraperCustomJson.ScraperCustomJson
 
class  dc_processor.ScraperCustomJson.Meta
 

Namespaces

 dc_processor.ScraperCustomJson
 

Functions

def dc_processor.ScraperCustomJson.__init__ (self, usageModel=APP_CONSTS.APP_USAGE_MODEL_PROCESS, configFile=None, logger=None, inputData=None)
 
def dc_processor.ScraperCustomJson.setup (self)
 
def dc_processor.ScraperCustomJson.run (self)
 
def dc_processor.ScraperCustomJson.loadConfig (self)
 
def dc_processor.ScraperCustomJson.loadLogConfigFile (self)
 
def dc_processor.ScraperCustomJson.loadOptions (self)
 
def dc_processor.ScraperCustomJson.loadScraperProperties (self)
 
def dc_processor.ScraperCustomJson.processBatch (self)
 
def dc_processor.ScraperCustomJson.loadExtractors (self)
 
def dc_processor.ScraperCustomJson.createModule (self, module_name)
 
def dc_processor.ScraperCustomJson.getNextBestExtractor (self)
 
def dc_processor.ScraperCustomJson.resourceExtraction (self, jsonElem)
 
def dc_processor.ScraperCustomJson.formatOutpuElement (self, elem, localOutputFormat)
 
def dc_processor.ScraperCustomJson.formatOutputData (self, response, localOutputFormat)
 
def dc_processor.ScraperCustomJson.jsonParserExtractor (self, jsonElem)
 
def dc_processor.ScraperCustomJson.getProcessedContent (self, result)
 
def dc_processor.ScraperCustomJson.fillScraperResponse (self, jsonElem)
 
def dc_processor.ScraperCustomJson.generateEmptyResponse (self)
 
def dc_processor.ScraperCustomJson.jsonParserProcess (self)
 
def dc_processor.ScraperCustomJson.getExitCode (self)
 

Variables

int dc_processor.ScraperCustomJson.ERROR_OK = 0
 
int dc_processor.ScraperCustomJson.EXIT_SUCCESS = 0
 
int dc_processor.ScraperCustomJson.EXIT_FAILURE = 1
 
string dc_processor.ScraperCustomJson.MSG_ERROR_LOAD_EXTRACTORS = "Error load extractors "
 
string dc_processor.ScraperCustomJson.ENV_SCRAPER_STORE_PATH = "ENV_SCRAPER_STORE_PATH"
 
list dc_processor.ScraperCustomJson.TAGS_DATETIME_NEWS_NAMES = [CONSTS.TAG_PUB_DATE, CONSTS.TAG_DC_DATE]
 
string dc_processor.ScraperCustomJson.MSG_ERROR_WRONG_CONFIG_FILE_NAME = "Config file name is wrong"
 
list dc_processor.ScraperCustomJson.TAGS_DATETIME_TEMPLATE_TYPES = [CONSTS.TAG_TYPE_DATETIME]
 
string dc_processor.ScraperCustomJson.OPTION_SECTION_DATETIME_TEMPLATE_TYPES = 'tags_datetime_template_types'
 
 dc_processor.ScraperCustomJson.exitCode
 
 dc_processor.ScraperCustomJson.usageModel
 
 dc_processor.ScraperCustomJson.configFile
 
 dc_processor.ScraperCustomJson.logger
 
 dc_processor.ScraperCustomJson.input_data
 
 dc_processor.ScraperCustomJson.properties
 
 dc_processor.ScraperCustomJson.extractor
 
 dc_processor.ScraperCustomJson.extractors
 
 dc_processor.ScraperCustomJson.itr
 
 dc_processor.ScraperCustomJson.pubdate
 
 dc_processor.ScraperCustomJson.timezone
 
 dc_processor.ScraperCustomJson.errorMask
 
 dc_processor.ScraperCustomJson.scraperPropFileName
 
 dc_processor.ScraperCustomJson.algorithm_name
 
 dc_processor.ScraperCustomJson.scraperResponses
 
 dc_processor.ScraperCustomJson.tagsCount
 
 dc_processor.ScraperCustomJson.tagsMask
 
 dc_processor.ScraperCustomJson.processedContent
 
 dc_processor.ScraperCustomJson.outputFormat
 
 dc_processor.ScraperCustomJson.metrics
 
 dc_processor.ScraperCustomJson.altTagsMask
 
 dc_processor.ScraperCustomJson.urlHost
 
 dc_processor.ScraperCustomJson.output_data
 
 dc_processor.ScraperCustomJson.dbWrapper
 
 dc_processor.ScraperCustomJson.datetimeTemplateTypes
 
 dc_processor.ScraperCustomJson.useCurrentYear
 
 dc_processor.ScraperCustomJson.config