HCE Project: Python-language Distributed Tasks Manager Application, Distributed Crawler Application, and client API bindings. Version 2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc_processor.Scraper.Scraper Member List

This is the complete list of members for dc_processor.Scraper.Scraper, including all inherited members.

__init__(self, usageModel=APP_CONSTS.APP_USAGE_MODEL_PROCESS, configFile=None, logger=None, inputData=None) dc_processor.Scraper.Scraper
addCustomTag(self, result, tag_name, tag_value) dc_processor.Scraper.Scraper
adjustLinkURL(self, response) dc_processor.Scraper.Scraper
adjustPartialReferences(self, response) dc_processor.Scraper.Scraper
adjustTitle(self, response) dc_processor.Scraper.Scraper
algorithm_name dc_processor.Scraper.Scraper
altTagsMask dc_processor.Scraper.Scraper
applyHTTPRedirectLink(self, siteId, url, properties, response) dc_processor.Scraper.Scraper
applyPostProcessing(self, result, key, postProcessingRE) dc_processor.Scraper.Scraper
applyPubdate(self, response, pubdate) dc_processor.Scraper.Scraper
article dc_processor.Scraper.Scraper
attrConditions dc_processor.Scraper.Scraper
baseUrl dc_processor.Scraper.Scraper
calcUrlDomainCrc(self, url) dc_processor.Scraper.Scraper
checkDOMElement(self, elem) dc_processor.Scraper.Scraper
checkMediaTag(self, urlStringMedia) dc_processor.Scraper.Scraper
commonResultOperations(self, result) dc_processor.Scraper.Scraper
compileResults(self, result, resultsList, key, xPathPreparing=None) dc_processor.Scraper.Scraper
config dc_processor.Scraper.Scraper
configFile dc_processor.Scraper.Scraper
createArticle(self) dc_processor.Scraper.Scraper
createModule(self, module_name) dc_processor.Scraper.Scraper
dataUrlsCanonizator(self, data, baseUrl=None, useAdditionEncoding=False) dc_processor.Scraper.Scraper
datetimeNewsNames dc_processor.Scraper.Scraper
datetimeTemplateTypes dc_processor.Scraper.Scraper
dbWrapper dc_processor.Scraper.Scraper
elemUrlsCanoizator(self, data, baseUrl=None, firstDelim=' ', secondDelim=',', useAdditionEncoding=False) dc_processor.Scraper.Scraper
entry dc_processor.Scraper.Scraper
errorMask dc_processor.Scraper.Scraper
exitCode dc_processor.Scraper.Scraper
extractAdditionTagsByScrapy(self, localResult, key, tagsXpaths) dc_processor.Scraper.Scraper
extractBaseUrlRssFeed(self, siteId, url) dc_processor.Scraper.Scraper
extractFeedUrlRssFeed(self, siteId, url) dc_processor.Scraper.Scraper
extractor dc_processor.Scraper.Scraper
extractors dc_processor.Scraper.Scraper
extractPubDate(self, response, dataTagName) dc_processor.Scraper.Scraper
extractPubdateRssFeed(self, siteId, url) dc_processor.Scraper.Scraper
feedParserProcess(self) dc_processor.Scraper.Scraper
formatOutpuElement(self, elem, localOutputFormat) dc_processor.Scraper.Scraper
formatOutputData(self, response, localOutputFormat) dc_processor.Scraper.Scraper
formatTag(self, result, path, key, pathDict, isExtract) dc_processor.Scraper.Scraper
getBestDatatimeData(self, data) dc_processor.Scraper.Scraper
getDomainsForUrlSourcesRules(self, urlSourcesRules) dc_processor.Scraper.Scraper
getExitCode(self) dc_processor.Scraper.Scraper
getExtractorByName(self, extractorName) dc_processor.Scraper.Scraper
getHeaderContent(self, siteId, url) dc_processor.Scraper.Scraper
getNextBestExtractor(self) dc_processor.Scraper.Scraper
getProcessedContent(self, result) dc_processor.Scraper.Scraper
getTemplate(self, explicit=True) dc_processor.Scraper.Scraper
getVariableFromHeaderContent(self, headerContent, name, makeDecode=True) dc_processor.Scraper.Scraper
input_data dc_processor.Scraper.Scraper
itr dc_processor.Scraper.Scraper
loadConfig(self) dc_processor.Scraper.Scraper
loadExtractors(self) dc_processor.Scraper.Scraper
loadLogConfigFile(self) dc_processor.Scraper.Scraper
loadOptions(self) dc_processor.Scraper.Scraper
loadScraperProperties(self) dc_processor.Scraper.Scraper
logger dc_processor.Scraper.Scraper
mediaLimitsHandler dc_processor.Scraper.Scraper
message_queue dc_processor.Scraper.Scraper
metrics dc_processor.Scraper.Scraper
MSG_ERROR_WRONG_CONFIG_FILE_NAME dc_processor.Scraper.Scraper static
newsExtraction(self) dc_processor.Scraper.Scraper
normalizeAuthor(self, confProp, procProp, response) dc_processor.Scraper.Scraper
normalizeDatetime(self, response, algorithmName) dc_processor.Scraper.Scraper
output_data dc_processor.Scraper.Scraper
outputFormat dc_processor.Scraper.Scraper
parseFeed(self) dc_processor.Scraper.Scraper
postprocessing(self, result, rule, tag) dc_processor.Scraper.Scraper
prepareResults(self, resultsList) dc_processor.Scraper.Scraper
preparseResponse(self, response) dc_processor.Scraper.Scraper
process(self, config) dc_processor.Scraper.Scraper
processBatch(self) dc_processor.Scraper.Scraper
processedContent dc_processor.Scraper.Scraper
processingHTMLData(self, htmlBuf, bufFormat) dc_processor.Scraper.Scraper
properties dc_processor.Scraper.Scraper
pubdate dc_processor.Scraper.Scraper
pubdateMonthOrder(self, rawPubdate, properties, urlString) dc_processor.Scraper.Scraper
pubdateTransform(self, rawPubdate, rawTimezone, properties, urlString) dc_processor.Scraper.Scraper
refineBadDateTags(self, response) dc_processor.Scraper.Scraper
refineCommonText(self, tagName, result) dc_processor.Scraper.Scraper
replaceLoopValue(self, buf, replaceFrom, replaceTo) dc_processor.Scraper.Scraper
run(self) dc_processor.Scraper.Scraper
scraperPropFileName dc_processor.Scraper.Scraper
setup(self) dc_processor.Scraper.Scraper
splitMediaTagString(self, urlStringMedia) dc_processor.Scraper.Scraper
sqliteTimeout dc_processor.Scraper.Scraper
tagReduceMask dc_processor.Scraper.Scraper
tagsCount dc_processor.Scraper.Scraper
tagsMask dc_processor.Scraper.Scraper
tagsTypes dc_processor.Scraper.Scraper
templateExtraction(self, config, urlHost) dc_processor.Scraper.Scraper
urlHost dc_processor.Scraper.Scraper
urlSourcesRules dc_processor.Scraper.Scraper
usageModel dc_processor.Scraper.Scraper
useCurrentYear dc_processor.Scraper.Scraper
WWW_PREFIX dc_processor.Scraper.Scraper static
xpathSplitString dc_processor.Scraper.Scraper