__init__(self, usageModel=APP_CONSTS.APP_USAGE_MODEL_PROCESS, configFile=None, logger=None, inputData=None) | dc_processor.Scraper.Scraper | |
addCustomTag(self, result, tag_name, tag_value) | dc_processor.Scraper.Scraper | |
adjustLinkURL(self, response) | dc_processor.Scraper.Scraper | |
adjustPartialReferences(self, response) | dc_processor.Scraper.Scraper | |
adjustTitle(self, response) | dc_processor.Scraper.Scraper | |
algorithm_name | dc_processor.Scraper.Scraper | |
altTagsMask | dc_processor.Scraper.Scraper | |
applyHTTPRedirectLink(self, siteId, url, properties, response) | dc_processor.Scraper.Scraper | |
applyPostProcessing(self, result, key, postProcessingRE) | dc_processor.Scraper.Scraper | |
applyPubdate(self, response, pubdate) | dc_processor.Scraper.Scraper | |
article | dc_processor.Scraper.Scraper | |
attrConditions | dc_processor.Scraper.Scraper | |
baseUrl | dc_processor.Scraper.Scraper | |
calcUrlDomainCrc(self, url) | dc_processor.Scraper.Scraper | |
checkDOMElement(self, elem) | dc_processor.Scraper.Scraper | |
checkMediaTag(self, urlStringMedia) | dc_processor.Scraper.Scraper | |
commonResultOperations(self, result) | dc_processor.Scraper.Scraper | |
compileResults(self, result, resultsList, key, xPathPreparing=None) | dc_processor.Scraper.Scraper | |
config | dc_processor.Scraper.Scraper | |
configFile | dc_processor.Scraper.Scraper | |
createArticle(self) | dc_processor.Scraper.Scraper | |
createModule(self, module_name) | dc_processor.Scraper.Scraper | |
dataUrlsCanonizator(self, data, baseUrl=None, useAdditionEncoding=False) | dc_processor.Scraper.Scraper | |
datetimeNewsNames | dc_processor.Scraper.Scraper | |
datetimeTemplateTypes | dc_processor.Scraper.Scraper | |
dbWrapper | dc_processor.Scraper.Scraper | |
elemUrlsCanoizator(self, data, baseUrl=None, firstDelim=' ', secondDelim=',', useAdditionEncoding=False) | dc_processor.Scraper.Scraper | |
entry | dc_processor.Scraper.Scraper | |
errorMask | dc_processor.Scraper.Scraper | |
exitCode | dc_processor.Scraper.Scraper | |
extractAdditionTagsByScrapy(self, localResult, key, tagsXpaths) | dc_processor.Scraper.Scraper | |
extractBaseUrlRssFeed(self, siteId, url) | dc_processor.Scraper.Scraper | |
extractFeedUrlRssFeed(self, siteId, url) | dc_processor.Scraper.Scraper | |
extractor | dc_processor.Scraper.Scraper | |
extractors | dc_processor.Scraper.Scraper | |
extractPubDate(self, response, dataTagName) | dc_processor.Scraper.Scraper | |
extractPubdateRssFeed(self, siteId, url) | dc_processor.Scraper.Scraper | |
feedParserProcess(self) | dc_processor.Scraper.Scraper | |
formatOutpuElement(self, elem, localOutputFormat) | dc_processor.Scraper.Scraper | |
formatOutputData(self, response, localOutputFormat) | dc_processor.Scraper.Scraper | |
formatTag(self, result, path, key, pathDict, isExtract) | dc_processor.Scraper.Scraper | |
getBestDatatimeData(self, data) | dc_processor.Scraper.Scraper | |
getDomainsForUrlSourcesRules(self, urlSourcesRules) | dc_processor.Scraper.Scraper | |
getExitCode(self) | dc_processor.Scraper.Scraper | |
getExtractorByName(self, extractorName) | dc_processor.Scraper.Scraper | |
getHeaderContent(self, siteId, url) | dc_processor.Scraper.Scraper | |
getNextBestExtractor(self) | dc_processor.Scraper.Scraper | |
getProcessedContent(self, result) | dc_processor.Scraper.Scraper | |
getTemplate(self, explicit=True) | dc_processor.Scraper.Scraper | |
getVariableFromHeaderContent(self, headerContent, name, makeDecode=True) | dc_processor.Scraper.Scraper | |
input_data | dc_processor.Scraper.Scraper | |
itr | dc_processor.Scraper.Scraper | |
loadConfig(self) | dc_processor.Scraper.Scraper | |
loadExtractors(self) | dc_processor.Scraper.Scraper | |
loadLogConfigFile(self) | dc_processor.Scraper.Scraper | |
loadOptions(self) | dc_processor.Scraper.Scraper | |
loadScraperProperties(self) | dc_processor.Scraper.Scraper | |
logger | dc_processor.Scraper.Scraper | |
mediaLimitsHandler | dc_processor.Scraper.Scraper | |
message_queue | dc_processor.Scraper.Scraper | |
metrics | dc_processor.Scraper.Scraper | |
MSG_ERROR_WRONG_CONFIG_FILE_NAME | dc_processor.Scraper.Scraper | static |
newsExtraction(self) | dc_processor.Scraper.Scraper | |
normalizeAuthor(self, confProp, procProp, response) | dc_processor.Scraper.Scraper | |
normalizeDatetime(self, response, algorithmName) | dc_processor.Scraper.Scraper | |
output_data | dc_processor.Scraper.Scraper | |
outputFormat | dc_processor.Scraper.Scraper | |
parseFeed(self) | dc_processor.Scraper.Scraper | |
postprocessing(self, result, rule, tag) | dc_processor.Scraper.Scraper | |
prepareResults(self, resultsList) | dc_processor.Scraper.Scraper | |
preparseResponse(self, response) | dc_processor.Scraper.Scraper | |
process(self, config) | dc_processor.Scraper.Scraper | |
processBatch(self) | dc_processor.Scraper.Scraper | |
processedContent | dc_processor.Scraper.Scraper | |
processingHTMLData(self, htmlBuf, bufFormat) | dc_processor.Scraper.Scraper | |
properties | dc_processor.Scraper.Scraper | |
pubdate | dc_processor.Scraper.Scraper | |
pubdateMonthOrder(self, rawPubdate, properties, urlString) | dc_processor.Scraper.Scraper | |
pubdateTransform(self, rawPubdate, rawTimezone, properties, urlString) | dc_processor.Scraper.Scraper | |
refineBadDateTags(self, response) | dc_processor.Scraper.Scraper | |
refineCommonText(self, tagName, result) | dc_processor.Scraper.Scraper | |
replaceLoopValue(self, buf, replaceFrom, replaceTo) | dc_processor.Scraper.Scraper | |
run(self) | dc_processor.Scraper.Scraper | |
scraperPropFileName | dc_processor.Scraper.Scraper | |
setup(self) | dc_processor.Scraper.Scraper | |
splitMediaTagString(self, urlStringMedia) | dc_processor.Scraper.Scraper | |
sqliteTimeout | dc_processor.Scraper.Scraper | |
tagReduceMask | dc_processor.Scraper.Scraper | |
tagsCount | dc_processor.Scraper.Scraper | |
tagsMask | dc_processor.Scraper.Scraper | |
tagsTypes | dc_processor.Scraper.Scraper | |
templateExtraction(self, config, urlHost) | dc_processor.Scraper.Scraper | |
urlHost | dc_processor.Scraper.Scraper | |
urlSourcesRules | dc_processor.Scraper.Scraper | |
usageModel | dc_processor.Scraper.Scraper | |
useCurrentYear | dc_processor.Scraper.Scraper | |
WWW_PREFIX | dc_processor.Scraper.Scraper | static |
xpathSplitString | dc_processor.Scraper.Scraper | |