__init__(self) | dc_processor.ProcessorTask.ProcessorTask | |
accumulatedBatchItems | dc_processor.ProcessorTask.ProcessorTask | |
accumulateProcessing | dc_processor.ProcessorTask.ProcessorTask | |
addAdditionalValue(self, buf, name, value) | dc_processor.ProcessorTask.ProcessorTask | |
algorithmsClass | dc_processor.ProcessorTask.ProcessorTask | |
algorithmsModel | dc_processor.ProcessorTask.ProcessorTask | |
algorithmsModule | dc_processor.ProcessorTask.ProcessorTask | |
batchItem | dc_processor.ProcessorTask.ProcessorTask | |
batchSites | dc_processor.ProcessorTask.ProcessorTask | |
config | dc_processor.ProcessorTask.ProcessorTask | |
convertRawContentCharset(self, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
convertTemplateFormat(self, batchItem, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
createUniqueMultiItemsUrl(self, url, counter) | dc_processor.ProcessorTask.ProcessorTask | |
db_task_ini | dc_processor.ProcessorTask.ProcessorTask | |
DBConnector | dc_processor.ProcessorTask.ProcessorTask | |
exit_code | dc_processor.ProcessorTask.ProcessorTask | |
extendBatchItemsWithChain(self, batchItems) | dc_processor.ProcessorTask.ProcessorTask | |
extendProcessorProperties(self, batchItemDict, siteProperties) | dc_processor.ProcessorTask.ProcessorTask | |
extendTemplateFromSource(self, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
filters | dc_processor.ProcessorTask.ProcessorTask | |
filtersApply(self, localValue, wrapper, siteId, fields=None, opCode=Filters.OC_RE, stage=Filters.STAGE_ALL, defaultRet=False) | dc_processor.ProcessorTask.ProcessorTask | |
getExitCode(self) | dc_processor.ProcessorTask.ProcessorTask | |
getProcessedContent(self, template, scraperResponse, errorMask) | dc_processor.ProcessorTask.ProcessorTask | |
getProcessorCmd(self, processorName) | dc_processor.ProcessorTask.ProcessorTask | |
getPropValueFromSiteProperties(self, batchItemDict, propName) | dc_processor.ProcessorTask.ProcessorTask | |
getRawContent(self, siteId, url) | dc_processor.ProcessorTask.ProcessorTask | |
getRawContentFromFS(self, batchItem, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
groupResponses | dc_processor.ProcessorTask.ProcessorTask | |
hashed_content | dc_processor.ProcessorTask.ProcessorTask | |
htmlRecover | dc_processor.ProcessorTask.ProcessorTask | |
input_batch | dc_processor.ProcessorTask.ProcessorTask | |
isAllowedSiteLimits(self, siteObj, accumulatedBatchItems) | dc_processor.ProcessorTask.ProcessorTask | |
isDisabledSite(self, site) | dc_processor.ProcessorTask.ProcessorTask | |
isOverlimitMaxResources(self, site, url) | dc_processor.ProcessorTask.ProcessorTask | |
loadConfig(self) | dc_processor.ProcessorTask.ProcessorTask | |
loadLogConfigFile(self) | dc_processor.ProcessorTask.ProcessorTask | |
loadOptions(self) | dc_processor.ProcessorTask.ProcessorTask | |
loadSite(self, batchItem) | dc_processor.ProcessorTask.ProcessorTask | |
loadSiteProperties(self, site, url, batchItem, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
loadURL(self, batchItem) | dc_processor.ProcessorTask.ProcessorTask | |
localTemplate | dc_processor.ProcessorTask.ProcessorTask | |
logger | dc_processor.ProcessorTask.ProcessorTask | |
mapResponse(self, template, crawlingTime, scraperResponse, processorProperties) | dc_processor.ProcessorTask.ProcessorTask | |
mapResponseAdditionSubstitutes(self, buf, errorMask) | dc_processor.ProcessorTask.ProcessorTask | |
mapResponseProcessedContent(self, template, processedContent, removeTrailingComma, entry, processorProperties) | dc_processor.ProcessorTask.ProcessorTask | |
maxExecutionTimeReached | dc_processor.ProcessorTask.ProcessorTask | |
maxExecutionTimeValue | dc_processor.ProcessorTask.ProcessorTask | |
mergeChains(self, chainElem, batchItem, batchItemDict, delimiter=' ') | dc_processor.ProcessorTask.ProcessorTask | |
normMask | dc_processor.ProcessorTask.ProcessorTask | |
objFilters | dc_processor.ProcessorTask.ProcessorTask | |
parseTemplate(self, batchItem, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
process(self, scraperInputObject, batchItem, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
process_time | dc_processor.ProcessorTask.ProcessorTask | |
processBatch(self) | dc_processor.ProcessorTask.ProcessorTask | |
processBatchItemChainSelectStep(self, batchItem, batchItemDict, chainDict) | dc_processor.ProcessorTask.ProcessorTask | |
processBatchItems(self, inputItems) | dc_processor.ProcessorTask.ProcessorTask | |
processBatchItemScrapyStep(self, batchItem) | dc_processor.ProcessorTask.ProcessorTask | |
processBatchItemTemplateFillStep(self, batchItem, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
processBatchItemTemplateSelectStep(self, batchItem, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
processBatchItemURLContentStep(self, batchItem, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
processContentHash(self, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
processorName | dc_processor.ProcessorTask.ProcessorTask | |
processTask(self, batchItem, batchItemDict, withoutProcess=False) | dc_processor.ProcessorTask.ProcessorTask | |
putContent(self, batchItem, processedContent, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
putRawContentOfType(self, batchItems, rawContentData, contentRequestType) | dc_processor.ProcessorTask.ProcessorTask | |
putRawContentsMultiItems(self, siteId, url, batchItems) | dc_processor.ProcessorTask.ProcessorTask | |
putUrlsMultiItems(self, batchItems) | dc_processor.ProcessorTask.ProcessorTask | |
raw_content | dc_processor.ProcessorTask.ProcessorTask | |
raw_data_dir | dc_processor.ProcessorTask.ProcessorTask | |
readFilters(self, site) | dc_processor.ProcessorTask.ProcessorTask | |
readScraperOutputData(self, batchItem, scraperOutputData, siteObj) | dc_processor.ProcessorTask.ProcessorTask | |
readSiteFromDB(self, batchItem) | dc_processor.ProcessorTask.ProcessorTask | |
reduceResponse(self, processingTamplatesDict, templateSelectType, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
removeTemplateElementsByCondition(self, template, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
removeUnprocessedItems | dc_processor.ProcessorTask.ProcessorTask | |
resolveProcessorNameByContentType(self, urlContentType, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
resortProcessedContentsByMetrics(self, batchItemDict, sortedMetric) | dc_processor.ProcessorTask.ProcessorTask | |
run(self) | dc_processor.ProcessorTask.ProcessorTask | |
scraper_response | dc_processor.ProcessorTask.ProcessorTask | |
setDefaultInternalForChainContents(self, chainDict) | dc_processor.ProcessorTask.ProcessorTask | |
setup(self) | dc_processor.ProcessorTask.ProcessorTask | |
signalHandlerTimer(self, signum, frame) | dc_processor.ProcessorTask.ProcessorTask | |
site_table | dc_processor.ProcessorTask.ProcessorTask | |
sourceTemplateExtractor | dc_processor.ProcessorTask.ProcessorTask | |
stickHashedContents(self, listHashedTags, scraperResponse) | dc_processor.ProcessorTask.ProcessorTask | |
template | dc_processor.ProcessorTask.ProcessorTask | |
templateMetricsCalculate(self, template, scraperResponse) | dc_processor.ProcessorTask.ProcessorTask | |
updateProcessedURL(self, batchItem, batchItemDict) | dc_processor.ProcessorTask.ProcessorTask | |
updateURL(self, batchItem, errorMask=None) | dc_processor.ProcessorTask.ProcessorTask | |
updateURLCharset(self, batchItem, charset) | dc_processor.ProcessorTask.ProcessorTask | |
url | dc_processor.ProcessorTask.ProcessorTask | |
wrapper | dc_processor.ProcessorTask.ProcessorTask | |