__init__(self, isAbortedByTTL=None) | dc_crawler.CollectURLs.CollectURLs | |
_normalize_attributes(self, kv) | dc_crawler.CollectURLs.CollectURLs | private |
autoDetectMime | dc_crawler.CollectURLs.CollectURLs | |
autoRemoveProps | dc_crawler.CollectURLs.CollectURLs | |
baseUrl | dc_crawler.CollectURLs.CollectURLs | |
batchItem | dc_crawler.CollectURLs.CollectURLs | |
BINARY_CONTENT_TYPE_PATTERN | dc_crawler.CollectURLs.CollectURLs | static |
checkFieldsIsNone(self) | dc_crawler.CollectURLs.CollectURLs | |
COLLECT_POST_DATA | dc_crawler.CollectURLs.CollectURLs | static |
COLLECT_POST_DATA_NAME | dc_crawler.CollectURLs.CollectURLs | static |
crawledResource | dc_crawler.CollectURLs.CollectURLs | |
dbWrapper | dc_crawler.CollectURLs.CollectURLs | |
DC_URLS_TABLE_PREFIX | dc_crawler.CollectURLs.CollectURLs | static |
DETECT_MIME_COLLECTED_URL | dc_crawler.CollectURLs.CollectURLs | static |
DETECT_MIME_MAIN_CONTENT | dc_crawler.CollectURLs.CollectURLs | static |
DETECT_MIME_TIMEOUT | dc_crawler.CollectURLs.CollectURLs | static |
dom | dc_crawler.CollectURLs.CollectURLs | |
evaluateDateMacro(self, localPattern, dateFromat) | dc_crawler.CollectURLs.CollectURLs | |
extractFormURL(self, dom, siteProperties) | dc_crawler.CollectURLs.CollectURLs | |
feed | dc_crawler.CollectURLs.CollectURLs | |
feedElementsProcessing(self, urlMd5, httpCode, elemUrl, localSiteId, localUrlObj, localUrl, params, maxURLsFromPage, rootFeed=False) | dc_crawler.CollectURLs.CollectURLs | |
feedItems | dc_crawler.CollectURLs.CollectURLs | |
feedparserParseDateFixes(self, aDateString) | dc_crawler.CollectURLs.CollectURLs | |
filtersApply(inputFilters, subject, depth, wrapper, siteId, fields=None, opCode=Filters.OC_RE, stage=Filters.STAGE_COLLECT_URLS, selectSubject=None, defaultValue=False) | dc_crawler.CollectURLs.CollectURLs | static |
getFieldParams(self, formFields, postForms, siteId) | dc_crawler.CollectURLs.CollectURLs | |
insertNewSiteProperties(self, params, wrapper, siteId) | dc_crawler.CollectURLs.CollectURLs | |
isAbortedByTTL | dc_crawler.CollectURLs.CollectURLs | |
PATTERN_WITH_PROTOCOL | dc_crawler.CollectURLs.CollectURLs | static |
postForms | dc_crawler.CollectURLs.CollectURLs | |
process(self, httpCode, readOnly=False, httpApplyHeaders=None, proxyName=None) | dc_crawler.CollectURLs.CollectURLs | |
processContentTypes | dc_crawler.CollectURLs.CollectURLs | |
processorName | dc_crawler.CollectURLs.CollectURLs | |
processProcessor(self, urlSet, dom, urlXpathList, urlObj) | dc_crawler.CollectURLs.CollectURLs | |
realUrl | dc_crawler.CollectURLs.CollectURLs | |
robotsParser | dc_crawler.CollectURLs.CollectURLs | |
site | dc_crawler.CollectURLs.CollectURLs | |
siteProperties | dc_crawler.CollectURLs.CollectURLs | |
url | dc_crawler.CollectURLs.CollectURLs | |
urlProcess | dc_crawler.CollectURLs.CollectURLs | |
urlsXpathList | dc_crawler.CollectURLs.CollectURLs | |
urlXpathList | dc_crawler.CollectURLs.CollectURLs | |