HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings. Version 2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc_crawler.URLProcess.URLProcess Member List

This is the complete list of members for dc_crawler.URLProcess.URLProcess, including all inherited members.

__init__(self, protocols=None) | dc_crawler.URLProcess.URLProcess
additionalUrlObjInit(urlObj, urlInitParam, conditionalData) | dc_crawler.URLProcess.URLProcess | static
addURLFromBatchToDB(self, batchItem, crawlerType, recrawlPeriod, autoRemoveProps) | dc_crawler.URLProcess.URLProcess
autoRemoveURL(autoRemoveProps, recrawlPeriod, urlTable, wrapper) | dc_crawler.URLProcess.URLProcess | static
checkDictEmptyStrings(inDict, keys) | dc_crawler.URLProcess.URLProcess | static
checkFieldsIsNone(self, checkList) | dc_crawler.URLProcess.URLProcess
checkUrlByPath(self, url) | dc_crawler.URLProcess.URLProcess
checkUrlByProtocol(self, url) | dc_crawler.URLProcess.URLProcess
conditionEvaluate(condition, conditionalData) | dc_crawler.URLProcess.URLProcess | static
createUrlObjForChain(self, pattern, urlMd5, formMethods, parentMd5, depth, detectedMime, maxURLsFromPage) | dc_crawler.URLProcess.URLProcess
createUrlObjForCollectURLs(self, urlMd5, formMethods, parentMd5, depth, detectedMime, maxURLsFromPage) | dc_crawler.URLProcess.URLProcess
dbWrapper | dc_crawler.URLProcess.URLProcess
DC_URLS_TABLE_PREFIX | dc_crawler.URLProcess.URLProcess | static
DEFAULT_PROTOCOLS | dc_crawler.URLProcess.URLProcess | static
DETECT_MIME_TIMEOUT | dc_crawler.URLProcess.URLProcess | static
detectUrlMime(self, contentTypeMap=None, urlObj=None) | dc_crawler.URLProcess.URLProcess
fillRssFieldInUrlObj(self, oldUrl, objectUrlUlr, batchItem, processorName, feed, rootFeed=False) | dc_crawler.URLProcess.URLProcess
fillRssFieldOneElem(self, entry, urlObj, batchItem, status, crawled, localType) | dc_crawler.URLProcess.URLProcess
getDepthFromUrl(self, urlMd5) | dc_crawler.URLProcess.URLProcess
getRealUrl(self) | dc_crawler.URLProcess.URLProcess
isUpdateCollection | dc_crawler.URLProcess.URLProcess
isUrlExist(self, recrawlPeriod, urlMd5) | dc_crawler.URLProcess.URLProcess
normMask | dc_crawler.URLProcess.URLProcess
PATTERN_WITH_PROTOCOL | dc_crawler.URLProcess.URLProcess | static
processURL(self, realUrl, internalLinks, externalLinks, filtersApply=None, siteFilters=None, baseUrl=None) | dc_crawler.URLProcess.URLProcess
PROTOCOL_PREFIX | dc_crawler.URLProcess.URLProcess | static
protocolsList | dc_crawler.URLProcess.URLProcess
readCurrentCnt(self, maxURLs) | dc_crawler.URLProcess.URLProcess
recrawlUrlUpdateHandler(self, dbWrapper, recrawlUrlUpdateProperty, urlUpdateObj) | dc_crawler.URLProcess.URLProcess
resetErrorMask(self, batchItem) | dc_crawler.URLProcess.URLProcess
resolveHTTP(self, postForms, headersDict) | dc_crawler.URLProcess.URLProcess
resolveTableName(self, localSiteId) | dc_crawler.URLProcess.URLProcess
setProtocols(self, protocols=None) | dc_crawler.URLProcess.URLProcess
simpleURLCanonize(self, realUrl) | dc_crawler.URLProcess.URLProcess
site | dc_crawler.URLProcess.URLProcess
siteId | dc_crawler.URLProcess.URLProcess
siteProperties | dc_crawler.URLProcess.URLProcess
updateAdditionProps(self, internalLinksCount, externalLinksCount, batchItem, size, freq, contentMd5) | dc_crawler.URLProcess.URLProcess
updateCollectTimeAndMime(self, detectedMime, batchItem, crawledTime, autoDetectMime, httpHeaders=None, strContent=None) | dc_crawler.URLProcess.URLProcess
updateCrawledURL(self, crawledResource, batchItem, contentSize, status=dc.EventObjects.URL.STATUS_CRAWLED) | dc_crawler.URLProcess.URLProcess
updateTypeForURLObjects(self, urlObjects, typeArg=dc.EventObjects.URL.TYPE_CHAIN) | dc_crawler.URLProcess.URLProcess
updateURL(self, batchItem, batchId, status=dc.EventObjects.URL.STATUS_CRAWLING) | dc_crawler.URLProcess.URLProcess
updateURLFields(self, urlMd5, wrapper, siteId) | dc_crawler.URLProcess.URLProcess
updateURLForFailed(self, errorBit, batchItem, httpCode=CONSTS.HTTP_CODE_400, status=dc.EventObjects.URL.STATUS_CRAWLED, updateUdate=True) | dc_crawler.URLProcess.URLProcess
updateURLStatus(self, urlId, status=dc.EventObjects.URL.STATUS_CRAWLED) | dc_crawler.URLProcess.URLProcess
url | dc_crawler.URLProcess.URLProcess
URL_TEMPLATE_CONST | dc_crawler.URLProcess.URLProcess | static
urlDBSync(self, batchItem, crawlerType, recrawlPeriod, autoRemoveProps) | dc_crawler.URLProcess.URLProcess
urlObj | dc_crawler.URLProcess.URLProcess
urlTable | dc_crawler.URLProcess.URLProcess
urlTemplateApply(self, url, crawlerType, urlTempalteRegular, urlTempalteRealtime, urlTempalteRegularEncode, urlTempalteRealtimeEncode) | dc_crawler.URLProcess.URLProcess