|
def | __init__ (self, dbWrapper=None) |
|
def | fetchParentUrl (self, siteId, parentMd5, dbWrapper) |
|
def | resolveRefererHeader (self, headers, mode, url, siteId=None, parentMd5=None, dbWrapper=None) |
|
Definition at line 22 of file RefererHeaderResolver.py.
◆ __init__()
def dc_crawler.RefererHeaderResolver.RefererHeaderResolver.__init__(self, dbWrapper=None)
◆ fetchParentUrl()
def dc_crawler.RefererHeaderResolver.RefererHeaderResolver.fetchParentUrl(self, siteId, parentMd5, dbWrapper)
Definition at line 40 of file RefererHeaderResolver.py.
40 def fetchParentUrl(self, siteId, parentMd5, dbWrapper):
42 if siteId is not None and parentMd5 is not None and dbWrapper is not None:
44 urlStatus.urlType = dc.EventObjects.URLStatus.URL_TYPE_MD5
45 drceSyncTasksCoverObj = DC_CONSTS.DRCESyncTasksCover(DC_CONSTS.EVENT_TYPES.URL_STATUS, [urlStatus])
46 responseDRCESyncTasksCover = dbWrapper.process(drceSyncTasksCoverObj)
47 row = responseDRCESyncTasksCover.eventObject
48 if row is not None and len(row) > 0 and row[0] is not None:
◆ resolveRefererHeader()
def dc_crawler.RefererHeaderResolver.RefererHeaderResolver.resolveRefererHeader(self, headers, mode, url, siteId=None, parentMd5=None, dbWrapper=None)
Definition at line 61 of file RefererHeaderResolver.py.
61 def resolveRefererHeader(self, headers, mode, url, siteId=None, parentMd5=None, dbWrapper=None):
64 for headerName in headers:
65 if headerName.lower() == self.HEADER_NAME.lower():
66 logger.info(">>> Referer field already in dict headers")
69 if mode == self.MODE_NONE:
71 elif mode == self.MODE_SIMPLE:
72 headers[self.HEADER_NAME] = url
73 elif mode == self.MODE_DOMAIN:
74 headers[self.HEADER_NAME] = Utils.UrlParser.generateDomainUrl(url)
75 elif mode == self.MODE_PARENT:
76 parentUrl = self.fetchParentUrl(siteId, parentMd5, dbWrapper if dbWrapper is not None else self.dbWrapper)
77 headers[self.HEADER_NAME] = parentUrl if parentUrl is not None else url
◆ dbWrapper
dc_crawler.RefererHeaderResolver.RefererHeaderResolver.dbWrapper |
◆ HEADER_NAME
string dc_crawler.RefererHeaderResolver.RefererHeaderResolver.HEADER_NAME = "Referer" |
|
static |
◆ MODE_DOMAIN
int dc_crawler.RefererHeaderResolver.RefererHeaderResolver.MODE_DOMAIN = 2 |
|
static |
◆ MODE_NONE
int dc_crawler.RefererHeaderResolver.RefererHeaderResolver.MODE_NONE = 0 |
|
static |
◆ MODE_PARENT
int dc_crawler.RefererHeaderResolver.RefererHeaderResolver.MODE_PARENT = 3 |
|
static |
◆ MODE_SIMPLE
int dc_crawler.RefererHeaderResolver.RefererHeaderResolver.MODE_SIMPLE = 1 |
|
static |
The documentation for this class was generated from the following file: