Definition at line 25 of file LinkResolver.py.
◆ __init__()
def dc_postprocessor.LinkResolver.LinkResolver.__init__ |
( |
|
self, |
|
|
|
getConfigOption = None , |
|
|
|
log = None |
|
) |
| |
Definition at line 51 of file LinkResolver.py.
51 def __init__(self, getConfigOption=None, log=None):
52 PostProcessingModuleClass.__init__(self, getConfigOption, log)
54 self.method = self.DEFAULT_VALUE_METHOD
55 self.delimiter = self.DEFAULT_VALUE_DELIMITER
57 self.siteProperty = None
def __init__(self)
constructor
◆ __readHeaderFile()
def dc_postprocessor.LinkResolver.LinkResolver.__readHeaderFile |
( |
|
self, |
|
|
|
fileName |
|
) |
| |
|
private |
Definition at line 93 of file LinkResolver.py.
93 def __readHeaderFile(self, fileName):
96 with open(fileName, 'r') as f:
97 for header in ''.join(f.readlines()).splitlines():
101 key, value = header[:header.index(':')].strip(), header[header.index(':') + len(':'):].strip()
102 except Exception, err:
103 self.logger.error(self.ERROR_MSG_READ_HEADER, str(fileName), str(err), header)
◆ init()
def dc_postprocessor.LinkResolver.LinkResolver.init |
( |
|
self | ) |
|
Definition at line 64 of file LinkResolver.py.
65 if self.getConfigOption is None:
66 raise Exception(self.ERROR_MSG_INITIALIZATION_CALLBACK)
68 if self.logger is None:
69 raise Exception(self.ERROR_MSG_INITIALIZATION_LOGGER)
71 self.method = self.getConfigOption(sectionName=self.__class__.__name__,
72 optionName=self.CONFIG_OPTION_METHOD,
73 defaultValue=self.DEFAULT_VALUE_METHOD)
75 self.delimiter = self.getConfigOption(sectionName=self.__class__.__name__,
76 optionName=self.CONFIG_OPTION_DELIMITER,
77 defaultValue=self.DEFAULT_VALUE_DELIMITER)
79 if self.delimiter == "":
80 self.delimiter = self.DEFAULT_VALUE_DELIMITER
82 self.headers = self.__readHeaderFile(self.getConfigOption(sectionName=self.__class__.__name__,
83 optionName=self.CONFIG_OPTION_HEADER_FILE))
◆ processBatchItem()
def dc_postprocessor.LinkResolver.LinkResolver.processBatchItem |
( |
|
self, |
|
|
|
batchItem |
|
) |
| |
Definition at line 153 of file LinkResolver.py.
153 def processBatchItem(self, batchItem):
155 if self.LINK_RESOLVE_PROPERTY_NAME in batchItem.properties:
156 self.siteProperty = batchItem.properties[self.LINK_RESOLVE_PROPERTY_NAME]
157 self.logger.debug("!!! self.siteProperty: %s, type: %s", str(self.siteProperty), str(type(self.siteProperty)))
159 if batchItem.urlContentResponse is not None and isinstance(batchItem.urlContentResponse.processedContents, list):
160 for index in xrange(len(batchItem.urlContentResponse.processedContents)):
161 if isinstance(batchItem.urlContentResponse.processedContents[index], basestring) and \
162 batchItem.urlContentResponse.processedContents[index] != "":
164 processedContent = json.loads(base64.b64decode(batchItem.urlContentResponse.processedContents[index]))
167 if self.LINK_FIELD_NAME in processedContent:
168 links = processedContent[self.LINK_FIELD_NAME].split(self.delimiter)
171 rlinks.append(self.resolve(link))
173 processedContent[self.LINK_FIELD_NAME] = self.delimiter.join(rlinks)
175 batchItem.urlContentResponse.processedContents[index] = base64.b64encode(json.dumps(processedContent))
◆ resolve()
def dc_postprocessor.LinkResolver.LinkResolver.resolve |
( |
|
self, |
|
|
|
url |
|
) |
| |
Definition at line 115 of file LinkResolver.py.
115 def resolve(self, url):
121 if self.PROPERTY_NAME_METHOD in self.siteProperty:
122 methods = self.siteProperty[self.PROPERTY_NAME_METHOD]
123 for pattern, value in methods.items():
124 if re.search(pattern, url, re.I + re.U) is not None:
128 self.logger.debug("Apply method: '%s' for %s", str(method), str(url))
130 req = requests.Request(method=method, url=url, headers=self.headers)
132 s = requests.Session()
133 res = s.send(r, allow_redirects=True)
134 ret = res.request.url
136 if res.content != "":
137 match = re.search(self.SEARCH_PATTERN, res.content, re.I + re.U)
138 if match is not None:
141 except requests.exceptions.RequestException, err:
142 self.logger.error(self.ERROR_MSG_RESOLVE__REDIRECT_URL, str(err))
143 except Exception, err:
144 self.logger.error(self.ERROR_MSG_RESOLVE__REDIRECT_URL, str(err))
◆ CONFIG_OPTION_DELIMITER
string dc_postprocessor.LinkResolver.LinkResolver.CONFIG_OPTION_DELIMITER = 'delimiter' |
|
static |
◆ CONFIG_OPTION_HEADER_FILE
string dc_postprocessor.LinkResolver.LinkResolver.CONFIG_OPTION_HEADER_FILE = 'headers_file' |
|
static |
◆ CONFIG_OPTION_METHOD
string dc_postprocessor.LinkResolver.LinkResolver.CONFIG_OPTION_METHOD = 'method' |
|
static |
◆ DEFAULT_VALUE_DELIMITER
string dc_postprocessor.LinkResolver.LinkResolver.DEFAULT_VALUE_DELIMITER = ',' |
|
static |
◆ DEFAULT_VALUE_METHOD
string dc_postprocessor.LinkResolver.LinkResolver.DEFAULT_VALUE_METHOD = 'HEAD' |
|
static |
◆ delimiter
dc_postprocessor.LinkResolver.LinkResolver.delimiter |
◆ ERROR_MSG_INITIALIZATION_CALLBACK
string dc_postprocessor.LinkResolver.LinkResolver.ERROR_MSG_INITIALIZATION_CALLBACK = "Error initialization of callback function for get config options." |
|
static |
◆ ERROR_MSG_INITIALIZATION_LOGGER
string dc_postprocessor.LinkResolver.LinkResolver.ERROR_MSG_INITIALIZATION_LOGGER = "Error initialization of self.logger." |
|
static |
◆ ERROR_MSG_READ_HEADER
string dc_postprocessor.LinkResolver.LinkResolver.ERROR_MSG_READ_HEADER = "Error read header file. File: '%s', error: '%s', line: '%s'" |
|
static |
◆ ERROR_MSG_RESOLVE__REDIRECT_URL
string dc_postprocessor.LinkResolver.LinkResolver.ERROR_MSG_RESOLVE__REDIRECT_URL = "Resolve redirect url failed. Error: %s" |
|
static |
◆ headers
dc_postprocessor.LinkResolver.LinkResolver.headers |
◆ LINK_FIELD_NAME
string dc_postprocessor.LinkResolver.LinkResolver.LINK_FIELD_NAME = 'link' |
|
static |
◆ LINK_RESOLVE_PROPERTY_NAME
string dc_postprocessor.LinkResolver.LinkResolver.LINK_RESOLVE_PROPERTY_NAME = 'LINK_RESOLVE' |
|
static |
◆ method
dc_postprocessor.LinkResolver.LinkResolver.method |
◆ PROPERTY_NAME_METHOD
string dc_postprocessor.LinkResolver.LinkResolver.PROPERTY_NAME_METHOD = 'method' |
|
static |
◆ SEARCH_PATTERN
string dc_postprocessor.LinkResolver.LinkResolver.SEARCH_PATTERN = 'redirect_url\".*href=\"(.*)\">' |
|
static |
◆ siteProperty
dc_postprocessor.LinkResolver.LinkResolver.siteProperty |
The documentation for this class was generated from the following file: