4 HCE project, Python bindings, Distributed Tasks Manager application. 5 LinkResolver is a module class that provides the main functionality for resolving links. 7 @package: dc_postprocessor 9 @author Alexander Vybornyh <alexander.hce.cluster@gmail.com> 10 @link: http://hierarchical-cluster-engine.com/ 11 @copyright: Copyright © 2013-2017 IOIX Ukraine 12 @license: http://hierarchical-cluster-engine.com/license/ 20 import requests.exceptions
28 LINK_RESOLVE_PROPERTY_NAME =
'LINK_RESOLVE' 31 CONFIG_OPTION_METHOD =
'method' 32 CONFIG_OPTION_DELIMITER =
'delimiter' 33 CONFIG_OPTION_HEADER_FILE =
'headers_file' 35 PROPERTY_NAME_METHOD =
'method' 37 LINK_FIELD_NAME =
'link' 38 SEARCH_PATTERN =
'redirect_url\".*href=\"(.*)\">' 41 DEFAULT_VALUE_METHOD =
'HEAD' 42 DEFAULT_VALUE_DELIMITER =
',' 45 ERROR_MSG_INITIALIZATION_CALLBACK =
"Error initialization of callback function for get config options." 46 ERROR_MSG_INITIALIZATION_LOGGER =
"Error initialization of self.logger." 47 ERROR_MSG_RESOLVE__REDIRECT_URL =
"Resolve redirect url failed. Error: %s" 48 ERROR_MSG_READ_HEADER =
"Error read header file. File: '%s', error: '%s', line: '%s'" 51 def __init__(self, getConfigOption=None, log=None):
52 PostProcessingModuleClass.__init__(self, getConfigOption, log)
96 with open(fileName,
'r') as f: 97 for header
in ''.
join(f.readlines()).splitlines():
101 key, value = header[:header.index(
':')].strip(), header[header.index(
':') + len(
':'):].strip()
102 except Exception, err:
123 for pattern, value
in methods.items():
124 if re.search(pattern, url, re.I + re.U)
is not None:
128 self.
logger.debug(
"Apply method: '%s' for %s", str(method), str(url))
130 req = requests.Request(method=method, url=url, headers=self.
headers)
132 s = requests.Session()
133 res = s.send(r, allow_redirects=
True)
134 ret = res.request.url
136 if res.content !=
"":
138 if match
is not None:
141 except requests.exceptions.RequestException, err:
143 except Exception, err:
159 if batchItem.urlContentResponse
is not None and isinstance(batchItem.urlContentResponse.processedContents, list):
160 for index
in xrange(len(batchItem.urlContentResponse.processedContents)):
161 if isinstance(batchItem.urlContentResponse.processedContents[index], basestring)
and \
162 batchItem.urlContentResponse.processedContents[index] !=
"":
164 processedContent = json.loads(base64.b64decode(batchItem.urlContentResponse.processedContents[index]))
171 rlinks.append(self.
resolve(link))
175 batchItem.urlContentResponse.processedContents[index] = base64.b64encode(json.dumps(processedContent))
string PROPERTY_NAME_METHOD
string CONFIG_OPTION_METHOD
string ERROR_MSG_READ_HEADER
def __init__(self, getConfigOption=None, log=None)
string ERROR_MSG_RESOLVE__REDIRECT_URL
def processBatchItem(self, batchItem)
string CONFIG_OPTION_HEADER_FILE
string CONFIG_OPTION_DELIMITER
string ERROR_MSG_INITIALIZATION_CALLBACK
string ERROR_MSG_INITIALIZATION_LOGGER
string LINK_RESOLVE_PROPERTY_NAME
string DEFAULT_VALUE_METHOD
def __readHeaderFile(self, fileName)
string DEFAULT_VALUE_DELIMITER