15 for index
in xrange(0, len(self.
url)):
17 if not (self.
url[index] >=
"a" and self.
url[index] <=
"f" or \
18 self.
url[index] >=
"A" and self.
url[index] <=
"F" or \
19 self.
url[index] >=
"0" and self.
url[index] <=
"9"):
26 if self.
url[index] ==
"%":
34 regex = re.match(
r'^((ht|f)tp(s?)\:\/\/|~/|/)?([\w]+:\w+@)?([a-zA-Z]{1}([\w\-]+\.)+([\w]{2,5}))(:[\d]{1,5})?/?(\w+\.[\w]{3,4})?((\?\w+=\w+)?(&\w+=\w+)*)?', self.
url, re.IGNORECASE)
36 ret = regex
is not None 45 return self.
url < other.url
48 return self.
url <= other.url
51 return self.
url > other.url
54 return self.
url >= other.url
57 return self.
url == other.url
73 canonicalized_list = [url.getNormalized()
for url
in urls]
76 canonicalized = url.getNormalized()
77 url_stat[
'source'] = url.url
78 url_stat[
'canonicalized'] = canonicalized
79 url_stat[
'valid'] = url.isValid()
80 url_stat[
'source_unique'] = (urls.count(url) == 1)
81 url_stat[
'canonicalized_unique'] = \
82 (canonicalized_list.count(canonicalized) == 1)
83 stats.append(url_stat)
def url_normalize(url, charset='utf-8')
def GetStats(urls)
Parameters: urls - a list of Url instance objects to display stats for Returns: A list of stat mappin...
def checkUrlCodeValid(self)