|
def | GetStats (urls) |
| Parameters: urls - a list of Url instance objects to display stats for. Returns: a list of stat mappings; each stat mapping has the following fields: source: the source url; canonicalized: the canonicalized url; valid: whether the url is valid; source_unique: whether the source is unique amongst the urls list; canonicalized_unique: like source_unique but for canonicalized urls. More...
|
|
Definition at line 6 of file Url.py.
◆ __init__()
def app.Url.Url.__init__ |
( |
|
self, |
|
|
|
url |
|
) |
| |
Definition at line 7 of file Url.py.
def __init__(self)
constructor
◆ __eq__()
def app.Url.Url.__eq__ |
( |
|
self, |
|
|
|
other |
|
) |
| |
Definition at line 56 of file Url.py.
56 def __eq__(self, other):
57 return self.url == other.url
◆ __ge__()
def app.Url.Url.__ge__ |
( |
|
self, |
|
|
|
other |
|
) |
| |
Definition at line 53 of file Url.py.
53 def __ge__(self, other):
54 return self.url >= other.url
◆ __gt__()
def app.Url.Url.__gt__ |
( |
|
self, |
|
|
|
other |
|
) |
| |
Definition at line 50 of file Url.py.
50 def __gt__(self, other):
51 return self.url > other.url
◆ __le__()
def app.Url.Url.__le__ |
( |
|
self, |
|
|
|
other |
|
) |
| |
Definition at line 47 of file Url.py.
47 def __le__(self, other):
48 return self.url <= other.url
◆ __lt__()
def app.Url.Url.__lt__ |
( |
|
self, |
|
|
|
other |
|
) |
| |
Definition at line 44 of file Url.py.
44 def __lt__(self, other):
45 return self.url < other.url
◆ checkUrlCodeValid()
def app.Url.Url.checkUrlCodeValid |
( |
|
self | ) |
|
Definition at line 11 of file Url.py.
11 def checkUrlCodeValid(self):
15 for index
in xrange(0, len(self.url)):
17 if not (self.url[index] >=
"a" and self.url[index] <=
"f" or \
18 self.url[index] >=
"A" and self.url[index] <=
"F" or \
19 self.url[index] >=
"0" and self.url[index] <=
"9"):
26 if self.url[index] ==
"%":
◆ getNormalized()
def app.Url.Url.getNormalized |
( |
|
self | ) |
|
Definition at line 41 of file Url.py.
41 def getNormalized(self):
def url_normalize(url, charset='utf-8')
◆ GetStats()
def app.Url.Url.GetStats |
( |
|
urls | ) |
|
|
static |
Parameters: urls - a list of Url instance objects to display stats for. Returns: a list of stat mappings; each stat mapping has the following fields: source: the source url; canonicalized: the canonicalized url; valid: whether the url is valid; source_unique: whether the source is unique amongst the urls list; canonicalized_unique: like source_unique but for canonicalized urls.
Definition at line 71 of file Url.py.
73 canonicalized_list = [url.getNormalized()
for url
in urls]
76 canonicalized = url.getNormalized()
77 url_stat[
'source'] = url.url
78 url_stat[
'canonicalized'] = canonicalized
79 url_stat[
'valid'] = url.isValid()
80 url_stat[
'source_unique'] = (urls.count(url) == 1)
81 url_stat[
'canonicalized_unique'] = \
82 (canonicalized_list.count(canonicalized) == 1)
83 stats.append(url_stat)
◆ isValid()
def app.Url.Url.isValid |
( |
|
self | ) |
|
Definition at line 31 of file Url.py.
34 regex = re.match(
r'^((ht|f)tp(s?)\:\/\/|~/|/)?([\w]+:\w+@)?([a-zA-Z]{1}([\w\-]+\.)+([\w]{2,5}))(:[\d]{1,5})?/?(\w+\.[\w]{3,4})?((\?\w+=\w+)?(&\w+=\w+)*)?', self.url, re.IGNORECASE)
36 ret = regex
is not None 38 ret = self.checkUrlCodeValid()
◆ url
Definition at line 8 of file Url.py.
The documentation for this class was generated from the following file: