Definition at line 22 of file NewspaperWrapper.py.
◆ __init__()
def dc_processor.NewspaperWrapper.NewspaperWrapper.__init__(self, url, title=u'', source_url=u'', config=None, isLoadUrls=True, **kwargs)
Definition at line 33 of file NewspaperWrapper.py.
33 def __init__(self, url, title=u'', source_url=u'', config=None, isLoadUrls=True, **kwargs):
34 super(NewspaperWrapper, self).__init__(url, title, source_url, config, **kwargs)
35 self.isLoadUrls = isLoadUrls
def __init__(self)
constructor
◆ fetch_images()
def dc_processor.NewspaperWrapper.NewspaperWrapper.fetch_images(self)
Definition at line 76 of file NewspaperWrapper.py.
76 def fetch_images(self):
77 if self.clean_doc is not None:
78 meta_img_url = self.versionnedWrapper("get_meta_img_url")
79 self.set_meta_img(meta_img_url)
81 if self.clean_top_node is not None and not self.has_top_image():
82 first_img = self.versionnedWrapper("get_first_img_url")
83 self.set_top_img(first_img)
85 if not self.has_top_image() and self.isLoadUrls:
86 self.set_reddit_top_img()
88 logger.debug(">>> not load urls")
90 if self.isLoadUrls and self.clean_doc is not None:
91 imgs = self.versionnedWrapper("get_img_urls")
92 imgs = self.sort_images(imgs)
◆ set_top_img()
def dc_processor.NewspaperWrapper.NewspaperWrapper.set_top_img(self, src_url)
Definition at line 99 of file NewspaperWrapper.py.
99 def set_top_img(self, src_url):
101 super(NewspaperWrapper, self).set_top_img(src_url)
103 logger.debug(">>> not load urls")
◆ sort_images()
def dc_processor.NewspaperWrapper.NewspaperWrapper.sort_images(self, imgs)
Definition at line 42 of file NewspaperWrapper.py.
42 def sort_images(self, imgs):
46 img_dimension = images.fetch_image_dimension(image, self.config.browser_user_agent)
47 except Exception, err:
48 logger.error("fetch_image_dimension: %s", str(err))
51 if img_dimension is None:
52 img_dimensions.append({"dim": None, "img_url": image})
54 img_dimensions.append({"dim": img_dimension[0] * img_dimension[1], "img_url": image})
55 ret = [img["img_url"] for img in sorted(img_dimensions, key=lambda img: img["dim"], reverse=True)]
◆ versionnedWrapper()
def dc_processor.NewspaperWrapper.NewspaperWrapper.versionnedWrapper(self, methodName)
Definition at line 64 of file NewspaperWrapper.py.
64 def versionnedWrapper(self, methodName):
65 argsResult = inspect.getargspec(getattr(self.extractor, methodName))
66 if argsResult is not None and argsResult.args is not None and \
67 len(argsResult.args) - (0 if argsResult.defaults is None else len(argsResult.defaults)) == 2 and \
68 "article" in argsResult.args:
69 ret = getattr(self.extractor, methodName)(self)
71 ret = getattr(self.extractor, methodName)(self.url, self.clean_doc)
◆ isLoadUrls
dc_processor.NewspaperWrapper.NewspaperWrapper.isLoadUrls
The documentation for this class was generated from the following file: