Definition at line 23 of file scraper_result.py.
◆ __init__()
def dc_processor.scraper_result.Result.__init__(self, config, resId, metrics=None)
Definition at line 28 of file scraper_result.py.
28 def __init__(self, config, resId, metrics=None):
29 self.name =
"Scraper result object" 33 self.metrics = metrics
36 self.blockedByXpathTags = []
40 self.article_tags = []
42 self.article_tags = [tag
for tag
in config.options(
"article_tags")]
43 for tag
in self.article_tags:
45 self.start = time.time()
46 self.finish = self.start
47 self.errorCode = CONSTS.ERROR_OK
48 self.errorMessage = CONSTS.MSG_ERROR_OK
def __init__(self)
constructor
◆ get()
def dc_processor.scraper_result.Result.get(self)
Definition at line 99 of file scraper_result.py.
101 data[
"resId"] = self.resId
105 data[
"tagList"].append([])
107 self.recalcTagMaskCount(data[
"tagList"][0])
109 self.data[
"data"] = data
110 self.data[
"error_code"] = self.errorCode
111 self.data[
"error_message"] = self.errorMessage
112 self.data[
"time"] =
"%s" % (self.finish - self.start)
114 self.metrics = json.dumps(self.metrics)
115 self.data[
"metrics"] = self.metrics
117 return json.dumps(self.data, ensure_ascii=
False, sort_keys=
True, indent=4, separators=(
",",
":"))
◆ getBestValue()
def dc_processor.scraper_result.Result.getBestValue(self, items_list)
Definition at line 136 of file scraper_result.py.
136 def getBestValue(self, items_list):
137 tmp = [item
for item
in items_list
if item !=
""]
143 if tmp[0][
"name"] ==
"content_encoded":
144 response = max(tmp, key=
lambda x: x[
"data"])
147 response = max(tmp, key=
lambda x: x[
"data"])
◆ getEmptyTags()
def dc_processor.scraper_result.Result.getEmptyTags(self)
Definition at line 53 of file scraper_result.py.
53 def getEmptyTags(self):
54 empty_tags = [key
for key, value
in self.tags.
items()
if key
in self.article_tags
and not value]
◆ getFilledTags()
def dc_processor.scraper_result.Result.getFilledTags(self)
Definition at line 58 of file scraper_result.py.
58 def getFilledTags(self):
59 filled_tags = [key
for key, value
in self.tags.
items()
if key
in self.article_tags
and value]
◆ isTagFilled()
def dc_processor.scraper_result.Result.isTagFilled(self, tagsName)
Definition at line 183 of file scraper_result.py.
183 def isTagFilled(self, tagsName):
185 if tagsName
in self.tags:
186 if isinstance(self.tags[tagsName], basestring):
187 if self.tags[tagsName].strip() !=
"":
189 elif isinstance(self.tags[tagsName], dict)
and "data" in self.tags[tagsName]:
190 if isinstance(self.tags[tagsName][
"data"], basestring):
191 if self.tags[tagsName][
"data"].strip() !=
"":
193 elif isinstance(self.tags[tagsName][
"data"], list):
194 for elem
in self.tags[tagsName][
"data"]:
195 if isinstance(elem, basestring)
and elem !=
"":
◆ mergeResults()
def dc_processor.scraper_result.Result.mergeResults(self, result)
Definition at line 120 of file scraper_result.py.
120 def mergeResults(self, result):
122 for blockedTag
in result.blockedByXpathTags:
123 if blockedTag
not in self.blockedByXpathTags:
124 self.blockedByXpathTags.append(blockedTag)
129 for tagName
in result.tags:
130 if tagName
not in self.tags
or not self.isTagFilled(tagName):
131 self.tags[tagName] = result.tags[tagName]
132 if tagName
in result.defaultTags
and tagName
not in self.defaultTags:
133 self.defaultTags.append(tagName)
◆ metricsPrecalculate()
def dc_processor.scraper_result.Result.metricsPrecalculate(self)
Definition at line 93 of file scraper_result.py.
93 def metricsPrecalculate(self):
94 if len(self.metrics) > 0:
95 Metrics.fillMetricModulesList()
96 Metrics.metricsPrecalculate(self.metrics, self)
◆ recalcTagMaskCount()
def dc_processor.scraper_result.Result.recalcTagMaskCount(self, container=None, altTagsMask=None)
Definition at line 63 of file scraper_result.py.
63 def recalcTagMaskCount(self, container=None, altTagsMask=None):
67 for key, value
in self.tags.
items():
68 if value
is not None and value !=
"":
75 if tag[
"name"]
not in self.defaultTags
and "data" in value:
77 if isinstance(value[
"data"], basestring):
78 realValueString = value[
"data"]
79 elif isinstance(value[
"data"], list)
and len(value[
"data"]) > 0:
80 realValueString = value[
"data"][0]
81 if realValueString
is not None and realValueString.strip() !=
"":
82 if altTagsMask
is not None:
83 if tag[
"name"]
in altTagsMask:
84 self.tagsMask = self.tagsMask | altTagsMask[tag[
"name"]]
85 elif tag[
"name"]
in BaseExtractor.tagsMask:
86 self.tagsMask = self.tagsMask | BaseExtractor.tagsMask[tag[
"name"]]
89 if container
is not None:
90 container.append(copy.copy(tag))
◆ stripResult()
def dc_processor.scraper_result.Result.stripResult(self)
Definition at line 157 of file scraper_result.py.
157 def stripResult(self):
159 for key
in self.tags:
160 if isinstance(self.tags[key], basestring):
161 self.tags[key] = self.tags[key].strip()
162 if self.tags[key] ==
"":
163 removeKeys.append(key)
164 elif isinstance(self.tags[key], dict)
and "data" in self.tags[key]:
165 if isinstance(self.tags[key][
"data"], basestring):
166 self.tags[key][
"data"] = self.tags[key][
"data"].strip()
167 if self.tags[key][
"data"] ==
"":
168 removeKeys.append(key)
169 elif isinstance(self.tags[key][
"data"], list)
and len(self.tags[key][
"data"]) > 0
and \
170 isinstance(self.tags[key][
"data"][0], basestring):
171 self.tags[key][
"data"][0] = self.tags[key][
"data"][0].strip()
172 if self.tags[key][
"data"][0] ==
"":
173 removeKeys.append(key)
175 removeKeys.append(key)
177 for key
in removeKeys:
179 logger.debug(
">>> Remove " + key +
" element because it empty")
◆ article_tags
dc_processor.scraper_result.Result.article_tags |
◆ blockedByXpathTags
dc_processor.scraper_result.Result.blockedByXpathTags |
◆ data
dc_processor.scraper_result.Result.data |
◆ defaultTags
dc_processor.scraper_result.Result.defaultTags |
◆ errorCode
dc_processor.scraper_result.Result.errorCode |
◆ errorMessage
dc_processor.scraper_result.Result.errorMessage |
◆ finish
dc_processor.scraper_result.Result.finish |
◆ metrics
dc_processor.scraper_result.Result.metrics |
◆ name
dc_processor.scraper_result.Result.name |
◆ resId
dc_processor.scraper_result.Result.resId |
◆ start
dc_processor.scraper_result.Result.start |
◆ tags
dc_processor.scraper_result.Result.tags |
◆ TAGS_LANG_DEFAULT
string dc_processor.scraper_result.Result.TAGS_LANG_DEFAULT = "en" [static]
◆ TAGS_LANG_SUFFIX_DEFAULT
string dc_processor.scraper_result.Result.TAGS_LANG_SUFFIX_DEFAULT = "_language" [static]
◆ tagsCount
dc_processor.scraper_result.Result.tagsCount |
◆ tagsMask
dc_processor.scraper_result.Result.tagsMask |
The documentation for this class was generated from the following file: