Definition at line 22 of file boilerpipe_extractor.py.
◆ __init__()
def dc_processor.boilerpipe_extractor.BoilerpipeExtractor.__init__(self, config, templ=None, domain=None, processorProperties=None)
Definition at line 25 of file boilerpipe_extractor.py.
def __init__(self, config, templ=None, domain=None, processorProperties=None):
    """Set up the Boilerpipe extractor on top of the base extractor.

    All four arguments are forwarded unchanged to ``BaseExtractor.__init__``.
    The extractor-name constant is then stored on the instance and under the
    ``"extractor"`` key of ``self.data``, the current properties are logged
    at debug level, and ``rankReading`` is called with this class's name.
    """
    # NOTE(review): the published listing jumps from source line 29 to 31;
    # confirm that the skipped line 30 held no statement.
    BaseExtractor.__init__(self, config, templ, domain, processorProperties)
    extractor_name = CONSTS.EXTRACTOR_NAME_BOILERPIPE
    self.name = extractor_name
    self.data["extractor"] = extractor_name
    dumped_properties = varDump(self.properties)
    logger.debug("Properties: %s", dumped_properties)
    self.rankReading(self.__class__.__name__)
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
def __init__(self)
constructor
◆ extractTags()
def dc_processor.boilerpipe_extractor.BoilerpipeExtractor.extractTags(self, resource, reslt)
Definition at line 34 of file boilerpipe_extractor.py.
def extractTags(self, resource, reslt):
    """Extract the article text from a resource and store it as a tag.

    Runs the ``ArticleExtractor`` over ``resource.raw_html``, logs the
    extracted text at info level and adds it to ``reslt`` under
    ``CONSTS.TAG_CONTENT_UTF8_ENCODED``.  Any ``Exception`` raised by these
    steps is caught and handed to ``ExceptionLog.handler``.

    resource -- object exposing the page markup as ``raw_html``
    reslt    -- result object passed on to ``self.addTag``
    """
    # The listing had lost the opening ``try:`` (source line 35), leaving the
    # ``except`` clause without a block to guard; it is restored here.
    try:
        extractor = Extractor(extractor='ArticleExtractor', html=resource.raw_html)
        text = extractor.getText()
        logger.info("Article's corpus: %s", text)
        self.addTag(result=reslt, tag_name=CONSTS.TAG_CONTENT_UTF8_ENCODED, tag_value=text)
    except Exception as err:  # ``as`` form parses on Python 2.6+ and Python 3
        ExceptionLog.handler(logger, err, 'extractTags:', (err),
                             {ExceptionLog.LEVEL_NAME_ERROR: ExceptionLog.LEVEL_VALUE_DEBUG})
◆ name
dc_processor.boilerpipe_extractor.BoilerpipeExtractor.name
The documentation for this class was generated from the following file: