HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc_processor.scrapy_extractor.ScrapyExtractor Class Reference

Public Member Functions

def __init__ (self, config, templ=None, domain=None, processorProperties=None)
 
def generateTemplatesFromRowTemplates (self, rowTemplates, domain=None)
 
def templateLoad (self, config, templ=None, domain=None)
 
def pasteLists (self, lhs, rhs)
 
def templatePreparer (self, jsonBuf, domains, globalTemplate)
 
def extractTag (self, tagName, result, template, textHandler=None, delimiter=' ')
 
def extractTagsForOneTemplate (self, resource, result, template)
 
def extractTags (self, resource, result)
 
- Public Member Functions inherited from dc_processor.base_extractor.BaseExtractor
def __init__ (self, config, templ=None, domain=None, processorProperties=None)
 
def __str__ (self)
 
def __repr__ (self)
 
def loadScraperProperties (self, scraperPropFileName)
 
def isTagNotFilled (self, result, tagName)
 
def isTagValueNotEmpty (self, tagValue)
 
def tagValueElemValidate (self, tagValueElem, conditionElem)
 
def tagValueValidate (self, tagName, tagValue)
 
def addTag (self, result, tag_name, tag_value, xpath="", isDefaultTag=False, callAdjustment=True, tagType=None, allowNotFilled=False)
 
def calculateMetrics (self, response)
 
def rankReading (self, exctractorName)
 

Public Attributes

 closeVoid
 
 keepAttributes
 
 innerTextTagReplacers
 
 name
 
 sel
 
 resource
 
 postReplace
 
 postExclude
 
 templates
 
 blockedByXpathTags
 
- Public Attributes inherited from dc_processor.base_extractor.BaseExtractor
 config
 
 processorProperties
 
 name
 
 rank
 
 process_mode
 
 modules
 
 data
 
 db_dc_scraper_db
 
 DBConnector
 
 imgDelimiter
 
 tagsValidator
 

Static Public Attributes

string SELF_NAME = "Scrapy extractor"
 
string TEMPLATE_FILE_RULE_XPATH = 'xpath'
 
string TEMPLATE_FILE_RULE_REPLACE = 'replace'
 
string TEMPLATE_FILE_RULE_EXCLUDE = 'exclude'
 
list DISABLE_XPATH_CHARS_LIST = [';', '#']
 
- Static Public Attributes inherited from dc_processor.base_extractor.BaseExtractor
 properties = None
 
dictionary tag
 
dictionary tagsMask
 

Detailed Description

Definition at line 30 of file scrapy_extractor.py.
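
ScrapyExtractor pulls tag values out of a resource's raw HTML with XPath expressions taken from per-domain templates: each template key is a domain pattern and each value maps tag names to lists of XPath rules (see templatePreparer() and extractTag() below). A minimal usage sketch, assuming config, resource and result objects supplied by the dc_processor runtime; the domain pattern, tag names and XPath expressions are illustrative, not part of this class:

# Minimal sketch; config, resource and result come from the dc_processor runtime.
templ = {
    "example\\.com": {                                  # pattern matched via re.search()
        "title": ["//title/text()"],                    # tag name -> list of XPath rules
        "link": ["//link[@rel='canonical']/@href"]
    },
    "*": {"title": ["//meta[@property='og:title']/@content"]}   # fallback set
}
extractor = ScrapyExtractor(config, templ=templ, domain="www.example.com")
result = extractor.extractTags(resource, result)        # fills result tags via addTag()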

Constructor & Destructor Documentation

◆ __init__()

def dc_processor.scrapy_extractor.ScrapyExtractor.__init__ (self, config, templ=None, domain=None, processorProperties=None)

Definition at line 48 of file scrapy_extractor.py.

48  def __init__(self, config, templ=None, domain=None, processorProperties=None):
49  BaseExtractor.__init__(self, config, templ, domain, processorProperties)
50  logger.debug("Properties: %s", varDump(self.properties))
51 
52  # set module rank from module's properties
53  self.rankReading(self.__class__.__name__)
54 
55  self.closeVoid = None
56  if processorProperties is not None and CONSTS.TAG_CLOSE_VOID_PROP_NAME in processorProperties and \
57  processorProperties[CONSTS.TAG_CLOSE_VOID_PROP_NAME] is not None:
58  self.closeVoid = int(processorProperties[CONSTS.TAG_CLOSE_VOID_PROP_NAME])
59 
60  self.keepAttributes = None
61  if processorProperties is not None and CONSTS.TAG_KEEP_ATTRIBUTES_PROP_NAME in processorProperties and \
62  processorProperties[CONSTS.TAG_KEEP_ATTRIBUTES_PROP_NAME] is not None:
63  self.keepAttributes = {}
64  for key in processorProperties[CONSTS.TAG_KEEP_ATTRIBUTES_PROP_NAME]:
65  self.keepAttributes[key.lower()] = processorProperties[CONSTS.TAG_KEEP_ATTRIBUTES_PROP_NAME][key]
66 
67  if processorProperties is not None and CONSTS.TAG_MARKUP_PROP_NAME in processorProperties and \
68  processorProperties[CONSTS.TAG_MARKUP_PROP_NAME] is not None:
69  self.innerTextTagReplacers = {}
70  for key in processorProperties[CONSTS.TAG_MARKUP_PROP_NAME]:
71  self.innerTextTagReplacers[key.lower()] = processorProperties[CONSTS.TAG_MARKUP_PROP_NAME][key]
72  else:
73  self.innerTextTagReplacers = None
74 
75  self.name = self.SELF_NAME
76  self.data["extractor"] = self.SELF_NAME
77  self.sel = None
78  self.resource = None
79  # for post processing
80  self.postReplace = {}
81  self.postExclude = {}
82 
83  if processorProperties is not None and "SCRAPER_SCRAPY_PRECONFIGURED" in processorProperties:
84  self.templates = self.generateTemplatesFromRowTemplates(json.loads(processorProperties\
85  ["SCRAPER_SCRAPY_PRECONFIGURED"]), domain)
86  else:
87  self.templates = [{self.SELF_NAME + "_default": self.templateLoad(config, templ, domain)}]
88 
89  try:
90  defaultConfigTemplate = config.get("Application", "default_template", None)
91  except ConfigParser.NoOptionError:
92  defaultConfigTemplate = None
93  if defaultConfigTemplate is not None:
94  logger.debug(">>> Extend Templates with config default template")
95  tempTemplates = self.generateTemplatesFromRowTemplates(json.loads(defaultConfigTemplate), domain)
96  if len(tempTemplates) > 0:
97  newTemplates = []
98  for templeteElemConfig in tempTemplates:
99  for templeteElemProperty in self.templates:
100  for templeteKeyProperty in templeteElemProperty:
101  if templeteKeyProperty in templeteElemConfig:
102  templeteElemConfig = None
103  break
104  if templeteElemConfig is None:
105  break
106  if templeteElemConfig is not None:
107  newTemplates.append(templeteElemConfig)
108  self.templates = self.templates + newTemplates
109  self.blockedByXpathTags = []
110  logger.debug("!!! INIT Template Domain: '%s'", str(domain))
111 # logger.debug("!!! INIT Template: %s", str(self.templates))
112 
113 
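When processorProperties carries a SCRAPER_SCRAPY_PRECONFIGURED entry, its JSON value is expanded by generateTemplatesFromRowTemplates() instead of loading the single "_default" template. A construction sketch, assuming a config object from the dc_processor runtime; the set name, domain pattern and XPath inside the JSON are illustrative:

import json
# Hedged sketch; only the SCRAPER_SCRAPY_PRECONFIGURED key is taken from the listing above.
preconfigured = {"sets": [{"news_set": {"example\\.com": {"title": ["//title/text()"]}}}]}
processorProperties = {"SCRAPER_SCRAPY_PRECONFIGURED": json.dumps(preconfigured)}
extractor = ScrapyExtractor(config, domain="www.example.com",
                            processorProperties=processorProperties)
# extractor.templates now holds one entry per set instead of the "Scrapy extractor_default" one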

Member Function Documentation

◆ extractTag()

def dc_processor.scrapy_extractor.ScrapyExtractor.extractTag (self, tagName, result, template, textHandler=None, delimiter=' ')

Definition at line 337 of file scrapy_extractor.py.

337  def extractTag(self, tagName, result, template, textHandler=None, delimiter=' '):
338  try:
339  if tagName in template:
340  for path in template[tagName]:
341 # logger.debug("!!! ENTER tagName: %s, xpath: '%s'", str(tagName), str(path))
342 
343  if tagName in self.blockedByXpathTags:
344  break
345 
346  if path == "":
347  if tagName not in result.blockedByXpathTags:
348  result.blockedByXpathTags.append(tagName)
349  break
350  elif path == "none":
351  if tagName not in self.blockedByXpathTags:
352  self.blockedByXpathTags.append(tagName)
353  break
354  if textHandler is not None:
355  conditions = None
356  if self.tagsValidator is not None and self.name in self.tagsValidator and \
357  tagName in self.tagsValidator[self.name]:
358  conditions = self.tagsValidator[self.name][tagName]
359  localValue = textHandler(self.sel.xpath(path), delimiter, delimiter, self.innerTextTagReplacers, conditions,
360  keepAttributes=self.keepAttributes, baseUrl=self.resource.url,
361  closeVoid=self.closeVoid, excludeNodes=self.postExclude[tagName] if tagName in self.postExclude else None)
362  else:
363  localValue = self.sel.xpath(path).extract()
364 
365 # if tagName == 'content_encoded':
366 # logger.debug("!!! tagName: %s", str(tagName))
367 # logger.debug("!!! xpath: %s", str(path))
368 # logger.debug("!!! value: '%s'", varDump(localValue))
369 
370 # if tagName == 'title' or tagName == 'html_lang':
371 # logger.debug("!!! tagName: %s", str(tagName))
372 # logger.debug("!!! xpath: %s", str(path))
373 # logger.debug("!!! value: '%s'", varDump(localValue))
374 
375  # apply post-processing
376  if isinstance(self.postReplace, dict) and tagName in self.postReplace and \
377  isinstance(self.postReplace[tagName], list) and localValue != "":
378 # if len(localValue) > 0:
379 # logger.debug("!!! localValue before: %s", varDump(localValue))
380 # logger.info("POST PROCESSING FOR TAG '%s', len = %s", str(tagName), len(localValue))
381  for postReplace in self.postReplace[tagName]:
382  if isinstance(postReplace, dict):
383  for pattern, repl in postReplace.items():
384  if isinstance(pattern, basestring) and isinstance(repl, basestring):
385 # logger.debug("!!! pattern: '%s', repl: '%s'", str(pattern), str(repl))
386  localValue = re.sub(pattern=pattern, repl=repl, string=localValue.decode('utf-8'), flags=re.U + re.M + re.I + re.DOTALL)
387 # logger.debug("!!! localValue after replace: %s", varDump(localValue))
388 
389  if tagName == CONSTS.TAG_LINK:
390  urlObj = Url(localValue)
391  if urlObj.isValid():
392  self.addTag(result=result, tag_name=tagName, tag_value=localValue, xpath=path)
393  else:
394  self.addTag(result=result, tag_name=tagName, tag_value=localValue, xpath=path)
395  except Exception, err:
396  ExceptionLog.handler(logger, err, 'Exception in ScrapyExtractor.extractTag:')
397 
398 
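The post-processing branch above applies every {pattern: repl} dictionary registered in postReplace[tagName] with re.sub(). A standalone sketch of that step; the tag name, pattern and value are invented for illustration, the flags mirror the call in extractTag():

import re
postReplace = {"title": [{r"\s+": " "}]}                # illustrative rule set
localValue = u"Some\n   spaced    title"
for rules in postReplace["title"]:
    for pattern, repl in rules.items():
        localValue = re.sub(pattern=pattern, repl=repl, string=localValue,
                            flags=re.U + re.M + re.I + re.DOTALL)
# localValue == u"Some spaced title"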

◆ extractTags()

def dc_processor.scrapy_extractor.ScrapyExtractor.extractTags (self, resource, result)

Definition at line 438 of file scrapy_extractor.py.

438  def extractTags(self, resource, result):
439  self.blockedByXpathTags = []
440  localResults = []
441  for templateDict in self.templates:
442  for templateName in templateDict:
443  localResult = copy.deepcopy(result)
444  self.extractTagsForOneTemplate(resource, localResult, templateDict[templateName])
445  localResults.append(localResult)
446  break
447 
448  for localResult in localResults:
449  result.mergeResults(localResult)
450  return result
451 
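self.templates iterated here is a list of one-key dictionaries mapping a template name to a per-tag template; each template is run against a deep copy of the incoming result and the copies are folded back through result.mergeResults(). A sketch of that shape; the second set name and the XPath rules are illustrative:

# "Scrapy extractor_default" is SELF_NAME + "_default" from __init__(); "news_set" is illustrative.
templates = [
    {"Scrapy extractor_default": {"title": ["//title/text()"]}},
    {"news_set": {"title": ["//h1/text()"]}}
]
for templateDict in templates:
    for templateName in templateDict:
        template = templateDict[templateName]   # what extractTagsForOneTemplate() receives
        break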

◆ extractTagsForOneTemplate()

def dc_processor.scrapy_extractor.ScrapyExtractor.extractTagsForOneTemplate (self, resource, result, template)

Definition at line 405 of file scrapy_extractor.py.

405  def extractTagsForOneTemplate(self, resource, result, template):
406  try:
407  self.resource = resource
408 # logger.debug("URL: %s \nresource.raw_html: %s ", self.resource.url, resource.raw_html[:255])
409  self.sel = SelectorWrapper(text=resource.raw_html)
410 
411  # search engine parsing
412 # logger.debug("Regular parsing")
413  self.extractTag(CONSTS.TAG_TITLE, result, template, Utils.innerText)
414  self.extractTag(CONSTS.TAG_AUTHOR, result, template, Utils.innerText)
415  self.extractTag(CONSTS.TAG_PUB_DATE, result, template)
416  self.extractTag(CONSTS.TAG_DESCRIPTION, result, template, Utils.innerText)
417  self.extractTag(CONSTS.TAG_DC_DATE, result, template)
418  self.extractTag(CONSTS.TAG_MEDIA, result, template, Utils.innerText, self.imgDelimiter)
419  self.extractTag(CONSTS.TAG_LINK, result, template, Utils.innerText)
420  self.extractTag(CONSTS.TAG_CONTENT_UTF8_ENCODED, result, template, Utils.innerText)
421 
422  # for path in template["enclosure"]]
423  self.extractTag(CONSTS.TAG_KEYWORDS, result, template, Utils.innerText)
424  # Add support of html_lang tag
425  self.extractTag(CONSTS.HTML_LANG, result, template, Utils.innerText)
426 
427  except Exception as err:
428  ExceptionLog.handler(logger, err, "Parse error:", (err))
429 
430  return result
431 
432 

◆ generateTemplatesFromRowTemplates()

def dc_processor.scrapy_extractor.ScrapyExtractor.generateTemplatesFromRowTemplates (self, rowTemplates, domain=None)

Definition at line 118 of file scrapy_extractor.py.

118  def generateTemplatesFromRowTemplates(self, rowTemplates, domain=None):
119  ret = []
120  try:
121  if "sets" in rowTemplates:
122  ret = rowTemplates["sets"]
123  for elem in ret:
124  for setName in elem:
125  if isinstance(elem[setName], basestring):
126  try:
127  with open(elem[setName], "rb") as fd:
128  elem[setName] = json.loads(fd.read())
129  except Exception as excp:
130  logger.debug(">>> generateTemplatesFromRowTemplates element[%s] file/json operations error, %s",
131  setName, str(type(elem[setName])))
132  elem[setName] = {}
133  elif not isinstance(elem[setName], dict):
134  logger.debug(">>> generateTemplatesFromRowTemplates element[%s] wrong type is %s", setName,
135  str(type(elem[setName])))
136  elem[setName] = {}
137 
138  elem[setName] = self.templatePreparer(None, domain, elem[setName])
139  break
140  except Exception as excp:
141  logger.debug(">>> Some error during generateTemplatesFromRowTemplates = " + str(excp))
142  return ret
143 
144 
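The rowTemplates argument is the parsed "sets" JSON shown for the constructor; inline dictionaries are used as-is, string values are opened and parsed as JSON template files, and every set is then narrowed to the given domain through templatePreparer(). A sketch with illustrative names and an illustrative file path:

rowTemplates = {
    "sets": [
        {"inline_set": {"example\\.com": {"title": ["//title/text()"]}}},
        {"file_set": "/path/to/another_template.json"}   # read with open() and json.loads()
    ]
}
# generateTemplatesFromRowTemplates(rowTemplates, domain="www.example.com") then returns
# roughly [{"inline_set": {"title": ["//title/text()"]}}, {"file_set": {...}}]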

◆ pasteLists()

def dc_processor.scrapy_extractor.ScrapyExtractor.pasteLists (self, lhs, rhs)

Definition at line 199 of file scrapy_extractor.py.

199  def pasteLists(self, lhs, rhs):
200 # logger.debug("lhs: %s, type: %s", str(lhs), str(type(lhs)))
201 # logger.debug("rhs: %s, type: %s", str(rhs), str(type(rhs)))
202 
203  if isinstance(lhs, dict) and isinstance(rhs, dict):
204  for elem in rhs:
205 
206  self.postReplace[elem] = []
207  if elem in lhs and self.TEMPLATE_FILE_RULE_REPLACE in lhs[elem] and \
208  isinstance(lhs[elem][self.TEMPLATE_FILE_RULE_REPLACE], dict):
209  self.postReplace[elem].append(lhs[elem][self.TEMPLATE_FILE_RULE_REPLACE])
210 # logger.debug("!!! lhs self.postReplace: %s", str(lhs[elem][self.TEMPLATE_FILE_RULE_REPLACE]))
211 # logger.debug("!!! self.postReplace: %s", str(self.postReplace))
212 
213  if elem in rhs and self.TEMPLATE_FILE_RULE_REPLACE in rhs[elem] and \
214  isinstance(rhs[elem][self.TEMPLATE_FILE_RULE_REPLACE], dict):
215  self.postReplace[elem].append(rhs[elem][self.TEMPLATE_FILE_RULE_REPLACE])
216 # logger.debug("!!! rhs self.postReplace: %s", str(rhs[elem][self.TEMPLATE_FILE_RULE_REPLACE]))
217 # logger.debug("!!! self.postReplace: %s", str(self.postReplace))
218 
219  self.postExclude[elem] = []
220  if elem in lhs and self.TEMPLATE_FILE_RULE_EXCLUDE in lhs[elem] and \
221  isinstance(lhs[elem][self.TEMPLATE_FILE_RULE_EXCLUDE], list):
222  self.postExclude[elem].extend(lhs[elem][self.TEMPLATE_FILE_RULE_EXCLUDE])
223 
224  if elem in rhs and self.TEMPLATE_FILE_RULE_EXCLUDE in rhs[elem] and \
225  isinstance(rhs[elem][self.TEMPLATE_FILE_RULE_EXCLUDE], list):
226  self.postExclude[elem].extend(rhs[elem][self.TEMPLATE_FILE_RULE_EXCLUDE])
227 
228 # logger.debug("!!! self.postExclude['%s']: %s", str(elem), str(self.postExclude[elem]))
229 
230  lXpathList = []
231  rXpathList = []
232 
233  if elem in lhs and isinstance(lhs[elem], dict) and self.TEMPLATE_FILE_RULE_XPATH in lhs[elem] and isinstance(lhs[elem][self.TEMPLATE_FILE_RULE_XPATH], list):
234  lXpathList = lhs[elem][self.TEMPLATE_FILE_RULE_XPATH]
235 
236  if elem in lhs and isinstance(lhs[elem], list):
237  lXpathList = lhs[elem]
238 
239  if elem in rhs and isinstance(rhs[elem], dict) and self.TEMPLATE_FILE_RULE_XPATH in rhs[elem] and isinstance(rhs[elem][self.TEMPLATE_FILE_RULE_XPATH], list):
240  rXpathList = rhs[elem][self.TEMPLATE_FILE_RULE_XPATH]
241 
242  if elem in rhs and isinstance(rhs[elem], list):
243  rXpathList = rhs[elem]
244 
245 # logger.debug("!!! lXpathList: %s", varDump(lXpathList))
246 # logger.debug("!!! rXpathList: %s", varDump(rXpathList))
247 
248  lhs[elem] = lXpathList + rXpathList
249 # logger.debug("!!! lhs[elem]: %s", varDump(lhs[elem]))
250 
251 
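Per-tag entries may be bare XPath lists or dictionaries keyed by the TEMPLATE_FILE_RULE_* constants; the XPath lists are concatenated into lhs while 'replace' and 'exclude' rules are collected into postReplace and postExclude. A toy call with invented entries:

# Toy inputs; the tag name, expressions and rules are illustrative.
lhs = {"title": ["//title/text()"]}
rhs = {"title": {"xpath": ["//h1/text()"],
                 "replace": {r"\s+": " "},
                 "exclude": ["//h1//script"]}}
# After self.pasteLists(lhs, rhs):
#   lhs["title"]              == ["//title/text()", "//h1/text()"]
#   self.postReplace["title"] == [{r"\s+": " "}]
#   self.postExclude["title"] == ["//h1//script"]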

◆ templateLoad()

def dc_processor.scrapy_extractor.ScrapyExtractor.templateLoad (self, config, templ=None, domain=None)

Definition at line 151 of file scrapy_extractor.py.

151  def templateLoad(self, config, templ=None, domain=None):
152  ret = {}
153  defaultTemplate = None
154  try:
155  templateFile = config.get("Application", "template", None)
156  except ConfigParser.NoOptionError:
157  templateFile = None
158  if templateFile:
159  try:
160  logger.debug("Read template from file. %s", templateFile)
161  with open(templateFile, "rb") as fd:
162  defaultTemplate = self.templatePreparer(fd.read(), domain, {})
163  except Exception, err:
164  logger.error("Error Read template from file. %s", str(err))
165 
166  if self.properties is not None and CONSTS.TEMPLATE_KEY in self.properties:
167  ret = self.templatePreparer(self.properties[CONSTS.TEMPLATE_KEY], domain, {})
168  logger.debug("template: " + str(ret))
169  elif templ is not None:
170  logger.debug("template: %s", str(templ))
171  if isinstance(templ, dict):
172  ret = self.templatePreparer(None, domain, templ)
173  else:
174  ret = self.templatePreparer(templ, domain, {})
175 
176 # logger.debug("!!! ret template: %s ", str(ret))
177 
178  # merge default template and custom one
179  if defaultTemplate is not None:
180  logger.debug("merge default template and custom one")
181  defaultTags = defaultTemplate.keys()
182  customTags = ret.keys()
183  logger.debug("tags in default template:\n%s\nin custom template:\n%s", str(defaultTags), str(customTags))
184  for tag in defaultTags:
185  if tag not in customTags:
186  ret[tag] = defaultTemplate[tag]
187  logger.debug("%s was replaced from custom template", str(tag))
188  elif defaultTemplate is not None:
189  ret = defaultTemplate
190  else:
191  logger.error("Error Read template.")
192  return ret
193 
194 
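After loading, tags present in the custom template take precedence and only missing tags are copied from the default template file. The merge loop in isolation, with illustrative templates:

# Sketch of the merge at the end of templateLoad(); templates are illustrative.
defaultTemplate = {"title": ["//title/text()"], "link": ["//link/@href"]}
ret = {"title": ["//h1/text()"]}                 # custom template wins for "title"
for tag in defaultTemplate.keys():
    if tag not in ret.keys():
        ret[tag] = defaultTemplate[tag]
# ret == {"title": ["//h1/text()"], "link": ["//link/@href"]}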

◆ templatePreparer()

def dc_processor.scrapy_extractor.ScrapyExtractor.templatePreparer (self, jsonBuf, domains, globalTemplate)

Definition at line 258 of file scrapy_extractor.py.

258  def templatePreparer(self, jsonBuf, domains, globalTemplate):
259  ret = {}
260  if len(globalTemplate) == 0:
261  try:
262  globalTemplate = json.loads(jsonBuf)
263  except Exception, err:
264  logger.error(">>> Wrong json format. %s", str(err))
265 
266  if len(globalTemplate) > 0:
267  try:
268  if domains is not None:
269 # logger.debug("!!! domains: '%s', type: %s", str(domains), str(type(domains)))
270 # logger.debug("!!! globalTemplate: '%s'", str(globalTemplate))
271 # logger.debug("!!! type(globalTemplate): '%s'", str(type(globalTemplate)))
272  if isinstance(domains, basestring):
273  domains = [domains]
274 
275  for domain in domains:
276  for pattern in globalTemplate:
277  try:
278  searchPatterns = pattern.split()
279  # logger.debug("!!! searchPatterns: '%s'", str(searchPatterns))
280  found = False
281  for searchPattern in searchPatterns:
282  if searchPattern != '*':
283  if re.search(searchPattern, domain, re.UNICODE) is not None:
284  logger.debug("!!! Found pattern: '%s'", str(pattern))
285  if isinstance(globalTemplate[pattern], dict):
286  ret = globalTemplate[pattern]
287  found = True
288  break
289 
290  if found:
291  break
292  except Exception, err:
293  logger.debug("Regular expression error: %s, pattern: '%s', domain: '%s'",
294  str(err), str(pattern), str(domain))
295 
296  # If was fail use old algorithm
297  if len(ret) == 0 and domain in globalTemplate and isinstance(globalTemplate[domain], dict):
298  ret = globalTemplate[domain]
299 
300  if domains is not None:
301  for domain in domains:
302  if len(ret) == 0:
303  while domain.find(".") != -1:
304  domain = domain[domain.find(".") + 1: len(domain)]
305  if domain is not None and domain in globalTemplate:
306  self.pasteLists(ret, globalTemplate[domain])
307  if domain is not None and domain in globalTemplate:
308  self.pasteLists(ret, globalTemplate[domain])
309 
310  domain = "*"
311  if domains is not None and domain in globalTemplate:
312  self.pasteLists(ret, globalTemplate[domain])
313 
314  except Exception, err:
315  ExceptionLog.handler(logger, err, 'Exception: ', (ret))
316 
317  for key, value in ret.items():
318  if isinstance(value, list):
319  removeList = []
320  for elemXPath in value:
321  if elemXPath != "" and elemXPath[0] in self.DISABLE_XPATH_CHARS_LIST:
322  removeList.append(elemXPath)
323 
324  for removeElem in removeList:
325  value.remove(removeElem)
326  logger.debug("For '%s' found disabled xpath: %s", str(key), str(removeElem))
327 
328  return ret
329 
330 
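Template keys are treated as whitespace-separated regular expression patterns matched against the domain with re.search(); the reserved "*" key is a fallback set merged in through pasteLists(), and XPath rules starting with ';' or '#' are dropped as disabled. The pattern selection in isolation, with illustrative data:

import re
globalTemplate = {
    "news\\.example\\.com": {"title": ["//h1/text()"]},
    "*": {"title": ["//title/text()"]}
}
domain, ret = "news.example.com", {}
for pattern in globalTemplate:
    for searchPattern in pattern.split():
        if searchPattern != '*' and re.search(searchPattern, domain, re.UNICODE):
            ret = globalTemplate[pattern]
            break
    if ret:
        break
# ret == {"title": ["//h1/text()"]}; the "*" entry would then be merged via pasteLists()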

Member Data Documentation

◆ blockedByXpathTags

dc_processor.scrapy_extractor.ScrapyExtractor.blockedByXpathTags

Definition at line 109 of file scrapy_extractor.py.

◆ closeVoid

dc_processor.scrapy_extractor.ScrapyExtractor.closeVoid

Definition at line 55 of file scrapy_extractor.py.

◆ DISABLE_XPATH_CHARS_LIST

list dc_processor.scrapy_extractor.ScrapyExtractor.DISABLE_XPATH_CHARS_LIST = [';', '#']
static

Definition at line 39 of file scrapy_extractor.py.

◆ innerTextTagReplacers

dc_processor.scrapy_extractor.ScrapyExtractor.innerTextTagReplacers

Definition at line 69 of file scrapy_extractor.py.

◆ keepAttributes

dc_processor.scrapy_extractor.ScrapyExtractor.keepAttributes

Definition at line 60 of file scrapy_extractor.py.

◆ name

dc_processor.scrapy_extractor.ScrapyExtractor.name

Definition at line 75 of file scrapy_extractor.py.

◆ postExclude

dc_processor.scrapy_extractor.ScrapyExtractor.postExclude

Definition at line 81 of file scrapy_extractor.py.

◆ postReplace

dc_processor.scrapy_extractor.ScrapyExtractor.postReplace

Definition at line 80 of file scrapy_extractor.py.

◆ resource

dc_processor.scrapy_extractor.ScrapyExtractor.resource

Definition at line 78 of file scrapy_extractor.py.

◆ sel

dc_processor.scrapy_extractor.ScrapyExtractor.sel

Definition at line 77 of file scrapy_extractor.py.

◆ SELF_NAME

string dc_processor.scrapy_extractor.ScrapyExtractor.SELF_NAME = "Scrapy extractor"
static

Definition at line 32 of file scrapy_extractor.py.

◆ TEMPLATE_FILE_RULE_EXCLUDE

string dc_processor.scrapy_extractor.ScrapyExtractor.TEMPLATE_FILE_RULE_EXCLUDE = 'exclude'
static

Definition at line 37 of file scrapy_extractor.py.

◆ TEMPLATE_FILE_RULE_REPLACE

string dc_processor.scrapy_extractor.ScrapyExtractor.TEMPLATE_FILE_RULE_REPLACE = 'replace'
static

Definition at line 36 of file scrapy_extractor.py.

◆ TEMPLATE_FILE_RULE_XPATH

string dc_processor.scrapy_extractor.ScrapyExtractor.TEMPLATE_FILE_RULE_XPATH = 'xpath'
static

Definition at line 35 of file scrapy_extractor.py.

◆ templates

dc_processor.scrapy_extractor.ScrapyExtractor.templates

Definition at line 84 of file scrapy_extractor.py.


The documentation for this class was generated from the following file:
scrapy_extractor.py