HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
SelectorWrapper.py
Go to the documentation of this file.
1 '''
2 Created on Nov 19, 2015
3 
4 @package: app
5 @author: scorp
6 @link: http://hierarchical-cluster-engine.com/
7 @copyright: Copyright © 2013-2014 IOIX Ukraine
8 @license: http://hierarchical-cluster-engine.com/license/
9 @since: 0.1
10 '''
11 
12 import logging
13 from scrapy.selector import Selector
14 from scrapy.selector import SelectorList
15 
16 import app.Consts as APP_CONSTS
17 
18 
19 logger = logging.getLogger(APP_CONSTS.LOGGER_NAME)
20 
21 
22 # # SelectorWrapper implements a wrapper for the Selector module functionality (xpath or css extraction)
23 #
class SelectorWrapper(Selector):

    # Leading characters that mark a query string as a CSS selector
    # ('.' - class selector, '#' - id selector)
    CSS_DETECT_SYMBOLS = ['.', '#']
    # Characters that, right after a leading '.', mean the query is an
    # abbreviated XPath step ('./a', './/a', '..', '../a') and not a CSS class
    XPATH_AFTER_DOT_SYMBOLS = ['/', '.']

    # #Class's constructor
    #
    # All arguments are forwarded positionally and unchanged to Selector.__init__
    #
    # @param response - passed through to Selector
    # @param text - incoming text buffer with document (xml or html) structure
    # @param type - passed through to Selector (name shadows the builtin to keep the parent's signature)
    # @param namespaces - passed through to Selector
    # @param _root - passed through to Selector
    # @param _expr - passed through to Selector
    def __init__(self, response=None, text=None, type=None, namespaces=None, _root=None, _expr=None):  # pylint: disable=W0622
        super(SelectorWrapper, self).__init__(response, text, type, namespaces, _root, _expr)


    # #Method _isCssQuery decides whether a query string has to be evaluated as a CSS selector
    #
    # @param query - candidate selector string (any other type is never treated as CSS)
    # @return True if query is a non-empty string that starts with one of CSS_DETECT_SYMBOLS
    #         and is not an abbreviated relative XPath, otherwise False
    @classmethod
    def _isCssQuery(cls, query):
        # basestring covers both str and unicode (Python 2); None and other types fail here
        if not isinstance(query, basestring) or len(query) == 0:
            return False
        if query[0] not in cls.CSS_DETECT_SYMBOLS:
            return False
        # '.', '..', './...' and '../...' are XPath abbreviations for the context node and
        # its parent; none of them is a valid CSS class selector, so keep them on the XPath path
        if query[0] == '.' and (len(query) == 1 or query[1] in cls.XPATH_AFTER_DOT_SYMBOLS):
            return False
        return True


    # #Method xpath deliveries wrapper for Selector interface
    #
    # Dispatches the query to the parent's css() when it looks like a CSS selector and
    # to the parent's xpath() in every other case (including None, empty or non-string
    # values, which are handed to the parent's xpath() as is).
    #
    # @param xpathStr - incoming xpath or css selector string
    # @return result of the parent Selector's css() or xpath() call for xpathStr
    def xpath(self, xpathStr):
        if self._isCssQuery(xpathStr):
            return super(SelectorWrapper, self).css(xpathStr)
        return super(SelectorWrapper, self).xpath(xpathStr)
def __init__(self, response=None, text=None, type=None, namespaces=None, _root=None, _expr=None)