HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
app.UrlNormalize.UrlNormalize Class Reference
Inheritance diagram for app.UrlNormalize.UrlNormalize:
Collaboration diagram for app.UrlNormalize.UrlNormalize:

Public Member Functions

def __init__ (self)
 

Static Public Member Functions

def getNormalizeMask (siteProperties, defaultValue=Utils.UrlNormalizator.NORM_DEFAULT)
 get normalize mask More...
 
def execute (siteProperties, base, url, supportProtocols=None, log=None)
 

Static Public Attributes

string PROPERTY_OPTIONS_MASK = 'mask'
 
string PROPERTY_OPTIONS_REPLACE = 'replace'
 
string ERROR_MSG_FAILED_REPLACE = "Operation replace failed. Error: %s"
 

Detailed Description

Definition at line 21 of file UrlNormalize.py.

Constructor & Destructor Documentation

◆ __init__()

def app.UrlNormalize.UrlNormalize.__init__ (   self)

Definition at line 31 of file UrlNormalize.py.

31  def __init__(self):
32  pass
33 
34 
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ execute()

def app.UrlNormalize.UrlNormalize.execute (   siteProperties,
  base,
  url,
  supportProtocols = None,
  log = None 
)
static

Definition at line 61 of file UrlNormalize.py.

61  def execute(siteProperties, base, url, supportProtocols=None, log=None):
62 
63  # check site property for exist replace rule
64  if siteProperties is not None and isinstance(siteProperties, dict) and APP_CONSTS.URL_NORMALIZE in siteProperties:
65  if log is not None:
66  log.info("!!! siteProperties['%s']: '%s', type: %s", str(APP_CONSTS.URL_NORMALIZE), str(siteProperties[APP_CONSTS.URL_NORMALIZE]),
67  str(type(siteProperties[APP_CONSTS.URL_NORMALIZE])))
68 
69  replaceList = []
70  propertyDict = {}
71  if isinstance(siteProperties[APP_CONSTS.URL_NORMALIZE], basestring):
72  propertyDict = Utils.jsonLoadsSafe(jsonString=siteProperties[APP_CONSTS.URL_NORMALIZE], default=propertyDict, log=log)
73 
74  if isinstance(propertyDict, dict) and UrlNormalize.PROPERTY_OPTIONS_REPLACE in propertyDict:
75  replaceList = propertyDict[UrlNormalize.PROPERTY_OPTIONS_REPLACE]
76 
77  if log is not None:
78  log.debug("!!! replaceList: %s", str(replaceList))
79 
80  if isinstance(replaceList, list):
81  for replaceElem in replaceList:
82  if isinstance(replaceElem, dict):
83  for pattern, repl in replaceElem.items():
84  try:
85  if log is not None:
86  log.debug("!!! pattern: %s, url: %s", str(pattern), str(url))
87  url = re.sub(pattern=pattern, repl=repl, string=url, flags=re.U + re.I)
88  if log is not None:
89  log.debug("!!! res url: %s", str(url))
90  except Exception, err:
91  if log is not None:
92  log.error(UrlNormalize.ERROR_MSG_FAILED_REPLACE, str(err))
93 
94  return Utils.urlNormalization(base=base, url=url, supportProtocols=supportProtocols, log=log)
95 
def execute(self, commands, nodes)
execute method executes incoming commands on nodes, keeps result in responses and responsesDicts field...
Definition: NodeManager.py:63

◆ getNormalizeMask()

def app.UrlNormalize.UrlNormalize.getNormalizeMask (   siteProperties,
  defaultValue = Utils.UrlNormalizator.NORM_DEFAULT 
)
static

Get the URL normalize mask from the site properties.

Parameters
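siteProperties - site properties dictionary
defaultValue - value returned when the site properties contain no normalize mask
Returns
normalize mask as an integer, or defaultValue if no mask is set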

Definition at line 41 of file UrlNormalize.py.

41  def getNormalizeMask(siteProperties, defaultValue=Utils.UrlNormalizator.NORM_DEFAULT):
42  # variable for result
43  ret = defaultValue
44 
45  if siteProperties is not None and isinstance(siteProperties, dict) and APP_CONSTS.URL_NORMALIZE in siteProperties and \
46  isinstance(siteProperties[APP_CONSTS.URL_NORMALIZE], dict) and UrlNormalize.PROPERTY_OPTIONS_MASK in siteProperties[APP_CONSTS.URL_NORMALIZE]:
47  ret = int(siteProperties[APP_CONSTS.URL_NORMALIZE][UrlNormalize.PROPERTY_OPTIONS_MASK])
48 
49  return ret
50 
51 

Member Data Documentation

◆ ERROR_MSG_FAILED_REPLACE

string app.UrlNormalize.UrlNormalize.ERROR_MSG_FAILED_REPLACE = "Operation replace failed. Error: %s"
static

Definition at line 28 of file UrlNormalize.py.

◆ PROPERTY_OPTIONS_MASK

string app.UrlNormalize.UrlNormalize.PROPERTY_OPTIONS_MASK = 'mask'
static

Definition at line 24 of file UrlNormalize.py.

◆ PROPERTY_OPTIONS_REPLACE

string app.UrlNormalize.UrlNormalize.PROPERTY_OPTIONS_REPLACE = 'replace'
static

Definition at line 25 of file UrlNormalize.py.


The documentation for this class was generated from the following file: