HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ContentEvaluator.py
Go to the documentation of this file.
1 # coding: utf-8
2 """
3 HCE project, Python bindings, Distributed Tasks Manager application.
4 FieldsSQLExpressionEvaluator class contains the main functionality for supporting
5 the SQL_EXPRESSION_FIELDS_UPDATE_CRAWLER and SQL_EXPRESSION_FIELDS_UPDATE_PROCESSOR properties.
6 
7 @package: app
8 @file ContentEvaluator.py
9 @author Alexander Vybornyh <alexander.hce.cluster@gmail.com>
10 @link: http://hierarchical-cluster-engine.com/
11 @copyright: Copyright &copy; 2013-2017 IOIX Ukraine
12 @license: http://hierarchical-cluster-engine.com/license/
13 @since: 0.1
14 """
15 
16 import re
17 import json
18 from app.Exceptions import DatabaseException
19 from app.Utils import varDump
20 import app.Utils as Utils
21 
22 
24 
25 
class ContentEvaluator(object):
    # # Applies regular expression replacement rules to content data.
    #
    # The rules are passed as a json string holding a list of objects. Each object
    # uses the keys 'WHERE', 'WHAT', 'WITH' and, optionally, 'CONDITION'.
    # All methods are static; errors are logged and never propagated to the caller.

    # Constants of the supported names of objects in incoming json
    PROPERTY_WHERE_NAME = 'WHERE'  # name of a content, can be "RAW" at least;
    PROPERTY_WHAT_NAME = 'WHAT'  # regular expression defines what to replace;
    PROPERTY_WITH_NAME = 'WITH'  # string to replace with;
    PROPERTY_CONDITION_NAME = 'CONDITION'  # optional, the SQL expression to be executed with urls_<site_id> table;

    PROPERTY_WHERE_VALUE_RAW = 'RAW'

    # Other used constants
    DB_NAME = "dc_urls"
    # NOTE: the misspelled name is kept because it is part of the public class interface.
    QUERY_TEMPALATE = "SELECT * from urls_%s WHERE %s"

    # Constants of error messages
    ERROR_MSG_BAD_FORMAT = "Wrong format of input json: %s"
    ERROR_MSG_BAD_TYPE = "Wrong type of parameter '%s'"
    ERROR_MSG_BAD_DBWRAPPER = "DBWrapper instance is None"
    ERROR_MSG_MISSED_PARAMETER = "Missed parameter '%s'"
    ERROR_MSG_WRONG_PARAMETER = "Parameter '%s' has not support value '%s'"

    # Base type accepted as "a string": `basestring` where it exists, `str` otherwise.
    try:
        _STRING_TYPES = basestring  # pylint: disable=undefined-variable
    except NameError:
        _STRING_TYPES = str

    # # Constructor
    def __init__(self):
        pass


    # # Execute sql expression
    #
    # Builds "SELECT * from urls_<siteId> WHERE <sqlExpression>" and runs it through
    # dbWrapper.customRequest() against the DB_NAME database.
    #
    # @param dbWrapper - DBWrapper instance
    # @param siteId - site ID
    # @param sqlExpression - sql expression
    # @return True if the response is not empty and its first element is not empty,
    #         False otherwise (including any error, which is only logged)
    @staticmethod
    def executeSqlExpression(dbWrapper, siteId, sqlExpression):
        # variable for result
        ret = False
        try:
            if not isinstance(sqlExpression, ContentEvaluator._STRING_TYPES):
                raise Exception(ContentEvaluator.ERROR_MSG_BAD_TYPE % ContentEvaluator.PROPERTY_CONDITION_NAME)

            if dbWrapper is None:
                raise Exception(ContentEvaluator.ERROR_MSG_BAD_DBWRAPPER)

            logger.debug('sqlExpression: %s', str(sqlExpression))
            # NOTE(security): the expression is interpolated into the query text as is.
            # It is a raw SQL fragment by design, so it must come from a trusted source.
            sqlQuery = ContentEvaluator.QUERY_TEMPALATE % (str(siteId), str(sqlExpression))

            logger.debug("sqlQuery: " + str(sqlQuery))
            affectDB = dbWrapper.affect_db
            dbWrapper.affect_db = True
            customResponse = None
            try:
                customResponse = dbWrapper.customRequest(sqlQuery, ContentEvaluator.DB_NAME)
            except DatabaseException:
                logger.error("Bad query: " + str(sqlQuery))
            finally:
                # Restore the caller's flag even if customRequest() fails with an unexpected error.
                dbWrapper.affect_db = affectDB

            logger.debug("customResponse: " + str(customResponse))

            if customResponse is not None and len(customResponse) > 0 and len(customResponse[0]) > 0:
                ret = True

        except Exception as err:
            logger.error(str(err))

        return ret


    # # Execute replace content
    #
    # Parses the json rules and applies them one after another; each rule works on
    # the result of the previous one. A rule that fails validation or execution is
    # logged and skipped, the remaining rules are still applied.
    #
    # @param dbWrapper - DBWrapper instance
    # @param siteId - site ID
    # @param propertyString - property json string with rules for replace
    # @param contentData - content data for replace
    # @return content data after replacement (the input content if nothing was applied)
    @staticmethod
    def executeReplace(dbWrapper, siteId, propertyString, contentData):
        # variable for result
        ret = contentData
        # Mandatory keys, validated in this order
        requiredNames = (ContentEvaluator.PROPERTY_WHERE_NAME,
                         ContentEvaluator.PROPERTY_WHAT_NAME,
                         ContentEvaluator.PROPERTY_WITH_NAME)
        try:
            propertyObjs = json.loads(propertyString)

            logger.debug("propertyObj: %s", varDump(propertyObjs))

            if not isinstance(propertyObjs, list):
                raise Exception(ContentEvaluator.ERROR_MSG_BAD_FORMAT % varDump(propertyObjs))

            for propertyObj in propertyObjs:
                try:
                    for name in requiredNames:
                        if name not in propertyObj:
                            raise Exception(ContentEvaluator.ERROR_MSG_MISSED_PARAMETER % str(name))
                        if not isinstance(propertyObj[name], ContentEvaluator._STRING_TYPES):
                            raise Exception(ContentEvaluator.ERROR_MSG_BAD_TYPE % name)

                    # 'CONDITION' is optional: an absent key means an empty expression,
                    # which executeReplaceRawContent() treats as "replace unconditionally".
                    sqlExpression = propertyObj.get(ContentEvaluator.PROPERTY_CONDITION_NAME, "")
                    if not isinstance(sqlExpression, ContentEvaluator._STRING_TYPES):
                        raise Exception(ContentEvaluator.ERROR_MSG_BAD_TYPE % ContentEvaluator.PROPERTY_CONDITION_NAME)

                    if propertyObj[ContentEvaluator.PROPERTY_WHERE_NAME] == ContentEvaluator.PROPERTY_WHERE_VALUE_RAW:
                        # Pass the accumulated result so that the effect of earlier rules is kept.
                        ret = ContentEvaluator.executeReplaceRawContent(
                            dbWrapper=dbWrapper,
                            siteId=siteId,
                            pattern=propertyObj[ContentEvaluator.PROPERTY_WHAT_NAME],
                            repl=propertyObj[ContentEvaluator.PROPERTY_WITH_NAME],
                            sqlExpression=sqlExpression,
                            contentData=ret)
                    else:
                        raise Exception(ContentEvaluator.ERROR_MSG_WRONG_PARAMETER % \
                                        (str(ContentEvaluator.PROPERTY_WHERE_NAME),
                                         str(propertyObj[ContentEvaluator.PROPERTY_WHERE_NAME])))

                except Exception as err:
                    logger.error(str(err))

        except Exception as err:
            logger.error(str(err))

        return ret


    # # Execute replace raw content
    #
    # Runs re.sub(pattern, repl, contentData) if sqlExpression is an empty string
    # or executeSqlExpression() returns True for it.
    #
    # @param dbWrapper - DBWrapper instance
    # @param siteId - site ID
    # @param pattern - pattern for regular expression
    # @param repl - result string to replace with
    # @param sqlExpression - sql expression, empty string means no condition
    # @param contentData - content data for replace
    # @return content data after replacement, or the unchanged content data if the
    #         condition is not satisfied or an error happened (errors are only logged)
    @staticmethod
    def executeReplaceRawContent(dbWrapper, siteId, pattern, repl, sqlExpression, contentData):
        # variable for result
        ret = contentData
        try:
            if sqlExpression == "":
                resSqlExpression = True
            else:
                resSqlExpression = ContentEvaluator.executeSqlExpression(dbWrapper, siteId, sqlExpression)

            logger.debug("resSqlExpression: %s", str(resSqlExpression))
            if resSqlExpression:
                ret = re.sub(pattern, repl, contentData)

            logger.debug("before replace len= %s, after replace len = %s", str(len(contentData)), str(len(ret)))
        except Exception as err:
            logger.error(str(err))

        return ret
def executeReplace(dbWrapper, siteId, propertyString, contentData)
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
def executeSqlExpression(dbWrapper, siteId, sqlExpression)
def executeReplaceRawContent(dbWrapper, siteId, pattern, repl, sqlExpression, contentData)