HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
FieldsSQLExpressionEvaluator.py
Go to the documentation of this file.
1 # coding: utf-8
2 """
3 HCE project, Python bindings, Distributed Tasks Manager application.
4 The FieldsSQLExpressionEvaluator class implements the main functionality that supports
5 the SQL_EXPRESSION_FIELDS_UPDATE_CRAWLER and SQL_EXPRESSION_FIELDS_UPDATE_PROCESSOR properties.
6 
7 @package: app
8 @file FieldsSQLExpressionEvaluator.py
9 @author Alexander Vybornyh <alexander.hce.cluster@gmail.com>
10 @link: http://hierarchical-cluster-engine.com/
11 @copyright: Copyright &copy; 2013-2016 IOIX Ukraine
12 @license: http://hierarchical-cluster-engine.com/license/
13 @since: 0.1
14 """
15 
16 import re
17 import json
18 import copy
19 import app.Consts as APP_CONSTS
20 from app.Utils import varDump
21 from app.DateTimeType import DateTimeType
22 from app.Exceptions import DatabaseException
23 import dc_db.Constants as DB_CONSTS
24 
26 
# Names of the objects supported in the incoming JSON
OBJECT_NAME_SITE = 'Site'
OBJECT_NAME_URL = 'URL'
# Macro name case modes accepted by macroReplace()
MACRO_CASE_ORIGINAL = 0
MACRO_CASE_UPPER = 1
MACRO_CASE_LOWER = 2
# Supported cast (customization) types for the result of an evaluated SQL expression
CAST_TYPE_TO_INTEGER = 0
CAST_TYPE_TO_STRING = 1
CAST_TYPE_TO_DATETIME = 2
# Other constants: the prefix+suffix that delimits a macro name, and the DB name passed to customRequest()
REPLACE_MARKER = '%'
DB_NAME = "dc_sites"
41 
# # Constructor
#
# Takes no arguments and performs no initialization.
def __init__(self):
    """Create an evaluator instance; nothing is initialized."""
    pass
45 
# # Execute method
#
# Loads the JSON stored in the `propertyName` site property and evaluates the SQL expressions
# of each 'Site' / 'URL' element found in it.
#
# @param siteProperties - properties of site (may be None)
# @param dbWrapper - instance of DBTasksWrapper for work with DB; nothing is evaluated when it is None
# @param siteObj - instance of Site
# @param urlObj - instance of URL
# @param logger - logger instance
# @param propertyName - property name for current execution
# @return - dict of replaced fields; empty when nothing was evaluated
@staticmethod
def execute(siteProperties, dbWrapper, siteObj, urlObj, logger,
            propertyName=APP_CONSTS.SQL_EXPRESSION_FIELDS_UPDATE_CRAWLER):
    evaluator = FieldsSQLExpressionEvaluator
    # siteProperties may be None, so rule that out before the membership test
    hasProperty = siteProperties is not None and propertyName in siteProperties
    logger.debug("Execute enter.... property: '" + str(propertyName) + "' is exist = " + str(hasProperty))
    # variable for result
    ret = {}
    if dbWrapper is None or not hasProperty:
        return ret

    propertyList = None
    try:
        propertyList = json.loads(siteProperties[propertyName])
    except Exception as err:  # pylint: disable=W0703
        logger.error("Load from site property error: %s, source: %s", str(err), str(propertyName))

    if propertyList is None:
        return ret
    if not isinstance(propertyList, list):
        # Only a JSON list of {'Site': ...} / {'URL': ...} elements can be iterated below
        logger.error("Not support name of object in inputted json")
        return ret

    # NOTE(review): every evaluated element replaces `ret`, so only the fields of the last
    # evaluated element are returned - confirm this is intended for multi-element lists.
    for propertyStruct in propertyList:
        if not isinstance(propertyStruct, dict):
            # `in` on a string would be a substring test and indexing it by name would raise
            logger.error("Not support name of object in inputted json")
        elif evaluator.OBJECT_NAME_SITE in propertyStruct:
            # Evaluate for 'Site' object
            ret = evaluator.evaluateElement(dbWrapper, logger, siteObj, DB_CONSTS.siteDict,
                                            propertyStruct[evaluator.OBJECT_NAME_SITE])
        elif evaluator.OBJECT_NAME_URL in propertyStruct:
            # Evaluate for 'URL' object
            ret = evaluator.evaluateElement(dbWrapper, logger, urlObj, DB_CONSTS.URLTableDict,
                                            propertyStruct[evaluator.OBJECT_NAME_URL])
        else:
            logger.error("Not support name of object in inputted json")

    return ret
87 
88 
# # Evaluate one element
#
# For every `{dbFieldName: {sqlExpression: castType}}` entry of objPropertyStruct the macros in
# sqlExpression are replaced with the attribute values of obj, the expression is executed as
# 'SELECT <expression>', and the first column of the first returned row is cast to castType.
#
# @param dbWrapper - instance of DBTasksWrapper for work with DB
# @param logger - logger instance
# @param obj - instance object for evaluate; when None nothing is evaluated
# @param fieldsDict - dict that maps field names in Object to field names in DB
# @param objPropertyStruct - property structure of object for update
# @return - dict of replaced fields: object attribute name -> evaluated value
@staticmethod
def evaluateElement(dbWrapper, logger, obj, fieldsDict, objPropertyStruct):
    evaluator = FieldsSQLExpressionEvaluator
    logger.debug('evaluateElement enter.... \nobj: ' + varDump(obj) + '\nfieldsDict: ' + str(fieldsDict) +
                 '\nobjPropertyStruct: ' + str(objPropertyStruct))
    # variable for result
    ret = {}
    if obj is None:
        return ret

    # Macro values: upper-cased attribute name -> value; string values are escaped for use in SQL
    objFields = {}
    for key, value in obj.__dict__.items():
        if isinstance(value, basestring):
            objFields[key.upper()] = \
                dbWrapper.dbTask.dbConnections[DB_CONSTS.PRIMARY_DB_ID].escape_string(str(value))
        else:
            objFields[key.upper()] = value

    logger.debug('objFields: %s', str(objFields))

    for fieldName, fieldValue in objPropertyStruct.items():
        logger.debug('fieldName: %s, fieldValue: %s', str(fieldName), str(fieldValue))
        for sqlExpression, valueType in fieldValue.items():
            logger.debug('sqlExpression: %s, valueType: %s', str(sqlExpression), str(valueType))
            sqlQuery = 'SELECT ' + evaluator.macroReplace(sqlExpression, objFields, evaluator.REPLACE_MARKER,
                                                          case=evaluator.MACRO_CASE_UPPER)
            logger.debug("sqlQuery: " + str(sqlQuery))

            customResponse = None
            affectDB = dbWrapper.affect_db
            dbWrapper.affect_db = True
            try:
                customResponse = dbWrapper.customRequest(sqlQuery, evaluator.DB_NAME)
            except DatabaseException as err:
                logger.error("Bad query: %s, error: %s", str(sqlQuery), str(err))
            finally:
                # Restore the caller's flag even if the request fails with an unexpected exception
                dbWrapper.affect_db = affectDB

            logger.debug("customResponse: " + str(customResponse))
            if not customResponse or not customResponse[0]:
                continue

            rawValue = customResponse[0][0]
            result = None
            # A SQL NULL means "no value": leave the field untouched instead of storing the text 'None'
            if rawValue is not None:
                try:
                    if valueType == evaluator.CAST_TYPE_TO_INTEGER:
                        result = int(rawValue)
                    elif valueType == evaluator.CAST_TYPE_TO_STRING:
                        result = str(rawValue)  # pylint: disable=R0204
                    elif valueType == evaluator.CAST_TYPE_TO_DATETIME:
                        dt = DateTimeType.parse(rawValue)
                        if dt is not None:
                            result = dt.strftime("%Y-%m-%d %H:%M:%S")
                    else:
                        logger.debug('Unknown type for cast: ' + str(valueType))
                except Exception as err:  # pylint: disable=W0703
                    logger.error("Customization result by type failed, error: %s", str(err))

            logger.debug('result after cast: ' + str(result) + ' type: ' + str(type(result)))
            if result is None:
                continue

            # Update field value: report it under the object attribute name mapped to this DB field
            for fieldObjName, fieldDBName in fieldsDict.items():
                if fieldDBName == fieldName and hasattr(obj, fieldObjName):
                    logger.debug("Found attribute '" + str(fieldObjName) + "' in object...")
                    ret[fieldObjName] = result

    return ret
159 
160 
# # macroReplace makes macro replacement in incoming pattern string by strings from values dict
#
# Every occurrence of marker + name + marker is replaced with the matching value: string values
# are wrapped in single quotes, other values are inserted as str(value), None values are skipped.
# All macros are substituted in one pass over the pattern, so text inserted for one macro is
# never scanned again for other macro names.
#
# @param pattern - incoming pattern string
# @param values - dict with incoming old substrings (values.keys) and new substrings (values.values)
# @param marker - additional prefix+suffix for values.keys
# @param case - 0 - do not change name case, 1 - change to upper, 2 - change to lower
# @return string with the macros replaced
@staticmethod
def macroReplace(pattern, values, marker, case=MACRO_CASE_ORIGINAL):
    replacements = {}
    for key, value in values.items():
        if value is None:
            continue
        if case == FieldsSQLExpressionEvaluator.MACRO_CASE_UPPER:
            rkey = key.upper()
        elif case == FieldsSQLExpressionEvaluator.MACRO_CASE_LOWER:
            rkey = key.lower()
        else:
            rkey = key
        text = "'" + str(value) + "'" if isinstance(value, basestring) else str(value)
        # Names that collide after the case change: the first one seen wins
        replacements.setdefault(marker + rkey + marker, text)

    if not replacements:
        return pattern

    # Longest macro first, so a macro is never shadowed by a shorter one that is its prefix
    macroNames = sorted(replacements, key=len, reverse=True)
    macroRe = re.compile('|'.join(re.escape(name) for name in macroNames))
    # A callable replacement inserts the value literally (backslashes are not interpreted)
    return macroRe.sub(lambda match: replacements[match.group(0)], pattern)
183 
184 
# # evaluate PDate using 'PDATE_TIME' site property
#
# The property holds a JSON list of {"pattern": <regex>, "value": <SQL expression>} elements.
# For every element whose pattern is found in the URL, the expression is evaluated against a
# copy of urlObj; the result of the last element that produced a value is returned.
#
# @param siteProperties - properties of site (may be None)
# @param dbWrapper - instance of DBTasksWrapper for work with DB
# @param urlObj - instance of URL, it is not modified
# @param logger - logger instance
# @param defaultPubdateValue - default value of pubdate
# @return pubdate value, defaultPubdateValue when nothing was evaluated
@staticmethod
def evaluatePDateTime(siteProperties, dbWrapper, urlObj, logger, defaultPubdateValue=None):
    evaluator = FieldsSQLExpressionEvaluator
    propertyName = APP_CONSTS.SQL_EXPRESSION_FIELDS_PDATE_TIME
    # siteProperties may be None, so rule that out before the membership test
    hasProperty = siteProperties is not None and propertyName in siteProperties
    logger.debug("evaluatePDateTime enter.... property: '" + str(propertyName) +
                 "' is exist = " + str(hasProperty))

    # variable for result
    ret = defaultPubdateValue
    propertyList = None
    if hasProperty:
        try:
            propertyList = json.loads(siteProperties[propertyName])
            logger.debug("propertyList: " + varDump(propertyList) + " type: " + str(type(propertyList)))
        except Exception as err:  # pylint: disable=W0703
            logger.error("Load from site property '%s' has error: %s", str(propertyName), str(err))

    if propertyList is not None and not isinstance(propertyList, list):
        # Only a JSON list of {"pattern": ..., "value": ...} elements can be iterated below
        logger.error("Not found mandatory fields for property '%s'", str(propertyName))
        propertyList = None

    # Without a DB wrapper or a URL object there is nothing to evaluate against
    if propertyList is not None and dbWrapper is not None and urlObj is not None:
        # Work on a copy so the caller's object keeps its own pDate
        localUrlObj = copy.deepcopy(urlObj)
        for propertyStruct in propertyList:
            if not (isinstance(propertyStruct, dict) and
                    "pattern" in propertyStruct and "value" in propertyStruct):
                logger.error("Not found mandatory fields for property '%s'", str(propertyName))
                continue

            pattern = propertyStruct["pattern"]
            value = propertyStruct["value"]

            logger.debug("pattern: " + str(pattern))
            logger.debug("value: " + str(value))
            logger.debug("localUrlObj.url: " + str(localUrlObj.url))
            # macroReplace skips None values, so give pDate the default to keep its macro replaceable
            if localUrlObj.pDate is None:
                localUrlObj.pDate = defaultPubdateValue

            # Check pattern apply to url; a broken pattern in the property must not stop the evaluation
            try:
                matched = re.search(pattern, localUrlObj.url, re.UNICODE) is not None
            except (re.error, TypeError) as err:
                logger.error("Bad pattern '%s' for property '%s', error: %s",
                             str(pattern), str(propertyName), str(err))
                continue
            if not matched:
                continue

            objPropertyStruct = {DB_CONSTS.URLTableDict['pDate']: {value: evaluator.CAST_TYPE_TO_STRING}}
            # Evaluate for 'URL' object
            resDict = evaluator.evaluateElement(dbWrapper, logger, localUrlObj, DB_CONSTS.URLTableDict,
                                                objPropertyStruct)
            logger.debug("!!! evaluatePDateTime resDict: %s", str(resDict))
            if "pDate" not in resDict:
                continue

            rawDate = resDict["pDate"]
            if rawDate.isdigit():
                # An all-digit result is passed to DateTimeType as an integer
                logger.debug("!!! Return numeric value: " + str(rawDate))
                ret = DateTimeType(int(rawDate)).getString()
            else:
                logger.debug("!!! Return string value: " + str(rawDate))
                dt = DateTimeType.parse(rawDate, True, logger, False)
                # Fall back to the raw string when it cannot be parsed as a date
                ret = dt.strftime("%Y-%m-%d %H:%M:%S") if dt is not None else rawDate

    logger.debug("!!! evaluatePDateTime ret: %s", str(ret))

    return ret
def evaluatePDateTime(siteProperties, dbWrapper, urlObj, logger, defaultPubdateValue=None)
def evaluateElement(dbWrapper, logger, obj, fieldsDict, objPropertyStruct)
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
def macroReplace(pattern, values, marker, case=MACRO_CASE_ORIGINAL)
def execute(siteProperties, dbWrapper, siteObj, urlObj, logger, propertyName=APP_CONSTS.SQL_EXPRESSION_FIELDS_UPDATE_CRAWLER)