HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ProcessedContentInternalStruct.py
Go to the documentation of this file.
1 '''
2 @package: dc
3 @author scorp
4 @link: http://hierarchical-cluster-engine.com/
5 @copyright: Copyright © 2013-2014 IOIX Ukraine
6 @license: http://hierarchical-cluster-engine.com/license/
7 @since: 0.1
8 '''
9 
10 import base64
11 import json
12 import copy
13 import types
14 import dc.EventObjects
15 import app.Utils as Utils # pylint: disable=F0401
16 
17 logger = Utils.MPLogger().getLogger()
18 
19 
20 # class ProcessedContentInternalStruct contains static methods for processing the internal structure of processedContent
22 
23  DATA_FIELD = "data"
24  CDATE_FIELD = "CDate"
25 
26 
27  # # processDictProcessedContent fills the processed content list, depending on the contentMask value
28  #
29  # @param cDateValue - incoming CDate value
30  # @param contentMask - incoming contentMask
31  # @param processedContent - incoming processedContent as dict value
32  # @return list of Content or tuple(Content, Content) objects
@staticmethod
def processDictProcessedContent(cDateValue, contentMask, processedContent):
    '''
    Build the list of Content objects selected by contentMask from a decoded processedContent dict.

    @param cDateValue - CDate value handed unchanged to every created Content
    @param contentMask - bit mask tested against the
                         dc.EventObjects.URLContentRequest.CONTENT_TYPE_PROCESSED* flags
    @param processedContent - dict; its "custom" and "internal" entries are read as sequences
                              by the branches that the mask selects
    @return list of Content objects; when both the INTERNAL and CUSTOM flags are set the
            entries for internal/custom items are tuples (Content, Content or None)
    '''
    # NOTE(review): the nesting below was reconstructed from a listing whose indentation was
    # lost; the INTERNAL/CUSTOM chain is assumed to be a sibling of the PROCESSED branch.
    # Confirm against the original file.
    requestFlags = dc.EventObjects.URLContentRequest
    contentClass = dc.EventObjects.Content
    textType = type(u"")

    def dumpJson(value):
        # Serialize one item exactly the way every branch of this method does.
        return json.dumps(value, ensure_ascii=False, encoding='utf-8')

    def makeContent(rawValue):
        # json.dumps(ensure_ascii=False) and json.loads() both hand back unicode text, and
        # base64.b64encode() would implicitly encode it as ASCII, failing on any non-ASCII
        # character. Encode explicitly as UTF-8 first; byte strings pass through untouched.
        if isinstance(rawValue, textType):
            rawValue = rawValue.encode("utf-8")
        return contentClass(base64.b64encode(rawValue), cDateValue, contentClass.CONTENT_PROCESSOR_CONTENT)

    ret = []

    logger.debug("contentMask: %s, len(processedContent): %s", str(contentMask), str(len(processedContent)))
    if contentMask & requestFlags.CONTENT_TYPE_PROCESSED:
        if len(processedContent["custom"]) > 0:
            logger.debug("return item processedContent['custom'][0]")
            ret.append(makeContent(processedContent["custom"][0]))
        else:
            logger.debug("return deep copy of processedContent, `internal` and `custom` removed")
            # Work on a copy so the caller's dict keeps its "internal" and "custom" entries.
            localProcessedContent = copy.deepcopy(processedContent)
            del localProcessedContent["internal"]
            del localProcessedContent["custom"]
            ret.append(makeContent(dumpJson(localProcessedContent)))

    if contentMask & requestFlags.CONTENT_TYPE_PROCESSED_INTERNAL and \
       contentMask & requestFlags.CONTENT_TYPE_PROCESSED_CUSTOM:
        # Pair every internal item with the custom item at the same index; a missing
        # custom item is represented by None in the tuple.
        for index, internalItem in enumerate(processedContent["internal"]):
            contentInternal = makeContent(dumpJson(internalItem))
            if index < len(processedContent["custom"]):
                contentCustom = makeContent(processedContent["custom"][index])
            else:
                contentCustom = None
            ret.append((contentInternal, contentCustom))
    elif contentMask & requestFlags.CONTENT_TYPE_PROCESSED_INTERNAL:
        for internalItem in processedContent["internal"]:
            ret.append(makeContent(dumpJson(internalItem)))
            # Without the ALL flag only the first item is returned.
            if (contentMask & requestFlags.CONTENT_TYPE_PROCESSED_ALL) == 0:
                break
    elif contentMask & requestFlags.CONTENT_TYPE_PROCESSED_CUSTOM:
        for customItem in processedContent["custom"]:
            ret.append(makeContent(customItem))
            # Without the ALL flag only the first item is returned.
            if (contentMask & requestFlags.CONTENT_TYPE_PROCESSED_ALL) == 0:
                break

    logger.debug("Return Content instance list: %s", str(len(ret)))

    return ret
88 
89 
90  # # parseProcessedBuf method decodes the incoming processedContent buffer and, depending on the contentMask value,
91  # fills the outgoing Contents list
92  #
93  # @param buf - incoming processedContent buff
94  # @param cDateValue - incoming CDate value
95  # @param contentMask - incoming contentMask
96  # @return list of Content or tuple(Content, Content) objects
@staticmethod
def parseProcessedBuf(buf, cDateValue, contentMask):
    '''
    Decode a base64-encoded JSON processedContent buffer and build the Content list for contentMask.

    @param buf - base64-encoded JSON text of the processedContent
    @param cDateValue - CDate value handed unchanged to every created Content
    @param contentMask - bit mask tested against the
                         dc.EventObjects.URLContentRequest.CONTENT_TYPE_PROCESSED* flags
    @return list of Content or tuple(Content, Content) objects; empty when the buffer cannot be
            decoded, when a dict lacks the "custom" or "internal" key, or when the decoded value
            is neither a dict nor a list
    '''
    ret = []
    try:
        processedContent = json.loads(base64.b64decode(buf))
    except Exception as excp:  # pylint: disable=W0703
        # Best effort: an undecodable buffer is logged and yields an empty result.
        processedContent = None
        # Let the logger format the exception lazily instead of calling str() on it
        # here, where a failure would escape the handler.
        logger.debug(">>> Wrong something bad with processedContent decode, =%s", excp)

    if isinstance(processedContent, dict):
        if "custom" in processedContent and "internal" in processedContent:
            ret = ProcessedContentInternalStruct.processDictProcessedContent(cDateValue, contentMask, processedContent)
        else:
            logger.debug(">>> Wrong custom or internal not present in processedContent DICT")
    elif isinstance(processedContent, list):
        if contentMask & dc.EventObjects.URLContentRequest.CONTENT_TYPE_PROCESSED:
            # A list payload is returned whole, re-serialized as one JSON document.
            resultElem = json.dumps(processedContent, ensure_ascii=False, encoding='utf-8')
            # json.dumps(ensure_ascii=False) may return unicode text; base64.b64encode() would
            # implicitly encode it as ASCII and fail on non-ASCII characters, so encode as UTF-8.
            if isinstance(resultElem, type(u"")):
                resultElem = resultElem.encode("utf-8")
            resultElem = base64.b64encode(resultElem)
            content = dc.EventObjects.Content(resultElem, cDateValue,
                                              dc.EventObjects.Content.CONTENT_PROCESSOR_CONTENT)
            ret.append(content)
    return ret
115  content = dc.EventObjects.Content(resultElem, cDateValue, dc.EventObjects.Content.CONTENT_PROCESSOR_CONTENT)
116  ret.append(content)
117  return ret