HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
UserProxyJsonWrapper.py
Go to the documentation of this file.
1 # coding: utf-8
2 
3 """
4 HCE project, Python bindings, Distributed Tasks Manager application.
5 Wrapper for the user proxy JSON property.
6 
7 @package: dc_crawler
8 @file UserProxyJsonWrapper.py
9 @author Alexander Vybornyh <alexander.hce.cluster@gmail.com>
10 @link: http://hierarchical-cluster-engine.com/
11 @copyright: Copyright &copy; 2013-2017 IOIX Ukraine
12 @license: http://hierarchical-cluster-engine.com/license/
13 @since: 0.1
14 """
15 
16 from dc.EventObjects import Proxy
17 
18 
19 class UserProxyJsonWrapper(object):
20 
21  # #Constants used in class
22  SOURCE_NAME = 'source'
23  FILE_PATH_NAME = 'file_path'
24  TRIES_COUNT_NAME = 'tries_count'
25  STATUS_UPDATE_EMPTY_PROXY_LIST_NAME = 'status_update_empty_proxy_list'
26  STATUS_UPDATE_NO_AVAILABLE_PROXY_NAME = 'status_update_no_available_proxy'
27  STATUS_UPDATE_TRIES_LIMITS_NAME = 'status_update_tries_limit'
28 
29  PROXIES_NAME = 'proxies'
30  PROXIES_HOST_NAME = 'host'
31  PROXIES_DOMAINS_NAME = 'domains'
32  PROXIES_PRIORITY_NAME = 'priority'
33  PROXIES_LIMITS_NAME = 'limits'
34  PROXIES_STATE_NAME = 'state'
35 
36  PROXIES_DATA_LIST_NAMES = [PROXIES_HOST_NAME, PROXIES_DOMAINS_NAME, PROXIES_PRIORITY_NAME, PROXIES_LIMITS_NAME, \
37  PROXIES_STATE_NAME]
38 
39  RAW_CONTENY_CHECK_NAME = 'raw_content_check'
40  RAW_CONTENY_CHECK_PATTERNS_NAME = 'patterns'
41  RAW_CONTENY_CHECK_ROTATE_NAME = 'rotate'
42  RAW_CONTENY_CHECK_FAULTS_NAME = 'faults'
43 
44 
45  # Support state values
46  PROXY_STATE_DISABLED = 0
47  PROXY_STATE_ENABLED = 1
48 
49  # Support source type values
50  SOURCE_PROPERTY = 0
51  SOURCE_DATABASE = 1
52 
53  # Status update range allowed values
54  STATUS_UPDATE_MIN_ALLOWED_VALUE = 1
55  STATUS_UPDATE_MAX_ALLOWED_VALUE = 7
56 
57  # Default values
58  DEFAULT_VALUE_JSON_DATA = {}
59  DEFAULT_VALUE_SOURCE = SOURCE_PROPERTY
60  DEFAULT_VALUE_PROXIES = {}
61  DEFAULT_VALUE_PROXIES_DOMAIN = ['*']
62  DEFAULT_VALUE_PROXIES_PRIORITY = 10
63  DEFAULT_VALUE_PROXIES_LIMITS = None
64  DEFAULT_VALUE_PROXIES_STATE = PROXY_STATE_ENABLED
65 
66  DEFAULT_VALUE_RAW_CONTENY_CHECK_PATTERNS = []
67  DEFAULT_VALUE_RAW_CONTENY_CHECK_ROTATE = 1
68  DEFAULT_VALUE_RAW_CONTENY_CHECK_FAULTS = 1
69 
70  DEFAULT_VALUE_TRIES_COUNT = 1
71 
72 
73  # # Initialization
74  #
75  # @param jsonData - json data
76  def __init__(self, jsonData):
77  self.jsonData = (jsonData if isinstance(jsonData, dict) else self.DEFAULT_VALUE_JSON_DATA)
78 
79 
80  # # get source value from json
81  #
82  # @param - None
83  # @return source value from json
84  def getSource(self):
85  return int(self.jsonData[self.SOURCE_NAME] if self.SOURCE_NAME in self.jsonData and \
86  int(self.jsonData[self.SOURCE_NAME]) == (self.SOURCE_PROPERTY or self.SOURCE_DATABASE) else \
87  self.SOURCE_PROPERTY)
88 
89  # # set source value to json
90  #
91  # @param - source value
92  # @return - None
93  def setSource(self, source):
94  if int(source) == self.SOURCE_PROPERTY or int(source) == self.SOURCE_DATABASE:
95  self.jsonData[self.SOURCE_NAME] = int(source)
96 
97 
98  # # get file path value from json
99  #
100  # @param - None
101  # @return file path value from json
102  def getFilePath(self):
103  return self.jsonData[self.FILE_PATH_NAME] if self.FILE_PATH_NAME in self.jsonData else None
104 
105 
106  # # get tries count value from json
107  #
108  # @param - None
109  # @return tries count value from json
110  def getTriesCount(self):
111  return int(self.jsonData[self.TRIES_COUNT_NAME]) if self.TRIES_COUNT_NAME in self.jsonData \
112  else self.DEFAULT_VALUE_TRIES_COUNT
113 
114 
115  # # get status_update_empty_proxy_list value from json
116  #
117  # @param - None
118  # @return status_update_empty_proxy_list value from json
120  return int(self.jsonData[self.STATUS_UPDATE_EMPTY_PROXY_LIST_NAME]) \
121  if self.STATUS_UPDATE_EMPTY_PROXY_LIST_NAME in self.jsonData and \
124 
125 
126  # # get status_update_no_available_proxy value from json
127  #
128  # @param - None
129  # @return status_update_no_available_proxy value from json
131  return int(self.jsonData[self.STATUS_UPDATE_NO_AVAILABLE_PROXY_NAME]) \
132  if self.STATUS_UPDATE_NO_AVAILABLE_PROXY_NAME in self.jsonData and \
135 
136 
137  # # get status_update_tries_limit value from json
138  #
139  # @param - None
140  # @return status_update_tries_limit value from json
142  return int(self.jsonData[self.STATUS_UPDATE_TRIES_LIMITS_NAME]) \
143  if self.STATUS_UPDATE_TRIES_LIMITS_NAME in self.jsonData and \
146 
147 
148  # # get proxies value from json
149  #
150  # @param - None
151  # @return proxies value from json
152  def getProxies(self):
153  return self.jsonData[self.PROXIES_NAME] if self.PROXIES_NAME in self.jsonData else self.DEFAULT_VALUE_PROXIES
154 
155 
156  # # set proxies value to json
157  #
158  # @param proxies - proxies value as dict
159  # @return - None
160  def setProxies(self, proxies):
161  if isinstance(proxies, dict):
162  self.jsonData[self.PROXIES_NAME] = proxies
163 
164 
165  # # set proxies data value to json
166  #
167  # @param proxyData - proxy data value
168  # @return - None
169  def setProxyData(self, proxyData):
170  if isinstance(proxyData, dict):
171  proxies = self.getProxies()
172  if self.PROXIES_HOST_NAME in proxyData.keys():
173 
174  proxyData[self.PROXIES_DOMAINS_NAME] = proxyData[self.PROXIES_DOMAINS_NAME] \
175  if self.PROXIES_DOMAINS_NAME in proxyData.keys() and proxyData[self.PROXIES_DOMAINS_NAME] is not None \
176  and proxyData[self.PROXIES_DOMAINS_NAME] != "" else self.DEFAULT_VALUE_PROXIES_DOMAIN
177 
178  proxyData[self.PROXIES_PRIORITY_NAME] = proxyData[self.PROXIES_PRIORITY_NAME] \
179  if self.PROXIES_PRIORITY_NAME in proxyData.keys() and proxyData[self.PROXIES_PRIORITY_NAME] is not None \
180  and int(proxyData[self.PROXIES_PRIORITY_NAME]) > 0 else self.DEFAULT_VALUE_PROXIES_PRIORITY
181 
182  proxyData[self.PROXIES_LIMITS_NAME] = proxyData[self.PROXIES_LIMITS_NAME] \
183  if self.PROXIES_LIMITS_NAME in proxyData.keys() else self.DEFAULT_VALUE_PROXIES_LIMITS
184 
185  proxyData[self.PROXIES_STATE_NAME] = proxyData[self.PROXIES_STATE_NAME] \
186  if self.PROXIES_STATE_NAME in proxyData.keys() else self.DEFAULT_VALUE_PROXIES_STATE
187 
188  proxies[proxyData[self.PROXIES_HOST_NAME]] = proxyData
189  self.setProxies(proxies)
190 
191 
192  # # get proxies data value from json
193  #
194  # @param proxyName - proxy name
195  # @return - proxy data value
196  def getProxyData(self, proxyName):
197  # variable for result
198  ret = None
199  proxies = self.getProxies()
200  if proxyName in proxies:
201  ret = proxies[proxyName]
202 
203  return ret
204 
205 
206  # # add list of Proxy objects to json
207  #
208  # @param proxyList - list of Proxy objects
209  # @return - None
210  def addProxyList(self, proxyList):
211  if isinstance(proxyList, list):
212  for proxy in proxyList:
213  if isinstance(proxy, Proxy):
214  proxyData = {}
215  for name in self.PROXIES_DATA_LIST_NAMES:
216  if hasattr(proxy, name):
217  proxyData[name] = getattr(proxy, name)
218 
219  self.setProxyData(proxyData)
220 
221 
222  # # get list of Proxy objects from json
223  #
224  # @param - None
225  # @return list of Proxy objects
226  def getProxyList(self):
227  # variable for result
228  proxyList = []
229 
230  proxies = self.getProxies()
231  for proxyName in proxies.keys():
232  proxyData = self.getProxyData(proxyName)
233 
234  proxy = Proxy(siteId='0', host='')
235  proxy.state = self.DEFAULT_VALUE_PROXIES_STATE
236 
237  for name in self.PROXIES_DATA_LIST_NAMES:
238  if name in proxyData:
239  setattr(proxy, name, proxyData[name])
240 
241  if proxy.host != "":
242  proxyList.append(proxy)
243 
244  return proxyList
245 
246 
247  # # get raw_content_check value from json
248  #
249  # @param - None
250  # @return raw_content_check value from json
252  return self.jsonData[self.RAW_CONTENY_CHECK_NAME] if self.RAW_CONTENY_CHECK_NAME in self.jsonData else None
253 
254 
255  # # get patterns value from raw_content_check value
256  #
257  # @param - None
258  # @return patterns value from raw_content_check value
260  rawContentCheck = self.getRawContentCheck()
261  return rawContentCheck[self.RAW_CONTENY_CHECK_PATTERNS_NAME] if rawContentCheck is not None and \
262  self.RAW_CONTENY_CHECK_PATTERNS_NAME in rawContentCheck and \
263  isinstance(rawContentCheck[self.RAW_CONTENY_CHECK_PATTERNS_NAME], list) \
265 
266 
267  # # get rotate value from raw_content_check value
268  #
269  # @param - None
270  # @return rotate value from raw_content_check value
272  rawContentCheck = self.getRawContentCheck()
273  return rawContentCheck[self.RAW_CONTENY_CHECK_ROTATE_NAME] if \
274  rawContentCheck is not None and self.RAW_CONTENY_CHECK_ROTATE_NAME in rawContentCheck \
276 
277 
278  # # get faults value from raw_content_check value
279  #
280  # @param - None
281  # @return faults value from raw_content_check value
283  rawContentCheck = self.getRawContentCheck()
284  return rawContentCheck[self.RAW_CONTENY_CHECK_FAULTS_NAME] if \
285  rawContentCheck is not None and self.RAW_CONTENY_CHECK_FAULTS_NAME in rawContentCheck \