HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ftest_ProxyResolver.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 
3 import os
4 import json
5 import logging
6 import copy
7 from dc_crawler.ProxyResolver import ProxyResolver
8 
def getLogger():
  """Return the shared 'hce' logger configured for DEBUG-level console output.

  Fix over the original: configure the console handler only once. The
  original added a new StreamHandler on every call, so repeated calls made
  the shared 'hce' logger emit each record multiple times.

  Returns:
    logging.Logger: the 'hce' logger with level DEBUG and one console
    handler using the '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    format.
  """
  log = logging.getLogger('hce')
  log.setLevel(logging.DEBUG)

  # Only attach a handler if none is present yet (idempotent setup).
  if not log.handlers:
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    log.addHandler(ch)

  return log
28 
29 
class FakeDBWrapper(object):
  """Test stub standing in for the real DB wrapper used by ProxyResolver.

  customRequest() ignores its arguments and returns a fixed set of five
  proxy-host rows.

  Fix over the original: the original did `elem = self.TEMPL_ELEMENT` and
  mutated that dict in place, so the class-level template (shared by every
  instance) was left holding the last row's values after each call. Rows
  are now built from per-call copies and the template stays pristine.
  """

  # Column template for one proxy row; kept for API compatibility.
  TEMPL_ELEMENT = {"Site_Id": None, "Host": None, "Domains": None, "Priority": None, "State": None, "Limits": None}

  # (Host, Domains, Limits) for each canned row; Site_Id, Priority and
  # State are identical for all rows and filled in customRequest().
  _ROWS = (
      ("ibm.com:9090", None, None),
      ("intel.com:11", '["*"]', '[10, 10, 2]'),
      ("intel.com:22", '["mazda.com"]', '[10, 10, 2]'),
      ("intel.com:44", '["www.latimes.com"]', '[11, 12, 13]'),
      ("intel.com:55", '["www.latimes.com"]', '[11, 12, 13]'),
  )

  def __init__(self):
    # Mirrors the real wrapper's attribute; presumably signals that DB
    # writes are permitted — TODO confirm against the real DBTasksWrapper.
    self.affect_db = True

  def customRequest(self, query, dbName, includeFieldsNames=None):
    """Return the canned proxy rows, ignoring all arguments.

    Args:
      query: ignored (SQL text in the real wrapper).
      dbName: ignored (target database name in the real wrapper).
      includeFieldsNames: ignored; present to match the real signature.

    Returns:
      list of dict: five independent row dicts keyed as TEMPL_ELEMENT.
    """
    ret = []
    for host, domains, limits in self._ROWS:
      elem = copy.deepcopy(self.TEMPL_ELEMENT)
      elem["Site_Id"] = "1"
      elem["Host"] = host
      elem["Domains"] = domains
      elem["Priority"] = 1
      elem["State"] = 1
      elem["Limits"] = limits
      ret.append(elem)
    return ret
78 
79 
# Manual smoke test: build a ProxyResolver against the fake DB wrapper and
# ask it to pick a proxy for a single URL, logging the resulting tuple.
logger = getLogger()

# Variant kept for reference (source 0, proxy map inlined in the site
# properties). Note from the original author: rotation by use frequency
# did not work with this configuration.
# siteProperties = {"USER_PROXY": "{\"source\": 0,\"file_path\":\"\/tmp\/proxy.json\",\"proxies\":{\"11.23.107.195:8080\":{\"host\":\"11.23.107.195:8080\",\"domains\": [\"*\"],\"priority\":11,\"limits\":null},\"22.23.107.195:8080\":{\"host\":\"22.23.107.195:8080\",\"domains\": [\"*\"],\"priority\":11,\"limits\":null}}}" }
# dbWrapper = None
# siteId = '0'
# url = None
#
# proxyResolver = ProxyResolver(siteProperties, dbWrapper, siteId, url)
# proxyTuple = proxyResolver.getProxy()
# logger.debug("!!! proxyTuple: %s", str(proxyTuple))

# Variant kept for reference (source 1 with a populated inline proxy map);
# rotation by use frequency did not work with it either.
# siteProperties = {"USER_PROXY": "{\"source\": 1,\"file_path\":\"\/tmp\/proxy.json\",\"proxies\":{\"11.23.107.195:8080\":{\"host\":\"11.23.107.195:8080\",\"domains\": [\"*\"],\"priority\":11,\"limits\":null},\"22.23.107.195:8080\":{\"host\":\"22.23.107.195:8080\",\"domains\": [\"*\"],\"priority\":11,\"limits\":null}}}" }

# Active configuration: source 1 with an empty inline proxy map, so the
# resolver presumably falls back to rows served by FakeDBWrapper — confirm
# against ProxyResolver's source handling.
siteId = '1'
url = 'http://www.latimes.com/dev/index.html'
dbWrapper = FakeDBWrapper()
siteProperties = {"USER_PROXY": "{\"source\": 1,\"file_path\":\"\/tmp\/proxy.json\",\"proxies\":{}}" }

proxyResolver = ProxyResolver(siteProperties, dbWrapper, siteId, url)
proxyTuple = proxyResolver.getProxy()
logger.debug("!!! proxyTuple: %s", str(proxyTuple))
103 
104 
# customRequest(self, query, dbName, includeFieldsNames=None) -- see FakeDBWrapper above.