HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ftest_request.py
Go to the documentation of this file.
1 # coding: utf-8
2 '''
3 HCE project, Python bindings, DC dependencies
4 The requests research tests.
5 
6 @package: drce
7 @author bgv bgv.hce@gmail.com
8 @link: http://hierarchical-cluster-engine.com/
9 @copyright: Copyright © 2017 IOIX Ukraine
10 @license: http://hierarchical-cluster-engine.com/license/
11 @since: 0.1
12 '''
13 
14 import logging
15 import requests
16 import requests.exceptions
17 
18 from app.Utils import varDump
19 from app.Utils import getTracebackInfo
20 from dc_crawler.RequestsRedirectWrapper import RequestsRedirectWrapper
21 
22 
def getLogger():
  '''
  Return the shared 'hce' logger configured for DEBUG output to the console.

  The logger level is always set to DEBUG. A console StreamHandler with the
  "time - name - level - message" format is attached only when the logger
  has no handlers yet, so calling this function more than once does not
  stack handlers and duplicate every emitted line.

  @return: the logging.Logger instance named 'hce'
  '''
  logger = logging.getLogger('hce')
  logger.setLevel(logging.DEBUG)

  # logging.getLogger() returns the same object for the same name, so any
  # handler attached by a previous call is still present here.
  if not logger.handlers:
    # console handler emitting everything from DEBUG upwards
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    logger.addHandler(ch)

  return logger
42 
# Logger shared by the research snippets in this script.
logger = getLogger()

# Requests API reference: http://docs.python-requests.org/en/latest/api/

# Initial request values; `url` and `headers` are reassigned further down
# before the actual request is made.
url = 'https://api.github.com/user'
payload = ''
headers = dict()
50 
51 # request(method, url, params=None, data=None, headers=None, cookies=None, files=None, auth=None, timeout=None, allow_redirects=True, proxies=None,
52 # hooks=None, stream=None, verify=None, cert=None, json=None)
53 
54 # r = requests.get(url, auth=('user', 'pass'))
55 # r = requests.post(url, data=json.dumps(payload), headers=headers)
56 # r = requests.put("http://httpbin.org/put")
57 # r = requests.delete("http://httpbin.org/delete")
58 # r = requests.head("http://httpbin.org/get")
59 # r = requests.options("http://httpbin.org/get")
60 # r = requests.head("http://feedproxy.google.com/~r/tpm-news/~3/41c5vi-4njA/senate-confirms-wilbur-ross-as-secretary-of-commerce")
61 # r = requests.get('https://www.nytimes.com/2017/02/27/science/arctic-plants-spring-global-warming.html?partner=rss&emc=rss')
62 
63 # s = requests.Session()
64 # r = s.get('http://httpbin.org/get')
65 # print str(r)
66 # with requests.Session() as s:
67 # s.get('http://httpbin.org/get')
68 # print str(r)
69 
70 # req = requests.Request('GET', 'http://httpbin.org/get')
71 # r = req.prepare()
72 # s = requests.Session()
73 # s.send(r)
74 #
75 # print varDump(obj=r, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False,
76 # objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=15)
77 #
78 # print varDump(r.headers)
79 # print varDump(r.cookies)
80 # print varDump(r.history)
81 # print str(len(r.history))
82 # print varDump(r.history[0].cookies)
83 # print varDump(r.history[1].cookies)
84 # print "====="
85 # r = requests.head("https://www.nytimes.com/2017/02/27/science/arctic-plants-spring-global-warming.html?partner=rss&emc=rss")
86 #
87 # print varDump(obj=r, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False,
88 # objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=15)
89 # print varDump(r.cookies)
90 # print str(requests.utils.dict_from_cookiejar(r.cookies))
91 # print str(type(requests.utils.dict_from_cookiejar(r.cookies)))
92 
93 # maxRedirects = 12
94 
95 # req = requests.Request('HEAD', 'http://thecaucus.blogs.nytimes.com/feed')
96 # req = requests.Request('HEAD', 'http://scotsman.com/cmlink/swts-news-dynmc-politics-feed-1-957044')
97 # req = requests.Request('HEAD', 'https://www.nytimes.com/politics/first-draft/feed/')
98 # r = req.prepare()
99 #
100 # s = requests.Session()
101 # s.max_redirects = int(maxRedirects)
102 # res = s.send(r)
103 #
104 # print varDump(res)
105 #
106 # print('Url: ' + str(res.request.url))
107 # url = 'http://www.forbes.com/sites/jonentine/2014/04/30/infographic-on-4-ways-to-breed-crops-by-scrambling-genes-youll-be-surprised-which-ones-are-regulated/'
108 # url = 'http://www.forbes.com/sites/blakeoestriecher/2017/04/04/wwe-smackdown-5-ways-shinsuke-nakamura-will-change-the-blue-brand/'
109 # url = 'http://rssfeeds.usatoday.com/~/477545908/0/usatodaycomworld-topstories~Japanese-PM-Shinzo-Abe-vows-aposcountermeasuresapos-against-North-Korea/'
110 # url = 'http://rssfeeds.usatoday.com/~/476998866/0/usatodaycomworld-topstories~All-the-presidentaposs-men-and-women-Trumplike-leaders-proliferate/'
# Feed item URL used for the request; other candidates are kept commented out.
url = 'http://rssfeeds.usatoday.com/~/477506178/0/usatodaycomworld-topstories~London-fights-pollution-by-charging-drivers-of-older-polluting-cars/'
# url = 'http://thecaucus.blogs.nytimes.com/feed'
# url = 'https://www.nytimes.com/politics/first-draft/feed/'
# url = 'http://scotsman.com/cmlink/swts-news-dynmc-politics-feed-1-957044'

# HTTP method name and timeout passed to the wrapper's request() call.
method = 'head'
timeout = 101

# Earlier variant of the header set that also carried a Referer entry:
# headers = {'Accept-Language':'en-US,en;q=0.8,en;q=0.6,us;q=0.4,us;q=0.2,ja;q=0.2', 'Accept-Encoding':'gzip, deflate', 'Cache-Control':'no-cache', 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36', 'Connection':'close', 'Referer':u'http://www.scotsman.com/news/politics/indyref2-scotland-in-union-group-launches-project-listen-1-4392095', '--allow-running-insecure-content':'', 'Pragma':'no-cache', '--disable-setuid-sandbox':'', '--allow-file-access-from-files':'', '--disable-web-security':''}
headers = {
  'Accept-Language': 'en-US,en;q=0.8,en;q=0.6,us;q=0.4,us;q=0.2,ja;q=0.2',
  'Accept-Encoding': 'gzip, deflate',
  'Cache-Control': 'no-cache',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36',
  'Connection': 'close',
  '--allow-running-insecure-content': '',
  'Pragma': 'no-cache',
  '--disable-setuid-sandbox': '',
  '--allow-file-access-from-files': '',
  '--disable-web-security': '',
}

# Remaining positional arguments of the request() call, in call order.
allowRedirects = True
proxySetting = auth = data = None
maxRedirects = 10
filters = None
125 
# Issue the request through the redirect wrapper and dump what came back:
# the response object, the final request URL, the cookies (both as returned
# and converted to a plain dict) and the body length. Failures are printed
# rather than raised.
try:
  # NOTE(review): `reqv` was not defined anywhere in this listing, so the call
  # below always ended in a caught NameError. The wrapper is created here;
  # this assumes RequestsRedirectWrapper can be constructed without
  # arguments - confirm against dc_crawler.RequestsRedirectWrapper.
  reqv = RequestsRedirectWrapper()
  res = reqv.request(url, method, timeout, headers, allowRedirects, proxySetting, auth, data, maxRedirects, filters)

  print(varDump(res))
  print('Url: ' + str(res.request.url))
  print('res.cookies: ' + varDump(res.cookies) + ' type: ' + str(type(res.cookies)))
  cookies = requests.utils.dict_from_cookiejar(res.cookies)
  print('cookies: ' + varDump(cookies) + ' type: ' + str(type(cookies)))

  print('len(res.content) = ' + str(len(res.content)))
  # print('res.iter_content() = ' + varDump(res.iter_content()))
  # out = list(res.iter_content())
  # print('out = ' + varDump(''.join(out)))

  # domain = None
  # path = None
  # name = None
  # value = None
  # for key, value in res.cookies.items():
  # domain = key
  # print('value: ' + str(value) + ' type: ' + str(type(value)))
  #
  # print('domain: ' + str(domain) + ', path: ' + str(path) + ', name: ' + str(name) + ', value: ' + str(value))

except requests.exceptions.RequestException as err:
  # errors raised by the requests library
  print("!!! RequestException: " + str(err))
except Exception as err:
  # top-level boundary of the script: report anything else with its traceback
  print("!!! Exception: " + str(err))
  print(getTracebackInfo())
156 
157 # try:
158 # req = requests.Request(method, url, headers)
159 # r = req.prepare()
160 #
161 # s = requests.Session()
162 # s.max_redirects = int(maxRedirects)
163 # res = s.send(r)
164 # # print varDump(res)
165 # print varDump(res.headers)
166 # print varDump(res.headers['content-type'])
167 #
168 # print('Url: ' + str(res.request.url))
169 # except requests.exceptions.RequestException, err:
170 # print ("!!! RequestException: %s", str(err))
171 # except Exception, err:
172 # print ("!!! Exception: %s", str(err))
173 
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
def getTracebackInfo(linesNumberMax=None)
Definition: Utils.py:218