HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ftest_selenium_chrome.py
Go to the documentation of this file.
#coding: utf-8
'''
HCE project, Python bindings, DC dependencies
The selenium research tests.

@package: drce
@author bgv bgv.hce@gmail.com
@link: http://hierarchical-cluster-engine.com/
@copyright: Copyright © 2015 IOIX Ukraine
@license: http://hierarchical-cluster-engine.com/license/
@since: 0.1
'''


import sys
# Python 2 only: reload() restores sys.setdefaultencoding(), which site.py
# removes at start-up. Switching the default codec to UTF-8 makes implicit
# str <-> unicode conversions use UTF-8 instead of ASCII (presumably so the
# unicode page source can be written to plain byte files further down).
reload(sys)
sys.setdefaultencoding('utf8')
import time
import re
import ctypes


from selenium import webdriver
import selenium.webdriver.support.ui


# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------

# Default timeout in seconds. It is used both as the page-load timeout and as
# the settle delay after a successful load. The active target below overrides
# it with its own value.
timeout = 10
# Directory the fetched pages are written to; it is not created by the script.
out_dir = "/tmp/00/"
# When True, the JavaScript "macro" step runs after the page has been captured.
macro_execute = True

#u="http://www.google.com/"
#out_file = "www.google.com.html"

# Active target: the URL to fetch and the file name (inside out_dir) to save to.
u = "http://www.nytimes.com/2015/06/18/us/politics/gop-is-wary-that-health-care-win-could-have-its-own-risks.html?hp&action=click&pgtype=Homepage&module=first-column-region&region=top-news&WT.nav=top-news&_r=0"
out_file = "www.nytimes.com.html"


# Per-target override of the default timeout above.
timeout = 5

# Alternative targets: uncomment one (u, out_file, timeout) triple to use it.
#u = "http://www.intel.co.uk/content/www/uk/en/processors/core/core-i5-processor.html"
#out_file = "www.intel.co.uk.html"
#timeout = 5

#u = "http://www.afpbb.com/articles/-/3046087"
#out_file = "www.afpbb.com.html"
#timeout = 10

#u = "http://www.dhtmlgoodies.com/scripts/ajax-dynamic-content/ajax-dynamic-content.html"
#out_file = "www.dhtmlgoodies.com.html"
#timeout = 5

#u = "http://tr.dc4.hce-project.com/wp-content/uploads/revslider/home-business-slide2/img-slide3.png"
#out_file = "tr.dc4.hce-project.com.png"
#timeout = 5

#u = "http://hierarchical-cluster-engine.com/docs/pdf/DC_client_setup.pdf"
#out_file = "hierarchical-cluster-engine.com.pdf"
#timeout = 5


#Errors simulation
# Each target is followed by the browser-log text it is expected to produce.
#u = "http://wrongurlwrongurlwrongurlwrongurlwrongurl.com/"
#out_file = "wrongurlwrongurlwrongurlwrongurlwrongurl.com.html"
#timeout = 1
#"Failed to load resource: net::ERR_NAME_NOT_RESOLVED"

#u = "http://127.0.0.1/retcode.php?c=404"
#out_file = "404.html"
#timeout = 1
#"404 (Not Found)"

#u = "http://127.0.0.1/retcode.php?c=403"
#out_file = "403.html"
#timeout = 1
#"403 (Forbidden)"

#u = "http://127.0.0.1/retcode.php?c=500"
#out_file = "500.html"
#timeout = 1
#"500 (Internal Server Error)"

#u = "http://127.0.0.1/redirect.php?c=303&n=100&u=http://127.0.0.1/"
#out_file = "redirect303.html"
#timeout = 5
#Failed to load resource: net::ERR_TOO_MANY_REDIRECTS


# The chromedriver binary path is assembled as
#   exec_path + driver_name + <pointer size in bits> + driver_release
# e.g. "../../bin/chromedriver64_chrome50" on a 64-bit interpreter.
exec_path = "../../bin/"
#--verbose --log-path=chromedriver32.log
driver_name = "chromedriver"
driver_release = "_chrome50"
# Accumulated error text; an empty string means "no error so far".
error_msg = ""
# WebDriver instance; stays None until the driver has been started.
driver = None


# Headless alternative kept for reference (requires the pyvirtualdisplay
# package); it is not executed:
#
#   from pyvirtualdisplay import Display
#   from selenium import webdriver
#   display = Display(visible=0, size=(800, 600))
#   display.start()
#   browser = webdriver.Chrome()
#   browser.get('http://www.google.com')
#   print browser.title
#   browser.quit()
#   display.stop()


# Start Chrome through the chromedriver binary that matches the interpreter's
# pointer size (32/64 bit). Any failure is recorded in error_msg / error_code
# rather than raised, so it can be reported and the script can exit below.
try:
    #Get driver
    #driver = webdriver.Chrome(executable_path=exec_path + driver_name + str(ctypes.sizeof(ctypes.c_voidp) * 8))
    #driver = webdriver.Remote(command_executor="http://127.0.0.1:36454",
    #                          desired_capabilities=webdriver.DesiredCapabilities.CHROME)

    disable_setuid_sandbox = "--disable-setuid-sandbox"
    chrome_option = webdriver.ChromeOptions()
    chrome_option.add_argument(disable_setuid_sandbox)
    # Pointer size in bits selects the "32" or "64" build of the driver binary.
    driver_path = exec_path + driver_name + str(ctypes.sizeof(ctypes.c_voidp) * 8) + driver_release
    driver = webdriver.Chrome(executable_path=driver_path, chrome_options=chrome_option)
except Exception as err:
    error_msg = "Error: " + str(err)
    error_code = 1
except:
    # Anything that is not an Exception subclass (e.g. KeyboardInterrupt).
    error_msg = "Error: General driver initialization!"
    error_code = 2

if error_msg != "":
    if driver is not None:
        driver.quit()
    print(error_msg)
    # Report the failure through the exit status (1 or 2) instead of exiting
    # with 0, so callers can tell that the driver never started.
    sys.exit(error_code)

#print "session_id: " + str(driver.session_id)
#print "capabilities: " + str(driver.capabilities)

# Load the target page. A page-load timeout (or any other failure) raises
# here; shut the browser down before propagating so that no Chrome or
# chromedriver process is left running.
driver.set_page_load_timeout(timeout)
try:
    driver.get(u)
except BaseException:
    driver.quit()
    raise

#Get logs
# Collect browser-console entries that mention the requested URL; they are
# appended to error_msg, one per line.
log_types = driver.log_types
if 'browser' in log_types:
    log_list = driver.get_log('browser')
    for item_dict in log_list:
        # A missing or None message is treated as empty and skipped.
        message = item_dict.get("message") or ""
        if message and u in message:
            error_msg += message + "\n"

# Map the collected browser-log text to a numeric error code. 0 means no
# recognised error.
error_code = 0
if error_msg != "":
    # (pattern, code) pairs checked in order; the first match wins, so the
    # generic "net::" catch-all must stay last.
    entrances = [
        (r"(.*)net::ERR_NAME_NOT_RESOLVED(.*)", 10),
        (r"(.*)net::ERR_TOO_MANY_REDIRECTS(.*)", 11),
        (r"(.*)403 \(Forbidden\)(.*)", 403),
        (r"(.*)404 \(Not Found\)(.*)", 404),
        (r"(.*)500 \(Internal Server Error\)(.*)", 500),
        (r"(.*)net::(.*)", 520)]
    for pattern, code in entrances:
        if re.search(pattern, error_msg):
            error_code = code
            break

# On a clean load, wait another `timeout` seconds before inspecting the page
# (presumably to let dynamically loaded content finish rendering).
if error_code == 0:
    time.sleep(timeout)

# Detect the document's content type and charset, trying the following in
# order:
#   1. <meta http-equiv="content-type" content="type; charset=..."> (any case)
#   2. presence of an <html> element    -> content type "text/html"
#   3. <meta charset="...">             -> charset
#   4. document.characterSet via script -> charset
# Every lookup is best-effort: a missing element raises inside Selenium and is
# deliberately ignored so the next fallback can run.
content_type = None
charset = None
attr = None

try:
    #attr = driver.find_element_by_xpath('//meta[@http-equiv="content-type"]').get_attribute("content")
    # translate() lower-cases the attribute value so "Content-Type" matches too.
    attr = driver.find_element_by_xpath(".//meta[translate(@http-equiv,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz')='content-type']").get_attribute("content")
    # Whitespace after ';' is optional, so "text/html;charset=utf-8" is
    # accepted as well as "text/html; charset=utf-8".
    regex = re.compile(r"(.*);\s*charset=(.*)", re.IGNORECASE)
    match = regex.search(attr)
    if match is not None:
        content_type, charset = [part.strip() for part in match.groups()]
except Exception:
    # No such meta tag, or its content attribute is missing: try fallbacks.
    pass

if content_type is None:
    try:
        # NOTE: attr now holds the <html> WebElement rather than a string; it
        # is only used in the diagnostic print below.
        attr = driver.find_element_by_xpath('//html')
        content_type = "text/html"
    except Exception:
        # Not an HTML document; content_type stays None.
        pass

if content_type is not None and charset is None:
    try:
        charset = driver.find_element_by_xpath('//meta[@charset]').get_attribute("charset")
    except Exception:
        # No <meta charset> tag; the script-based fallback follows.
        pass

if charset is None:
    try:
        charset = driver.execute_script("return document.characterSet;")
    except Exception as err:
        print(str(err))

print("attr=" + str(attr) + ", charset=" + str(charset) + ", content-type=" + str(content_type))

#print str(driver.get_log('driver'))
#GET_SESSION_LOGS, STATUS

# Capture the page, optionally run the JavaScript macros, and always shut the
# browser down afterwards, even if a step fails (e.g. out_dir does not exist).
try:
    html = driver.page_source
    cookies = driver.get_cookies()
    #print str(cookies)
    print(driver.current_url)

    #Macro execution functionality
    if macro_execute:
        # m navigates the page away via location.replace(); m1 just returns 1.
        m = "function aaa(){location.replace('https://www.congress.gov/bill/114th-congress/senate-bill/1016/text');} return aaa();"
        m1 = "function bbb(){return 1;} return bbb();"
        r = driver.execute_script(m)
        r1 = driver.execute_script(m1)
        html_macro = driver.page_source
        print("after macro execution:\n" + "driver.current_url: " + driver.current_url + "\nreturned: " + str(r) + "\nreturned1: " + str(r1))
        # Page source after the macro goes to "<out_file>_macro".
        with open(out_dir + out_file + "_macro", "w") as f:
            f.write(html_macro)
finally:
    driver.quit()

# Page source captured before the macro ran.
with open(out_dir + out_file, "w") as f:
    f.write(html)

if error_msg != "":
    print("ERRORS, code " + str(error_code) + ":\n" + error_msg)