# -*- coding: utf-8 -*-
"""
HCE project, Python bindings, DC dependencies
The selenium research tests.

@author bgv bgv.hce@gmail.com
@link: http://hierarchical-cluster-engine.com/
@copyright: Copyright © 2015 IOIX Ukraine
@license: http://hierarchical-cluster-engine.com/license/

Loads one page in Chrome through Selenium, collects browser-console errors
that mention the page URL, maps them to a numeric error code, detects the
content type and charset, runs two JavaScript macros and stores the page
source before and after the macros.

NOTE(review): this file was rebuilt from a listing in which a number of
statements were not visible.  Every reconstructed part is marked with
``NOTE(review)`` below and must be confirmed against the original script.
"""

import ctypes
import io
import re
import sys

if sys.version_info[0] == 2:
    # Python 2 only: setdefaultencoding() is removed from `sys` by site.py and
    # is reachable again only after reload(sys).  It does not exist on Python 3.
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf8')

# Page under test.  The query string previously contained a corrupted
# "&region" parameter (rendered as a registered-trademark sign); restored here.
u = "http://www.nytimes.com/2015/06/18/us/politics/gop-is-wary-that-health-care-win-could-have-its-own-risks.html?hp&action=click&pgtype=Homepage&module=first-column-region&region=top-news&WT.nav=top-news&_r=0"
out_file = "www.nytimes.com.html"

# NOTE(review): the assignments of `out_dir` and `timeout` are not visible in
# the listing; the values below are placeholders -- confirm.
out_dir = ""
timeout = 30

# The driver binary path is exec_path + driver_name + <pointer width in bits>
# + driver_release.
exec_path = "../../bin/"
driver_name = "chromedriver"
driver_release = "_chrome50"

# Headless-host sample based on pyvirtualdisplay.
# NOTE(review): only fragments of this snippet are visible and it looks like a
# non-executed usage note in the original -- confirm before enabling it.
#   from pyvirtualdisplay import Display
#   from selenium import webdriver
#   display = Display(visible=0, size=(800, 600))
#   browser = webdriver.Chrome()
#   browser.get('http://www.google.com')

# (pattern, error code) pairs, checked in order; the generic "net::" rule must
# stay last so that the specific net:: errors win.
entrances = [
    (r"(.*)net::ERR_NAME_NOT_RESOLVED(.*)", 10),
    (r"(.*)net::ERR_TOO_MANY_REDIRECTS(.*)", 11),
    (r"(.*)403 \(Forbidden\)(.*)", 403),
    (r"(.*)404 \(Not Found\)(.*)", 404),
    (r"(.*)500 \(Internal Server Error\)(.*)", 500),
    (r"(.*)net::(.*)", 520)]
ENTRANCE_RULES = [(re.compile(pattern), code) for pattern, code in entrances]

CONTENT_TYPE_RE = re.compile(r"(.*); charset=(.*)", re.IGNORECASE)
META_CONTENT_TYPE_XPATH = ".//meta[translate(@http-equiv,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz')='content-type']"


def classify_error(error_msg, default=0):
    """Return the code of the first `entrances` rule found in *error_msg*.

    *default* is returned when no rule matches.
    NOTE(review): the statements that consume the regex match are not visible
    in the listing; "first match wins" is assumed from the rule ordering.
    """
    for regex, code in ENTRANCE_RULES:
        if regex.search(error_msg) is not None:
            return code
    return default


def parse_content_type(attr):
    """Split a ``<meta http-equiv=content-type>`` value.

    Returns ``(content_type, charset)``, or ``(None, None)`` when *attr* is
    empty or carries no ``; charset=`` part.
    NOTE(review): only the ``content_type = items[0]`` assignment is visible;
    taking the charset from the second group is assumed.
    """
    if not attr:
        return None, None
    found = CONTENT_TYPE_RE.search(attr)
    if found is None:
        return None, None
    return found.group(1), found.group(2)


def create_driver():
    """Build the Chrome driver with the setuid sandbox disabled."""
    # Imported here so the pure helpers above stay importable on hosts that
    # have no selenium installed.
    import selenium.webdriver.support.ui  # noqa: F401 -- kept from the original imports
    from selenium import webdriver

    disable_setuid_sandbox = "--disable-setuid-sandbox"
    chrome_option = webdriver.ChromeOptions()
    chrome_option.add_argument(disable_setuid_sandbox)
    pointer_bits = ctypes.sizeof(ctypes.c_void_p) * 8
    return webdriver.Chrome(
        executable_path=exec_path + driver_name + str(pointer_bits) + driver_release,
        chrome_options=chrome_option)


def collect_browser_errors(driver, url):
    """Concatenate the 'browser' log messages that mention *url*, one per line."""
    if 'browser' not in driver.log_types:
        return ""
    collected = []
    for item_dict in driver.get_log('browser'):
        message = item_dict.get("message")
        if message and url in message:
            collected.append(message + "\n")
    return "".join(collected)


def detect_content_type(driver):
    """Return ``(attr, content_type, charset)`` for the loaded page.

    Each lookup is best effort: a failing lookup leaves the value as ``None``
    and the next fallback is tried.
    NOTE(review): the bodies of the original ``except`` handlers are not
    visible; ignoring the failure is assumed.
    """
    attr = None
    content_type = None
    charset = None

    try:
        attr = driver.find_element_by_xpath(META_CONTENT_TYPE_XPATH).get_attribute("content")
        content_type, charset = parse_content_type(attr)
    except Exception:
        pass  # no such <meta> element

    if content_type is None:
        try:
            # Any document with an <html> root is treated as text/html.
            attr = driver.find_element_by_xpath('//html')
            content_type = "text/html"
        except Exception:
            pass

    if content_type is not None and charset is None:
        try:
            charset = driver.find_element_by_xpath('//meta[@charset]').get_attribute("charset")
        except Exception:
            pass
        # NOTE(review): the guard around this fallback is not visible; it is
        # assumed to run only when the <meta charset> lookup gave nothing.
        if charset is None:
            try:
                charset = driver.execute_script("return document.characterSet;")
            except Exception:
                pass

    return attr, content_type, charset


def save_text(path, text):
    """Write *text* to *path* as UTF-8, always closing the file."""
    if isinstance(text, bytes):
        text = text.decode("utf-8")
    with io.open(path, "w", encoding="utf-8") as f:
        f.write(text)


def main():
    """Run the research scenario and return the resulting error code."""
    error_msg = ""
    # NOTE(review): the initial value and any codes assigned in the driver
    # initialisation handlers are not visible -- confirm.
    error_code = 0
    driver = None

    try:
        driver = create_driver()
    except Exception as err:
        error_msg = "Error: " + str(err)
    except BaseException:
        # NOTE(review): presumably a bare `except:` in the original.
        error_msg = "Error: General driver initialization!"

    if driver is not None:
        try:
            driver.set_page_load_timeout(timeout)
            # NOTE(review): the page fetch is not visible in the listing.
            driver.get(u)

            error_msg += collect_browser_errors(driver, u)
            if error_msg:
                error_code = classify_error(error_msg, error_code)

            attr, content_type, charset = detect_content_type(driver)
            print("attr=" + str(attr) + ", charset=" + str(charset) + ", content-type=" + str(content_type))

            html = driver.page_source
            cookies = driver.get_cookies()  # noqa: F841 -- fetched as in the original
            print(driver.current_url)

            m = "function aaa(){location.replace('https://www.congress.gov/bill/114th-congress/senate-bill/1016/text');} return aaa();"
            m1 = "function bbb(){return 1;} return bbb();"
            r = driver.execute_script(m)
            r1 = driver.execute_script(m1)
            html_macro = driver.page_source
            print("after macro execution:\n" + "driver.current_url: " + driver.current_url + "\nreturned: " + str(r) + "\nreturned1: " + str(r1))

            # NOTE(review): the write statements are not visible; storing the
            # page source before and after the macros is assumed.
            save_text(out_dir + out_file + "_macro", html_macro)
            save_text(out_dir + out_file, html)
        finally:
            # Release the browser and chromedriver processes even on failure.
            driver.quit()

    # NOTE(review): the condition guarding this report is not visible.
    if error_msg:
        print("ERRORS, code " + str(error_code) + ":\n" + error_msg)
    return error_code


if __name__ == "__main__":
    main()