# Example: scraping company data from Qichacha (qichacha.com) with Selenium.
# Source: blog post published 2019-09-12, category "python".
#
# Flow: open the site in headless Chrome, search for `company_name`, open the
# first result, export the browser cookies into a `requests` header, then read
# the "ntable" table from the newly opened tab and print selected columns.
#
# NOTE(review): `Options`, `webdriver`, `requests` and `company_name` are not
# defined in this snippet; they are assumed to be imported/assigned earlier in
# the file -- confirm before running.

chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Path to the chromedriver executable. A raw string keeps the backslashes
# literal instead of relying on them being unrecognised escape sequences.
path = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'

# Create the browser once, with the headless options above. (The original
# created a second, unconfigured Chrome right after this one, which discarded
# the options and leaked the first browser process.)
driver = webdriver.Chrome(executable_path=path, chrome_options=chrome_options)
try:
    url = 'https://www.qichacha.com/'
    driver.get(url)

    # Type the company name into the search box, submit, open the first hit.
    driver.find_element_by_xpath('//input[@id="searchkey"]').send_keys(f'{company_name}')
    driver.find_element_by_xpath("//input[@value ='查一下']").click()
    driver.find_element_by_xpath("//a[@class ='ma_h1']").click()

    # Flatten the browser cookies into a single "name=value;name=value" header
    # so that plain `requests` calls can reuse the browser session.
    cookies = driver.get_cookies()
    cookies_list = [
        cookie_dict['name'] + '=' + cookie_dict['value']
        for cookie_dict in cookies
    ]
    header_cookie = ';'.join(cookies_list)
    print(header_cookie)

    headers2 = {
        'cookie': header_cookie,
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }
    fin_url = driver.current_url
    # NOTE(review): `response` is fetched but never used below; kept because it
    # demonstrates reusing the browser cookies with `requests`.
    response = requests.get(fin_url, headers=headers2)

    # Every browser tab has its own handle; remember the one we started in.
    mainhandle = driver.current_window_handle
    handles = driver.window_handles
    # Switch to whichever tab is not the main one. Presumably only two tabs
    # exist at this point, so this loop effectively selects the new tab.
    for handle in handles:
        if handle != mainhandle:
            # `switch_to.window` replaces the deprecated `switch_to_window`.
            driver.switch_to.window(handle)

    # Extract the data table from the current tab.
    try:
        raw = driver.find_element_by_xpath("//table[@class='ntable']")
        print(raw.text)
        for data1 in raw.text.split('\n'):
            # Rows containing '-' are skipped.
            if '-' in data1:
                continue
            fields = data1.split(' ')
            # Circulating market value.
            ltsz = fields[3]
            # Price-to-earnings ratio.
            syl = fields[1]
            # Price-to-book ratio.
            # NOTE(review): reads the same column (index 3) as `ltsz` above;
            # this looks like a copy-paste slip -- confirm the intended index.
            sjl = fields[3]
            print(ltsz, syl, sjl)
    except Exception:
        # Best-effort: a missing table or a row with too few columns is
        # reported with the same message instead of aborting the script.
        print("无该数据")
finally:
    # Always shut the browser down so no chromedriver/Chrome process is leaked.
    driver.quit()