selenium自动化爬虫运行越来越慢,如何解决?
最近用selenium写了一个小爬虫,需要循环在搜索框内输入内容,然后模拟点击搜索,在前进的新页面获取数据,之后循环之前的步骤,直到搜索结束。在爬虫刚开始运行的时候速度还可以,大约一秒一个页面,但随着运行,速度开始越来越慢。这里贴出代码:
browser = webdriver.Chrome(executable_path="D:\GeckoDriver\chromedriver")
browser.get("https://www.qcc.com/")
# Step 4: set the browser window size (maximized) and open the login page.
browser.maximize_window()
login = browser.find_element(By.XPATH, '/html/body/header/div/ul/li/a')
login.click()

# Block until the operator has finished logging in by hand in the opened
# window (a fixed 30-second sleep was tried here before).
input("登录后请按y")

# Capture the cookies of the logged-in session, then close that browser.
cookies = browser.get_cookies()
browser.quit()

# Start a second browser configured with chrome_options and replay the
# captured cookies into it. Raw string: the Windows path contains "\G" and
# "\c", which are invalid escape sequences in a normal string literal.
browser = webdriver.Chrome(
    executable_path=r"D:\GeckoDriver\chromedriver",
    options=chrome_options,
)
# Load the site first; cookies are then added to the page's domain.
browser.get("https://www.qcc.com/")
for cookie in cookies:
    browser.add_cookie(cookie)
browser.get("https://www.qcc.com/")
browser.maximize_window()

# Run one initial search from the home page.
qccinput = browser.find_element(By.CSS_SELECTOR, "#searchkey")
# NOTE(review): this sends the whole companyNames collection rather than a
# single name -- confirm that is intended.
qccinput.send_keys(companyNames)
qccbutton = browser.find_element(By.CSS_SELECTOR, ".index-searchbtn")
sleep(0.5)
qccbutton.click()
qccbutton = browser.find_element(By.CSS_SELECTOR, ".input-group-btn")
sleep(0.5)
qccbutton.click()

# Search every company name in turn, with a progress bar over the names.
pbar = tqdm(companyNames)
for companyName in pbar:
    browser.forward()

    # Save time: act as soon as the search box is present (poll every 0.5 s,
    # give up after 15 s). The wait returns the located element, so it is
    # reused instead of being looked up a second time.
    search_box = WebDriverWait(browser, 15, 0.5).until(
        EC.presence_of_element_located((By.ID, "searchKey"))
    )
    search_box.clear()
    search_box.send_keys(companyName)
    browser.find_element(By.CSS_SELECTOR, ".btn-primary").click()

    # Parse the current page. The tree is only queried with XPath below, so
    # it is not serialized back to a string.
    html = etree.HTML(browser.page_source)
    cookies = browser.get_cookies()

    try:
        # "tbody" is left out of both paths.
        found_name = html.xpath(
            'normalize-space(/html/body/div/div/div/div/div/div/div/table/tr/td/div/a)')
        urls = html.xpath(
            '/html/body/div/div/div/div/div/div/div/table/tr/td/div/a/@href')
        getTaxpayerNumber(urls, cookies)
    except Exception:
        # Record exactly one placeholder per list so the two result lists
        # stay aligned for this company.
        getCompanyNames.append("无法搜索到公司")
        taxpayerNumber.append("无法查到税号")
    else:
        # Append the name only after the lookup succeeded; appending it
        # before the call left a second, stray entry behind on failure.
        getCompanyNames.append(found_name)

    # Reset the cookie jar to the snapshot taken for this iteration.
    # (Clearing the browser cache through the Network.clearBrowserCache
    # devtools command was also tried here and is currently disabled.)
    browser.delete_all_cookies()
    for cookie in cookies:
        browser.add_cookie(cookie)
页:
[1]