51Testing软件测试论坛
标题:
selenium模拟登录及爬取信息
[打印本页]
作者:
测试积点老人
时间:
2022-9-1 10:06
标题:
selenium模拟登录及爬取信息
怎么定位“下一页”按钮?
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pyquery import PyQuery as pq
from selenium.webdriver import ActionChains
import time
from lxml import etree
# Module-level script: start Chrome, open Taobao, go through the login form and
# attempt the slider check. `driver` and `wait` created here are the globals
# that search() and get_products() rely on.
driver = webdriver.Chrome()
driver.get("https://www.taobao.com/")
# Click a link found by absolute XPath. Presumably the login link, since the
# next steps fill in login fields — TODO confirm against the live page.
driver.find_element(by=By.XPATH,value='/html/body/div[3]/div[2]/div[2]/div[2]/div[5]/div/div[2]/div[1]/a[1]').click()
# Fixed pause; no explicit wait condition is used before reading the windows.
time.sleep(6)
# Switch to the last window handle in the list (presumably the window opened
# by the click above — verify).
handles = driver.window_handles
driver.switch_to.window(handles[-1])
# Type into the login-id and password fields; '1' looks like a placeholder
# value, not a real credential.
driver.find_element(by=By.ID, value='fm-login-id').send_keys('1')
driver.find_element(by=By.ID, value='fm-login-password').send_keys('1')
# Click the button inside the form (presumably the submit button).
driver.find_element(by=By.XPATH,value='/html/body/div/div[2]/div[3]/div/div/div/div[2]/div/form/div[4]/button').click()
time.sleep(6)
# Enter the frame 'baxia-dialog-content'; the lookups below run inside it.
driver.switch_to.frame('baxia-dialog-content')
# el1: element with id nc_1_n1z, the one that is dragged below.
el1 = driver.find_element(by=By.XPATH, value='//*[@id="nc_1_n1z"]')
print(el1.size['width'])
print(el1.size['height'])
# el2: element with id nc_1__scale_text; its size supplies the drag offsets.
el2 = driver.find_element(by=By.XPATH, value='//*[@id="nc_1__scale_text"]')
print(el2.size['width'])
print(el2.size['height'])
time.sleep(2)
driver.maximize_window()
chains = ActionChains(driver)
# Queue a drag of el1 by (+el2 width, -el2 height) pixels.
# NOTE(review): the vertical offset is negative; confirm that is intended.
chains.drag_and_drop_by_offset(el1, el2.size['width'], -el2.size['height'])
# Execute the queued actions.
chains.perform()
# driver.close()
# Quit the browser.
# driver.quit()
# NOTE(review): the driver is never switched back out of the frame before
# search() runs — verify that the later '#q' lookup still resolves.
# Shared explicit wait with a 10-second timeout, used by search().
wait = WebDriverWait(driver, 10)
def search(max_retries=3):
    """Search for '美食', scrape the first result page, return the pager total.

    Types the keyword into the ``#q`` box, clicks the search button, waits for
    the pager's ``div.total`` element, calls ``get_products()`` for the page
    that loaded, and returns the text of the total element.

    Args:
        max_retries: How many times to retry after a wait times out. The
            original retried without limit through unbounded recursion.

    Returns:
        The text of the pager's ``div.total`` element.

    Raises:
        TimeoutException: If every attempt times out.

    Note:
        The original had a next-page click placed after ``return``; it could
        never run and is removed. This function only handles the first page.
    """
    # Local import: selenium is already a dependency of this file, and this
    # keeps the function independent of the import header.
    from selenium.common.exceptions import TimeoutException

    for attempt in range(max_retries + 1):
        try:
            search_box = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#q")))
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR,
                     '#J_TSearchForm > div.search-button > button')))
            # Clear first so a retry does not append the keyword a second time.
            search_box.clear()
            search_box.send_keys('美食')
            submit.click()
            total = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR,
                     '#mainsrp-pager > div > div > div > div.total')))
        except TimeoutException:
            # Only timeouts are retried; other errors propagate instead of
            # being swallowed by a bare except.
            if attempt == max_retries:
                raise
            continue
        get_products()
        return total.text
def get_products():
    """Parse the browser's current page source and print each product as a dict.

    Reads ``driver.page_source``, selects every ``#mainsrp-itemlist .items .item``
    node with PyQuery, and prints one dict per node with the keys ``image``,
    ``price``, ``deal``, ``title``, ``shop`` and ``location``.

    No explicit wait is performed here; the caller is responsible for making
    sure the item list has loaded before calling.
    """
    document = pq(driver.page_source)
    # Iterate over every product card in the result list.
    for card in document('#mainsrp-itemlist .items .item').items():
        image_url = card.find('.pic .img').attr('data-src')  # image link
        price = card.find('.price').text()  # product price
        # Buyer count; the last three characters (the "人付款" suffix) are cut.
        deal_count = card.find('.deal-cnt').text()[:-3]
        title = card.find('.title').text()  # product name
        shop = card.find('.shop').text()  # shop name
        location = card.find('.location').text()
        print({
            'image': image_url,
            'price': price,
            'deal': deal_count,
            'title': title,
            'shop': shop,
            'location': location,
        })
def main():
    """Run the search, which also scrapes and prints the first result page."""
    # search() returns the pager's total text; nothing here reads it, so it is
    # not bound to a name. The unused `start_csv` flag and the commented-out
    # page-count parsing were dead code and are removed.
    search()
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
复制代码
运行结果及报错内容
只能打印第一页内容
我想要达到的结果
正常点击下一页,并继续爬取,直到全部爬取完
作者:
qqq911
时间:
2022-9-2 11:50
代码中需要循环遍历每一页。
作者:
jingzizx
时间:
2022-9-2 14:07
没有操作吗
欢迎光临 51Testing软件测试论坛 (http://bbs.51testing.com/)
Powered by Discuz! X3.2