Simulating login and scraping data with Selenium
How do I locate and click the "next page" button?

import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pyquery import PyQuery as pq
from selenium.webdriver import ActionChains
import time
from lxml import etree
driver = webdriver.Chrome()
driver.get("https://www.taobao.com/")
# Click the login link on the Taobao home page (the login page opens in a new window)
driver.find_element(by=By.XPATH, value='/html/body/div/div/div/div/div/div/div/div/a').click()
time.sleep(6)
# Switch to the newly opened login window
handles = driver.window_handles
driver.switch_to.window(handles[-1])
# Fill in the account name and password (placeholders here), then submit the login form
driver.find_element(by=By.ID, value='fm-login-id').send_keys('1')
driver.find_element(by=By.ID, value='fm-login-password').send_keys('1')
driver.find_element(by=By.XPATH, value='/html/body/div/div/div/div/div/div/div/div/form/div/button').click()
time.sleep(6)
# Switch into the slider-captcha iframe and measure the slider button and its track
driver.switch_to.frame('baxia-dialog-content')
el1 = driver.find_element(by=By.XPATH, value='//*[@id="nc_1_n1z"]')  # slider button
print(el1.size['width'])
print(el1.size['height'])
el2 = driver.find_element(by=By.XPATH, value='//*[@id="nc_1__scale_text"]')  # slider track
print(el2.size['width'])
print(el2.size['height'])
time.sleep(2)
driver.maximize_window()
chains = ActionChains(driver)
# Drag the slider across the full width of the track
chains.drag_and_drop_by_offset(el1, el2.size['width'], -el2.size['height'])
# Perform the queued actions
chains.perform()
# driver.close()
# Quit the browser
# driver.quit()
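
As an aside, the fixed time.sleep(6) calls above can be replaced with explicit waits, so the script moves on as soon as the login window and the captcha iframe are actually ready. A minimal sketch, assuming the same window flow, element IDs and iframe name already used above (it reuses the imports from the top of the script):

login_wait = WebDriverWait(driver, 15)
# Wait for the login page to open in a second window, then switch to it
login_wait.until(EC.number_of_windows_to_be(2))
driver.switch_to.window(driver.window_handles[-1])
# Wait for the login form before typing
login_wait.until(EC.presence_of_element_located((By.ID, 'fm-login-id')))
# ... fill in the form and submit as above ...
# Wait for the slider-captcha iframe and switch into it in one step
login_wait.until(EC.frame_to_be_available_and_switch_to_it('baxia-dialog-content'))
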
wait = WebDriverWait(driver, 10)
def search():
    try:
        input = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#q")))  # search box
        submit = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '#J_TSearchForm > div.search-button > button')))
        input.send_keys('美食')  # search keyword ("food")
        submit.click()
        total = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.total')))
        get_products()
        return total.text
        # NOTE: this click sits after the return statement, so it is never executed
        driver.find_element(by=By.XPATH,
                            value='//*[@id="mainsrp-pager"]/div/div/div/ul/li/a/span').click()
    except:
        search()
def get_products():
    # wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#mainsrp-itemlist .items .item')))  # wait for the item list to load
    html = driver.page_source
    doc = pq(html)
    items = doc('#mainsrp-itemlist .items .item').items()  # every item node on the page
    for item in items:
        product = {
            'image': item.find('.pic .img').attr('data-src'),  # image URL
            'price': item.find('.price').text(),  # price
            'deal': item.find('.deal-cnt').text()[:-3],  # number of buyers; [:-3] strips the trailing "人付款"
            'title': item.find('.title').text(),  # item title
            'shop': item.find('.shop').text(),  # shop name
            'location': item.find('.location').text()  # shop location
        }
        print(product)
def main():
    total = search()
    # Flag used when writing the CSV header row
    start_csv = True
    # total1 = int(re.compile('(\d+)').search(total).group(1))  # convert the total-pages text to an integer
    # print(total1)

if __name__ == '__main__':
    main()
Result: only the first page of items gets printed. What I want: the script should click "next page" normally and keep scraping until every result page has been crawled. Doesn't the code need to loop over the pages? Right now there is no such step (see the sketch below).
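
On the pagination question: the click in search() sits after the return statement, so it never runs, and there is indeed no loop over the pages. A common pattern is to parse the total page count from the text that search() returns (e.g. "共 100 页,") and then drive the pager page by page. The sketch below follows that idea; it reuses wait, get_products() and the imports from the script above, and the pager selectors ('#mainsrp-pager div.form > input' for the page-jump box, 'span.btn.J_Submit' for the confirm button, 'li.item.active > span' for the highlighted page number) are assumptions that should be verified against the live page in DevTools.

def next_page(page_number):
    # Jump to a given result page via the pager's input box, confirm,
    # then wait until that page number is highlighted before parsing it.
    try:
        page_input = wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, '#mainsrp-pager div.form > input')))  # page-jump input (assumed selector)
        confirm = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, '#mainsrp-pager div.form > span.btn.J_Submit')))  # confirm button (assumed selector)
        page_input.clear()
        page_input.send_keys(page_number)
        confirm.click()
        # Only scrape once the pager shows the requested page as active
        wait.until(EC.text_to_be_present_in_element(
            (By.CSS_SELECTOR, '#mainsrp-pager li.item.active > span'), str(page_number)))
        get_products()
    except Exception:
        next_page(page_number)  # retry on timeout, mirroring the retry style of search()

def main():
    total = search()                                          # e.g. "共 100 页,"
    pages = int(re.compile(r'(\d+)').search(total).group(1))  # total number of result pages
    for i in range(2, pages + 1):                             # page 1 was already scraped inside search()
        next_page(i)

Waiting for the highlighted page number keeps get_products() from scraping the old DOM before the new results have rendered, and the regex line replaces the commented-out conversion in the original main().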