问题遇到的现象和发生背景:爬到第二页就提示没有获得元素,第一页都是可以获取的,是怎么回事呢?
- import time
import time

import pandas as pd
from selenium import webdriver

# Raw string so the backslashes in the Windows path are not treated as escapes.
driver = webdriver.Edge(r"C:\Program Files (x86)\Microsoft\Edge\Application\msedgedriver.exe")
driver.get('https://movie.douban.com/top250')
driver.implicitly_wait(10)

datas = []


def optional_text(element, xpath):
    """Return the text of the first node matching *xpath*, or '' if absent.

    Some movies on later pages have no one-line intro (<span class="inq">),
    so a plain find_element_by_xpath raises NoSuchElementException there —
    that is exactly why the original script died on page 2.  find_elements
    (plural) returns [] instead of raising.

    NOTE(review): with implicitly_wait(10) active, each genuinely missing
    node still blocks up to 10 s before [] is returned — lower the implicit
    wait if that turns out to be too slow.
    """
    found = element.find_elements_by_xpath(xpath)
    return found[0].text if found else ''


while True:
    # All 25 <li> entries of the current result page.
    movie_items = driver.find_elements_by_xpath('//ol[@class="grid_view"]/li')

    for item in movie_items:
        datas.append({
            '排名': optional_text(item, './div[@class="item"]/div[@class="pic"]/em'),
            '电影名称': optional_text(item, './div/div[2]/div[1]/a'),
            '评分': optional_text(item, './div/div[2]/div[2]/div/span[2]'),
            # Not every movie has the one-line intro; '' when missing.
            '介绍': optional_text(item, './div/div[2]/div[2]/p[2]/span'),
            '评价人数': optional_text(item, './div/div[2]/div[2]/div/span[4]'),
        })

    # The original `if driver.find_element_by_xpath(...)` could never be
    # falsy: find_element raises NoSuchElementException on the last page,
    # crashing before the data was ever written to Excel.  Using
    # find_elements plus a class-based locator (the positional span[3]
    # shifts between page 1 and later pages) makes the end of pagination
    # an ordinary empty-list check.
    next_links = driver.find_elements_by_xpath(
        '//div[@class="paginator"]/span[@class="next"]/a')
    if not next_links:
        break
    next_links[0].click()
    time.sleep(2)  # give the next page time to render before re-querying

df = pd.DataFrame(datas)
df.to_excel('selenium豆瓣top250.xlsx', index=False)
运行结果及报错内容:(见上)
我的解答思路和尝试过的方法:(略)
我想要达到的结果:导出所有数据到 Excel 表格