测试积点老人 发表于 2022-9-1 10:06:46

selenium模拟登录及爬取信息

怎么定位下一页
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pyquery import PyQuery as pq
from selenium.webdriver import ActionChains
import time
from lxml import etree
driver = webdriver.Chrome()
driver.get("https://www.taobao.com/")
driver.find_element(by=By.XPATH,value='/html/body/div/div/div/div/div/div/div/div/a').click()
time.sleep(6)
handles = driver.window_handles
driver.switch_to.window(handles[-1])
driver.find_element(by=By.ID, value='fm-login-id').send_keys('1')
driver.find_element(by=By.ID, value='fm-login-password').send_keys('1')
driver.find_element(by=By.XPATH,value='/html/body/div/div/div/div/div/div/div/div/form/div/button').click()
time.sleep(6)
driver.switch_to.frame('baxia-dialog-content')
el1 = driver.find_element(by=By.XPATH, value='//*[@id="nc_1_n1z"]')
print(el1.size['width'])
print(el1.size['height'])
el2 = driver.find_element(by=By.XPATH, value='//*[@id="nc_1__scale_text"]')
print(el2.size['width'])
print(el2.size['height'])
time.sleep(2)
driver.maximize_window()
chains = ActionChains(driver)
chains.drag_and_drop_by_offset(el1, el2.size['width'], -el2.size['height'])
# 事件提交
chains.perform()
# driver.close()
# 退出浏览器
# driver.quit()
wait = WebDriverWait(driver, 10)



def search():


    try:
      input = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#q")))
      submit = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '#J_TSearchForm > div.search-button > button')))
      input.send_keys('美食')
      submit.click()
      total = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.total')))

      get_products()

      return total.text
      driver.find_element(by=By.XPATH,
                            value='//*[@id="mainsrp-pager"]/div/div/div/ul/li/a/span').click()

    except:
      search()






def get_products():
    # wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,'#mainsrp-itemlist.items.item')))#加载宝贝信息并等待
    html = driver.page_source
    doc = pq(html)
    items = doc('#mainsrp-itemlist .items .item').items()# 得到所有宝贝的内容
    for item in items:
      product = {
            'image': item.find('.pic .img').attr('data-src'),# 图片链接
            'price': item.find('.price').text(),# 商品价格
            'deal': item.find('.deal-cnt').text()[:-3],# 付款人数,-3是为了去掉人付款这几个字
            'title': item.find('.title').text(),# 商品名称
            'shop': item.find('.shop').text(),# 店铺名称
            'location': item.find('.location').text()
      }
      print(product)





def main():


    total = search()
    # 用来写csv文件的标题
    start_csv = True

    #total1 = int(re.compile('(\d+)').search(total).group(1))# 转换为数值型
    #print(total1)




if __name__ == '__main__':
       main()

运行结果及报错内容只能打印第一页内容我想要达到的结果正常点击下一页,并继续爬取,直到全部爬取完

qqq911 发表于 2022-9-2 11:50:26

代码中要遍历

jingzizx 发表于 2022-9-2 14:07:47

没有操作吗
页: [1]
查看完整版本: selenium模拟登录及爬取信息