How do I locate the "next page" button?
import re
import time

from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://www.taobao.com/")
# open the login entry and switch to the newly opened window
driver.find_element(by=By.XPATH, value='/html/body/div[3]/div[2]/div[2]/div[2]/div[5]/div/div[2]/div[1]/a[1]').click()
time.sleep(6)
handles = driver.window_handles
driver.switch_to.window(handles[-1])
# fill in the account and password, then submit the login form
driver.find_element(by=By.ID, value='fm-login-id').send_keys('1')
driver.find_element(by=By.ID, value='fm-login-password').send_keys('1')
driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div[3]/div/div/div/div[2]/div/form/div[4]/button').click()
time.sleep(6)
# handle the slider captcha inside the baxia iframe
driver.switch_to.frame('baxia-dialog-content')
el1 = driver.find_element(by=By.XPATH, value='//*[@id="nc_1_n1z"]')          # slider knob
print(el1.size['width'])
print(el1.size['height'])
el2 = driver.find_element(by=By.XPATH, value='//*[@id="nc_1__scale_text"]')  # slider track
print(el2.size['width'])
print(el2.size['height'])
time.sleep(2)
driver.maximize_window()
chains = ActionChains(driver)
chains.drag_and_drop_by_offset(el1, el2.size['width'], -el2.size['height'])
# perform the queued drag action
chains.perform()
# driver.close()
# quit the browser
# driver.quit()
wait = WebDriverWait(driver, 10)


def search():
    try:
        input = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#q')))
        submit = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '#J_TSearchForm > div.search-button > button')))
        input.send_keys('美食')
        submit.click()
        total = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.total')))
        get_products()
        return total.text
        # NOTE: this click sits after the return statement, so it never executes
        driver.find_element(by=By.XPATH,
                            value='//*[@id="mainsrp-pager"]/div/div/div/ul/li[8]/a/span[1]').click()
    except Exception:
        # retry on timeout; a bare retry like this can recurse forever if the page never loads
        search()


def get_products():
    # wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#mainsrp-itemlist .items .item')))  # wait for the item list to load
    html = driver.page_source
    doc = pq(html)
    items = doc('#mainsrp-itemlist .items .item').items()  # every item node on the current page
    for item in items:
        product = {
            'image': item.find('.pic .img').attr('data-src'),  # image URL
            'price': item.find('.price').text(),                # price
            'deal': item.find('.deal-cnt').text()[:-3],         # number of buyers; [:-3] strips the "人付款" suffix
            'title': item.find('.title').text(),                # item title
            'shop': item.find('.shop').text(),                  # shop name
            'location': item.find('.location').text()           # shop location
        }
        print(product)


def main():
    total = search()
    # flag used when writing the CSV header row
    start_csv = True
    # total1 = int(re.compile(r'(\d+)').search(total).group(1))  # extract the page count as an integer
    # print(total1)


if __name__ == '__main__':
    main()
Run result / error: only the first page gets printed. What I want: the script should click "next page" normally and keep scraping until every page has been crawled.
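What I have in mind is something like the sketch below. It is untested, and the selectors are assumptions based on the #mainsrp-pager markup already used in my code above (they may need adjusting against the live page): instead of clicking a fixed li[8] XPath, it types the target page number into the pager's input box, submits, waits until the highlighted page number matches, and only then parses that page.

def next_page(page_number):
    try:
        # page-number input box and its confirm button inside the pager (assumed selectors)
        page_input = wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, '#mainsrp-pager div.form > input')))
        submit = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, '#mainsrp-pager div.form > span.btn.J_Submit')))
        page_input.clear()
        page_input.send_keys(str(page_number))
        submit.click()
        # wait until the active page indicator shows the requested number, then parse
        wait.until(EC.text_to_be_present_in_element(
            (By.CSS_SELECTOR, '#mainsrp-pager li.item.active > span'), str(page_number)))
        get_products()
    except Exception:
        next_page(page_number)  # simple retry on timeout


def main():
    total = search()                                         # e.g. "共 100 页,"
    total = int(re.compile(r'(\d+)').search(total).group(1))
    for i in range(2, total + 1):                            # page 1 is parsed inside search()
        next_page(i)

Jumping by page number rather than clicking "下一页" would also make it easy to resume from a given page if a run is interrupted; is this the right way to locate the pager, or is there a better selector for the next-page control?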