字体:大 中 小 | 上一篇 | 下一篇 |我要投稿 | 推荐标签: selenium 自动化测试框架
1. 安装 selenium:pip3 install selenium -i https://pypi.douban.com/simple |
from selenium import webdriver
import time

# Characters that Windows does not allow in file names, each mapped to '_'.
_UNSAFE_FILENAME_CHARS = str.maketrans({c: '_' for c in '\\/:*?"<>|'})


def get_title(wd):
    """Return the clickable <span> elements that hold post titles on this page.

    The text of every element with class ``postTitle`` is collected first;
    then every ``<span>`` whose text equals one of those titles is returned,
    in document order.

    NOTE(review): the ``find_elements_by_*`` helpers are the Selenium 3 API
    this script was written against; newer Selenium releases expect
    ``find_elements(By.…, …)`` instead -- confirm against the installed version.

    :param wd: the Selenium WebDriver showing a blog catalog page.
    :return: list of WebElements, one per matching ``<span>``.
    """
    # A set makes the membership test below O(1) per span.
    titles = {el.text for el in wd.find_elements_by_class_name('postTitle')}
    return [
        span
        for span in wd.find_elements_by_tag_name('span')
        if span.text in titles
    ]


def save_page(wd):
    """Save the current page's HTML to ``<page title>.html``.

    The file is written relative to the current working directory, encoded as
    UTF-8. Characters that are illegal in Windows file names are replaced
    with ``_`` so that a title such as ``a: b?`` does not make ``open`` fail.

    :param wd: the Selenium WebDriver showing the page to save.
    """
    time.sleep(1.)  # give the browser time to finish loading the page
    filename = wd.title.translate(_UNSAFE_FILENAME_CHARS) + '.html'
    # The context manager closes the file even if the write raises.
    with open(filename, 'wb') as f:
        f.write(wd.page_source.encode('utf-8'))
    time.sleep(1.)
    print('写入成功')


def ergodic_article(wd):
    """Open every post listed on the current catalog page and save each one.

    After each post is saved the browser navigates back, and the title
    elements are looked up again because the previously found elements belong
    to the page that was left.

    :param wd: the Selenium WebDriver showing a blog catalog page.
    """
    elements = get_title(wd)
    num = len(elements)  # number of posts on this catalog page
    for i in range(num):
        # The list is re-fetched every iteration, so it may have shrunk.
        if i >= len(elements):
            break
        # Click the i-th title; the original called click() on the list itself.
        elements[i].click()
        print(i)
        save_page(wd)
        wd.back()
        elements = get_title(wd)


def next_page(wd):
    """Click the first link whose text is '下一页' (next page), if any.

    :param wd: the Selenium WebDriver showing a blog catalog page.
    :return: 0 if a next-page link was clicked, 1 if none was found.
    """
    for link in wd.find_elements_by_tag_name('a'):
        if link.text == '下一页':
            print('next page')
            link.click()
            return 0
    return 1


def ergodic_catalog(wd):
    """Walk every catalog page of the blog and save all posts on each page.

    :param wd: the Selenium WebDriver to drive.
    """
    wd.get('https://www.cnblogs.com/bill-h/')  # open the blog home page
    while True:
        ergodic_article(wd)
        if next_page(wd):
            # A non-zero result means there is no further page.
            return


wd = webdriver.Chrome(r'D:\Tool\chromedriver.exe')  # start the Chrome driver
try:
    ergodic_catalog(wd)
    print('finish')
finally:
    # Always shut the browser and the chromedriver process down.
    wd.quit()
欢迎光临 51Testing软件测试论坛 (http://bbs.51testing.com/) | Powered by Discuz! X3.2 |