Why can't I crawl the full list of poster usernames from a Tieba thread with Python?
Here is the code, please point out what I am doing wrong:

import requests
from lxml import etree
count = []  # accumulates the usernames collected from each profile page
response = requests.get("https://tieba.baidu.com/p/3183092174")  # fetch the thread's HTML page
html = response.text  # response body as text
content = etree.HTML(html)  # parse the text into an Element tree so it can be queried with XPath
link_list = content.xpath('//a[@class="p_author_name j_user_card"]/@href')  # extract each poster's profile link; XPath returns the matches as a list
print(len(link_list))
for zhuye_url in link_list:
    link_zhuye_url = "https://tieba.baidu.com/" + zhuye_url
    response_one = requests.get(link_zhuye_url)
    html_one = response_one.text  # profile page body as text
    content_one = etree.HTML(html_one)
    link_list_one = content_one.xpath('//span[@class="userinfo_username "]/text()')  # the trailing space in the class value matches the page's markup
    count = count + link_list_one
print(count)
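
One thing worth checking (a guess on my part, not something the output above confirms): the code only requests the first page of the thread, so the XPath can only ever see the posters on page 1. A minimal sketch of walking the thread page by page, assuming the thread URL accepts a ?pn=N query parameter for page N and that the thread has 3 pages (both assumptions, adjust to the real thread):

import requests
from lxml import etree

all_hrefs = []
for pn in range(1, 4):  # assumed 3 pages; replace with the thread's real page count
    page_url = "https://tieba.baidu.com/p/3183092174?pn=%d" % pn
    page_tree = etree.HTML(requests.get(page_url).text)
    # same author-link XPath as above, now run once per page
    all_hrefs += page_tree.xpath('//a[@class="p_author_name j_user_card"]/@href')
print(len(all_hrefs))

The per-profile loop from the original code can then run over all_hrefs instead of link_list, unchanged.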
What does the error message say?
Bumping for support.