import requests
import re
import time
import random
import openpyxl
# Scrape search-result pages for the keyword '棉袄' and append one row per
# item to an openpyxl worksheet, then save the workbook.
#
# NOTE(review): the following names are used below but are not defined
# anywhere in the visible part of this file; they must be set up before this
# code can run:
#   - `url`   : the search endpoint passed to requests.get
#   - `wb`    : the workbook that is saved at the end
#   - `sheet` : the worksheet that rows are appended to
#   - `dianpumingcheng`, `shangpinming`, `jiage`, `fahuodi`, `fukuanrenshu`:
#     per-page field sequences; the code that extracts them from `response`
#     is missing here (presumably `re.findall` calls on the response body --
#     TODO confirm and restore).
#   - `df`    : used in the final de-duplication step.
# NOTE(review): requests.get is called without a `timeout`, so a stalled
# connection blocks forever; consider adding one.
for page in range(1, 101):
    params = (
        ('q', '棉袄'),
        ('imgfile', ''),
        ('commend', 'all'),
        ('ssid', 's5-e'),
        ('search_type', 'item'),
        ('sourceId', 'tb.index'),
        ('spm', 'a21bo.jianhua.201856-taobao-item.2'),
        ('ie', 'utf8'),
        ('initiative_id', 'tbindexz_20170306'),
        ('hintq', '1'),
        # Result offset for this page; presumably 44 items per page, matching
        # the range(44) row loop below -- TODO confirm.
        ('s', str(page * 44)),
    )
    response = requests.get(url, params=params)

    failed_rows = 0
    page_failed = False
    for i in range(44):
        try:
            # The original line appended the bare names although the loop
            # index `i` was otherwise unused; the `[i]` subscripts appear to
            # have been stripped from the pasted source (presumably eaten as
            # forum italic markup). Without them every iteration would append
            # the same values and float() would receive a whole sequence.
            sheet.append([
                dianpumingcheng[i],
                shangpinming[i],
                float(jiage[i]),
                fahuodi[i],
                fukuanrenshu[i],
            ])
        except Exception:
            # Best-effort: skip rows that cannot be built, but catch
            # Exception rather than using a bare `except` so that Ctrl-C and
            # SystemExit still stop the script.
            failed_rows += 1
            if failed_rows > 30:
                # More than 30 bad rows on one page: treat the page as not
                # scraped and stop the whole crawl.
                print(f"第{page}页数据未爬取......")
                page_failed = True
                break
    if page_failed:
        break
    print(f"已爬取完第{page}页数据......")
    # Random 3-5 second pause between page requests.
    time.sleep(random.randint(3, 5))

# Save once after the loop so the data is written on success as well as on
# early abort (the original only saved inside the failure branch).
# Change the file name below to whatever name you want the output stored as.
wb.save('棉袄.xlsx')
print(f'共爬取{page}页数据......')
# drop_duplicates() returns a new DataFrame; keep the result instead of
# discarding it.
df = df.drop_duplicates()
# Source: 51Testing software testing forum (http://bbs.51testing.com/) | Powered by Discuz! X3.2