TA的每日心情 | 擦汗 2022-8-30 09:02 |
---|
签到天数: 2 天 连续签到: 2 天 [LV.1]测试小兵
|
- #coding=utf-8
-
# Module description (kept in its original language because it is a runtime
# string): a simple image scraper built on requests. The request must carry a
# Referer header, otherwise the server does not return the correct result.
# The literal previously opened with five quote characters, which left two
# stray quotes at the start of the text.
__doc__ = '''
使用requests来简单的爬取图片,请求使用Referer,否则爬取不到正确的结果
'''

# Chunk size passed to Response.iter_content() when streaming a download to disk.
CHUNK = 1024
-
- import requests
- import re
- import os
-
def download_images(x, y, referer='http://www.zngirls.com/g/13080/2.html', timeout=30):
    """Download the consecutively numbered JPEG files of one gallery path.

    Images are requested from
    ``http://img.zngirls.com/gallery/<x>/<y>/<n>.jpg`` starting at ``n = 0``.
    Index 0 is requested as ``0.jpg``; every later index is zero-padded to
    three digits (``001.jpg``, ``002.jpg``, ...). The loop stops at the first
    response whose status code is not 200.

    Files are stored as ``{n:03d}.jpg`` inside a directory named after the
    first ``digits/digits`` run found in the URL, with ``/`` replaced by
    ``-``. The directory is created relative to the current working
    directory when it does not exist yet.

    Args:
        x: First path segment of the gallery URL.
        y: Second path segment of the gallery URL.
        referer: Value sent in the ``Referer`` request header. The module
            description states that requests without it do not return the
            correct result.
        timeout: Timeout in seconds handed to ``requests.get`` so a stalled
            server cannot block the loop forever.

    Returns:
        None. Returns immediately, without creating a directory or touching
        the network, when the URL contains no ``digits/digits`` run.

    Raises:
        Nothing is caught here: exceptions raised by ``requests.get``, by
        streaming the body, or by file I/O propagate to the caller.
    """
    url_segment = '{0}/{1}'.format(x, y)
    padded_format = 'http://img.zngirls.com/gallery/%s/{0:03d}.jpg' % url_segment
    first_format = 'http://img.zngirls.com/gallery/%s/{0}.jpg' % url_segment

    # The target directory name is derived from the numeric "x/y" part of the URL.
    matches = re.findall(r'\d+/\d+', first_format)
    if not matches:
        return
    dirname = matches[0].replace('/', '-')
    os.makedirs(dirname, exist_ok=True)

    headers = {'Referer': referer}
    index = 0
    while True:
        # Only the very first image uses an unpadded file name on the server.
        url_format = first_format if index == 0 else padded_format
        url = url_format.format(index)

        print('url=', url)
        # The context manager releases the streamed connection on every path,
        # including the early exit on a non-200 status.
        with requests.get(url, headers=headers, stream=True, timeout=timeout) as res:
            if res.status_code != 200:
                break

            filename = os.path.join(dirname, '{0:03d}.jpg'.format(index))
            with open(filename, mode='wb') as f:
                for chunk in res.iter_content(CHUNK):
                    f.write(chunk)

        index += 1
-
def main():
    """Run download_images for the hard-coded URL segments 21363 and 18304."""
    first_segment, second_segment = 21363, 18304
    download_images(first_segment, second_segment)


# Start the download only when the file is executed as a script.
if __name__ == '__main__':
    main()
复制代码
|
|