# 爬虫性能测试代码 (crawler performance test code)
# 同步执行,效率慢 (synchronous execution — slow)
# import requests
# def parse_page(res):
# print("PARSE %s" %(len(res)))
#
# def get_page(url):
# print("GET %s" %url)
# response = requests.get(url)
# if response.status_code == 200:
# return response.text
#
# if __name__ == '__main__':
# urls = [
# "https://www.baidu.com",
# "https://www.taobao.com",
# "https://www.python.org",
# ]
# for url in urls:
# res = get_page(url)
# parse_page(res)
# 开进程线程
# import requests
# from threading import Thread,current_thread
# from multiprocessing import Process
#
# def parse_page(res):
# print("%s PARSE %s"%(current_thread().getName(),len(res)))
#
# def get_page(url,callback=parse_page):
# print("%s GET %s"%(current_thread().getName(),url))
# response = requests.get(url)
# if response.status_code == 200:
# callback(response.text)
#
# if __name__ == '__main__':
# import time
# urls = [
# "https://www.baidu.com",
# "https://www.taobao.com",
# "https://www.python.org",
# ]
# start_time = time.time()
# for url in urls:
# # t = Thread(target=get_page,args=(url,))
# p = Process(target=get_page,args=(url,))
# p.start()
# # t.start()
# print("===========耗时",time.time()-start_time)
import requests
from threading import current_thread
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
# 进程池线程池
# def parse_page(res):
# res = res.result()
# print("%s PARSE %s"%(current_thread().getName(),len(res)))
# def get_page(url):
# print("%s GET %s"%(current_thread().getName(),url))
# response = requests.get(url)
# if response.status_code == 200:
# return response.text
#
# if __name__ == '__main__':
# import time
# start_time = time.time()
# urls = [
# "https://www.baidu.com",
# "https://www.taobao.com",
# "https://www.python.org",
# ]
# # pool = ThreadPoolExecutor(50) #3.599205732345581  (garbled digit in original timing — approximate)
# pool = ProcessPoolExecutor(50) #12.549717903137207
# for url in urls:
# pool.submit(get_page,url).add_done_callback(parse_page)
# pool.shutdown()
# print("========>",time.time()-start_time)
# # 协程 实现的是仿线程
# from gevent import joinall,spawn,monkey;monkey.patch_all()
# import requests,time
# from threading import current_thread
#
# def parse_page(res):
# print("%s PARSE %s"%(current_thread().getName(),len(res)))
#
# def get_page(url,callback=parse_page):
# print("%s GET %s"%(current_thread().getName(),url))
# response = requests.get(url)
# if response.status_code == 200:
# callback(response.text)
#
#
# if __name__ == '__main__':
# start_time = time.time()
# urls = [
# "https://www.baidu.com",
# "https://www.taobao.com",
# "https://www.python.org",
# ]
# tasks = []
# for url in urls:
# tasks.append(spawn(get_page,url))
# joinall(tasks)
# print("++++++=====>",time.time()-start_time) #2.9541687965393066
#
# import asyncio
# @asyncio.coroutine
# def task(task_id,second):
# print("%s run "%task_id)
# yield from asyncio.sleep(second)
# print("%s run done"%task_id)
#
# if __name__ == '__main__':
# tasks = [
# task("任务一",3),
# task("任务二",2),
# task("任务三",1),
# ]
# loop = asyncio.get_event_loop()
# loop.run_until_complete(asyncio.wait(tasks))
# loop.close()
# 谢谢分享~ 666 six,six,six!
# 页: [1]