胖虎 发表于 2018-3-15 17:03:54

爬虫性能测试代码

同步执行,效率慢
# import requests
# def parse_page(res):
#   print("PARSE %s" %(len(res)))
#
# def get_page(url):
#   print("GET %s" %url)
#   response = requests.get(url)
#   if response.status_code == 200:
#         return response.text
#
# if __name__ == '__main__':
#   urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#   ]
#   for url in urls:
#         res = get_page(url)
#         parse_page(res)

# 开进程线程
# import requests
# from threading import Thread,current_thread
# from multiprocessing import Process
#
# def parse_page(res):
#   print("%s PARSE %s"%(current_thread().getName(),len(res)))
#
# def get_page(url,callback=parse_page):
#   print("%s GET %s"%(current_thread().getName(),url))
#   response = requests.get(url)
#   if response.status_code == 200:
#         callback(response.text)
#
# if __name__ == '__main__':
#   import time
#   urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#   ]
#   start_time = time.time()
#   for url in urls:
#         # t = Thread(target=get_page,args=(url,))
#         p = Process(target=get_page,args=(url,))
#         p.start()
#         # t.start()
#   print("===========耗时",time.time()-start_time)


import requests
from threading import current_thread
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

# 进程池线程池
# def parse_page(res):
#   res = res.result()
#   print("%s PARSE %s"%(current_thread().getName(),len(res)))

# def get_page(url):
#   print("%s GET %s"%(current_thread().getName(),url))
#   response = requests.get(url)
#   if response.status_code == 200:
#         return response.text
#
# if __name__ == '__main__':
#   import time
#   start_time = time.time()
#   urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#   ]
#   # pool = ThreadPoolExecutor(50)      #3.599205732345581
#   pool = ProcessPoolExecutor(50)       #12.549717903137207
#   for url in urls:
#         pool.submit(get_page,url).add_done_callback(parse_page)
#   pool.shutdown()
#   print("========>",time.time()-start_time)


# # 协程 实现的是仿线程
# from gevent import joinall,spawn,monkey;monkey.patch_all()
# import requests,time
# from threading import current_thread
#
# def parse_page(res):
#   print("%s PARSE %s"%(current_thread().getName(),len(res)))
#
# def get_page(url,callback=parse_page):
#   print("%s GET %s"%(current_thread().getName(),url))
#   response = requests.get(url)
#   if response.status_code == 200:
#         callback(response.text)
#
#
# if __name__ == '__main__':
#   start_time = time.time()
#   urls = [
#         "https://www.baidu.com",
#         "https://www.taobao.com",
#         "https://www.python.org",
#   ]
#   tasks = []
#   for url in urls:
#         tasks.append(spawn(get_page,url))
#   joinall(tasks)
#   print("++++++=====>",time.time()-start_time)       #2.9541687965393066
#


# import asyncio
# @asyncio.coroutine
# def task(task_id,second):
#   print("%s run "%task_id)
#   yield from asyncio.sleep(second)
#   print("%s run done"%task_id)
#
# if __name__ == '__main__':
#   tasks = [
#         task("任务一",3),
#         task("任务二",2),
#         task("任务三",1),
#   ]
#   loop = asyncio.get_event_loop()
#   loop.run_until_complete(asyncio.wait(tasks))
#   loop.close()

海海豚 发表于 2018-3-15 17:36:44

谢谢分享~

libingyu135 发表于 2018-3-16 09:32:01

666

清晨一缕阳光 发表于 2018-3-16 11:55:14

six,six,six!
页: [1]
查看完整版本: 爬虫性能测试代码