51Testing软件测试论坛

标题: 爬虫性能测试代码 [打印本页]

作者: 胖虎    时间: 2018-3-15 17:03
标题: 爬虫性能测试代码
  1. 同步执行,效率慢
  2. # import requests
  3. # def parse_page(res):
  4. #     print("PARSE %s" %(len(res)))
  5. #
  6. # def get_page(url):
  7. #     print("GET %s" %url)
  8. #     response = requests.get(url)
  9. #     if response.status_code == 200:
  10. #         return response.text
  11. #
  12. # if __name__ == '__main__':
  13. #     urls = [
  14. #         "https://www.baidu.com",
  15. #         "https://www.taobao.com",
  16. #         "https://www.python.org",
  17. #     ]
  18. #     for url in urls:
  19. #         res = get_page(url)
  20. #         parse_page(res)

  21. # 开进程线程
  22. # import requests
  23. # from threading import Thread,current_thread
  24. # from multiprocessing import Process
  25. #
  26. # def parse_page(res):
  27. #     print("%s PARSE %s"%(current_thread().getName(),len(res)))
  28. #
  29. # def get_page(url,callback=parse_page):
  30. #     print("%s GET %s"%(current_thread().getName(),url))
  31. #     response = requests.get(url)
  32. #     if response.status_code == 200:
  33. #         callback(response.text)
  34. #
  35. # if __name__ == '__main__':
  36. #     import time
  37. #     urls = [
  38. #         "https://www.baidu.com",
  39. #         "https://www.taobao.com",
  40. #         "https://www.python.org",
  41. #     ]
  42. #     start_time = time.time()
  43. #     for url in urls:
  44. #         # t = Thread(target=get_page,args=(url,))
  45. #         p = Process(target=get_page,args=(url,))
  46. #         p.start()
  47. #         # t.start()
  48. #     print("===========耗时",time.time()-start_time)


  49. import requests
  50. from threading import current_thread
  51. from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

  52. # 进程池线程池
  53. # def parse_page(res):
  54. #     res = res.result()
  55. #     print("%s PARSE %s"%(current_thread().getName(),len(res)))

  56. # def get_page(url):
  57. #     print("%s GET %s"%(current_thread().getName(),url))
  58. #     response = requests.get(url)
  59. #     if response.status_code == 200:
  60. #         return response.text
  61. #
  62. # if __name__ == '__main__':
  63. #     import time
  64. #     start_time = time.time()
  65. #     urls = [
  66. #         "https://www.baidu.com",
  67. #         "https://www.taobao.com",
  68. #         "https://www.python.org",
  69. #     ]
  70. #     # pool = ThreadPoolExecutor(50)      #3.599205732345581
  71. #     pool = ProcessPoolExecutor(50)       #12.549717903137207
  72. #     for url in urls:
  73. #         pool.submit(get_page,url).add_done_callback(parse_page)
  74. #     pool.shutdown()
  75. #     print("========>",time.time()-start_time)


  76. # # 协程 实现的是仿线程
  77. # from gevent import joinall,spawn,monkey;monkey.patch_all()
  78. # import requests,time
  79. # from threading import current_thread
  80. #
  81. # def parse_page(res):
  82. #     print("%s PARSE %s"%(current_thread().getName(),len(res)))
  83. #
  84. # def get_page(url,callback=parse_page):
  85. #     print("%s GET %s"%(current_thread().getName(),url))
  86. #     response = requests.get(url)
  87. #     if response.status_code == 200:
  88. #         callback(response.text)
  89. #
  90. #
  91. # if __name__ == '__main__':
  92. #     start_time = time.time()
  93. #     urls = [
  94. #         "https://www.baidu.com",
  95. #         "https://www.taobao.com",
  96. #         "https://www.python.org",
  97. #     ]
  98. #     tasks = []
  99. #     for url in urls:
  100. #         tasks.append(spawn(get_page,url))
  101. #     joinall(tasks)
  102. #     print("++++++=====>",time.time()-start_time)       #2.9541687965393066
  103. #


  104. # import asyncio
  105. # @asyncio.coroutine
  106. # def task(task_id,second):
  107. #     print("%s run "%task_id)
  108. #     yield from asyncio.sleep(second)
  109. #     print("%s run done"%task_id)
  110. #
  111. # if __name__ == '__main__':
  112. #     tasks = [
  113. #         task("任务一",3),
  114. #         task("任务二",2),
  115. #         task("任务三",1),
  116. #     ]
  117. #     loop = asyncio.get_event_loop()
  118. #     loop.run_until_complete(asyncio.wait(tasks))
  119. #     loop.close()
复制代码



作者: 海海豚    时间: 2018-3-15 17:36
谢谢分享~
作者: libingyu135    时间: 2018-3-16 09:32
666
作者: 清晨一缕阳光    时间: 2018-3-16 11:55
six,six,six!





欢迎光临 51Testing软件测试论坛 (http://bbs.51testing.com/) Powered by Discuz! X3.2