TA的每日心情 | 擦汗 3 天前 |
---|
签到天数: 527 天 连续签到: 4 天 [LV.9]测试副司令
|
1、在Pyspider的脚本开头引入:
- from pyspider.database.mysql.mysqldb import SQL
复制代码
2、重写on_result方法:
def on_result(self, result):
    """Persist one crawl result into the ``t_dream_xm_project`` table.

    Results that are empty, or that lack a truthy ``original_id``, are
    skipped. A fresh ``SQL`` connection is opened per result and closed
    again once the insert has been attempted.
    """
    # .get() so that a result without the key is skipped instead of
    # raising KeyError.
    if not result or not result.get('original_id'):
        return
    sql = SQL()
    try:
        sql.insert('t_dream_xm_project', **result)
    finally:
        # SQL() sets ``connection`` to True only after ``conn`` exists, so
        # this guard is enough to avoid touching a missing attribute.
        if sql.connection:
            sql.conn.close()
复制代码
3、编写数据库脚本(保存为mysqldb.py,放入/usr/lib/python2.7/site-packages/pyspider/database/mysql/下,文件名需与第1步中的导入路径pyspider.database.mysql.mysqldb一致):
- #!/usr/bin/env python
- # -*- encoding: utf-8 -*-
- from six import itervalues
- import MySQLdb
class SQL(object):
    """Minimal MySQL writer built on the MySQLdb driver.

    One connection is opened when the object is constructed.
    ``self.connection`` is True only if that succeeded; every other method
    checks it before touching ``self.conn`` / ``self.cursor``.
    """

    def __init__(self):
        """Connect to MySQL using the settings hard-coded below.

        A failed connection is reported on stdout and leaves
        ``self.connection`` False; no exception propagates to the caller.
        """
        # Connection settings -- replace the placeholders with real values.
        hosts = '数据库地址'
        username = '数据库用户名'
        password = '数据库密码'
        database = '数据库名'
        charsets = 'utf8'
        self.connection = False
        self.conn = None
        self.cursor = None
        try:
            self.conn = MySQLdb.connect(host=hosts, user=username,
                                        passwd=password, db=database,
                                        charset=charsets)
            self.cursor = self.conn.cursor()
            self.cursor.execute("set names " + charsets)
            self.connection = True
        except Exception as e:
            print("Cannot Connect To Mysql!\n%s" % (e,))

    def escape(self, string):
        """Return ``string`` quoted as a single MySQL identifier.

        The name is wrapped in backticks and embedded backticks are doubled,
        so reserved words can be used as table/column names. The whole
        argument is treated as ONE identifier (no ``db.table`` splitting).
        """
        return '`%s`' % ('%s' % string).replace('`', '``')

    def insert(self, tablename=None, **values):
        """Insert one row into ``tablename`` and commit.

        Args:
            tablename: target table name.
            **values: column name -> value; values are passed to the driver
                as query parameters, never interpolated into the SQL text.

        Returns:
            True if the statement was executed and committed, False if
            there is no connection, no table name, or the statement failed.
        """
        if not self.connection or not tablename:
            return False
        tablename = self.escape(tablename)
        if values:
            # Freeze the column order once so names and parameters line up.
            columns = list(values)
            _keys = ",".join(self.escape(k) for k in columns)
            _values = ",".join(['%s'] * len(columns))
            sql_query = "insert into %s (%s) values (%s)" % (tablename, _keys, _values)
            params = [values[k] for k in columns]
        else:
            # MySQL has no "DEFAULT VALUES" clause; an empty column list
            # with an empty value list creates an all-defaults row.
            sql_query = "replace into %s () values ()" % tablename
            params = None
        try:
            if params is None:
                self.cursor.execute(sql_query)
            else:
                self.cursor.execute(sql_query, params)
            self.conn.commit()
            return True
        except Exception as e:
            print("An Error Occured: %s" % (e,))
            try:
                # Best effort: do not leave a half-done transaction open.
                self.conn.rollback()
            except Exception as rollback_error:
                print("An Error Occured: %s" % (rollback_error,))
            return False

    def close(self):
        """Close the connection if it is open; safe to call repeatedly."""
        if self.connection:
            self.connection = False
            self.conn.close()
复制代码 说明:这里使用的是MySQLdb驱动
4、在MySQL中新建数据库以及对应的表(本例中表名为t_dream_xm_project),表的字段名称需要与Pyspider脚本中detail_page方法return返回的字典的键名一一对应。
OK,完成这项步骤就可以启动服务器进行测试了。
完整的Pyspider脚本:
- #!/usr/bin/env python
- # -*- encoding: utf-8 -*-
- # Created on 2017-07-14 10:36:36
- # Project: xiaomi
- from pyspider.libs.base_handler import *
- from pyspider.database.mysql.mysqldb import SQL
- import urllib
- import time
- import json
class Handler(BaseHandler):
    """Pyspider handler: crawl the home.mi.com product list and details.

    Each product's raw JSON is dumped to ``/tmp/xiaomi/<gid>.txt`` and a
    summary row is written to MySQL from ``on_result``.
    """

    # Request attributes shared by every crawl issued from this handler.
    # NOTE(review): the Referer value has no URL scheme in the source --
    # confirm whether "https:" was lost when the script was published.
    crawl_config = {
        'headers': {
            'Connection': 'keep-alive',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'content-type': 'application/x-www-form-urlencoded',
            'Referer': '//home.mi.com/crowdfundinglist?id=78&title=%E4%BC%97%E7%AD%B9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
        }
    }

    @every(minutes=24 * 60)
    def on_start(self):
        """Request the product list; runs once every 24 hours."""
        # URL-encoded JSON asking the Homepage model to build list id 12.
        param = 'data=%7B%22HomeList%22%3A%7B%22model%22%3A%22Homepage%22%2C%22action%22%3A%22BuildHome%22%2C%22parameters%22%3A%7B%22id%22%3A12%7D%7D%7D'
        self.crawl('https://home.mi.com/app/shopv3/pipe', method="GET", params=param, callback=self.index_page)

    @config(age=60 * 60)
    def index_page(self, response):
        """Queue one detail request (POST) per product in the list."""
        for each in response.json['result']['HomeList']['data']:
            gid = each['gid']
            # The same gid is substituted into the detail, comment and
            # activity sub-requests of the JSON payload.
            detailparm = (
                '{"detail":{"model":"Shopv2","action":"getDetail","parameters":{"gid":"%s"}},'
                '"comment":{"model":"Comment","action":"getList","parameters":{"goods_id":"%s","orderby":"1","pageindex":"0","pagesize":3}},'
                '"activity":{"model":"Activity","action":"getAct","parameters":{"gid":"%s"}}}'
            ) % (gid, gid, gid)
            detailreq = "data=" + urllib.quote(detailparm)
            detailurl = "https://home.mi.com/app/shop/pipe?gid=%s" % gid
            self.crawl(detailurl, method='POST', data=detailreq, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Dump the raw detail JSON to disk and return the row to store.

        Returns:
            dict whose keys are the column names of the target table.
        """
        # Local import: the script's import header does not provide ``os``.
        import os

        payload = response.json
        resultjsonstr = json.dumps(payload)
        result = payload['result']['detail']['data']['good']
        gid = result['gid'].encode('utf-8')

        # Save the raw response to a file; create the directory on first
        # use so a fresh machine does not fail every detail task.
        dump_dir = "/tmp/xiaomi"
        if not os.path.isdir(dump_dir):
            os.makedirs(dump_dir)
        with open("/tmp/xiaomi/%s.txt" % gid, 'w') as resultfile:
            resultfile.write(resultjsonstr)

        # The returned dict is what on_result() writes to MySQL.
        return {
            "original_id": gid,
            "project_name": result['name'].encode('utf-8'),
            "project_desc": result['summary'].encode('utf-8'),
            "curr_money": result['saled'].encode('utf-8'),
            # float() accepts both a numeric and a string timestamp.
            "begin_date": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(float(result['ctime']))),
        }

    def on_result(self, result):
        """Insert one detail_page() result into ``t_dream_xm_project``.

        Results that are empty or lack a truthy ``original_id`` are skipped.
        """
        # .get() so that a result without the key is skipped instead of
        # raising KeyError.
        if not result or not result.get('original_id'):
            return
        sql = SQL()
        try:
            sql.insert('t_dream_xm_project', **result)
        finally:
            # One connection is opened per result; close it so they do not
            # pile up. ``connection`` is True only once ``conn`` exists.
            if sql.connection:
                sql.conn.close()
复制代码
|
|