The official Peewee documentation is here.
First, define the Peewee model and the MySQL connection in items.py:
import scrapy
from peewee import *

# MySQL connection; adjust host/port/credentials to your environment
db = MySQLDatabase('test', host='127.0.0.1', port=3306,
                   user='test', passwd='test', charset='utf8')

class GoodsItem(scrapy.Item):
    # define the fields for your item here like:
    one = scrapy.Field()
    two = scrapy.Field()
    three = scrapy.Field()

class Goods(Model):
    # one is the primary key
    one = CharField(verbose_name="one", max_length=100, primary_key=True, null=False)
    two = CharField(verbose_name="two", max_length=200, null=False)
    three = CharField(verbose_name="three", max_length=80, null=False)

    class Meta:
        database = db
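Before wiring the model into Scrapy, it can be worth sanity-checking the connection and table from a standalone script. A minimal sketch, assuming the project package is named tmproject (as in the settings below) and the test database already exists:

# quick standalone check of the Peewee model (run from the project root)
from tmproject.items import Goods, db

db.connect()
Goods.create_table(safe=True)   # no-op if the table already exists

Goods.create(one='id-1', two='hello', three='world')
print(Goods.get(Goods.one == 'id-1').two)   # -> hello
# rerunning this raises IntegrityError, because 'one' is the primary key

db.close()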
Next, create a new spider script under the spiders directory with the following code:
import scrapy
from bs4 import BeautifulSoup   # used when actually parsing the response
from ..items import GoodsItem

class MySpider(scrapy.Spider):
    name = 'test'

    def start_requests(self):
        url = 'https://xxxx.xxxl.com'
        yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        item = GoodsItem()
        item['one'] = 'xxx'
        item['two'] = 'xxx'
        item['three'] = 'xxx'
        yield item
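The 'xxx' values are placeholders for real extraction logic. As one sketch of what parse might look like with BeautifulSoup (the div.goods container and the goods-link/goods-name/goods-price selectors are made up; substitute the actual page's selectors):

# hypothetical parse() using BeautifulSoup; all selectors are placeholders
def parse(self, response):
    soup = BeautifulSoup(response.text, 'html.parser')
    for node in soup.select('div.goods'):                # made-up container
        item = GoodsItem()
        item['one'] = node.select_one('.goods-link')['href']           # primary key
        item['two'] = node.select_one('.goods-name').get_text(strip=True)
        item['three'] = node.select_one('.goods-price').get_text(strip=True)
        yield item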
Then perform the database operations in pipelines.py:
from .items import Goods

class MySQLStorePipeline(object):
    def process_item(self, item, spider):
        if not Goods.table_exists():
            Goods.create_table()
        try:
            Goods.create(one=item['one'], two=item['two'], three=item['three'])
        except Exception as e:
            if str(e.args[0]) == '1062':
                # MySQL error 1062: duplicate entry for the primary key
                print('Duplicate row, skipping.')
            else:
                print(e.args[0], e.args[1])
        return item
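Calling table_exists() on every item is wasteful, and matching the raw 1062 error code is fragile. A sketch of a tidier variant under peewee 3: create the table once in open_spider, and let Goods.replace() issue REPLACE INTO so duplicate primary keys are handled by MySQL itself (note this overwrites existing rows rather than skipping them):

from .items import Goods

class MySQLStorePipeline(object):
    def open_spider(self, spider):
        # runs once when the spider starts, instead of once per item
        Goods.create_table(safe=True)

    def process_item(self, item, spider):
        # REPLACE INTO: overwrites the row when the primary key already exists
        Goods.replace(one=item['one'], two=item['two'], three=item['three']).execute()
        return item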
Finally, remember to enable the pipeline via ITEM_PIPELINES in settings.py:
ITEM_PIPELINES = {
    'tmproject.pipelines.MySQLStorePipeline': 300,
}
That's it! Run the spider with scrapy crawl test (matching the spider's name attribute) and the scraped items will land in MySQL.