Edit pipelines.py and add a custom pipeline class that writes items to MySQL asynchronously through Twisted's adbapi connection pool:
from twisted.enterprise import adbapi


class MySQLPipeline(object):

    @classmethod
    def from_crawler(cls, crawler):
        # Read the connection parameters from the project settings,
        # falling back to defaults when a setting is absent.
        cls.MYSQL_DB_NAME = crawler.settings.get('MYSQL_DB_NAME', 'scrapy_default')
        cls.HOST = crawler.settings.get('MYSQL_HOST', 'localhost')
        cls.PORT = crawler.settings.getint('MYSQL_PORT', 3306)
        cls.USER = crawler.settings.get('MYSQL_USER', 'root')
        cls.PASSWD = crawler.settings.get('MYSQL_PASSWORD', 'new.1234')
        return cls()

    def open_spider(self, spider):
        # Create a Twisted connection pool backed by pymysql.
        self.dbpool = adbapi.ConnectionPool(
            'pymysql',
            host=self.HOST,
            port=self.PORT,
            user=self.USER,
            passwd=self.PASSWD,
            db=self.MYSQL_DB_NAME,
            charset='utf8',
        )

    def close_spider(self, spider):
        self.dbpool.close()

    def process_item(self, item, spider):
        # runInteraction runs insert_db in a worker thread with a cursor,
        # so the insert does not block the Twisted reactor.
        self.dbpool.runInteraction(self.insert_db, item)
        return item

    def insert_db(self, tx, item):
        values = (
            item['upc'],
            item['name'],
            item['price'],
            item['review_rating'],
            item['review_num'],
            item['stock'],
        )
        sql = 'INSERT INTO books VALUES (%s, %s, %s, %s, %s, %s)'
        tx.execute(sql, values)
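
One caveat: runInteraction returns a Deferred, and with the code above any database error is silently dropped. A minimal sketch of a variant that logs failures instead (handle_error is a helper name introduced here for illustration, not part of the original project):

    def process_item(self, item, spider):
        deferred = self.dbpool.runInteraction(self.insert_db, item)
        # addErrback passes the Failure first, then the extra arguments.
        deferred.addErrback(self.handle_error, item, spider)
        return item

    def handle_error(self, failure, item, spider):
        spider.logger.error('MySQL insert failed for %r: %s', item, failure)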
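
The INSERT statement also assumes a books table with six columns, in the same order as the values tuple, already exists in the database named by MYSQL_DB_NAME. A one-off setup sketch using pymysql; the column types are assumptions inferred from the item fields, not taken from the original project:

import pymysql

conn = pymysql.connect(host='localhost', port=3306, user='root',
                       password='new.1234', charset='utf8')
with conn.cursor() as cur:
    cur.execute('CREATE DATABASE IF NOT EXISTS scrapy_db')
    cur.execute('USE scrapy_db')
    # Column order must match the VALUES tuple in insert_db.
    cur.execute('''
        CREATE TABLE IF NOT EXISTS books (
            upc CHAR(16) NOT NULL PRIMARY KEY,
            name VARCHAR(256) NOT NULL,
            price VARCHAR(16),
            review_rating INT,
            review_num INT,
            stock INT
        )
    ''')
conn.commit()
conn.close()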
Next, add the corresponding configuration parameters to settings.py and register the pipeline in ITEM_PIPELINES:
MYSQL_DB_NAME = 'scrapy_db'
MYSQL_HOST = 'localhost'
MYSQL_PORT = 3306
MYSQL_USER = 'root'
MYSQL_PASSWORD = 'new.1234'
# Enable the MySQL pipeline
ITEM_PIPELINES = {
'toscrape_book.pipelines.MySQLPipeline': 400,
}
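
After a crawl finishes, a quick throwaway check confirms the rows reached MySQL (assuming the scrapy_db database and books table sketched above):

import pymysql

conn = pymysql.connect(host='localhost', port=3306, user='root',
                       password='new.1234', db='scrapy_db', charset='utf8')
with conn.cursor() as cur:
    cur.execute('SELECT COUNT(*) FROM books')
    print('rows in books:', cur.fetchone()[0])
conn.close()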