1、先打开settings.py,启用其中的ITEM_PIPELINES配置(原来处于注释状态,去掉注释即可)
# Enable MoonBlogPipeline. In Scrapy the integer (300 here) sets the order in
# which enabled pipelines run; lower values run first.
ITEM_PIPELINES = {"moon_blog.pipelines.MoonBlogPipeline": 300}
2、在pipelines.py文件中写入
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import os
import csv
class MoonBlogPipeline:
    """Scrapy item pipeline that appends scraped articles to a CSV file.

    The output file is ``spiders/articles.csv`` inside the directory that
    contains this module. It is opened once when the pipeline is constructed
    and closed in :meth:`close_spider`.
    """

    # Item fields written to each CSV row, in column order.
    _FIELDS = (
        'article_title',
        'article_link',
        'publish_date',
        'scan_num',
        'article_content',
    )

    def __init__(self):
        """Open the CSV file in append mode and create the CSV writer."""
        # Location of the CSV file. The file itself does not need to exist
        # beforehand ('a+' creates it), but the ``spiders`` directory does.
        store_file = os.path.join(
            os.path.dirname(__file__), 'spiders', 'articles.csv'
        )
        print("***************************************************************")
        # Open (or create) the file. newline='' lets the csv module control
        # line endings itself, avoiding blank lines between rows on Windows.
        self.file = open(store_file, 'a+', encoding="utf-8", newline='')
        # Writer using the Excel CSV dialect.
        self.writer = csv.writer(self.file, dialect="excel")

    def process_item(self, item, spider):
        """Write ``item`` as one CSV row if it has a non-empty title.

        Args:
            item: Mapping-like scraped item (a ``dict`` or Scrapy ``Item``).
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, always, so later pipelines still receive it.
        """
        print("正在写入......")
        # Only write items whose title is present and non-empty. ``get`` is
        # used because indexing an unset field raises KeyError.
        if item.get('article_title'):
            # Pass the fields as a list: writerow() iterates its argument, so
            # a bare string would be split into one column per character.
            # Missing fields are written as empty cells.
            self.writer.writerow([item.get(name, '') for name in self._FIELDS])
        return item

    def close_spider(self, spider):
        """Close the CSV file when the spider finishes, flushing all rows."""
        self.file.close()
注:如何解决存入csv文件时出现的每一个字以‘,’隔离的问题:writerow()会遍历传入的参数,如果直接传入字符串,每个字符都会被当成一列;像上面那样把各字段放进一个列表再传给writerow()即可。