- 检查settings.py中ITEM_PIPELINES是否指定Item管道,例如:
# Item pipelines enabled for this project, keyed by dotted class path.
# The integer value sets the run order (lower values run first).
# Reference: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {'worm.pipelines.WormPipeline': 100}
- 如果Item的子类实现了自己的构造方法(__init__),则必须在其中显式调用父类的构造方法:
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
from scrapy import Field
class TestSpiderItem(scrapy.Item):
    """Example scraped item with a single ``name`` field.

    Demonstrates how to give an Item subclass its own constructor without
    breaking the item pipeline.
    """

    name = Field()

    def __init__(self, *args, **kwargs):
        """Initialise the item and announce the instantiation on stdout.

        Args:
            *args: Positional arguments forwarded unchanged to the parent
                constructor (e.g. a mapping of initial field values).
            **kwargs: Keyword arguments forwarded unchanged to the parent
                constructor (e.g. ``name='...'``).
        """
        # A subclass that defines its own constructor must call the parent
        # constructor; otherwise the pipeline's process_item method is never
        # executed for this item.
        # Arguments are forwarded so the item can still be built from a
        # mapping or keyword arguments, exactly like a plain scrapy.Item.
        super().__init__(*args, **kwargs)
        print('<INFO> TestSpiderItem is instancing.')
- 检查process_item(self, item, spider)方法是否返回一个item或dict对象:
class WormPipeline(object):
    """Item pipeline that appends each item's ``author`` value to a text file.

    ``process_item`` is called for every item passing through the pipeline.
    It must either return a dict with data, return an Item (or any descendant
    class) object, return a Twisted Deferred, or raise a DropItem exception.
    Dropped items are not processed by further pipeline components.
    """

    # Destination file. Kept as a class attribute so a subclass or an
    # instance can point the pipeline elsewhere without editing the method.
    output_path = 'F:\\text1.txt'

    def process_item(self, item, spider):
        """Append ``item['author']`` as one line to ``output_path``.

        Args:
            item: The scraped item (or dict); must contain an ``'author'``
                key whose value is a string.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item`` object, so later pipeline components still
            receive it.

        Raises:
            KeyError: If the item has no ``'author'`` entry.
            OSError: If the output file cannot be opened or written.
        """
        # Explicit UTF-8 keeps the output independent of the platform's
        # default locale encoding, which may be unable to represent some
        # scraped characters.
        with open(self.output_path, 'a', encoding='utf-8') as f:
            f.write(item['author'] + '\n')
        return item
网友评论