from scrapy.exceptions import DropItem


class DaboPipeline(object):
    def __init__(self, file_path):
        self.file_path = file_path
        self.f = None

    @classmethod
    def from_crawler(cls, crawler):
        """
        Called by Scrapy at startup to create the pipeline object.
        :param crawler:
        :return:
        """
        # val = crawler.settings.getint('MMMM')
        file_path = crawler.settings.get('FILE_PATH')
        return cls(file_path)
    def process_item(self, item, spider):
        """
        Called for every item the spider yields.
        :param item:
        :param spider:
        :return:
        """
        # Naive version: open, write, and close the file for every item
        # f = open('news.log', mode='a+')
        # f.write(item.url + item.content)
        # f.close()
        if spider.name == "renjian":
            self.f.write(item['url'] + '\n')
        # return item
        # Raise DropItem instead of returning the item if the following
        # pipelines should not receive it
        raise DropItem()
    def open_spider(self, spider):
        """
        Called when the spider starts.
        :param spider:
        :return:
        """
        self.f = open(self.file_path, 'a+')
    def close_spider(self, spider):
        """
        Called when the spider closes.
        :param spider:
        :return:
        """
        self.f.close()
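
For the from_crawler hook above to find FILE_PATH, the pipeline has to be registered and the setting defined in settings.py. A minimal sketch follows; the module path dabo.pipelines and the file name news.log are assumptions, adjust them to the actual project layout:

# settings.py -- minimal sketch; 'dabo.pipelines.DaboPipeline' is an assumed
# module path and 'news.log' an assumed file name, adjust to your project
FILE_PATH = 'news.log'

ITEM_PIPELINES = {
    'dabo.pipelines.DaboPipeline': 300,  # lower number = runs earlier
}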
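
Because process_item ends with an unconditional raise DropItem(), no later pipeline ever receives an item. A hypothetical second pipeline makes this visible; EchoPipeline is illustrative only and not part of the original code:

# pipelines.py -- hypothetical EchoPipeline, for illustration only
class EchoPipeline(object):
    def process_item(self, item, spider):
        # Never reached while DaboPipeline raises DropItem for every item;
        # make DaboPipeline 'return item' instead and this starts printing
        print('EchoPipeline got:', item)
        return item

# Registered after DaboPipeline in settings.py (assumed priority numbers):
# ITEM_PIPELINES = {
#     'dabo.pipelines.DaboPipeline': 300,
#     'dabo.pipelines.EchoPipeline': 400,
# }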