Scrapy爬虫框架(二) ------ 爬取猫眼电影以及评分

    item :

    import scrapy
    class MovieItem(scrapy.Item):
        """Container for one scraped film: its title and its rating text."""

        # Film title.
        name = scrapy.Field()
        # Film rating, stored as whatever value the spider assigns.
        score = scrapy.Field()

    MaoyanSpider :

    # -*- coding: utf-8 -*-
    import scrapy
    from demo1.items import MovieItem
    class MaoyanSpider(scrapy.Spider):
        """Spider that scrapes film titles and ratings from a Maoyan listing page."""

        name = 'maoyan'
        allowed_domains = ['maoyan.com']
        start_urls = ['http://maoyan.com/films?offset=30']

        def parse(self, response):
            """Yield one ``MovieItem`` per film found in ``response``.

            Titles and ratings are selected with two independent XPath queries
            and paired positionally, so the pairing is only correct when both
            queries return their matches in the same order and quantity;
            ``zip`` silently stops at the shorter of the two lists.
            """
            names = response.xpath(
                '//div[@class="channel-detail movie-item-title"]/@title'
            ).extract()
            score_nodes = response.xpath(
                '//div[@class="channel-detail channel-detail-orange"]'
            )
            # string(.) concatenates all descendant text of the rating div
            # into a single string.
            scores = [node.xpath('string(.)').extract_first() for node in score_nodes]

            for name, score in zip(names, scores):
                # Build a fresh item on every iteration. Reusing one instance
                # would make every yielded reference point at the same object,
                # so later assignments would overwrite earlier films.
                item = MovieItem()
                item['name'] = name
                item['score'] = score
                yield item

    pipeline :

    import json
    class Demo1Pipeline(object):
        """Item pipeline that writes every item to ``movie.txt`` as one JSON line."""

        def open_spider(self, spider):
            """Open (and truncate) the output file."""
            # NOTE: despite its name, ``self.filename`` holds the open file
            # object, not a path; the attribute name is kept for compatibility.
            self.filename = open('movie.txt', 'w', encoding='utf-8')

        def process_item(self, item, spider):
            """Append ``item`` to the output file as a JSON line and return it."""
            # ensure_ascii=False keeps non-ASCII text (e.g. Chinese titles)
            # readable in the file instead of \uXXXX escapes.
            self.filename.write(json.dumps(dict(item), ensure_ascii=False) + '\n')
            # Return the item so that other pipelines can use it as well.
            return item

        def close_spider(self, spider):
            """Close the output file, flushing any buffered lines to disk."""
            self.filename.close()

    settings.py (ITEM_PIPELINES) :

       'demo1.pipelines.Demo1Pipeline': 300,  # dotted path to the pipeline class : 300 is its priority order; the smaller the number, the earlier it runs



