美文网首页
糗事百科的scrapy爬取

糗事百科的scrapy爬取

作者: 楚糖的糖 | 来源:发表于2018-11-09 19:12 被阅读0次
# -*- coding: utf-8 -*-
import scrapy


class QiuqiuSpider(scrapy.Spider):
    """Spider that crawls joke posts from qiushibaike.com (pages 1-12).

    For each post it yields a dict with: the page URL, avatar image
    URLs, the author's name and age, the post text, the "funny" count
    and the comment count.
    """
    name = 'qiuqiu'
    # allowed_domains = ['www.qiushibaike.com']
    # start_urls = ['http://www.qiushibaike.com/']

    def start_requests(self):
        """Issue one request per listing page (pages 1 through 12)."""
        urls = ["https://www.qiushibaike.com/8hr/page/%s/" % i for i in range(1, 13)]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse22)

    def parse22(self, response):
        """Parse a listing page and yield one item dict per post.

        FIX: the original only printed items (the ``yield`` was commented
        out), so nothing ever reached Scrapy's pipelines or exporters.
        """
        post_divs = response.xpath("//div[@id='content-left']/div")
        for post in post_divs:
            item = {}
            item["url_1"] = response.url
            # Avatar, username, age, content, funny count, comment count.
            # Avatar src attributes are protocol-relative; prepend the scheme.
            item["face"] = ["https:" + src for src in
                            post.xpath(".//div[@class='author clearfix']//img/@src").extract()]
            item["name"] = post.xpath(".//div[@class='author clearfix']//h2/text()").extract_first()
            item["age"] = post.xpath(".//div[@class='author clearfix']/div/text()").extract_first()
            item["content"] = post.xpath(".//div[@class='content']/span/text()").extract_first()
            # FIX: the original XPath began with ".." which climbs from the
            # current post <div> back to the shared #content-left parent, so
            # every post matched the same node set (normally empty). Search
            # *inside* the current post instead.
            item["haha_count"] = post.xpath(
                ".//span[@class='stats-vote']/i[@class='number']/text()").extract_first()
            # FIX: "stats-comments" is not a direct child of the post div and
            # the count sits in the <i class="number"> element, not i[2].
            item["ping_count"] = post.xpath(
                ".//span[@class='stats-comments']//i[@class='number']/text()").extract_first()
            yield item

相关文章

网友评论

      本文标题:糗事百科的scrapy爬取

      本文链接:https://www.haomeiwen.com/subject/coaoxqtx.html