# -*- coding: utf-8 -*-
import scrapy
class QiuqiuSpider(scrapy.Spider):
    """Crawl the "8hr" listing pages of qiushibaike.com, one item per post.

    Every item is a plain dict with the keys ``url_1``, ``face``, ``name``,
    ``age``, ``content``, ``haha_count`` and ``ping_count``.
    """

    name = 'qiuqiu'
    # allowed_domains = ['www.qiushibaike.com']
    # start_urls = ['http://www.qiushibaike.com/']

    def start_requests(self):
        """Yield one request for each of the listing pages 1 through 12."""
        for page in range(1, 13):
            url = "https://www.qiushibaike.com/8hr/page/%s/" % page
            yield scrapy.Request(url=url, callback=self.parse22)

    def parse22(self, response):
        """Extract every post found on one listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            dict: One item per ``div`` directly under ``div#content-left``.
                ``face`` is a list of absolute image URLs; the remaining
                extracted fields are ``None`` when the node is not found.
        """
        for post in response.xpath("//div[@id='content-left']/div"):
            item = {}
            item["url_1"] = response.url

            # Fields: user avatar, user name, user age, content,
            # "funny" vote count, comment count.
            avatars = post.xpath(
                ".//div[@class='author clearfix']//img/@src").extract()
            # urljoin resolves protocol-relative ("//host/path") and relative
            # sources against the page URL and leaves absolute URLs intact;
            # blindly prepending "https:" would corrupt the latter.
            item["face"] = [response.urljoin(src) for src in avatars]
            item["name"] = post.xpath(
                ".//div[@class='author clearfix']//h2/text()").extract_first()
            item["age"] = post.xpath(
                ".//div[@class='author clearfix']/div/text()").extract_first()
            item["content"] = post.xpath(
                ".//div[@class='content']/span/text()").extract_first()

            # Both counters are searched below the current post (".//").
            # A path starting with "../" would leave the post and query the
            # container shared by all posts.
            # NOTE(review): the inner steps ("span[1]", "i[2]") are kept as
            # written - confirm them against the live page markup.
            item["haha_count"] = post.xpath(
                ".//span[@class='stats-vote']/span[1]//i/text()"
            ).extract_first()
            item["ping_count"] = post.xpath(
                ".//span[@class='stats-comments']/i[2]/text()"
            ).extract_first()

            # Yield instead of print so the item reaches item pipelines and
            # feed exports.
            yield item
# Reader comments