# ---- intrest.py ----
# -*- coding: utf-8 -*-
import scrapy
from u148.items import U148Item
# Crawl the music section of the "有意思吧" (u148) website.
# http://www.u148.cn/music
class IntrestSpider(scrapy.Spider):
    """Spider that scrapes the music listing page of u148.cn.

    Starts from ``http://www.u148.cn/music/`` and yields one ``U148Item``
    for every article box found on the page.
    """

    name = 'intrest'
    allowed_domains = ['u148.cn']
    start_urls = ['http://www.u148.cn/music/']

    def parse(self, response):
        """Extract one item per article box in *response*.

        Each ``<article>`` whose ``class`` attribute starts with
        ``'ajaxpost box'`` is turned into a ``U148Item``.  A field whose
        node is absent from the article is set to ``None`` rather than
        raising ``IndexError``, so a single incomplete entry does not
        abort parsing of the remaining entries on the page.

        Args:
            response: The downloaded listing page.

        Yields:
            U148Item: One populated item per matching article.
        """
        self.logger.debug("Parsing music listing: %s", response.url)

        articles = response.xpath(
            "//article[starts-with(@class,'ajaxpost box')]"
        )

        # Walk over every music entry on the page.
        for article in articles:
            item = U148Item()
            # extract_first() returns None when the XPath matches nothing,
            # unlike extract()[0], which raises IndexError.
            item["title"] = article.xpath(
                ".//h2/a/@title"
            ).extract_first()
            item["img"] = article.xpath(
                ".//img[@class='thumb']/@src"
            ).extract_first()
            item["zhaiyao"] = article.xpath(
                ".//div[@class='excerpt']/text()"
            ).extract_first()
            item["author"] = article.xpath(
                ".//span[@class='author']/a/text()"
            ).extract_first()
            item["time"] = article.xpath(
                ".//span[@class='date']/text()"
            ).extract_first()
            yield item
# ---- items.py ----
import scrapy
class U148Item(scrapy.Item):
    """Container for one entry scraped from the u148.cn music listing.

    Every field is a plain ``scrapy.Field()`` with no metadata; the
    values are filled in by the spider.
    """

    # Value of the article heading link's ``title`` attribute.
    title = scrapy.Field()
    # ``src`` attribute of the article's thumbnail image.
    img = scrapy.Field()
    # Text of the article's excerpt block (presumably "zhaiyao" = summary).
    zhaiyao = scrapy.Field()
    # Text of the author link.
    author = scrapy.Field()
    # Text of the date span, stored as scraped (no parsing is done here).
    time = scrapy.Field()
# ---- pipelines.py ----
class U148Pipeline(object):
    """Pass-through item pipeline: items are forwarded unchanged."""

    def process_item(self, item, spider):
        """Return *item* untouched.

        Args:
            item: The scraped item handed to the pipeline.
            spider: The spider that produced the item (unused).

        Returns:
            The very same ``item`` object that was passed in.
        """
        return item
# (Reader comments -- trailing text from the source web page, not part of the code.)