1.启动pyspider

在命令行输入：pyspider all
在浏览器中打开 http://localhost:5000/

2.crawl()方法

url
url是爬取时的URL，可以是单个URL，也可以是URL列表
callback
callback是回调函数，指定了该URL对应的响应内容用哪个方法来解析
age
age是任务的有效时间，单位为秒。如果某个任务在有效时间内且已经被执行过，则它不会被重复执行。
priority
priority是爬取任务的优先级，默认值为0，priority越大，对应的请求越优先被调度。
exetime
exetime参数可以设置定时任务，其值是时间戳，默认为0，即代表立即执行。

3.pyspider实例

利用pyspider爬取青岛旅游景点信息，代码如下：

from pyspider.libs.base_handler import *
import pymongo

class Handler(BaseHandler):
    """pyspider handler that crawls TripAdvisor attraction pages for Qingdao.

    The crawl starts at the attraction listing page, follows every
    attraction link to its detail page, walks the pagination links, and
    stores each extracted record in the MongoDB database ``trip``.
    """

    crawl_config = {
    }

    # One client shared by the handler; created when the class is defined.
    client = pymongo.MongoClient('localhost')
    db = client['trip']

    @every(minutes=24 * 60)
    def on_start(self):
        """Schedule the first listing page; re-run once every 24 hours."""
        self.crawl(
            'https://www.tripadvisor.cn/Attractions-g297458-Activities-Qingdao_Shandong.html',
            callback=self.index_page,
            fetch_type='js',
        )

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue every attraction on a listing page, then the next page.

        Args:
            response: pyspider response for a listing page.
        """
        for each in response.doc('.listing_title>a').items():
            self.crawl(each.attr.href, callback=self.detail_page)

        next_url = response.doc(
            '#FILTERED_LIST > div.al_border.deckTools.btm > div > div > a'
        ).attr.href
        # The selector matches nothing when there is no pagination link
        # (e.g. on the last page); only schedule a follow-up when one exists.
        if next_url:
            self.crawl(next_url, callback=self.index_page, fetch_type='js')

    @config(priority=2)
    def detail_page(self, response):
        """Extract one attraction record from its detail page.

        Args:
            response: pyspider response for an attraction detail page.

        Returns:
            dict with the keys ``name``, ``rating_count``, ``open_time``,
            ``picture``, ``introduce``, ``duration``, ``tel`` and ``rating``.
        """
        return {
            'name': response.doc('.h1').text(),
            'rating_count': response.doc('a > .reviewCount').text(),
            'open_time': response.doc('.headerBL .header_detail').text(),
            'picture': response.doc('.large_photo_wrapper img').attr.src,
            'introduce': response.doc('.centerWell > div > div > div > div > div > span').text(),
            'duration': response.doc('.contactInfo > .address > span > span').text(),
            'tel': response.doc('.contact > .is-hidden-mobile > div').text(),
            'rating': response.doc('.overallRating').text()
        }

    def on_result(self, result):
        """Persist a non-empty result to MongoDB.

        Callbacks that return nothing (such as ``index_page``) produce an
        empty result, which is skipped.
        """
        if result:
            self.save_to_mongo(result)

    def save_to_mongo(self, result):
        """Insert one record into the ``qinddao`` collection of ``trip``.

        Args:
            result: dict produced by ``detail_page``.
        """
        # NOTE(review): 'qinddao' looks like a typo for 'qingdao'; the name is
        # kept unchanged so previously stored data stays in the same collection.
        # Collection.insert() was deprecated in PyMongo 3.0 and removed in 4.0;
        # insert_one() is the supported replacement.
        outcome = self.db['qinddao'].insert_one(result)
        if outcome.acknowledged:
            print('saved to mongo', result)