nodejs 小爬虫

作者: jiangadam | 来源:发表于2016-12-20 09:05 被阅读35次

nodejs 小爬虫
nodejs小爬虫
nodeJS爬虫（完整版）
nodejs通过钉钉群机器人推送消息
NodeJs + Phantomjs 简易爬虫
Nodejs http小爬虫
Nodejs爬虫
NodeJS 爬虫
nodejs爬虫
nodejs 爬虫

Node.js 基础练习：爬取慕课网某一门课程的章节和视频列表。用到两个模块：
http 模块：发起 HTTP 请求，获取课程页面的 HTML
cheerio：解析 HTML 的 DOM 结构，用类似 jQuery 的选择器提取内容

安装 cheerio

npm install cheerio
// 如果安装失败，可以先把 npm 源换成淘宝镜像，再重新安装
npm config set registry http://registry.npm.taobao.org
npm install cheerio

/**
 * Node.js crawler exercise: scrape the chapter and video list of a single
 * imooc.com course.
 * Target course URL: http://www.imooc.com/learn/348
 * @author jiangadam
 */

var http = require('http')  // Node's built-in HTTP module, used to download the page
var cheerio = require('cheerio')   // third-party HTML parser (installed with npm, see above)
var url = "http://www.imooc.com/learn/348"  // course page to scrape

/**
 * Extract the chapter / video outline from a course page using cheerio.
 *
 * Every `.chapter` element yields one entry. Its title is the text of the
 * `strong` element(s) inside it, and its videos are the `li` children of the
 * chapter's `.video` list. Titles have all CR, LF and space characters
 * removed (including spaces between words).
 *
 * @param {string} html - HTML source of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>}
 *   One object per chapter, in the order cheerio yields them. A video's `id`
 *   is the part of its link's href after "video/"; it is `undefined` when the
 *   item has no `.J-media-item` link, the link has no href, or the href does
 *   not contain "video/".
 */
function filterChapter(html){
    var $ = cheerio.load(html)

    // Remove line breaks and plain spaces; other whitespace is left alone.
    function compact(text){
        return text.replace(/\r|\n/g, "").replace(/[ ]/g, "")
    }

    var data = []

    $('.chapter').each(function(){
        var chapter = $(this)
        var chapterData = {
            chapterTitle: compact(chapter.find('strong').text()),
            videos: []
        }

        chapter.find('.video').children('li').each(function(){
            var link = $(this).find('.J-media-item')

            // attr() returns undefined when the item has no link or the link
            // has no href; calling split() on that would throw, so guard it.
            var href = link.attr('href')

            chapterData.videos.push({
                title: compact(link.text()),
                id: href ? href.split('video/')[1] : undefined
            })
        })

        data.push(chapterData)
    })

    return data
}

/**
 * Print the scraped outline to the console: each chapter title on its own
 * line, followed by one "id->… title->…" line per video in that chapter.
 *
 * @param {Array<{chapterTitle: string, videos: Array<{id: *, title: *}>}>} data
 *   Chapter list to print.
 */
function printChapterData(data){
    // One line per video.
    function logVideo(video){
        console.log('id->' + video.id + ' title->' + video.title)
    }

    // Chapter heading first, then its videos.
    function logChapter(chapter){
        console.log(chapter.chapterTitle)
        chapter.videos.forEach(logVideo)
    }

    data.forEach(logChapter)
}

// Download the course page, then parse it and print the chapter/video outline.
http.get(url, function(response){
    // Anything other than 200 (redirect, error page, ...) will not contain the
    // course markup, so report it instead of parsing it.
    if (response.statusCode !== 200) {
        console.error('Request failed, status code: ' + response.statusCode)
        response.resume()  // discard the body so the connection is released
        return
    }

    // Decode at the stream level. Appending raw Buffer chunks to a string
    // decodes each chunk separately, which corrupts any multi-byte UTF-8
    // character that happens to be split across two chunks.
    response.setEncoding('utf8')

    var html = ''
    response.on("data", function(chunk){
        html += chunk
    })

    response.on("end", function(){
        var data = filterChapter(html)
        printChapterData(data)
    })
}).on('error', function(err){
    // Network-level failures (DNS lookup, refused connection, ...) are emitted
    // on the request object; without a listener they would crash the process.
    console.error('Request error: ' + err.message)
})

网友评论

本文标题：nodejs 小爬虫

本文链接：https://www.haomeiwen.com/subject/cnhxvttx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！

nodejs 小爬虫

相关文章

nodejs 小爬虫

nodejs小爬虫

nodeJS爬虫（完整版）

nodejs通过钉钉群机器人推送消息

NodeJs + Phantomjs 简易爬虫

Nodejs http小爬虫

Nodejs爬虫

NodeJS 爬虫

nodejs爬虫

nodejs 爬虫

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读