美文网首页
记录使用node写一个简易爬虫

记录使用node写一个简易爬虫

作者: zhyzhyzz | 来源:发表于2019-12-26 16:51 被阅读0次
    准备工作-使用到的模块
    //全局安装自动重启工具nodemon
    cnpm install -g  nodemon
    cnpm i --save koa koa-router mysql cheerio superagent-charset superagent
    
    ----app.js----
    // Entry point: builds the Koa application, mounts the crawler routes and
    // starts the HTTP server on port 3000.
    const Koa = require('koa');
    const index = require('./routes/index');

    const app = new Koa();

    // Koa's app.use() registers exactly one middleware per call; a second
    // argument is ignored. Chain two calls so allowedMethods() is really
    // installed (it answers OPTIONS and replies 405/501 for unsupported methods).
    app.use(index.routes()).use(index.allowedMethods());
    app.listen(3000);
    
    //路由信息
    ----/routes/index----
    // Route definitions for the crawler API.
    const router = require('koa-router')();
    // Aliased so it does not shadow the legacy global escape().
    const { escape: escapeSql } = require('mysql');
    const mysql = require('../db/mysql');
    const superagent = require('../caiji/superagent');

    /**
     * GET /caiji/:page
     *
     * Scrapes one page of the cnodejs.org topic list and inserts every topic
     * found into the nodeData table, one row at a time.
     *
     * Responds with { code: 1, message } on success and { code: 0, message }
     * when the page number is invalid, nothing was scraped, or an insert fails.
     */
    router.get('/caiji/:page', async (ctx) => {
      const { page } = ctx.params;

      // Accept plain positive integers only, so arbitrary text is never
      // interpolated into the upstream URL.
      if (!/^[1-9]\d*$/.test(page)) {
        ctx.body = {
          code: 0,
          message: `采集失败:页码无效【${page}】`
        };
        return;
      }

      try {
        const topics = await superagent.get(`https://cnodejs.org/?tab=all&page=${page}`);

        // Always answer the client; leaving ctx.body unset makes Koa reply 404.
        if (topics.length === 0) {
          ctx.body = {
            code: 0,
            message: '采集失败:未采集到数据'
          };
          return;
        }

        let count = 0;
        for (const { title, userName, time } of topics) {
          // Scraped text is untrusted: escape every value instead of pasting it
          // between quotes. Missing values (undefined/null) are written as NULL.
          const row = [title, userName, time].map((value) => escapeSql(value)).join(',');
          await mysql.query(`insert into nodeData(title,userName,time) values(${row})`);
          count += 1;
        }

        ctx.body = {
          code: 1,
          message: `该页采集完成,共采集【${count}】条`
        };
      } catch (error) {
        ctx.body = {
          code: 0,
          message: `采集失败:${error}`
        };
      }
    });

    // app.js calls index.routes(), so the router instance has to be exported.
    module.exports = router;
    
    //数据库配置
    ----/db/config----
    // MySQL connection settings; ./mysql reads these when it creates its pool.
    // NOTE(review): the credentials are hard-coded in source — consider loading
    // them from the environment before sharing or deploying this project.
    const config = {
      DATABASE: 'test',
      USERNAME: 'root',
      PASSWORD: 'zhy123456',
      PORT: '3306',
      HOST: 'localhost'
    };

    module.exports = config;
    ----/db/mysql----
    const mysql = require('mysql');
    const config = require('./config');
    // Connection pool shared by every query issued through this module.
    // The port is now taken from the config too, so editing config.PORT takes
    // effect; before, the driver's default port was always used. PORT is stored
    // as a string, hence the Number() conversion.
    const pool = mysql.createPool({
        host: config.HOST,
        port: Number(config.PORT),
        user: config.USERNAME,
        password: config.PASSWORD,
        database: config.DATABASE
    });
    
    /**
     * Thin promise wrapper around the module-level connection pool.
     */
    class Mysql {
        /**
         * Runs one SQL statement on the pool and logs the statement text.
         *
         * @param {string} sql - Statement text; may contain `?` placeholders.
         * @param {Array|Object} [values] - Optional placeholder values, handed
         *   to pool.query unchanged. Omit it to run `sql` as-is.
         * @returns {Promise<*>} Resolves with the result the driver passes to
         *   its callback; rejects with the driver's error.
         */
        query(sql, values) {
            console.log(sql);
            return new Promise((resolve, reject) => {
                pool.query(sql, values, (err, res) => {
                    if (err) {
                        // Reject rather than throw: an exception thrown inside
                        // this callback never reaches the promise, so callers
                        // could not catch it with try/catch around `await`.
                        reject(err);
                        return;
                    }
                    resolve(res);
                });
            });
        }
    }
    // Export a single Mysql instance (not the class) for callers to use directly.
    module.exports = new Mysql()
    
    //采集模块
    ----/caiji/superagent----
    const cheerio = require('cheerio'),
        superagent = require('superagent'),
        charset = require('superagent-charset');
    charset(superagent);
    module.exports = {
        get(url) {
            return new Promise((resolve, reject) => {
                superagent.get(url)
                    .charset('utf-8')
                    .end((err, res) => {
                        if (err) {
                            resolve([])
                        }
                        if (res) {
                            let $ = cheerio.load(res.text, {
                                decodeEntities: false
                            })
                            let arr = [];
                            for (let i in $('.cell')) {
                                let title = $('.cell').eq(i).find('.topic_title').eq(0).html(),
                                    userName = $('.cell').eq(i).find('.user_avatar').eq(0).find('img').eq(0).attr('title'),
                                    time = $('.cell').eq(i).find('.last_active_time').eq(0).html(),
                                    views = $('.cell').eq(i).find('.count_of_visits').eq(0).text();
                                if (title) {
                                    arr.push({
                                        title: title.trim(),
                                        userName,
                                        time,
                                        views: views.trim()
                                    })
                                }
                            }
                            resolve(arr)
                        } else {
                            resolve([])
                        }
                    })
            })
        }
    }
    
    //启动
    nodemon app.js
    //浏览器地址
    [http://127.0.0.1:3000/caiji/1](http://127.0.0.1:3000/caiji/1)
    第一页采集完成
    ![image.png](https://img.haomeiwen.com/i5814981/6d1c2143f10bc9ca.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
    
    

    相关文章

      网友评论

          本文标题:记录使用node写一个简易爬虫

          本文链接:https://www.haomeiwen.com/subject/kykloctx.html