美文网首页
Nodejs爬虫爬取黄色网站全站,且以base64加密将资源存进

Nodejs爬虫爬取黄色网站全站,并以 Base64 编码将资源存进数据库

作者: 报告老师 | 来源:发表于2017-12-24 13:42 被阅读887次

    Talk is cheap, show you the code.

    'use strict';

    const http = require('http');

    const cheerio = require('cheerio');

    const mysql = require('mysql');

    // Settings for the MySQL link that receives the crawl results.
    // Nothing is opened here; the link is established later by connection.connect().
    const dbSettings = {
      host: '192.168.199.194',
      port: '3306',
      database: 'sjh',
      user: 'root',
      password: 'sjh'
    };

    // Shared connection handle used by the rest of the script.
    const connection = mysql.createConnection(dbSettings);

    // var url = 'http://www.runoob.com/nodejs/nodejs-tutorial.html';

    // Search-result paths to crawl; the search keywords are already percent-encoded.
    const paths = [
      '/index.php?s=video/search/wd/%E9%BB%84%E7%89%87',
      '/index.php?s=video/search/wd/26uuu',
      '/index.php?s=video/search/wd/%E6%83%85%E8%89%B2%E7%BD%91%E5%9D%80'
    ];

    // Site root that every path above is appended to.
    const address = 'http://www.yn213.com';

    // Absolute URLs to fetch: one per entry in `paths`, in the same order.
    const urls = paths.map(function (path) {
      return address + path;
    });

    // Print the joined URLs so the concatenation can be checked by eye.
    // (The original line had a stray comma after the semicolon, which was a
    // syntax error that prevented the script from running at all.)
    console.log(urls);

    // Establish the database link before any response can finish and try to insert.
    connection.connect();

    // Pages that have not yet been stored or given up on. When this reaches zero
    // the connection is closed so the process can exit instead of hanging.
    let pendingPages = urls.length;

    /** Marks one page as finished and closes the connection after the last one. */
    function finishPage() {
      pendingPages -= 1;
      if (pendingPages === 0) {
        // end() lets already-queued queries complete before disconnecting.
        connection.end();
      }
    }

    /**
     * Downloads one page, Base64-encodes its raw bytes and inserts the result
     * into the `html` table.
     *
     * @param {string} pageUrl - absolute URL of the page to fetch
     */
    function fetchAndStore(pageUrl) {
      // A failed transfer can surface on both the request and the response;
      // this flag makes sure the page is only counted as finished once.
      let settled = false;

      function settle() {
        if (!settled) {
          settled = true;
          finishPage();
        }
      }

      function reportFetchError(err) {
        console.log('抓取失败!' + err.stack);
        settle();
      }

      const request = http.get(pageUrl, function (res) {
        // Keep the raw Buffers: appending chunks to a string decodes each chunk
        // separately and corrupts multi-byte characters split across chunks.
        const chunks = [];

        res.on('data', function (chunk) {
          chunks.push(chunk);
        });

        // The event is named 'error'; the misspelled 'erro' never fired.
        res.on('error', reportFetchError);

        res.on('end', function () {
          // Base64-encode the page body (an encoding, not encryption).
          const encoded = Buffer.concat(chunks).toString('base64');

          // Pass the value through a placeholder instead of concatenating it
          // into the SQL text.
          const sql = 'insert into html (result) values(?)';

          connection.query(sql, [encoded], function (err) {
            if (err) {
              console.log('[INSERT ERROR] - ', err.message);
            } else {
              console.log('success!');
            }
            settle();
          });
        });
      });

      // Connection-level failures (DNS, refused, reset) are emitted on the
      // request object; without a listener they would crash the process.
      request.on('error', reportFetchError);
    }

    if (urls.length === 0) {
      // Nothing to crawl: close the link right away.
      connection.end();
    } else {
      urls.forEach(fetchAndStore);
    }

    SQL 代码:

    创建一个表用于存放结果

    -- Table and column names must match the crawler's
    -- "insert into html (result) ..." statement.
    create table html(result longtext);

    相关文章

      网友评论

          本文标题:Nodejs爬虫爬取黄色网站全站,并以 Base64 编码将资源存进数据库

          本文链接:https://www.haomeiwen.com/subject/sbqhgxtx.html