美文网首页
node+request+cheerio+iconv-lite爬

node+request+cheerio+iconv-lite爬

作者: adtk | 来源:发表于2017-05-22 10:47 被阅读0次

    http://www.99css.com/nodejs-request-chinese-encoding/

    步骤:
    1. 用 request 获取 HTML 源码(encoding 设为 null,拿到未解码的二进制 Buffer)
    2. 用 iconv-lite 按 GBK 解码为字符串
    3. 用 cheerio 像 jQuery 一样查询 DOM

    var fs = require("fs");
    var request = require("request");// 请求
    var cheerio = require("cheerio");//cheerio 解析 HTML
    var iconv = require('iconv-lite');//Node.js 抓取非 utf-8 的中文网页时会出现乱码问题
    
    
    /**
     * Minimal scraper for a GBK-encoded site: downloads a page, decodes it,
     * pulls out the <h1> and #content fragments and saves the content to disk.
     *
     * Intended to be used with `new`; the methods are attached to the instance.
     */
    function objFn() {
        var _this = this;

        /**
         * Downloads one page and forwards its title/content to contentEach.
         * Failures are reported on stderr; nothing is written in that case.
         *
         * @param {string} url - Page file name appended to the fixed base URL.
         */
        this.getContent = function(url) {
            request({
                url: "http://www.31xs.net/0/102/" + url,
                method: "GET",
                // encoding: null keeps the body as raw bytes so that it can be
                // decoded from GBK below instead of being mangled as UTF-8.
                encoding: null
            }, function(error, response, body) {
                if (error) {
                    // Previously a failed request was dropped without a trace.
                    console.error("request failed for " + url, error);
                    return;
                }
                if (!response || response.statusCode !== 200) {
                    console.error("unexpected response for " + url + ": " +
                        (response ? response.statusCode : "no response"));
                    return;
                }

                var html = iconv.decode(body, "GBK"); // GBK bytes -> JS string
                // decodeEntities: false is kept from the original, whose comment
                // says it addresses a Unicode encoding problem.
                var $ = cheerio.load(html, {decodeEntities: false});

                _this.contentEach($("h1").html(), $("#content").html());
            });
        };

        /**
         * Writes the extracted content to ./a.txt (overwriting any existing file).
         *
         * @param {?string} title - Inner HTML of the page's <h1>; currently unused.
         * @param {?string} content - Inner HTML of #content; null when the
         *     element was not found, in which case nothing is written.
         */
        this.contentEach = function(title, content) {
            if (content == null) {
                // .html() yields null for an empty selection; fs.writeFile would
                // throw on it, so bail out with a diagnostic instead.
                console.error("no #content found; nothing written");
                return;
            }
            fs.writeFile('./a.txt', content, function(err) {
                if (err) {
                    console.error(err);
                }
            });
        };
    }
    // Entry point: build the scraper and fetch a single chapter page.
    var obj = new objFn();
    obj.getContent('4954112.html');
    

    相关文章

      网友评论

          本文标题:node+request+cheerio+iconv-lite爬

          本文链接:https://www.haomeiwen.com/subject/cwaktxtx.html