http://www.99css.com/nodejs-request-chinese-encoding/
步骤：
1. 获取 HTML 源码
2. 使用 iconv-lite 对源码进行解码
3. 使用 cheerio 像 jQuery 一样获取 DOM
var fs = require("fs");
var request = require("request");// 请求
var cheerio = require("cheerio");//cheerio 解析 HTML
var iconv = require('iconv-lite');//Node.js 抓取非 utf-8 的中文网页时会出现乱码问题
/**
 * Constructor for a small page scraper: downloads one GBK-encoded page,
 * extracts its <h1> and #content markup, and saves the content to ./a.txt.
 *
 * Relies on the file-level `request`, `iconv` (iconv-lite), `cheerio` and
 * `fs` bindings.
 */
function objFn() {
    var _this = this;

    /**
     * Fetches a page located under http://www.31xs.net/0/102/, decodes it from
     * GBK and hands the <h1> and #content inner HTML to `contentEach`.
     * Failures (transport error, non-200 status) are logged and nothing is
     * forwarded.
     *
     * @param {string} url - Page file name appended to the base URL.
     */
    this.getContent = function(url) {
        request({
            url: "http://www.31xs.net/0/102/" + url,
            method: "GET",
            // gzip:true,
            encoding: null // do not let request decode the body; keep it binary for iconv
        }, function(error, response, body) {
            if (error) {
                // Previously dropped silently; surface it so a failed fetch is visible.
                console.error(error);
                return;
            }
            if (!response || response.statusCode !== 200) {
                // Do not save an error page as if it were the chapter content.
                console.error(new Error(
                    "Unexpected HTTP status " + (response && response.statusCode) + " for " + url
                ));
                return;
            }
            var html = iconv.decode(body, "GBK"); // page is served as GBK, not UTF-8
            var $ = cheerio.load(html, {decodeEntities: false}); // keep characters as-is instead of entity-encoding them
            _this.contentEach($("h1").html(), $("#content").html());
        });
    };

    /**
     * Writes the extracted content to ./a.txt (overwriting any previous file).
     * When `content` is not a string (e.g. the selector matched nothing) the
     * write is skipped and an error is logged instead of passing an invalid
     * value to `fs.writeFile`.
     *
     * @param {?string} title - Page heading; used only in the diagnostic message.
     * @param {?string} content - Markup to persist.
     */
    this.contentEach = function(title, content) {
        if (typeof content !== "string") {
            console.error(new Error("No content extracted for page: " + title));
            return;
        }
        fs.writeFile('./a.txt', content, function(err) {
            // Only report real failures; a successful write passes a null error.
            if (err) {
                console.error(err);
            }
        });
    };
}
// Script entry point: build the scraper and fetch a single page by file name.
var obj = new objFn();
var pageFile = '4954112.html';
obj.getContent(pageFile);
网友评论