最近喜欢用Node.js(node-fetch+cheerio)来写简单的爬虫。这几天在爬视频的时候发现有几个视频网站做了IP限制,即在一段时间内访问次数过多时,会提示需要过一会儿才能继续访问。所以就研究了下node-fetch的代理请求来解决这个问题。这里就简单地告诉大家如何设置node-fetch的http/https代理。
先分享一些资源
免费代理查询网站:
http IP查询接口:
- http://ip-api.com/json
- http://pv.sohu.com/cityjson
- http://ip.ws.126.net/ipquery
- http://whois.pconline.com.cn/ipJson.jsp
https IP查询接口:
HTTP 代理请求示例:
从代理网站上获取匿名代理服务器的IP与端口
然后通过http-proxy-agent模块进行代理
/*
 * Example: send an HTTP GET through an HTTP proxy using node-fetch and
 * http-proxy-agent, then print the response headers and body.
 */
const fetch = require("node-fetch");
const HttpProxyAgent = require('http-proxy-agent');

// Alternative IP-lookup endpoints that can be swapped in for testing:
// let url="http://pv.sohu.com/cityjson";
// let url="http://ip-api.com/json";
const url = "http://whois.pconline.com.cn/ipJson.jsp";

// Placeholders: replace with the address and port of the proxy server to use.
const ip = '代理服务的IP';
const port = '代理服务的端口';

fetch(url, {
    method: 'GET',
    // body: null,
    redirect: 'follow', // set to `manual` to extract redirect headers, `error` to reject redirect
    timeout: 10000, // ms
    // Route the request through the proxy instead of connecting directly.
    agent: new HttpProxyAgent(`http://${ip}:${port}`)
}).then((res) => {
    console.log("Response Headers ============ ");
    res.headers.forEach((value, name) => {
        console.log(name + " : " + value);
    });
    return res.text();
}).then((body) => {
    console.log("Response Body ============ ");
    console.log(body);
}).catch((err) => {
    // An unreachable proxy or a timeout rejects the chain; report it
    // explicitly instead of leaving an unhandled rejection.
    console.error("Request failed:", err);
    process.exitCode = 1;
});
测试
这边顺便处理下GBK显示乱码的问题
GBK解析参考:
/*
 * Example: send an HTTP GET through an HTTP proxy and decode the response
 * body with iconv-lite, so that GBK-encoded pages are not printed as
 * garbled text.
 */
const fetch = require("node-fetch");
const HttpProxyAgent = require('http-proxy-agent');
const iconv = require('iconv-lite');

// Alternative IP-lookup endpoints that can be swapped in for testing:
// let url = "http://pv.sohu.com/cityjson";
// let url="http://ip-api.com/json";
const url = "http://whois.pconline.com.cn/ipJson.jsp";

// Placeholders: replace with the address and port of the proxy server to use.
const ip = '代理服务的IP';
const port = '代理服务的端口';

/**
 * Extract the charset parameter from a Content-Type header value.
 *
 * Matching is case-insensitive, tolerates optional quotes, and stops at the
 * next `;` so that trailing parameters are not mistaken for the charset.
 *
 * @param {string|null|undefined} contentType - Raw Content-Type header value.
 * @returns {string} Lower-cased charset name, or 'utf-8' when none is declared.
 */
function detectCharset(contentType) {
    const match = /charset\s*=\s*"?([^";\s]+)/i.exec(contentType || '');
    return match ? match[1].toLowerCase() : 'utf-8';
}

fetch(url, {
    method: 'GET',
    // body: null,
    redirect: 'follow', // set to `manual` to extract redirect headers, `error` to reject redirect
    timeout: 10000, // ms
    // Route the request through the proxy instead of connecting directly.
    agent: new HttpProxyAgent(`http://${ip}:${port}`)
}).then((res) => {
    console.log("Response Headers ============ ");
    let contentType = null;
    res.headers.forEach((value, name) => {
        console.log(name + " : " + value);
        if (name.toLowerCase() === "content-type") {
            contentType = value;
        }
    });
    // A missing Content-Type header falls back to UTF-8 instead of throwing.
    const charset = detectCharset(contentType);
    // Read raw bytes: res.text() would decode as UTF-8 and corrupt GBK content.
    return res.buffer().then((raw) => iconv.decode(raw, charset === "gbk" ? 'GBK' : 'UTF-8'));
}).then((body) => {
    console.log("Response Body ============ ");
    console.log(body);
}).catch((err) => {
    // An unreachable proxy or a timeout rejects the chain; report it
    // explicitly instead of leaving an unhandled rejection.
    console.error("Request failed:", err);
    process.exitCode = 1;
});
测试
HTTPS 代理请求示例:
通过https-proxy-agent模块进行代理
/*
 * Example: send an HTTPS GET through a proxy using node-fetch and
 * https-proxy-agent, then print the response headers and body.
 */
const fetch = require("node-fetch");
const HttpsProxyAgent = require('https-proxy-agent');

// Alternative IP-lookup endpoint that can be swapped in for testing:
// let url="https://ip.nf/me.json";
const url = "https://pv.sohu.com/cityjson?ie=utf-8";

// Placeholders: replace with the address and port of the proxy server to use.
const ip = '代理服务的IP';
const port = '代理服务的端口';

fetch(url, {
    method: 'GET',
    // body: null,
    redirect: 'follow', // set to `manual` to extract redirect headers, `error` to reject redirect
    timeout: 10000, // ms
    // Note: the proxy URL itself uses `http://`, even though the target URL is https.
    agent: new HttpsProxyAgent(`http://${ip}:${port}`)
}).then((res) => {
    console.log("Response Headers ============ ");
    res.headers.forEach((value, name) => {
        console.log(name + " : " + value);
    });
    return res.text();
}).then((body) => {
    console.log("Response Body ============ ");
    console.log(body);
}).catch((err) => {
    // An unreachable proxy or a timeout rejects the chain; report it
    // explicitly instead of leaving an unhandled rejection.
    console.error("Request failed:", err);
    process.exitCode = 1;
});
测试
网友评论