封装 download.js
let http = require("http");
/**
 * Fetches `url` with `http.get` and hands the complete response body to
 * `callback`.
 *
 * @param {string} url - Address passed straight to `http.get`.
 * @param {function(?string): void} callback - Invoked exactly once: with the
 *   body decoded as a UTF-8 string when the response ends, or with `null`
 *   when the request fails or the response stream breaks off early.
 */
function download(url, callback) {
  let finished = false;

  // The request and the response stream can both report a failure, and
  // 'close' also follows a normal 'end'; only the first outcome is reported.
  function finish(result) {
    if (finished) {
      return;
    }
    finished = true;
    callback(result);
  }

  http.get(url, function (res) {
    // Decode at the stream level: concatenating raw Buffer chunks converts
    // each chunk on its own and corrupts a multi-byte character that happens
    // to be split across two chunks.
    res.setEncoding("utf8");

    let data = "";
    res.on("data", function (chunk) {
      data += chunk;
    });
    res.on("end", function () {
      finish(data);
    });
    res.on("error", function () {
      finish(null);
    });
    // A connection dropped mid-body closes the stream without 'end'.
    res.on("close", function () {
      finish(null);
    });
  }).on("error", function () {
    finish(null);
  });
}
exports.download = download;
index.js
let cheerio = require("cheerio");
let server = require("./download");
let request = require('request')
let fs = require('fs')
let url = "http://image.baidu.com/"
// Download the page at `url`, then save every image matched by the selector
// below into ./image/ as <index>.jpg. Failures are logged instead of crashing
// the process.
server.download(url, function (data) {
  if (!data) {
    console.log("error");
    return;
  }

  let $ = cheerio.load(data);
  $('div.img_pic_wrap_layer img').each(function (index, item) {
    let src = $(this).attr('src');
    // An <img> without a src attribute yields undefined, and
    // request(undefined) throws synchronously.
    if (!src) {
      return;
    }

    let img;
    try {
      // Resolve relative and protocol-relative ("//host/path") addresses
      // against the page URL so request() always gets an absolute one.
      img = new URL(src, url).href;
    } catch (e) {
      console.log("error", e.message);
      return;
    }

    // Both streams emit 'error' (network failure, missing ./image directory);
    // without a listener either one would terminate the process.
    request(img)
      .on('error', function (err) {
        console.log("error", err.message);
      })
      .pipe(fs.createWriteStream('./image/' + index + '.jpg'))
      .on('error', function (err) {
        console.log("error", err.message);
      });
  });
});
over
使用 express 脚手架搭建的 cnode 爬虫
let cheerio = require('cheerio')
let superagent = require('superagent')
let cnodeUrl = 'https://cnodejs.org'
/**
 * GET /cnode — waits for every page promise in the module-level `arr` and
 * responds with the collected results (one array of topics per page).
 *
 * `arr` is filled once when the module loads, so every request is answered
 * from the same set of promises.
 */
router.get('/cnode', (req, res, next) => {
  Promise.all(arr)
    .then(data => {
      console.log(data)
      // Without a response the client would hang until it times out.
      res.send(data)
    })
    // Pass a failed page fetch to Express's error handling instead of
    // leaving an unhandled rejection.
    .catch(next)
});
/**
 * Fetches one topic-list page and extracts a summary object per topic row.
 *
 * @param {string} url - Page address passed to `superagent.get`.
 * @returns {Promise<Array<Object>>} Resolves with one object per
 *   `#topic_list .cell` element (keys: avator, avatorImg, title, url, tab,
 *   last_time, clickNum). Rejects when the request fails or the response
 *   cannot be parsed.
 */
function getPageAsync(url) {
  return new Promise((resolve, reject) => {
    superagent.get(url).end((err, result) => {
      if (err) {
        reject(err)
        return
      }
      // An exception thrown inside this callback would not reject the
      // promise by itself, so parsing failures are caught and forwarded.
      try {
        let $ = cheerio.load(result.text)
        // Missing attributes come back as undefined; treat them as ''.
        let attrOf = (node, name) => (node.attr(name) || '').trim()
        let items = []
        $('#topic_list .cell').each((idx, ele) => {
          let $element = $(ele)
          let $avatar = $element.find('.user_avatar')
          let $titleLink = $element.find('.topic_title_wrapper a')
          items.push({
            avator: cnodeUrl + attrOf($avatar, 'href'),
            // .user_avatar is the element carrying href (see above), so the
            // picture is looked up as an <img> inside it rather than as a
            // nested <a> with a src attribute.
            avatorImg: $avatar.find('img').attr('src'),
            title: attrOf($titleLink, 'title'),
            url: cnodeUrl + attrOf($titleLink, 'href'),
            tab: $element.find('.topic_title_wrapper span').text().trim(),
            last_time: $element.find('.last_time span').text().trim(),
            clickNum: $element.find('.reply_count span').text().trim(),
          })
        })
        resolve(items)
      } catch (parseErr) {
        reject(parseErr)
      }
    })
  })
}
// Start fetching list pages 1 through 9 as soon as the module loads; `arr`
// holds the pending promises in page order. Each page URL is logged right
// before its request is started.
let arr = Array.from({ length: 9 }, (unused, offset) => {
  const pageUrl = cnodeUrl + '?tab=all&page=' + (offset + 1)
  console.log(pageUrl)
  return getPageAsync(pageUrl)
})
网友评论