工具
项目目录结构

项目目录结构

package.json

babel
watcher.js
const chokidar = require('chokidar');
const shell = require('shelljs');
// Watch the current working directory, skipping paths that should never
// trigger a re-run. Each entry is a regular expression tested against the
// path; the dots in the file-name patterns are escaped so they match a
// literal "." rather than any character.
// NOTE(review): any file written inside the watched tree by the command run
// on 'change' would fire another 'change' event — confirm generated output
// files are covered by this list.
const watcher = chokidar.watch('.', {
  ignored: [
    /[\/\\]\./, // a path separator followed by a dot (dotfiles / dot-directories)
    /node_modules/,
    /vscode/,
    /babelrc/,
    /watcher\.js/,
    /package\.json/,
  ],
  persistent: true,
});
// Short alias for console.log, pre-bound to the console object.
const log = console.log.bind(console);

/**
 * Runs the shell command `clear && npm start` through shelljs.
 * Takes no arguments and returns nothing.
 */
function modify() {
  shell.exec('clear && npm start');
}
// Builds a listener that logs `<kind> <path> <outcome>` for a watcher event.
const announce = (kind, outcome) => (path) => {
  log(kind, path, outcome);
};

// Additions and removals are only reported; a changed file triggers modify().
watcher.on('add', announce('File', 'has been added'));
watcher.on('addDir', announce('Directory', 'has been added'));
watcher.on('change', () => {
  modify();
});
watcher.on('unlink', announce('File', 'has been removed'));
watcher.on('unlinkDir', announce('Directory', 'has been removed'));
watcher.on('error', (error) => {
  log('Error happened', error);
});
watcher.on('ready', () => {
  log('Initial scan complete. Ready for changes.');
});
// Disabled low-level tracing, kept for debugging:
// watcher.on('raw', (event, path, details) => { log('Raw event info:', event, path, details); });

// Run once immediately at startup, without waiting for the first change.
modify();
getPage.js
const http = require("http");
/**
 * Downloads a URL over HTTP and hands the response body to a callback.
 *
 * The body is collected as raw chunks and decoded as UTF-8 only after the
 * response has ended, so a multi-byte character that happens to be split
 * across two chunks is still decoded correctly.
 *
 * @param {string} url - Address passed straight to `http.get`.
 * @param {function(?string): void} callback - Invoked at most once, with the
 *   body text on success or with `null` when the request or the response
 *   stream emits an error.
 */
function download(url, callback) {
  let settled = false;
  // Guard so that an error arriving after 'end' (or vice versa) cannot
  // invoke the caller's callback a second time.
  const finish = (result) => {
    if (settled) {
      return;
    }
    settled = true;
    callback(result);
  };

  http.get(url, (res) => {
    const chunks = [];
    res.on('data', (chunk) => {
      // Chunks are Buffers unless an encoding was set on the stream.
      chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    });
    res.on('end', () => {
      finish(Buffer.concat(chunks).toString('utf8'));
    });
    res.on('error', () => {
      finish(null);
    });
  }).on('error', () => {
    finish(null);
  });
}
export default download;
index.js 数据抓取
// 抓取虾米主页的新碟首发
const cheerio = require("cheerio");
const fs = require('fs');
const path = require('path');
import getPage from './util/getPage';
// Address of the page to scrape; also used as the prefix for album links.
// Note: inside this module the name shadows the global `URL` constructor.
const URL = 'http://www.xiami.com/';

// Fetch the page, collect one record per child of `#albums .content_block`,
// and save the records as JSON in test.json next to this script.
getPage(URL, (data) => {
  const jsonObj = [];
  if (data) {
    const $ = cheerio.load(data);
    // `.children()` selects the child elements and `.each()` visits them;
    // `.children()` itself expects a selector, not an iteration callback.
    $('#albums').find('.content_block').children().each((i, e) => {
      const $image = $(e).find('.image');
      const $info = $image.next();
      jsonObj.push({
        img: $image.children('img').attr('src'),
        url: URL + $image.children('a').attr('href'),
        name: $info.find('a').text()
      });
    });
  } else {
    // Nothing usable came back; make the failure visible instead of
    // silently producing an empty result.
    console.error('No page data received from', URL);
  }
  // Write the scraped data to the output file (an empty array when nothing was scraped).
  fs.writeFile(path.resolve(__dirname, 'test.json'), JSON.stringify(jsonObj), (err) => {
    // Only report real failures; a successful write stays silent.
    if (err) {
      console.error(err);
    }
  });
});
网友评论