nodeJS数据抓取

作者: 9ac64e1f7a99 | 来源:发表于2017-05-08 04:07 被阅读1066次

nodeJS数据抓取
NodeJS 用jsdom抓取html数据
nodejs request CURl 抓取网页数据
04_中央气象台
iOS 防止 Charles 抓取数据
03_中央气象台
requests-code说明
服务端nodejs抓取jsonp接口数据
人人都会数据分析大纲
NodeJs的一次实用（定时抓取数据）

工具

项目目录结构

package.json

babel

watcher.js

const chokidar = require('chokidar');
const shell = require('shelljs');


// Watch the current directory, skipping every path that must not trigger a
// restart. Each pattern is tested against the path chokidar reports.
const watcher = chokidar.watch('.', {
  ignored: [
    /[\/\\]\./, // any path segment that starts with a dot, below the root
    /node_modules/,
    /vscode/,
    /babelrc/,
    // Dots are escaped so they match a literal "." rather than any character.
    /watcher\.js/,
    /package\.json/,
    // index.js rewrites test.json on every run; if that file lives under the
    // watched tree, its change event would re-trigger the restart in a loop.
    /test\.json/,
  ],
  persistent: true,
});

// Console logger bound once, so it can be passed around as a plain function.
const log = console.log.bind(console);

/**
 * Clears the terminal and runs `npm start` through shelljs.
 */
function modify() {
  const command = 'clear && npm start';
  shell.exec(command);
}

// Builds a listener that logs "<subject> <path> <outcome>" for the reported path.
const announce = (subject, outcome) => (path) => {
  log(subject, path, outcome);
};

// Additions and removals are only reported.
watcher.on('add', announce('File', 'has been added'));
watcher.on('addDir', announce('Directory', 'has been added'));

// A changed file re-runs the project; the changed path itself is not needed.
watcher.on('change', () => {
  modify();
});

watcher.on('unlink', announce('File', 'has been removed'));
watcher.on('unlinkDir', announce('Directory', 'has been removed'));

watcher.on('error', (error) => {
  log('Error happened', error);
});
watcher.on('ready', () => {
  log('Initial scan complete. Ready for changes.');
});
// A 'raw' listener (event, path, details) can be attached here for low-level debugging.

// Run once at startup instead of waiting for the first change event.
modify();

getPage.js

const http = require("http");

// Utility function that downloads a URL and invokes
// callback with the data.
/**
 * Downloads `url` over HTTP and hands the response body to `callback`.
 *
 * The body is decoded as UTF-8 by the stream itself, so multi-byte characters
 * that straddle two network chunks are reassembled correctly. The HTTP status
 * code is not inspected: the body of any response is passed through as-is.
 *
 * @param {string} url - Address to fetch (plain `http:`).
 * @param {function(?string): void} callback - Called exactly once, with the
 *   body text on success or `null` when the request or response stream fails.
 */
function download(url, callback) {
  let settled = false;

  // Ensures the callback fires only once, even if an error follows an 'end'.
  const finish = (result) => {
    if (settled) {
      return;
    }
    settled = true;
    callback(result);
  };

  // Report the cause instead of discarding it, then signal failure with null.
  const fail = (err) => {
    console.error(`download failed for ${url}: ${err && err.message}`);
    finish(null);
  };

  http.get(url, (res) => {
    // Decode in the stream; concatenating raw Buffers would corrupt any
    // character whose bytes are split across chunk boundaries.
    res.setEncoding('utf8');

    let data = '';
    res.on('data', (chunk) => {
      data += chunk;
    });
    res.on('end', () => {
      finish(data);
    });
    res.on('error', fail);
  }).on('error', fail);
}

export default download;

index.js 数据抓取

// 抓取虾米主页的新碟首发
const cheerio = require("cheerio");
const fs = require('fs');
const path = require('path');
import getPage from './util/getPage';

// Named BASE_URL rather than URL so the global WHATWG `URL` class stays usable.
const BASE_URL = 'http://www.xiami.com/';
const OUTPUT_FILE = path.resolve(__dirname, 'test.json');

/**
 * Resolves an album link against the site root.
 *
 * @param {string|undefined} href - Value of the anchor's `href` attribute.
 * @returns {string|undefined} Absolute URL, or `undefined` when there is no
 *   href (the key is then omitted from the JSON output).
 */
function toAbsoluteUrl(href) {
  if (!href) {
    return undefined;
  }
  // Proper resolution avoids "//" for root-relative links and leaves
  // already-absolute links intact, unlike plain string concatenation.
  return new URL(href, BASE_URL).href;
}

/**
 * Extracts the album entries from the home page markup.
 *
 * @param {string} html - Raw HTML of the home page.
 * @returns {Array<{img: (string|undefined), url: (string|undefined), name: string}>}
 *   One record per child of `#albums .content_block`.
 */
function parseAlbums(html) {
  const $ = cheerio.load(html);
  const albums = [];
  $('#albums')
    .find('.content_block')
    .children()
    .each((i, e) => {
      const $image = $(e).find('.image');
      // The element following `.image` holds the album's text/link info.
      const $info = $image.next();
      albums.push({
        img: $image.children('img').attr('src'),
        url: toAbsoluteUrl($image.children('a').attr('href')),
        name: $info.find('a').text(),
      });
    });
  return albums;
}

getPage(BASE_URL, (data) => {
  if (!data) {
    // Keep the previous test.json rather than overwriting it with "[]".
    console.error(`Failed to download ${BASE_URL}; ${OUTPUT_FILE} left unchanged.`);
    return;
  }

  // Write the scraped data to the output file.
  fs.writeFile(OUTPUT_FILE, JSON.stringify(parseAlbums(data)), (err) => {
    if (err) {
      console.error(`Failed to write ${OUTPUT_FILE}:`, err);
      return;
    }
    console.log(`Saved album data to ${OUTPUT_FILE}`);
  });
});

网友评论

本文标题：nodeJS数据抓取

本文链接：https://www.haomeiwen.com/subject/pwaptxtx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！

nodeJS数据抓取

工具

项目目录结构

watcher.js

getPage.js

index.js 数据抓取

相关文章

nodeJS数据抓取

NodeJS 用jsdom抓取html数据

nodejs request CURl 抓取网页数据

04_中央气象台

iOS 防止 Charles 抓取数据

03_中央气象台

requests-code说明

服务端nodejs抓取jsonp接口数据

人人都会数据分析大纲

NodeJs的一次实用（定时抓取数据）

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读