美文网首页
nodeJS数据抓取

Node.js 数据抓取

作者: 9ac64e1f7a99 | 来源:发表于2017-05-08 04:07 被阅读1066次

    工具

    项目目录结构

    项目目录结构
    package.json
    babel

    watcher.js

    const chokidar = require('chokidar');
    const shell = require('shelljs');
    
    
    // Watch the project directory and re-run `npm start` whenever a source file
    // changes. File-name patterns escape the dot so they match the literal name
    // (an unescaped `.` is a regex wildcard).
    const watcher = chokidar.watch('.', {
      ignored: [
        /[\/\\]\./,
        /node_modules/,
        /vscode/,
        /babelrc/,
        /watcher\.js/,
        /package\.json/,
        // index.js writes test.json inside the watched tree; presumably
        // `npm start` runs that script, in which case every run would fire a
        // 'change' event and restart itself in a loop unless it is ignored.
        /test\.json/,
      ],
      persistent: true,
    });
    
    const log = console.log.bind(console);
    
    // Clear the terminal and run the project's start script.
    const modify = () => {
      shell.exec('clear && npm start');
    };
    
    watcher
      .on('add', (path) => { log('File', path, 'has been added'); })
      .on('addDir', (path) => { log('Directory', path, 'has been added'); })
      .on('change', () => { modify(); })
      .on('unlink', (path) => { log('File', path, 'has been removed'); })
      .on('unlinkDir', (path) => { log('Directory', path, 'has been removed'); })
      .on('error', (error) => { log('Error happened', error); })
      .on('ready', () => { log('Initial scan complete. Ready for changes.'); });
    
    // Run once at startup so output is produced before the first change.
    modify();
    
    

    getPage.js

    const http = require("http");
    
    /**
     * Download `url` with `http.get` and hand the response body to `callback`.
     *
     * The response stream is decoded as UTF-8 before chunks are concatenated, so
     * a multi-byte character that straddles two chunks is not corrupted.
     *
     * @param {string|Object} url - Passed unchanged to `http.get`.
     * @param {function(?string): void} callback - Called exactly once: with the
     *   body text on success, or with `null` if the request or the response
     *   stream reports an error.
     */
    function download(url, callback) {
      let settled = false;
      // Several events can signal completion; only the first one is reported.
      const finish = (result) => {
        if (settled) {
          return;
        }
        settled = true;
        callback(result);
      };
    
      http.get(url, (res) => {
        let data = "";
        // Let the stream decode bytes; `data += chunk` on raw Buffers would
        // stringify each chunk separately and break split characters.
        res.setEncoding('utf8');
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('end', () => {
          finish(data);
        });
        res.on('error', () => {
          finish(null);
        });
      }).on('error', () => {
        finish(null);
      });
    }
    
    export default download;
    

    index.js 数据抓取

    // 抓取虾米主页的新碟首发
    const cheerio = require("cheerio");
    const fs = require('fs');
    const path = require('path');
    import getPage from './util/getPage';
    
    // Page to scrape, and the base against which album links are resolved.
    // Named BASE_URL so the global `URL` class is not shadowed.
    const BASE_URL = 'http://www.xiami.com/';
    
    // Resolve a link taken from the page against BASE_URL. Returns null when the
    // anchor has no href or the href cannot be parsed, instead of producing
    // strings such as "http://www.xiami.com/undefined" or a doubled slash.
    const resolveLink = (href) => {
      if (!href) {
        return null;
      }
      try {
        return new URL(href, BASE_URL).href;
      } catch (err) {
        return null;
      }
    };
    
    getPage(BASE_URL, (data) => {
      const albums = [];
      if (data) {
        const $ = cheerio.load(data);
        // Visit each direct child of the album block; `.each` is the iterator,
        // whereas `.children(fn)` treats the function as a filter predicate.
        $('#albums').find('.content_block').children().each((i, e) => {
          const $image = $(e).find('.image');
          const $info = $image.next();
          albums.push({
            img: $image.children('img').attr('src'),
            url: resolveLink($image.children('a').attr('href')),
            name: $info.find('a').text(),
          });
        });
      } else {
        // getPage reports a failed download as a falsy value.
        console.error('Failed to download', BASE_URL);
      }
      // Write the scraped data to a file next to this script (an empty array is
      // written when nothing was scraped, as before).
      fs.writeFile(path.resolve(__dirname, 'test.json'), JSON.stringify(albums), (err) => {
        if (err) {
          console.error(err);
        }
      });
    });
    

    相关文章

      网友评论

          本文标题:nodeJS数据抓取

          本文链接:https://www.haomeiwen.com/subject/pwaptxtx.html