美文网首页
nodejs读取网络数据并写入mongo

nodejs读取网络数据并写入mongo

作者: liwuwuzhi | 来源:发表于2021-04-13 15:36 被阅读0次
  1. 连接数据库并创建数据存储集合,例如要存储歌手对象的数据。

singerModel.js:

const mongoose = require('mongoose');
// Connection target: the `simple_music` database on the local MongoDB instance.
const MONGO_URI = 'mongodb://127.0.0.1:27017/simple_music'
const CONNECT_OPTIONS = { useNewUrlParser: true, useUnifiedTopology: true }

// Open the default mongoose connection as soon as this module is loaded.
mongoose.connect(MONGO_URI, CONNECT_OPTIONS)

// Field definitions for one stored singer document.
const singerFields = {
  // Singer id (required).
  id: { type: String, required: true },
  // Singer name (required, must contain at least one non-whitespace character).
  name: { type: String, required: true, validate: /\S+/ },
  // Singer avatar.
  avatar: String,
  // Songs.
  songList: Array,
  // Creation time; defaults to the moment the document is created.
  createAt: { type: Date, default: Date.now }
}

const SingerSchema = new mongoose.Schema(singerFields)

// Export the compiled `Singer` model.
module.exports = mongoose.model('Singer', SingerSchema)

在本地数据库 simple_music 创建 Singer 集合,存储歌手的 id、name、avatar、songList,还有该数据的创建时间。

  2. 用 nodejs 的 https 模块请求数据,并写入到刚才创建的集合中。

crawler.js:

/** mongoose */
const SingerModal = require('./singerModel.js')

/** nodejs https */
const https = require('https');

/**
 * Pause for a given amount of time.
 *
 * The delay used to be hard-coded to one second, so the argument passed by
 * callers (for example `sleep(5000)`) was silently ignored.
 *
 * @param {number} [ms=1000] - Delay in milliseconds; defaults to the previous fixed value.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
const sleep = (ms = 1000) => {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  })
}

/**
 * Remove a JSONP wrapper (`callbackName( ... )`) from a response body.
 *
 * Leading whitespace and trailing whitespace/semicolons are dropped first so
 * that bodies such as `cb({...});\n` are handled. If the body is not wrapped
 * in a call to `callbackName`, the cleaned body is returned unchanged.
 *
 * @param {string} text - Raw response body.
 * @param {string} callbackName - Name of the JSONP callback function.
 * @returns {string} The text between the wrapper's parentheses.
 */
const stripJsonp = (text, callbackName) => {
  const body = text.replace(/^\s+|[\s;]+$/g, '')
  const prefix = `${callbackName}(`

  if (body.startsWith(prefix) && body.endsWith(')')) {
    return body.slice(prefix.length, -1)
  }
  return body
}

/**
 * Perform an HTTPS GET request and resolve with the parsed JSON body.
 *
 * @param {string} url - Address to request.
 * @param {string} [jsonpCallback=''] - When non-empty, the body is treated as
 *   JSONP and the `jsonpCallback( ... )` wrapper is removed before parsing.
 * @returns {Promise<*>} Resolves with the value produced by `JSON.parse`;
 *   rejects on a request/response error or when the body is not valid JSON.
 */
const request = async function (url, jsonpCallback = '') {
  return new Promise((resolve, reject) => {
    https.get(url, (response) => {
      // Collect raw bytes and decode once at the end: decoding chunk by chunk
      // corrupts multi-byte UTF-8 characters that straddle a chunk boundary.
      const chunks = []

      response.on('data', (chunk) => {
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk))
      })
      response.on('error', reject)
      response.on('end', () => {
        // A throw inside this callback would escape the promise and surface as
        // an uncaught exception, so parsing failures are turned into rejections.
        try {
          const rawData = Buffer.concat(chunks).toString('utf8')
          const payload = jsonpCallback ? stripJsonp(rawData, jsonpCallback) : rawData

          resolve(JSON.parse(payload))
        } catch (error) {
          reject(error)
        }
      })

    }).on('error', (error) => {
      console.error(error)
      reject(error)
    })
  })
}

/**
 * A singer together with a short list of song names.
 */
class Singer {
  /**
   * @param {{ id: string, name: string }} param0 - Singer id and display name.
   *   The avatar URL is built from the id; `songList` starts empty.
   */
  constructor({ id, name }) {
    this.id = id
    this.name = name
    this.avatar = `https://y.gtimg.cn/music/photo_new/T001R300x300M000${id}.jpg?max_age=2592000`
    this.songList = []
  }

  /**
   * Fetch this singer's tracks and keep the first `limit` song names.
   *
   * The result is stored on `this.songList` and also returned. A response
   * without `data.list`, and list entries without a string
   * `musicData.songname`, are skipped instead of raising a TypeError.
   *
   * @param {number} [limit=3] - Maximum number of song names to keep.
   * @returns {Promise<string[]>} The stored song names.
   */
  async getSongList (limit = 3) {

    const jsonpCallback = '__jp1'

    const url = `https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg?g_tk=1928093487&inCharset=utf-8&outCharset=utf-8&notice=0&format=jsonp&hostUin=0&needNewCode=0&platform=yqq&order=listen&begin=0&num=80&songstatus=1&singermid=${this.id}&jsonpCallback=${jsonpCallback}`

    const res = await request(url, jsonpCallback)

    // The payload may lack `data` (e.g. an error response), so fall back to
    // an empty list rather than destructuring `undefined`.
    const data = (res && res.data) || {}
    const list = Array.isArray(data.list) ? data.list : []

    this.songList = list
      .map(e => e && e.musicData && e.musicData.songname)
      .filter(songname => typeof songname === 'string')
      .slice(0, limit)

    return this.songList
  }
}

/**
 * Build a Singer from one entry of the singer list and load its songs.
 *
 * @param {{ Fsinger_mid: string, Fsinger_name: string }} item - Raw list entry;
 *   `Fsinger_mid` becomes the singer id and `Fsinger_name` the name.
 * @returns {Promise<Singer>} The singer after `getSongList()` has completed.
 */
async function createSinger (item) {
  const { Fsinger_mid: id, Fsinger_name: name } = item
  const singer = new Singer({ id, name })

  await singer.getSongList()

  const songCount = singer.songList.length
  console.log(`----- ${singer.name}, songs count: ${songCount} -----------`)

  return singer
}



/**
 * Entry point: fetch the singer list, build the first few singers one at a
 * time, and store each of them through the SingerModal mongoose model.
 */
; (async () => {
  console.log('-------------- stat --------------')

  const jsonpCallback = '__jp0'

  const singerURL = `https://c.y.qq.com/v8/fcg-bin/v8.fcg?g_tk=1928093487&inCharset=utf-8&outCharset=utf-8&notice=0&format=jsonp&channel=singer&page=list&key=all_all_all&pagesize=100&pagenum=1&hostUin=0&needNewCode=0&platform=yqq&jsonpCallback=${jsonpCallback}`

  try {
    const res = await request(singerURL, jsonpCallback)

    // Tolerate a response without `data.list` instead of crashing.
    const data = (res && res.data) || {}
    const list = Array.isArray(data.list) ? data.list : []

    const COUNT = 5

    // slice() caps the batch at the number of singers actually returned, so a
    // short list never yields an undefined item.
    const batch = list.slice(0, COUNT)

    // A plain loop is required here: `forEach` does not wait for an async
    // callback, so the pauses and requests would all start at once.
    for (const item of batch) {
      await sleep(5000)
      const singer = await createSinger(item)

      // Store in the database; report success only when the save succeeded.
      try {
        await new SingerModal(singer).save()
        console.log('----------- save ----------')
        console.log(singer)
      } catch (error) {
        console.log("error: ", error)
      }
    }

    console.log('----------- end --------------')
  } catch (error) {
    // Without this handler a failed request would surface as an unhandled
    // promise rejection.
    console.error(error)
    process.exitCode = 1
  } finally {
    // Close the connection used by the model so the process can exit once
    // the crawl is finished.
    await SingerModal.db.close().catch(error => console.error(error))
  }
})()

在请求返回后,处理出自己想要的字段。例如类 Singer 中的属性 id、name、avatar、songList 对应 singerModel 中的 Schema。

  3. 执行该文件
node crawler.js

nodejs http.get

相关文章

网友评论

      本文标题:nodejs读取网络数据并写入mongo

      本文链接:https://www.haomeiwen.com/subject/ofhclltx.html