- 连接数据库并创建数据存储集合,例如要存储歌手对象的数据。
singerModel.js:
const mongoose = require('mongoose');
// Open mongoose's default connection to the local `simple_music` database.
const connectionOptions = {
  useNewUrlParser: true,
  useUnifiedTopology: true,
};
mongoose.connect('mongodb://127.0.0.1:27017/simple_music', connectionOptions);

const { Schema } = mongoose;

// Field definitions for one stored singer document.
const singerFields = {
  // Singer id (required).
  id: { type: String, required: true },
  // Singer name (required); validated against /\S+/, i.e. it must contain a non-whitespace character.
  name: { type: String, required: true, validate: /\S+/ },
  // Singer avatar (stored as a string).
  avatar: String,
  // Songs belonging to the singer.
  songList: Array,
  // Creation time; defaults to Date.now when not supplied.
  createAt: { type: Date, default: Date.now },
};

const SingerSchema = new Schema(singerFields);

// Export the model registered under the name 'Singer'.
module.exports = mongoose.model('Singer', SingerSchema);
在本地数据库 simple_music 中创建 Singer 集合,存储歌手的 id、name、avatar、songList 以及该数据的创建时间。
- 用 Node.js 的 https 模块请求数据,并写入到刚才创建的集合中。
crawler.js:
/** mongoose */
const SingerModal = require('./singerModel.js')
/** nodejs https */
const https = require('https');
/**
 * Returns a promise that resolves after a delay.
 *
 * The delay used to be fixed at 1000 ms even though callers pass a duration
 * (e.g. `sleep(5000)`); the argument is now honoured.
 *
 * @param {number} [ms=1000] - Delay in milliseconds; defaults to the previous fixed value.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
const sleep = (ms = 1000) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
/**
 * Removes a JSONP wrapper of the form `callbackName( ... )` from a response body.
 *
 * The body is returned unchanged when no callback name is given or when the
 * body does not start with `callbackName(`, so plain JSON passes through.
 *
 * @param {string} body - Raw response text.
 * @param {string} jsonpCallback - Name of the JSONP callback, or '' for none.
 * @returns {string} The text to hand to JSON.parse.
 */
const stripJsonpWrapper = (body, jsonpCallback) => {
  if (!jsonpCallback) return body;
  const text = body.trim();
  const prefix = `${jsonpCallback}(`;
  if (!text.startsWith(prefix)) return body;
  // Drop the closing parenthesis, tolerating an optional trailing semicolon.
  return text.slice(prefix.length).replace(/\)\s*;?$/, '');
};

/**
 * Performs an HTTPS GET and resolves with the parsed JSON (or JSONP) payload.
 *
 * @param {string} url - Address to fetch.
 * @param {string} [jsonpCallback=''] - JSONP callback name wrapped around the
 *   payload; any name is supported, not only `__jp0` / `__jp1`.
 * @returns {Promise<*>} Resolves with the parsed payload. Rejects on a request
 *   error, a response stream error, or a body that is not valid JSON.
 */
const request = function (url, jsonpCallback = '') {
  return new Promise((resolve, reject) => {
    https.get(url, (response) => {
      // Decode as UTF-8 at the stream level so a multi-byte character split
      // across two chunks is not corrupted by per-chunk string conversion.
      response.setEncoding('utf8');
      let rawData = '';
      response.on('data', (chunk) => { rawData += chunk; });
      response.on('error', reject);
      response.on('end', () => {
        // A throw inside this event callback would escape the promise and
        // crash the process, so parse failures are turned into rejections.
        try {
          resolve(JSON.parse(stripJsonpWrapper(rawData, jsonpCallback)));
        } catch (error) {
          reject(error);
        }
      });
    }).on('error', (error) => {
      console.error(error);
      reject(error);
    });
  });
};
/**
 * A singer record assembled from the QQ Music endpoints: id, name, an avatar
 * URL derived from the id, and a short list of song names.
 */
class Singer {
  /**
   * @param {{ id: string, name: string }} singer - Singer id (used as the
   *   `singermid` query value and in the avatar URL) and display name.
   */
  constructor({ id, name }) {
    this.id = id;
    this.name = name;
    this.avatar = `https://y.gtimg.cn/music/photo_new/T001R300x300M000${id}.jpg?max_age=2592000`;
    this.songList = [];
  }

  /**
   * Fetches this singer's tracks and stores the first few song names on
   * `this.songList`.
   *
   * @param {number} [limit=3] - Maximum number of song names to keep.
   * @returns {Promise<string[]>} The stored song names; empty when the
   *   response carries no track list.
   */
  async getSongList (limit = 3) {
    const jsonpCallback = '__jp1';
    // Build the query with URLSearchParams. The previous hand-written string
    // contained "¬ice=0" (an HTML-entity mangling of "&notice=0"), which
    // corrupted the outCharset and notice parameters.
    const url = new URL('https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg');
    const query = {
      g_tk: '1928093487',
      inCharset: 'utf-8',
      outCharset: 'utf-8',
      notice: '0',
      format: 'jsonp',
      hostUin: '0',
      needNewCode: '0',
      platform: 'yqq',
      order: 'listen',
      begin: '0',
      num: '80',
      songstatus: '1',
      singermid: this.id,
      jsonpCallback,
    };
    Object.entries(query).forEach(([key, value]) => url.searchParams.set(key, value));

    const res = await request(url.href, jsonpCallback);
    // Tolerate a response without `data` or with a non-array `list`.
    const list = (res && res.data && Array.isArray(res.data.list)) ? res.data.list : [];
    // Skip entries lacking `musicData`, and trim before mapping so only the
    // kept entries are touched.
    this.songList = list
      .filter((entry) => entry && entry.musicData)
      .slice(0, limit)
      .map((entry) => entry.musicData.songname);
    return this.songList;
  }
}
/**
 * Builds a Singer from one raw list entry and loads its song list.
 *
 * @param {{ Fsinger_mid: string, Fsinger_name: string }} item - Raw entry
 *   whose `Fsinger_mid` / `Fsinger_name` become the singer's id / name.
 * @returns {Promise<Singer>} The singer, after `getSongList()` has completed.
 */
async function createSinger (item) {
  const { Fsinger_mid: id, Fsinger_name: name } = item;
  const created = new Singer({ id, name });
  await created.getSongList();
  const total = created.songList.length;
  console.log(`----- ${created.name}, songs count: ${total} -----------`);
  return created;
}
; (async () => {
  // Entry point: fetch the singer list, then for the first few singers load
  // their songs and store each one through SingerModal.
  console.log('-------------- stat --------------');
  const jsonpCallback = '__jp0';
  // The query previously contained "¬ice=0" (an HTML-entity mangling of
  // "&notice=0"), which corrupted the outCharset and notice parameters.
  const singerURL = `https://c.y.qq.com/v8/fcg-bin/v8.fcg?g_tk=1928093487&inCharset=utf-8&outCharset=utf-8&notice=0&format=jsonp&channel=singer&page=list&key=all_all_all&pagesize=100&pagenum=1&hostUin=0&needNewCode=0&platform=yqq&jsonpCallback=${jsonpCallback}`;
  const res = await request(singerURL, jsonpCallback);
  // Tolerate a response without `data` or with a non-array `list`.
  const list = (res && res.data && Array.isArray(res.data.list)) ? res.data.list : [];
  const COUNT = 5;
  // Never index past the end of the list when fewer than COUNT singers come back.
  const total = Math.min(COUNT, list.length);
  for (let i = 0; i < total; i++) {
    // Pause before each singer (presumably to throttle requests to the API).
    await sleep(5000);
    const singer = await createSinger(list[i]);
    // Store in the database; a failed save is logged and the loop continues.
    try {
      await new SingerModal(singer).save();
      console.log('----------- save ----------');
      console.log(singer);
    } catch (error) {
      console.log("error: ", error);
    }
  }
  // forEach does not wait for async callbacks, which is why a plain for loop is used above:
  // list.forEach(async (item, index) => {
  // await sleep(3000)
  // console.log('-------------------------------')
  // let singer = await createSinger(item)
  // })
  console.log('----------- end --------------');
})().catch((error) => {
  // Surface failures (network, parsing, ...) instead of leaving an unhandled rejection.
  console.error(error);
  process.exitCode = 1;
});
在请求返回后,处理出自己想要的字段。例如,类 Singer 中的属性 id、name、avatar、songList 对应 singerModel 中 Schema 的同名字段。
- 执行该文件
node crawler.js

网友评论