使用 TypeScript 的一些语法特性(例如接口),实现一个简单的爬虫示例。
import path from 'path';
import fs from 'fs';
import superagent from 'superagent';
import cheerio from 'cheerio';
/** One scraped card: the image source and the title text. */
interface ImgInfo {
  /** Value of the card image's `src` attribute, or `undefined` when none was found. */
  imgItem: string | undefined;
  /** Text taken from the card's title link. */
  infoItem: string;
}
/** Result of a single crawl. */
interface JsonGenerate {
  /** Number of entries in `data`. */
  count: number;
  /** The scraped cards. */
  data: ImgInfo[];
  /** Millisecond timestamp taken when the result was assembled. */
  time: number;
}
/** Shape of the JSON store: crawl results keyed by a numeric timestamp. */
interface Generate {
  [timestamp: number]: JsonGenerate;
}
/**
 * Minimal crawler for the zcool.com.cn home page.
 *
 * Constructing an instance immediately starts one crawl: the page HTML is
 * downloaded, every `.card-box` element is reduced to an image URL and a
 * title, and the result is merged into `../data/data.json` (resolved
 * relative to this file) under the crawl's timestamp.
 */
class Crowller {
  private readonly url = 'https://www.zcool.com.cn/';

  constructor() {
    // A constructor cannot await, so the crawl runs in the background. The
    // rejection is handled here; otherwise a network or file-system failure
    // would surface as an unhandled promise rejection.
    this.initSpider().catch((error: unknown) => {
      console.error(`Crawl of ${this.url} failed:`, error);
      // Keep a failed crawl visible to whoever launched the script.
      process.exitCode = 1;
    });
  }

  /**
   * Runs one full crawl: download, extract, persist.
   *
   * @returns A promise that settles once the result has been written.
   * @throws Rejects with whatever the download, parsing or file steps throw.
   */
  async initSpider(): Promise<void> {
    const html = await this.getRowHtml();
    const result = this.getData(html);
    this.generateJson(result);
  }

  /**
   * Downloads the target page.
   *
   * @returns The response body as text.
   */
  async getRowHtml(): Promise<string> {
    const response = await superagent.get(this.url);
    return response.text;
  }

  /**
   * Extracts image/title pairs from the page markup using cheerio.
   *
   * @param html - Raw HTML of the page.
   * @returns The extracted entries, their count and the extraction timestamp.
   */
  getData(html: string): JsonGenerate {
    const $ = cheerio.load(html);
    const dataInfo: ImgInfo[] = [];

    // `.each` rather than `.map`: the callback is run only for its side effect.
    $('.card-box').each((_index, item) => {
      const card = $(item);
      dataInfo.push({
        imgItem: card.find('img').attr('src'),
        infoItem: card.find('.card-info-title a').text(),
      });
    });

    return {
      count: dataInfo.length,
      data: dataInfo,
      time: Date.now(),
    };
  }

  /**
   * Merges a crawl result into the local JSON store, keyed by `json.time`.
   *
   * @param json - Result to persist.
   * @throws Error when the existing store file does not hold a JSON object;
   *   JSON syntax errors and file-system errors propagate unchanged.
   */
  generateJson(json: JsonGenerate): void {
    const filePath = path.resolve(__dirname, '../data/data.json');
    let generate: Generate = {};

    if (fs.existsSync(filePath)) {
      const raw = fs.readFileSync(filePath, 'utf-8');
      // An empty file is not valid JSON; treat it as an empty store.
      if (raw.trim() !== '') {
        const parsed: unknown = JSON.parse(raw);
        if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
          throw new Error(`${filePath} does not contain a JSON object`);
        }
        generate = parsed as Generate;
      }
    }

    generate[json.time] = json;
    // writeFileSync does not create missing parent directories, so make sure
    // the data directory exists (a no-op when it already does).
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    fs.writeFileSync(filePath, JSON.stringify(generate));
  }
}
// Script entry point: constructing the crawler kicks off a crawl.
const crowller = new Crowller();
网友评论