美文网首页让前端飞
用node爬点羞羞图吧~2019-01-24

用node爬点羞羞图吧~2019-01-24

作者: littleyu | 来源:发表于2019-01-24 17:06 被阅读3次
npm i request request-promise cheerio
node index.js
const rp = require("request-promise")
const fs = require("fs")
const cheerio = require("cheerio")

/**
 * Crawls a paginated gallery index and saves the images of every gallery
 * into a folder (named after the gallery) next to this script.
 *
 * Constructing an instance starts the crawl immediately. The promise for the
 * whole run is exposed as `done`; it never rejects because failures are
 * logged by the constructor's catch handler.
 */
class Download {
  /**
   * @param {string} url - Index URL. Listing pages are requested as
   *   `${url}page/<n>`, so the URL is expected to end with "/".
   */
  constructor(url){
    this.curPage = 1
    this.pageTotal = null
    // A constructor cannot await, so keep the promise and attach a catch:
    // otherwise any network error would surface as an unhandled rejection.
    this.done = this.init(url)
      .then(() => this.main(url))
      .catch((err) => {
        console.error('抓取失败:', err)
      })
  }

  /**
   * Fetches the number of listing pages, caches it on the instance and logs it.
   * @param {string} url - Index URL.
   * @returns {Promise<number>} Total number of listing pages.
   */
  async init(url) {
    const pageTotal = await this.getPageTotal(url)
    this.pageTotal = pageTotal
    console.log(`成功获取到 ${pageTotal} 个页面,开始分页获取~`)
    return pageTotal
  }

  /**
   * Walks the listing pages from `this.curPage` to the last one and downloads
   * every gallery found on each page, strictly one after another.
   * @param {string} url - Index URL.
   * @returns {Promise<void>} Resolves when the last page has been processed.
   */
  async main(url){
    // Reuse the total cached by init(); only fetch it when main() is called
    // on its own. This avoids re-downloading the index after every page.
    const pageTotal = this.pageTotal != null
      ? this.pageTotal
      : await this.getPageTotal(url)

    // A loop instead of un-awaited recursion, so the returned promise covers
    // the whole crawl and errors propagate to the caller.
    while (this.curPage <= pageTotal) {
      const pageList = await this.getPageData(url, this.curPage)
      for (let i = 0; i < pageList.length; i++) {
        const $ = await this.getPage(pageList[i].url)
        console.log(`开始下载第 ${this.curPage} 页,第${i + 1} 组的图片,共 ${this.getImageCount($)} 张......`)
        await this.mkdirFolder($, pageList[i])
      }
      this.curPage += 1
    }
  }

  /**
   * Downloads a page and parses it with cheerio.
   * @param {string} url - Page URL.
   * @returns {Promise<Function>} The cheerio query function for the page.
   */
  async getPage(url) {
    return rp({
      url,
      transform: (body) => cheerio.load(body),
    })
  }

  /**
   * Reads the number of listing pages from the index page's pagination.
   * @param {string} url - Index URL.
   * @returns {Promise<number>} Page count; 1 when no number can be read, so
   *   that at least the first page is still crawled.
   */
  async getPageTotal(url){
    const $ = await this.getPage(url)
    // The second-to-last pagination link is read as the page count
    // (presumably the last link is "next" - verify against the site markup).
    const total = Number.parseInt($('.nav-links a').eq(-2).text(), 10)
    return Number.isNaN(total) ? 1 : total
  }

  /**
   * Reads the number of images of a gallery from its pagination.
   * @param {Function} $ - cheerio query function of a gallery page.
   * @returns {number} Image count; 0 when no number can be read.
   */
  getImageCount($) {
    const count = Number.parseInt($('.pagenavi a').eq(-2).find('span').text(), 10)
    return Number.isNaN(count) ? 0 : count
  }

  /**
   * Collects the galleries listed on one listing page.
   * @param {string} url - Index URL.
   * @param {number} curPage - 1-based listing page number.
   * @returns {Promise<Array<{name: string, url: string}>>} Gallery titles
   *   (from the thumbnail `alt`) and gallery URLs (from the enclosing link).
   */
  async getPageData(url, curPage){
    const $ = await this.getPage(`${url}page/${curPage}`)
    const pageList = []
    $('#pins li img').each((index, img) => {
      const name = $(img).attr('alt')
      const href = $(img).parent().attr('href')
      // A thumbnail without a title or link cannot be downloaded or stored.
      if (name && href) {
        pageList.push({ name, url: href })
      }
    })
    return pageList
  }

  /**
   * Creates the folder for one gallery and downloads its images into it.
   * A gallery whose folder already exists is skipped.
   * @param {Function} $ - cheerio query function of the gallery's first page.
   * @param {{name: string, url: string}} pageList - A single gallery entry
   *   as produced by getPageData (despite the parameter name).
   * @returns {Promise<void>} Resolves after all images have been written.
   */
  async mkdirFolder($, pageList){
    const imageTotal = this.getImageCount($)
    const perPageUrl = []
    for (let i = 1; i <= imageTotal; i++) {
      perPageUrl.push(`${pageList.url}/${i}`)
    }

    // The title is scraped text: neutralise path separators and characters
    // that are invalid in file names so it cannot point outside __dirname.
    const folderName = String(pageList.name).replace(/[\\/:*?"<>|]/g, '_')

    // Promise-based mkdir so the caller's `await` really waits for the
    // downloads; EEXIST replaces the former access-then-mkdir check.
    try {
      await fs.promises.mkdir(`${__dirname}/${folderName}`)
    } catch (err) {
      if (err.code !== 'EEXIST') throw err
      console.log(`${pageList.name} 文件夹已经存在,暂不写入图片~`)
      return
    }

    console.log(`${pageList.name} 文件夹创建成功,开始写入图片~`)
    await this.touchFile(perPageUrl, folderName)
  }

  /**
   * Downloads the main image of every given page into `<__dirname>/<name>/`,
   * naming the files `0.jpg`, `1.jpg`, ... in page order. A page that fails
   * is logged and skipped so one bad image does not abort the gallery.
   * @param {string[]} perPageUrl - URLs of the gallery's image pages.
   * @param {string} name - Existing folder name relative to __dirname.
   * @returns {Promise<void>} Resolves after the last file has been written.
   */
  async touchFile(perPageUrl, name){
    for (let i = 0; i < perPageUrl.length; i++) {
      try {
        const $ = await this.getPage(perPageUrl[i])
        const imgSrc = $('.main-image img').attr('src')
        if (!imgSrc) {
          throw new Error('no image found on page')
        }

        // Browser-like headers to get past hotlink protection (Referer).
        // No explicit Host header: the HTTP client derives it from imgSrc,
        // whereas a hard-coded value breaks when images live on another host.
        const headers = {
          Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
          "Accept-Encoding": "gzip, deflate",
          "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
          "Cache-Control": "no-cache",
          Pragma: "no-cache",
          "Proxy-Connection": "keep-alive",
          Referer: perPageUrl[i],
          "Upgrade-Insecure-Requests": 1,
          "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.19 Safari/537.36"
        }

        // encoding: null yields the raw bytes as a Buffer; gzip: true decodes
        // a compressed response, which Accept-Encoding above permits.
        const image = await rp({
          url: imgSrc,
          headers,
          encoding: null,
          gzip: true,
        })
        // Awaiting the write (instead of piping the promise) guarantees the
        // file is complete before the next image is requested.
        await fs.promises.writeFile(`${__dirname}/${name}/${i}.jpg`, image)
      } catch (err) {
        console.error(`下载失败:${perPageUrl[i]}`, err.message)
      }
    }
  }
}
// Entry point: constructing Download immediately starts crawling from the site root.
const start = new Download('https://www.mzitu.com/')

顺序目前有点问题,紧急修复中....

相关文章

网友评论

    本文标题:用node爬点羞羞图吧~2019-01-24

    本文链接:https://www.haomeiwen.com/subject/axrujqtx.html