美文网首页
puppeteer爬虫

puppeteer爬虫

作者: noyanse | 来源:发表于2018-08-19 22:30 被阅读0次
// Page the scraper opens. The sort/range/tags parameters sit after the '#',
// i.e. in the URL fragment rather than in the HTTP query string.
const url = 'https://movie.douban.com/tag/#/?sort=R&range=6,10&tags='

const puppeteer = require('puppeteer')


/**
 * Pause helper for use with `await`.
 *
 * @param {number} time - Delay in milliseconds, handed straight to setTimeout.
 * @returns {Promise<undefined>} Promise that fulfils (with no value) once the timer fires.
 */
const sleep = time => new Promise(done => setTimeout(done, time))

/**
 * Script entry point.
 *
 * Launches a browser via Puppeteer, opens `url`, clicks the `.more` control,
 * then collects one record per `.list-wp a` element
 * ({ doubanId, title, rate, poster }) and prints the resulting array.
 *
 * The browser is always closed, even when a step fails; failures are logged
 * and reported through a non-zero process exit code.
 */
;(async () => {
  // Number of times the '.more' control is clicked before scraping.
  const loadMoreClicks = 1

  console.log('Start visit the target page')

  const browser = await puppeteer.launch({
    args: ['--no-sandbox']
  })

  let result

  try {
    const page = await browser.newPage()
    await page.goto(url, {
      waitUntil: 'networkidle2'
    })

    await sleep(3000)

    await page.waitForSelector('.more')

    for (let i = 0; i < loadMoreClicks; i++) {
      // Fixed pause before each click, as in the original script.
      await sleep(3000)
      await page.click('.more')
    }

    // The callback is executed inside the page, so it can only use what the
    // page itself provides (here: the `$` global on `window`) and must return
    // plain serializable data.
    result = await page.evaluate(() => {
      const $ = window.$
      const links = []

      $('.list-wp a').each((index, item) => {
        const it = $(item)
        const doubanId = it.find('div').data('id')
        const title = it.find('.title').text()
        const rate = Number(it.find('.rate').text())
        // An <img> without a src attribute would otherwise throw on .replace
        // and abort the whole scrape; report a null poster for that entry.
        const src = it.find('img').attr('src')
        const poster = typeof src === 'string'
          ? src.replace('s_ratio', 'l_ratio')
          : null

        links.push({
          doubanId,
          title,
          rate,
          poster
        })
      })

      return links
    })
  } finally {
    // Release the browser process whether or not scraping succeeded.
    await browser.close()
  }

  console.log(result)
  // process.send({result})
  // process.exit(0)
})().catch(err => {
  // Surface the failure instead of leaving a floating rejected promise.
  console.error(err)
  process.exitCode = 1
})

相关文章

网友评论

      本文标题:puppeteer爬虫

      本文链接:https://www.haomeiwen.com/subject/iephiftx.html