美文网首页
puppeteer爬虫

puppeteer爬虫

作者: noyanse | 来源:发表于2018-08-19 22:30 被阅读0次
    // Page the crawler opens with page.goto; the URL fragment carries sort=R, range=6,10 and an empty tags value.
    const url = `https://movie.douban.com/tag/#/?sort=R&range=6,10&tags=`
    
    const puppeteer = require('puppeteer')
    
    
    /**
     * Pause helper for async code.
     *
     * @param {number} time - Delay handed to setTimeout (milliseconds).
     * @returns {Promise<void>} Promise that resolves once the timer fires.
     */
    const sleep = function (time) {
      return new Promise(function (done) {
        setTimeout(done, time)
      })
    }
    
    /**
     * Entry point: launches a browser, opens `url`, clicks the `.more` element,
     * then collects one record per `.list-wp a` element and prints the list.
     *
     * The browser is closed in a `finally` block so that a failure in any step
     * (navigation, selector timeout, in-page evaluation) does not leave the
     * launched browser process running. Any error is logged and reported through
     * a non-zero exit code instead of becoming an unhandled rejection.
     */
    ;(async () => {
      // How many times the `.more` element is clicked before scraping.
      const LOAD_MORE_CLICKS = 1
      // Pause (ms) used after navigation and before each click.
      const PAUSE_MS = 3000

      console.log('Start visit the target page')

      const browser = await puppeteer.launch({
        args: ['--no-sandbox']
      })

      let result

      try {
        const page = await browser.newPage()
        await page.goto(url, {
          waitUntil: 'networkidle2'
        })

        await sleep(PAUSE_MS)

        await page.waitForSelector('.more')

        for (let i = 0; i < LOAD_MORE_CLICKS; i++) {
          await sleep(PAUSE_MS)
          await page.click('.more')
        }

        // This callback runs inside the page, so it can only use what the page
        // itself provides (here: the `$` found on `window`).
        result = await page.evaluate(() => {
          const $ = window.$
          const links = []

          $('.list-wp a').each((index, item) => {
            const it = $(item)
            const doubanId = it.find('div').data('id')
            const title = it.find('.title').text()
            const rate = Number(it.find('.rate').text())
            // An item without an <img src> yields an empty poster instead of
            // throwing and aborting the whole evaluation.
            const src = it.find('img').attr('src') || ''
            const poster = src.replace('s_ratio', 'l_ratio')

            links.push({
              doubanId,
              title,
              rate,
              poster
            })
          })

          return links
        })
      } finally {
        // Always release the browser, whether or not scraping succeeded.
        await browser.close()
      }

      console.log(result)
      // process.send({result})
      // process.exit(0)
    })().catch(err => {
      console.error(err)
      process.exitCode = 1
    })
    
    

    相关文章

      网友评论

          本文标题:puppeteer爬虫

          本文链接:https://www.haomeiwen.com/subject/iephiftx.html