美文网首页
puppeteer实现node爬虫

puppeteer实现node爬虫

作者: 猫久伴你入眠 | 来源:发表于2018-03-27 18:10 被阅读0次
    image.png image.png
    const puppeteer = require('puppeteer');
    // Page to scrape: Douban's movie "explore" listing. The hash fragment carries
    // the listing parameters (type=movie, tag="热门" percent-encoded,
    // sort=recommend, page_limit=20, page_start=0).
    const url = 'https://movie.douban.com/explore#!type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=0';
    
    
    /**
     * Pause for a fixed amount of time.
     *
     * @param {number} time - Delay in milliseconds, passed straight to setTimeout.
     * @returns {Promise<undefined>} Promise that fulfils once the timer fires.
     */
    function sleep(time) {
        return new Promise(function (done) {
            setTimeout(done, time);
        });
    }
    /**
     * Script entry point.
     *
     * Launches a browser through puppeteer, opens `url`, clicks the ".more"
     * element to load additional entries, then collects one record per
     * ".list a" element ({ id, title, imgUrl, rate }) and prints the array.
     *
     * The browser is closed even when a step fails, and any failure is logged
     * and reported through a non-zero process exit code.
     */
    (async () => {
        console.log("开始");

        // Start the browser. '--no-sandbox' is forwarded to the browser process
        // as a command-line flag; dumpio: false keeps the browser's own
        // stdout/stderr out of this process's output.
        const browser = await puppeteer.launch({
            args: ['--no-sandbox'],
            dumpio: false
        });

        try {
            // Open a new tab and navigate to the target page.
            const page = await browser.newPage();
            await page.goto(url, {
                waitUntil: 'networkidle2'
            });

            // Extra 3 second pause after navigation before touching the DOM.
            await sleep(3000);

            // Wait until the ".more" element exists in the DOM.
            await page.waitForSelector('.more');

            // Number of times ".more" is clicked; each click is preceded by a
            // 3 second pause.
            const loadMoreClicks = 1;
            for (let i = 0; i < loadMoreClicks; i++) {
                await sleep(3000);
                await page.click('.more');
            }

            // Everything inside this callback runs in the page, not in Node.
            // It relies on the page exposing jQuery as window.$.
            const result = await page.evaluate(() => {
                const $ = window.$;
                const links = [];

                $('.list a').each(function () {
                    const anchor = $(this);
                    const img = anchor.find('img');
                    // attr('src') is undefined when the image has no src;
                    // fall back to '' so one bad entry cannot abort the scrape.
                    const src = img.attr('src') || '';

                    links.push({
                        id: anchor.find('.cover-wp').data('id'),
                        title: img.attr('alt'),
                        // Swap the 's_ratio' path segment for 'l_ratio'.
                        imgUrl: src.replace('s_ratio', 'l_ratio'),
                        rate: Number(anchor.find('strong').text())
                    });
                });

                return links;
            });

            console.log(result);
        } finally {
            // Always release the browser process, including on failure.
            await browser.close();
        }
    })().catch((err) => {
        // Surface failures instead of leaving an unhandled rejection.
        console.error(err);
        process.exitCode = 1;
    });
    

    相关文章

      网友评论

          本文标题:puppeteer实现node爬虫

          本文链接:https://www.haomeiwen.com/subject/otbscftx.html