一、文档
https://pptr.dev/
https://zhaoqize.github.io/puppeteer-api-zh_CN/#?product=Puppeteer&version=v1.20.0&show=api-class-puppeteer
二、API 文档
https://github.com/puppeteer/puppeteer/blob/v3.1.0/docs/api.md#puppeteerlaunchoptions
三、github
https://github.com/puppeteer/puppeteer
四、实例
1、iPhone6 模式访问百度搜索Puppeteer并截图,然后将搜索结果的标题存成txt文件
const puppeteer = require('puppeteer');
// Device descriptors: https://github.com/puppeteer/puppeteer/blob/master/src/DeviceDescriptors.ts
const iPhone6 = puppeteer.devices['iPhone 6'];
const fs = require('fs');

/**
 * Example 1: emulate an iPhone 6, search Baidu for "puppeteer", screenshot the
 * result page and save the result titles (one per line) to a text file.
 *
 * The browser is always closed at the end, even when a step fails, and any
 * failure is reported on stderr with a non-zero exit code.
 */
(async () => {
  const browser = await puppeteer.launch({
    // Show the browser window instead of running headless.
    headless: false,
    // Path to a self-downloaded Chrome, if one should be used:
    // executablePath: '/path/to/Chrome',
    // Open DevTools automatically:
    // devtools: true,
    // Slow every operation down (ms); mostly useful while debugging:
    // slowMo: 500,
    // product: 'firefox',
  });

  try {
    const page = await browser.newPage();
    // Alternative to emulate(): configure the viewport by hand.
    // await page.setViewport({ width: 1640, height: 1480, deviceScaleFactor: 1, isMobile: true });
    await page.emulate(iPhone6);

    // Open Baidu and wait until there are no network connections left.
    await page.goto('https://www.baidu.com', { waitUntil: 'networkidle0' });
    // Type "puppeteer" into the #index-kw search box.
    await page.type('#index-kw', 'puppeteer');
    // Pressing Enter would submit the search as well:
    // await page.keyboard.press('Enter');

    // Pause for 3 s before submitting. A plain timer is used so the pause does
    // not depend on a version-specific Puppeteer wait helper.
    await new Promise((resolve) => setTimeout(resolve, 3000));

    // Attach the listener BEFORE clicking so no response of the search
    // navigation is missed. page.on() is synchronous; there is nothing to await.
    page.on('response', async (response) => {
      console.log(response);
      console.log(response.url()); // response URL (string)
      console.log(response.headers()); // response headers (object)
      console.log(response.status()); // HTTP status code (number)
      console.log(response.ok()); // whether the request succeeded (boolean)
      console.log(response.request()); // the matching request object
      try {
        // The body is only available through a Promise and may be missing
        // (e.g. for redirects), so failures are reported instead of thrown.
        console.log(await response.text());
      } catch (err) {
        console.warn('response body unavailable:', response.url(), err.message);
      }
      // To react to one specific XHR, compare response.url() with the
      // expected address and read the payload with `await response.json()`.
    });

    console.log(`等待网络${Date.now()}`);
    // Start waiting for the navigation before the click that triggers it;
    // otherwise a fast navigation can finish before the wait is registered.
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'networkidle0' }),
      page.click('#index-bn'),
    ]);
    console.log(`网络完成${Date.now()}`);

    // Screenshot of the result page.
    await page.screenshot({ path: `${Date.now()}网络完成.png` });

    // Run code inside the page to read the viewport as the page reports it.
    const dimensions = await page.evaluate(() => ({
      width: document.documentElement.clientWidth,
      height: document.documentElement.clientHeight,
      deviceScaleFactor: window.devicePixelRatio,
    }));
    console.log('Dimensions:', dimensions);

    // Generating a PDF instead would look like this:
    // await page.pdf({ path: 'opdata.pdf', format: 'A4' });

    // page.$ corresponds to document.querySelector, page.$$ to
    // document.querySelectorAll; $$eval hands the callback
    // Array.from(document.querySelectorAll(selector)), i.e. a real array.
    const options = await page.$$eval('.c-result.result span.c-title-text', (nodes) =>
      nodes.map((node) => `${node.innerText}\n`));
    console.log('options', options);

    // Every entry already ends with a newline, so join without a separator.
    // Array#toString() would insert a comma between the titles.
    await fs.promises.writeFile('百度.txt', options.join(''), 'utf8');
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
2、文件的上传和下载
/**
 * Example 2: download a file through a CDP session, then upload a file.
 *
 * All selectors and paths below are placeholders that must be adapted to the
 * page under test. The browser is closed even when a step fails.
 */
(async () => {
  const fs = require('fs');

  /**
   * Polls the file system until `filePath` exists.
   *
   * @param {string} filePath - File to wait for.
   * @param {{timeout?: number, interval?: number}} [options] - Maximum total
   *   wait and delay between two checks, both in milliseconds.
   * @returns {Promise<void>} Resolves once the file exists.
   * @throws {Error} When the file has not appeared within `timeout`.
   */
  const waitForFile = async (filePath, { timeout = 30000, interval = 200 } = {}) => {
    const deadline = Date.now() + timeout;
    while (!fs.existsSync(filePath)) {
      if (Date.now() >= deadline) {
        throw new Error(`Timed out after ${timeout} ms waiting for ${filePath}`);
      }
      await new Promise((resolve) => setTimeout(resolve, interval));
    }
  };

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Configure the download directory through a CDP session.
    const cdp = await page.target().createCDPSession();
    await cdp.send('Page.setDownloadBehavior', {
      behavior: 'allow', // allow every download request
      downloadPath: 'path/to/download', // directory the files are saved to
    });

    // Click the button that triggers the download.
    const downloadButton = await page.waitForSelector('#someButton');
    await downloadButton.click();

    // Poll until the downloaded file shows up on disk.
    await waitForFile('path/to/download/filename');

    // For uploads the element handle must refer to an <input> element.
    const inputElement = await page.waitForXPath('//input[@type="file"]');
    await inputElement.uploadFile('/path/to/file');
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
3、跳转新 tab 页处理
// Example 3: obtain the Page object of a tab that is opened by a click.
const page = await browser.newPage();
await page.goto(url);
const btn = await page.waitForSelector('#btn');
// Start waiting BEFORE the click so the new tab cannot be missed. Only a
// target whose opener is this page is accepted; listening for the first
// 'targetcreated' event of any kind could pick up an unrelated target
// (for which target.page() may be null).
const newPagePromise = browser
  .waitForTarget((target) => target.opener() === page.target())
  .then((target) => target.page());
await btn.click();
// After the click, wait for the Page object of the new tab.
const newPage = await newPagePromise;
4、如何抓取 iframe 中的元素
/**
 * Example 4: interact with elements that live inside an iframe.
 *
 * Opens www.188.com, switches to password login, fills in the form inside the
 * login iframe and submits it. The browser is closed even when a step fails.
 */
(async () => {
  const browser = await puppeteer.launch({ headless: false, slowMo: 50 });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.188.com');

    // Switch to "log in with password".
    const passwordLogin = await page.waitForXPath('//*[@id="qcode"]/div/div[2]/a');
    await passwordLogin.click();

    // Identify the login iframe among the main frame's children by its URL.
    const loginFrame = page
      .mainFrame()
      .childFrames()
      .find((frame) => frame.url().includes('passport.188.com'));

    if (loginFrame) {
      await loginFrame.type('.dlemail', 'admin@admin.com');
      await loginFrame.type('.dlpwd', '123456');
      // Start waiting for the navigation together with the click that
      // triggers it, so the navigation cannot be missed.
      await Promise.all([
        loginFrame.click('#dologin'),
        page.waitForNavigation(),
      ]);
    } else {
      // NOTE(review): the iframe may not be attached yet right after the
      // click; confirm whether an explicit wait is needed for this page.
      console.warn('login iframe (passport.188.com) not found');
    }

    await page.close();
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
5、请求拦截
/**
 * Example 5: request interception.
 *
 * Aborts image, media and font requests and forwards every other request
 * with an additional `puppeteer-test: true` header.
 */
(async () => {
  // Resource types that are never allowed to load.
  const blockedResourceTypes = new Set(['image', 'media', 'font']);

  const browser = await puppeteer.launch();
  const page = await browser.newPage();

  // Interception must be switched on before abort()/continue() may be used.
  await page.setRequestInterception(true);
  page.on('request', (req) => {
    // Guard clause: drop blocked resource types right away.
    if (blockedResourceTypes.has(req.resourceType())) {
      return req.abort();
    }
    // Everything else is rewritten; url, method, postData and headers can
    // all be overridden here.
    const headers = { ...req.headers(), 'puppeteer-test': 'true' };
    return req.continue({ headers });
  });

  await page.goto('https://demo.youdata.com');
  await page.close();
  await browser.close();
})();
6、使用Page.exposeFunction 在页面上注册全局函数
const puppeteer = require('puppeteer');
const fs = require('fs');

/**
 * Example 6: register a global function on the page with page.exposeFunction.
 *
 * Exposes `window.readfile(filePath)`, which reads a file in the Node.js
 * process, and calls it from inside the page. Console output of the page is
 * forwarded to the Node.js console. The browser is closed even on failure.
 */
puppeteer.launch().then(async browser => {
  try {
    const page = await browser.newPage();
    // ConsoleMessage#text is a method; without the call the listener would
    // log the function itself instead of the message text.
    page.on('console', msg => console.log(msg.text()));
    // Register the global function `readfile` on the page's window object.
    // It resolves with the file content as a UTF-8 string and rejects when
    // the file cannot be read.
    await page.exposeFunction('readfile', filePath => fs.promises.readFile(filePath, 'utf8'));
    await page.evaluate(async () => {
      // Use window.readfile to read the contents of a file.
      const content = await window.readfile('/etc/hosts');
      console.log(content);
    });
  } finally {
    await browser.close();
  }
}).catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
7、滚动指定距离
在页面上下文中调用 window.scrollBy(xnum, ynum),xnum、ynum 分别为页面向右、向下滚动的像素值,例如:
await page.evaluate(() => window.scrollBy(0, 500));
五、推荐资料
1、https://zhuanlan.zhihu.com/p/76237595
2、https://www.lfhacks.com/tech/puppeteer-http-response
3、http://www.r9it.com/20171106/puppeteer.html
4、https://www.zoo.team/article/puppeteer
六、与Selenium对比
1、Puppeteer 仅支持谷歌和火狐浏览器,Selenium 除了支持谷歌、火狐,还支持 Safari、IE 及 Edge;
2、Puppeteer 比Selenium安装简单;
3、Puppeteer 比Selenium功能更强大;
4、它们的实现原理不同:https://www.zhihu.com/question/278723746
七、其他 e2e 测试框架
Nightwatch、TestCafe、Cypress
网友评论