目的
dart 服务端爬虫实践
目标网站
使用库
功能
- 解析目标网页,获取内容 JSON
- 下载目标图片
代码
妹子图
根据目标页面元素,获取图片地址
/// Fetches one listing page of www.mzitu.com and downloads every image
/// matched by `#pins > li > a > img`.
///
/// [page] selects the listing page: `1` requests the site root, any other
/// value requests `/page/<page>/`.
///
/// Each image is saved through [Download.image], using the `alt` attribute
/// (with spaces removed) as the file name and the `data-original` attribute
/// as the image URL. Images lacking either attribute are skipped.
///
/// The returned future completes only after all downloads started for this
/// page have finished. Errors are printed, not rethrown.
getImage([page = 1]) async {
  try {
    var headers = {
      'User-Agent':
          'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
    };
    var res = await http.get(
        page == 1
            ? 'https://www.mzitu.com'
            : 'https://www.mzitu.com/page/$page/',
        headers: headers);
    if (res.statusCode != 200) {
      // Report the failure instead of returning silently.
      print('getImage: unexpected HTTP status ${res.statusCode}');
      return;
    }

    Document dom = parse(res.body);
    var downloads = <Future>[];
    for (var element in dom.querySelectorAll('#pins > li > a > img')) {
      var alt = element.attributes['alt'];
      var src = element.attributes['data-original'];
      // A missing attribute would otherwise throw and abort the whole page.
      if (alt == null || src == null) continue;
      downloads.add(Download.image(
          src, alt.replaceAll(' ', ''), {'Referer': 'https://www.mzitu.com/'}));
    }
    // All downloads are started before waiting, so they still run
    // concurrently; awaiting keeps them from being dropped.
    await Future.wait(downloads);
  } catch (e) {
    print(e);
  }
}
图片下载
妹子图下载需要设置referer
/// Static helpers for saving remote resources to disk.
class Download {
  /// Downloads the image at [url], re-encodes it as PNG and writes it to
  /// `./img/<fileName>.png`.
  ///
  /// [headers] are sent with the HTTP request (for example a `Referer`).
  /// The target directory is created if it does not exist yet.
  ///
  /// Nothing is written when the response status is not 200 or the body
  /// cannot be decoded as an image. Errors are printed, not rethrown.
  static image(url, fileName, [Map<String, String> headers]) async {
    try {
      var res = await http.get(url, headers: headers);
      if (res.statusCode != 200) {
        print('Download.image: $url responded with status ${res.statusCode}');
        return;
      }

      var image = img.decodeImage(res.bodyBytes);
      if (image == null) {
        // Fail with a clear message instead of passing null to the encoder.
        print('Download.image: $url could not be decoded as an image');
        return;
      }

      var file = File('./img/${fileName}.png');
      // writeAsBytes does not create missing parent directories.
      await file.parent.create(recursive: true);
      await file.writeAsBytes(img.encodePng(image));
    } catch (e) {
      print(e);
    }
  }
}
豆瓣
/// Fetches one page of reviews for Douban movie subject 3882715 and prints
/// each review as a map with the keys `name`, `avatar`, `content` and `time`.
///
/// [page] is 1-based: page 1 requests `start=0`, page 2 requests `start=20`,
/// and so on. Values below 1 are treated as page 1.
///
/// Reviews are the elements matching `.main.review-item`. A sub-element that
/// is missing yields an empty string (or `null` for the avatar URL) instead
/// of aborting the remaining reviews. Errors are printed, not rethrown.
douban([page = 1]) async {
  try {
    // The request offset advances in steps of 20 per page.
    const pageSize = 20;
    // Page 1 must map to offset 0; `20 * page` skipped the first 20 reviews.
    var start = page > 1 ? pageSize * (page - 1) : 0;

    var headers = {
      'User-Agent':
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
    };
    var res = await http.get(
        'https://movie.douban.com/subject/3882715/reviews?start=$start',
        headers: headers);
    if (res.statusCode != 200) {
      // Report the failure instead of returning silently.
      print('douban: unexpected HTTP status ${res.statusCode}');
      return;
    }

    Document dom = parse(res.body);
    for (var item in dom.querySelectorAll('.main.review-item')) {
      var nameEl = item.querySelector('.name');
      // NOTE(review): the `.avator` spelling is kept from the original
      // selector; presumably it matches the site's markup, confirm before
      // "correcting" it.
      var avatarEl = item.querySelector('.avator img');
      var contentEl = item.querySelector('.short-content');
      var timeEl = item.querySelector('.main-meta');

      print({
        'name': nameEl == null ? '' : nameEl.text.trim(),
        'avatar': avatarEl == null ? null : avatarEl.attributes['src'],
        'content': contentEl == null ? '' : contentEl.text.trim(),
        'time': timeEl == null ? '' : timeEl.text.trim(),
      });
    }
  } catch (e) {
    print(e);
  }
}
网友评论