其实这篇是失败的一篇:在抓取的过程中,亚马逊总会返回几个空的内容给我,导致无法还原完整的内容,因此这篇权作记录。
const http=require('https');
const fs=require('fs');
const cheerio=require('cheerio');
const request=require('request');
// Best-seller list page to crawl, and the origin that is prepended to the
// product links found on that page.
var url='https://www.amazon.com/Best-Sellers-Sports-Outdoors/zgbs/sporting-goods/ref=zg_bs_pg_2?_encoding=UTF8&pg=1';
var prefix='https://www.amazon.com';

// Download the list page, append every product link to pathUrl.txt and hand
// each link to getSubContent() so the product page gets scraped as well.
http.get(url,function(res){
  // A non-200 answer carries no product list; log it instead of parsing it.
  if(res.statusCode!==200){
    console.log('Unexpected status '+res.statusCode+' for '+url);
    res.resume(); // discard the body so the connection can be released
    return;
  }
  var html='';
  res.setEncoding('utf-8');
  res.on('data',function(chunk){
    html+=chunk;
  });
  res.on('error',function(err){
    console.log(err);
  });
  res.on('end',function(){
    var $=cheerio.load(html);
    $('.zg_itemImmersion').each(function(){
      var link=$(this).find('.a-link-normal').attr('href');
      // Items without a link would otherwise produce "<prefix>undefined".
      if(!link){
        return;
      }
      var linkContent=prefix+link;
      fs.appendFile('pathUrl.txt',linkContent+'\n',function(err){
        if(err){
          console.log(err);
        }
      });
      getSubContent(linkContent);
    });
  });
}).on('error',function(err){
  // Without this listener a DNS/connection failure is thrown as an
  // unhandled 'error' event and terminates the process.
  console.log(err);
});
/**
 * Fetch one product page and append its sales-rank text to input.txt.
 *
 * The appended line is '△' followed by the text of #SalesRank, .zg_hrsr_rank
 * and .zg_hrsr_ladder found under .content (CR/LF characters removed), then
 * a newline. Request errors, response errors and non-200 responses are
 * logged to the console and nothing is written for them.
 *
 * @param {string} urlSub - URL of the product page to fetch.
 * @returns {void}
 */
function getSubContent(urlSub){
  http.get(urlSub,function(res){
    // A non-200 page has no rank markup; parsing it would only add a bare
    // '△' line to the output file.
    if(res.statusCode!==200){
      console.log('Unexpected status '+res.statusCode+' for '+urlSub);
      res.resume(); // discard the body so the connection can be released
      return;
    }
    var subHtml='';
    res.setEncoding('utf-8');
    res.on('data',function(subChunk){
      subHtml+=subChunk;
    });
    res.on('error',function(err){
      console.log(err);
    });
    res.on('end',function(){
      var $=cheerio.load(subHtml);
      var subGroup=$('.content');
      var rank=subGroup.find('#SalesRank').text().replace(/[\r\n]/g,"");
      var zg_hrsr_rank=subGroup.find('.zg_hrsr_rank').text().trim().replace(/[\r\n]/g,"");
      var zg_hrsr_ladder=subGroup.find('.zg_hrsr_ladder').text().trim().replace(/[\r\n]/g,"");
      var rankContent='△'+rank+zg_hrsr_rank+zg_hrsr_ladder+'\n';
      fs.appendFile('input.txt',rankContent,function(err){
        if(err){
          console.log(err);
        }
      });
    });
  }).on('error',function(err){
    // Without this listener a DNS/connection failure is thrown as an
    // unhandled 'error' event and terminates the process.
    console.log(err);
  });
}
另外试着用 readline 模块从文件中一行一行地读取 url(下面的代码只引入了 readline,逐行读取尚未实现,先用单个 url 做测试):
const readline = require('readline');
const fs = require('fs');
const cheerio=require('cheerio');
const http=require('https');
// Single product detail page used as the input for getRankContent().
var url = 'https://www.amazon.com/LifeStraw-LSPHF017-Personal-Emergency-Preparedness/dp/B006QF3TW4/ref=zg_bs_sporting-goods_8/146-4419491-9931667?_encoding=UTF8&psc=1&refRID=S3S2QJNQRHHF69ZY41WQ';
// Sixth '/'-separated segment of the URL ('B006QF3TW4' for the URL above).
var asin = url.split('/', 6)[5];

getRankContent(url);
/**
 * Fetch one product page, dump its raw HTML to input.txt and print the
 * sales-rank text found in it.
 *
 * The printed text is the concatenation of #SalesRank, .zg_hrsr_rank and
 * .zg_hrsr_ladder found under .content, with CR/LF characters removed.
 * A non-200 status is logged but the body is still dumped and parsed, so the
 * returned page can be inspected. Request and response errors are logged.
 *
 * @param {string} url - URL of the product page to fetch.
 * @returns {void}
 */
function getRankContent(url){
  http.get(url,function(res){
    if(res.statusCode!==200){
      console.log('Unexpected status '+res.statusCode+' for '+url);
    }
    var html='';
    // Decode as a stream: without this every chunk is a Buffer that gets
    // stringified on its own, which corrupts multi-byte UTF-8 characters
    // that happen to be split across two chunks.
    res.setEncoding('utf-8');
    res.on('data',function(chunk){
      html+=chunk;
    });
    res.on('error',function(err){
      console.log(err);
    });
    res.on('end',function(){
      fs.appendFile('input.txt',html,function(err){
        if(err){
          console.log(err);
        }
      });
      var $=cheerio.load(html);
      var group=$('.content');
      var rank=group.find('#SalesRank').text().replace(/[\r\n]/g,"");
      var zg_hrsr_rank=group.find('.zg_hrsr_rank').text().trim().replace(/[\r\n]/g,"");
      var zg_hrsr_ladder=group.find('.zg_hrsr_ladder').text().trim().replace(/[\r\n]/g,"");
      var rankContent=rank+zg_hrsr_rank+zg_hrsr_ladder;
      console.log(typeof rankContent);
      console.log(rankContent);
    });
  }).on('error',function(err){
    // Without this listener a DNS/connection failure is thrown as an
    // unhandled 'error' event and terminates the process.
    console.log(err);
  });
}
如果需要取得亚马逊的数据,似乎需要调用 Amazon API。最近将会研究这个,以保证最大程度地取得亚马逊的数据,为选品与工作效率的提升打下基础。
网友评论