1. 先对今日头条的接口进行抓包,得到如下请求地址:
https://www.toutiao.com/api/pc/list/feed?offset=0&channel_id=94349549395&max_behot_time=0&category=pc_profile_channel&disable_raw_data=true&aid=24&app_name=toutiao_web&_signature=_02B4Z6wo00901dMswnwAAIDCY90utx-uGVXTCMbAABU9qkwnAPv0iWmk40B-QunCGITEQjHDyFNHFE-QKDWSwqNwubTwMctJCpdmksArmYOWUAKFIgKzLhv3quWkkx5Ob9.65ALLjnvItx2nc6
可以发现,其中的 _signature 参数每次请求都会变化。
第一步:以 _signature 这个会变动的参数名作为关键字,在页面加载的 js 文件中搜索;对搜索出来的 js 逐个排查,在看起来相关的位置打上断点进行调试,调试完再开始分析 js。
经过断点调试发现,S 方法里面有这样一行:
var o = (null === (n = window.byted_acrawler) || void 0 === n ? void 0 : null === (r = n.sign) || void 0 === r ? void 0 : r.call(n, i)) || "";
o 这个值就是我们需要的。
把这个嵌套的三元表达式拆解开来,真正起作用的是 r.call(n, i),其中:
r = n.sign
n = window.byted_acrawler
最后就是 window.byted_acrawler.sign.call(window.byted_acrawler,i)
先在浏览器控制台里执行一下,确认结果没有问题;接下来看 window.byted_acrawler 是怎么生成的。
可以看到,它是由 acrawler.js 生成的。
通过 jsdom 补上浏览器环境,把代码改造成可以直接用 Node.js 执行的文件(ac.js):
// Load jsdom; it provides the browser-like `window` and `document` objects
// that are published as globals below (presumably for the acrawler.js code
// that is pasted further down — confirm against that script).
const jsdom = require("jsdom");
const { JSDOM } = jsdom;

// A minimal HTML page; only the resulting DOM objects are used afterwards.
const dom = new JSDOM('<!DOCTYPE html><p>HELLO word</p>');

// Publish the DOM objects explicitly on the Node global object. Assigning to
// undeclared identifiers (`window = ...`) only works in sloppy mode and throws
// a ReferenceError under "use strict" or in an ES module.
global.window = dom.window;
global.document = dom.window.document;
/**
 * Browser environment values installed as Node globals: a `location` object
 * describing https://www.toutiao.com/ and a `navigator` object carrying
 * desktop Chrome 89 on Windows values.
 */
const params = {
  location: {
    hash: "",
    host: "www.toutiao.com",
    hostname: "www.toutiao.com",
    href: "https://www.toutiao.com/",
    origin: "https://www.toutiao.com",
    pathname: "/",
    port: "",
    protocol: "https:",
    search: "",
  },
  navigator: {
    appCodeName: "Mozilla",
    appName: "Netscape",
    appVersion: "5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
    cookieEnabled: true,
    deviceMemory: 8,
    doNotTrack: null,
    hardwareConcurrency: 8,
    language: "zh-CN",
    // The original read `(2) ["zh-CN", "zh"]`, a DevTools console copy
    // artifact that evaluates to `(2)["zh"]`, i.e. undefined.
    languages: ["zh-CN", "zh"],
    maxTouchPoints: 0,
    onLine: true,
    platform: "Win32",
    product: "Gecko",
    productSub: "20030107",
    userAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
    vendor: "Google Inc.",
    vendorSub: "",
    webdriver: false,
  },
};

// Install every entry as an ordinary data property on the Node global object.
// defineProperty is used instead of Object.assign because recent Node.js
// releases (21+) ship a built-in getter-only `navigator` global, and a plain
// assignment to it throws "Cannot set property navigator ... which has only
// a getter".
for (const [name, value] of Object.entries(params)) {
  Object.defineProperty(global, name, {
    value,
    writable: true,
    enumerable: true,
    configurable: true,
  });
}
// 此处粘贴 acrawler.js 的完整内容(需要自己从网站上复制)
{
  // The unsigned URL is read from the first command-line argument
  // (`node <script> <url>`).
  const [, , unsignedUrl] = process.argv;

  // `byted_acrawler` is looked up on the Node global object; its `sign`
  // method is called with the URL wrapped in an object, and the result is
  // written to stdout.
  const signer = global.byted_acrawler;
  console.log(signer.sign({ url: unsignedUrl }));
}
import os
import subprocess
import execjs
# 下面设置的是 npm 全局安装 jsdom 的路径(NODE_PATH),请改成自己机器上的实际路径
# Point NODE_PATH at the npm global module directory so that the Node
# subprocess started below can resolve modules installed there.
os.environ["NODE_PATH"] = "C:/Users/公司/AppData/Roaming/npm/node_modules"

# Feed request URL without the _signature query parameter.
url = "https://www.toutiao.com/api/pc/list/feed?offset=0&channel_id=94349549395&max_behot_time=0&category=pc_profile_channel&disable_raw_data=true&aid=24&app_name=toutiao_web"

# Compute the signature by running ac.js under Node (the original comment here
# was garbled; presumably it meant "sign it").
# An argument list is used instead of a formatted shell string so the URL's
# `&` characters never reach a shell, only stdout is captured so Node warnings
# on stderr cannot leak into the signature, and check=True raises
# CalledProcessError if the script exits with a non-zero status.
completed = subprocess.run(
    ["node", "ac.js", url],
    capture_output=True,
    text=True,
    check=True,
)
signature = completed.stdout.strip()
if not signature:
    raise RuntimeError("ac.js produced no signature; stderr: {}".format(completed.stderr.strip()))

# Append the signature as the _signature query parameter and print the result.
finally_url = "{}&_signature={}".format(url, signature)
print(finally_url)
上面打印出来的就是最终生成的、带有 _signature 参数的访问地址。
网友评论