To get past the firewall you need the SSR account and password, and having to look them up every time is a hassle, so I wrote a script that scrapes the data and saves it into the config file.
First, let's look at the JSON structure of the config file:
{
"configs": [
{
"remarks": "xxxx",
"id": "BED33E45B9D8972248846237E8B938C2",
"server": "xxxx", //这里隐藏了server ip 地址
"server_port": "12259",
"server_udp_port": 0,
"password": "85973970",
"method": "aes-256-cfb",
"protocol": "auth_sha1_v4",
"protocolparam": "",
"obfs": "tls1.2_ticket_auth",
"obfsparam": "",
"remarks_base64": "U1NSMQ",
"group": "SSR",
"enable": true,
"udp_over_tcp": false
}
]
}
The fields we need are:
server, server_port, password, method, protocol, obfs
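For reference, here is a minimal sketch (assuming gui-config.json sits in the current working directory) that loads the file and prints exactly those fields for every entry:

import json

# Load the SSR GUI config and print just the fields listed above.
# Assumes gui-config.json is in the current working directory.
with open('gui-config.json', encoding='utf-8') as f:
    data = json.load(f)

for cfg in data['configs']:
    print(cfg['server'], cfg['server_port'], cfg['password'],
          cfg['method'], cfg['protocol'], cfg['obfs'])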
Take a look at the target data:
(screenshot: 酸酸乳的data.png)
Then look at the target HTML structure of the page:
(screenshot: 酸酸乳的html.png)
The parts highlighted in blue are the elements we need.
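Since the screenshots are not reproduced here, below is a hypothetical HTML fragment showing roughly the structure the scraper expects; the ids and tag layout are taken from the scraping code further down, while all the values are made up. The BeautifulSoup calls after it show how the fields are pulled out:

from bs4 import BeautifulSoup

# Hypothetical fragment mirroring the structure the scraper expects;
# ids and layout come from the scraping code below, values are fake.
html = """
<div class="portfolio-item">
  <h4>IP: <span id="ipssrc"> 1.2.3.4</span></h4>
  <h4>Port: <span id="portssrc">12259</span></h4>
  <h4>Password: <span id="pwssrc">85973970</span></h4>
  <h4>Method: aes-256-cfb</h4>
  <h4>auth_sha1_v4 tls1.2_ticket_auth</h4>
</div>
"""
soup = BeautifulSoup(html, "html.parser")
item = soup.find("div", class_="portfolio-item")
h4s = item.find_all("h4")
print(item.find(id="ipssrc").string.lstrip())    # 1.2.3.4
print(h4s[3].string.split(":")[1].lstrip())      # aes-256-cfb
print(h4s[4].string.split(" "))                  # ['auth_sha1_v4', 'tls1.2_ticket_auth']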
Next, the code.
Required tools:
- python3.5
- bs4 (see the install note below)
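bs4 here refers to the BeautifulSoup 4 package; if it is not already installed, pip install beautifulsoup4 should pull it in.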
#coding=utf-8
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
import json

# crawler: fetch the page and extract the SSR account data
def ssr():
    url = "https://www.abc.com/"  # the SSR site URL is hidden here
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
    header = {'User-Agent': user_agent}
    try:
        # send the User-Agent header along with the request
        page = urlopen(Request(url, headers=header))
        soup = BeautifulSoup(page, "html.parser")
        items = soup.find_all("div", class_="portfolio-item")
        target_item = items[11]
        # locate the target node and extract the data
        ipssrc = target_item.find(id="ipssrc").string.lstrip()
        portssrc = target_item.find(id="portssrc").string.strip('\n')
        pwssrc = target_item.find(id="pwssrc").string.strip('\n')
        h4s = target_item.find_all("h4")
        method = h4s[3].string.split(":")[1].lstrip()
        temp = h4s[4].string.split(" ")
        protocol = temp[0].lstrip()
        obfs = temp[1].lstrip()
        # call saveData() to persist the data
        saveData(ipssrc, portssrc, pwssrc, method, protocol, obfs)
    except Exception as e:
        raise e

def saveData(ipssrc, portssrc, pwssrc, method, protocol, obfs):
    # open and read the JSON config file
    with open('gui-config.json', mode='r', encoding='utf-8') as f:
        data = json.load(f)
    configs = data['configs']
    # fill the scraped values into the third config entry
    ssr = configs[2]
    ssr['remarks'] = ipssrc
    ssr['server'] = ipssrc
    ssr['server_port'] = portssrc
    ssr['password'] = pwssrc
    ssr['method'] = method
    ssr['protocol'] = protocol
    ssr['obfs'] = obfs
    # write the JSON data back to the file
    with open("gui-config.json", mode="w", encoding="utf-8") as f:
        json.dump(data, f)

if __name__ == '__main__':
    ssr()
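One small note: json.dump as used above writes the whole file on a single line. If you want gui-config.json to stay human-readable, the write step can pass indent and ensure_ascii=False, for example:

    with open("gui-config.json", mode="w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)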