import requests
import json
import re
# HTTP headers sent with every request made by get_info().
# NOTE(review): the cookie looks like a hard-coded login session value --
# confirm whether it should be loaded from configuration instead.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/71.0.3578.98 Safari/537.36'
    ),
    'cookie': (
        '_T_WM=39971880054; SUHB=0G0vSx687zpwFs; '
        'MLOGIN=1; XSRF-TOKEN=8d7ee6'
    ),
}

# First page of the group feed; later pages append a max_id cursor to it.
url = 'https://m.weibo.cn/feed/group?gid=3935483719458447'
# Characters stripped from each status text: ASCII letters, digits, whitespace
# and the punctuation that makes up HTML tags and URLs.
_ASCII_NOISE_RE = re.compile(r'[a-zA-Z0-9\s<="_>:/.?]+')


def _strip_ascii_noise(text):
    """Return *text* with every run of ASCII markup/URL characters removed."""
    # The original call was re.sub(pattern, '', text, re.S). The fourth
    # positional parameter of re.sub is `count`, so re.S (== 16) silently
    # capped the number of replacements at 16 per status. The pattern has no
    # `.` outside a character class, so the DOTALL flag is not needed at all.
    return _ASCII_NOISE_RE.sub('', text)


def get_info(_url, page, max_page=10):
    """Print the cleaned text of each status in a Weibo group feed, page by page.

    Fetches `_url`, prints every status text with ASCII markup removed, then
    follows the ``next_cursor`` value of the response to the next page until
    the page counter exceeds `max_page`.

    Args:
        _url: URL of the first feed page to request.
        page: Page counter value for `_url` (the script starts at 1).
        max_page: Highest page counter value that is still fetched.

    Raises:
        requests.RequestException: If a request fails or times out.
        ValueError: If a response body is not valid JSON.
        KeyError: If a response lacks ``data`` or ``data.statuses``.
    """
    while True:
        # A timeout keeps a stalled connection from hanging the script forever.
        res = requests.get(_url, headers=headers, timeout=10)
        data = json.loads(res.text)['data']

        for status in data['statuses']:
            print(_strip_ascii_noise(status['text']))

        page += 1
        next_cursor = data.get('next_cursor')
        # A missing or empty cursor is treated as the end of the feed.
        if page > max_page or not next_cursor:
            break

        # NOTE: the group id is fixed here, exactly as in the original code;
        # it is not derived from the `_url` argument.
        _url = 'https://m.weibo.cn/feed/group?gid=3935483719458447&max_id={}'.format(next_cursor)
# Script entry: start scraping at the first feed page with the page counter at 1.
get_info(_url=url, page=1)
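# 网友评论 ("Reader comments") -- residue from the web page this script was copied from; not part of the program.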