代码
import requests
import re
def request_get(url, timeout=10):
    """Download the page at *url* and hand its decoded HTML to ``re_handle``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Raises:
        requests.RequestException: If the connection fails, the timeout
            expires, or the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Stop on HTTP errors here instead of trying to parse an error page.
    response.raise_for_status()
    # Set the encoding before reading .text so Chinese text is not garbled.
    response.encoding = 'gb2312'
    text = response.text
    re_handle(text)
def re_handle(text):
    """Extract the paragraph text from the ``twgg`` div and print it.

    The captured paragraph has its HTML tags and all whitespace removed
    before being passed to ``print_text``.

    Args:
        text: HTML source of the page.

    Raises:
        IndexError: If the expected ``<div id="twgg" class="gg">`` / ``<p>``
            markup is not present in *text*. The exception type matches what
            the previous ``findall(...)[0]`` lookup raised.
    """
    # re.S lets "." match newlines so the pattern can span lines. Because the
    # first ".*" is greedy, the captured paragraph is the last "<p>" followed
    # by whitespace that appears after the div.
    match = re.search(
        r'<div id="twgg" class="gg">.*<p>\s+(.*?)</p>', text, re.S
    )
    if match is None:
        raise IndexError('expected <div id="twgg" class="gg"> paragraph not found')
    text = match.group(1)
    # Raw strings keep "\s" from being treated as an invalid string escape.
    text = re.sub(r'<.*?>', '', text)  # drop any inline tags
    text = re.sub(r'\s+', '', text)  # drop all whitespace
    print_text(text)
def print_text(text: str) -> None:
    """Write *text* to standard output followed by a newline."""
    print(text)
# Script entry point: fetch one fixed page and print its extracted text.
if __name__ == '__main__':
    url = 'http://www.jjcom/jjart/412827.html'
    request_get(url=url)
新知识
requests.get 返回的中文出现乱码时,在读取 response.text 之前先指定 response.encoding
# Excerpt from request_get: fetch the page, then set the encoding before
# reading .text so that Chinese text is not garbled.
response = requests.get(url)
response.encoding = 'gb2312'
text = response.text
正则需要跨行匹配时,要加 re.S,这样 . 也能匹配换行符
text = re.findall(r'<div id="twgg" class="gg">.*<p>\s+(.*?)</p>', text, re.S)[0] # re.S lets "." match newlines so the pattern can span lines
网友评论