# Install the required libraries from a terminal first:
#   pip install bs4 requests
import requests
from bs4 import BeautifulSoup
import bs4

# Deletes spaces and newlines from extracted cell text in a single pass.
_STRIP_TABLE = str.maketrans('', '', ' \n')

# Console layout for one row: rank, title, search index.  Slot {3} is the
# fill character used to centre the title column.
_ROW_TEMPLATE = "排名:{0:^4}\t标题:{1:{3}^15}\t热度:{2:^8}"

# Seconds to wait for the server before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 10

url = 'http://top.baidu.com/buzz?b=1&fr=20811'
headers = {'User-Agent': 'Mozilla/5.0'}

# Module-level accumulator of topic titles, one per line.  Kept because
# get_pages() has always appended to it; new code should prefer the value
# returned by get_pages().
s = ''


def get_html(url, headers):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        headers: HTTP request headers to send (e.g. a User-Agent).

    Returns:
        The decoded page text, using the encoding detected from the content.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response status.
    """
    r = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    # Fail loudly on an error page rather than parsing it and writing an
    # empty result file.
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    return r.text


def _cell_text(cell):
    """Return the text of a table cell with spaces and newlines removed."""
    return cell.get_text().translate(_STRIP_TABLE)


def get_pages(html):
    """Print every topic row found in *html* and collect the topic titles.

    A row is used only if it contains all three cells: ``td.first`` (rank),
    ``td.keyword`` (title) and ``td.last`` (search index).  Rows missing any
    of them are skipped.

    Args:
        html: HTML text of the ranking page.

    Returns:
        The titles found in this call, each followed by a newline, with the
        substring ``'search'`` removed.  The same text is also appended to
        the module-level ``s`` for backward compatibility.
    """
    global s
    soup = BeautifulSoup(html, 'html.parser')
    titles = []
    # The first <tr> is skipped -- presumably the table header row.
    for row in soup.find_all('tr')[1:]:
        rank_cell = row.find('td', class_='first')    # rank
        name_cell = row.find('td', class_='keyword')  # title
        times_cell = row.find('td', class_='last')    # search index
        if rank_cell is None or name_cell is None or times_cell is None:
            continue

        topic_rank = _cell_text(rank_cell)
        topic_name = _cell_text(name_cell)
        topic_times = _cell_text(times_cell)

        # chr(12288) is the full-width (ideographic) space, used as the fill
        # character for the title column.
        print(_ROW_TEMPLATE.format(topic_rank, topic_name, topic_times,
                                   chr(12288)))
        titles.append(topic_name.replace('search', '') + '\n')

    collected = ''.join(titles)
    s += collected
    return collected


def main():
    """Fetch the ranking page, print it, and save the titles to a file."""
    html = get_html(url, headers)
    titles = get_pages(html)
    with open('百度热榜.txt', 'w', encoding='utf-8') as f:
        f.write(titles)


if __name__ == '__main__':
    main()
# Reader comments (网友评论)