import requests
from datetime import date
from bs4 import BeautifulSoup
def getNewsTitle(url, timeout=10):
    """Print the headlines and article links found on one newspaper page.

    Downloads ``url``, decodes the body as UTF-8 and prints, in order:

    1. the text of the first ``div`` whose inline style is exactly
       ``float:right; line-height: 70px;``,
    2. the stripped text of the first ``div`` with class ``l_t``,
       wrapped in dashes,
    3. one line per ``li`` inside the first ``div`` with id ``titleList``:
       the item text, a tab, and the item's link resolved against ``url``,
    4. a closing separator line.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Raises:
        IndexError: If any of the three expected ``div`` elements is
            missing from the page.
        requests.exceptions.RequestException: If the download fails or
            times out (raised by ``requests.get``).
    """
    # Local import keeps this function self-contained; the module header
    # does not import urllib.
    from urllib.parse import urljoin

    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, timeout=timeout)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'lxml')

    print(soup.find_all('div', style="float:right; line-height: 70px;")[0].text)

    titleNo = soup.find_all('div', 'l_t')[0].text.strip()
    print('-----%s-----' % titleNo)

    titles = soup.find_all('div', id='titleList')[0].find_all('li')
    for title in titles:
        anchor = title.a
        href = anchor.get('href') if anchor is not None else None
        # ``url`` ends in a file name (node_NN.htm), so plain string
        # concatenation would yield a broken address; urljoin resolves the
        # href relative to the page instead. Items without a link are
        # still listed, with an empty link column.
        link = urljoin(url, href) if href else ''
        print(title.text, '\t', link)

    print('--------------------\n')
# Build today's base address, e.g. http://whwb.cjn.cn/html/2017-01/06/.
# The date is read once so year, month and day always belong to the same
# day, and the argument tuple is kept inside a single parenthesised
# expression so the statement can span several lines.
today = date.today()
newsdate = 'http://whwb.cjn.cn/html/%s-%s/%s/' % (
    today.year,
    str(today.month).zfill(2),  # zero-pad to two digits: 1 -> '01'
    str(today.day).zfill(2),
)

# Pages are published as node_73.htm through node_82.htm.
urls = [newsdate + 'node_%s.htm' % i for i in range(73, 83)]
for url in urls:
    getNewsTitle(url)

# Keep the console window open until the user submits a line of input.
input('任意输入退出')
月份和日期是 01/06 的格式,可以用一个很少用到的 .zfill() 函数实现:它是 str 类的方法,传入目标位数,位数不足时会自动在前面补零(例如 '6'.zfill(2) 得到 '06')。
运行后弹出一个 cmd 窗口,显示当日武汉晚报的新闻标题和链接,输入任意内容并按回车键后退出。
网友评论