#!python3
# -*- encoding:utf-8 -*-
import requests,re,pprint
from bs4 import BeautifulSoup
# Download the ranking page at `url`, pull the table text out of the
# <tbody class="hidden_zhpm"> element(s), and print one entry per university
# as: rank, name, local, score.
print('中国最好大学排行榜'.center(50, '='))

# The flat text stream is consumed in groups of four strings:
# rank, name, local, score.
FIELDS_PER_ROW = 4

url = 'http://www.zuihaodaxue.com/shengyuanzhiliangpaiming2017.html'
headers = {'User-agent': 'Mozilla/5.0'}
r = requests.get(url=url, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
r.raise_for_status()
# Let requests guess the charset from the body so the Chinese text decodes.
r.encoding = r.apparent_encoding
html = r.text
soup_first = BeautifulSoup(html, "html.parser")

# Walk the matched <tbody> tags directly.  Converting the result set to a
# string and re-parsing it would add '[', ']' and ', ' list-repr artifacts
# to the text stream, which can shift the four-field grouping below.
college_list = []
for tbody in soup_first.find_all('tbody', class_="hidden_zhpm"):
    for string in tbody.strings:
        # Skip the whitespace-only strings that sit between tags.
        if string.strip():
            college_list.append(str(string))

# Group the flat list into rows.  The first field of every row is replaced
# by a sequential position (1, 2, 3, ...), which is also used (as a string)
# as the dictionary key.  A trailing incomplete group is ignored.
college_dict = {}
row_starts = range(0, len(college_list) - FIELDS_PER_ROW + 1, FIELDS_PER_ROW)
for n, y in enumerate(row_starts, start=1):
    college_list[y] = n
    college_dict[str(n)] = college_list[y + 1:y + FIELDS_PER_ROW]

# NOTE: writing the ranking to a text file was sketched here originally but
# left disabled; only console output is produced.

# Keys are numeric strings, so sort them by integer value, not lexically.
for rank in sorted(college_dict, key=int):
    name, local, score = college_dict[rank]
    # The trailing '\n' leaves a blank line between entries.
    print(rank, name, local, score + '\n')
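# 网友评论 ("reader comments") -- leftover text from the web page this script was copied from; not part of the program.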