python爬虫代码--爬取最好大学排行榜

作者: 匿名用户919 | 来源:发表于2018-01-20 14:58 被阅读0次

2017-12-31
python爬虫代码--爬取最好大学排行榜
python爬取中国大学排名
python爬虫爬取英雄联盟英雄图片
python爬虫爬取王者荣耀英雄列表图片
各类链接
Python学习
猫眼电影Top100数据爬取，使用requests和xpath
利用python爬虫爬取LOL英雄联盟英雄数据和皮肤价格信息
豌豆荚游戏排行榜爬虫源码

#!python3
# -*- encoding:utf-8 -*-

import requests,re,pprint
from bs4 import BeautifulSoup

# Banner shown before any network work starts.
print('中国最好大学排行榜'.center(50,'='))

# Flat list of text fragments scraped from the ranking table, and the
# rank -> [three following fields] mapping built from it later in the script.
college_list = []
college_dict = {}

# Cursor into college_list (y) and running rank counter (n); both are
# advanced by the grouping loop further down.
y = 0
n = 0

url = 'http://www.zuihaodaxue.com/shengyuanzhiliangpaiming2017.html'

# Send an explicit browser-like User-agent header with the request.
headers = {'User-agent':'Mozilla/5.0'}

r = requests.get(url=url, headers=headers, timeout=30)

# Fail loudly on a 4xx/5xx answer instead of parsing an error page and
# printing an empty ranking.
r.raise_for_status()

# Use the encoding requests detects from the body rather than the one
# derived from the response headers.
r.encoding = r.apparent_encoding

html = r.text


# Parse the downloaded page and pick out the table bodies that hold the ranking.
soup_first = BeautifulSoup(html,"html.parser")

soup_next = soup_first.find_all('tbody',class_="hidden_zhpm")

# Read the text nodes straight from each matched <tbody>. Converting the result
# list to a string and re-parsing it would add the list's own '[', ']' and ', '
# punctuation as text nodes, and the ', ' separators would shift the
# four-fields-per-row grouping whenever more than one <tbody> matches.
for tbody in soup_next:
    # Bare newline nodes are skipped; every other text node is kept in
    # document order.
    college_list.extend(text for text in tbody.strings if text != '\n')


        





# Consume college_list four entries at a time: the first entry of each group is
# overwritten with a sequential rank, and the remaining three entries are stored
# in college_dict under that rank (as a string key). A trailing group with fewer
# than four entries is ignored.
while len(college_list) - y >= 4:
    fields = college_list[y + 1:y + 4]
    n = n + 1
    college_list[y] = n
    college_dict[str(n)] = fields
    y = y + 4

# Immutable snapshot of the flat list after the ranks have been written in.
college_tuple = tuple(college_list)
      
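# Disabled: optional dump of the ranking to a text file. If this is re-enabled,
# open the file with an explicit encoding (e.g. encoding='utf-8') and make sure
# it is closed again (a `with` block does both).
#college_rank = open(r'E:\workspace\python\program\collegeTop1000.txt','a')

#college_rank.write('中国最好大学排行榜Top1000'+'\n')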

# The dict keys are rank numbers stored as strings; convert them to ints so the
# ranking is ordered numerically rather than lexically.
rank_list = sorted(map(int, college_dict))

rank_tuple = tuple(rank_list)

# Print one line per entry, followed by a blank line (the trailing '\n' plus
# print's own newline).
for rank in rank_tuple:
    name, local, score = college_dict[str(rank)]
    print(rank, str(name), str(local), score + '\n')