美文网首页
python爬虫代码--爬取最好大学排行榜

python爬虫代码--爬取最好大学排行榜

作者: 匿名用户919 | 来源:发表于2018-01-20 14:58 被阅读0次
    #!python3
    # -*- encoding:utf-8 -*-
    
    import requests,re,pprint
    from bs4 import BeautifulSoup
    
    # Scrape a university ranking table and print one line per school:
    # sequence number, name, province/city, score.
    print('中国最好大学排行榜'.center(50,'='))

    url = 'http://www.zuihaodaxue.com/shengyuanzhiliangpaiming2017.html'

    # A browser-like User-agent is sent with the request.
    headers = {'User-agent':'Mozilla/5.0'}

    r = requests.get(url=url, headers=headers, timeout=30)

    # Fail loudly on an HTTP error instead of silently parsing an error page.
    r.raise_for_status()

    # Let requests guess the encoding from the body before decoding the text.
    r.encoding = r.apparent_encoding

    soup = BeautifulSoup(r.text, "html.parser")

    # One [name, local, score] entry per table row, in page order.
    # Rows are read cell by cell rather than by walking a flat list of text
    # nodes in steps of four, so a blank cell or an extra text node cannot
    # shift every following record out of alignment.
    # NOTE(review): assumes the columns are rank, name, province, score,
    # as the original four-field stride implied -- confirm against the page.
    college_list = []

    for tbody in soup.find_all('tbody', class_="hidden_zhpm"):
        for row in tbody.find_all('tr'):
            cells = [td.get_text(strip=True) for td in row.find_all('td')]
            if len(cells) < 4:
                # Skip malformed or spacer rows that lack the four columns.
                continue
            college_list.append(cells[1:4])

    # Maps the sequence number (as a string) to [name, local, score].
    # The page's own rank cell is ignored; rows are numbered 1..N in the
    # order they appear, exactly as the original script did.
    college_dict = {}

    for rank, (name, local, score) in enumerate(college_list, start=1):
        college_dict[str(rank)] = [name, local, score]
        print(rank, name, local, score + '\n')
    
    
    
    
    

    相关文章

      网友评论

          本文标题:python爬虫代码--爬取最好大学排行榜

          本文链接:https://www.haomeiwen.com/subject/lydtaxtx.html