美文网首页
抓取豆瓣网电影的例子

抓取豆瓣网电影的例子

作者: 丽雁解 | 来源:发表于2018-01-14 00:18 被阅读0次
    from bs4 import BeautifulSoup
    import requests
    
    def main(base_url):
        """Fetch one Douban Top 250 listing page, print its movies, return the next page URL.

        For every ``<li>`` inside ``<ol class="grid_view">`` this prints the
        title, poster image URL, the text of the first ``<p>`` in the ``bd``
        div, the second and third ``<span>`` of the ``star`` div, and a
        separator line.

        Args:
            base_url: Full URL of the listing page to fetch.

        Returns:
            The absolute URL of the next page, or ``None`` when the page has
            no "next" link or the movie list is missing, so callers can loop
            with ``while url:``.

        Raises:
            requests.exceptions.RequestException: if the HTTP request fails
                or times out.
        """
        # A timeout keeps the crawl from hanging forever on a stalled connection.
        req = requests.get(base_url, timeout=10)
        soup = BeautifulSoup(req.text, 'lxml')
        ol = soup.find("ol", class_="grid_view")
        print(type(soup), type(ol))
        if ol is None:
            # The expected list is not in the response; stop paging instead of
            # failing with AttributeError on ``None.find_all``.
            return None

        for li in ol.find_all('li'):
            img_src = li.find('img')['src']

            title = li.find('span', class_="title").text.strip()
            actor = li.find('div', class_="bd").p.get_text().strip()
            star_info_all = li.find('div', class_='star').find_all('span')
            mv_score = star_info_all[1].text.strip()
            comment_num = star_info_all[2].text.strip()
            print(title)
            print(img_src)
            print(actor)
            print(mv_score)
            print(comment_num)
            print('-' * 50)

        # The "next" span holds an <a> only when another page exists.
        next_span = soup.find('span', class_='next')
        next_a = next_span.find('a') if next_span is not None else None
        if next_a is None:
            # Last page: report "no more pages" rather than concatenating None.
            return None
        return 'https://movie.douban.com/top250' + next_a['href']
    
    if __name__=='__main__':
        # Crawl page by page: each call to main() returns the URL of the next
        # page, which must be fed back into the loop. Without reassigning
        # n_url the loop would fetch the same page forever.
        n_url='https://movie.douban.com/top250?start=0'
        while n_url:
            n_url=main(n_url)
    

    相关文章

      网友评论

          本文标题:抓取豆瓣网电影的例子

          本文链接:https://www.haomeiwen.com/subject/rhghoxtx.html