美文网首页
爬取豆瓣电影top250

爬取豆瓣电影top250

作者: 周闖 | 来源:发表于2018-08-09 18:24 被阅读0次
    代码
    #  Author:ZhouChuang
    #  coding:utf-8
    
    from bs4 import BeautifulSoup
    import requests
    import time
    
    # HTTP headers sent with every page request made by this script.
    # 'User-Agent' presents the client as a desktop Chrome browser.
    # 'Cookie' is a browser session cookie pasted in as adjacent string
    # literals, which Python concatenates into one header value.
    # NOTE(review): the cookie is a hard-coded captured session value; it has
    # presumably expired and is account-specific -- confirm whether it is still
    # needed, and consider loading it from the environment instead.
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3514.0 Safari/537.36',
        'Cookie':'viewed="2166211"; bid=wLwzb9b0g_A; douban-fav-remind=1; ll="118173"; __utmc=30149280; __utmc=223695111; _vwo_uuid_v2=D96C'
                 '22273BD00491856812822DDB071A2|e5653604c927a32fa93d6e494419f10c; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1533806091%2C%22h'
                 'ttps%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DuEf5o7h6W1QgcIPLdxBrM9-O5w1pL72KygnR1F15VN2W7NpRddrICJa95QHW8IHb%26wd%3D%26eqid%3'
                 'Dd58b6e88000163dd000000045b6bfbe0%22%5D; _pk_ses.100001.4cf6=*; ps=y; ck=2wWO; __utma=30149280.1177526221.1531553567.1533803'
                 '492.1533806181.4; __utmz=30149280.1533806181.4.3.utmcsr=accounts.douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/phone/b'
                 'ind; __utma=223695111.801250262.1533803492.1533803492.1533806181.2; __utmz=223695111.1533806181.2.2.utmcsr=accounts.douban.com'
                 '|utmccn=(referral)|utmcmd=referral|utmcct=/phone/bind; ap=1; push_noty_num=0; push_doumail_num=0; douban-profile-remind=1; __'
                 'utmv=30149280.15261; __utmb=30149280.22.10.1533806181; _pk_id.100001.4cf6=3eba8e0d5047ec4c.1533803492.2.1533806798.1533803530.;'
                 ' __utmb=223695111.15.10.1533806181'
    }
    # Listing pages to crawl. The list holds 250 movies at 25 per page, so the
    # valid `start` offsets are 0, 25, ..., 225 (10 pages). The upper bound is
    # exclusive, so 250 stops before requesting a page past the end of the list.
    urls = ['https://movie.douban.com/top250?start={}&filter='.format(i) for i in range(0, 250, 25)]
    # Address of the first listing page.
    url = 'https://movie.douban.com/top250?start=0&filter='
    def get(url,data=None):
        """Fetch one Top 250 listing page and print one dict per movie.

        Every movie is parsed from its own ``<li>`` entry, so the rank, title,
        rating, one-line review and poster URL printed together always come
        from the same entry. Selecting each field as a separate page-wide list
        and zipping them would shift later values onto the wrong movie (and
        drop trailing movies) as soon as one entry lacked one of the elements.

        Args:
            url: Address of the listing page to download.
            data: Unused; kept only so callers that pass it keep working.

        Returns:
            None. Each parsed movie is printed to stdout as a dict.
        """
        # A timeout keeps a stalled connection from blocking the crawl forever.
        wb_data = requests.get(url, headers=headers, timeout=10)
        # Pause after every request so consecutive pages are not fetched back to back.
        time.sleep(2)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        for item in soup.select('#content > div > div.article > ol > li'):
            paiming = item.select_one('div.pic > em')
            image = item.select_one('div.pic > a > img')
            pingfen = item.select_one('div.info > div.bd > div > span.rating_num')
            jianping = item.select_one('div.info > div.bd > p.quote > span')
            if paiming is None or image is None or pingfen is None:
                # Entry does not have the expected movie layout; skip it.
                continue
            movie = {
                '排名': paiming.get_text(),
                # The poster <img> carries the title in `alt` and the image URL in `src`.
                '名称': image.get('alt'),
                '评分': pingfen.get_text(),
                # An entry without a p.quote element gets an empty review
                # instead of borrowing the next movie's text.
                '简评': jianping.get_text() if jianping is not None else '',
                '图片链接': image.get('src'),
            }
            print(movie)
    # Crawl the listing pages in order; get() prints the movies found on each.
    for page_url in urls:
        get(page_url)
    
    结果截图
    截图.png

    相关文章

      网友评论

          本文标题:爬取豆瓣电影top250

          本文链接:https://www.haomeiwen.com/subject/hbmcbftx.html