美文网首页
2019-08-06 爬取豆瓣电影鸟人的所有剧照

2019-08-06 爬取豆瓣电影鸟人的所有剧照

作者: 年画儿 | 来源:发表于2019-08-06 19:09 被阅读0次
    # 爬取豆瓣电影中某一部电影的所有剧照,例子里用的是《鸟人》。
    # 先得到所有 urls,这种策略似乎有点愚蠢,如果得不到 url 就悲剧了。
    # 可能边找边保存,或者用多线程,都会好一些。
    
    
    import requests
    from bs4 import BeautifulSoup
    import time
    import os
    
    
    # 生成所有剧照分页的 url
    def get_urls(n, subject_id="20438962", page_size=30):
        """Build the URLs of the first ``n`` photo-listing pages of a movie.

        Args:
            n: Number of listing pages to generate URLs for.
            subject_id: Douban subject id placed in the URL path. The default
                is the id the original script used (commented there as the
                movie "Birdman").
            page_size: Amount the ``start`` query parameter grows from one
                page to the next. The default of 30 is the step the original
                script hard-coded (presumably the number of photos per page).

        Returns:
            A list of ``n`` URLs whose ``start`` offsets are
            ``0, page_size, 2 * page_size, ...``. The list is empty when
            ``n`` is zero or negative.
        """
        template = "https://movie.douban.com/subject/%s/photos?type=S&start=%d"
        return [template % (subject_id, page * page_size) for page in range(n)]
    
    
    # 得到图片的链接
    def parse_url(urls, headers):
        """Collect the image links found on each photo-listing page.

        Every page in ``urls`` is fetched and parsed; the ``src`` attribute of
        the first ``<img>`` inside each ``<li>`` of the page's
        ``<ul class="poster-col3 clearfix">`` element is collected and printed.

        Args:
            urls: Iterable of listing-page URLs to fetch.
            headers: HTTP headers sent with every page request.

        Returns:
            A list of image URLs in the order they were encountered. Pages
            without the expected ``<ul>`` and list items without an ``<img>``
            contribute nothing.

        Raises:
            Any exception raised by ``requests.get`` (connection errors,
            timeouts) propagates to the caller.
        """
        picture_urls = []
        for page_url in urls:
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(url=page_url, headers=headers, timeout=10)
            soup = BeautifulSoup(response.text, 'lxml')
            poster_list = soup.find('ul', class_="poster-col3 clearfix")
            if poster_list is None:
                # find() returns None when nothing matches (for example a page
                # past the last photo or a blocked request). Skip the page
                # instead of crashing and losing the links gathered so far.
                print("未找到剧照列表,跳过 " + page_url)
                continue

            for item in poster_list.find_all('li'):
                img = item.find("img")
                src = img.get("src") if img is not None else None
                if not src:
                    continue
                picture_urls.append(src)
                print(src)

        return picture_urls
    
    # 保存图片
    def save_pictures(urls, path):
        """Download every image in ``urls`` into the directory ``path``.

        Each file is named after the last ``/``-separated segment of its URL.
        A response with an HTTP error status is reported and skipped, so an
        error page is never written to disk as if it were an image.

        Args:
            urls: Iterable of image URLs to download.
            path: Directory the files are written into. It is not created
                here, so it must already exist.

        Raises:
            Any exception raised by ``requests.get`` or by opening/writing the
            target file propagates to the caller.
        """
        for pic in urls:
            name = pic.split("/")[-1]
            # A timeout keeps one stalled download from hanging the whole run.
            picture = requests.get(pic, timeout=10)
            if not picture.ok:
                print("下载失败,跳过 " + name)
                continue

            savepath = os.path.join(path, name)
            with open(savepath, "wb") as f:
                f.write(picture.content)
            print("已经保存" + name)
    
    if __name__ == '__main__':
        # Browser-like User-Agent sent with every listing-page request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
        }
        # NOTE: a commented-out personal session cookie used to live here. It
        # was unused and must not be kept in source; if a cookie is ever
        # needed, load it from the environment or a local untracked file.

        # Build the URLs of 51 listing pages, then gather every image link.
        douban_urls = get_urls(51)
        all_pic_urls = parse_url(douban_urls, headers)

        save_path = './pictures'

        # Create the output directory if needed; exist_ok avoids the
        # check-then-create race of a separate os.path.exists() test.
        os.makedirs(save_path, exist_ok=True)

        save_pictures(all_pic_urls, save_path)
    
    

    相关文章

      网友评论

          本文标题:2019-08-06 爬取豆瓣电影鸟人的所有剧照

          本文链接:https://www.haomeiwen.com/subject/ujzwdctx.html