美文网首页
python day09 获取豆瓣电影top25集合

python day09 获取豆瓣电影top25集合

作者: michaeljacc | 来源:发表于2016-06-23 15:56 被阅读52次

进阶:获取豆瓣 Top250 电影的名称、评分和简评

运行结果

/Library/Frameworks/Python.framework/Versions/3.5/bin/python3.5 /Users/wjw/PycharmProjects/class9/豆瓣.py
[
<Movie:
  score = (9.6)
  name = (肖申克的救赎)
  quote = (希望让人自由。)
>, 
<Movie:
  score = (9.4)
  name = (这个杀手不太冷)
  quote = (怪蜀黍和小萝莉不得不说的故事。)
>, 
<Movie:
  score = (9.4)
  name = (阿甘正传)
  quote = (一部美国近现代史。)
>, 
<Movie:
  score = (9.4)
  name = (霸王别姬)
  quote = (风华绝代。)
>, 
<Movie:
  score = (9.5)
  name = (美丽人生)
  quote = (最美的谎言。)
>, 
<Movie:
  score = (9.2)
  name = (千与千寻)
  quote = (最好的宫崎骏,最好的久石让。 )
>, 
<Movie:
  score = (9.4)
  name = (辛德勒的名单)
  quote = (拯救一个人,就是拯救整个世界。)
>, 
<Movie:
  score = (9.2)
  name = (海上钢琴师)
  quote = (每个人都要走一条自己坚定了的路,就算是粉身碎骨。 )
>, 
<Movie:
  score = (9.3)
  name = (机器人总动员)
  quote = (小瓦力,大人生。)
>, 
<Movie:
  score = (9.2)
  name = (盗梦空间)
  quote = (诺兰给了我们一场无法盗取的梦。)
>, 
<Movie:
  score = (9.1)
  name = (泰坦尼克号)
  quote = (失去的才是永恒的。 )
>, 
<Movie:
  score = (9.1)
  name = (三傻大闹宝莱坞)
  quote = (英俊版憨豆,高情商版谢耳朵。)
>, 
<Movie:
  score = (9.2)
  name = (放牛班的春天)
  quote = (天籁一般的童声,是最接近上帝的存在。 )
>, 
<Movie:
  score = (9.2)
  name = (忠犬八公的故事)
  quote = (永远都不能忘记你所爱的人。)
>, 
<Movie:
  score = (9.1)
  name = (大话西游之大圣娶亲)
  quote = (一生所爱。)
>, 
<Movie:
  score = (9.1)
  name = (龙猫)
  quote = (人人心中都有个龙猫,童年就永远不会消失。)
>, 
<Movie:
  score = (9.2)
  name = (教父)
  quote = (千万不要记恨你的对手,这样会让你失去理智。)
>, 
<Movie:
  score = (9.2)
  name = (乱世佳人)
  quote = (Tomorrow is another day.)
>, 
<Movie:
  score = (9.1)
  name = (天堂电影院)
  quote = (那些吻戏,那些青春,都在影院的黑暗里被泪水冲刷得无比清晰。)
>, 
<Movie:
  score = (8.9)
  name = (当幸福来敲门)
  quote = (平民励志片。 )
>, 
<Movie:
  score = (9.0)
  name = (搏击俱乐部)
  quote = (邪恶与平庸蛰伏于同一个母体,在特定的时间互相对峙。)
>, 
<Movie:
  score = (9.0)
  name = (楚门的世界)
  quote = (如果再也不能见到你,祝你早安,午安,晚安。)
>, 
<Movie:
  score = (9.1)
  name = (触不可及)
  quote = (满满温情的高雅喜剧。)
>, 
<Movie:
  score = (9.1)
  name = (指环王3:王者无敌)
  quote = (史诗的终章。)
>, 
<Movie:
  score = (8.9)
  name = (罗马假日)
  quote = (爱情哪怕只有一天。)
>]

Process finished with exit code 0

源代码

import requests
from lxml import html

class Model(object):
    """Base class that gives subclasses a readable, attribute-listing repr."""

    def __repr__(self):
        """Return a multi-line dump: the class name, then one line per attribute.

        Each instance attribute is rendered as ``key = (value)``, in the
        order the attributes appear in the instance ``__dict__``.
        """
        rendered = []
        for key, value in self.__dict__.items():
            rendered.append('{0} = ({1})'.format(key, value))
        body = '\n  '.join(rendered)
        header = self.__class__.__name__
        return '\n<{0}:\n  {1}\n>'.format(header, body)


class Movie(Model):
    """One movie entry: title, rating, one-line quote and cover image URL."""

    def __init__(self):
        super().__init__()
        # Defaults only; targets are assigned left to right, so the attribute
        # order (which the inherited repr follows) is name, score, quote,
        # cover_url.
        self.name, self.score, self.quote, self.cover_url = '', 0, '', ''


def movie_from_div(div):
    """Build a Movie from one ``<div class="item">`` element.

    Args:
        div: element supporting ``xpath()`` that wraps a single movie entry.

    Returns:
        Movie whose name, score, quote and cover_url are filled from the
        element. ``score`` is kept as the text of the rating span; it is not
        converted to a number.

    Raises:
        IndexError: if the title, rating or cover image node is missing.
    """
    movie = Movie()
    movie.name = div.xpath('.//span[@class="title"]')[0].text
    movie.score = div.xpath('.//span[@class="rating_num"]')[0].text
    # An entry may have no quote span; in that case keep Movie's default
    # quote instead of raising IndexError on the empty result list.
    quote_nodes = div.xpath('.//span[@class="inq"]')
    if quote_nodes:
        movie.quote = quote_nodes[0].text
    img_url = div.xpath('.//div[@class="pic"]/a/img/@src')[0]
    print(img_url)
    movie.cover_url = img_url
    return movie


def movies_from_url(url):
    """Download a list page and parse every movie entry on it.

    Args:
        url: address of the page to fetch.

    Returns:
        list of Movie, one per ``<div class="item">`` found in the page.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    page = requests.get(url)
    # Fail loudly on an error response rather than parsing the error page as
    # if it were the movie list.
    page.raise_for_status()
    root = html.fromstring(page.content)
    return [movie_from_div(div) for div in root.xpath('//div[@class="item"]')]


def download_img(url, name):
    """Download ``url`` and write the response body to the file ``name``.

    Args:
        url: address of the image to fetch.
        name: path of the file to create or overwrite (opened in binary mode).

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    r = requests.get(url)
    # Check the status before opening the file so a failed request does not
    # leave an error body saved under an image file name.
    r.raise_for_status()
    with open(name, 'wb') as f:
        f.write(r.content)


def save_covers(movies):
    """Download the cover of every movie to a file named ``<name>.jpg``.

    Args:
        movies: iterable of objects exposing ``cover_url`` and ``name``.
    """
    for movie in movies:
        source = movie.cover_url
        target = movie.name + '.jpg'
        download_img(source, target)


def main():
    """Scrape the Douban Top 250 URL, print the parsed movies, save their covers."""
    top_movies = movies_from_url('https://movie.douban.com/top250')
    print(top_movies)
    save_covers(top_movies)



# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

相关文章

网友评论

      本文标题:python day09 获取豆瓣电影top25集合

      本文链接:https://www.haomeiwen.com/subject/kjtodttx.html