美文网首页
Python爬取豆瓣图书250

Python爬取豆瓣图书250

作者: 李白开水 | 来源:发表于2020-02-24 23:06 被阅读0次

    使用 requests + BeautifulSoup 爬取豆瓣图书 Top 250,并通过 yagmail 把结果文件作为邮件附件发给自己。

    #!/usr/bin/env python
    # encoding: utf-8
    import requests
    from bs4 import BeautifulSoup
    import sys
    import yagmail
    
    # Python 2 only: make implicit str <-> unicode conversions use UTF-8 instead
    # of ASCII. site.py removes sys.setdefaultencoding at startup, so the module
    # has to be reloaded to get the function back. On Python 3 the `reload`
    # builtin does not exist, so the hack is skipped there.
    if sys.version_info[0] < 3:
        reload(sys)  # noqa: F821 - builtin on Python 2
        sys.setdefaultencoding('utf8')
    
    """
     获取豆瓣图书 Top 250
    """
    
    
    # Build the URL of the listing page that starts at a given rank offset.
    def get_url(root_url, n):
        """Return ``root_url`` with the start offset for page ``n`` appended.

        The offset is ``n * 25``, rendered with ``str`` and concatenated to
        ``root_url`` as-is (no separator is inserted).
        """
        offset = str(n * 25)
        return root_url + offset
    
    
    def get_review(page_url):
        """Download one listing page and extract its book entries.

        Args:
            page_url: URL of the listing page to fetch.

        Returns:
            A list of ``(name, alias, info, score)`` tuples, one per matching
            ``<table width="100%">`` on the page. ``name`` has newlines and
            spaces removed; ``alias`` falls back to ``name`` when the entry has
            no ``<span>`` inside its first ``<div>``; ``info`` and ``score`` are
            empty strings when the corresponding element is missing.

        Raises:
            requests.RequestException: on timeout, connection failure, or a
                4xx/5xx response.
        """
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(page_url, timeout=10)
        # Fail loudly instead of parsing an error page into an empty result.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'lxml')

        books_list = []
        for item in soup.find_all('table', {"width": "100%"}):
            title_box = item.div
            title_link = title_box.a if title_box is not None else None
            if title_link is None:
                # Not a book entry (no title link) - skip rather than crash.
                continue

            r_name = title_link.text.strip().replace('\n', '').replace(' ', '')

            alias_tag = title_box.span  # present only when the book has an alias
            if alias_tag is not None:
                name2 = alias_tag.text.strip().replace(':', '')
            else:
                name2 = r_name

            info_tag = item.find('p', {"class": "pl"})
            info = info_tag.text if info_tag is not None else ''

            score_tag = item.find('span', {"class": "rating_nums"})
            score = score_tag.text.strip() if score_tag is not None else ''

            books_list.append((r_name, name2, info, score))
        return books_list
    
    
    def send_mail():
        """Email the saved book list file as an attachment, then print a notice.

        Sends ``D:\\top250_books.txt`` through the ``smtp.qq.com`` host from and
        to the same address, using yagmail.
        """
        # NOTE(review): the account and password are hard-coded here; if these
        # are ever replaced with real credentials, load them from the
        # environment or a keyring instead of committing them to source.
        yag = yagmail.SMTP(user='bb@qq.com', password='abcdefg', host='smtp.qq.com')
        yag.send(
            to='bb@qq.com',
            subject="豆瓣图书 Top 250",
            contents="豆瓣图书 Top 250",
            attachments=["D:\\top250_books.txt"]
        )
        # Parenthesised form prints the same text on Python 2 and also parses
        # on Python 3, where the bare print statement is a syntax error.
        print("Send already")
    
    
    def main():
        """Scrape the ten listing pages, save the results to a file and mail it.

        Pages are fetched for n = 0..9 via ``get_url``/``get_review``. All
        records are collected first and written in one go, so a failure while
        fetching leaves any previous output file untouched. The file is then
        sent with ``send_mail``.
        """
        # The offset is appended by get_url, so the root must end at "start=".
        # (With a trailing "0" the URLs came out as start=00, start=025, ...)
        root_url = "https://book.douban.com/top250?start="

        records = []
        for n in range(10):
            books_url = get_url(root_url, n)
            for name, alias, info, score in get_review(books_url):
                records.append(
                    "图书名称:" + name + "\t"
                    + "图书别名:" + alias + "\t"
                    + "作者及出版信息:" + info + "\t"
                    + "评分:" + score + "\t"
                )

        # Open once in write mode: append mode made every re-run add another
        # full copy of the list to the file that gets mailed.
        with open('D:\\top250_books.txt', 'w') as f:
            f.writelines(records)
        send_mail()
    
    
    # Run the scraper only when this file is executed as a script, not when it
    # is imported as a module.
    if __name__ == "__main__":
        main()
    

    相关文章

      网友评论

          本文标题:Python爬取豆瓣图书250

          本文链接:https://www.haomeiwen.com/subject/yzzjuctx.html