使用 requests + BeautifulSoup 爬取豆瓣图书 Top 250,并把结果通过邮件发送给自己。
#!/usr/bin/env python
# encoding: utf-8
import requests
from bs4 import BeautifulSoup
import sys
import yagmail
# Python 2 only: make UTF-8 the implicit str<->unicode codec, which the
# rest of this script relies on when it mixes byte-string literals with
# the unicode text returned by BeautifulSoup. Neither `reload` (as a
# builtin) nor `sys.setdefaultencoding` exists on Python 3, so the hack
# is skipped there instead of failing at import time with a NameError.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2 only
    sys.setdefaultencoding('utf8')
"""
获取豆瓣图书 Top 250
"""
# Build the URL of the ranking page that starts at a given offset.
def get_url(root_url, n, page_size=25):
    """Return the URL of result page ``n``.

    Args:
        root_url: URL prefix to which the numeric start offset is appended.
        n: Zero-based page index.
        page_size: Number of entries per page; the start offset is
            ``n * page_size``. Defaults to 25, the value that used to be
            hard-coded here.

    Returns:
        ``root_url`` followed by the decimal start offset.
    """
    return root_url + str(n * page_size)
def _parse_book(item):
    """Extract one ``(name, alias, info, score)`` tuple from a book table.

    Returns None when the table has no ``<div>`` containing a link, i.e.
    when it does not look like a book entry, so the caller can skip it
    instead of crashing with AttributeError.
    """
    title_div = item.div
    link = title_div.a if title_div is not None else None
    if link is None:
        return None

    # The title text is spread over several lines padded with spaces.
    r_name = link.text.strip().replace('\n', '').replace(' ', '')

    # An optional <span> next to the title carries an alternative name.
    alias_tag = title_div.span
    if alias_tag:
        # Strip after dropping the colon so no leading blank is left over.
        name2 = alias_tag.text.replace(':', '').strip()
    else:
        name2 = r_name

    info_tag = item.find('p', {"class": "pl"})
    score_tag = item.find('span', {"class": "rating_nums"})
    info = info_tag.text if info_tag is not None else ''
    score = score_tag.text.strip() if score_tag is not None else ''
    return (r_name, name2, info, score)


def get_review(page_url, timeout=10):
    """Download one ranking page and return the books listed on it.

    Args:
        page_url: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        A list of ``(name, alias, info, score)`` tuples, one per
        ``<table width="100%">`` element that looks like a book entry.
        ``alias`` equals ``name`` when the page shows no alternative
        title; ``info`` and ``score`` are empty strings when the
        corresponding element is missing.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts, or a 4xx/5xx response (via ``raise_for_status``),
            instead of silently returning an empty list.
    """
    response = requests.get(page_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    books_list = []
    for item in soup.find_all('table', {"width": "100%"}):
        book = _parse_book(item)
        if book is not None:
            books_list.append(book)
    return books_list
def send_mail(user='bb@qq.com', password='abcdefg', host='smtp.qq.com',
              to=None, attachment="D:\\top250_books.txt"):
    """Mail the generated book list as an attachment.

    Called without arguments this behaves as before: it logs in to
    ``smtp.qq.com`` as ``bb@qq.com`` and sends the file to that same
    address.

    Args:
        user: SMTP account used to log in and as the sender.
        password: Password for ``user``.
        host: SMTP server host name.
        to: Recipient address; defaults to ``user`` (mail to oneself).
        attachment: Path of the file to attach.

    NOTE(review): the default credentials are the literals that used to
    be hard-coded in the body and are kept only for backward
    compatibility; prefer passing real credentials in from outside the
    source tree.
    """
    recipient = user if to is None else to
    yag = yagmail.SMTP(user=user, password=password, host=host)
    yag.send(to=recipient,
             subject="豆瓣图书 Top 250",
             contents="豆瓣图书 Top 250",
             attachments=[attachment])
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("Send already")
def main():
    """Scrape the ten pages of the Douban Books Top 250 and mail the result.

    Every book becomes one line of tab-separated, labelled fields in
    ``D:\\top250_books.txt``. All pages are fetched before the file is
    opened, and the file is rewritten rather than appended to, so a
    failed download leaves a previous file untouched and re-running the
    script does not duplicate entries. The file is then sent by
    ``send_mail()``.
    """
    import io  # local import keeps this block self-contained

    # get_url() appends the numeric offset, so the base must stop at
    # "start=" (the former trailing "0" produced "start=025" and so on).
    root_url = "https://book.douban.com/top250?start="

    lines = []
    for n in range(10):
        books_url = get_url(root_url, n)
        for book in get_review(books_url):
            lines.append(
                "图书名称:" + book[0] + "\t"
                + "图书别名:" + book[1] + "\t"
                + "作者及出版信息:" + book[2] + "\t"
                + "评分:" + book[3] + "\n"
            )

    # Explicit UTF-8 so the output does not depend on the platform default.
    with io.open('D:\\top250_books.txt', 'w', encoding='utf-8') as f:
        f.writelines(lines)

    # Mail once, after the complete list has been written.
    send_mail()
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
网友评论