今天用 requests 写了一个简书点赞的小爬虫来玩玩。没有使用多线程、多进程等,就是单纯的 requests。
"""
----------
version=1.0
------------
"""
import requests
from lxml import etree
from time import time
# Request headers for an authenticated session.
# Fill in the headers (including cookies) copied from your own logged-in
# Jianshu account, otherwise the like requests will be rejected.
headers = {
# 用自己的账号的header
}
def fetch_url(url, page):
    """POST the listing endpoint for one page and return the raw response body.

    Args:
        url: listing endpoint, e.g. https://www.jianshu.com/trending_notes
        page: 1-based page number, sent as form data

    Returns:
        bytes: the raw HTML body of the listing page.
    """
    data = {'page': page}
    # timeout added: without it a stalled connection hangs the whole crawl
    r = requests.post(url, headers=headers, data=data, timeout=10)
    return r.content
# Collect article URLs parsed from the listing pages.
def parse(url, pages):
    """Fetch `pages` listing pages and return the article URLs found on them.

    Args:
        url: listing endpoint passed through to fetch_url.
        pages: number of pages to fetch, starting from page 1.

    Returns:
        list[str]: absolute article URLs; pages that fail are skipped.
    """
    article_urllist = []
    article_xpath = "//li/div[@class='content']/a[@class='title']"
    for page_number in range(1, pages + 1):
        try:
            page = fetch_url(url, page_number)
            html = etree.HTML(page)
            article_list = html.xpath(article_xpath)
            # hrefs in the listing are relative; prefix the site origin
            article_urllist.extend(
                f"https://www.jianshu.com{article.get('href')}"
                for article in article_list
            )
        except Exception:  # narrowed from bare except; keep best-effort behavior
            print('失败')
    return article_urllist
# Request an article page, then POST a like for it.
def dianzan(article_url):
    """Fetch an article page, extract its note id, and POST a like.

    Args:
        article_url: absolute URL of a Jianshu article.

    Best-effort: prints a message and returns on any failure.
    """
    try:
        a = requests.get(article_url, headers=headers)
    except Exception:
        print('请求文章页面失败')
        # bug fix: original fell through here and used the undefined `a`,
        # raising NameError instead of skipping the article
        return
    html = etree.HTML(a.content)
    dianzan_path = "//meta[@property='al:android:url']"
    meta_tags = html.xpath(dianzan_path)
    if not meta_tags:
        # bug fix: original indexed [0] unconditionally and crashed with
        # IndexError on pages without this meta tag
        print('点赞失败')
        return
    url_part = meta_tags[0].get('content')
    # meta content looks like jianshu://notes/38734391; [9:] strips the
    # "jianshu:/" scheme prefix, yielding /notes/38734391
    # target: https://www.jianshu.com/notes/38734391/like
    url = f'https://www.jianshu.com{url_part[9:]}/like'
    # 点赞
    try:
        requests.post(url, headers=headers)
    except Exception:
        print('点赞失败')
def run(url, page):
    """Like every article found across `page` listing pages of `url`."""
    liked = 0
    for target in parse(url, page):
        dianzan(target)
        liked += 1
    print(f'点赞文章数量{liked}')
if __name__ == '__main__':
    # Crawl one page of the trending-notes listing and time the run.
    url = 'https://www.jianshu.com/trending_notes'
    pages = 1
    start = time()
    run(url, pages)
    end = time()
    # elapsed wall-clock seconds
    print(f'{end - start}')
（运行结果截图：image.png）
网友评论