# login.py 用于登录验证 (login.py: performs the login / authentication step)
# -*- coding: utf-8-*-
'''
@Description:获取cookie
'''
from zhihu_oauth import ZhihuClient
from zhihu_oauth.exception import NeedCaptchaException
# Log in to Zhihu and persist the resulting token to 'token.pkl' so that
# other scripts can reuse the session without logging in again.
client = ZhihuClient()
user = '保密'  # account name placeholder -- fill in before running
pwd = '保密'  # password placeholder -- fill in before running

try:
    # login() reports failure through its return value, a (success, reason)
    # pair, instead of raising, so the result has to be checked explicitly.
    success, reason = client.login(user, pwd)
except NeedCaptchaException:
    # A captcha is required: save the image to disk, ask the user to type
    # what it shows, then retry the login with the captcha text.
    with open('a.gif', 'wb') as f:
        f.write(client.get_captcha())
    captcha = input('please input captcha:')
    success, reason = client.login(user, pwd, captcha)

if success:
    print(u"登陆成功!")
    client.save_token('token.pkl')  # only persist a token that is valid
else:
    # Do not write a token file after a failed login; report why it failed.
    print(u"登陆失败: %s" % reason)
# pic.py 用于爬取图片 (pic.py: crawls and downloads the images)
# -*- coding: utf-8-*-
'''
@Description:保存知乎某个问题下所有答案的图片
'''
from __future__ import print_function

import os
import re
import urllib
import urllib.request

from zhihu_oauth import ZhihuClient
# Download every image found in the answers of one Zhihu question into a
# folder named after the question title.
client = ZhihuClient()
# Log in by loading the token file written by the login script.
client.load_token('token.pkl')

# Other question ids that were tried: 327588950, 322665913, 22462004.
question_id = 20312271  # not called "id" so the builtin id() stays usable
question = client.question(question_id)
print(u"问题:", question.title)
print(u"回答数量:", question.answer_count)

# Folder that receives the images. Characters that are not allowed in
# file names (the original only removed '?') are stripped from the title.
path = re.sub(r'[\\/:*?"<>|]', '', question.title) + u"(图片)"
# Only create the folder when it is missing so the script can be rerun.
if not os.path.isdir(path):
    os.mkdir(path)

# Compiled once, outside the loop. Group 1 is the full image URL and
# group 2 is its extension (jpg or png).
img_pattern = re.compile(r'<img src="(https://pic\d\.zhimg\.com/.*?\.(jpg|png))".*?>')

index = 1  # running image number, shared across all answers
for answer in question.answers:
    # findall() returns (url, extension) tuples; an answer without matching
    # <img> tags yields an empty list and is skipped naturally.
    for img_url, extension in img_pattern.findall(answer.content):
        # Keep the real extension instead of naming every file ".jpg".
        target = os.path.join(path, u"%d.%s" % (index, extension))
        urllib.request.urlretrieve(img_url, target)
        print(u"成功保存第%d张图片" % index)
        index += 1
# 网友评论 (web page residue: start of the reader comments section)