美文网首页
模拟登陆知乎并批量下载问题关注者的头像、ID、签名

模拟登陆知乎并批量下载问题关注者的头像、ID、签名

作者: 你清澈又神秘 | 来源:发表于2017-09-27 17:09 被阅读67次

    import re
    import requests
    import http.cookiejar as cookielib
    from PIL import Image
    import time
    import os
    import json

    # 登录内容

    # Module-level configuration shared by every function below.
    # login_url is defined here but not referenced by the visible code.
    login_url = 'https://www.zhihu.com/#signin'
    # Topic followers endpoint that getMzUrl posts to.
    meizi_url = 'https://www.zhihu.com/topic/19565769/followers'
    # Browser-like User-Agent sent with requests made through `session`.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
    }
    # One shared session whose cookie jar is backed by the local file 'zhihucookies'.
    session = requests.session()
    session.cookies = cookielib.LWPCookieJar(filename='zhihucookies')
    try:
        session.cookies.load(ignore_discard=True)
    except OSError:
        # A missing or unreadable cookie file is expected on first run
        # (cookiejar.LoadError is an OSError subclass); continue with an empty
        # jar. Unlike a bare `except:`, this does not swallow Ctrl-C.
        print('Cookie未加载')

    def get_xsrf():
        """Fetch the topic followers page and return its `_xsrf` token.

        The token is the first value captured from a `name="_xsrf" value="..."`
        attribute pair in the page text.

        Raises:
            IndexError: if the page contains no such attribute pair.
        """
        page_url = 'https://www.zhihu.com/topic/19565769/followers'
        response = session.get(page_url, headers=headers)
        tokens = re.findall('name="_xsrf" value="(.*?)"', response.text)
        return tokens[0]
    def get_captcha():
        """Download the login captcha, try to display it, and return the user's answer.

        The image is written to 'captcha.jpg' in the current working directory.
        If it cannot be opened or displayed, the absolute path of the file is
        printed so the user can open it manually.

        Returns:
            The text typed by the user at the prompt.
        """
        # Millisecond timestamp passed as the `r` query parameter.
        timestamp_ms = str(int(time.time() * 1000))
        captcha_url = 'https://www.zhihu.com/captcha.gif?r=' + timestamp_ms + "&type=login"
        response = session.get(captcha_url, headers=headers)
        # The `with` block closes the file; no explicit close() is needed.
        with open('captcha.jpg', 'wb') as f:
            f.write(response.content)
        try:
            # Context manager releases the image even if show() raises.
            with Image.open('captcha.jpg') as im:
                im.show()
        except Exception:
            # Best-effort display only: fall back to telling the user where the
            # file is. `Exception` (not a bare except) lets Ctrl-C through.
            print(u'请到 %s 目录找到captcha.jpg 手动输入' % os.path.abspath('captcha.jpg'))
        captcha = input('请输入验证码\n> ')
        return captcha

    def isLogin():
        """Return True when the profile settings page answers with HTTP 200.

        The request is made through the shared session with redirects
        disabled, so any other status code (including a redirect) yields False.
        """
        profile_url = 'https://www.zhihu.com/settings/profile'
        response = session.get(profile_url, headers=headers, allow_redirects=False)
        return response.status_code == 200
    def login(secret, account):
        """Log in with a phone number or e-mail address and save the session cookies.

        Args:
            secret: The account password.
            account: Either an 11-digit number starting with 1 (treated as a
                phone number) or any string containing '@' (treated as e-mail).

        Returns:
            0 if `account` matches neither form (nothing is sent over the
            network in that case); otherwise None.
        """
        # Validate the account first so a malformed value costs no network
        # round trip for the xsrf token.
        if re.match(r'^1\d{10}$', account):
            print('login by phone! \n')
            post_url = 'https://www.zhihu.com/login/phone_num'
            account_field = 'phone_num'
        elif '@' in account:
            print('login by mail! \n')
            post_url = 'https://www.zhihu.com/login/email'
            account_field = 'email'
        else:
            print('你的账号输入有问题,请重新登录')
            return 0

        postdata = {
            '_xsrf': get_xsrf(),
            'password': secret,
            account_field: account,
        }
        login_page = session.post(post_url, data=postdata, headers=headers)
        login_code = login_page.json()
        if login_code['r'] == 1:
            # NOTE(review): r == 1 is handled as "rejected, retry with a
            # captcha" -- inferred from this retry; confirm against the API.
            postdata['captcha'] = get_captcha()
            login_page = session.post(post_url, data=postdata, headers=headers)
            login_code = login_page.json()
            print(login_code['msg'])
        print(login_code)
        # Keep session (discard-flagged) cookies too, mirroring the
        # load(ignore_discard=True) done at start-up; the default save() drops them.
        session.cookies.save(ignore_discard=True)

    # 获取美女头像和ID

    def _safe_filename(name):
        """Return `name` with path separators and other file-name-unsafe characters replaced by '_'."""
        cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', name).strip(' .')
        return cleaned or '_'

    def getMzUrl(page, save_dir='E:/pylearn/rsa/知乎妹子头像和id和签名'):
        """Download follower avatars from the topic followers listing.

        Posts to `meizi_url` once per offset 0, 20, ..., page * 20 (inclusive,
        i.e. page + 1 requests), extracts (avatar URL, user name) pairs from each
        response, and writes every avatar to `<save_dir>/<user name>.jpg`.

        Args:
            page: Number of 20-entry offsets to walk beyond the first request.
            save_dir: Directory the images are written to; created if missing.
                Defaults to the directory the script originally changed into.

        Returns:
            None. Progress, the number of images written, and the start
            timestamp are printed.
        """
        startime = int(time.time())
        pages = page * 20
        # Number of images actually written (previously started at 1 and
        # therefore over-reported by one).
        count = 0
        # Write into save_dir via joined paths instead of os.chdir, so relative
        # files used elsewhere (the cookie file, captcha.jpg) are unaffected.
        os.makedirs(save_dir, exist_ok=True)
        # Built once: get_xsrf() fetches a whole page, so calling it on every
        # iteration doubled the number of requests.
        headers1 = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
            'Host': 'www.zhihu.com',
            'Origin': 'https://www.zhihu.com',
            # Host was misspelled as "ww.zhihu.com".
            'Referer': 'https://www.zhihu.com/topic/19565769/followers',
            'X-Requested-With': 'XMLHttpRequest',
            'X-Xsrftoken': get_xsrf()
        }
        # The pattern had lost its '*' quantifiers ('.?' / '(.?)'), so the src
        # group could hold at most one character and never matched a URL.
        user_pattern = re.compile(
            r'avatar-img-50.*?src="(.*?)".*?<a href.*?author-link">(.*?)</a>',
            re.S,
        )
        for off in range(0, pages + 20, 20):
            formdata = {
                "start": startime,
                "offset": off
            }
            meizi_page = json.loads(session.post(meizi_url, data=formdata, headers=headers1).text)
            print(meizi_page)
            userinfos = user_pattern.findall(str(meizi_page))
            print(userinfos)
            for avatar_url, user_name in userinfos:
                image = requests.get(avatar_url)
                # The user name comes from the remote page: sanitise it so it
                # cannot escape save_dir or contain characters the OS rejects.
                target = os.path.join(save_dir, _safe_filename(user_name) + '.jpg')
                with open(target, 'wb') as out:
                    out.write(image.content)
                count += 1
                print("用户头像下载地址:" + avatar_url)
                print("用户名:" + user_name)
        print('图片总数:' + str(count))
        print(startime)

    # Script entry point. The dunder underscores had been stripped
    # (`if name == 'main'`), which raises NameError as soon as the file runs.
    if __name__ == '__main__':
        if isLogin():
            # Saved cookies are still valid: go straight to downloading.
            print('已登录.')
            getMzUrl(10)
        else:
            # Otherwise prompt for credentials and log in; the cookies saved by
            # login() are picked up on the next run.
            account = input('请输入用户名\n> ')
            secret = input('请输入密码\n> ')
            login(secret, account)

    相关文章

      网友评论

          本文标题:模拟登陆知乎并批量下载问题关注者的头像、ID、签名

          本文链接:https://www.haomeiwen.com/subject/flguextx.html