美文网首页Python新世界
Python实现新浪微博登陆!验证码?反爬?在我这里是不存在的!

Python实现新浪微博登陆!验证码?反爬?在我这里是不存在的!

作者: 919b0c54458f | 来源:发表于2018-09-15 18:59 被阅读36次

    抓包分析

    首先打开charles,记录从打开浏览器到新浪微博登陆成功的全部http请求

    打开新浪微博,等待页面加载完成后,输入账号密码点击登陆,charles停止抓包,关闭浏览器。并将抓包结果进行保存。

    找到登陆的POST请求https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.19)

    登陆POST请求

    私信小编01-04即可获取数十套PDF哦!

    理论上我们只需要能完整的提交这个表单就能实现新浪微博的登陆。但是如果进行试验的话,会发现将该表单完整复制之后使用requests进行post提交是无法登陆的,所以可以断定其中某些字段是通过动态获取。

    由于新浪微博的首页内容太多太杂

    我们将上文中拿到的登陆post请求https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.19)进行访问发现是一个单独的登陆页面。

    登陆页面

    打开F12,对登陆按钮进行定位。根据前后台交互的方式可以知道,后台应该是通过表单(form)中的某一个内容来判断用户点击了登陆按钮,因此在Sources面板的js代码中检索type:submit。

    按钮定位

    js代码查找登陆操作

    初步断定该js中进行了一些加密操作

    username转换

    根据命名规则尝试检索用户名username很容易的找到了一段username的转换操作

    this.prelogin = function(config, callback) {

    var url = location.protocol == "https:" ? ssoPreLoginUrl.replace(/^http:/, "https:") : ssoPreLoginUrl;

    var username = config.username || "";

    username = sinaSSOEncoder.base64.encode(urlencode(username));

    delete config.username;

    var arrQuery = {

    entry: me.entry,

    callback: me.name + ".preloginCallBack",

    su: username,

    rsakt: "mod"

    };

    从username = sinaSSOEncoder.base64.encode(urlencode(username));可以看出来,用户名经过了url编码后再进行了base64转码,从键值对可以看出来su提交的就是转码后的账号

    使用python实现

    def get_username(self):
        """
        Return the account name in the form the "su" field carries.

        The name is URL-quoted first and the quoted text is then
        base64-encoded; the result is returned as a str.
        """
        quoted_bytes = urllib.parse.quote_plus(self.user_name).encode("utf-8")
        return str(base64.b64encode(quoted_bytes), "utf-8")

    password转换

    下面检索password一下子就发现了关键的一句话,而且明目张胆的写着RSA

    password = RSAKey.encrypt([me.servertime, me.nonce].join("\t") + "\n" + password)

    要进行RSA加密需要公钥PublicKey,检索一下Public找到了公钥RSAKey.setPublic(me.rsaPubkey, "10001");这样还不够,再找找看me.rsaPubkey是什么东西me.rsaPubkey = result.pubkey;所以应是返回值中有带咯,在charles里面找一下pubkey

    公钥

    这个返回值中有很多眼熟的东西servertime,nonce都在这里面了。记下这个有用的url:

    https://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=&rsakt=mod&client=ssologin.js(v1.4.19)&_=1536460959875

    sinaSSOController.preloginCallBack

    ({

    "retcode":0,

    "servertime":1536460961,

    "pcid":"gz-1c8cc52b95dad5397635083e4ddcd33994aa",

    "nonce":"42KG80",

    "pubkey":"EB2A38568661887FA180BDDB5CABD5F21C7BFD59C090CB2D245A87AC253062882729293E5506350508E7F9AA3BB77F4333231490F915F6D63C55FE2F08A49B353F444AD3993CACC02DB784ABBB8E42A9B1BBFFFB38BE18D78E87A0E41B9B8F73A928EE0CCEE1F6739884B9777E4FE9E88A1BBE495927AC4A799B3181D6442443",

    "rsakv":"1330428213",

    "uid":"2239053435",

    "exectime":6

    })

    这个url有点复杂,带有?和&连接的一长串查询参数,可以把这些参数放进字典,通过requests里get的params=传入

    def get_json_data(self, su_value):
        """
        Fetch the prelogin parameters for an encoded username.

        Sends a GET to the prelogin endpoint through ``self.session`` and
        decodes the JSONP reply (``callback({...})``) into a dict.

        :param su_value: the encoded username sent as the "su" parameter
        :return: the decoded dict, or ``{}`` if the request or decoding fails
        """
        params = {
            "entry": "weibo",
            "callback": "sinaSSOController.preloginCallBack",
            "rsakt": "mod",
            "checkpin": "1",
            "client": "ssologin.js(v1.4.18)",
            "su": su_value,
            # millisecond timestamp
            "_": int(time.time()*1000),
        }
        try:
            response = self.session.get("http://login.sina.com.cn/sso/prelogin.php", params=params)
            # Keep only the JSON between the outer parentheses of the JSONP
            # wrapper. The parentheses must be escaped; the unescaped pattern
            # previously shown here was not a valid regex, so every call
            # ended up in the except branch.
            json_data = json.loads(re.search(r"\((.*)\)", response.text).group(1))
        except Exception as excep:
            # Best-effort boundary: callers treat an empty dict as failure.
            json_data = {}
            logging.error("WeiBoLogin get_json_data error: %s", excep)
        logging.debug("WeiBoLogin get_json_data: %s", json_data)
        return json_data

    根据password = RSAKey.encrypt([me.servertime, me.nonce].join("\t") + "\n" + password)用python写一下rsa加密

    def get_password(self, servertime, nonce, pubkey):
        """
        Return the RSA-encrypted password as a lowercase hex string.

        :param servertime: "servertime" value from the prelogin response
        :param nonce: "nonce" value from the prelogin response
        :param pubkey: RSA modulus as a hexadecimal string ("pubkey")
        :return: hex text of the ciphertext
        """
        # The plaintext is servertime and nonce joined by a tab, then a
        # newline, then the password. The separators must not be empty
        # strings, otherwise the server cannot split the fields apart.
        string = (str(servertime) + "\t" + str(nonce) + "\n" + str(self.pass_word)).encode("utf-8")
        # The public exponent is the hex literal 10001 (65537).
        public_key = rsa.PublicKey(int(pubkey, 16), int("10001", 16))
        password = rsa.encrypt(string, public_key)
        password = binascii.b2a_hex(password)
        return password.decode()

    文章开头的表单的动态内容都动态获取了接下来就是post表单提交了,然后拿一下user_uniqueid和user_nick就可以(用你的账户)爬必须登录才能获取的数据了

    login_url_1 = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)&_=%d" % int(time.time())

    json_data_1 = self.session.post(login_url_1, data=post_data).json()

    if json_data_1["retcode"] == "0":

    params = {

    "callback": "sinaSSOController.callbackLoginStatus",

    "client": "ssologin.js(v1.4.18)",

    "ticket": json_data_1["ticket"],

    "ssosavestate": int(time.time()),

    "_": int(time.time()*1000),

    }

    response = self.session.get("https://passport.weibo.com/wbsso/login", params=params)

    json_data_2 = json.loads(re.search(r"((?P.*))", response.text).group("result"))

    if json_data_2["result"] is True:

    self.user_uniqueid = json_data_2["userinfo"]["uniqueid"]

    self.user_nick = json_data_2["userinfo"]["displayname"]

    logging.warning("WeiBoLogin succeed: %s", json_data_2)

    else:

    logging.warning("WeiBoLogin failed: %s", json_data_2)

    else:

    logging.warning("WeiBoLogin failed: %s", json_data_1)

    return True if self.user_uniqueid and self.user_nick else False

    登录结果

    完整代码来自:https://github.com/xianhu/LearnPython/blob/master/python_weibo.py

    #coding=utf-8

    import re

    import rsa

    import time

    import json

    import base64

    import logging

    import binascii

    import requests

    import urllib.parse

    class WeiBoLogin(object):
        """
        Log in to weibo.com through the Sina SSO endpoints.

        Flow implemented by :meth:`login`:
          1. encode the username (:meth:`get_username`),
          2. fetch servertime / nonce / pubkey / rsakv (:meth:`get_json_data`),
          3. RSA-encrypt the password (:meth:`get_password`),
          4. post the login form and exchange the returned ticket.

        After a successful login ``user_uniqueid`` and ``user_nick`` are set
        and ``session`` can be reused for further requests.
        """

        def __init__(self):
            """
            Create the HTTP session and request the weibo.com login page once.
            """
            self.user_name = None
            self.pass_word = None

            self.user_uniqueid = None
            self.user_nick = None

            self.session = requests.Session()
            self.session.headers.update({"User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0"})
            # NOTE(review): presumably this seeds the session cookies before
            # the SSO calls - confirm before removing.
            self.session.get("http://weibo.com/login.php")

        @staticmethod
        def _parse_jsonp(text):
            """
            Decode a JSONP reply such as ``callback({...})`` into a Python object.

            :raises ValueError: if no parenthesised payload is found or the
                payload is not valid JSON
            """
            # Escaped parentheses match the literal wrapper; DOTALL lets the
            # payload span several lines.
            match = re.search(r"\((.*)\)", text, re.DOTALL)
            if match is None:
                raise ValueError("no JSONP payload in response: %r" % text[:200])
            return json.loads(match.group(1))

        @staticmethod
        def _build_rsa_message(servertime, nonce, pass_word):
            """
            Build the plaintext that gets RSA-encrypted: servertime, a tab,
            nonce, a newline, then the password, UTF-8 encoded.
            """
            return "{}\t{}\n{}".format(servertime, nonce, pass_word).encode("utf-8")

        def login(self, user_name, pass_word):
            """
            Log in to weibo.com with the given credentials.

            :param user_name: account name (plain text)
            :param pass_word: account password (plain text)
            :return: True if both ``user_uniqueid`` and ``user_nick`` were
                obtained, otherwise False
            """
            self.user_name = user_name
            self.pass_word = pass_word
            self.user_uniqueid = None
            self.user_nick = None

            # Prelogin: fetch the values needed to encrypt the password.
            s_user_name = self.get_username()
            json_data = self.get_json_data(su_value=s_user_name)
            if not json_data:
                return False
            missing = [key for key in ("servertime", "nonce", "pubkey", "rsakv") if key not in json_data]
            if missing:
                # Fail with a clear log entry rather than a KeyError below.
                logging.error("WeiBoLogin prelogin response lacks %s: %s", missing, json_data)
                return False
            s_pass_word = self.get_password(json_data["servertime"], json_data["nonce"], json_data["pubkey"])

            # Form fields of the login request.
            post_data = {
                "entry": "weibo",
                "gateway": "1",
                "from": "",
                "savestate": "7",
                "userticket": "1",
                "vsnf": "1",
                "service": "miniblog",
                "encoding": "UTF-8",
                "pwencode": "rsa2",
                "sr": "1280*800",
                "prelt": "529",
                "url": "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack",
                "rsakv": json_data["rsakv"],
                "servertime": json_data["servertime"],
                "nonce": json_data["nonce"],
                "su": s_user_name,
                "sp": s_pass_word,
                "returntype": "TEXT",
            }

            # Captcha: "showpin" may be absent from the prelogin response, so
            # read it with .get() instead of indexing.
            if json_data.get("showpin") == 1:
                url = "http://login.sina.com.cn/cgi/pin.php?r=%d&s=0&p=%s" % (int(time.time()), json_data["pcid"])
                with open("captcha.jpeg", "wb") as file_out:
                    file_out.write(self.session.get(url).content)
                # The image was saved to captcha.jpeg; the user types what it shows.
                code = input("请输入验证码:")
                post_data["pcid"] = json_data["pcid"]
                post_data["door"] = code

            # Step 1: post the login form; the reply is decoded as JSON.
            login_url_1 = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)&_=%d" % int(time.time())
            json_data_1 = self.session.post(login_url_1, data=post_data).json()
            if json_data_1.get("retcode") != "0":
                logging.warning("WeiBoLogin failed: %s", json_data_1)
                return False

            # Step 2: exchange the ticket for the logged-in user info (JSONP).
            params = {
                "callback": "sinaSSOController.callbackLoginStatus",
                "client": "ssologin.js(v1.4.18)",
                "ticket": json_data_1["ticket"],
                "ssosavestate": int(time.time()),
                "_": int(time.time()*1000),
            }
            response = self.session.get("https://passport.weibo.com/wbsso/login", params=params)
            try:
                json_data_2 = self._parse_jsonp(response.text)
            except ValueError as excep:
                logging.error("WeiBoLogin ticket exchange error: %s", excep)
                return False

            if json_data_2.get("result") is True:
                self.user_uniqueid = json_data_2["userinfo"]["uniqueid"]
                self.user_nick = json_data_2["userinfo"]["displayname"]
                logging.warning("WeiBoLogin succeed: %s", json_data_2)
            else:
                logging.warning("WeiBoLogin failed: %s", json_data_2)
            return bool(self.user_uniqueid and self.user_nick)

        def get_username(self):
            """
            Return the username URL-quoted and then base64-encoded, as a str.
            """
            username_quote = urllib.parse.quote_plus(self.user_name)
            username_base64 = base64.b64encode(username_quote.encode("utf-8"))
            return username_base64.decode("utf-8")

        def get_json_data(self, su_value):
            """
            Fetch the prelogin values ("servertime", "nonce", "pubkey",
            "rsakv", optionally "showpin", etc.) for an encoded username.

            :param su_value: the encoded username sent as the "su" parameter
            :return: the decoded dict, or ``{}`` if the request or decoding fails
            """
            params = {
                "entry": "weibo",
                "callback": "sinaSSOController.preloginCallBack",
                "rsakt": "mod",
                "checkpin": "1",
                "client": "ssologin.js(v1.4.18)",
                "su": su_value,
                # millisecond timestamp
                "_": int(time.time()*1000),
            }
            try:
                response = self.session.get("http://login.sina.com.cn/sso/prelogin.php", params=params)
                json_data = self._parse_jsonp(response.text)
            except Exception as excep:
                # Best-effort boundary: login() treats an empty dict as failure.
                json_data = {}
                logging.error("WeiBoLogin get_json_data error: %s", excep)

            logging.debug("WeiBoLogin get_json_data: %s", json_data)
            return json_data

        def get_password(self, servertime, nonce, pubkey):
            """
            Return the RSA-encrypted password as a lowercase hex string.

            :param servertime: "servertime" value from the prelogin response
            :param nonce: "nonce" value from the prelogin response
            :param pubkey: RSA modulus as a hexadecimal string ("pubkey")
            """
            message = self._build_rsa_message(servertime, nonce, self.pass_word)
            # The public exponent is the hex literal 10001 (65537).
            public_key = rsa.PublicKey(int(pubkey, 16), int("10001", 16))
            encrypted = rsa.encrypt(message, public_key)
            return binascii.b2a_hex(encrypted).decode()

    if __name__ == "__main__":
        # Tab-separate the fields so timestamp, level and message do not run
        # together in the log output.
        logging.basicConfig(level=logging.DEBUG, format="%(asctime)s\t%(levelname)s\t%(message)s")
        # Replace the placeholders with real credentials before running.
        weibo = WeiBoLogin()
        weibo.login("username", "password")

    相关文章

      网友评论

      本文标题:Python实现新浪微博登陆!验证码?反爬?在我这里是不存在的!

      本文链接:https://www.haomeiwen.com/subject/dfytnftx.html