美文网首页nodejs随记
chrome extensions cdp(通过debugger

chrome extensions cdp(通过debugger

作者: LCSan | 来源:发表于2020-09-25 21:28 被阅读0次

    在爬虫开发过程中,踩过无数的坑,与目标方斗智斗勇。总结来看终归是成本的博弈,不管是开发成本、时间成本、空间成本,抑或是其他。方案万千,权衡后低成本拿下才是王道,当成本超过预期也就放弃挣扎了。

    在某些安全至上的行业,时间、空间成本通常只能往后靠。优先安全的情况下,完全模拟用户行为几乎是最为有效的。想想那些各种加密,接口鉴权,请求策略…,为了最后几根头发妥协吧!

    本文是cdp第三种使用方式,目的都是为了嗅探接口数据(为啥嗅探?你去看看强鉴权的网站)。某些变态的网站,selenium cdp、chrome带启动参数cdp都会检测,导致登录或鉴权错误。走投无路的情况下,采用了chrome扩展,通过debugger来实现。

    浏览器启动

    为避免debugger弹出调试提示框,启动项增加--silent-debugger-extension-api配置,如下:

    # 跨域、debugger api、指定嗅探扩展目录
    "C:\Program Files (x86)\Google\Chrome\Application\chrome.exe" --disable-web-security --user-data-dir="d:\aaa" --silent-debugger-extension-api --load-extension="d:\嗅探"
    

    manifest.json

    {
       "background": {
          "persistent": true,
          "scripts": [ "background.js" ]
       },
       "browser_action": {
          "default_icon": "icon_38.png"
       },
       "content_security_policy": "script-src 'self' 'unsafe-eval'; object-src 'self'",
       "description": "网页嗅探",
       "icons": {
          "128": "icon.png",
          "16": "icon.png",
          "48": "icon.png"
       },
       "manifest_version": 2,
       "name": "网页嗅探",
       "permissions": [ "debugger", "storage", "notifications", "tts", "webRequest", "webRequestBlocking", "http://*/*","https://*/*", "tabs", "contextMenus", "webNavigation", "clipboardWrite", "clipboardRead" ],
       "short_name": "网页嗅探",
       "version": "2.7"
    }
    

    background.js

    本来是准备用Native Message的,但是太复杂了。索性用nodejs简单搭了个web服务用着,脚手架也方便。

    // Log a startup marker to the background page console.
    console.log("开启嗅探");
    
    // Repeating timer: call initRule every 1500 ms to fetch the sniffing rules.
    // The handle is kept in the global `timename` because initRule's response
    // handler passes it to clearInterval to stop the polling.
    var timename = setInterval(initRule, 1500);
    
    // Send an asynchronous XMLHttpRequest with a JSON content type.
    //   url      - target address; falsy values fall back to http://127.0.0.1:8080/
    //   method   - HTTP verb; falsy values fall back to POST
    //   data     - request body; objects are serialised with JSON.stringify,
    //              other truthy values are sent as-is, falsy values become ""
    //   callback - onload handler (invoked with the request as `this`);
    //              when omitted the response text is logged
    function xhr(url, method, data, callback) {
        var target = url ? url : "http://127.0.0.1:8080/";
        var verb = method ? method : "POST";

        var payload = "";
        if (data) {
            payload = typeof data == "object" ? JSON.stringify(data) : data;
        }

        var request = new XMLHttpRequest();
        request.open(verb, target, true);
        request.setRequestHeader("Content-Type", "application/json");
        if (callback) {
            request.onload = callback;
        } else {
            request.onload = function () {
                console.log(this.responseText);
            };
        }
        request.send(payload);
    }
    
    // Fetch the sniffing rules from the local rule service and hand them to
    // handleRule exactly once.
    //
    // Called repeatedly by the `timename` interval. The interval is only
    // cancelled after a response has been parsed and has the expected shape
    // (arrays under "tabDomain" and "fetchUrlFilters"); an error page or a
    // malformed body is logged and the next tick simply retries.
    function initRule() {
        var url = "http://127.0.0.1:8080/rule";
        var callback = function () {
            // Several polls can be in flight when the service answers slowly;
            // only the first usable response may initialise the listeners.
            if (initRule.loaded) {
                return;
            }
            var rule;
            try {
                rule = JSON.parse(this.responseText);
            } catch (e) {
                console.log("rule response is not valid JSON, will retry: " + e.message);
                return;
            }
            if (!rule || !Array.isArray(rule["tabDomain"]) || !Array.isArray(rule["fetchUrlFilters"])) {
                console.log("rule response lacks tabDomain/fetchUrlFilters arrays, will retry");
                return;
            }
            initRule.loaded = true;
            // Rules obtained: stop polling.
            clearInterval(timename);
            console.log(rule);
            handleRule(rule);
        };
        xhr(url, "GET", "", callback);
    }
    
    // Turn the parsed rule object into the regular expressions used by
    // initListener and start listening.
    //   rule["tabDomain"]       - array of host names whose tabs are sniffed
    //   rule["fetchUrlFilters"] - array of regex source strings selecting the
    //                             request URLs to capture
    // The input arrays are not modified.
    function handleRule(rule) {
        // Escape the dots so each entry matches as a literal host name.
        var hosts = rule["tabDomain"].map(function (host) {
            return host.replace(/\./g, "\\.");
        });

        var w_Domain;
        var b_Domain;
        if (hosts.length === 0) {
            // No hosts configured: every http(s) tab is allowed and none is
            // rejected (same outcome as joining an empty list).
            w_Domain = new RegExp("^https?:\\/\\/()");
            b_Domain = new RegExp("^https?:\\/\\/(?!)");
        } else {
            var alternation = hosts.join("|");
            // The host must end here (port, path, query, fragment or end of
            // string); otherwise "a.com" would also accept "a.com.evil.net".
            var hostEnd = "(?=[:/?#]|$)";
            // Allow-list: http(s) URL whose host is one of the configured hosts.
            w_Domain = new RegExp("^https?:\\/\\/(" + alternation + ")" + hostEnd);
            // Deny-list: any other http(s) URL.
            b_Domain = new RegExp("^https?:\\/\\/(?!(?:" + alternation + ")" + hostEnd + ")");
        }

        // Compile the request URL filters.
        var fu_Filters = rule["fetchUrlFilters"].map(function (pattern) {
            return new RegExp(pattern);
        });

        initListener(w_Domain, b_Domain, fu_Filters);
    }
    
    // Decide whether a sniffed URL should be captured.
    //   fu_Filters - array of RegExp objects
    //   url        - request URL to check
    // Returns true as soon as one filter matches, false when none does or
    // when no filter list is given.
    function fetchUrlFilter(fu_Filters, url) {
        if (!fu_Filters) {
            return false;
        }
        // Indexed loop with a declared counter: the former `for (ft in ...)`
        // leaked `ft` as an implicit global and fails in strict mode.
        for (var i = 0; i < fu_Filters.length; i++) {
            if (fu_Filters[i].test(url)) {
                return true;
            }
        }
        return false;
    }
    
    // Register the tab and debugger listeners that perform the sniffing.
    //   w_Domain   - RegExp; a tab whose URL matches gets the debugger attached
    //   b_Domain   - RegExp; a tracked tab navigating to a matching URL is detached
    //   fu_Filters - array of RegExp selecting which request URLs are captured
    //
    // Captured HTTP responses and WebSocket frames are POSTed through xhr()
    // to its default endpoint.
    //
    // A single chrome.debugger.onEvent listener is installed here and routes
    // each event by source.tabId. Installing one listener per attach made
    // every listener react to every tab's events (duplicate reports, data
    // filed under the wrong tab) and throw once its own tab was gone.
    function initListener(w_Domain, b_Domain, fu_Filters) {
        // Per-tab state keyed by the tab id as a string:
        //   fetch_urls     - requestId -> { request, ResponseHeaders, ... }
        //   webSocket_urls - requestId -> socket URL
        var tab_cache = {};

        // Fetch the body of a finished request and post the whole record.
        function reportResponse(source, requestId, record) {
            chrome.debugger.sendCommand(source, "Network.getResponseBody", {
                "requestId": requestId
            }, function (response) {
                if (chrome.runtime.lastError) {
                    // The record is still reported, just without a body.
                    console.log(requestId + ":getResponseBody failed\t" + chrome.runtime.lastError.message);
                }
                record["tabId"] = source.tabId;
                record["response"] = response;
                // Post the sniffed result.
                xhr(null, null, record, function () {
                    console.log(this.responseText);
                });
            });
        }

        // Route one debugger event to the state of the tab that produced it.
        function onDebuggerEvent(source, method, params) {
            var state = tab_cache[source.tabId + ""];
            if (!state || !params) {
                // Event from a tab that is not (or no longer) tracked.
                return;
            }
            console.log(source.tabId + ":tab debugger event fetch");
            var requestId = params.requestId;
            var fetch_urls = state["fetch_urls"];
            var webSocket_urls = state["webSocket_urls"];
            var record;

            switch (method) {
                case "Network.requestWillBeSent":
                    if (fetchUrlFilter(fu_Filters, params.request.url)) {
                        fetch_urls[requestId] = {};
                        fetch_urls[requestId]["request"] = params["request"];
                    }
                    break;
                case "Network.responseReceived":
                    if (fetch_urls[requestId]) {
                        fetch_urls[requestId]["ResponseHeaders"] = params["response"]["headers"];
                    }
                    break;
                case "Network.loadingFinished":
                    record = fetch_urls[requestId];
                    if (record) {
                        console.log(method + "\t" + record.request.url);
                        // Drop the cache entry now; `record` keeps the data
                        // alive until it has been posted, so the cache cannot
                        // grow when the POST never completes.
                        delete fetch_urls[requestId];
                        reportResponse(source, requestId, record);
                    }
                    break;
                case "Network.loadingFailed":
                    // A failed request never reaches loadingFinished.
                    delete fetch_urls[requestId];
                    break;
                case "Network.webSocketCreated":
                    if (fetchUrlFilter(fu_Filters, params.url)) {
                        webSocket_urls[requestId] = params.url;
                    }
                    break;
                case "Network.webSocketFrameReceived":
                    if (webSocket_urls[requestId]) {
                        var data = params;
                        data["url"] = webSocket_urls[requestId];
                        data["tabId"] = source.tabId;
                        // Post the frame; the socket is long-lived, so its
                        // cache entry stays until the socket closes.
                        xhr(null, null, data);
                    }
                    break;
                case "Network.webSocketClosed":
                    delete webSocket_urls[requestId];
                    break;
                default:
                    break;
            }
        }
        chrome.debugger.onEvent.addListener(onDebuggerEvent);

        // A tab started loading: attach to allowed hosts, detach from others.
        chrome.tabs.onUpdated.addListener(function (id, info, tab) {
            console.log(id + ":tab 更新\t" + JSON.stringify(info));
            if (info.status != "loading") {
                return;
            }
            var key = id + "";
            var tracked = tab_cache[key];

            if (tracked && b_Domain.test(tab["url"])) {
                // Tracked tab left the allowed hosts: release the debugger.
                chrome.debugger.detach({
                    "tabId": id
                }, function () {
                    if (chrome.runtime.lastError) {
                        console.log(id + ":tab debugger detach failed\t" + chrome.runtime.lastError.message);
                    }
                    console.log(id + ":tab debugger解绑");
                    // The entry may already be gone (tab closed meanwhile);
                    // deleting by key is safe either way.
                    delete tab_cache[key];
                });
            } else if (!tracked && w_Domain.test(tab["url"])) {
                // Untracked tab entered an allowed host: start sniffing.
                tab_cache[key] = {};
                tab_cache[key]["fetch_urls"] = {};
                tab_cache[key]["webSocket_urls"] = {};

                chrome.debugger.attach({
                    "tabId": id
                }, "1.0", function () {
                    if (chrome.runtime.lastError) {
                        console.log(id + ":tab debugger attach failed\t" + chrome.runtime.lastError.message);
                        // Forget the tab so the next load can retry.
                        delete tab_cache[key];
                        return;
                    }
                    console.log(id + ":tab debugger绑定");
                    chrome.debugger.sendCommand({
                        "tabId": id
                    }, "Network.enable", {}, function () {
                        console.log(id + ":tab Network.enable");
                    });
                });
            }
        });

        // Clean up the per-tab state when a tab is closed.
        chrome.tabs.onRemoved.addListener(function (id) {
            console.log(id + ":tab 关闭");
            delete tab_cache[id + ""];
        });
    }
    

    rule.json 嗅探规则

    每当浏览器启动时,扩展会每1.5秒请求一次rule规则,直到请求到为止。

    {
        "tabDomain": [
            "xxxx.xxxx.com",
            "xxxx.xxxx.com"
        ],
        "fetchUrlFilters": [
            "\\.json"
        ]
    }
    

    rule.json说明:
    1、浏览器嗅探规则,扩展在启动后每隔1.5秒获取一次,请求成功则停止获取。如果想让新规则生效,需重启浏览器。
    2、浏览器启动参数附带--silent-debugger-extension-api,解决浏览器弹调试框问题。
    3、规则:
    tabDomain:要嗅探的域名,这里必须是完整的域名。对应浏览器的地址栏的url,进行域名过滤。tab刷新会实时监控,不用担心重复会丢失嗅探。
    fetchUrlFilters:要抓取的目标请求地址,正则表达式,用来过滤出要抓取的内容。

    最后

    嗅探的结果会post到web服务,至于怎么处理嗅探结果,就具体分析了。

    相关文章

      网友评论

        本文标题:chrome extensions cdp(通过debugger

        本文链接:https://www.haomeiwen.com/subject/npqquktx.html