美文网首页
spider.js chrome扩展

spider.js chrome扩展

作者: LCSan | 来源:发表于2021-07-06 10:22 被阅读0次
    /**
     * Chainable wrapper around a "current selection" (a DOM node, a plain
     * value, or an array of them) that the prototype methods operate on.
     *
     * Declared with `var`: the original bare assignment created an implicit
     * global, which throws a ReferenceError in strict mode and ES modules.
     *
     * @param {*} [e] - Initial selection. Any falsy value (undefined, null,
     *     0, "", false) falls back to the global `document`.
     */
    var spider = function(e){
        this.ele = e || document;
    };
    /**
     * Runs an XPath expression against every element of the current selection
     * (via this.get) and wraps the outcome in a new spider.
     *
     * Number, string and boolean XPath results are returned as primitives;
     * any other result type is drained with iterateNext() into an array.
     *
     * @param {string} xpath - XPath expression handed to Document.evaluate.
     * @returns {spider} New wrapper around the evaluation result(s).
     */
    spider.prototype.$x = function(xpath){
        var collected = this.get(function(context){
            // Evaluate with the node's own document; fall back to the global one.
            var ownerDoc = (context && context.ownerDocument) || document;
            var outcome = ownerDoc.evaluate(xpath, context || ownerDoc, null, XPathResult.ANY_TYPE, null);
            var kind = outcome.resultType;

            if (kind === XPathResult.NUMBER_TYPE) {
                return outcome.numberValue;
            }
            if (kind === XPathResult.STRING_TYPE) {
                return outcome.stringValue;
            }
            if (kind === XPathResult.BOOLEAN_TYPE) {
                return outcome.booleanValue;
            }

            // Everything else is treated as an iterator of nodes.
            var found = [];
            for (var current = outcome.iterateNext(); current; current = outcome.iterateNext()) {
                found.push(current);
            }
            return found;
        });
        return new spider(collected);
    };
    /**
     * Runs a CSS selector against every element of the current selection
     * (via this.get) and wraps the matches in a new spider.
     *
     * @param {string} css - Selector passed to querySelectorAll.
     * @param {string} [ext] - Optional member expression evaluated on each
     *     match, tried first as `item.<ext>` and, if that throws, as
     *     `item<ext>`; the evaluated value is collected instead of the node.
     * @returns {spider} New wrapper around the matches (or extracted values).
     */
    spider.prototype.$ = function(css, ext){
        var matches = this.get(function(pr){
            // Frames expose their content through contentDocument; otherwise
            // query the node itself, or the global document as a last resort.
            var doc = (pr && pr.contentDocument) || pr || document;
            var temp = [];
            doc.querySelectorAll(css).forEach(function(item){
                if(!ext){
                    temp.push(item);
                    return;
                }
                // NOTE(review): `ext` is executed with eval; it must never come
                // from untrusted input.
                var res;
                try{
                    res = eval(`item.${ext}`);
                }catch(err){
                    res = eval(`item${ext}`);
                }
                temp.push(res);
            });
            return temp;
        });
        return new spider(matches);
    };
    /**
     * Evaluates a JSONPath expression against every element of the current
     * selection (via this.get) and wraps the matches in a new spider.
     *
     * String elements are parsed as JSON first; any other value is queried
     * as-is.
     *
     * Fixes over the previous revision:
     *  - the root object `$` now lives in this function's scope, so P.eval
     *    (used by `[?(...)]` filters and `[(...)]` script expressions) can see
     *    it; before, `$` was local to the get() callback and P.eval resolved
     *    an unrelated global `$` or failed with "$ is not defined";
     *  - the result list is reset per element, so querying an array selection
     *    no longer repeats earlier elements' matches;
     *  - only strings go through JSON.parse (arrays used to be stringified and
     *    parsed, and null elements crashed on `.constructor`).
     *
     * @param {string} expr - JSONPath expression, e.g. "$.store.book[*].title".
     * @param {{resultType: string}} [arg] - `resultType` is "VALUE" (default)
     *     or "PATH"; with any other value the element is returned unchanged.
     * @returns {spider} New wrapper around the matched values or paths.
     * @throws {SyntaxError} When a filter/script expression fails to evaluate.
     */
    spider.prototype.$j = function jsonPath(expr, arg) {
       // Root object of the element currently being queried; shared with
       // P.eval through this closure.
       var $ = null;
       var P = {
          resultType: arg && arg.resultType || "VALUE",
          result: [],
          // Rewrites the expression into a ";"-separated list of steps.
          // Bracketed filter/script sub-expressions are parked in `subx` so the
          // dot/bracket rewriting cannot touch them, then restored at the end.
          normalize: function(expr) {
             var subx = [];
             return expr.replace(/[\['](\??\(.*?\))[\]']/g, function($0,$1){return "[#"+(subx.push($1)-1)+"]";})
                        .replace(/'?\.'?|\['?/g, ";")
                        .replace(/;;;|;;/g, ";..;")
                        .replace(/;$|'?\]|'$/g, "")
                        .replace(/#([0-9]+)/g, function($0,$1){return subx[$1];});
          },
          // Converts an internal ";"-separated path into bracket notation.
          asPath: function(path) {
             var x = path.split(";"), p = "$";
             for (var i=1,n=x.length; i<n; i++)
                p += /^[0-9*]+$/.test(x[i]) ? ("["+x[i]+"]") : ("['"+x[i]+"']");
             return p;
          },
          // Records one match: the value, or its path when resultType is "PATH".
          store: function(p, v) {
             if (p) P.result[P.result.length] = P.resultType == "PATH" ? P.asPath(p) : v;
             return !!p;
          },
          // Consumes the first step of `expr` and recurses on the remainder;
          // an empty `expr` means the path is fully resolved.
          trace: function(expr, val, path) {
             if (expr) {
                var x = expr.split(";"), loc = x.shift();
                x = x.join(";");
                if (val && val.hasOwnProperty(loc))
                   P.trace(x, val[loc], path + ";" + loc);
                else if (loc === "*")
                   P.walk(loc, x, val, path, function(m,l,x,v,p) { P.trace(m+";"+x,v,p); });
                else if (loc === "..") {
                   P.trace(x, val, path);
                   P.walk(loc, x, val, path, function(m,l,x,v,p) { typeof v[m] === "object" && P.trace("..;"+x,v[m],p+";"+m); });
                }
                else if (/,/.test(loc)) { // [name1,name2,...]
                   for (var s=loc.split(/'?,'?/),i=0,n=s.length; i<n; i++)
                      P.trace(s[i]+";"+x, val, path);
                }
                else if (/^\(.*?\)$/.test(loc)) // [(expr)]
                   P.trace(P.eval(loc, val, path.substr(path.lastIndexOf(";")+1))+";"+x, val, path);
                else if (/^\?\(.*?\)$/.test(loc)) // [?(expr)]
                   P.walk(loc, x, val, path, function(m,l,x,v,p) { if (P.eval(l.replace(/^\?\((.*?)\)$/,"$1"),v[m],m)) P.trace(m+";"+x,v,p); });
                else if (/^(-?[0-9]*):(-?[0-9]*):?([0-9]*)$/.test(loc)) // [start:end:step]  python slice syntax
                   P.slice(loc, x, val, path);
             }
             else
                P.store(path, val);
          },
          // Calls `f` for every index of an array or own key of an object.
          walk: function(loc, expr, val, path, f) {
             if (val instanceof Array) {
                for (var i=0,n=val.length; i<n; i++)
                   if (i in val)
                      f(i,loc,expr,val,path);
             }
             else if (typeof val === "object") {
                for (var m in val)
                   if (val.hasOwnProperty(m))
                      f(m,loc,expr,val,path);
             }
          },
          // Expands a [start:end:step] step over an array.
          slice: function(loc, expr, val, path) {
             if (val instanceof Array) {
                var len=val.length, start=0, end=len, step=1;
                loc.replace(/^(-?[0-9]*):(-?[0-9]*):?(-?[0-9]*)$/g, function($0,$1,$2,$3){start=parseInt($1||start);end=parseInt($2||end);step=parseInt($3||step);});
                start = (start < 0) ? Math.max(0,start+len) : Math.min(len,start);
                end   = (end < 0)   ? Math.max(0,end+len)   : Math.min(len,end);
                for (var i=start; i<end; i+=step)
                   P.trace(i+";"+expr, val, path);
             }
          },
          // Evaluates a filter/script expression with `@` bound to the current
          // value (`_v`) and `$` to the root object.
          // NOTE(review): this executes `expr` fragments with eval; never pass
          // untrusted JSONPath expressions.
          eval: function(x, _v, _vname) {
             try { return $ && _v && eval(x.replace(/@/g, "_v")); }
             catch(e) { throw new SyntaxError("jsonPath: " + e.message + ": " + x.replace(/@/g, "_v").replace(/\^/g, "_a")); }
          }
       };


        var res = this.get(function(pr){
            // Only strings are JSON text; objects and arrays are queried directly.
            $ = typeof pr === "string" ? JSON.parse(pr) : pr;
            // Fresh list per element: get() concatenates the per-element
            // results itself, so a shared list would duplicate matches.
            P.result = [];
            if (expr && $ && (P.resultType == "VALUE" || P.resultType == "PATH")) {
                P.trace(P.normalize(expr).replace(/^\$;/,""), $, "$");
                return P.result;
            }
            return pr;
        });
        return new spider(res);
    };
    
    /**
     * Keeps only the elements of the current selection whose string form
     * (see spider.prototype.str) matches `reg`.
     *
     * @param {RegExp} reg - Pattern tested against each element's string form.
     * @param {string} [ext] - Optional member expression forwarded to str().
     * @returns {spider} New wrapper around the matching elements. For a
     *     non-array selection that does not match, the wrapped value is `false`.
     */
    spider.prototype.filter = function(reg, ext){
        var res = this.get(function(pr){
            var ag = spider.prototype.str(pr,ext);
            // A /g or /y regex remembers its position in lastIndex between
            // test() calls; rewind it so every element is tested from the
            // start instead of skipping matches on alternating elements.
            reg.lastIndex = 0;
            if(reg.test(ag)){
                return pr;
            }
            return false;
        });
        return new spider(res);
    };
    /**
     * Applies String.prototype.replace to the string form (see
     * spider.prototype.str) of every element of the current selection.
     *
     * @param {RegExp|string} reg - Pattern to search for.
     * @param {string|Function} toScp - Replacement passed straight to replace().
     * @param {string} [ext] - Optional member expression forwarded to str().
     * @returns {spider} New wrapper around the replaced strings.
     */
    spider.prototype.replace = function(reg, toScp, ext){
        var swapOne = function(entry){
            var text = spider.prototype.str(entry, ext);
            return text.replace(reg, toScp);
        };
        return new spider(this.get(swapOne));
    };
    /**
     * Splits the string form (see spider.prototype.str) of every element of
     * the current selection with String.prototype.split.
     *
     * @param {RegExp|string} reg - Separator passed straight to split().
     * @param {string} [ext] - Optional member expression forwarded to str().
     * @returns {spider} New wrapper around the resulting pieces.
     */
    spider.prototype.split = function(reg, ext){
        var cutOne = function(entry){
            var text = spider.prototype.str(entry, ext);
            return text.split(reg);
        };
        return new spider(this.get(cutOne));
    };
    /**
     * Matches `reg` against the string form (see spider.prototype.str) of
     * every element of the current selection.
     *
     * Per element the result is:
     *  - all full matches, when `reg` is global (previously the first match
     *    was dropped because it was mistaken for the "whole match" slot);
     *  - the capture groups, when a non-global `reg` has any;
     *  - a one-element array with the whole match otherwise (a plain array, so
     *    the match object's `index`/`input`/`groups` properties do not leak
     *    into later iteration);
     *  - null when nothing matches.
     *
     * @param {RegExp|string} reg - Pattern passed to String.prototype.match.
     * @param {string} [ext] - Optional member expression forwarded to str().
     * @returns {spider} New wrapper around the extracted strings.
     */
    spider.prototype.regex = function(reg, ext){
        var res = this.get(function(pr){
            var ag = spider.prototype.str(pr,ext);
            var re = ag.match(reg);
            if(!re){
                return re;
            }
            if(reg && reg.global){
                // With the g flag match() returns only full matches, no groups.
                return re;
            }
            return re.length > 1 ? re.slice(1) : [re[0]];
        });
        return new spider(res);
    };
    /**
     * Evaluates `expression` as a member/call chain on this instance, i.e.
     * the source text `this.<expression>`.
     *
     * NOTE(review): the expression is executed with eval; it must never come
     * from untrusted input.
     *
     * @param {string} expression - Chain source, e.g. "$('a').re()".
     * @returns {*} Whatever the evaluated chain yields.
     */
    spider.prototype.mix = function(expression) {
        var chainResult = eval(`this.${expression}`);
        return chainResult;
    };
    /**
     * Converts a value to its "string form".
     *
     *  - Falsy input yields undefined.
     *  - Arrays and plain objects are converted member by member, IN PLACE
     *    (the input container is mutated and returned); an array left with a
     *    single non-array member collapses to that member.
     *  - With `ext`, the value is the result of evaluating `t.<ext>`, or
     *    `t<ext>` if the first form throws.
     *  - DOM element nodes yield outerHTML, other nodes `value || nodeValue`.
     *  - Anything else is returned unchanged.
     *
     * Fixes over the previous revision: the loop key is now local. It used to
     * be an implicit global that the recursive call overwrote, so nested
     * containers were written back under the wrong key (and the code threw in
     * strict mode). A single-member array whose member converted to
     * undefined/null no longer throws on `.constructor`.
     *
     * @param {*} t - Value to convert.
     * @param {string} [ext] - Optional member expression applied to leaves.
     * @returns {*} The converted value (see above).
     */
    spider.prototype.str = function(t,ext){
        if(!t){
            return;
        }
        if(t.constructor == Array || t.constructor == Object){
            var keys = Object.keys(t);
            for(var k = 0; k < keys.length; k++){
                t[keys[k]] = this.str(t[keys[k]],ext);
            }
            if(t.constructor == Array && t.length == 1 && !Array.isArray(t[0])){
                return t[0];
            }
            return t;
        }
        if(ext){
            // NOTE(review): `ext` is executed with eval; it must never come
            // from untrusted input. The parameter name `t` is part of the
            // evaluated source and must not be renamed.
            var res;
            try{
                res = eval(`t.${ext}`);
            }catch(err){
                res = eval(`t${ext}`);
            }
            return res;
        }
        if(t.nodeType){
            switch (t.nodeType){
            case Node.ELEMENT_NODE:
                // Element node (nodeType 1): serialize the whole element.
                return t.outerHTML;
            default:
                return t.value || t.nodeValue;
            }
        }
        return t;
    };
    /**
     * Returns the current selection, optionally mapped through `callback`.
     *
     *  - Falsy selection: undefined.
     *  - Non-array selection: `callback(selection)`, or the selection itself
     *    when no callback is given.
     *  - Array selection: a new array of the per-element results, where falsy
     *    results are dropped and array results are flattened one level.
     *
     * Fixes over the previous revision: the array is walked by index with
     * local variables. It used to use `for (item in …)` with an implicit
     * global, which throws in strict mode and also visits non-index
     * properties such as the `index`/`input` fields of a RegExp match array.
     *
     * @param {function(*): *} [callback] - Mapper applied to each element.
     * @returns {*} See above.
     */
    spider.prototype.get = function(callback){
        if(!this.ele){
            return;
        }
        if(!Array.isArray(this.ele)){
            return callback ? callback(this.ele) : this.ele;
        }
        var items = this.ele;
        var res = [];
        for(var idx = 0; idx < items.length; idx++){
            if(!(idx in items)){
                // Skip holes in sparse arrays, as for...in did.
                continue;
            }
            var re = callback ? callback(items[idx]) : items[idx];
            if(!re){
                continue;
            }
            if(Array.isArray(re)){
                res = res.concat(re);
            }else{
                res.push(re);
            }
        }
        return res;
    };
    /**
     * Returns the string form (see spider.prototype.str) of the current
     * selection, unwrapping a one-element array to its single element.
     *
     * Fix over the previous revision: a null/undefined result (for example
     * when `ext` evaluates to null on a single element) is returned as-is
     * instead of throwing on `.constructor`.
     *
     * @param {string} [ext] - Optional member expression forwarded to str().
     * @returns {*} The converted selection.
     */
    spider.prototype.re = function(ext){
        var re = this.get(function(pr){
            return spider.prototype.str(pr,ext);
        });
        if(Array.isArray(re) && re.length == 1){
            return re[0];
        }
        return re;
    };
    /**
     * Serializes the result of this.re(ext) as JSON text.
     *
     * @param {string} [ext] - Optional member expression forwarded to re().
     * @returns {string|undefined} JSON.stringify output for the converted
     *     selection.
     */
    spider.prototype.json = function(ext){
        var converted = this.re(ext);
        return JSON.stringify(converted);
    };
    spider.prototype.more = function(exps, tip) {
        if(exps.constructor == Array || exps.constructor == Object){
            // 处理表达式
            exps.forEach(function(a,b,c){
                if(a.constructor == Array){
                    a.forEach(function(a1,b1,c1){
                        c1[b1]=`'${a1.replace(/'/ig,"\\'")}'`;
                    });
                    c[b] = `t.${tip.toLowerCase()}(${a.join(",")})`;
                }else{
                    c[b] = `t.${tip.toLowerCase()}('${a.replace(/'/ig,"\\'")}')`;
                }
            });
            var res = this.get(function(pr){
                var t = new spider(pr);
                var temp = exps.constructor == Object ? {} : [];
                for(i in exps){
                    var exp = eval(exps[i]);
                    if(exp.constructor == spider){
                        exp = exp.ele;
                    }
                    temp[i] = exp;
                }
                if(exps.constructor == Array){
                    temp = [[temp]];
                }
                return temp;
            });
            return new spider(res);
        }
        return this;
    }
    

    相关文章

      网友评论

          本文标题:spider.js chrome扩展

          本文链接:https://www.haomeiwen.com/subject/szzwultx.html