美文网首页aardio
aardio xpath抽取

aardio xpath抽取

作者: LCSan | 来源:发表于2023-10-24 18:40 被阅读0次

    引入 .NET 的 HtmlAgilityPack 库,实现 HTML、XML 文档的 XPath 抽取。

    import dotNet;
    
    namespace myplu
    
    /*
    XPath extractor backed by HtmlAgilityPack.HtmlDocument.

    The constructor parses the given html/xml text; the resulting object is the
    .NET HtmlDocument itself, extended with two helper functions:
      select(xpath,node)     - OuterHtml of the first node matching xpath,
                               or null when nothing matches.
      selectList(xpath,node) - table with the OuterHtml of every node matching
                               xpath, empty when nothing matches.
    For both helpers, `node` is the node to search from; when omitted the
    document's DocumentNode is used.
    */
    class xpath{
        ctor( html ){
            this = ..HtmlAgilityPack.HtmlDocument();
            this.LoadHtml(html);

            this[["select"]] = function(xpath,node){
                // Fall back to the document root when no start node was passed.
                node := owner.DocumentNode;

                // SelectSingleNode yields null when nothing matches; guard so
                // a miss returns null instead of failing on `.OuterHtml`.
                var found = node.SelectSingleNode(xpath);
                if( found === null ) return null;

                return found.OuterHtml;
            };

            this[["selectList"]] = function(xpath,node){
                // Fall back to the document root when no start node was passed.
                node := owner.DocumentNode;

                var res = {};

                // HtmlAgilityPack's SelectNodes returns null (not an empty
                // collection) when nothing matches; report that as an empty table.
                var nodes = node.SelectNodes(xpath);
                if( nodes === null ) return res;

                for(i=1;nodes.Count;1){
                    ..table.insert(res,nodes[i].OuterHtml);
                }
                return res;
            };
        };
        /* All members of the object must be separated by semicolons */

    }
    
    
    namespace xpath

    // Load the HtmlAgilityPack assembly from the library's .res directory,
    // then import its "HtmlAgilityPack" .NET namespace for use by the class.
    var agilityPackAssembly = ..dotNet.load("HtmlAgilityPack",$"~\lib\myplu\xpath\.res\HtmlAgilityPack.dll");
    agilityPackAssembly.import("HtmlAgilityPack");
    
    /*****intellisense()
    myplu.xpath = 导入xpath解析库
    myplu.xpath(__) = 创建xpath解析器。\n@1html或者xml文本内容\n!xpathNode.
    end intellisense*****/
    
    /*****intellisense(!xpathNode)
    select(.(xpath,查找节点) = 从@2指定的节点中执行@1查找,返回第一个匹配节点的OuterHtml字符串。\n省略@2时从文档根节点(DocumentNode)开始查找。
    selectList(.(xpath,查找节点) = 从@2指定的节点中执行@1查找,返回由所有匹配节点的OuterHtml字符串组成的数组。\n省略@2时从文档根节点(DocumentNode)开始查找。
    DocumentNode = document元素\n!htmlNode.
    end intellisense*****/
    
    /*****intellisense(!htmlNode)
    OuterHtml = 元素内容
    SelectNodes(__) = xpath查找所有结果。\n!htmlNodeCollection.
    SelectSingleNode(__) = xpath查找第一个匹配的结果。\n!htmlNode.
    end intellisense*****/
    
    
    /*****intellisense(!htmlNodeCollection)
    each() = @for(i=1;??.Count;1){
        //console.log(??[i].OuterHtml)
        /**循环取值**/
    }
    Count = 元素个数
    end intellisense*****/
    
    

    相关文章

      网友评论

        本文标题:aardio xpath抽取

        本文链接:https://www.haomeiwen.com/subject/camsidtx.html