美文网首页aardio
aardio xpath抽取

aardio xpath抽取

作者: LCSan | 来源:发表于2023-10-24 18:40 被阅读0次

引入 .NET 的 HtmlAgilityPack 库，实现 HTML、XML 文档的 XPath 抽取。

import dotNet;

namespace myplu

class xpath{
    /*
    XPath extractor built on HtmlAgilityPack.HtmlDocument.

    html: the HTML / XML text to parse.

    The constructed object IS the HtmlDocument instance (so members such as
    DocumentNode remain reachable), with two helper functions attached:
      select(xpath,node)     -> OuterHtml of the first match, or null if nothing matches
      selectList(xpath,node) -> table of OuterHtml strings, empty if nothing matches
    In both helpers `node` is optional; when omitted the search starts at
    the document's DocumentNode.
    */
    ctor( html ){
        this = ..HtmlAgilityPack.HtmlDocument();
        this.LoadHtml(html);

        this[["select"]] = function(xpath,node){
            // Fall back to the document root when no start node is supplied.
            node := owner.DocumentNode;

            // SelectSingleNode yields null when nothing matches; guard before
            // touching OuterHtml so a miss returns null instead of raising.
            var found = node.SelectSingleNode(xpath);
            if( !found ) return null;

            return found.OuterHtml;
        };

        this[["selectList"]] = function(xpath,node){
            // Fall back to the document root when no start node is supplied.
            node := owner.DocumentNode;

            var res = {};

            // HtmlAgilityPack's SelectNodes returns null (not an empty
            // collection) when nothing matches; report that as an empty table.
            var nodes = node.SelectNodes(xpath);
            if( !nodes ) return res;

            // NOTE(review): the 1..Count index range is kept from the original;
            // confirm how aardio maps numeric indexes onto the .NET indexer.
            for(i=1;nodes.Count;1){
                // Append so results keep the order in which they were found.
                // table.insert without a position does not append - TODO confirm
                // against the aardio table library docs.
                ..table.push(res,nodes[i].OuterHtml);
            }

            return res;
        };
    };
    /* All members of an object must be separated by semicolons */

}


namespace xpath

// Load the HtmlAgilityPack assembly from the library's .res directory,
// then import its "HtmlAgilityPack" namespace; the class constructor
// refers to it as ..HtmlAgilityPack.
var hapAssembly = ..dotNet.load("HtmlAgilityPack",$"~\lib\myplu\xpath\.res\HtmlAgilityPack.dll");
hapAssembly.import("HtmlAgilityPack");

/*****intellisense()
myplu.xpath = 导入xpath解析库
myplu.xpath(__) = 创建xpath解析器。\n@1html或者xml文本内容\n!xpathNode.
end intellisense*****/

/*****intellisense(!xpathNode)
select(.(xpath,查找节点) = 从@2指定的节点中执行@1查找,返回第一个匹配结果字符。
selectList(.(xpath,查找节点) = 从@2指定的节点中执行@1查找,返回所有匹配结果字符。
DocumentNode = document元素\n!htmlNode.
end intellisense*****/

/*****intellisense(!htmlNode)
OuterHtml = 元素内容
SelectNodes(__) = xpath查找所有结果。\n!htmlNodeCollection.
SelectSingleNode(__) = xpath查找第一个匹配的结果。\n!htmlNode.
end intellisense*****/


/*****intellisense(!htmlNodeCollection)
each() = @for(i=1;??.Count;1){
    //console.log(??[i].OuterHtml)
    /**循环取值**/
}
Count = 元素个数
end intellisense*****/

相关文章

网友评论

    本文标题:aardio xpath抽取

    本文链接:https://www.haomeiwen.com/subject/camsidtx.html