//引入 .NET 的 HtmlAgilityPack 库，实现 HTML、XML 文档的 XPath 抽取
import dotNet;
namespace myplu
class xpath{
	/*
	XPath extractor built on an HtmlAgilityPack.HtmlDocument.
	The constructor replaces `this` with the .NET document object, loads the
	given markup into it, and attaches two aardio helper functions
	(select / selectList) directly onto the object.
	@param html  HTML or XML text to parse.
	*/
	ctor( html ){
		this = ..HtmlAgilityPack.HtmlDocument();
		this.LoadHtml(html);

		/*
		select(xpath,node): run the XPath query and return the OuterHtml of the
		first matching node, or null when nothing matches.
		node is optional and defaults to the document root (owner.DocumentNode).
		*/
		this[["select"]] = function(xpath,node){
			node := owner.DocumentNode;

			// SelectSingleNode yields null when there is no match; guard so we
			// do not fail on null.OuterHtml.
			var found = node.SelectSingleNode(xpath);
			if( !found ) return null;

			return found.OuterHtml;
		};

		/*
		selectList(xpath,node): run the XPath query and return a table holding
		the OuterHtml of every matching node (an empty table when nothing matches).
		node is optional and defaults to the document root (owner.DocumentNode).
		*/
		this[["selectList"]] = function(xpath,node){
			node := owner.DocumentNode;
			var res = {};

			// HtmlAgilityPack's SelectNodes returns null, not an empty collection,
			// when the expression matches nothing.
			var nodes = node.SelectNodes(xpath);
			if( !nodes ) return res;

			// NOTE(review): indexes run 1..Count exactly as in the original code;
			// confirm the dotNet proxy exposes this collection with 1-based indexes.
			for(i=1;nodes.Count;1){
				..table.insert(res,nodes[i].OuterHtml);
			}
			return res;
		};
	};
	/* All members of the object must be separated by semicolons */
}
namespace xpath
// Load the HtmlAgilityPack assembly from the DLL referenced by the $-prefixed
// path below, registering it under the name "HtmlAgilityPack".
var agilityPackAssembly = ..dotNet.load("HtmlAgilityPack",$"~\lib\myplu\xpath\.res\HtmlAgilityPack.dll");

// Import the assembly's HtmlAgilityPack namespace; the class constructor
// reaches HtmlDocument through it.
agilityPackAssembly.import("HtmlAgilityPack");
/*****intellisense()
myplu.xpath = 导入xpath解析库
myplu.xpath(__) = 创建xpath解析器。\n@1html或者xml文本内容\n!xpathNode.
end intellisense*****/
/*****intellisense(!xpathNode)
select(.(xpath,查找节点) = 从@2指定的节点中执行@1查找,返回第一个匹配结果字符。
selectList(.(xpath,查找节点) = 从@2指定的节点中执行@1查找,返回所有匹配结果字符。
DocumentNode = document元素\n!htmlNode.
end intellisense*****/
/*****intellisense(!htmlNode)
OuterHtml = 元素内容
SelectNodes(__) = xpath查找所有结果。\n!htmlNodeCollection.
SelectSingleNode(__) = xpath查找第一个匹配的结果。\n!htmlNode.
end intellisense*****/
/*****intellisense(!htmlNodeCollection)
each() = @for(i=1;??.Count;1){
//console.log(??[i].OuterHtml)
/**循环取值**/
}
Count = 元素个数
end intellisense*****/
网友评论