// Install the required modules (e.g. `npm install xpath xmldom htmlparser2`):
//   xpath
//   xmldom
//   htmlparser2
const xpath = require('xpath')
const DOMParser = require('xmldom').DOMParser
const htmlparser2 = require('htmlparser2')
// Shared xmldom parser instance used by the script below.
// Every diagnostic hook (warning / error / fatalError) is an intentionally
// empty callback: it receives the message and does nothing with it.
const domParser = new DOMParser({
  errorHandler: {
    warning(message) {},
    error(message) {},
    fatalError(message) {},
  },
});
/**
 * Download a resource over HTTPS and return its body as a string.
 *
 * The HTTP status code is not inspected: whatever body the server sends
 * (including redirect or error pages) is resolved as-is.
 *
 * @param {string|URL|Object} url - Target handed straight to `https.get`.
 * @returns {Promise<string>} Resolves with the complete response body decoded
 *   as UTF-8. Rejects with the underlying error when either the request or
 *   the response stream fails.
 */
function loadPage(url) {
  const https = require('https');
  return new Promise(function (resolve, reject) {
    https
      .get(url, function (res) {
        // Keep the raw Buffers and decode once at the end: decoding chunk by
        // chunk corrupts multi-byte UTF-8 characters that straddle a chunk
        // boundary.
        const chunks = [];
        res.on('data', function (chunk) {
          chunks.push(chunk);
        });
        res.on('end', function () {
          resolve(Buffer.concat(chunks).toString('utf8'));
        });
        // A failure while streaming the body must settle the promise too.
        res.on('error', reject);
      })
      .on('error', reject);
  });
}
// Fetch the Baidu home page, locate the <a> element whose href is
// http://home.baidu.com via XPath, and print it.
loadPage('https://www.baidu.com/')
  .then(function (html) {
    // Parsing the raw HTML directly (without first converting it through
    // htmlparser2) sometimes raises errors, so re-serialise it first.
    const outerHTML = htmlparser2.DomUtils.getOuterHTML(htmlparser2.parseDOM(html));
    const doc = domParser.parseFromString(outerHTML);
    const nodes = xpath.select("//a[@href='http://home.baidu.com']", doc);
    const link = nodes[0];
    if (!link) {
      // Fail with a clear message instead of a TypeError on `undefined`.
      throw new Error("No element matched //a[@href='http://home.baidu.com']");
    }
    // An element without children has a null lastChild; print empty text then.
    const text = link.lastChild ? link.lastChild.data : '';
    console.log(link);
    console.log(link.localName + ": " + text);
    console.log("Node: " + link.toString());
  })
  .catch(function (err) {
    // Surface download/parse failures instead of leaving the rejection unhandled.
    console.error(err);
    process.exitCode = 1;
  });
// Reader comments