// Script entry point: download one poem page and print the text of every <h1> on it.
$url = "https://so.gushiwen.org/shiwenv_94e9aad7a0d2.aspx";
$html = curlget($url);
// XPath selecting the href attribute of every link in the body.
// NOTE(review): not used anywhere in the visible part of the script; kept in case later code reads it.
$regular = "/html/body//a//@href";
$titlexpath = "//h1";
// curlget() returns false when the transfer fails (it has already echoed the
// cURL error), so only parse when a non-empty body actually came back.
if (is_string($html) && $html !== '')
{
$titles = xpathregular($html,$titlexpath);
// DOMXPath::query() yields false for an invalid expression; skip the loop then.
if ($titles !== false)
{
foreach ($titles as $titleNode)
{
$title = $titleNode->nodeValue;
echo $title;
}
}
}
/**
 * Downloads the given URL with cURL and returns the response body.
 *
 * The URL passed by the caller is the one that is fetched (it is no longer
 * overwritten by a hard-coded address), and the handle is released before
 * returning on both the success and the failure path.
 *
 * @param string $url Address to download.
 * @return string|false The response body, or false when the handle could not
 *                      be created or the transfer failed. In both failure
 *                      cases a "CURL Error:" message is echoed.
 */
function curlget($url)
{
// 1. Create the handle.
$ch = curl_init();
if ($ch === false)
{
echo "CURL Error:unable to initialise a cURL handle";
return false;
}
// 2. Set the options, including the URL.
curl_setopt($ch,CURLOPT_URL,$url);
// NOTE(review): certificate and host verification are disabled, exactly as in
// the original code. This allows man-in-the-middle tampering; enable both
// (VERIFYPEER => true, VERIFYHOST => 2) once the target's certificate chain
// is known to validate.
curl_setopt($ch,CURLOPT_SSL_VERIFYPEER,0);
curl_setopt($ch,CURLOPT_SSL_VERIFYHOST,0);
// Return the body as a string instead of printing it, without response headers.
curl_setopt($ch,CURLOPT_RETURNTRANSFER,1);
curl_setopt($ch,CURLOPT_HEADER,0);
// 3. Execute the request and fetch the HTML document.
$output = curl_exec($ch);
if ($output === false)
{
// The error text must be read before the handle is closed.
echo "CURL Error:".curl_error($ch);
}
// 4. Release the cURL handle (this used to sit after the return and never ran).
curl_close($ch);
return $output;
}
/**
 * Parses an HTML string and evaluates an XPath expression against it.
 *
 * Parser diagnostics caused by malformed real-world markup are collected
 * through libxml's internal error buffer and discarded, instead of being
 * hidden with the @ operator. Empty or non-string input (for example the
 * false returned by a failed download) is not handed to the parser; the
 * expression is then evaluated against an empty document.
 *
 * @param string|false $html    HTML source to parse.
 * @param string       $regular XPath expression to evaluate.
 * @return DOMNodeList|false Matching nodes, or false when the expression is
 *                           rejected by DOMXPath::query().
 */
function xpathregular($html,$regular)
{
$dom = new DOMDocument();
// Load the HTML from a string. loadHTML() throws a ValueError on PHP 8 for an
// empty string, so that case is skipped and the document stays empty.
if (is_string($html) && $html !== '')
{
// Buffer libxml warnings rather than emitting them, then restore the
// caller's previous setting so global state is left untouched.
$previous = libxml_use_internal_errors(true);
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previous);
}
// Normalize the document.
$dom->normalize();
// Wrap the DOM in a DOMXPath object so it can be queried.
$xpath = new DOMXPath($dom);
// Evaluate the caller-supplied expression and hand back the node list.
$results = $xpath->query($regular);
return $results;
}
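// 网友评论 ("Reader comments") - stray heading text carried over from the source web page; not part of the script.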