这只是引擎的主要部分。接下来要做的就是把相关信息存入数据库,然后对所有获取到的链接继续检索,再把相关信息存入数据库。核心部分在于:获取这些信息之后,根据信息内容设定网站的关键字,并给它一个排名,供以后搜索使用。至于如何设定网站的排名和关键字,就只能自己去想了。
<?php
/**
 * Minimal crawler engine: downloads a single page and extracts its meta
 * tags, a short excerpt of the body markup and its anchor links into an
 * in-memory record that show() can dump.
 */
class Engine
{
    /** @var string URL of the page to crawl. */
    private $_url = '';

    /**
     * @var array Extracted page data, keyed by 'url', 'meta', 'detail' and 'links'.
     *            Must start out as an array: writing string keys into a string
     *            property fails on current PHP versions.
     */
    private $_sites = array();

    /**
     * @param string $url URL of the page to crawl.
     */
    public function __construct($url)
    {
        $this->_url = $url;
    }

    /**
     * Run the engine: fetch the page and fill the record with everything
     * the extractor methods find in it.
     *
     * @return void
     */
    public function start()
    {
        $content = $this->getContent($this->_url);

        $this->_sites['url'] = $this->_url;
        $this->_sites['meta'] = $this->getMeta($content);
        $this->_sites['detail'] = $this->getDetail($content);
        $this->_sites['links'] = $this->getLinks($content);
    }

    /**
     * Extract the <meta name="..." content="..."> tags of a page.
     *
     * @param string $content Raw HTML of the page.
     * @return array|false Map of meta name => content, or false when the
     *                     markup could not be spooled to disk or read back.
     */
    public function getMeta($content)
    {
        // get_meta_tags() reads from a file name, not from a string, so the
        // markup is spooled to a unique temporary file that is removed again
        // afterwards (a fixed file name would be shared between runs and
        // left behind in the working directory).
        $file = tempnam(sys_get_temp_dir(), 'meta');
        if ($file === false) {
            return false;
        }

        $meta = false;
        if (file_put_contents($file, $content) !== false) {
            $meta = get_meta_tags($file);
        }
        unlink($file);

        return $meta;
    }

    /**
     * Return a short excerpt of the page body.
     *
     * Script and style elements are removed; all other markup is left in
     * place. The excerpt is cut byte-wise, so a multi-byte character may be
     * split at the end.
     *
     * @param string $content Raw HTML of the page.
     * @return string At most 400 bytes of body markup, or '' when the page
     *                has no <body> element.
     */
    public function getDetail($content)
    {
        // [^>]* tolerates attributes on <body>, the "s" modifier lets "."
        // span line breaks, and \z accepts documents lacking </body>.
        if (!preg_match('/<body\b[^>]*>(.*?)(?:<\/body\s*>|\z)/is', $content, $matches)) {
            return '';
        }

        $body = $this->stripHTML($matches[1]);

        return substr($body, 0, 400);
    }

    /**
     * Collect the anchors of a page.
     *
     * @param string $content Raw HTML of the page.
     * @return array Two parallel lists: 'href' holds the link targets and
     *               'name' the link texts with all tags removed.
     */
    public function getLinks($content)
    {
        // Group 1: quote character, group 2: href value, group 3: inner HTML.
        // [^>] keeps the attribute part inside the opening tag, both quote
        // styles are accepted, and "s" allows anchors that span lines.
        $pat = '/<a\s(?:[^>]*?\s)?href\s*=\s*(["\'])(.*?)\1[^>]*>(.*?)<\/a>/is';

        $result = array('href' => array(), 'name' => array());
        if (preg_match_all($pat, $content, $matches)) {
            $result['href'] = $matches[2];
            $result['name'] = $this->stripTags($matches[3]);
        }

        return $result;
    }

    /**
     * Fetch a page over a plain socket with a hand-written HTTP request.
     *
     * Only unencrypted HTTP is spoken. The returned string is the raw
     * response as sent by the server, i.e. status line and headers included.
     *
     * @param string $url Host name or full http:// URL.
     * @return string|false Raw response, or false when the connection failed
     *                      (a message is echoed in that case).
     */
    public function socketOpen($url)
    {
        // fsockopen() expects a bare host name, so split a full URL first;
        // a value without a scheme is treated as the host itself.
        $parts = parse_url($url);
        if (is_array($parts) && isset($parts['host'])) {
            $host = $parts['host'];
            $port = isset($parts['port']) ? $parts['port'] : 80;
            $path = isset($parts['path']) ? $parts['path'] : '/';
            if (isset($parts['query'])) {
                $path .= '?' . $parts['query'];
            }
        } else {
            $host = $url;
            $port = 80;
            $path = '/';
        }

        $fp = fsockopen($host, $port, $errno, $errstr, 30);
        if ($fp === false) {
            echo "连接失败:$errstr($errno)<br/>";
            return false;
        }

        $out = "GET {$path} HTTP/1.1\r\n";
        $out .= "Host: {$host}\r\n";
        $out .= "Connection: Close\r\n";
        $out .= "\r\n"; // the blank line ends the header block; without it the server keeps waiting
        fwrite($fp, $out);

        $content = stream_get_contents($fp);
        fclose($fp);

        return $content === false ? '' : $content;
    }

    /**
     * Download a URL with cURL.
     *
     * @param string $url URL to fetch.
     * @return string Response body, or '' when the transfer failed.
     */
    public function getContent($url)
    {
        $ch = curl_init($url);
        if ($ch === false) {
            return '';
        }

        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.0)");
        // Have cURL hand the body back instead of printing it; this removes
        // the need to capture it through output buffering.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

        $content = curl_exec($ch);

        // curl_close() has had no effect since PHP 8.0, so it is only called
        // on older versions where it actually releases the handle.
        if (PHP_VERSION_ID < 80000) {
            curl_close($ch);
        }

        return $content === false ? '' : $content;
    }

    /**
     * Remove script and style elements, including their contents.
     * All other markup is left untouched.
     *
     * @param string $string HTML fragment.
     * @return string Fragment without <script> and <style> blocks.
     */
    public function stripHTML($string)
    {
        // [^>]* also matches tags without attributes, and the "s" modifier
        // lets ".*?" run across line breaks inside the block.
        $pat = array(
            '/<script\b[^>]*>.*?<\/script\s*>/is',
            '/<style\b[^>]*>.*?<\/style\s*>/is',
        );

        $clean = preg_replace($pat, '', $string);

        // preg_replace() yields null on a PCRE error; fall back to the
        // unmodified input rather than handing null to the caller.
        return $clean === null ? $string : $clean;
    }

    /**
     * Strip HTML tags from every string in a (possibly nested) array.
     *
     * The array is modified in place through the reference and also returned.
     *
     * @param array $arr Array of strings and/or nested arrays.
     * @return array The same array with tags removed from all leaves.
     */
    public function stripTags(&$arr)
    {
        foreach ($arr as $key => $val) {
            if (is_array($val)) {
                $this->stripTags($arr[$key]);
            } else {
                $arr[$key] = strip_tags($val);
            }
        }

        return $arr;
    }

    /**
     * Dump the collected record inside a <pre> element.
     *
     * The record holds markup scraped from a remote page, so it is escaped
     * before being echoed; otherwise the dump would be rendered (and any
     * script in it executed) by the browser.
     *
     * @return void
     */
    public function show()
    {
        // ENT_SUBSTITUTE keeps htmlspecialchars() from returning an empty
        // string on invalid byte sequences; it is only used where available.
        $flags = ENT_QUOTES | (defined('ENT_SUBSTITUTE') ? ENT_SUBSTITUTE : 0);

        echo "<pre>";
        echo htmlspecialchars(print_r($this->_sites, true), $flags);
        echo "</pre>";
    }
}
// Demo run: crawl a single start page and dump what was extracted.
$crawler = new Engine('http://www.igift.hk');
$crawler->start(); // fetch the page and fill the engine's record
$crawler->show();  // print the record
?>
网友评论