1.按照所抓取网页的规则去编写
/**
 * Crawl every page whose ID lies in 700..749 (inclusive).
 *
 * Each ID is handed to caiji() and then dumped with var_dump() so the
 * progress of the run is visible in the output.
 */
public function comment(){
    foreach (range(700, 749) as $pageId) {
        $this->caiji($pageId);
        var_dump($pageId);
    }
}
/**
 * Download one remote page, parse its HTML table and store every data row.
 *
 * The page is fetched with cURL, converted to UTF-8, and split into <tr> rows.
 * The first two rows are discarded (presumably header rows - confirm against
 * the scraped page). For every remaining row, the <td> cells at positions
 * 1..12 are mapped onto the columns of the Comment_cc model and inserted.
 *
 * @param int|string $id Identifier appended to the base URL.
 * @return void
 */
private function caiji($id){
    $v = 'http://www.xx.com'.$id;

    // Fetch the page once via cURL. The earlier file_get_contents($url) call
    // read an undefined variable and its result was never used, so it is gone.
    $ch = curl_init();
    $timeout = 5;
    curl_setopt($ch, CURLOPT_URL, $v);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    $content = curl_exec($ch);
    // Release the handle (required before PHP 8, harmless no-op afterwards).
    curl_close($ch);

    // Transfer failed or the body is empty: nothing to parse.
    if ($content === false || $content === '') {
        return;
    }

    $content = mb_convert_encoding($content, 'utf-8', 'GBK,UTF-8,ASCII');

    // Capture every table row, including its surrounding <tr> tags.
    $preg = "/<tr.*?>(.*?)<\/tr>/ism";
    preg_match_all($preg, $content, $matches);

    // Skip the first two rows of the table.
    $rows = array_slice($matches[0], 2);
    if (!$rows) {
        return;
    }

    // Cell position => Comment_cc column. Cell 0 is intentionally not stored.
    $fieldMap = [
        1  => 's_id',
        2  => 'pid',
        3  => 'uid',
        4  => 'name',
        5  => 'car_type',
        6  => 'mobile',
        7  => 'zhan_id',
        8  => 'city',
        9  => 'pay',
        10 => 'title',
        11 => 'comment',
        12 => 'time',
    ];

    foreach ($rows as $rowHtml) {
        preg_match_all("/<td.*?>(.*?)<\/td>/ism", $rowHtml, $cellMatches);
        $cells = $cellMatches[1];

        // A row without any <td> cell carries no data to store.
        if (!$cells) {
            continue;
        }

        // Build a fresh record per row; cells missing from a short row become null
        // instead of raising undefined-offset notices.
        $record = [];
        foreach ($fieldMap as $index => $column) {
            $record[$column] = isset($cells[$index]) ? $cells[$index] : null;
        }

        M('Comment_cc')->add($record);
    }
}
2. PHP 获取文章摘要
/**
 * Build a plain-text excerpt from an HTML fragment.
 *
 * HTML tags are stripped and HTML entities are removed (named entities such
 * as &nbsp; or &frac12;, plus decimal and hexadecimal numeric entities such
 * as &#160; or &#x27;). When $cut is a positive number the text is then
 * trimmed to that display width with mb_strimwidth(), the width of $str
 * included.
 *
 * @param string     $data HTML or text to summarise.
 * @param int|string $cut  Maximum display width; 0, a negative value or a
 *                         non-numeric value leaves the text untrimmed.
 * @param string     $str  Marker appended when the text is actually trimmed.
 * @return string The cleaned (and possibly trimmed) text.
 */
function cutArticle($data,$cut=0,$str="....")
{
    // Drop HTML markup first so only text and entities remain.
    $data = strip_tags($data);

    // Remove entities entirely. Besides purely alphabetic names this also
    // covers names containing digits and numeric references, which would
    // otherwise leak into the summary as raw "&#...;" sequences.
    $pattern = '/&(?:[a-zA-Z][a-zA-Z0-9]*|#[0-9]+|#[xX][0-9a-fA-F]+);/';
    $data = preg_replace($pattern, '', $data);

    if (!is_numeric($cut)) {
        return $data;
    }

    if ($cut > 0) {
        // Cast so numeric strings are passed to mb_strimwidth() as integers.
        $data = mb_strimwidth($data, 0, (int) $cut, $str);
    }

    return $data;
}
网友评论