需要 phpQuery
phpQuery 下载地址:
https://github.com/TobiaszCudnik/phpquery
require('../phpQuery/phpQuery.php');
// Crawl jandan.net picture pages from $page down to page 1. For every comment
// entry that carries an image, download the image into the local "jiandan/"
// directory and store its file name together with the like / unlike counters
// and a timestamp in the MySQL table `jiandan`.

// ---- Configuration ---------------------------------------------------------
$page = 5; // Page number to start from; pages are processed in descending order.
$max_count = 10; // Intended number of records per save. NOTE(review): not referenced anywhere in the visible script.
$mysql_server_name = '127.0.0.1'; // Change to your own MySQL server host.
$mysql_username = 'root'; // Change to your own MySQL user name.
$mysql_password = '123456'; // Change to your own MySQL password.
$mysql_database = 'test'; // Change to your own MySQL database name.
ini_set('memory_limit', '500M');

// Make mysqli report failures through return values on every PHP version
// (PHP 8.1+ throws exceptions by default), so the checks below stay valid.
mysqli_report(MYSQLI_REPORT_OFF);

for (; $page >= 1; $page--) {
    $url = "http://jandan.net/pic/page-" . $page . "#comments";
    set_time_limit(30);

    // ---- Fetch the listing page --------------------------------------------
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36');
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    $data = curl_exec($ch);
    $fetch_error = curl_error($ch);
    curl_close($ch);

    echo $page . PHP_EOL;

    // curl_exec() returns false on failure; do not hand that to the HTML parser.
    if ($data === false || $data === '') {
        error_log("jiandan: failed to fetch " . $url . ": " . $fetch_error);
        continue;
    }

    $doc = phpQuery::newDocumentHTML($data);
    phpQuery::selectDocument($doc);

    // ---- Download images and collect one record per saved image ------------
    $temp_arr = array();
    foreach (pq('.commentlist li') as $key => $value) {
        $img_url = (string) pq($value)->find('img')->attr('src');
        if ($img_url === '') {
            // List items without an image have nothing to download or record.
            continue;
        }

        // The counters are text scraped from the page; force them to integers
        // so that a missing or non-numeric value becomes 0.
        $like = (int) trim((string) pq($value)->find('.tucao-like-container')->find('span')->text());
        $unlike = (int) trim((string) pq($value)->find('.tucao-unlike-container')->find('span')->text());

        $type = 'png';
        if (stripos($img_url, '.gif') !== false) {
            $type = 'gif';
        }

        // Request the larger rendition instead of the thumbnail.
        $img_url = str_replace('thumb180', 'mw690', $img_url);
        // Protocol-relative URLs ("//host/path") need an explicit scheme; URLs
        // that already have one are left untouched.
        if (strpos($img_url, '//') === 0) {
            $img_url = 'http:' . $img_url;
        }

        $name = 'jiandan/' . $page . '_' . $key . '.' . $type;
        echo $img_url . PHP_EOL;

        if (httpcopy($img_url, $name, 60) === false) {
            // Do not record a file name for an image that was never saved.
            error_log("jiandan: failed to download " . $img_url);
            continue;
        }

        $temp_arr[] = array(
            'img_name' => $name,
            'like'     => $like,
            'unlike'   => $unlike,
            'ctime'    => time(),
        );
    }

    if (empty($temp_arr)) {
        continue;
    }

    // ---- Persist the records of this page -----------------------------------
    $conn = mysqli_connect($mysql_server_name, $mysql_username, $mysql_password, $mysql_database);
    if (!$conn) {
        die("error connecting");
    }
    mysqli_set_charset($conn, 'utf8'); // Connection character set.

    // A prepared statement keeps the scraped values out of the SQL text.
    $stmt = mysqli_prepare($conn, "insert into jiandan (img_name,like_quantity,unlike_quantity,ctime) values (?,?,?,?)");
    if ($stmt === false) {
        error_log("jiandan: cannot prepare insert: " . mysqli_error($conn));
    } else {
        $row_name = '';
        $row_like = 0;
        $row_unlike = 0;
        $row_ctime = 0;
        // Parameters are bound by reference, so assigning the variables below
        // changes what the next execute sends.
        mysqli_stmt_bind_param($stmt, 'siii', $row_name, $row_like, $row_unlike, $row_ctime);
        foreach ($temp_arr as $row) {
            $row_name = $row['img_name'];
            $row_like = $row['like'];
            $row_unlike = $row['unlike'];
            $row_ctime = $row['ctime'];
            if (!mysqli_stmt_execute($stmt)) {
                error_log("jiandan: insert failed for " . $row_name . ": " . mysqli_stmt_error($stmt));
            }
        }
        mysqli_stmt_close($stmt);
    }
    mysqli_close($conn); // Close the MySQL connection.
}
/**
 * Download a URL and store it as a local file.
 *
 * Uses cURL when the extension is available and falls back to copy() with an
 * HTTP stream context otherwise. The target directory is created if missing.
 * Nothing is written when the transfer fails, the server answers with an HTTP
 * error status, or the response body is empty.
 *
 * @param string $url     Source URL; spaces are encoded as %20 before the request.
 * @param string $file    Target path; defaults to the base name of $url when empty.
 * @param int    $timeout Transfer timeout in seconds.
 *
 * @return string|false The target path on success, false on any failure.
 */
function httpcopy($url, $file="", $timeout=60) {
    set_time_limit(60);

    $url = (string) $url;
    if ($url === '') {
        // Nothing to download; avoid creating directories or empty files.
        return false;
    }

    // The default file name is derived from the URL before spaces are encoded.
    $file = empty($file) ? pathinfo($url, PATHINFO_BASENAME) : $file;
    if ($file === '') {
        return false;
    }

    $dir = pathinfo($file, PATHINFO_DIRNAME);
    // The second is_dir() covers the case where the directory appeared between
    // the first check and the mkdir() call.
    if (!is_dir($dir) && !@mkdir($dir, 0755, true) && !is_dir($dir)) {
        return false;
    }

    $url = str_replace(" ", "%20", $url);

    if (function_exists('curl_init')) {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // Match the copy() fallback, which follows redirects and fails on
        // HTTP error statuses instead of saving the error page.
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_FAILONERROR, true);
        $body = curl_exec($ch);
        $failed = ($body === false) || curl_errno($ch) !== 0;
        curl_close($ch);

        // Check the transfer result before touching the target file, so a
        // failed download never leaves an empty file behind.
        if ($failed || $body === '') {
            return false;
        }
        if (@file_put_contents($file, $body) === false) {
            return false;
        }
        return $file;
    }

    $opts = array(
        "http" => array(
            "method"  => "GET",
            "header"  => "",
            "timeout" => $timeout,
        ),
    );
    $context = stream_context_create($opts);
    // Warnings are suppressed because failure is reported via the return value.
    if (@copy($url, $file, $context)) {
        return $file;
    }
    return false;
}
网友评论