/**
 * Fetch a WeChat official-account article and return its title and cleaned-up HTML body.
 *
 * The article link is read from the POST field "url"; the method itself takes no
 * arguments. Only http(s) links whose host is mp.weixin.qq.com are downloaded, so the
 * user-supplied value cannot be used to read local files or other stream wrappers.
 *
 * Steps:
 *  1. A short link (https://mp.weixin.qq.com/s/<id>) is resolved to the address
 *     published in the page's og:url meta tag; any other link is used as given.
 *  2. Title and body are extracted from the article page with regular expressions.
 *  3. Block-level tags are rewritten according to $label_replace.
 *  4. Empty paragraphs are dropped, every <img> is wrapped in <p>, and "data-src"
 *     is renamed to "src".
 *  5. When $is_up_oss is true, every image is downloaded and re-uploaded through
 *     Oss::uploadByString(); an image that cannot be transferred is replaced by <br>.
 *
 * Every failure (bad link, download error, pattern not found, regex error) is reported
 * through $this->error('匹配失败').
 *
 * @return mixed Whatever $this->success() returns on success, or $this->error() on failure.
 */
public function search()
{
    $is_up_oss = true; // whether article images are re-uploaded to OSS
    $label_replace = '<p>'; // tag replacement mode: 'p', 'span', or '<p>' (strip attributes, force <p>)
    $url = request()->post('url');
    $url = is_string($url) ? trim($url) : '';

    // Download helper: accepts only http(s) URLs (optionally pinned to one host) and
    // throws instead of returning false, so callers never continue with a failed download.
    $fetch = function ($target, $required_host = null) {
        $parts = is_string($target) ? parse_url($target) : false;
        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
            throw new \Exception('invalid url');
        }
        $scheme = strtolower($parts['scheme']);
        if ($scheme !== 'http' && $scheme !== 'https') {
            throw new \Exception('unsupported url scheme');
        }
        if ($required_host !== null && strtolower($parts['host']) !== $required_host) {
            throw new \Exception('unexpected url host');
        }
        $body = file_get_contents($target);
        if ($body === false) {
            throw new \Exception('download failed');
        }
        return $body;
    };

    try {
        $wx_host = 'mp.weixin.qq.com';

        // Short links copied from inside WeChat point at a page that advertises the
        // full article address in its og:url meta tag; other links are used directly.
        if (preg_match("/https:\/\/mp\.weixin\.qq\.com\/s\/[a-zA-Z0-9]*$/", $url) === 1) {
            $wx_content = $fetch($url, $wx_host);
            $rule = "/(?<=<meta property=\"og:url\" content=\").*?(?=\" \/>)/";
            if (preg_match($rule, $wx_content, $api_url) !== 1) {
                throw new \Exception('og:url not found');
            }
            // The value comes out of an HTML attribute, so entities such as &amp; must be decoded.
            $content_url = html_entity_decode($api_url[0], ENT_QUOTES, 'UTF-8');
        } else {
            $content_url = $url;
        }

        // Download the real article page.
        $wx_article = $fetch($content_url, $wx_host);

        // Extract the title.
        $title_rule = "/\<h2 class=\"rich_media_title\".*?id=\"activity-name\".*?\>\K[\s\S]*?(?=\<\/h2\>)/";
        if (preg_match($title_rule, $wx_article, $titles) !== 1) {
            throw new \Exception('title not found');
        }
        $title = trim($titles[0]);

        // Extract the body.
        $content_rule = "/\<div class=\"rich_media_content \".*?id=\"js_content\".*?\>\K[\s\S]*?(?=\<ul id=\"js_hotspot_area\" class=\"article_extend_area\"\>\<\/ul\>)/";
        if (preg_match($content_rule, $wx_article, $contents) !== 1) {
            throw new \Exception('content not found');
        }
        $content = $contents[0];

        switch ($label_replace) {
            case 'span':
                // Turn block tags into <span style="display:block;..."> so a rich-text
                // editor does not rewrite them.
                $content = preg_replace(["/<section(.*?)style=\"/","/<div(.*?)style=\"/","/<p(.*?)style=\"/"], ['<span$1style="display:block;','<span$1style="display:block;','<span$1style="display:block;'], $content);
                // Rewrite the matching closing tags as well.
                $content = strtr($content, ['data-src' => 'src','</section>' => '</span>','</div>' => '</span>','</p>' => '</span>']);
                break;
            case 'p':
                // Turn block tags into <p> (editor friendly, but the layout may get scrambled).
                $content = preg_replace(["/<section/","/<div/","/<span(.*?)style=\"/"], ['<p','<p','<p$1style="display:inline;'], $content);
                // Rewrite the matching closing tags as well.
                $content = strtr($content, ['</section>' => '</p>','</div>' => '</p>','</span>' => '</p>']);
                break;
            case '<p>':
                // Drop all attributes/styles and turn every listed tag into a bare <p> / </p>.
                $content = preg_replace("/(<\/?)(div|p|a|span|section|h1|h2|h3|h4|h5|h6|strong|table|tr|th|td|ul|li)[\s\S]*?(>)/", '$1p$3', $content);
                break;
        }
        if (!is_string($content)) {
            // preg_replace() returns null when the regex engine fails.
            throw new \Exception('tag replacement failed');
        }

        $preg_replace = [
            '/<p>\s*?(<br>)*?\s*?<\/p>\n?/' => '', // remove empty paragraphs
            '/<\/p>\s*?(<br>)*?\s*?<p>\n?/' => '', // remove line breaks between two paragraphs
            '/(<img.*?>)/' => '<p>$1</p>', // wrap every image in a paragraph
        ];
        $content = preg_replace(array_keys($preg_replace), array_values($preg_replace), $content);
        if (!is_string($content)) {
            throw new \Exception('content cleanup failed');
        }

        // WeChat keeps the image address in data-src; expose it as src.
        $content = strtr($content, ['data-src' => 'src']);

        // Re-host the images on OSS.
        if ($is_up_oss) {
            $img_rule = "/<img.*?src=\"\K[^\"]+/";
            preg_match_all($img_rule, $content, $imgs);
            $img_strtr = [];
            // array_unique: an image used several times is transferred only once.
            foreach (array_unique($imgs[0]) as $v) {
                // A single broken image must not abort the whole article, so a failed
                // download is handled like a failed upload.
                try {
                    $img_bytes = $fetch($v);
                } catch (\Exception $e) {
                    $img_bytes = false;
                }

                $uploaded = false;
                if ($img_bytes !== false) {
                    $oss_result = Oss::uploadByString($img_bytes, 'wx_tencent_img');
                    if ($oss_result['state'] == 1) {
                        $img_strtr[$v] = Img($oss_result['data']['path']);
                        $uploaded = true;
                    }
                }

                if (!$uploaded) {
                    // Remove the image that could not be transferred. preg_quote() escapes
                    // every regex metacharacter in the URL, and [^>]* keeps the match inside
                    // a single <img> tag so neighbouring images are left alone.
                    $stripped = preg_replace('/<img[^>]*?src="' . preg_quote($v, '/') . '"[^>]*>/', "<br>", $content);
                    if (is_string($stripped)) {
                        $content = $stripped;
                    }
                }
            }
            if ($img_strtr) {
                $content = strtr($content, $img_strtr);
            }
        }
    } catch (\Exception $e) {
        return $this->error('匹配失败');
    }

    return $this->success('成功', '', [
        'title' => $title,
        'content' => $content
    ]);
}
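// 网友评论 ("Reader comments") — leftover text from the web page this snippet was copied from; not part of the code.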