方式1(来自网络)
/**
 * Convert an HTML fragment to plain text.
 *
 * Processing order:
 *  1. <style> and <script> elements are removed together with their content.
 *  2. <br>, and opening/closing <p>, <div> and <li> tags (with or without
 *     attributes), are replaced by the marker ">>>>".
 *  3. All remaining tags are removed with strip_tags().
 *  4. Character references are decoded. This happens only AFTER the tags are
 *     stripped, so escaped text such as "a&lt;b" is never mistaken for markup.
 *  5. Non-breaking spaces and typographic quotes are normalised to their
 *     plain ASCII counterparts.
 *
 * @param string $str HTML markup to convert (null is treated as "").
 * @return string Plain-text rendering of $str.
 */
function html2text($str) {
    $str = (string) $str;

    // Remove style/script elements including their content. "\b[^>]*" lets the
    // opening tag appear with or without attributes.
    $str = preg_replace('/<(style|script)\b[^>]*>.*?<\/\1\s*>/is', '', $str);

    // Line-level tags become the ">>>>" marker. "\b" keeps <p> from matching
    // <pre>, and <li> from matching <link>. Tags such as <td> or <blockquote>
    // need no special handling: strip_tags() below removes them.
    $str = preg_replace('/<br\b[^>]*>|<\/?(?:p|div|li)\b[^>]*>/i', '>>>>', $str);

    $str = strip_tags($str);

    // Named references that should end up as ASCII. The trailing semicolon is
    // optional and is consumed when present, so "&ldquo;" yields '"' and not
    // '";'. Matching is case-insensitive.
    $asciiFor = [
        'nbsp'  => ' ',
        'ldquo' => '"',
        'rdquo' => '"',
        'lsquo' => "'",
        'rsquo' => "'",
    ];
    $str = preg_replace_callback(
        '/&(nbsp|ldquo|rdquo|lsquo|rsquo);?/i',
        function ($match) use ($asciiFor) {
            return $asciiFor[strtolower($match[1])];
        },
        $str
    );

    // Decode every other named or numeric reference (&amp; &lt; &gt; &#39; ...).
    $str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');

    // Drop numeric references that could not be decoded. The pattern is
    // restricted to real numeric references so that ordinary text between a
    // literal "&#" and a later ";" is left untouched.
    $str = preg_replace('/&#(?:x[0-9a-f]+|[0-9]+);/i', '', $str);

    // Normalise literal (or numerically encoded) U+00A0 and curly quotes.
    // Written as UTF-8 byte sequences so the result does not depend on the
    // encoding of this source file.
    return strtr($str, [
        "\xC2\xA0"     => ' ',
        "\xE2\x80\x9C" => '"',
        "\xE2\x80\x9D" => '"',
        "\xE2\x80\x98" => "'",
        "\xE2\x80\x99" => "'",
    ]);
}
输出结果:
// Demo: a well-formed <li> element wrapping a link, passed to the html2text() function.
$str='<li><a id="blog_nav_sitehome" class="menu" href="http://www.cnblogs.com/">博客园</a></li>';
// Print the converted text.
echo html2text($str);
输出:
>>>>博客园>>>>
如果li标记不完整
// Demo: the same markup with a broken <li> — the leading "<" and the trailing ">" are missing.
$str='li><a id="blog_nav_sitehome" class="menu" href="http://www.cnblogs.com/">博客园</a></li';
// Print the converted text.
echo html2text($str);
输出:
li>博客园
方式2,采用html2text类
链接地址:http://www.chuggnutt.com/html2text
怎么下载,见文章,这里就不描述了
// Load the html2text class from a file in the current directory.
require_once "./html2text.class.php";
// Demo: a well-formed <li> element wrapping a link.
$str='<li><a id="blog_nav_sitehome" class="menu" href="http://www.cnblogs.com/">博客园</a></li>';
// Hand the markup to the converter through its constructor.
$h2t =new html2text($str);
// Fetch the converted text and print it.
$text = $h2t->get_text();
echo $text;
输出:
* 博客园 [1] Links: ------ [1] http://www.cnblogs.com/
如果li标记不完整
// Demo: the same markup with a broken <li> — the leading "<" and the trailing ">" are missing.
$str='li><a id="blog_nav_sitehome" class="menu" href="http://www.cnblogs.com/">博客园</a></li';
// Hand the markup to the html2text class through its constructor.
$h2t =new html2text($str);
// Fetch the converted text and print it.
$text = $h2t->get_text();
echo $text;
输出:
li>博客园 [1] Links: ------ [1] http://www.cnblogs.com/
注:具体采用哪个,个人自己衡量就行~
网友评论