<?php
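/**
* @name Contact information recognizer
* @description Extracts digit runs (including disguised digits) from text. A recognized number is at most 20 digits long, and every group that meets the criteria is extracted.
* @author Rohn(253133755@qq.com)
* @date 2018/4/25
*/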
/**
 * Scans a piece of text for digit runs that look like contact numbers
 * (phone numbers, IM ids, ...), including digits disguised as circled
 * numbers, Roman numerals, Chinese numerals or look-alike letters.
 *
 * The scan is a small state machine whose state is the number of digits
 * collected in the current run:
 *  - a digit (plain or disguised) extends the run;
 *  - a Latin letter, ASCII whitespace or CJK ideograph ends the run;
 *  - every other character (punctuation, symbols, ...) is ignored, so it
 *    cannot be used to split a number apart.
 * A finished run is reported when it has at least MAX_NUMBER_LENGTH digits
 * and is not exempt (see _isExempt()). A run is cut after MAX_GROUP_LENGTH
 * digits.
 */
class ContactRecognize
{
    //Input text, one UTF-8 character per element
    private $_arr = array();
    //Digit groups that qualified; each element is the array of digits of one group
    private $_box = array();
    //Digits of the run currently being collected
    private $_shed = array();
    //Characters that are read as a disguised digit
    private $_variation_num = array(
        '①' => 1,
        '②' => 2,
        '③' => 3,
        '④' => 4,
        '⑤' => 5,
        '⑥' => 6,
        '⑦' => 7,
        '⑧' => 8,
        '⑨' => 9,
        '㈠' => 1,
        '㈡' => 2,
        '㈢' => 3,
        '㈣' => 4,
        '㈤' => 5,
        '㈥' => 6,
        '㈦' => 7,
        '㈧' => 8,
        '㈨' => 9,
        '⑴' => 1,
        '⑵' => 2,
        '⑶' => 3,
        '⑷' => 4,
        '⑸' => 5,
        '⑹' => 6,
        '⑺' => 7,
        '⑻' => 8,
        '⑼' => 9,
        'Ⅰ' => 1,
        'Ⅱ' => 2,
        'Ⅲ' => 3,
        'Ⅳ' => 4,
        'Ⅴ' => 5,
        'Ⅵ' => 6,
        'Ⅶ' => 7,
        'Ⅷ' => 8,
        'Ⅸ' => 9,
        //Chinese numerals (everyday forms, plus one homophone for nine)
        '一' => 1,
        '二' => 2,
        '三' => 3,
        '四' => 4,
        '五' => 5,
        '六' => 6,
        '七' => 7,
        '八' => 8,
        '九' => 9,
        '久' => 9,
        //Chinese numerals (formal/financial forms)
        '零' => 0,
        '壹' => 1,
        '贰' => 2,
        '叁' => 3,
        '肆' => 4,
        '伍' => 5,
        '陆' => 6,
        '柒' => 7,
        '捌' => 8,
        '玖' => 9,
        //Look-alike Latin letters
        'o' => 0,
        'O' => 0,
        'l' => 1,
        'I' => 1,
    );
    //Despite its name this is the MINIMUM number of digits a run needs
    //before it is reported (see _intoBox()); it is also the number of
    //trailing digits inspected by _isAllRepeat(). Name kept for callers.
    const MAX_NUMBER_LENGTH = 6;
    //A run is cut off and judged once it holds this many digits
    const MAX_GROUP_LENGTH = 20;
    //Marker returned by _formatChar() for a character that ends the current run
    const FLAG_RESET = 'reset';

    /**
     * ContactRecognize constructor.
     * @param string $str text to scan (UTF-8)
     */
    public function __construct($str){
        $this->_arr = $this->_ch2arr($str);
    }

    /**
     * Runs the scan over the text given to the constructor.
     *
     * Each call starts from a clean state, so calling it repeatedly
     * returns the same result.
     *
     * @return string|false JSON array of digit groups (each group is an
     *                      array of single digits: plain digits stay
     *                      strings, disguised digits are integers), or
     *                      false when no group qualified
     */
    public function recognize(){
        $this->_box = array();
        $curState = $this->_resetState();
        foreach($this->_arr as $char){
            $number = $this->_formatChar($char);
            //Noise character: skip it without touching the current run
            if($number === false){
                continue;
            }
            //The run reached the length cap: judge it exactly once and
            //start over, then treat the current character as the possible
            //beginning of a new run.
            if($curState >= self::MAX_GROUP_LENGTH){
                $this->_intoBox($curState);
                $curState = $this->_resetState();
            }
            if($curState === 0){
                //Nothing collected yet, so a separator has nothing to end.
                //Strict comparison: the digit 0 must not be mistaken for
                //FLAG_RESET (0 == 'reset' is true before PHP 8).
                if($number !== self::FLAG_RESET){
                    $curState = $this->_moveState($number, $curState);
                }
            }else{
                $curState = $this->_setState($number, $curState);
            }
        }
        //The text may end in the middle of a run; judge that last run too
        $this->_intoBox($curState);
        if(count($this->_box) > 0){
            return json_encode($this->_box);
        }
        return false;
    }

    /**
     * Tells whether the current run must NOT be reported.
     * The only exemption implemented is the repeated-digit rule of
     * _isAllRepeat(), e.g. 555555 or 6666666666.
     * @return bool
     */
    private function _isExempt(){
        return $this->_isAllRepeat();
    }

    /**
     * Checks whether the last MAX_NUMBER_LENGTH digits of the current run
     * are all the same digit, e.g. 555555 or 6666666666.
     * NOTE(review): only the tail of the run is inspected, so a longer
     * number that merely ends in six identical digits is exempt as well -
     * confirm that this is intended.
     * @return bool
     */
    private function _isAllRepeat(){
        $tail = array_slice($this->_shed, -self::MAX_NUMBER_LENGTH);
        return count(array_count_values($tail)) == 1;
    }

    /**
     * Advances the state machine by one significant character while a run
     * is in progress.
     * @param int|string $number a digit, or FLAG_RESET
     * @param int $curState number of digits collected so far
     * @return int the new state
     */
    private function _setState($number, $curState){
        //Strict comparison so that the integer digit 0 is never read as FLAG_RESET
        if($number === self::FLAG_RESET){
            $this->_intoBox($curState);
            return $this->_resetState();
        }
        return $this->_moveState($number, $curState);
    }

    /**
     * Stores the current run in the result box when it is long enough
     * (at least MAX_NUMBER_LENGTH digits) and not exempt.
     * @param int $curState number of digits in the current run
     */
    private function _intoBox($curState){
        if($curState >= self::MAX_NUMBER_LENGTH && !$this->_isExempt()){
            array_push($this->_box, $this->_shed);
        }
    }

    /**
     * Appends a digit to the current run and advances the state by one.
     * @param int|string $number
     * @param int $curState
     * @return int
     */
    private function _moveState($number, $curState){
        array_push($this->_shed, $number);
        return $curState + 1;
    }

    /**
     * Drops the current run and returns the initial state.
     * @return int always 0
     */
    private function _resetState(){
        $this->_shed = array();
        return 0;
    }

    /**
     * Classifies one character.
     * @param string $char a single character
     * @return int|string|false
     *      the digit (the character itself for a plain digit, an integer
     *      for a disguised digit),
     *      FLAG_RESET when the character ends the current run,
     *      false when the character is noise and must be ignored
     */
    private function _formatChar($char){
        //Plain digit
        if(is_numeric($char)){
            return $char;
        }
        //Disguised digit; checked before the separator test because some
        //of them are letters or ideographs themselves
        $digit = $this->_isVariation($char);
        if($digit !== false){
            return $digit;
        }
        //Separator
        if($this->_isReset($char)){
            return self::FLAG_RESET;
        }
        return false;
    }

    /**
     * Tells whether the character ends a digit run: a Latin letter,
     * ASCII whitespace, or a CJK ideograph in U+4E00..U+9FA5.
     * @param string $char
     * @return bool
     */
    private function _isReset($char){
        //Latin letters and whitespace
        if(preg_match("/[a-zA-Z\s]/", $char)){
            return true;
        }
        //Chinese characters
        if(preg_match('/[\x{4e00}-\x{9fa5}]/u', $char) > 0){
            return true;
        }
        return false;
    }

    /**
     * Looks the character up in the disguised-digit table.
     * @param string $char
     * @return int|false the digit it stands for, or false
     */
    private function _isVariation($char){
        return isset($this->_variation_num[$char]) ? $this->_variation_num[$char] : false;
    }

    /**
     * Splits a multi-byte string into an array of single characters.
     * @param string $str
     * @param string $charset
     * @return array
     */
    private function _ch2arr($str, $charset = 'utf-8'){
        //Cast so that null or a number is handled like the matching string
        $str = (string)$str;
        $length = mb_strlen($str, $charset);
        $array = array();
        for($i = 0; $i < $length; $i++){
            $array[] = mb_substr($str, $i, 1, $charset);
        }
        return $array;
    }
}
//测试
//Demo: scan a sample text that mixes plain digits, disguised digits,
//separators and noise characters, then print the recognized groups
$sampleText = '12资源12 零3456哈哈12Ⅶ 34567@a1234567890O00o001';
$recognizer = new ContactRecognize($sampleText);
print_r($recognizer->recognize());
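//"Reader comments" (网友评论): stray heading from the web page this code was copied from; kept as a comment because it is not PHP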