美文网首页
联系方式识别(php版本)

联系方式识别(php版本)

作者: Rohn | 来源:发表于2018-04-28 17:05 被阅读18次
     <?php
    /**
     * Contact-number recognizer.
     *
     * Walks a text character by character and collects runs of digits (plain
     * digits plus the look-alike characters listed in $_variation_num). A run is
     * capped at MAX_GROUP_LENGTH (20) digits; every run that qualifies is
     * reported, not only the first one.
     *
     * @author Rohn(253133755@qq.com)
     * @date   2018/4/25
     */
    class ContactRecognize
    {

        // Input text, split into single characters.
        private $_arr = array();
        // Digit runs that satisfied the length / non-repeat conditions.
        private $_box = array();
        // Digits of the run currently being collected.
        private $_shed = array();

        // Look-alike characters and the digit each one stands for.
        private $_variation_num = array(
            '①' => 1,
            '②' => 2,
            '③' => 3,
            '④' => 4,
            '⑤' => 5,
            '⑥' => 6,
            '⑦' => 7,
            '⑧' => 8,
            '⑨' => 9,
            '㈠' => 1,
            '㈡' => 2,
            '㈢' => 3,
            '㈣' => 4,
            '㈤' => 5,
            '㈥' => 6,
            '㈦' => 7,
            '㈧' => 8,
            '㈨' => 9,
            '⑴' => 1,
            '⑵' => 2,
            '⑶' => 3,
            '⑷' => 4,
            '⑸' => 5,
            '⑹' => 6,
            '⑺' => 7,
            '⑻' => 8,
            '⑼' => 9,
            'Ⅰ' => 1,
            'Ⅱ' => 2,
            'Ⅲ' => 3,
            'Ⅳ' => 4,
            'Ⅴ' => 5,
            'Ⅵ' => 6,
            'Ⅶ' => 7,
            'Ⅷ' => 8,
            'Ⅸ' => 9,
            // Simplified Chinese numerals ('久' is listed next to '九' and maps to the same digit)
            '一' => 1,
            '二' => 2,
            '三' => 3,
            '四' => 4,
            '五' => 5,
            '六' => 6,
            '七' => 7,
            '八' => 8,
            '九' => 9,
            '久' => 9,
            // Traditional / formal Chinese numerals
            '零' => 0,
            '壹' => 1,
            '贰' => 2,
            '叁' => 3,
            '肆' => 4,
            '伍' => 5,
            '陆' => 6,
            '柒' => 7,
            '捌' => 8,
            '玖' => 9,
            // Latin letters that resemble digits
            'o' => 0,
            'O' => 0,
            'l' => 1,
            'I' => 1,
        );

        // Threshold used by _intoBox(): a run is only reported once it holds at
        // least this many digits. Also the size of the tail that _isAllRepeat()
        // inspects.
        const MAX_NUMBER_LENGTH = 6;
        // Upper bound on the digits kept in one run; a longer digit sequence is
        // split into consecutive runs.
        const MAX_GROUP_LENGTH = 20;
        // Marker returned by _formatChar() for characters that terminate a run.
        const FLAG_RESET = 'reset';

        /**
         * ContactRecognize constructor.
         * @param $str text to scan (interpreted as UTF-8)
         */
        public function __construct($str){

            $this->_arr = $this->_ch2arr($str);
        }

        /**
         * Scan the text and collect every qualifying digit run.
         *
         * Plain digits are stored as the one-character strings taken from the
         * input, look-alike digits as the integers from $_variation_num.
         *
         * @return string|false JSON-encoded list of runs (each run is a list of
         *                      digits), or false when no run qualified
         */
        public function recognize(){

            // Start from a clean slate so that calling recognize() again on the
            // same instance does not accumulate the previous call's results.
            $this->_box = array();
            $curState   = $this->_resetState();

            foreach($this->_arr as $char){

                $number = $this->_formatChar($char);
                // Interference character: skip it without touching the current run.
                if($number === false){
                    continue;
                }
                // The current run is already full: close it exactly once and let
                // the current character start (or reset) a fresh run.
                if($curState >= self::MAX_GROUP_LENGTH){
                    $this->_intoBox($curState);
                    $curState = $this->_resetState();
                }
                $curState = $this->_setState($number, $curState);
            }
            // The text may end in the middle of a run; give it a chance to be reported.
            $this->_intoBox($curState);

            if(count($this->_box) > 0){
                return json_encode($this->_box);
            }

            return false;
        }

        /**
         * Whether the current run is exempt from being reported.
         * a) runs of one repeated digit, e.g. 555555, 6666666666
         * @return bool
         */
        private function _isExempt(){

            return $this->_isAllRepeat();
        }

        /**
         * Whether the last MAX_NUMBER_LENGTH digits of the current run are all
         * the same digit (e.g. 555555).
         *
         * NOTE(review): only the tail of the run is inspected, so a longer run
         * that merely ends in six equal digits is exempted too - confirm that
         * this is intended.
         * @return bool
         */
        private function _isAllRepeat(){

            $tail = array_slice($this->_shed, -self::MAX_NUMBER_LENGTH);

            return count(array_count_values($tail)) == 1;
        }

        /**
         * Feed one formatted character into the state machine.
         * @param $number   digit to append, or FLAG_RESET
         * @param $curState number of digits collected so far
         * @return int the new state
         */
        private function _setState($number, $curState){

            // Strict comparison: on PHP < 8 the loose form treats int 0 as equal
            // to the string 'reset', which would make a variant zero end the run.
            if($number === self::FLAG_RESET){

                $this->_intoBox($curState);

                return $this->_resetState();
            }

            return $this->_moveState($number, $curState);
        }

        /**
         * Report the current run if it is long enough and not exempt.
         * Called before a reset and once at the end of the text.
         * @param $curState number of digits collected so far
         */
        private function _intoBox($curState){

            if($curState >= self::MAX_NUMBER_LENGTH && !$this->_isExempt()){
                array_push($this->_box, $this->_shed);
            }
        }

        /**
         * Append a digit to the current run and advance the state by one.
         * @param $number
         * @param $curState
         * @return int
         */
        private function _moveState($number, $curState){

            array_push($this->_shed, $number);

            return $curState + 1;
        }

        /**
         * Drop the current run and return the initial state.
         * @return int always 0
         */
        private function _resetState(){

            $this->_shed = array();

            return 0;
        }

        /**
         * Classify a single character.
         * @param $char
         * @return
         *  digit      the character itself for a plain digit, or the mapped
         *             integer for a look-alike character
         *  FLAG_RESET the character terminates the current run
         *  false      interference character, to be ignored
         */
        private function _formatChar($char){

            // Plain digit
            if(is_numeric($char)){
                return $char;
            }
            // Look-alike digit; checked before _isRest() so that mapped letters
            // and Chinese numerals count as digits rather than as run terminators.
            $rs = $this->_isVariation($char);
            if($rs !== false){
                return $rs;
            }
            // Run terminator
            if($this->_isRest($char)){
                return self::FLAG_RESET;
            }

            return false;
        }

        /**
         * Whether the character terminates a run: an ASCII letter, whitespace,
         * or a character in the range U+4E00-U+9FA5.
         * @param $char
         * @return bool
         */
        private function _isRest($char){

            // ASCII letters and whitespace
            if(preg_match("/[a-zA-Z\s]/", $char)){
                return true;
            }
            // Chinese characters
            if(preg_match('/[\x{4e00}-\x{9fa5}]/u', $char) > 0){
                return true;
            }

            return false;
        }

        /**
         * Look the character up in the look-alike table.
         * @param $char
         * @return bool|mixed the mapped digit, or false when the character is not listed
         */
        private function _isVariation($char){

            return isset($this->_variation_num[$char])?$this->_variation_num[$char]:false;
        }

        /**
         * Split a multi-byte string into an array of single characters.
         * @param $str
         * @param string $charset
         * @return array
         */
        private function _ch2arr($str, $charset = 'utf-8'){

            // Normalise null / scalar input so the mb_* calls always get a string.
            $str    = (string) $str;
            $length = mb_strlen($str, $charset);
            $array  = array();
            for($i = 0; $i < $length; $i++){
                $array[] = mb_substr($str, $i, 1, $charset);
            }

            return $array;
        }
    }
    
    // Demo: run the recognizer on a sample text and print the result.
    $sample     = '12资源12 零3456哈哈12Ⅶ 34567@a1234567890O00o001';
    $recognizer = new ContactRecognize($sample);
    print_r($recognizer->recognize());
    

    相关文章

      网友评论

          本文标题:联系方式识别(php版本)

          本文链接:https://www.haomeiwen.com/subject/bdaglftx.html