美文网首页
敏感词处理

敏感词处理

作者: 门口的黑猫 | 来源:发表于2021-10-15 16:00 被阅读0次
      <?php
    
    namespace App\Http\Services;
    
    use App\Http\Models\SensitiveWords;
    use Illuminate\Support\Facades\DB;
    
    /**
     * Sensitive-word detection and administration.
     *
     * Words are loaded (via the Redis cache, falling back to the database)
     * into an in-memory character trie that text is scanned against. The class
     * also exposes list/add/update/delete operations for the word table.
     */
    class SensitiveService extends Service
    {
        /**
         * Sensitive-word trie: nested arrays keyed by single characters; a node
         * that terminates a word carries the extra key 'end' => true.
         * Empty means "not built yet" and triggers a (re)load on the next filter().
         */
        protected static $dict = [];

        /**
         * Sensitive words matched so far. isSensitivePost() resets this list at
         * the start of every call; direct filter() calls only append to it.
         */
        public static $sensitiveWords = [];

        /**
         * Check a piece of content for sensitive words (exact, contiguous
         * matching) and bump the hit counter of every word that was found.
         *
         * @param string $content content to check
         * @param boolean $isReplace whether to return the content with sensitive words replaced
         * @param string $replaceWord replacement used for each matched character
         * @param boolean $isReturnSensitive whether to return the list of matched words instead
         * @return bool|string|array '' for empty content; the matched words when
         *                           $isReturnSensitive is true; otherwise the result of filter()
         */
        public static function isSensitivePost($content, $isReplace = false, $replaceWord = '*', $isReturnSensitive = false)
        {
            $content = trim((string) $content);
            // Strict comparison: the string "0" is real content, not an empty post.
            if ($content === '') {
                return '';
            }

            // Start from a clean slate so that words matched by earlier calls are
            // neither returned again nor have their counters incremented again.
            self::$sensitiveWords = [];

            $filterResult = self::filter($content, 3, 0, $isReplace, $replaceWord);
            if (self::$sensitiveWords) {
                SensitiveWords::whereIn('word', self::$sensitiveWords)->increment('violated_times');
            }

            if ($isReturnSensitive) {
                return self::$sensitiveWords;
            }

            return $filterResult;
        }


        /**
         * Scan a string for sensitive words.
         *
         * Every match is appended to self::$sensitiveWords. When several
         * dictionary words share a prefix, the longest word that completes is
         * the one reported.
         *
         * @param string $str string to check
         * @param int $level matching level: 1 - match whenever the characters of a word
         *                   appear in order; 2 - match when at most $skipDistance other
         *                   characters are skipped; 3 - contiguous (whole-word) match only
         * @param int $skipDistance maximum number of characters that may be skipped inside
         *                          one match (level 2), e.g. a two-character word separated
         *                          by "aa" is caught with 2 but not with 1
         * @param bool $isReplace true to return the string with matches replaced,
         *                        false to return whether any sensitive word was found
         * @param string $replace replacement for each matched character (skipped
         *                        characters in between are left untouched)
         * @return bool|string
         */
        public static function filter($str, $level = 1, $skipDistance = 0, $isReplace = true, $replace = '*')
        {
            if (!self::$dict) {
                self::loadSensitiveWords();
            }

            $strArr = self::splitStr($str);
            $strLength = count($strArr);

            // Maximum number of characters that may be skipped inside one match.
            $level = (int) $level;
            $maxDistance = 0;
            if ($level === 1) {
                $maxDistance = $strLength;
            } elseif ($level === 2) {
                $maxDistance = max((int) $skipDistance, 0);
            }

            $hasSensitiveWords = false;
            for ($i = 0; $i < $strLength; $i++) {
                // Only characters that start some dictionary word are interesting.
                $curChar = $strArr[$i];
                if (!isset(self::$dict[$curChar])) {
                    continue;
                }

                // Walk the trie by value (copy-on-write) so the dictionary itself
                // can never be modified by a scan.
                $curNode = self::$dict[$curChar];
                $matchIndex = [$i];
                // Indexes of the longest complete word seen so far, or null.
                $lastHit = isset($curNode['end']) ? $matchIndex : null;
                $dist = 0;
                for ($j = $i + 1; $j < $strLength && $dist <= $maxDistance; $j++) {
                    $nextChar = $strArr[$j];
                    if (!isset($curNode[$nextChar])) {
                        $dist++;
                        continue;
                    }

                    $matchIndex[] = $j;
                    $curNode = $curNode[$nextChar];
                    if (isset($curNode['end'])) {
                        // Remember this position: a longer candidate may still fail,
                        // in which case this shorter word is the match.
                        $lastHit = $matchIndex;
                    }
                }

                if ($lastHit === null) {
                    continue;
                }

                // Collect the matched word and mask its characters.
                $words = '';
                foreach ($lastHit as $index) {
                    $words .= $strArr[$index];
                    $strArr[$index] = $replace;
                }

                $hasSensitiveWords = true;
                self::$sensitiveWords[] = $words;
                // Resume scanning right after the last matched character.
                $i = end($lastHit);
            }

            if ($isReplace) {
                return implode('', $strArr);
            }

            return $hasSensitiveWords;
        }


        /**
         * Build the sensitive-word trie from the cached word list, loading the
         * list from the database (and caching it) when the cache is empty.
         */
        private static function loadSensitiveWords()
        {
            $cacheKey = RedisService::KEY_GAME_SENSITIVE_WORDS;
            // NOTE(review): assumes RedisService::get() hands back the array that
            // RedisService::set() stored - confirm against RedisService.
            $sensitiveWords = RedisService::get($cacheKey);
            if (empty($sensitiveWords)) {
                $sensitiveWords = SensitiveWords::pluck('word')->toArray();
                RedisService::set($cacheKey, $sensitiveWords);
            }

            // Insert every word into a fresh trie.
            self::$dict = [];
            foreach ($sensitiveWords as $value) {
                self::addWords(trim($value));
            }
        }


        /**
         * Insert one word into the trie, character by character.
         *
         * @param string $words the word to insert; empty words are ignored
         */
        private static function addWords($words)
        {
            $wordArr = self::splitStr($words);
            if (!$wordArr) {
                // An empty word would otherwise flag the trie root as a word end.
                return;
            }

            $curNode = &self::$dict;
            foreach ($wordArr as $char) {
                if (!isset($curNode[$char])) {
                    $curNode[$char] = [];
                }
                $curNode = &$curNode[$char];
            }

            $curNode['end'] = true;
            // Drop the reference so it cannot leak into later writes.
            unset($curNode);
        }


        /**
         * Split text into an array of UTF-8 characters.
         *
         * @param string $str
         * @return array list of single characters; for text that is not valid
         *               UTF-8 the string is split into bytes instead
         */
        private static function splitStr($str)
        {
            $str = (string) $str;
            $chars = preg_split("//u", $str, -1, PREG_SPLIT_NO_EMPTY);
            if ($chars === false) {
                // preg_split() fails on malformed UTF-8; fall back to bytes so the
                // content survives and callers always receive an array.
                return $str === '' ? [] : str_split($str);
            }

            return $chars;
        }


        /**
         * Paginated list of sensitive words, most frequently violated first.
         *
         * @param string $keyWord substring the word must contain ('' lists everything)
         * @return mixed paginator returned by the query builder
         */
        public static function getSensitiveList($keyWord = '')
        {
            return SensitiveWords::where('word', 'like', '%'.$keyWord.'%')
                ->orderBy('violated_times', 'desc')
                ->paginate(self::DEFAULT_PER_PAGE);
        }


        /**
         * Add new sensitive words, skipping blanks, duplicates and words that
         * are already stored, then refresh the cached word list.
         *
         * @param $userId id stored as user_id on the new rows
         * @param $words list of words to add
         * @throws \Exception
         */
        public static function addSensitiveWords($userId, $words)
        {
            self::runInTransaction(function () use ($userId, $words) {
                // Store words trimmed, exactly as the loader reads them, so that
                // matched words can be found again when counting violations.
                $words = array_unique(array_filter(array_map('trim', (array) $words), 'strlen'));
                if (empty($words)) {
                    return;
                }

                $wordsExists = SensitiveWords::whereIn('word', $words)->pluck('word')->toArray();
                $words = array_diff($words, $wordsExists);
                if (empty($words)) {
                    return;
                }

                $now = date('Y-m-d H:i:s');
                $insData = [];
                foreach ($words as $word) {
                    $insData[] = [
                        'word'          =>  $word,
                        'user_id'       =>  $userId,
                        'created_at'    =>  $now
                    ];
                }
                SensitiveWords::insert($insData);

                self::refreshSensitiveWords();
            });
        }


        /**
         * Change the text of an existing sensitive word and refresh the cache.
         *
         * @param $id id of the row to update
         * @param $word new word text
         * @throws \Exception
         */
        public static function updSensitiveWords($id, $word)
        {
            self::runInTransaction(function () use ($id, $word) {
                SensitiveWords::where('id', $id)->update(['word' => trim((string) $word)]);

                self::refreshSensitiveWords();
            });
        }


        /**
         * Delete a sensitive word and refresh the cache.
         *
         * @param $id id (or ids) passed to SensitiveWords::destroy()
         * @throws \Exception
         */
        public static function delSensitiveWords($id)
        {
            self::runInTransaction(function () use ($id) {
                SensitiveWords::destroy($id);

                self::refreshSensitiveWords();
            });
        }


        /**
         * Run $work inside a database transaction: commit on success, roll back
         * on any failure and rethrow it as the generic error, keeping the
         * original failure as the previous exception.
         *
         * @param callable $work
         * @throws \Exception
         */
        private static function runInTransaction(callable $work)
        {
            DB::beginTransaction();
            try {
                $work();

                DB::commit();
            } catch (\Exception $e) {
                DB::rollBack();

                throw new \Exception(RespService::ERR_MSG_DEFAULT, 0, $e);
            } catch (\Throwable $e) {
                // Engine errors (e.g. TypeError) must not leave the transaction open.
                DB::rollBack();

                throw new \Exception(RespService::ERR_MSG_DEFAULT, 0, $e);
            }
        }


        /**
         * Rewrite the cached word list from the database and drop the in-memory
         * trie so that the next filter() call rebuilds it from the fresh list.
         */
        private static function refreshSensitiveWords()
        {
            $cacheKey = RedisService::KEY_GAME_SENSITIVE_WORDS;
            $sensitiveWords = SensitiveWords::pluck('word')->toArray();

            RedisService::set($cacheKey, $sensitiveWords);

            self::$dict = [];
        }


    }
    
    

    相关文章

      网友评论

          本文标题:敏感词处理

          本文链接:https://www.haomeiwen.com/subject/ycumoltx.html