敏感词过滤:提前设置好敏感词,之后每个新句子都要过滤一遍;若其中存在敏感词,则用 * 号代替。代码如下:
package main
import (
"fmt"
"unicode/utf8"
)
// Trie is a rune-keyed prefix tree that stores the sensitive-word vocabulary.
// Each node represents the prefix spelled out by the path from the root.
type Trie struct {
	// child maps the next rune to the subtree that continues the prefix.
	child map[rune]*Trie
	// word is the complete entry that ends at this node, or "" when the
	// node is only an intermediate prefix.
	word string
}

// NewTrie returns an empty node with an initialized child map, ready to be
// used as the root of a vocabulary.
func NewTrie() *Trie {
	return &Trie{child: make(map[rune]*Trie)}
}

// insert registers word in the vocabulary and returns the receiver so that
// calls can be chained.
func (t *Trie) insert(word string) *Trie {
	cur := t
	for _, r := range word {
		next, ok := cur.child[r]
		if !ok {
			next = NewTrie()
			// Writing to a nil map panics; allocate lazily so that a
			// zero-value Trie can also serve as a root.
			if cur.child == nil {
				cur.child = make(map[rune]*Trie)
			}
			cur.child[r] = next
		}
		cur = next
	}
	cur.word = word
	return t
}

// filterString returns a copy of word in which every occurrence of a
// registered entry is masked with one '*' per rune of the entry.
//
// The text is scanned from left to right. At each position the longest entry
// starting there is masked and scanning resumes right after it, so masked
// spans never overlap. If no entry starts at a position, that rune is copied
// through unchanged and the scan moves on by exactly one rune; a failed
// partial match therefore cannot hide an entry that begins inside it.
func (t *Trie) filterString(word string) string {
	// '*' is one byte and every rune is at least one byte, so the result is
	// never longer than the input.
	out := make([]byte, 0, len(word))
	for pos := 0; pos < len(word); {
		if end, n := t.longestMatch(word, pos); n > 0 {
			for ; n > 0; n-- {
				out = append(out, '*')
			}
			pos = end
			continue
		}
		// No entry starts here: keep this rune's bytes as they are.
		_, size := utf8.DecodeRuneInString(word[pos:])
		out = append(out, word[pos:pos+size]...)
		pos += size
	}
	return string(out)
}

// longestMatch walks the trie along word beginning at byte offset start. It
// returns the byte offset just past the longest registered entry that begins
// at start, together with that entry's length in runes. When no entry begins
// at start it returns (start, 0).
func (t *Trie) longestMatch(word string, start int) (end, runes int) {
	end = start
	node := t
	depth := 0
	for pos := start; pos < len(word); {
		r, size := utf8.DecodeRuneInString(word[pos:])
		next, ok := node.child[r]
		if !ok {
			break
		}
		node = next
		pos += size
		depth++
		// Remember the match but keep going: a longer entry may share
		// this prefix.
		if node.word != "" {
			end, runes = pos, depth
		}
	}
	return end, runes
}
// replaceStr returns word with every rune whose index falls in the inclusive
// range [left, right] replaced by the string replace. Indices count runes,
// not bytes, starting at 0. Positions outside the range are copied through,
// and a range that lies partly or wholly outside the string simply matches
// fewer (or no) runes.
func replaceStr(word, replace string, left, right int) string {
	// Build the result in one pre-sized buffer instead of concatenating
	// strings inside the loop, which copies the accumulated prefix each time.
	out := make([]byte, 0, len(word))
	idx := 0 // rune index; range over a string yields byte offsets instead
	for _, r := range word {
		if idx >= left && idx <= right {
			out = append(out, replace...)
		} else {
			out = append(out, string(r)...)
		}
		idx++
	}
	return string(out)
}
// main registers a small sample vocabulary, filters one sample sentence
// through it and prints the result to standard output.
func main() {
	filter := NewTrie()
	// Same entries, same insertion order as a single chained call.
	for _, entry := range []string{"sb", "狗日", "cnm", "狗日的", "c", "nm"} {
		filter.insert(entry)
	}
	masked := filter.filterString("狗头,你就是个狗日的,我要cnm,你个sb,嘿嘿")
	fmt.Println(masked)
}
网友评论