gitee.com/h79/goutils@v1.22.10/sensitive/filter.go (about) 1 package sensitive 2 3 import ( 4 "bufio" 5 "gitee.com/h79/goutils/common/trie" 6 "io" 7 "net/http" 8 "os" 9 "regexp" 10 "time" 11 ) 12 13 // Filter 敏感词过滤器 14 type Filter struct { 15 regexp *Regexp 16 trie *trie.Trie 17 noise *regexp.Regexp 18 } 19 20 // New 返回一个敏感词过滤器 21 func New(reg bool) *Filter { 22 noise := regexp.MustCompile(`[\\|\s&%$@*]+`) 23 if reg { 24 return &Filter{ 25 regexp: NewRegexp(), 26 trie: nil, 27 noise: noise, 28 } 29 } 30 return &Filter{ 31 regexp: nil, 32 trie: trie.NewTrie(), 33 noise: noise, 34 } 35 } 36 37 // UpdateNoisePattern 更新去噪模式 38 func (filter *Filter) UpdateNoisePattern(pattern string) { 39 filter.noise = regexp.MustCompile(pattern) 40 } 41 42 // LoadWordDict 加载敏感词字典 43 func (filter *Filter) LoadWordDict(path string) error { 44 f, err := os.Open(path) 45 if err != nil { 46 return err 47 } 48 defer f.Close() 49 50 return filter.Load(f) 51 } 52 53 // LoadWordDictByNetworkFile 加载网络敏感词字典,文本文件 54 func (filter *Filter) LoadWordDictByNetworkFile(url string) error { 55 c := http.Client{ 56 Timeout: 5 * time.Second, 57 } 58 rsp, err := c.Get(url) 59 if err != nil { 60 return err 61 } 62 defer rsp.Body.Close() 63 64 return filter.Load(rsp.Body) 65 } 66 67 // Load common method to add words 68 func (filter *Filter) Load(rd io.Reader) error { 69 buf := bufio.NewReader(rd) 70 for { 71 line, _, err := buf.ReadLine() 72 if err != nil { 73 if err != io.EOF { 74 return err 75 } 76 break 77 } 78 filter.add(string(line)) 79 } 80 81 return nil 82 } 83 84 func (filter *Filter) add(words string) { 85 if filter.trie != nil { 86 filter.trie.Add(words) 87 } else if filter.regexp != nil { 88 _, _ = filter.regexp.Add(words) 89 } 90 } 91 92 func (filter *Filter) del(word string) { 93 if filter.trie != nil { 94 filter.trie.Del(word) 95 } else if filter.regexp != nil { 96 filter.regexp.Del(word) 97 } 98 } 99 100 // AddWord 添加敏感词 101 func (filter *Filter) AddWord(words ...string) { 102 for i := range words { 103 filter.add(words[i]) 104 } 105 } 106 107 // DelWord 删除敏感词 108 func (filter *Filter) DelWord(words ...string) { 109 for i := range words { 110 filter.del(words[i]) 111 } 112 } 113 114 // Filter 过滤敏感词 115 func (filter *Filter) Filter(text string) string { 116 if filter.trie != nil { 117 return filter.trie.Filter(text) 118 } 119 return filter.regexp.Filter(text) 120 } 121 122 // Replace 和谐敏感词 123 func (filter *Filter) Replace(text string, repl rune, replaceF func(repl rune) string) string { 124 if filter.trie != nil { 125 return filter.trie.Replace(text, repl) 126 } 127 return filter.regexp.Replace(text, replaceF(repl)) 128 } 129 130 // FindIn 检测敏感词 131 func (filter *Filter) FindIn(text string) (bool, string) { 132 text = filter.RemoveNoise(text) 133 if filter.trie != nil { 134 return filter.trie.FindIn(text) 135 } 136 return filter.regexp.FindIn(text) 137 } 138 139 // FindAll 找到所有匹配词 140 func (filter *Filter) FindAll(text string) []*trie.Group { 141 if filter.trie != nil { 142 return filter.trie.FindAll(text) 143 } 144 return filter.regexp.FindAll(text) 145 } 146 147 // Validate 检测字符串是否合法 148 func (filter *Filter) Validate(text string) (bool, string) { 149 text = filter.RemoveNoise(text) 150 if filter.trie != nil { 151 return filter.trie.Validate(text) 152 } 153 return filter.regexp.Validate(text) 154 } 155 156 // RemoveNoise 去除空格等噪音 157 func (filter *Filter) RemoveNoise(text string) string { 158 return filter.noise.ReplaceAllString(text, "") 159 }