gitee.com/h79/goutils@v1.22.10/common/trie/trie.go (about) 1 package trie 2 3 import ( 4 "strconv" 5 "sync" 6 ) 7 8 const ( 9 Empty = "" 10 ) 11 12 type Group struct { 13 No string 14 Text string `xml:"text" json:"text"` //找出的词 15 Index int `xml:"index" json:"index"` //找出的位置 16 } 17 18 type Trie struct { 19 Root *Node 20 } 21 22 type Node struct { 23 end bool 24 character rune 25 weight int 26 rm sync.RWMutex 27 children map[rune]*Node 28 } 29 30 func newNode(character rune) *Node { 31 return &Node{ 32 weight: 0, 33 character: character, 34 children: make(map[rune]*Node, 0), 35 } 36 } 37 38 func newRootNode() *Node { 39 return newNode(0) 40 } 41 42 func (node *Node) Get(r rune) (*Node, bool) { 43 node.rm.RLock() 44 defer node.rm.RUnlock() 45 node, ok := node.children[r] 46 return node, ok 47 } 48 49 func (node *Node) Add(r rune, n *Node) *Node { 50 node.rm.Lock() 51 defer node.rm.Unlock() 52 node.children[r] = n 53 n.weight = len(node.children) 54 return n 55 } 56 57 // IsLeaf 是否叶子节点 58 func (node *Node) IsLeaf() bool { 59 return len(node.children) == 0 60 } 61 62 // IsRoot 是否为根节点 63 func (node *Node) IsRoot() bool { 64 return node.character == 0 65 } 66 67 // IsPathEnd 某个路径的结束 68 func (node *Node) IsPathEnd() bool { 69 return node.end 70 } 71 72 // SoftDel 置软删除状态 73 func (node *Node) SoftDel() { 74 node.end = false 75 } 76 77 func NewTrie() *Trie { 78 return &Trie{ 79 Root: newRootNode(), 80 } 81 } 82 83 // Add 添加, return no 唯一的编号 84 func (tree *Trie) Add(word string) (no string) { 85 return tree.AddRune([]rune(word)) 86 } 87 88 func (tree *Trie) AddRune(word []rune) (no string) { 89 90 var ( 91 parent = tree.Root 92 position = 0 93 found = false 94 r rune 95 current *Node 96 ) 97 for position = 0; position < len(word); position++ { 98 if position > 0 { 99 no += "-" 100 } 101 r = word[position] 102 if current, found = parent.Get(r); found { 103 parent = current 104 } else { 105 parent = parent.Add(r, newNode(r)) 106 } 107 no += strconv.Itoa(parent.weight) 108 if position == len(word)-1 { 109 parent.end = true 110 } 111 } 112 return 113 } 114 115 func (tree *Trie) Del(word string) { 116 tree.DelRune([]rune(word)) 117 } 118 119 func (tree *Trie) DelRune(word []rune) { 120 var ( 121 current = tree.Root 122 position = 0 123 found = false 124 ) 125 for position = 0; position < len(word); position++ { 126 if current, found = current.Get(word[position]); !found { 127 return 128 } 129 if position == len(word)-1 { 130 current.SoftDel() 131 } 132 } 133 } 134 135 // Replace 词语替换 136 func (tree *Trie) Replace(text string, character rune) string { 137 return tree.ReplaceRune([]rune(text), character) 138 } 139 140 func (tree *Trie) ReplaceRune(text []rune, character rune) string { 141 var ( 142 parent = tree.Root 143 length = len(text) 144 left = 0 145 position = 0 146 found = false 147 next = func() { 148 parent = tree.Root 149 position = left 150 left++ 151 } 152 current *Node 153 ) 154 155 for position = 0; position < len(text); position++ { 156 if current, found = parent.children[text[position]]; !found { 157 next() 158 continue 159 } 160 if !current.IsPathEnd() && position == length-1 { 161 next() 162 continue 163 } 164 if current.IsPathEnd() && left <= position { 165 for i := left; i <= position; i++ { 166 text[i] = character 167 } 168 } 169 parent = current 170 } 171 return string(text) 172 } 173 174 // Filter 直接过滤掉字符串中的敏感词 175 func (tree *Trie) Filter(text string) string { 176 return tree.FilterRune([]rune(text)) 177 } 178 179 func (tree *Trie) FilterRune(text []rune) string { 180 181 var ( 182 parent = tree.Root 183 length = len(text) 184 left = 0 185 position = 0 186 found = false 187 next = func() { 188 parent = tree.Root 189 position = left 190 left++ 191 } 192 current *Node 193 resultRunes []rune 194 ) 195 196 for position = 0; position < length; position++ { 197 if current, found = parent.children[text[position]]; !found { 198 next() 199 continue 200 } 201 if !current.IsPathEnd() && position == length-1 { 202 resultRunes = append(resultRunes, text[left]) 203 next() 204 continue 205 } 206 if current.IsPathEnd() { 207 left = position + 1 208 parent = tree.Root 209 } else { 210 parent = current 211 } 212 } 213 resultRunes = append(resultRunes, text[left:]...) 214 return string(resultRunes) 215 } 216 217 func (tree *Trie) Validate(text string) (bool, string) { 218 var validated, g = tree.ValidateRune([]rune(text)) 219 return validated, g.Text 220 } 221 222 // ValidateReturnNo 验证字符串是否合法,如不合法则返回false和检测到 223 // 的第一个敏感词 224 func (tree *Trie) ValidateReturnNo(text string) (bool, string, string) { 225 var validated, g = tree.ValidateRune([]rune(text)) 226 return validated, g.Text, g.No 227 } 228 229 func (tree *Trie) ValidateRune(text []rune) (bool, Group) { 230 231 var ( 232 parent = tree.Root 233 length = len(text) 234 left = 0 235 position = 0 236 found = false 237 no = Empty 238 next = func() { 239 no = Empty 240 parent = tree.Root 241 position = left 242 left++ 243 } 244 current *Node 245 ) 246 247 for position = 0; position < len(text); position++ { 248 if current, found = parent.children[text[position]]; !found { 249 next() 250 continue 251 } 252 if !current.IsPathEnd() && position == length-1 { 253 next() 254 continue 255 } 256 if len(no) > 0 { 257 no += "-" 258 } 259 no += strconv.Itoa(current.weight) 260 if current.IsPathEnd() && left <= position { 261 return false, Group{No: no, Text: string(text[left : position+1]), Index: left} 262 } 263 parent = current 264 } 265 return true, Group{No: "", Text: "", Index: -1} 266 } 267 268 // FindInReturnNo 判断text中是否含有词库中的词 269 func (tree *Trie) FindInReturnNo(text string) (bool, string, string) { 270 var validated, g = tree.ValidateRune([]rune(text)) 271 return !validated, g.Text, g.No 272 } 273 274 func (tree *Trie) FindInGroup(text string) (bool, Group) { 275 var validated, g = tree.ValidateRune([]rune(text)) 276 return !validated, g 277 } 278 279 func (tree *Trie) FindRune(text []rune) (bool, Group) { 280 var validated, g = tree.ValidateRune(text) 281 return !validated, g 282 } 283 284 func (tree *Trie) FindIn(text string) (bool, string) { 285 validated, g := tree.ValidateRune([]rune(text)) 286 return !validated, g.Text 287 } 288 289 func (tree *Trie) FindAll(text string) []*Group { 290 return tree.FindRunes([]rune(text)) 291 } 292 293 // FindRunes 找有所有包含在词库中的词 294 func (tree *Trie) FindRunes(text []rune) []*Group { 295 296 var ( 297 parent = tree.Root 298 length = len(text) 299 left = 0 300 position = 0 301 found = false 302 no = Empty 303 next = func() { 304 no = Empty 305 parent = tree.Root 306 position = left 307 left++ 308 } 309 310 matches []*Group 311 current *Node 312 ) 313 314 for position = 0; position < length; position++ { 315 if current, found = parent.children[text[position]]; !found { 316 next() 317 continue 318 } 319 if !current.IsPathEnd() && position == length-1 { 320 next() 321 continue 322 } 323 if len(no) > 0 { 324 no += "-" 325 } 326 no += strconv.Itoa(current.weight) 327 if current.IsPathEnd() && left <= position { 328 matches = append(matches, &Group{No: no, Text: string(text[left : position+1]), Index: left}) 329 } 330 parent = current 331 } 332 return matches 333 }