gitee.com/h79/goutils@v1.22.10/common/trie/trie.go (about)

     1  package trie
     2  
     3  import (
     4  	"strconv"
     5  	"sync"
     6  )
     7  
     8  const (
     9  	Empty = ""
    10  )
    11  
// Group describes one dictionary word located inside a scanned text.
type Group struct {
	// No is the number string assembled by ValidateRune / FindRunes: the
	// weights of the nodes on the matched path, joined by "-".
	No    string
	Text  string `xml:"text" json:"text"`   // the word that was found
	Index int    `xml:"index" json:"index"` // rune offset where the word starts; ValidateRune uses -1 for "nothing found"
}
    17  
// Trie is a rune-keyed prefix tree of words. Every lookup and insertion in
// this file starts at Root; NewTrie initialises it with a root node whose
// character is 0.
type Trie struct {
	Root *Node
}
    21  
// Node is a single vertex of the trie.
type Node struct {
	// end is true when a stored word finishes at this node. AddRune sets it,
	// SoftDel clears it, IsPathEnd reports it.
	end bool
	// character is the rune this node was created for; the root uses 0.
	character rune
	// weight is assigned by Add: the number of children the parent has right
	// after this node was stored in it.
	weight int
	// rm is locked by Get (read lock) and Add (write lock) around their
	// accesses to children.
	rm sync.RWMutex
	// children maps the next rune of a word to the corresponding child node.
	children map[rune]*Node
}
    29  
    30  func newNode(character rune) *Node {
    31  	return &Node{
    32  		weight:    0,
    33  		character: character,
    34  		children:  make(map[rune]*Node, 0),
    35  	}
    36  }
    37  
    38  func newRootNode() *Node {
    39  	return newNode(0)
    40  }
    41  
    42  func (node *Node) Get(r rune) (*Node, bool) {
    43  	node.rm.RLock()
    44  	defer node.rm.RUnlock()
    45  	node, ok := node.children[r]
    46  	return node, ok
    47  }
    48  
    49  func (node *Node) Add(r rune, n *Node) *Node {
    50  	node.rm.Lock()
    51  	defer node.rm.Unlock()
    52  	node.children[r] = n
    53  	n.weight = len(node.children)
    54  	return n
    55  }
    56  
    57  // IsLeaf 是否叶子节点
    58  func (node *Node) IsLeaf() bool {
    59  	return len(node.children) == 0
    60  }
    61  
    62  // IsRoot 是否为根节点
    63  func (node *Node) IsRoot() bool {
    64  	return node.character == 0
    65  }
    66  
    67  // IsPathEnd 某个路径的结束
    68  func (node *Node) IsPathEnd() bool {
    69  	return node.end
    70  }
    71  
// SoftDel puts the node into the soft-deleted state: it clears the end flag
// so IsPathEnd reports false, and leaves the node and its children in place.
func (node *Node) SoftDel() {
	node.end = false
}
    76  
    77  func NewTrie() *Trie {
    78  	return &Trie{
    79  		Root: newRootNode(),
    80  	}
    81  }
    82  
    83  // Add 添加, return no 唯一的编号
    84  func (tree *Trie) Add(word string) (no string) {
    85  	return tree.AddRune([]rune(word))
    86  }
    87  
    88  func (tree *Trie) AddRune(word []rune) (no string) {
    89  
    90  	var (
    91  		parent   = tree.Root
    92  		position = 0
    93  		found    = false
    94  		r        rune
    95  		current  *Node
    96  	)
    97  	for position = 0; position < len(word); position++ {
    98  		if position > 0 {
    99  			no += "-"
   100  		}
   101  		r = word[position]
   102  		if current, found = parent.Get(r); found {
   103  			parent = current
   104  		} else {
   105  			parent = parent.Add(r, newNode(r))
   106  		}
   107  		no += strconv.Itoa(parent.weight)
   108  		if position == len(word)-1 {
   109  			parent.end = true
   110  		}
   111  	}
   112  	return
   113  }
   114  
   115  func (tree *Trie) Del(word string) {
   116  	tree.DelRune([]rune(word))
   117  }
   118  
   119  func (tree *Trie) DelRune(word []rune) {
   120  	var (
   121  		current  = tree.Root
   122  		position = 0
   123  		found    = false
   124  	)
   125  	for position = 0; position < len(word); position++ {
   126  		if current, found = current.Get(word[position]); !found {
   127  			return
   128  		}
   129  		if position == len(word)-1 {
   130  			current.SoftDel()
   131  		}
   132  	}
   133  }
   134  
   135  // Replace 词语替换
   136  func (tree *Trie) Replace(text string, character rune) string {
   137  	return tree.ReplaceRune([]rune(text), character)
   138  }
   139  
   140  func (tree *Trie) ReplaceRune(text []rune, character rune) string {
   141  	var (
   142  		parent   = tree.Root
   143  		length   = len(text)
   144  		left     = 0
   145  		position = 0
   146  		found    = false
   147  		next     = func() {
   148  			parent = tree.Root
   149  			position = left
   150  			left++
   151  		}
   152  		current *Node
   153  	)
   154  
   155  	for position = 0; position < len(text); position++ {
   156  		if current, found = parent.children[text[position]]; !found {
   157  			next()
   158  			continue
   159  		}
   160  		if !current.IsPathEnd() && position == length-1 {
   161  			next()
   162  			continue
   163  		}
   164  		if current.IsPathEnd() && left <= position {
   165  			for i := left; i <= position; i++ {
   166  				text[i] = character
   167  			}
   168  		}
   169  		parent = current
   170  	}
   171  	return string(text)
   172  }
   173  
   174  // Filter 直接过滤掉字符串中的敏感词
   175  func (tree *Trie) Filter(text string) string {
   176  	return tree.FilterRune([]rune(text))
   177  }
   178  
   179  func (tree *Trie) FilterRune(text []rune) string {
   180  
   181  	var (
   182  		parent   = tree.Root
   183  		length   = len(text)
   184  		left     = 0
   185  		position = 0
   186  		found    = false
   187  		next     = func() {
   188  			parent = tree.Root
   189  			position = left
   190  			left++
   191  		}
   192  		current     *Node
   193  		resultRunes []rune
   194  	)
   195  
   196  	for position = 0; position < length; position++ {
   197  		if current, found = parent.children[text[position]]; !found {
   198  			next()
   199  			continue
   200  		}
   201  		if !current.IsPathEnd() && position == length-1 {
   202  			resultRunes = append(resultRunes, text[left])
   203  			next()
   204  			continue
   205  		}
   206  		if current.IsPathEnd() {
   207  			left = position + 1
   208  			parent = tree.Root
   209  		} else {
   210  			parent = current
   211  		}
   212  	}
   213  	resultRunes = append(resultRunes, text[left:]...)
   214  	return string(resultRunes)
   215  }
   216  
   217  func (tree *Trie) Validate(text string) (bool, string) {
   218  	var validated, g = tree.ValidateRune([]rune(text))
   219  	return validated, g.Text
   220  }
   221  
   222  // ValidateReturnNo 验证字符串是否合法,如不合法则返回false和检测到
   223  // 的第一个敏感词
   224  func (tree *Trie) ValidateReturnNo(text string) (bool, string, string) {
   225  	var validated, g = tree.ValidateRune([]rune(text))
   226  	return validated, g.Text, g.No
   227  }
   228  
   229  func (tree *Trie) ValidateRune(text []rune) (bool, Group) {
   230  
   231  	var (
   232  		parent   = tree.Root
   233  		length   = len(text)
   234  		left     = 0
   235  		position = 0
   236  		found    = false
   237  		no       = Empty
   238  		next     = func() {
   239  			no = Empty
   240  			parent = tree.Root
   241  			position = left
   242  			left++
   243  		}
   244  		current *Node
   245  	)
   246  
   247  	for position = 0; position < len(text); position++ {
   248  		if current, found = parent.children[text[position]]; !found {
   249  			next()
   250  			continue
   251  		}
   252  		if !current.IsPathEnd() && position == length-1 {
   253  			next()
   254  			continue
   255  		}
   256  		if len(no) > 0 {
   257  			no += "-"
   258  		}
   259  		no += strconv.Itoa(current.weight)
   260  		if current.IsPathEnd() && left <= position {
   261  			return false, Group{No: no, Text: string(text[left : position+1]), Index: left}
   262  		}
   263  		parent = current
   264  	}
   265  	return true, Group{No: "", Text: "", Index: -1}
   266  }
   267  
   268  // FindInReturnNo 判断text中是否含有词库中的词
   269  func (tree *Trie) FindInReturnNo(text string) (bool, string, string) {
   270  	var validated, g = tree.ValidateRune([]rune(text))
   271  	return !validated, g.Text, g.No
   272  }
   273  
   274  func (tree *Trie) FindInGroup(text string) (bool, Group) {
   275  	var validated, g = tree.ValidateRune([]rune(text))
   276  	return !validated, g
   277  }
   278  
   279  func (tree *Trie) FindRune(text []rune) (bool, Group) {
   280  	var validated, g = tree.ValidateRune(text)
   281  	return !validated, g
   282  }
   283  
   284  func (tree *Trie) FindIn(text string) (bool, string) {
   285  	validated, g := tree.ValidateRune([]rune(text))
   286  	return !validated, g.Text
   287  }
   288  
   289  func (tree *Trie) FindAll(text string) []*Group {
   290  	return tree.FindRunes([]rune(text))
   291  }
   292  
   293  // FindRunes 找有所有包含在词库中的词
   294  func (tree *Trie) FindRunes(text []rune) []*Group {
   295  
   296  	var (
   297  		parent   = tree.Root
   298  		length   = len(text)
   299  		left     = 0
   300  		position = 0
   301  		found    = false
   302  		no       = Empty
   303  		next     = func() {
   304  			no = Empty
   305  			parent = tree.Root
   306  			position = left
   307  			left++
   308  		}
   309  
   310  		matches []*Group
   311  		current *Node
   312  	)
   313  
   314  	for position = 0; position < length; position++ {
   315  		if current, found = parent.children[text[position]]; !found {
   316  			next()
   317  			continue
   318  		}
   319  		if !current.IsPathEnd() && position == length-1 {
   320  			next()
   321  			continue
   322  		}
   323  		if len(no) > 0 {
   324  			no += "-"
   325  		}
   326  		no += strconv.Itoa(current.weight)
   327  		if current.IsPathEnd() && left <= position {
   328  			matches = append(matches, &Group{No: no, Text: string(text[left : position+1]), Index: left})
   329  		}
   330  		parent = current
   331  	}
   332  	return matches
   333  }