github.com/lingyao2333/mo-zero@v1.4.1/core/stringx/trie.go (about)

     1  package stringx
     2  
     3  import "github.com/lingyao2333/mo-zero/core/lang"
     4  
// defaultMask is the mask rune NewTrie falls back to when no option has set
// a non-zero mask.
const defaultMask = '*'
     6  
     7  type (
     8  	// TrieOption defines the method to customize a Trie.
     9  	TrieOption func(trie *trieNode)
    10  
    11  	// A Trie is a tree implementation that used to find elements rapidly.
    12  	Trie interface {
    13  		Filter(text string) (string, []string, bool)
    14  		FindKeywords(text string) []string
    15  	}
    16  
    17  	trieNode struct {
    18  		node
    19  		mask rune
    20  	}
    21  
    22  	scope struct {
    23  		start int
    24  		stop  int
    25  	}
    26  )
    27  
    28  // NewTrie returns a Trie.
    29  func NewTrie(words []string, opts ...TrieOption) Trie {
    30  	n := new(trieNode)
    31  
    32  	for _, opt := range opts {
    33  		opt(n)
    34  	}
    35  	if n.mask == 0 {
    36  		n.mask = defaultMask
    37  	}
    38  	for _, word := range words {
    39  		n.add(word)
    40  	}
    41  
    42  	n.build()
    43  
    44  	return n
    45  }
    46  
    47  func (n *trieNode) Filter(text string) (sentence string, keywords []string, found bool) {
    48  	chars := []rune(text)
    49  	if len(chars) == 0 {
    50  		return text, nil, false
    51  	}
    52  
    53  	scopes := n.find(chars)
    54  	keywords = n.collectKeywords(chars, scopes)
    55  
    56  	for _, match := range scopes {
    57  		// we don't care about overlaps, not bringing a performance improvement
    58  		n.replaceWithAsterisk(chars, match.start, match.stop)
    59  	}
    60  
    61  	return string(chars), keywords, len(keywords) > 0
    62  }
    63  
    64  func (n *trieNode) FindKeywords(text string) []string {
    65  	chars := []rune(text)
    66  	if len(chars) == 0 {
    67  		return nil
    68  	}
    69  
    70  	scopes := n.find(chars)
    71  	return n.collectKeywords(chars, scopes)
    72  }
    73  
    74  func (n *trieNode) collectKeywords(chars []rune, scopes []scope) []string {
    75  	set := make(map[string]lang.PlaceholderType)
    76  	for _, v := range scopes {
    77  		set[string(chars[v.start:v.stop])] = lang.Placeholder
    78  	}
    79  
    80  	var i int
    81  	keywords := make([]string, len(set))
    82  	for k := range set {
    83  		keywords[i] = k
    84  		i++
    85  	}
    86  
    87  	return keywords
    88  }
    89  
    90  func (n *trieNode) replaceWithAsterisk(chars []rune, start, stop int) {
    91  	for i := start; i < stop; i++ {
    92  		chars[i] = n.mask
    93  	}
    94  }
    95  
    96  // WithMask customizes a Trie with keywords masked as given mask char.
    97  func WithMask(mask rune) TrieOption {
    98  	return func(n *trieNode) {
    99  		n.mask = mask
   100  	}
   101  }