gitee.com/h79/goutils@v1.22.10/sensitive/filter.go (about)

     1  package sensitive
     2  
import (
	"bufio"
	"fmt"
	"io"
	"net/http"
	"os"
	"regexp"
	"time"

	"gitee.com/h79/goutils/common/trie"
)
    12  
// Filter is a sensitive-word filter.
//
// It holds two possible matching backends, a regexp matcher and a trie. New
// sets exactly one of them, and the methods on Filter dispatch to whichever
// one is non-nil, checking the trie first.
type Filter struct {
	// regexp is the regexp-based backend; New sets it only when called with reg == true.
	regexp *Regexp
	// trie is the trie-based backend; New sets it only when called with reg == false.
	trie   *trie.Trie
	// noise is the pattern whose matches RemoveNoise deletes from input text.
	noise  *regexp.Regexp
}
    19  
    20  // New 返回一个敏感词过滤器
    21  func New(reg bool) *Filter {
    22  	noise := regexp.MustCompile(`[\\|\s&%$@*]+`)
    23  	if reg {
    24  		return &Filter{
    25  			regexp: NewRegexp(),
    26  			trie:   nil,
    27  			noise:  noise,
    28  		}
    29  	}
    30  	return &Filter{
    31  		regexp: nil,
    32  		trie:   trie.NewTrie(),
    33  		noise:  noise,
    34  	}
    35  }
    36  
    37  // UpdateNoisePattern 更新去噪模式
    38  func (filter *Filter) UpdateNoisePattern(pattern string) {
    39  	filter.noise = regexp.MustCompile(pattern)
    40  }
    41  
    42  // LoadWordDict 加载敏感词字典
    43  func (filter *Filter) LoadWordDict(path string) error {
    44  	f, err := os.Open(path)
    45  	if err != nil {
    46  		return err
    47  	}
    48  	defer f.Close()
    49  
    50  	return filter.Load(f)
    51  }
    52  
    53  // LoadWordDictByNetworkFile 加载网络敏感词字典,文本文件
    54  func (filter *Filter) LoadWordDictByNetworkFile(url string) error {
    55  	c := http.Client{
    56  		Timeout: 5 * time.Second,
    57  	}
    58  	rsp, err := c.Get(url)
    59  	if err != nil {
    60  		return err
    61  	}
    62  	defer rsp.Body.Close()
    63  
    64  	return filter.Load(rsp.Body)
    65  }
    66  
    67  // Load common method to add words
    68  func (filter *Filter) Load(rd io.Reader) error {
    69  	buf := bufio.NewReader(rd)
    70  	for {
    71  		line, _, err := buf.ReadLine()
    72  		if err != nil {
    73  			if err != io.EOF {
    74  				return err
    75  			}
    76  			break
    77  		}
    78  		filter.add(string(line))
    79  	}
    80  
    81  	return nil
    82  }
    83  
    84  func (filter *Filter) add(words string) {
    85  	if filter.trie != nil {
    86  		filter.trie.Add(words)
    87  	} else if filter.regexp != nil {
    88  		_, _ = filter.regexp.Add(words)
    89  	}
    90  }
    91  
    92  func (filter *Filter) del(word string) {
    93  	if filter.trie != nil {
    94  		filter.trie.Del(word)
    95  	} else if filter.regexp != nil {
    96  		filter.regexp.Del(word)
    97  	}
    98  }
    99  
   100  // AddWord 添加敏感词
   101  func (filter *Filter) AddWord(words ...string) {
   102  	for i := range words {
   103  		filter.add(words[i])
   104  	}
   105  }
   106  
   107  // DelWord 删除敏感词
   108  func (filter *Filter) DelWord(words ...string) {
   109  	for i := range words {
   110  		filter.del(words[i])
   111  	}
   112  }
   113  
   114  // Filter 过滤敏感词
   115  func (filter *Filter) Filter(text string) string {
   116  	if filter.trie != nil {
   117  		return filter.trie.Filter(text)
   118  	}
   119  	return filter.regexp.Filter(text)
   120  }
   121  
   122  // Replace 和谐敏感词
   123  func (filter *Filter) Replace(text string, repl rune, replaceF func(repl rune) string) string {
   124  	if filter.trie != nil {
   125  		return filter.trie.Replace(text, repl)
   126  	}
   127  	return filter.regexp.Replace(text, replaceF(repl))
   128  }
   129  
   130  // FindIn 检测敏感词
   131  func (filter *Filter) FindIn(text string) (bool, string) {
   132  	text = filter.RemoveNoise(text)
   133  	if filter.trie != nil {
   134  		return filter.trie.FindIn(text)
   135  	}
   136  	return filter.regexp.FindIn(text)
   137  }
   138  
   139  // FindAll 找到所有匹配词
   140  func (filter *Filter) FindAll(text string) []*trie.Group {
   141  	if filter.trie != nil {
   142  		return filter.trie.FindAll(text)
   143  	}
   144  	return filter.regexp.FindAll(text)
   145  }
   146  
   147  // Validate 检测字符串是否合法
   148  func (filter *Filter) Validate(text string) (bool, string) {
   149  	text = filter.RemoveNoise(text)
   150  	if filter.trie != nil {
   151  		return filter.trie.Validate(text)
   152  	}
   153  	return filter.regexp.Validate(text)
   154  }
   155  
   156  // RemoveNoise 去除空格等噪音
   157  func (filter *Filter) RemoveNoise(text string) string {
   158  	return filter.noise.ReplaceAllString(text, "")
   159  }