github.com/metacubex/mihomo@v1.18.5/component/geodata/strmatcher/ac_automaton_matcher.go (about)

     1  package strmatcher
     2  
     3  import (
     4  	list "github.com/bahlo/generic-list-go"
     5  )
     6  
        // validCharCount is the number of distinct indices produced by char2Index:
        // 26 case-folded letters (0-25), 17 punctuation characters (26-42) and
        // 10 digits (43-52). It is the width of every node's transition row.
     7  const validCharCount = 53
     8  
        // MatchType is the per-node payload of the automaton: whether a pattern
        // ends at the node and, if so, with which matching semantics.
     9  type MatchType struct {
        	// matchType is the pattern type recorded for the node. Nodes are
        	// created with Full and overwritten by Add when a pattern ends here.
    10  	matchType Type
        	// exist is true once Add has marked the node as the end of a pattern.
    11  	exist     bool
    12  }
    13  
        // Edge kinds stored in Edge.edgeType.
    14  const (
        	// TrieEdge marks a transition that Add created for a pattern character.
    15  	TrieEdge bool = true
        	// FailEdge marks an absent transition (the state of every edge in a
        	// fresh node) and the fallback transitions filled in by Build.
    16  	FailEdge bool = false
    17  )
    18  
        // Edge is one outgoing transition of an automaton node.
    19  type Edge struct {
        	// edgeType is TrieEdge or FailEdge.
    20  	edgeType bool
        	// nextNode is the index of the target node in ACAutomaton.trie.
        	// 0 is the root; Add and Build also read 0 as "no child here".
    21  	nextNode int
    22  }
    23  
        // ACAutomaton is an Aho-Corasick style automaton over the char2Index
        // alphabet. Patterns are inserted and matched from their last byte to
        // their first (see Add and Match). Node 0 is the root, and the three
        // slices are indexed by node number and grow together.
    24  type ACAutomaton struct {
        	// trie holds one transition row per node, indexed by char2Index value.
    25  	trie   [][validCharCount]Edge
        	// fail holds, per node, the fallback node computed by Build.
    26  	fail   []int
        	// exists holds, per node, whether a pattern ends there and its type.
    27  	exists []MatchType
        	// count is the index of the most recently created node, i.e. the
        	// number of nodes excluding the root.
    28  	count  int
    29  }
    30  
    31  func newNode() [validCharCount]Edge {
    32  	var s [validCharCount]Edge
    33  	for i := range s {
    34  		s[i] = Edge{
    35  			edgeType: FailEdge,
    36  			nextNode: 0,
    37  		}
    38  	}
    39  	return s
    40  }
    41  
    42  var char2Index = [...]int{
    43  	'A':  0,
    44  	'a':  0,
    45  	'B':  1,
    46  	'b':  1,
    47  	'C':  2,
    48  	'c':  2,
    49  	'D':  3,
    50  	'd':  3,
    51  	'E':  4,
    52  	'e':  4,
    53  	'F':  5,
    54  	'f':  5,
    55  	'G':  6,
    56  	'g':  6,
    57  	'H':  7,
    58  	'h':  7,
    59  	'I':  8,
    60  	'i':  8,
    61  	'J':  9,
    62  	'j':  9,
    63  	'K':  10,
    64  	'k':  10,
    65  	'L':  11,
    66  	'l':  11,
    67  	'M':  12,
    68  	'm':  12,
    69  	'N':  13,
    70  	'n':  13,
    71  	'O':  14,
    72  	'o':  14,
    73  	'P':  15,
    74  	'p':  15,
    75  	'Q':  16,
    76  	'q':  16,
    77  	'R':  17,
    78  	'r':  17,
    79  	'S':  18,
    80  	's':  18,
    81  	'T':  19,
    82  	't':  19,
    83  	'U':  20,
    84  	'u':  20,
    85  	'V':  21,
    86  	'v':  21,
    87  	'W':  22,
    88  	'w':  22,
    89  	'X':  23,
    90  	'x':  23,
    91  	'Y':  24,
    92  	'y':  24,
    93  	'Z':  25,
    94  	'z':  25,
    95  	'!':  26,
    96  	'$':  27,
    97  	'&':  28,
    98  	'\'': 29,
    99  	'(':  30,
   100  	')':  31,
   101  	'*':  32,
   102  	'+':  33,
   103  	',':  34,
   104  	';':  35,
   105  	'=':  36,
   106  	':':  37,
   107  	'%':  38,
   108  	'-':  39,
   109  	'.':  40,
   110  	'_':  41,
   111  	'~':  42,
   112  	'0':  43,
   113  	'1':  44,
   114  	'2':  45,
   115  	'3':  46,
   116  	'4':  47,
   117  	'5':  48,
   118  	'6':  49,
   119  	'7':  50,
   120  	'8':  51,
   121  	'9':  52,
   122  }
   123  
   124  func NewACAutomaton() *ACAutomaton {
   125  	ac := new(ACAutomaton)
   126  	ac.trie = append(ac.trie, newNode())
   127  	ac.fail = append(ac.fail, 0)
   128  	ac.exists = append(ac.exists, MatchType{
   129  		matchType: Full,
   130  		exist:     false,
   131  	})
   132  	return ac
   133  }
   134  
   135  func (ac *ACAutomaton) Add(domain string, t Type) {
   136  	node := 0
   137  	for i := len(domain) - 1; i >= 0; i-- {
   138  		idx := char2Index[domain[i]]
   139  		if ac.trie[node][idx].nextNode == 0 {
   140  			ac.count++
   141  			if len(ac.trie) < ac.count+1 {
   142  				ac.trie = append(ac.trie, newNode())
   143  				ac.fail = append(ac.fail, 0)
   144  				ac.exists = append(ac.exists, MatchType{
   145  					matchType: Full,
   146  					exist:     false,
   147  				})
   148  			}
   149  			ac.trie[node][idx] = Edge{
   150  				edgeType: TrieEdge,
   151  				nextNode: ac.count,
   152  			}
   153  		}
   154  		node = ac.trie[node][idx].nextNode
   155  	}
   156  	ac.exists[node] = MatchType{
   157  		matchType: t,
   158  		exist:     true,
   159  	}
   160  	switch t {
   161  	case Domain:
   162  		ac.exists[node] = MatchType{
   163  			matchType: Full,
   164  			exist:     true,
   165  		}
   166  		idx := char2Index['.']
   167  		if ac.trie[node][idx].nextNode == 0 {
   168  			ac.count++
   169  			if len(ac.trie) < ac.count+1 {
   170  				ac.trie = append(ac.trie, newNode())
   171  				ac.fail = append(ac.fail, 0)
   172  				ac.exists = append(ac.exists, MatchType{
   173  					matchType: Full,
   174  					exist:     false,
   175  				})
   176  			}
   177  			ac.trie[node][idx] = Edge{
   178  				edgeType: TrieEdge,
   179  				nextNode: ac.count,
   180  			}
   181  		}
   182  		node = ac.trie[node][idx].nextNode
   183  		ac.exists[node] = MatchType{
   184  			matchType: t,
   185  			exist:     true,
   186  		}
   187  	default:
   188  		break
   189  	}
   190  }
   191  
   192  func (ac *ACAutomaton) Build() {
   193  	queue := list.New[Edge]()
   194  	for i := 0; i < validCharCount; i++ {
   195  		if ac.trie[0][i].nextNode != 0 {
   196  			queue.PushBack(ac.trie[0][i])
   197  		}
   198  	}
   199  	for {
   200  		front := queue.Front()
   201  		if front == nil {
   202  			break
   203  		} else {
   204  			node := front.Value.nextNode
   205  			queue.Remove(front)
   206  			for i := 0; i < validCharCount; i++ {
   207  				if ac.trie[node][i].nextNode != 0 {
   208  					ac.fail[ac.trie[node][i].nextNode] = ac.trie[ac.fail[node]][i].nextNode
   209  					queue.PushBack(ac.trie[node][i])
   210  				} else {
   211  					ac.trie[node][i] = Edge{
   212  						edgeType: FailEdge,
   213  						nextNode: ac.trie[ac.fail[node]][i].nextNode,
   214  					}
   215  				}
   216  			}
   217  		}
   218  	}
   219  }
   220  
   221  func (ac *ACAutomaton) Match(s string) bool {
   222  	node := 0
   223  	fullMatch := true
   224  	// 1. the match string is all through trie edge. FULL MATCH or DOMAIN
   225  	// 2. the match string is through a fail edge. NOT FULL MATCH
   226  	// 2.1 Through a fail edge, but there exists a valid node. SUBSTR
   227  	for i := len(s) - 1; i >= 0; i-- {
   228  		idx := char2Index[s[i]]
   229  		fullMatch = fullMatch && ac.trie[node][idx].edgeType
   230  		node = ac.trie[node][idx].nextNode
   231  		switch ac.exists[node].matchType {
   232  		case Substr:
   233  			return true
   234  		case Domain:
   235  			if fullMatch {
   236  				return true
   237  			}
   238  		}
   239  	}
   240  	return fullMatch && ac.exists[node].exist
   241  }