github.com/v2fly/v2ray-core/v5@v5.16.2-0.20240507031116-8191faa6e095/common/strmatcher/matchers.go (about) 1 package strmatcher 2 3 import ( 4 "errors" 5 "regexp" 6 "strings" 7 "unicode/utf8" 8 9 "golang.org/x/net/idna" 10 ) 11 12 // FullMatcher is an implementation of Matcher. 13 type FullMatcher string 14 15 func (FullMatcher) Type() Type { 16 return Full 17 } 18 19 func (m FullMatcher) Pattern() string { 20 return string(m) 21 } 22 23 func (m FullMatcher) String() string { 24 return "full:" + m.Pattern() 25 } 26 27 func (m FullMatcher) Match(s string) bool { 28 return string(m) == s 29 } 30 31 // DomainMatcher is an implementation of Matcher. 32 type DomainMatcher string 33 34 func (DomainMatcher) Type() Type { 35 return Domain 36 } 37 38 func (m DomainMatcher) Pattern() string { 39 return string(m) 40 } 41 42 func (m DomainMatcher) String() string { 43 return "domain:" + m.Pattern() 44 } 45 46 func (m DomainMatcher) Match(s string) bool { 47 pattern := m.Pattern() 48 if !strings.HasSuffix(s, pattern) { 49 return false 50 } 51 return len(s) == len(pattern) || s[len(s)-len(pattern)-1] == '.' 52 } 53 54 // SubstrMatcher is an implementation of Matcher. 55 type SubstrMatcher string 56 57 func (SubstrMatcher) Type() Type { 58 return Substr 59 } 60 61 func (m SubstrMatcher) Pattern() string { 62 return string(m) 63 } 64 65 func (m SubstrMatcher) String() string { 66 return "keyword:" + m.Pattern() 67 } 68 69 func (m SubstrMatcher) Match(s string) bool { 70 return strings.Contains(s, m.Pattern()) 71 } 72 73 // RegexMatcher is an implementation of Matcher. 
74 type RegexMatcher struct { 75 pattern *regexp.Regexp 76 } 77 78 func (*RegexMatcher) Type() Type { 79 return Regex 80 } 81 82 func (m *RegexMatcher) Pattern() string { 83 return m.pattern.String() 84 } 85 86 func (m *RegexMatcher) String() string { 87 return "regexp:" + m.Pattern() 88 } 89 90 func (m *RegexMatcher) Match(s string) bool { 91 return m.pattern.MatchString(s) 92 } 93 94 // New creates a new Matcher based on the given pattern. 95 func (t Type) New(pattern string) (Matcher, error) { 96 switch t { 97 case Full: 98 return FullMatcher(pattern), nil 99 case Substr: 100 return SubstrMatcher(pattern), nil 101 case Domain: 102 pattern, err := ToDomain(pattern) 103 if err != nil { 104 return nil, err 105 } 106 return DomainMatcher(pattern), nil 107 case Regex: // 1. regex matching is case-sensitive 108 regex, err := regexp.Compile(pattern) 109 if err != nil { 110 return nil, err 111 } 112 return &RegexMatcher{pattern: regex}, nil 113 default: 114 return nil, errors.New("unknown matcher type") 115 } 116 } 117 118 // NewDomainPattern creates a new Matcher based on the given domain pattern. 119 // It works like `Type.New`, but will do validation and conversion to ensure it's a valid domain pattern. 
120 func (t Type) NewDomainPattern(pattern string) (Matcher, error) { 121 switch t { 122 case Full: 123 pattern, err := ToDomain(pattern) 124 if err != nil { 125 return nil, err 126 } 127 return FullMatcher(pattern), nil 128 case Substr: 129 pattern, err := ToDomain(pattern) 130 if err != nil { 131 return nil, err 132 } 133 return SubstrMatcher(pattern), nil 134 case Domain: 135 pattern, err := ToDomain(pattern) 136 if err != nil { 137 return nil, err 138 } 139 return DomainMatcher(pattern), nil 140 case Regex: // Regex's charset not in LDH subset 141 regex, err := regexp.Compile(pattern) 142 if err != nil { 143 return nil, err 144 } 145 return &RegexMatcher{pattern: regex}, nil 146 default: 147 return nil, errors.New("unknown matcher type") 148 } 149 } 150 151 // ToDomain converts input pattern to a domain string, and return error if such a conversion cannot be made. 152 // 1. Conforms to Letter-Digit-Hyphen (LDH) subset (https://tools.ietf.org/html/rfc952): 153 // * Letters A to Z (no distinction between uppercase and lowercase, we convert to lowers) 154 // * Digits 0 to 9 155 // * Hyphens(-) and Periods(.) 156 // 2. If any non-ASCII characters, domain are converted from Internationalized domain name to Punycode. 
157 func ToDomain(pattern string) (string, error) { 158 for { 159 isASCII, hasUpper := true, false 160 for i := 0; i < len(pattern); i++ { 161 c := pattern[i] 162 if c >= utf8.RuneSelf { 163 isASCII = false 164 break 165 } 166 switch { 167 case 'A' <= c && c <= 'Z': 168 hasUpper = true 169 case 'a' <= c && c <= 'z': 170 case '0' <= c && c <= '9': 171 case c == '-': 172 case c == '.': 173 default: 174 return "", errors.New("pattern string does not conform to Letter-Digit-Hyphen (LDH) subset") 175 } 176 } 177 if !isASCII { 178 var err error 179 pattern, err = idna.Punycode.ToASCII(pattern) 180 if err != nil { 181 return "", err 182 } 183 continue 184 } 185 if hasUpper { 186 pattern = strings.ToLower(pattern) 187 } 188 break 189 } 190 return pattern, nil 191 } 192 193 // MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers. 194 type MatcherGroupForAll interface { 195 AddMatcher(matcher Matcher, value uint32) 196 } 197 198 // MatcherGroupForFull is an interface indicating a MatcherGroup could accept FullMatchers. 199 type MatcherGroupForFull interface { 200 AddFullMatcher(matcher FullMatcher, value uint32) 201 } 202 203 // MatcherGroupForDomain is an interface indicating a MatcherGroup could accept DomainMatchers. 204 type MatcherGroupForDomain interface { 205 AddDomainMatcher(matcher DomainMatcher, value uint32) 206 } 207 208 // MatcherGroupForSubstr is an interface indicating a MatcherGroup could accept SubstrMatchers. 209 type MatcherGroupForSubstr interface { 210 AddSubstrMatcher(matcher SubstrMatcher, value uint32) 211 } 212 213 // MatcherGroupForRegex is an interface indicating a MatcherGroup could accept RegexMatchers. 214 type MatcherGroupForRegex interface { 215 AddRegexMatcher(matcher *RegexMatcher, value uint32) 216 } 217 218 // AddMatcherToGroup is a helper function to try to add a Matcher to any kind of MatcherGroup. 219 // It returns error if the MatcherGroup does not accept the provided Matcher's type. 
220 // This function is provided to help writing code to test a MatcherGroup. 221 func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error { 222 if g, ok := g.(IndexMatcher); ok { 223 g.Add(matcher) 224 return nil 225 } 226 if g, ok := g.(MatcherGroupForAll); ok { 227 g.AddMatcher(matcher, value) 228 return nil 229 } 230 switch matcher := matcher.(type) { 231 case FullMatcher: 232 if g, ok := g.(MatcherGroupForFull); ok { 233 g.AddFullMatcher(matcher, value) 234 return nil 235 } 236 case DomainMatcher: 237 if g, ok := g.(MatcherGroupForDomain); ok { 238 g.AddDomainMatcher(matcher, value) 239 return nil 240 } 241 case SubstrMatcher: 242 if g, ok := g.(MatcherGroupForSubstr); ok { 243 g.AddSubstrMatcher(matcher, value) 244 return nil 245 } 246 case *RegexMatcher: 247 if g, ok := g.(MatcherGroupForRegex); ok { 248 g.AddRegexMatcher(matcher, value) 249 return nil 250 } 251 } 252 return errors.New("cannot add matcher to matcher group") 253 } 254 255 // CompositeMatches flattens the matches slice to produce a single matched indices slice. 256 // It is designed to avoid new memory allocation as possible. 257 func CompositeMatches(matches [][]uint32) []uint32 { 258 switch len(matches) { 259 case 0: 260 return nil 261 case 1: 262 return matches[0] 263 default: 264 result := make([]uint32, 0, 5) 265 for i := 0; i < len(matches); i++ { 266 result = append(result, matches[i]...) 267 } 268 return result 269 } 270 } 271 272 // CompositeMatches flattens the matches slice to produce a single matched indices slice. 273 // It is designed that: 274 // 1. All matchers are concatenated in reverse order, so the matcher that matches further ranks higher. 275 // 2. Indices in the same matcher keeps their original order. 276 // 3. Avoid new memory allocation as possible. 
func CompositeMatchesReverse(matches [][]uint32) []uint32 {
	switch len(matches) {
	case 0:
		return nil
	case 1:
		// A single group is handed back as-is; it shares the caller's backing array.
		return matches[0]
	default:
		// Size the result exactly so append never has to regrow it.
		total := 0
		for _, group := range matches {
			total += len(group)
		}
		result := make([]uint32, 0, total)
		// Walk the groups last to first; indices inside each group keep their order.
		for i := len(matches) - 1; i >= 0; i-- {
			result = append(result, matches[i]...)
		}
		return result
	}
}