github.com/v2fly/v2ray-core/v5@v5.16.2-0.20240507031116-8191faa6e095/common/strmatcher/matchers.go (about)

     1  package strmatcher
     2  
     3  import (
     4  	"errors"
     5  	"regexp"
     6  	"strings"
     7  	"unicode/utf8"
     8  
     9  	"golang.org/x/net/idna"
    10  )
    11  
    12  // FullMatcher is an implementation of Matcher.
    13  type FullMatcher string
    14  
    15  func (FullMatcher) Type() Type {
    16  	return Full
    17  }
    18  
    19  func (m FullMatcher) Pattern() string {
    20  	return string(m)
    21  }
    22  
    23  func (m FullMatcher) String() string {
    24  	return "full:" + m.Pattern()
    25  }
    26  
    27  func (m FullMatcher) Match(s string) bool {
    28  	return string(m) == s
    29  }
    30  
    31  // DomainMatcher is an implementation of Matcher.
    32  type DomainMatcher string
    33  
    34  func (DomainMatcher) Type() Type {
    35  	return Domain
    36  }
    37  
    38  func (m DomainMatcher) Pattern() string {
    39  	return string(m)
    40  }
    41  
    42  func (m DomainMatcher) String() string {
    43  	return "domain:" + m.Pattern()
    44  }
    45  
    46  func (m DomainMatcher) Match(s string) bool {
    47  	pattern := m.Pattern()
    48  	if !strings.HasSuffix(s, pattern) {
    49  		return false
    50  	}
    51  	return len(s) == len(pattern) || s[len(s)-len(pattern)-1] == '.'
    52  }
    53  
    54  // SubstrMatcher is an implementation of Matcher.
    55  type SubstrMatcher string
    56  
    57  func (SubstrMatcher) Type() Type {
    58  	return Substr
    59  }
    60  
    61  func (m SubstrMatcher) Pattern() string {
    62  	return string(m)
    63  }
    64  
    65  func (m SubstrMatcher) String() string {
    66  	return "keyword:" + m.Pattern()
    67  }
    68  
    69  func (m SubstrMatcher) Match(s string) bool {
    70  	return strings.Contains(s, m.Pattern())
    71  }
    72  
    73  // RegexMatcher is an implementation of Matcher.
    74  type RegexMatcher struct {
    75  	pattern *regexp.Regexp
    76  }
    77  
    78  func (*RegexMatcher) Type() Type {
    79  	return Regex
    80  }
    81  
    82  func (m *RegexMatcher) Pattern() string {
    83  	return m.pattern.String()
    84  }
    85  
    86  func (m *RegexMatcher) String() string {
    87  	return "regexp:" + m.Pattern()
    88  }
    89  
    90  func (m *RegexMatcher) Match(s string) bool {
    91  	return m.pattern.MatchString(s)
    92  }
    93  
    94  // New creates a new Matcher based on the given pattern.
    95  func (t Type) New(pattern string) (Matcher, error) {
    96  	switch t {
    97  	case Full:
    98  		return FullMatcher(pattern), nil
    99  	case Substr:
   100  		return SubstrMatcher(pattern), nil
   101  	case Domain:
   102  		pattern, err := ToDomain(pattern)
   103  		if err != nil {
   104  			return nil, err
   105  		}
   106  		return DomainMatcher(pattern), nil
   107  	case Regex: // 1. regex matching is case-sensitive
   108  		regex, err := regexp.Compile(pattern)
   109  		if err != nil {
   110  			return nil, err
   111  		}
   112  		return &RegexMatcher{pattern: regex}, nil
   113  	default:
   114  		return nil, errors.New("unknown matcher type")
   115  	}
   116  }
   117  
   118  // NewDomainPattern creates a new Matcher based on the given domain pattern.
   119  // It works like `Type.New`, but will do validation and conversion to ensure it's a valid domain pattern.
   120  func (t Type) NewDomainPattern(pattern string) (Matcher, error) {
   121  	switch t {
   122  	case Full:
   123  		pattern, err := ToDomain(pattern)
   124  		if err != nil {
   125  			return nil, err
   126  		}
   127  		return FullMatcher(pattern), nil
   128  	case Substr:
   129  		pattern, err := ToDomain(pattern)
   130  		if err != nil {
   131  			return nil, err
   132  		}
   133  		return SubstrMatcher(pattern), nil
   134  	case Domain:
   135  		pattern, err := ToDomain(pattern)
   136  		if err != nil {
   137  			return nil, err
   138  		}
   139  		return DomainMatcher(pattern), nil
   140  	case Regex: // Regex's charset not in LDH subset
   141  		regex, err := regexp.Compile(pattern)
   142  		if err != nil {
   143  			return nil, err
   144  		}
   145  		return &RegexMatcher{pattern: regex}, nil
   146  	default:
   147  		return nil, errors.New("unknown matcher type")
   148  	}
   149  }
   150  
   151  // ToDomain converts input pattern to a domain string, and return error if such a conversion cannot be made.
   152  //  1. Conforms to Letter-Digit-Hyphen (LDH) subset (https://tools.ietf.org/html/rfc952):
   153  //     * Letters A to Z (no distinction between uppercase and lowercase, we convert to lowers)
   154  //     * Digits 0 to 9
   155  //     * Hyphens(-) and Periods(.)
   156  //  2. If any non-ASCII characters, domain are converted from Internationalized domain name to Punycode.
   157  func ToDomain(pattern string) (string, error) {
   158  	for {
   159  		isASCII, hasUpper := true, false
   160  		for i := 0; i < len(pattern); i++ {
   161  			c := pattern[i]
   162  			if c >= utf8.RuneSelf {
   163  				isASCII = false
   164  				break
   165  			}
   166  			switch {
   167  			case 'A' <= c && c <= 'Z':
   168  				hasUpper = true
   169  			case 'a' <= c && c <= 'z':
   170  			case '0' <= c && c <= '9':
   171  			case c == '-':
   172  			case c == '.':
   173  			default:
   174  				return "", errors.New("pattern string does not conform to Letter-Digit-Hyphen (LDH) subset")
   175  			}
   176  		}
   177  		if !isASCII {
   178  			var err error
   179  			pattern, err = idna.Punycode.ToASCII(pattern)
   180  			if err != nil {
   181  				return "", err
   182  			}
   183  			continue
   184  		}
   185  		if hasUpper {
   186  			pattern = strings.ToLower(pattern)
   187  		}
   188  		break
   189  	}
   190  	return pattern, nil
   191  }
   192  
   193  // MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
   194  type MatcherGroupForAll interface {
   195  	AddMatcher(matcher Matcher, value uint32)
   196  }
   197  
   198  // MatcherGroupForFull is an interface indicating a MatcherGroup could accept FullMatchers.
   199  type MatcherGroupForFull interface {
   200  	AddFullMatcher(matcher FullMatcher, value uint32)
   201  }
   202  
   203  // MatcherGroupForDomain is an interface indicating a MatcherGroup could accept DomainMatchers.
   204  type MatcherGroupForDomain interface {
   205  	AddDomainMatcher(matcher DomainMatcher, value uint32)
   206  }
   207  
   208  // MatcherGroupForSubstr is an interface indicating a MatcherGroup could accept SubstrMatchers.
   209  type MatcherGroupForSubstr interface {
   210  	AddSubstrMatcher(matcher SubstrMatcher, value uint32)
   211  }
   212  
   213  // MatcherGroupForRegex is an interface indicating a MatcherGroup could accept RegexMatchers.
   214  type MatcherGroupForRegex interface {
   215  	AddRegexMatcher(matcher *RegexMatcher, value uint32)
   216  }
   217  
   218  // AddMatcherToGroup is a helper function to try to add a Matcher to any kind of MatcherGroup.
   219  // It returns error if the MatcherGroup does not accept the provided Matcher's type.
   220  // This function is provided to help writing code to test a MatcherGroup.
   221  func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
   222  	if g, ok := g.(IndexMatcher); ok {
   223  		g.Add(matcher)
   224  		return nil
   225  	}
   226  	if g, ok := g.(MatcherGroupForAll); ok {
   227  		g.AddMatcher(matcher, value)
   228  		return nil
   229  	}
   230  	switch matcher := matcher.(type) {
   231  	case FullMatcher:
   232  		if g, ok := g.(MatcherGroupForFull); ok {
   233  			g.AddFullMatcher(matcher, value)
   234  			return nil
   235  		}
   236  	case DomainMatcher:
   237  		if g, ok := g.(MatcherGroupForDomain); ok {
   238  			g.AddDomainMatcher(matcher, value)
   239  			return nil
   240  		}
   241  	case SubstrMatcher:
   242  		if g, ok := g.(MatcherGroupForSubstr); ok {
   243  			g.AddSubstrMatcher(matcher, value)
   244  			return nil
   245  		}
   246  	case *RegexMatcher:
   247  		if g, ok := g.(MatcherGroupForRegex); ok {
   248  			g.AddRegexMatcher(matcher, value)
   249  			return nil
   250  		}
   251  	}
   252  	return errors.New("cannot add matcher to matcher group")
   253  }
   254  
   255  // CompositeMatches flattens the matches slice to produce a single matched indices slice.
   256  // It is designed to avoid new memory allocation as possible.
   257  func CompositeMatches(matches [][]uint32) []uint32 {
   258  	switch len(matches) {
   259  	case 0:
   260  		return nil
   261  	case 1:
   262  		return matches[0]
   263  	default:
   264  		result := make([]uint32, 0, 5)
   265  		for i := 0; i < len(matches); i++ {
   266  			result = append(result, matches[i]...)
   267  		}
   268  		return result
   269  	}
   270  }
   271  
   272  // CompositeMatches flattens the matches slice to produce a single matched indices slice.
   273  // It is designed that:
   274  //  1. All matchers are concatenated in reverse order, so the matcher that matches further ranks higher.
   275  //  2. Indices in the same matcher keeps their original order.
   276  //  3. Avoid new memory allocation as possible.
   277  func CompositeMatchesReverse(matches [][]uint32) []uint32 {
   278  	switch len(matches) {
   279  	case 0:
   280  		return nil
   281  	case 1:
   282  		return matches[0]
   283  	default:
   284  		result := make([]uint32, 0, 5)
   285  		for i := len(matches) - 1; i >= 0; i-- {
   286  			result = append(result, matches[i]...)
   287  		}
   288  		return result
   289  	}
   290  }