github.com/TeaOSLab/EdgeNode@v1.3.8/internal/re/regexp_test.go (about)

     1  // Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
     2  
     3  package re_test
     4  
     5  import (
     6  	"github.com/TeaOSLab/EdgeCommon/pkg/serverconfigs/firewallconfigs"
     7  	"github.com/TeaOSLab/EdgeNode/internal/re"
     8  	"github.com/iwind/TeaGo/assert"
     9  	"regexp"
    10  	"strings"
    11  	"testing"
    12  )
    13  
    14  func TestRegexp(t *testing.T) {
    15  	for _, s := range []string{"(?i)(abc|efg)", "abc|efg", "abc(.+)"} {
    16  		var reg = regexp.MustCompile(s)
    17  		t.Log("===" + s + "===")
    18  		t.Log(reg.LiteralPrefix())
    19  		t.Log(reg.NumSubexp())
    20  		t.Log(reg.SubexpNames())
    21  	}
    22  }
    23  
    24  func TestRegexp_MatchString(t *testing.T) {
    25  	var a = assert.NewAssertion(t)
    26  
    27  	{
    28  		var r = re.MustCompile("abc")
    29  		a.IsTrue(r.MatchString("abc"))
    30  		a.IsFalse(r.MatchString("ab"))
    31  		a.IsFalse(r.MatchString("ABC"))
    32  	}
    33  
    34  	{
    35  		var r = re.MustCompile("(?i)abc|def|ghi")
    36  		a.IsTrue(r.MatchString("DEF"))
    37  		a.IsFalse(r.MatchString("ab"))
    38  		a.IsTrue(r.MatchString("ABC"))
    39  	}
    40  }
    41  
    42  func TestRegexp_Sub(t *testing.T) {
    43  	{
    44  		reg := regexp.MustCompile(`(a|b|c)(e|f|g)`)
    45  		for _, subName := range reg.SubexpNames() {
    46  			t.Log(subName)
    47  		}
    48  	}
    49  }
    50  
    51  func TestRegexp_ParseKeywords(t *testing.T) {
    52  	var r = re.MustCompile("")
    53  
    54  	{
    55  		var keywords = r.ParseKeywords(`\n\t\n\f\r\v\x123`)
    56  		t.Log(keywords)
    57  	}
    58  }
    59  
    60  func TestRegexp_Special(t *testing.T) {
    61  	for _, s := range []string{
    62  		`\\s`,
    63  		`\s\W`,
    64  		`aaaa/\W`,
    65  		`aaaa\/\W`,
    66  		`aaaa\=\W`,
    67  		`aaaa\\=\W`,
    68  		`aaaa\\\=\W`,
    69  		`aaaa\\\\=\W`,
    70  	} {
    71  		var es = testUnescape(t, s)
    72  		t.Log(s, "=>", es)
    73  		_, err := re.Compile(es)
    74  		if err != nil {
    75  			t.Fatal(err)
    76  		}
    77  	}
    78  }
    79  
    80  func TestRegexp_Special2(t *testing.T) {
    81  	r, err := re.Compile(testUnescape(t, `/api/ios/a
    82  /api/ios/b
    83  /api/ios/c
    84  /report`))
    85  	if err != nil {
    86  		t.Fatal(err)
    87  	}
    88  	t.Log(r.Keywords())
    89  }
    90  
    91  func TestRegexp_ParseKeywords2(t *testing.T) {
    92  	var a = assert.NewAssertion(t)
    93  
    94  	var r = re.MustCompile("")
    95  	a.IsTrue(testCompareStrings(r.ParseKeywords("(abc)def"), []string{"abcdef"}))
    96  	a.IsTrue(testCompareStrings(r.ParseKeywords("(abc)|(?:def)"), []string{"abc", "def"}))
    97  	a.IsTrue(testCompareStrings(r.ParseKeywords("(abc)"), []string{"abc"}))
    98  	a.IsTrue(testCompareStrings(r.ParseKeywords("(abc|def|ghi)"), []string{"abc", "def", "ghi"}))
    99  	a.IsTrue(testCompareStrings(r.ParseKeywords("(?i:abc)"), []string{}))
   100  	a.IsTrue(testCompareStrings(r.ParseKeywords(`\babc`), []string{"abc"}))
   101  	a.IsTrue(testCompareStrings(r.ParseKeywords(`    \babc`), []string{"    "}))
   102  	a.IsTrue(testCompareStrings(r.ParseKeywords(`\babc\b`), []string{"abc"}))
   103  	a.IsTrue(testCompareStrings(r.ParseKeywords(`\b(abc)`), []string{"abc"}))
   104  	a.IsTrue(testCompareStrings(r.ParseKeywords("abc"), []string{"abc"}))
   105  	a.IsTrue(testCompareStrings(r.ParseKeywords("abc|efg|hij"), []string{"abc", "efg", "hij"}))
   106  	a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg|hij`), []string{"abc|efg", "hij"}))
   107  	a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg*|hij`), []string{"abc|ef", "hij"}))
   108  	a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg?|hij`), []string{"abc|ef", "hij"}))
   109  	a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg+|hij`), []string{"abc|ef", "hij"}))
   110  	a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg{2,10}|hij`), []string{"abc|ef", "hij"}))
   111  	a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg{0,10}|hij`), []string{"abc|ef", "hij"}))
   112  	a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg.+|hij`), []string{"abc|efg", "hij"}))
   113  	a.IsTrue(testCompareStrings(r.ParseKeywords("A(abc|bcd)"), []string{"Aabc", "Abcd"}))
   114  	a.IsTrue(testCompareStrings(r.ParseKeywords("^abc"), []string{"abc"}))
   115  	a.IsTrue(testCompareStrings(r.ParseKeywords("abc$"), []string{"abc"}))
   116  	a.IsTrue(testCompareStrings(r.ParseKeywords(`abc$`), []string{"abc"}))
   117  	a.IsTrue(testCompareStrings(r.ParseKeywords("abc\\d"), []string{"abc"}))
   118  	a.IsTrue(testCompareStrings(r.ParseKeywords("abc{0,4}"), []string{"ab"}))
   119  	a.IsTrue(testCompareStrings(r.ParseKeywords("{0,4}"), []string{}))
   120  	a.IsTrue(testCompareStrings(r.ParseKeywords("{1,4}"), []string{}))
   121  	a.IsTrue(testCompareStrings(r.ParseKeywords("中文|北京|上海|golang"), []string{"中文", "北京", "上海", "golang"}))
   122  	a.IsTrue(testCompareStrings(r.ParseKeywords(`(onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick)\s*=`), strings.Split("onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick", "|")))
   123  	a.IsTrue(testCompareStrings(r.ParseKeywords(`/\*(!|\x00)`), []string{"/*"}))
   124  }
   125  
   126  func TestRegexp_ParseKeywords3(t *testing.T) {
   127  	var r = re.MustCompile("")
   128  
   129  	var policy = firewallconfigs.HTTPFirewallTemplate()
   130  	for _, group := range policy.Inbound.Groups {
   131  		for _, set := range group.Sets {
   132  			for _, rule := range set.Rules {
   133  				if rule.Operator == firewallconfigs.HTTPFirewallRuleOperatorMatch || rule.Operator == firewallconfigs.HTTPFirewallRuleOperatorNotMatch {
   134  					t.Log(set.Name+":", rule.Value, "=>", r.ParseKeywords(rule.Value))
   135  				}
   136  			}
   137  		}
   138  	}
   139  }
   140  
   141  func BenchmarkRegexp_MatchString(b *testing.B) {
   142  	var r = re.MustCompile("(?i)(onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick|onkeydown|onkeyup|onkeypress)(\\s|%09|%0A|(\\+|%20))*(=|%3D)")
   143  	b.ResetTimer()
   144  
   145  	//b.Log("keywords:", r.Keywords())
   146  	for i := 0; i < b.N; i++ {
   147  		r.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36")
   148  	}
   149  }
   150  
   151  func BenchmarkRegexp_MatchString2(b *testing.B) {
   152  	var r = regexp.MustCompile(`(?i)(onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick|onkeydown|onkeyup|onkeypress)(\s|%09|%0A|(\+|%20))*(=|%3D)`)
   153  	b.ResetTimer()
   154  
   155  	for i := 0; i < b.N; i++ {
   156  		r.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36")
   157  	}
   158  }
   159  
   160  func BenchmarkRegexp_MatchString_CaseSensitive(b *testing.B) {
   161  	var r = re.MustCompile("(abc|def|ghi)")
   162  	b.Log("keywords:", r.Keywords())
   163  	b.ResetTimer()
   164  	for i := 0; i < b.N; i++ {
   165  		r.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36")
   166  	}
   167  }
   168  
   169  func BenchmarkRegexp_MatchString_CaseSensitive2(b *testing.B) {
   170  	var r = regexp.MustCompile("(abc|def|ghi)")
   171  	b.ResetTimer()
   172  	for i := 0; i < b.N; i++ {
   173  		r.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36")
   174  	}
   175  }
   176  
   177  func BenchmarkRegexp_MatchString_VS_FindSubString1(b *testing.B) {
   178  	var r = re.MustCompile("(?i)(chrome)")
   179  	b.ResetTimer()
   180  	for i := 0; i < b.N; i++ {
   181  		_ = r.Raw().MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36")
   182  	}
   183  }
   184  
   185  func BenchmarkRegexp_MatchString_VS_FindSubString2(b *testing.B) {
   186  	var r = re.MustCompile("(?i)(chrome)")
   187  	b.ResetTimer()
   188  	for i := 0; i < b.N; i++ {
   189  		_ = r.Raw().FindStringSubmatch("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36")
   190  	}
   191  }
   192  
   193  func TestSplitAndJoin(t *testing.T) {
   194  	var pieces = strings.Split(`/api/ios/a
   195  /api/ios/b
   196  /api/ios/c
   197  /report`, "/")
   198  	t.Log(strings.Join(pieces, `(/|%2F)`))
   199  }
   200  
   201  func testCompareStrings(s1 []string, s2 []string) bool {
   202  	if len(s1) != len(s2) {
   203  		return false
   204  	}
   205  	for index, s := range s1 {
   206  		if s != s2[index] {
   207  			return false
   208  		}
   209  	}
   210  	return true
   211  }
   212  
   213  func testUnescape(t *testing.T, v string) string {
   214  	// replace urlencoded characters
   215  	var unescapeChars = [][2]string{
   216  		{`\s`, `(\s|%09|%0A|\+)`},
   217  		{`\(`, `(\(|%28)`},
   218  		{`=`, `(=|%3D)`},
   219  		{`<`, `(<|%3C)`},
   220  		{`\*`, `(\*|%2A)`},
   221  		{`\\`, `(\\|%2F)`},
   222  		{`!`, `(!|%21)`},
   223  		{`/`, `(/|%2F)`},
   224  		{`;`, `(;|%3B)`},
   225  		{`\+`, `(\+|%20)`},
   226  	}
   227  
   228  	for _, c := range unescapeChars {
   229  		if !strings.Contains(v, c[0]) {
   230  			continue
   231  		}
   232  		var pieces = strings.Split(v, c[0])
   233  
   234  		// 修复piece中错误的\
   235  		for pieceIndex, piece := range pieces {
   236  			var l = len(piece)
   237  			if l == 0 {
   238  				continue
   239  			}
   240  			if piece[l-1] != '\\' {
   241  				continue
   242  			}
   243  
   244  			// 计算\的数量
   245  			var countBackSlashes = 0
   246  			for i := l - 1; i >= 0; i-- {
   247  				if piece[i] == '\\' {
   248  					countBackSlashes++
   249  				} else {
   250  					break
   251  				}
   252  			}
   253  			if countBackSlashes%2 == 1 {
   254  				// 去掉最后一个
   255  				pieces[pieceIndex] = piece[:len(piece)-1]
   256  			}
   257  		}
   258  
   259  		v = strings.Join(pieces, c[1])
   260  	}
   261  
   262  	return v
   263  }