github.com/TeaOSLab/EdgeNode@v1.3.8/internal/re/regexp_test.go (about) 1 // Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved. 2 3 package re_test 4 5 import ( 6 "github.com/TeaOSLab/EdgeCommon/pkg/serverconfigs/firewallconfigs" 7 "github.com/TeaOSLab/EdgeNode/internal/re" 8 "github.com/iwind/TeaGo/assert" 9 "regexp" 10 "strings" 11 "testing" 12 ) 13 14 func TestRegexp(t *testing.T) { 15 for _, s := range []string{"(?i)(abc|efg)", "abc|efg", "abc(.+)"} { 16 var reg = regexp.MustCompile(s) 17 t.Log("===" + s + "===") 18 t.Log(reg.LiteralPrefix()) 19 t.Log(reg.NumSubexp()) 20 t.Log(reg.SubexpNames()) 21 } 22 } 23 24 func TestRegexp_MatchString(t *testing.T) { 25 var a = assert.NewAssertion(t) 26 27 { 28 var r = re.MustCompile("abc") 29 a.IsTrue(r.MatchString("abc")) 30 a.IsFalse(r.MatchString("ab")) 31 a.IsFalse(r.MatchString("ABC")) 32 } 33 34 { 35 var r = re.MustCompile("(?i)abc|def|ghi") 36 a.IsTrue(r.MatchString("DEF")) 37 a.IsFalse(r.MatchString("ab")) 38 a.IsTrue(r.MatchString("ABC")) 39 } 40 } 41 42 func TestRegexp_Sub(t *testing.T) { 43 { 44 reg := regexp.MustCompile(`(a|b|c)(e|f|g)`) 45 for _, subName := range reg.SubexpNames() { 46 t.Log(subName) 47 } 48 } 49 } 50 51 func TestRegexp_ParseKeywords(t *testing.T) { 52 var r = re.MustCompile("") 53 54 { 55 var keywords = r.ParseKeywords(`\n\t\n\f\r\v\x123`) 56 t.Log(keywords) 57 } 58 } 59 60 func TestRegexp_Special(t *testing.T) { 61 for _, s := range []string{ 62 `\\s`, 63 `\s\W`, 64 `aaaa/\W`, 65 `aaaa\/\W`, 66 `aaaa\=\W`, 67 `aaaa\\=\W`, 68 `aaaa\\\=\W`, 69 `aaaa\\\\=\W`, 70 } { 71 var es = testUnescape(t, s) 72 t.Log(s, "=>", es) 73 _, err := re.Compile(es) 74 if err != nil { 75 t.Fatal(err) 76 } 77 } 78 } 79 80 func TestRegexp_Special2(t *testing.T) { 81 r, err := re.Compile(testUnescape(t, `/api/ios/a 82 /api/ios/b 83 /api/ios/c 84 /report`)) 85 if err != nil { 86 t.Fatal(err) 87 } 88 t.Log(r.Keywords()) 89 } 90 91 func TestRegexp_ParseKeywords2(t *testing.T) { 92 var a = assert.NewAssertion(t) 93 94 var r = re.MustCompile("") 95 a.IsTrue(testCompareStrings(r.ParseKeywords("(abc)def"), []string{"abcdef"})) 96 a.IsTrue(testCompareStrings(r.ParseKeywords("(abc)|(?:def)"), []string{"abc", "def"})) 97 a.IsTrue(testCompareStrings(r.ParseKeywords("(abc)"), []string{"abc"})) 98 a.IsTrue(testCompareStrings(r.ParseKeywords("(abc|def|ghi)"), []string{"abc", "def", "ghi"})) 99 a.IsTrue(testCompareStrings(r.ParseKeywords("(?i:abc)"), []string{})) 100 a.IsTrue(testCompareStrings(r.ParseKeywords(`\babc`), []string{"abc"})) 101 a.IsTrue(testCompareStrings(r.ParseKeywords(` \babc`), []string{" "})) 102 a.IsTrue(testCompareStrings(r.ParseKeywords(`\babc\b`), []string{"abc"})) 103 a.IsTrue(testCompareStrings(r.ParseKeywords(`\b(abc)`), []string{"abc"})) 104 a.IsTrue(testCompareStrings(r.ParseKeywords("abc"), []string{"abc"})) 105 a.IsTrue(testCompareStrings(r.ParseKeywords("abc|efg|hij"), []string{"abc", "efg", "hij"})) 106 a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg|hij`), []string{"abc|efg", "hij"})) 107 a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg*|hij`), []string{"abc|ef", "hij"})) 108 a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg?|hij`), []string{"abc|ef", "hij"})) 109 a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg+|hij`), []string{"abc|ef", "hij"})) 110 a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg{2,10}|hij`), []string{"abc|ef", "hij"})) 111 a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg{0,10}|hij`), []string{"abc|ef", "hij"})) 112 a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg.+|hij`), []string{"abc|efg", "hij"})) 113 a.IsTrue(testCompareStrings(r.ParseKeywords("A(abc|bcd)"), []string{"Aabc", "Abcd"})) 114 a.IsTrue(testCompareStrings(r.ParseKeywords("^abc"), []string{"abc"})) 115 a.IsTrue(testCompareStrings(r.ParseKeywords("abc$"), []string{"abc"})) 116 a.IsTrue(testCompareStrings(r.ParseKeywords(`abc$`), []string{"abc"})) 117 a.IsTrue(testCompareStrings(r.ParseKeywords("abc\\d"), []string{"abc"})) 118 a.IsTrue(testCompareStrings(r.ParseKeywords("abc{0,4}"), []string{"ab"})) 119 a.IsTrue(testCompareStrings(r.ParseKeywords("{0,4}"), []string{})) 120 a.IsTrue(testCompareStrings(r.ParseKeywords("{1,4}"), []string{})) 121 a.IsTrue(testCompareStrings(r.ParseKeywords("中文|北京|上海|golang"), []string{"中文", "北京", "上海", "golang"})) 122 a.IsTrue(testCompareStrings(r.ParseKeywords(`(onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick)\s*=`), strings.Split("onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick", "|"))) 123 a.IsTrue(testCompareStrings(r.ParseKeywords(`/\*(!|\x00)`), []string{"/*"})) 124 } 125 126 func TestRegexp_ParseKeywords3(t *testing.T) { 127 var r = re.MustCompile("") 128 129 var policy = firewallconfigs.HTTPFirewallTemplate() 130 for _, group := range policy.Inbound.Groups { 131 for _, set := range group.Sets { 132 for _, rule := range set.Rules { 133 if rule.Operator == firewallconfigs.HTTPFirewallRuleOperatorMatch || rule.Operator == firewallconfigs.HTTPFirewallRuleOperatorNotMatch { 134 t.Log(set.Name+":", rule.Value, "=>", r.ParseKeywords(rule.Value)) 135 } 136 } 137 } 138 } 139 } 140 141 func BenchmarkRegexp_MatchString(b *testing.B) { 142 var r = re.MustCompile("(?i)(onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick|onkeydown|onkeyup|onkeypress)(\\s|%09|%0A|(\\+|%20))*(=|%3D)") 143 b.ResetTimer() 144 145 //b.Log("keywords:", r.Keywords()) 146 for i := 0; i < b.N; i++ { 147 r.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36") 148 } 149 } 150 151 func BenchmarkRegexp_MatchString2(b *testing.B) { 152 var r = regexp.MustCompile(`(?i)(onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick|onkeydown|onkeyup|onkeypress)(\s|%09|%0A|(\+|%20))*(=|%3D)`) 153 b.ResetTimer() 154 155 for i := 0; i < b.N; i++ { 156 r.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36") 157 } 158 } 159 160 func BenchmarkRegexp_MatchString_CaseSensitive(b *testing.B) { 161 var r = re.MustCompile("(abc|def|ghi)") 162 b.Log("keywords:", r.Keywords()) 163 b.ResetTimer() 164 for i := 0; i < b.N; i++ { 165 r.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36") 166 } 167 } 168 169 func BenchmarkRegexp_MatchString_CaseSensitive2(b *testing.B) { 170 var r = regexp.MustCompile("(abc|def|ghi)") 171 b.ResetTimer() 172 for i := 0; i < b.N; i++ { 173 r.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36") 174 } 175 } 176 177 func BenchmarkRegexp_MatchString_VS_FindSubString1(b *testing.B) { 178 var r = re.MustCompile("(?i)(chrome)") 179 b.ResetTimer() 180 for i := 0; i < b.N; i++ { 181 _ = r.Raw().MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36") 182 } 183 } 184 185 func BenchmarkRegexp_MatchString_VS_FindSubString2(b *testing.B) { 186 var r = re.MustCompile("(?i)(chrome)") 187 b.ResetTimer() 188 for i := 0; i < b.N; i++ { 189 _ = r.Raw().FindStringSubmatch("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36") 190 } 191 } 192 193 func TestSplitAndJoin(t *testing.T) { 194 var pieces = strings.Split(`/api/ios/a 195 /api/ios/b 196 /api/ios/c 197 /report`, "/") 198 t.Log(strings.Join(pieces, `(/|%2F)`)) 199 } 200 201 func testCompareStrings(s1 []string, s2 []string) bool { 202 if len(s1) != len(s2) { 203 return false 204 } 205 for index, s := range s1 { 206 if s != s2[index] { 207 return false 208 } 209 } 210 return true 211 } 212 213 func testUnescape(t *testing.T, v string) string { 214 // replace urlencoded characters 215 var unescapeChars = [][2]string{ 216 {`\s`, `(\s|%09|%0A|\+)`}, 217 {`\(`, `(\(|%28)`}, 218 {`=`, `(=|%3D)`}, 219 {`<`, `(<|%3C)`}, 220 {`\*`, `(\*|%2A)`}, 221 {`\\`, `(\\|%2F)`}, 222 {`!`, `(!|%21)`}, 223 {`/`, `(/|%2F)`}, 224 {`;`, `(;|%3B)`}, 225 {`\+`, `(\+|%20)`}, 226 } 227 228 for _, c := range unescapeChars { 229 if !strings.Contains(v, c[0]) { 230 continue 231 } 232 var pieces = strings.Split(v, c[0]) 233 234 // 修复piece中错误的\ 235 for pieceIndex, piece := range pieces { 236 var l = len(piece) 237 if l == 0 { 238 continue 239 } 240 if piece[l-1] != '\\' { 241 continue 242 } 243 244 // 计算\的数量 245 var countBackSlashes = 0 246 for i := l - 1; i >= 0; i-- { 247 if piece[i] == '\\' { 248 countBackSlashes++ 249 } else { 250 break 251 } 252 } 253 if countBackSlashes%2 == 1 { 254 // 去掉最后一个 255 pieces[pieceIndex] = piece[:len(piece)-1] 256 } 257 } 258 259 v = strings.Join(pieces, c[1]) 260 } 261 262 return v 263 }