vitess.io/vitess@v0.16.2/go/mysql/collations/wildcard_test.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"}, 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package collations 18 19 import ( 20 "testing" 21 22 "github.com/stretchr/testify/assert" 23 24 "vitess.io/vitess/go/mysql/collations/internal/charset" 25 ) 26 27 type wildcardtest struct { 28 in, pat string 29 match bool 30 } 31 32 func testWildcardMatches(t *testing.T, collName string, chOne, chMany, chEsc rune, cases []wildcardtest) { 33 t.Run(collName, func(t *testing.T) { 34 coll := testcollation(t, collName) 35 for _, tc := range cases { 36 pat := coll.Wildcard([]byte(tc.pat), chOne, chMany, chEsc) 37 match := pat.Match([]byte(tc.in)) 38 assert.Equal(t, tc.match, match, "%q LIKE %q = %v (expected %v)", tc.in, tc.pat, match, tc.match) 39 40 } 41 }) 42 } 43 44 func TestLikeMatches(t *testing.T) { 45 testWildcardMatches(t, "utf8mb4_0900_ai_ci", 0, 0, 0, []wildcardtest{ 46 {"abc", "abc", true}, 47 {"Abc", "aBc", true}, 48 {"abc", "_bc", true}, 49 {"abc", "a_c", true}, 50 {"abc", "ab_", true}, 51 {"abc", "%c", true}, 52 {"abc", "a%c", true}, 53 {"abc", "a%", true}, 54 {"abcdef", "a%d_f", true}, 55 {"abcdefg", "a%d%g", true}, 56 {"a\\", "a\\", true}, 57 {"aa\\", "a%\\", true}, 58 {"Y", "\u00dd", true}, 59 {"abcd", "abcde", false}, 60 {"abcde", "abcd", false}, 61 {"abcde", "a%f", false}, 62 {"abcdef", "a%%f", true}, 63 {"abcd", "a__d", true}, 64 {"abcd", "a\\bcd", true}, 65 {"a\\bcd", "abcd", false}, 66 {"abdbcd", "a%cd", true}, 67 {"abecd", "a%bd", false}, 68 }) 69 70 testWildcardMatches(t, "utf8mb4_0900_as_cs", 0, 0, 0, []wildcardtest{ 71 {"abc", "abc", true}, 72 {"Abc", "aBc", false}, 73 {"abc", "_bc", true}, 74 {"abc", "a_c", true}, 75 {"abc", "ab_", true}, 76 {"abc", "%c", true}, 77 {"abc", "a%c", true}, 78 {"abc", "a%", true}, 79 {"abcdef", "a%d_f", true}, 80 {"abcdefg", "a%d%g", true}, 81 {"a\\", "a\\", true}, 82 {"aa\\", "a%\\", true}, 83 {"Y", "\u00dd", false}, 84 {"abcd", "abcde", false}, 85 {"abcde", "abcd", false}, 86 {"abcde", "a%f", false}, 87 {"abcdef", "a%%f", true}, 88 {"abcd", "a__d", true}, 89 {"abcd", "a\\bcd", true}, 90 {"a\\bcd", "abcd", false}, 91 {"abdbcd", "a%cd", true}, 92 {"abecd", "a%bd", false}, 93 }) 94 95 testWildcardMatches(t, "utf8mb4_0900_as_ci", 0, 0, 0, []wildcardtest{ 96 {"ǎḄÇ", "Ǎḅç", true}, 97 {"ÁḆĈ", "Ǎḅç", false}, 98 {"ǍBc", "_bc", true}, 99 {"Aḅc", "a_c", true}, 100 {"Abç", "ab_", true}, 101 {"Ǎḅç", "%ç", true}, 102 {"Ǎḅç", "ǎ%Ç", true}, 103 {"aḅç", "a%", true}, 104 {"Ǎḅçdef", "ǎ%d_f", true}, 105 {"Ǎḅçdefg", "ǎ%d%g", true}, 106 {"ǎ\\", "Ǎ\\", true}, 107 {"ǎa\\", "Ǎ%\\", true}, 108 {"Y", "\u00dd", false}, 109 {"abcd", "Ǎḅçde", false}, 110 {"abcde", "Ǎḅçd", false}, 111 {"Ǎḅçde", "a%f", false}, 112 {"Ǎḅçdef", "ǎ%%f", true}, 113 {"Ǎḅçd", "ǎ__d", true}, 114 {"Ǎḅçd", "ǎ\\ḄÇd", true}, 115 {"a\\bcd", "Ǎḅçd", false}, 116 {"Ǎḅdbçd", "ǎ%Çd", true}, 117 {"Ǎḅeçd", "a%bd", false}, 118 }) 119 } 120 121 // from http://developforperformance.com/MatchingWildcards_AnImprovedAlgorithmForBigData.html 122 // Copyright 2018 IBM Corporation 123 // Licensed under the Apache License, Version 2.0 124 var wildcardTestCases = []wildcardtest{ 125 {"Hi", "Hi*", true}, 126 {"abc", "ab*d", false}, 127 {"abcccd", "*ccd", true}, 128 {"mississipissippi", "*issip*ss*", true}, 129 {"xxxx*zzzzzzzzy*f", "xxxx*zzy*fffff", false}, 130 {"xxxx*zzzzzzzzy*f", "xxx*zzy*f", true}, 131 {"xxxxzzzzzzzzyf", "xxxx*zzy*fffff", false}, 132 {"xxxxzzzzzzzzyf", "xxxx*zzy*f", true}, 133 {"xyxyxyzyxyz", "xy*z*xyz", true}, 134 {"mississippi", "*sip*", true}, 135 {"xyxyxyxyz", "xy*xyz", true}, 136 {"mississippi", "mi*sip*", true}, 137 {"ababac", "*abac*", true}, 138 {"ababac", "*abac*", true}, 139 {"aaazz", "a*zz*", true}, 140 {"a12b12", "*12*23", false}, 141 {"a12b12", "a12b", false}, 142 {"a12b12", "*12*12*", true}, 143 {"caaab", "*a?b", true}, 144 {"*", "*", true}, 145 {"a*abab", "a*b", true}, 146 {"a*r", "a*", true}, 147 {"a*ar", "a*aar", false}, 148 {"XYXYXYZYXYz", "XY*Z*XYz", true}, 149 {"missisSIPpi", "*SIP*", true}, 150 {"mississipPI", "*issip*PI", true}, 151 {"xyxyxyxyz", "xy*xyz", true}, 152 {"miSsissippi", "mi*sip*", true}, 153 {"miSsissippi", "mi*Sip*", false}, 154 {"abAbac", "*Abac*", true}, 155 {"abAbac", "*Abac*", true}, 156 {"aAazz", "a*zz*", true}, 157 {"A12b12", "*12*23", false}, 158 {"a12B12", "*12*12*", true}, 159 {"oWn", "*oWn*", true}, 160 {"bLah", "bLah", true}, 161 {"bLah", "bLaH", false}, 162 {"a", "*?", true}, 163 {"ab", "*?", true}, 164 {"abc", "*?", true}, 165 {"a", "??", false}, 166 {"ab", "?*?", true}, 167 {"ab", "*?*?*", true}, 168 {"abc", "?**?*?", true}, 169 {"abc", "?**?*&?", false}, 170 {"abcd", "?b*??", true}, 171 {"abcd", "?a*??", false}, 172 {"abcd", "?**?c?", true}, 173 {"abcd", "?**?d?", false}, 174 {"abcde", "?*b*?*d*?", true}, 175 {"bLah", "bL?h", true}, 176 {"bLaaa", "bLa?", false}, 177 {"bLah", "bLa?", true}, 178 {"bLaH", "?Lah", false}, 179 {"bLaH", "?LaH", true}, 180 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "a*a*a*a*a*a*aa*aaa*a*a*b", true}, 181 {"abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "*a*b*ba*ca*a*aa*aaa*fa*ga*b*", true}, 182 {"abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "*a*b*ba*ca*a*x*aaa*fa*ga*b*", false}, 183 {"abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "*a*b*ba*ca*aaaa*fa*ga*gggg*b*", false}, 184 {"abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "*a*b*ba*ca*aaaa*fa*ga*ggg*b*", true}, 185 {"aaabbaabbaab", "*aabbaa*a*", true}, 186 {"a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", "a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", true}, 187 {"aaaaaaaaaaaaaaaaa", "*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", true}, 188 {"aaaaaaaaaaaaaaaa", "*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", false}, 189 {"abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij*abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn", "abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*", false}, 190 {"abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij*abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn", "abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*", true}, 191 {"abc*abcd*abcd*abc*abcd", "abc*abc*abc*abc*abc", false}, 192 {"abc*abcd*abcd*abc*abcd*abcd*abc*abcd*abc*abc*abcd", "abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abcd", true}, 193 {"abc", "********a********b********c********", true}, 194 {"********a********b********c********", "abc", false}, 195 {"abc", "********a********b********b********", false}, 196 {"*abc*", "***a*b*c***", true}, 197 {"", "?", false}, 198 {"", "*?", false}, 199 {"", "", true}, 200 {"a", "", false}, 201 202 {"abc", "abd", false}, 203 {"abcccd", "abcccd", true}, 204 {"mississipissippi", "mississipissippi", true}, 205 {"xxxxzzzzzzzzyf", "xxxxzzzzzzzzyfffff", false}, 206 {"xxxxzzzzzzzzyf", "xxxxzzzzzzzzyf", true}, 207 {"xxxxzzzzzzzzyf", "xxxxzzy.fffff", false}, 208 {"xxxxzzzzzzzzyf", "xxxxzzzzzzzzyf", true}, 209 {"xyxyxyzyxyz", "xyxyxyzyxyz", true}, 210 {"mississippi", "mississippi", true}, 211 {"xyxyxyxyz", "xyxyxyxyz", true}, 212 {"m ississippi", "m ississippi", true}, 213 {"ababac", "ababac?", false}, 214 {"dababac", "ababac", false}, 215 {"aaazz", "aaazz", true}, 216 {"a12b12", "1212", false}, 217 {"a12b12", "a12b", false}, 218 {"a12b12", "a12b12", true}, 219 {"n", "n", true}, 220 {"aabab", "aabab", true}, 221 {"ar", "ar", true}, 222 {"aar", "aaar", false}, 223 {"XYXYXYZYXYz", "XYXYXYZYXYz", true}, 224 {"missisSIPpi", "missisSIPpi", true}, 225 {"mississipPI", "mississipPI", true}, 226 {"xyxyxyxyz", "xyxyxyxyz", true}, 227 {"miSsissippi", "miSsissippi", true}, 228 {"miSsissippi", "miSsisSippi", false}, 229 {"abAbac", "abAbac", true}, 230 {"abAbac", "abAbac", true}, 231 {"aAazz", "aAazz", true}, 232 {"A12b12", "A12b123", false}, 233 {"a12B12", "a12B12", true}, 234 {"oWn", "oWn", true}, 235 {"bLah", "bLah", true}, 236 {"bLah", "bLaH", false}, 237 {"a", "a", true}, 238 {"ab", "a?", true}, 239 {"abc", "ab?", true}, 240 {"a", "??", false}, 241 {"ab", "??", true}, 242 {"abc", "???", true}, 243 {"abcd", "????", true}, 244 {"abc", "????", false}, 245 {"abcd", "?b??", true}, 246 {"abcd", "?a??", false}, 247 {"abcd", "??c?", true}, 248 {"abcd", "??d?", false}, 249 {"abcde", "?b?d*?", true}, 250 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", true}, 251 {"abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", true}, 252 {"abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "abababababababababababababababababababaacacacacacacacadaeafagahaiajaxalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", false}, 253 {"abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaggggagaaaaaaaab", false}, 254 {"abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", true}, 255 {"aaabbaabbaab", "aaabbaabbaab", true}, 256 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 257 {"aaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaa", true}, 258 {"aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaa", false}, 259 {"abcabcdabcdeabcdefabcdefgabcdefghabcdefghiabcdefghijabcdefghijkabcdefghijklabcdefghijklmabcdefghijklmn", "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc", false}, 260 {"abcabcdabcdeabcdefabcdefgabcdefghabcdefghiabcdefghijabcdefghijkabcdefghijklabcdefghijklmabcdefghijklmn", "abcabcdabcdeabcdefabcdefgabcdefghabcdefghiabcdefghijabcdefghijkabcdefghijklabcdefghijklmabcdefghijklmn", true}, 261 {"abcabcdabcdabcabcd", "abcabc?abcabcabc", false}, 262 {"abcabcdabcdabcabcdabcdabcabcdabcabcabcd", "abcabc?abc?abcabc?abc?abc?bc?abc?bc?bcd", true}, 263 {"?abc?", "?abc?", true}, 264 265 {"", "abd", false}, 266 {"", "abcccd", false}, 267 {"", "mississipissippi", false}, 268 {"", "xxxxzzzzzzzzyfffff", false}, 269 {"", "xxxxzzzzzzzzyf", false}, 270 {"", "xxxxzzy.fffff", false}, 271 {"", "xxxxzzzzzzzzyf", false}, 272 {"", "xyxyxyzyxyz", false}, 273 {"", "mississippi", false}, 274 {"", "xyxyxyxyz", false}, 275 {"", "m ississippi", false}, 276 {"", "ababac*", false}, 277 {"", "ababac", false}, 278 {"", "aaazz", false}, 279 {"", "1212", false}, 280 {"", "a12b", false}, 281 {"", "a12b12", false}, 282 {"", "n", false}, 283 {"", "aabab", false}, 284 {"", "ar", false}, 285 {"", "aaar", false}, 286 {"", "XYXYXYZYXYz", false}, 287 {"", "missisSIPpi", false}, 288 {"", "mississipPI", false}, 289 {"", "xyxyxyxyz", false}, 290 {"", "miSsissippi", false}, 291 {"", "miSsisSippi", false}, 292 {"", "abAbac", false}, 293 {"", "abAbac", false}, 294 {"", "aAazz", false}, 295 {"", "A12b123", false}, 296 {"", "a12B12", false}, 297 {"", "oWn", false}, 298 {"", "bLah", false}, 299 {"", "bLaH", false}, 300 {"", "", true}, 301 {"abc", "", false}, 302 {"abcccd", "", false}, 303 {"mississipissippi", "", false}, 304 {"xxxxzzzzzzzzyf", "", false}, 305 {"xxxxzzzzzzzzyf", "", false}, 306 {"xxxxzzzzzzzzyf", "", false}, 307 {"xxxxzzzzzzzzyf", "", false}, 308 {"xyxyxyzyxyz", "", false}, 309 {"mississippi", "", false}, 310 {"xyxyxyxyz", "", false}, 311 {"m ississippi", "", false}, 312 {"ababac", "", false}, 313 {"dababac", "", false}, 314 {"aaazz", "", false}, 315 {"a12b12", "", false}, 316 {"a12b12", "", false}, 317 {"a12b12", "", false}, 318 {"n", "", false}, 319 {"aabab", "", false}, 320 {"ar", "", false}, 321 {"aar", "", false}, 322 {"XYXYXYZYXYz", "", false}, 323 {"missisSIPpi", "", false}, 324 {"mississipPI", "", false}, 325 {"xyxyxyxyz", "", false}, 326 {"miSsissippi", "", false}, 327 {"miSsissippi", "", false}, 328 {"abAbac", "", false}, 329 {"abAbac", "", false}, 330 {"aAazz", "", false}, 331 {"A12b12", "", false}, 332 {"a12B12", "", false}, 333 {"oWn", "", false}, 334 {"bLah", "", false}, 335 {"bLah", "", false}, 336 } 337 338 func identity(a, b rune) bool { 339 return a == b 340 } 341 342 func TestWildcardMatches(t *testing.T) { 343 t.Run("UnicodeWildcardMatcher (no optimization)", func(t *testing.T) { 344 for _, tc := range wildcardTestCases { 345 wildcard := newUnicodeWildcardMatcher(charset.Charset_utf8mb4{}, identity, nil, []byte(tc.pat), '?', '*', '\\') 346 match := wildcard.Match([]byte(tc.in)) 347 assert.Equal(t, tc.match, match, "wildcard(%q, %q) = %v (expected %v)", tc.in, tc.pat, match, tc.match) 348 349 } 350 }) 351 352 t.Run("EightbitWildcardMatcher (no optimization)", func(t *testing.T) { 353 for _, tc := range wildcardTestCases { 354 wildcard := newEightbitWildcardMatcher(&sortOrderIdentity, nil, []byte(tc.pat), '?', '*', '\\') 355 match := wildcard.Match([]byte(tc.in)) 356 assert.Equal(t, tc.match, match, "wildcard(%q, %q) = %v (expected %v)", tc.in, tc.pat, match, tc.match) 357 358 } 359 }) 360 361 testWildcardMatches(t, "utf8mb4_0900_bin", '?', '*', '\\', wildcardTestCases) 362 testWildcardMatches(t, "utf8mb4_0900_as_cs", '?', '*', '\\', wildcardTestCases) 363 } 364 365 func BenchmarkWildcardMatching(b *testing.B) { 366 type bench struct { 367 input []byte 368 m1 WildcardPattern 369 m2 WildcardPattern 370 } 371 372 var patterns []bench 373 for _, tc := range wildcardTestCases { 374 patterns = append(patterns, bench{ 375 input: []byte(tc.in), 376 m1: newUnicodeWildcardMatcher(charset.Charset_utf8mb4{}, identity, nil, []byte(tc.pat), '?', '*', '\\'), 377 m2: newEightbitWildcardMatcher(&sortOrderIdentity, nil, []byte(tc.pat), '?', '*', '\\'), 378 }) 379 } 380 381 b.Run("unicode", func(b *testing.B) { 382 b.ReportAllocs() 383 b.ResetTimer() 384 385 for n := 0; n < b.N; n++ { 386 for _, bb := range patterns { 387 _ = bb.m1.Match(bb.input) 388 } 389 } 390 }) 391 392 b.Run("8bit", func(b *testing.B) { 393 b.ReportAllocs() 394 b.ResetTimer() 395 396 for n := 0; n < b.N; n++ { 397 for _, bb := range patterns { 398 _ = bb.m2.Match(bb.input) 399 } 400 } 401 }) 402 }