github.com/code-reading/golang@v0.0.0-20220303082512-ba5bc0e589a3/go/src/regexp/find_test.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package regexp 6 7 import ( 8 "fmt" 9 "strings" 10 "testing" 11 ) 12 13 // For each pattern/text pair, what is the expected output of each function? 14 // We can derive the textual results from the indexed results, the non-submatch 15 // results from the submatched results, the single results from the 'all' results, 16 // and the byte results from the string results. Therefore the table includes 17 // only the FindAllStringSubmatchIndex result. 18 type FindTest struct { 19 pat string 20 text string 21 matches [][]int 22 } 23 24 func (t FindTest) String() string { 25 return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text) 26 } 27 28 var findTests = []FindTest{ 29 {``, ``, build(1, 0, 0)}, 30 {`^abcdefg`, "abcdefg", build(1, 0, 7)}, 31 {`a+`, "baaab", build(1, 1, 4)}, 32 {"abcd..", "abcdef", build(1, 0, 6)}, 33 {`a`, "a", build(1, 0, 1)}, 34 {`x`, "y", nil}, 35 {`b`, "abc", build(1, 1, 2)}, 36 {`.`, "a", build(1, 0, 1)}, 37 {`.*`, "abcdef", build(1, 0, 6)}, 38 {`^`, "abcde", build(1, 0, 0)}, 39 {`$`, "abcde", build(1, 5, 5)}, 40 {`^abcd$`, "abcd", build(1, 0, 4)}, 41 {`^bcd'`, "abcdef", nil}, 42 {`^abcd$`, "abcde", nil}, 43 {`a+`, "baaab", build(1, 1, 4)}, 44 {`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)}, 45 {`[a-z]+`, "abcd", build(1, 0, 4)}, 46 {`[^a-z]+`, "ab1234cd", build(1, 2, 6)}, 47 {`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)}, 48 {`[^\n]+`, "abcd\n", build(1, 0, 4)}, 49 {`[日本語]+`, "日本語日本語", build(1, 0, 18)}, 50 {`日本語+`, "日本語", build(1, 0, 9)}, 51 {`日本語+`, "日本語語語語", build(1, 0, 18)}, 52 {`()`, "", build(1, 0, 0, 0, 0)}, 53 {`(a)`, "a", build(1, 0, 1, 0, 1)}, 54 {`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)}, 55 {`(.*)`, "", build(1, 0, 0, 0, 0)}, 56 {`(.*)`, "abcd", build(1, 0, 4, 0, 4)}, 57 {`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)}, 58 {`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)}, 59 {`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)}, 60 {`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)}, 61 {`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)}, 62 {`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)}, 63 64 {`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)}, 65 {`(.*).*`, "ab", build(1, 0, 2, 0, 2)}, 66 {`[.]`, ".", build(1, 0, 1)}, 67 {`/$`, "/abc/", build(1, 4, 5)}, 68 {`/$`, "/abc", nil}, 69 70 // multiple matches 71 {`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)}, 72 {`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)}, 73 {`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)}, 74 {`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)}, 75 {`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)}, 76 77 // fixed bugs 78 {`ab$`, "cab", build(1, 1, 3)}, 79 {`axxb$`, "axxcb", nil}, 80 {`data`, "daXY data", build(1, 5, 9)}, 81 {`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)}, 82 {`zx+`, "zzx", build(1, 1, 3)}, 83 {`ab$`, "abcab", build(1, 3, 5)}, 84 {`(aa)*$`, "a", build(1, 1, 1, -1, -1)}, 85 {`(?:.|(?:.a))`, "", nil}, 86 {`(?:A(?:A|a))`, "Aa", build(1, 0, 2)}, 87 {`(?:A|(?:A|a))`, "a", build(1, 0, 1)}, 88 {`(a){0}`, "", build(1, 0, 0, -1, -1)}, 89 {`(?-s)(?:(?:^).)`, "\n", nil}, 90 {`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)}, 91 {`(?:(?:^).)`, "\n", nil}, 92 {`\b`, "x", build(2, 0, 0, 1, 1)}, 93 {`\b`, "xx", build(2, 0, 0, 2, 2)}, 94 {`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)}, 95 {`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)}, 96 {`\B`, "x", nil}, 97 {`\B`, "xx", build(1, 1, 1)}, 98 {`\B`, "x y", nil}, 99 {`\B`, "xx yy", build(2, 1, 1, 4, 4)}, 100 {`(|a)*`, "aa", build(3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2)}, 101 102 // RE2 tests 103 {`[^\S\s]`, "abcd", nil}, 104 {`[^\S[:space:]]`, "abcd", nil}, 105 {`[^\D\d]`, "abcd", nil}, 106 {`[^\D[:digit:]]`, "abcd", nil}, 107 {`(?i)\W`, "x", nil}, 108 {`(?i)\W`, "k", nil}, 109 {`(?i)\W`, "s", nil}, 110 111 // can backslash-escape any punctuation 112 {`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`, 113 `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)}, 114 {`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`, 115 `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)}, 116 {"\\`", "`", build(1, 0, 1)}, 117 {"[\\`]+", "`", build(1, 0, 1)}, 118 119 // long set of matches (longer than startSize) 120 { 121 ".", 122 "qwertyuiopasdfghjklzxcvbnm1234567890", 123 build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 124 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 125 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 126 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36), 127 }, 128 } 129 130 // build is a helper to construct a [][]int by extracting n sequences from x. 131 // This represents n matches with len(x)/n submatches each. 132 func build(n int, x ...int) [][]int { 133 ret := make([][]int, n) 134 runLength := len(x) / n 135 j := 0 136 for i := range ret { 137 ret[i] = make([]int, runLength) 138 copy(ret[i], x[j:]) 139 j += runLength 140 if j > len(x) { 141 panic("invalid build entry") 142 } 143 } 144 return ret 145 } 146 147 // First the simple cases. 148 149 func TestFind(t *testing.T) { 150 for _, test := range findTests { 151 re := MustCompile(test.pat) 152 if re.String() != test.pat { 153 t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat) 154 } 155 result := re.Find([]byte(test.text)) 156 switch { 157 case len(test.matches) == 0 && len(result) == 0: 158 // ok 159 case test.matches == nil && result != nil: 160 t.Errorf("expected no match; got one: %s", test) 161 case test.matches != nil && result == nil: 162 t.Errorf("expected match; got none: %s", test) 163 case test.matches != nil && result != nil: 164 expect := test.text[test.matches[0][0]:test.matches[0][1]] 165 if len(result) != cap(result) { 166 t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test) 167 } 168 if expect != string(result) { 169 t.Errorf("expected %q got %q: %s", expect, result, test) 170 } 171 } 172 } 173 } 174 175 func TestFindString(t *testing.T) { 176 for _, test := range findTests { 177 result := MustCompile(test.pat).FindString(test.text) 178 switch { 179 case len(test.matches) == 0 && len(result) == 0: 180 // ok 181 case test.matches == nil && result != "": 182 t.Errorf("expected no match; got one: %s", test) 183 case test.matches != nil && result == "": 184 // Tricky because an empty result has two meanings: no match or empty match. 185 if test.matches[0][0] != test.matches[0][1] { 186 t.Errorf("expected match; got none: %s", test) 187 } 188 case test.matches != nil && result != "": 189 expect := test.text[test.matches[0][0]:test.matches[0][1]] 190 if expect != result { 191 t.Errorf("expected %q got %q: %s", expect, result, test) 192 } 193 } 194 } 195 } 196 197 func testFindIndex(test *FindTest, result []int, t *testing.T) { 198 switch { 199 case len(test.matches) == 0 && len(result) == 0: 200 // ok 201 case test.matches == nil && result != nil: 202 t.Errorf("expected no match; got one: %s", test) 203 case test.matches != nil && result == nil: 204 t.Errorf("expected match; got none: %s", test) 205 case test.matches != nil && result != nil: 206 expect := test.matches[0] 207 if expect[0] != result[0] || expect[1] != result[1] { 208 t.Errorf("expected %v got %v: %s", expect, result, test) 209 } 210 } 211 } 212 213 func TestFindIndex(t *testing.T) { 214 for _, test := range findTests { 215 testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t) 216 } 217 } 218 219 func TestFindStringIndex(t *testing.T) { 220 for _, test := range findTests { 221 testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t) 222 } 223 } 224 225 func TestFindReaderIndex(t *testing.T) { 226 for _, test := range findTests { 227 testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t) 228 } 229 } 230 231 // Now come the simple All cases. 232 233 func TestFindAll(t *testing.T) { 234 for _, test := range findTests { 235 result := MustCompile(test.pat).FindAll([]byte(test.text), -1) 236 switch { 237 case test.matches == nil && result == nil: 238 // ok 239 case test.matches == nil && result != nil: 240 t.Errorf("expected no match; got one: %s", test) 241 case test.matches != nil && result == nil: 242 t.Fatalf("expected match; got none: %s", test) 243 case test.matches != nil && result != nil: 244 if len(test.matches) != len(result) { 245 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 246 continue 247 } 248 for k, e := range test.matches { 249 got := result[k] 250 if len(got) != cap(got) { 251 t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test) 252 } 253 expect := test.text[e[0]:e[1]] 254 if expect != string(got) { 255 t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test) 256 } 257 } 258 } 259 } 260 } 261 262 func TestFindAllString(t *testing.T) { 263 for _, test := range findTests { 264 result := MustCompile(test.pat).FindAllString(test.text, -1) 265 switch { 266 case test.matches == nil && result == nil: 267 // ok 268 case test.matches == nil && result != nil: 269 t.Errorf("expected no match; got one: %s", test) 270 case test.matches != nil && result == nil: 271 t.Errorf("expected match; got none: %s", test) 272 case test.matches != nil && result != nil: 273 if len(test.matches) != len(result) { 274 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 275 continue 276 } 277 for k, e := range test.matches { 278 expect := test.text[e[0]:e[1]] 279 if expect != result[k] { 280 t.Errorf("expected %q got %q: %s", expect, result, test) 281 } 282 } 283 } 284 } 285 } 286 287 func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) { 288 switch { 289 case test.matches == nil && result == nil: 290 // ok 291 case test.matches == nil && result != nil: 292 t.Errorf("expected no match; got one: %s", test) 293 case test.matches != nil && result == nil: 294 t.Errorf("expected match; got none: %s", test) 295 case test.matches != nil && result != nil: 296 if len(test.matches) != len(result) { 297 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 298 return 299 } 300 for k, e := range test.matches { 301 if e[0] != result[k][0] || e[1] != result[k][1] { 302 t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test) 303 } 304 } 305 } 306 } 307 308 func TestFindAllIndex(t *testing.T) { 309 for _, test := range findTests { 310 testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t) 311 } 312 } 313 314 func TestFindAllStringIndex(t *testing.T) { 315 for _, test := range findTests { 316 testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t) 317 } 318 } 319 320 // Now come the Submatch cases. 321 322 func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) { 323 if len(submatches) != len(result)*2 { 324 t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) 325 return 326 } 327 for k := 0; k < len(submatches); k += 2 { 328 if submatches[k] == -1 { 329 if result[k/2] != nil { 330 t.Errorf("match %d: expected nil got %q: %s", n, result, test) 331 } 332 continue 333 } 334 got := result[k/2] 335 if len(got) != cap(got) { 336 t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test) 337 return 338 } 339 expect := test.text[submatches[k]:submatches[k+1]] 340 if expect != string(got) { 341 t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test) 342 return 343 } 344 } 345 } 346 347 func TestFindSubmatch(t *testing.T) { 348 for _, test := range findTests { 349 result := MustCompile(test.pat).FindSubmatch([]byte(test.text)) 350 switch { 351 case test.matches == nil && result == nil: 352 // ok 353 case test.matches == nil && result != nil: 354 t.Errorf("expected no match; got one: %s", test) 355 case test.matches != nil && result == nil: 356 t.Errorf("expected match; got none: %s", test) 357 case test.matches != nil && result != nil: 358 testSubmatchBytes(&test, 0, test.matches[0], result, t) 359 } 360 } 361 } 362 363 func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) { 364 if len(submatches) != len(result)*2 { 365 t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) 366 return 367 } 368 for k := 0; k < len(submatches); k += 2 { 369 if submatches[k] == -1 { 370 if result[k/2] != "" { 371 t.Errorf("match %d: expected nil got %q: %s", n, result, test) 372 } 373 continue 374 } 375 expect := test.text[submatches[k]:submatches[k+1]] 376 if expect != result[k/2] { 377 t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) 378 return 379 } 380 } 381 } 382 383 func TestFindStringSubmatch(t *testing.T) { 384 for _, test := range findTests { 385 result := MustCompile(test.pat).FindStringSubmatch(test.text) 386 switch { 387 case test.matches == nil && result == nil: 388 // ok 389 case test.matches == nil && result != nil: 390 t.Errorf("expected no match; got one: %s", test) 391 case test.matches != nil && result == nil: 392 t.Errorf("expected match; got none: %s", test) 393 case test.matches != nil && result != nil: 394 testSubmatchString(&test, 0, test.matches[0], result, t) 395 } 396 } 397 } 398 399 func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) { 400 if len(expect) != len(result) { 401 t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test) 402 return 403 } 404 for k, e := range expect { 405 if e != result[k] { 406 t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test) 407 } 408 } 409 } 410 411 func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) { 412 switch { 413 case test.matches == nil && result == nil: 414 // ok 415 case test.matches == nil && result != nil: 416 t.Errorf("expected no match; got one: %s", test) 417 case test.matches != nil && result == nil: 418 t.Errorf("expected match; got none: %s", test) 419 case test.matches != nil && result != nil: 420 testSubmatchIndices(test, 0, test.matches[0], result, t) 421 } 422 } 423 424 func TestFindSubmatchIndex(t *testing.T) { 425 for _, test := range findTests { 426 testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t) 427 } 428 } 429 430 func TestFindStringSubmatchIndex(t *testing.T) { 431 for _, test := range findTests { 432 testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t) 433 } 434 } 435 436 func TestFindReaderSubmatchIndex(t *testing.T) { 437 for _, test := range findTests { 438 testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t) 439 } 440 } 441 442 // Now come the monster AllSubmatch cases. 443 444 func TestFindAllSubmatch(t *testing.T) { 445 for _, test := range findTests { 446 result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1) 447 switch { 448 case test.matches == nil && result == nil: 449 // ok 450 case test.matches == nil && result != nil: 451 t.Errorf("expected no match; got one: %s", test) 452 case test.matches != nil && result == nil: 453 t.Errorf("expected match; got none: %s", test) 454 case len(test.matches) != len(result): 455 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 456 case test.matches != nil && result != nil: 457 for k, match := range test.matches { 458 testSubmatchBytes(&test, k, match, result[k], t) 459 } 460 } 461 } 462 } 463 464 func TestFindAllStringSubmatch(t *testing.T) { 465 for _, test := range findTests { 466 result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1) 467 switch { 468 case test.matches == nil && result == nil: 469 // ok 470 case test.matches == nil && result != nil: 471 t.Errorf("expected no match; got one: %s", test) 472 case test.matches != nil && result == nil: 473 t.Errorf("expected match; got none: %s", test) 474 case len(test.matches) != len(result): 475 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 476 case test.matches != nil && result != nil: 477 for k, match := range test.matches { 478 testSubmatchString(&test, k, match, result[k], t) 479 } 480 } 481 } 482 } 483 484 func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) { 485 switch { 486 case test.matches == nil && result == nil: 487 // ok 488 case test.matches == nil && result != nil: 489 t.Errorf("expected no match; got one: %s", test) 490 case test.matches != nil && result == nil: 491 t.Errorf("expected match; got none: %s", test) 492 case len(test.matches) != len(result): 493 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 494 case test.matches != nil && result != nil: 495 for k, match := range test.matches { 496 testSubmatchIndices(test, k, match, result[k], t) 497 } 498 } 499 } 500 501 func TestFindAllSubmatchIndex(t *testing.T) { 502 for _, test := range findTests { 503 testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t) 504 } 505 } 506 507 func TestFindAllStringSubmatchIndex(t *testing.T) { 508 for _, test := range findTests { 509 testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t) 510 } 511 }