github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/regexp/find_test.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package regexp 6 7 import ( 8 "github.com/x04/go/src/fmt" 9 "github.com/x04/go/src/strings" 10 "github.com/x04/go/src/testing" 11 ) 12 13 // For each pattern/text pair, what is the expected output of each function? 14 // We can derive the textual results from the indexed results, the non-submatch 15 // results from the submatched results, the single results from the 'all' results, 16 // and the byte results from the string results. Therefore the table includes 17 // only the FindAllStringSubmatchIndex result. 18 type FindTest struct { 19 pat string 20 text string 21 matches [][]int 22 } 23 24 func (t FindTest) String() string { 25 return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text) 26 } 27 28 var findTests = []FindTest{ 29 {``, ``, build(1, 0, 0)}, 30 {`^abcdefg`, "abcdefg", build(1, 0, 7)}, 31 {`a+`, "baaab", build(1, 1, 4)}, 32 {"abcd..", "abcdef", build(1, 0, 6)}, 33 {`a`, "a", build(1, 0, 1)}, 34 {`x`, "y", nil}, 35 {`b`, "abc", build(1, 1, 2)}, 36 {`.`, "a", build(1, 0, 1)}, 37 {`.*`, "abcdef", build(1, 0, 6)}, 38 {`^`, "abcde", build(1, 0, 0)}, 39 {`$`, "abcde", build(1, 5, 5)}, 40 {`^abcd$`, "abcd", build(1, 0, 4)}, 41 {`^bcd'`, "abcdef", nil}, 42 {`^abcd$`, "abcde", nil}, 43 {`a+`, "baaab", build(1, 1, 4)}, 44 {`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)}, 45 {`[a-z]+`, "abcd", build(1, 0, 4)}, 46 {`[^a-z]+`, "ab1234cd", build(1, 2, 6)}, 47 {`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)}, 48 {`[^\n]+`, "abcd\n", build(1, 0, 4)}, 49 {`[日本語]+`, "日本語日本語", build(1, 0, 18)}, 50 {`日本語+`, "日本語", build(1, 0, 9)}, 51 {`日本語+`, "日本語語語語", build(1, 0, 18)}, 52 {`()`, "", build(1, 0, 0, 0, 0)}, 53 {`(a)`, "a", build(1, 0, 1, 0, 1)}, 54 {`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)}, 55 {`(.*)`, "", build(1, 0, 0, 0, 0)}, 56 {`(.*)`, "abcd", build(1, 0, 4, 0, 4)}, 57 {`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)}, 58 {`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)}, 59 {`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)}, 60 {`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)}, 61 {`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)}, 62 {`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)}, 63 64 {`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)}, 65 {`(.*).*`, "ab", build(1, 0, 2, 0, 2)}, 66 {`[.]`, ".", build(1, 0, 1)}, 67 {`/$`, "/abc/", build(1, 4, 5)}, 68 {`/$`, "/abc", nil}, 69 70 // multiple matches 71 {`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)}, 72 {`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)}, 73 {`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)}, 74 {`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)}, 75 {`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)}, 76 77 // fixed bugs 78 {`ab$`, "cab", build(1, 1, 3)}, 79 {`axxb$`, "axxcb", nil}, 80 {`data`, "daXY data", build(1, 5, 9)}, 81 {`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)}, 82 {`zx+`, "zzx", build(1, 1, 3)}, 83 {`ab$`, "abcab", build(1, 3, 5)}, 84 {`(aa)*$`, "a", build(1, 1, 1, -1, -1)}, 85 {`(?:.|(?:.a))`, "", nil}, 86 {`(?:A(?:A|a))`, "Aa", build(1, 0, 2)}, 87 {`(?:A|(?:A|a))`, "a", build(1, 0, 1)}, 88 {`(a){0}`, "", build(1, 0, 0, -1, -1)}, 89 {`(?-s)(?:(?:^).)`, "\n", nil}, 90 {`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)}, 91 {`(?:(?:^).)`, "\n", nil}, 92 {`\b`, "x", build(2, 0, 0, 1, 1)}, 93 {`\b`, "xx", build(2, 0, 0, 2, 2)}, 94 {`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)}, 95 {`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)}, 96 {`\B`, "x", nil}, 97 {`\B`, "xx", build(1, 1, 1)}, 98 {`\B`, "x y", nil}, 99 {`\B`, "xx yy", build(2, 1, 1, 4, 4)}, 100 101 // RE2 tests 102 {`[^\S\s]`, "abcd", nil}, 103 {`[^\S[:space:]]`, "abcd", nil}, 104 {`[^\D\d]`, "abcd", nil}, 105 {`[^\D[:digit:]]`, "abcd", nil}, 106 {`(?i)\W`, "x", nil}, 107 {`(?i)\W`, "k", nil}, 108 {`(?i)\W`, "s", nil}, 109 110 // can backslash-escape any punctuation 111 {`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`, 112 `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)}, 113 {`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`, 114 `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)}, 115 {"\\`", "`", build(1, 0, 1)}, 116 {"[\\`]+", "`", build(1, 0, 1)}, 117 118 // long set of matches (longer than startSize) 119 { 120 ".", 121 "qwertyuiopasdfghjklzxcvbnm1234567890", 122 build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 123 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 124 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 125 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36), 126 }, 127 } 128 129 // build is a helper to construct a [][]int by extracting n sequences from x. 130 // This represents n matches with len(x)/n submatches each. 131 func build(n int, x ...int) [][]int { 132 ret := make([][]int, n) 133 runLength := len(x) / n 134 j := 0 135 for i := range ret { 136 ret[i] = make([]int, runLength) 137 copy(ret[i], x[j:]) 138 j += runLength 139 if j > len(x) { 140 panic("invalid build entry") 141 } 142 } 143 return ret 144 } 145 146 // First the simple cases. 147 148 func TestFind(t *testing.T) { 149 for _, test := range findTests { 150 re := MustCompile(test.pat) 151 if re.String() != test.pat { 152 t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat) 153 } 154 result := re.Find([]byte(test.text)) 155 switch { 156 case len(test.matches) == 0 && len(result) == 0: 157 // ok 158 case test.matches == nil && result != nil: 159 t.Errorf("expected no match; got one: %s", test) 160 case test.matches != nil && result == nil: 161 t.Errorf("expected match; got none: %s", test) 162 case test.matches != nil && result != nil: 163 expect := test.text[test.matches[0][0]:test.matches[0][1]] 164 if len(result) != cap(result) { 165 t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test) 166 } 167 if expect != string(result) { 168 t.Errorf("expected %q got %q: %s", expect, result, test) 169 } 170 } 171 } 172 } 173 174 func TestFindString(t *testing.T) { 175 for _, test := range findTests { 176 result := MustCompile(test.pat).FindString(test.text) 177 switch { 178 case len(test.matches) == 0 && len(result) == 0: 179 // ok 180 case test.matches == nil && result != "": 181 t.Errorf("expected no match; got one: %s", test) 182 case test.matches != nil && result == "": 183 // Tricky because an empty result has two meanings: no match or empty match. 184 if test.matches[0][0] != test.matches[0][1] { 185 t.Errorf("expected match; got none: %s", test) 186 } 187 case test.matches != nil && result != "": 188 expect := test.text[test.matches[0][0]:test.matches[0][1]] 189 if expect != result { 190 t.Errorf("expected %q got %q: %s", expect, result, test) 191 } 192 } 193 } 194 } 195 196 func testFindIndex(test *FindTest, result []int, t *testing.T) { 197 switch { 198 case len(test.matches) == 0 && len(result) == 0: 199 // ok 200 case test.matches == nil && result != nil: 201 t.Errorf("expected no match; got one: %s", test) 202 case test.matches != nil && result == nil: 203 t.Errorf("expected match; got none: %s", test) 204 case test.matches != nil && result != nil: 205 expect := test.matches[0] 206 if expect[0] != result[0] || expect[1] != result[1] { 207 t.Errorf("expected %v got %v: %s", expect, result, test) 208 } 209 } 210 } 211 212 func TestFindIndex(t *testing.T) { 213 for _, test := range findTests { 214 testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t) 215 } 216 } 217 218 func TestFindStringIndex(t *testing.T) { 219 for _, test := range findTests { 220 testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t) 221 } 222 } 223 224 func TestFindReaderIndex(t *testing.T) { 225 for _, test := range findTests { 226 testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t) 227 } 228 } 229 230 // Now come the simple All cases. 231 232 func TestFindAll(t *testing.T) { 233 for _, test := range findTests { 234 result := MustCompile(test.pat).FindAll([]byte(test.text), -1) 235 switch { 236 case test.matches == nil && result == nil: 237 // ok 238 case test.matches == nil && result != nil: 239 t.Errorf("expected no match; got one: %s", test) 240 case test.matches != nil && result == nil: 241 t.Fatalf("expected match; got none: %s", test) 242 case test.matches != nil && result != nil: 243 if len(test.matches) != len(result) { 244 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 245 continue 246 } 247 for k, e := range test.matches { 248 got := result[k] 249 if len(got) != cap(got) { 250 t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test) 251 } 252 expect := test.text[e[0]:e[1]] 253 if expect != string(got) { 254 t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test) 255 } 256 } 257 } 258 } 259 } 260 261 func TestFindAllString(t *testing.T) { 262 for _, test := range findTests { 263 result := MustCompile(test.pat).FindAllString(test.text, -1) 264 switch { 265 case test.matches == nil && result == nil: 266 // ok 267 case test.matches == nil && result != nil: 268 t.Errorf("expected no match; got one: %s", test) 269 case test.matches != nil && result == nil: 270 t.Errorf("expected match; got none: %s", test) 271 case test.matches != nil && result != nil: 272 if len(test.matches) != len(result) { 273 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 274 continue 275 } 276 for k, e := range test.matches { 277 expect := test.text[e[0]:e[1]] 278 if expect != result[k] { 279 t.Errorf("expected %q got %q: %s", expect, result, test) 280 } 281 } 282 } 283 } 284 } 285 286 func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) { 287 switch { 288 case test.matches == nil && result == nil: 289 // ok 290 case test.matches == nil && result != nil: 291 t.Errorf("expected no match; got one: %s", test) 292 case test.matches != nil && result == nil: 293 t.Errorf("expected match; got none: %s", test) 294 case test.matches != nil && result != nil: 295 if len(test.matches) != len(result) { 296 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 297 return 298 } 299 for k, e := range test.matches { 300 if e[0] != result[k][0] || e[1] != result[k][1] { 301 t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test) 302 } 303 } 304 } 305 } 306 307 func TestFindAllIndex(t *testing.T) { 308 for _, test := range findTests { 309 testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t) 310 } 311 } 312 313 func TestFindAllStringIndex(t *testing.T) { 314 for _, test := range findTests { 315 testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t) 316 } 317 } 318 319 // Now come the Submatch cases. 320 321 func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) { 322 if len(submatches) != len(result)*2 { 323 t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) 324 return 325 } 326 for k := 0; k < len(submatches); k += 2 { 327 if submatches[k] == -1 { 328 if result[k/2] != nil { 329 t.Errorf("match %d: expected nil got %q: %s", n, result, test) 330 } 331 continue 332 } 333 got := result[k/2] 334 if len(got) != cap(got) { 335 t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test) 336 return 337 } 338 expect := test.text[submatches[k]:submatches[k+1]] 339 if expect != string(got) { 340 t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test) 341 return 342 } 343 } 344 } 345 346 func TestFindSubmatch(t *testing.T) { 347 for _, test := range findTests { 348 result := MustCompile(test.pat).FindSubmatch([]byte(test.text)) 349 switch { 350 case test.matches == nil && result == nil: 351 // ok 352 case test.matches == nil && result != nil: 353 t.Errorf("expected no match; got one: %s", test) 354 case test.matches != nil && result == nil: 355 t.Errorf("expected match; got none: %s", test) 356 case test.matches != nil && result != nil: 357 testSubmatchBytes(&test, 0, test.matches[0], result, t) 358 } 359 } 360 } 361 362 func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) { 363 if len(submatches) != len(result)*2 { 364 t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) 365 return 366 } 367 for k := 0; k < len(submatches); k += 2 { 368 if submatches[k] == -1 { 369 if result[k/2] != "" { 370 t.Errorf("match %d: expected nil got %q: %s", n, result, test) 371 } 372 continue 373 } 374 expect := test.text[submatches[k]:submatches[k+1]] 375 if expect != result[k/2] { 376 t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) 377 return 378 } 379 } 380 } 381 382 func TestFindStringSubmatch(t *testing.T) { 383 for _, test := range findTests { 384 result := MustCompile(test.pat).FindStringSubmatch(test.text) 385 switch { 386 case test.matches == nil && result == nil: 387 // ok 388 case test.matches == nil && result != nil: 389 t.Errorf("expected no match; got one: %s", test) 390 case test.matches != nil && result == nil: 391 t.Errorf("expected match; got none: %s", test) 392 case test.matches != nil && result != nil: 393 testSubmatchString(&test, 0, test.matches[0], result, t) 394 } 395 } 396 } 397 398 func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) { 399 if len(expect) != len(result) { 400 t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test) 401 return 402 } 403 for k, e := range expect { 404 if e != result[k] { 405 t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test) 406 } 407 } 408 } 409 410 func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) { 411 switch { 412 case test.matches == nil && result == nil: 413 // ok 414 case test.matches == nil && result != nil: 415 t.Errorf("expected no match; got one: %s", test) 416 case test.matches != nil && result == nil: 417 t.Errorf("expected match; got none: %s", test) 418 case test.matches != nil && result != nil: 419 testSubmatchIndices(test, 0, test.matches[0], result, t) 420 } 421 } 422 423 func TestFindSubmatchIndex(t *testing.T) { 424 for _, test := range findTests { 425 testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t) 426 } 427 } 428 429 func TestFindStringSubmatchIndex(t *testing.T) { 430 for _, test := range findTests { 431 testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t) 432 } 433 } 434 435 func TestFindReaderSubmatchIndex(t *testing.T) { 436 for _, test := range findTests { 437 testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t) 438 } 439 } 440 441 // Now come the monster AllSubmatch cases. 442 443 func TestFindAllSubmatch(t *testing.T) { 444 for _, test := range findTests { 445 result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1) 446 switch { 447 case test.matches == nil && result == nil: 448 // ok 449 case test.matches == nil && result != nil: 450 t.Errorf("expected no match; got one: %s", test) 451 case test.matches != nil && result == nil: 452 t.Errorf("expected match; got none: %s", test) 453 case len(test.matches) != len(result): 454 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 455 case test.matches != nil && result != nil: 456 for k, match := range test.matches { 457 testSubmatchBytes(&test, k, match, result[k], t) 458 } 459 } 460 } 461 } 462 463 func TestFindAllStringSubmatch(t *testing.T) { 464 for _, test := range findTests { 465 result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1) 466 switch { 467 case test.matches == nil && result == nil: 468 // ok 469 case test.matches == nil && result != nil: 470 t.Errorf("expected no match; got one: %s", test) 471 case test.matches != nil && result == nil: 472 t.Errorf("expected match; got none: %s", test) 473 case len(test.matches) != len(result): 474 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 475 case test.matches != nil && result != nil: 476 for k, match := range test.matches { 477 testSubmatchString(&test, k, match, result[k], t) 478 } 479 } 480 } 481 } 482 483 func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) { 484 switch { 485 case test.matches == nil && result == nil: 486 // ok 487 case test.matches == nil && result != nil: 488 t.Errorf("expected no match; got one: %s", test) 489 case test.matches != nil && result == nil: 490 t.Errorf("expected match; got none: %s", test) 491 case len(test.matches) != len(result): 492 t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) 493 case test.matches != nil && result != nil: 494 for k, match := range test.matches { 495 testSubmatchIndices(test, k, match, result[k], t) 496 } 497 } 498 } 499 500 func TestFindAllSubmatchIndex(t *testing.T) { 501 for _, test := range findTests { 502 testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t) 503 } 504 } 505 506 func TestFindAllStringSubmatchIndex(t *testing.T) { 507 for _, test := range findTests { 508 testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t) 509 } 510 }