github.com/gnolang/gno@v0.0.0-20240520182011-228e9d0192ce/gnovm/stdlibs/regexp/find_test.gno (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package regexp
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  	"testing"
    11  )
    12  
    13  // For each pattern/text pair, what is the expected output of each function?
    14  // We can derive the textual results from the indexed results, the non-submatch
    15  // results from the submatched results, the single results from the 'all' results,
    16  // and the byte results from the string results. Therefore the table includes
    17  // only the FindAllStringSubmatchIndex result.
    18  type FindTest struct {
    19  	pat     string
    20  	text    string
    21  	matches [][]int
    22  }
    23  
    24  func (t FindTest) String() string {
    25  	return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
    26  }
    27  
// findTests is the table that every Find* test in this file iterates over.
// Each entry gives a pattern, an input text, and the expected matches built
// with build (or nil when the tests expect no match). Every match is a flat
// list of start/end pairs: the whole match first, then one pair per capture
// group. Offsets index bytes of text (the tests slice test.text with them), so
// multi-byte characters span several positions.
var findTests = []FindTest{
	{``, ``, build(1, 0, 0)},
	{`^abcdefg`, "abcdefg", build(1, 0, 7)},
	{`a+`, "baaab", build(1, 1, 4)},
	{"abcd..", "abcdef", build(1, 0, 6)},
	{`a`, "a", build(1, 0, 1)},
	{`x`, "y", nil},
	{`b`, "abc", build(1, 1, 2)},
	{`.`, "a", build(1, 0, 1)},
	{`.*`, "abcdef", build(1, 0, 6)},
	{`^`, "abcde", build(1, 0, 0)},
	{`$`, "abcde", build(1, 5, 5)},
	{`^abcd$`, "abcd", build(1, 0, 4)},
	{`^bcd'`, "abcdef", nil},
	{`^abcd$`, "abcde", nil},
	{`a+`, "baaab", build(1, 1, 4)},
	{`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
	{`[a-z]+`, "abcd", build(1, 0, 4)},
	{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
	{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
	{`[^\n]+`, "abcd\n", build(1, 0, 4)},
	// Non-ASCII text: the expected offsets below count bytes, not characters.
	{`[日本語]+`, "日本語日本語", build(1, 0, 18)},
	{`日本語+`, "日本語", build(1, 0, 9)},
	{`日本語+`, "日本語語語語", build(1, 0, 18)},
	// Patterns with capture groups: each match carries extra pairs after the
	// whole-match pair.
	{`()`, "", build(1, 0, 0, 0, 0)},
	{`(a)`, "a", build(1, 0, 1, 0, 1)},
	{`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
	{`(.*)`, "", build(1, 0, 0, 0, 0)},
	{`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
	{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
	{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
	{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
	{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
	{`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
	{`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},

	// A -1 start offset marks a submatch for which the submatch helpers expect
	// a nil (bytes) or empty (string) result.
	{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
	{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
	{`[.]`, ".", build(1, 0, 1)},
	{`/$`, "/abc/", build(1, 4, 5)},
	{`/$`, "/abc", nil},

	// multiple matches
	{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
	{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
	{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
	{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
	{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},

	// fixed bugs
	{`ab$`, "cab", build(1, 1, 3)},
	{`axxb$`, "axxcb", nil},
	{`data`, "daXY data", build(1, 5, 9)},
	{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
	{`zx+`, "zzx", build(1, 1, 3)},
	{`ab$`, "abcab", build(1, 3, 5)},
	{`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
	{`(?:.|(?:.a))`, "", nil},
	{`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
	{`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
	{`(a){0}`, "", build(1, 0, 0, -1, -1)},
	{`(?-s)(?:(?:^).)`, "\n", nil},
	{`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
	{`(?:(?:^).)`, "\n", nil},
	{`\b`, "x", build(2, 0, 0, 1, 1)},
	{`\b`, "xx", build(2, 0, 0, 2, 2)},
	{`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
	{`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
	{`\B`, "x", nil},
	{`\B`, "xx", build(1, 1, 1)},
	{`\B`, "x y", nil},
	{`\B`, "xx yy", build(2, 1, 1, 4, 4)},
	{`(|a)*`, "aa", build(3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2)},

	// RE2 tests
	{`[^\S\s]`, "abcd", nil},
	{`[^\S[:space:]]`, "abcd", nil},
	{`[^\D\d]`, "abcd", nil},
	{`[^\D[:digit:]]`, "abcd", nil},
	{`(?i)\W`, "x", nil},
	{`(?i)\W`, "k", nil},
	{`(?i)\W`, "s", nil},

	// can backslash-escape any punctuation
	{
		`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31),
	},
	{
		`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31),
	},
	{"\\`", "`", build(1, 0, 1)},
	{"[\\`]+", "`", build(1, 0, 1)},

	// long set of matches (longer than startSize)
	{
		".",
		"qwertyuiopasdfghjklzxcvbnm1234567890",
		build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
			10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
			20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
			30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
	},
}
   133  
   134  // build is a helper to construct a [][]int by extracting n sequences from x.
   135  // This represents n matches with len(x)/n submatches each.
   136  func build(n int, x ...int) [][]int {
   137  	ret := make([][]int, n)
   138  	runLength := len(x) / n
   139  	j := 0
   140  	for i := range ret {
   141  		ret[i] = make([]int, runLength)
   142  		copy(ret[i], x[j:])
   143  		j += runLength
   144  		if j > len(x) {
   145  			panic("invalid build entry")
   146  		}
   147  	}
   148  	return ret
   149  }
   150  
   151  // First the simple cases.
   152  
   153  func TestFind(t *testing.T) {
   154  	for _, test := range findTests {
   155  		re := MustCompile(test.pat)
   156  		if re.String() != test.pat {
   157  			t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
   158  		}
   159  		result := re.Find([]byte(test.text))
   160  		switch {
   161  		case len(test.matches) == 0 && len(result) == 0:
   162  			// ok
   163  		case test.matches == nil && result != nil:
   164  			t.Errorf("expected no match; got one: %s", test)
   165  		case test.matches != nil && result == nil:
   166  			t.Errorf("expected match; got none: %s", test)
   167  		case test.matches != nil && result != nil:
   168  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
   169  			if len(result) != cap(result) {
   170  				t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test)
   171  			}
   172  			if expect != string(result) {
   173  				t.Errorf("expected %q got %q: %s", expect, result, test)
   174  			}
   175  		}
   176  	}
   177  }
   178  
   179  func TestFindString(t *testing.T) {
   180  	for _, test := range findTests {
   181  		result := MustCompile(test.pat).FindString(test.text)
   182  		switch {
   183  		case len(test.matches) == 0 && len(result) == 0:
   184  			// ok
   185  		case test.matches == nil && result != "":
   186  			t.Errorf("expected no match; got one: %s", test)
   187  		case test.matches != nil && result == "":
   188  			// Tricky because an empty result has two meanings: no match or empty match.
   189  			if test.matches[0][0] != test.matches[0][1] {
   190  				t.Errorf("expected match; got none: %s", test)
   191  			}
   192  		case test.matches != nil && result != "":
   193  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
   194  			if expect != result {
   195  				t.Errorf("expected %q got %q: %s", expect, result, test)
   196  			}
   197  		}
   198  	}
   199  }
   200  
   201  func testFindIndex(test *FindTest, result []int, t *testing.T) {
   202  	switch {
   203  	case len(test.matches) == 0 && len(result) == 0:
   204  		// ok
   205  	case test.matches == nil && result != nil:
   206  		t.Errorf("expected no match; got one: %s", test)
   207  	case test.matches != nil && result == nil:
   208  		t.Errorf("expected match; got none: %s", test)
   209  	case test.matches != nil && result != nil:
   210  		expect := test.matches[0]
   211  		if expect[0] != result[0] || expect[1] != result[1] {
   212  			t.Errorf("expected %v got %v: %s", expect, result, test)
   213  		}
   214  	}
   215  }
   216  
   217  func TestFindIndex(t *testing.T) {
   218  	for _, test := range findTests {
   219  		testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
   220  	}
   221  }
   222  
   223  func TestFindStringIndex(t *testing.T) {
   224  	for _, test := range findTests {
   225  		testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
   226  	}
   227  }
   228  
   229  func TestFindReaderIndex(t *testing.T) {
   230  	for _, test := range findTests {
   231  		testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
   232  	}
   233  }
   234  
   235  // Now come the simple All cases.
   236  
   237  func TestFindAll(t *testing.T) {
   238  	for _, test := range findTests {
   239  		result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
   240  		switch {
   241  		case test.matches == nil && result == nil:
   242  			// ok
   243  		case test.matches == nil && result != nil:
   244  			t.Errorf("expected no match; got one: %s", test)
   245  		case test.matches != nil && result == nil:
   246  			t.Fatalf("expected match; got none: %s", test)
   247  		case test.matches != nil && result != nil:
   248  			if len(test.matches) != len(result) {
   249  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   250  				continue
   251  			}
   252  			for k, e := range test.matches {
   253  				got := result[k]
   254  				if len(got) != cap(got) {
   255  					t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test)
   256  				}
   257  				expect := test.text[e[0]:e[1]]
   258  				if expect != string(got) {
   259  					t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test)
   260  				}
   261  			}
   262  		}
   263  	}
   264  }
   265  
   266  func TestFindAllString(t *testing.T) {
   267  	for _, test := range findTests {
   268  		result := MustCompile(test.pat).FindAllString(test.text, -1)
   269  		switch {
   270  		case test.matches == nil && result == nil:
   271  			// ok
   272  		case test.matches == nil && result != nil:
   273  			t.Errorf("expected no match; got one: %s", test)
   274  		case test.matches != nil && result == nil:
   275  			t.Errorf("expected match; got none: %s", test)
   276  		case test.matches != nil && result != nil:
   277  			if len(test.matches) != len(result) {
   278  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   279  				continue
   280  			}
   281  			for k, e := range test.matches {
   282  				expect := test.text[e[0]:e[1]]
   283  				if expect != result[k] {
   284  					t.Errorf("expected %q got %q: %s", expect, result, test)
   285  				}
   286  			}
   287  		}
   288  	}
   289  }
   290  
   291  func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
   292  	switch {
   293  	case test.matches == nil && result == nil:
   294  		// ok
   295  	case test.matches == nil && result != nil:
   296  		t.Errorf("expected no match; got one: %s", test)
   297  	case test.matches != nil && result == nil:
   298  		t.Errorf("expected match; got none: %s", test)
   299  	case test.matches != nil && result != nil:
   300  		if len(test.matches) != len(result) {
   301  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   302  			return
   303  		}
   304  		for k, e := range test.matches {
   305  			if e[0] != result[k][0] || e[1] != result[k][1] {
   306  				t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
   307  			}
   308  		}
   309  	}
   310  }
   311  
   312  func TestFindAllIndex(t *testing.T) {
   313  	for _, test := range findTests {
   314  		testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
   315  	}
   316  }
   317  
   318  func TestFindAllStringIndex(t *testing.T) {
   319  	for _, test := range findTests {
   320  		testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
   321  	}
   322  }
   323  
   324  // Now come the Submatch cases.
   325  
   326  func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
   327  	if len(submatches) != len(result)*2 {
   328  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
   329  		return
   330  	}
   331  	for k := 0; k < len(submatches); k += 2 {
   332  		if submatches[k] == -1 {
   333  			if result[k/2] != nil {
   334  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
   335  			}
   336  			continue
   337  		}
   338  		got := result[k/2]
   339  		if len(got) != cap(got) {
   340  			t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test)
   341  			return
   342  		}
   343  		expect := test.text[submatches[k]:submatches[k+1]]
   344  		if expect != string(got) {
   345  			t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test)
   346  			return
   347  		}
   348  	}
   349  }
   350  
   351  func TestFindSubmatch(t *testing.T) {
   352  	for _, test := range findTests {
   353  		result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
   354  		switch {
   355  		case test.matches == nil && result == nil:
   356  			// ok
   357  		case test.matches == nil && result != nil:
   358  			t.Errorf("expected no match; got one: %s", test)
   359  		case test.matches != nil && result == nil:
   360  			t.Errorf("expected match; got none: %s", test)
   361  		case test.matches != nil && result != nil:
   362  			testSubmatchBytes(&test, 0, test.matches[0], result, t)
   363  		}
   364  	}
   365  }
   366  
   367  func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
   368  	if len(submatches) != len(result)*2 {
   369  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
   370  		return
   371  	}
   372  	for k := 0; k < len(submatches); k += 2 {
   373  		if submatches[k] == -1 {
   374  			if result[k/2] != "" {
   375  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
   376  			}
   377  			continue
   378  		}
   379  		expect := test.text[submatches[k]:submatches[k+1]]
   380  		if expect != result[k/2] {
   381  			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
   382  			return
   383  		}
   384  	}
   385  }
   386  
   387  func TestFindStringSubmatch(t *testing.T) {
   388  	for _, test := range findTests {
   389  		result := MustCompile(test.pat).FindStringSubmatch(test.text)
   390  		switch {
   391  		case test.matches == nil && result == nil:
   392  			// ok
   393  		case test.matches == nil && result != nil:
   394  			t.Errorf("expected no match; got one: %s", test)
   395  		case test.matches != nil && result == nil:
   396  			t.Errorf("expected match; got none: %s", test)
   397  		case test.matches != nil && result != nil:
   398  			testSubmatchString(&test, 0, test.matches[0], result, t)
   399  		}
   400  	}
   401  }
   402  
   403  func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
   404  	if len(expect) != len(result) {
   405  		t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
   406  		return
   407  	}
   408  	for k, e := range expect {
   409  		if e != result[k] {
   410  			t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
   411  		}
   412  	}
   413  }
   414  
   415  func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
   416  	switch {
   417  	case test.matches == nil && result == nil:
   418  		// ok
   419  	case test.matches == nil && result != nil:
   420  		t.Errorf("expected no match; got one: %s", test)
   421  	case test.matches != nil && result == nil:
   422  		t.Errorf("expected match; got none: %s", test)
   423  	case test.matches != nil && result != nil:
   424  		testSubmatchIndices(test, 0, test.matches[0], result, t)
   425  	}
   426  }
   427  
   428  func TestFindSubmatchIndex(t *testing.T) {
   429  	for _, test := range findTests {
   430  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
   431  	}
   432  }
   433  
   434  func TestFindStringSubmatchIndex(t *testing.T) {
   435  	for _, test := range findTests {
   436  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
   437  	}
   438  }
   439  
   440  func TestFindReaderSubmatchIndex(t *testing.T) {
   441  	for _, test := range findTests {
   442  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
   443  	}
   444  }
   445  
   446  // Now come the monster AllSubmatch cases.
   447  
   448  func TestFindAllSubmatch(t *testing.T) {
   449  	for _, test := range findTests {
   450  		result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
   451  		switch {
   452  		case test.matches == nil && result == nil:
   453  			// ok
   454  		case test.matches == nil && result != nil:
   455  			t.Errorf("expected no match; got one: %s", test)
   456  		case test.matches != nil && result == nil:
   457  			t.Errorf("expected match; got none: %s", test)
   458  		case len(test.matches) != len(result):
   459  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   460  		case test.matches != nil && result != nil:
   461  			for k, match := range test.matches {
   462  				testSubmatchBytes(&test, k, match, result[k], t)
   463  			}
   464  		}
   465  	}
   466  }
   467  
   468  func TestFindAllStringSubmatch(t *testing.T) {
   469  	for _, test := range findTests {
   470  		result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
   471  		switch {
   472  		case test.matches == nil && result == nil:
   473  			// ok
   474  		case test.matches == nil && result != nil:
   475  			t.Errorf("expected no match; got one: %s", test)
   476  		case test.matches != nil && result == nil:
   477  			t.Errorf("expected match; got none: %s", test)
   478  		case len(test.matches) != len(result):
   479  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   480  		case test.matches != nil && result != nil:
   481  			for k, match := range test.matches {
   482  				testSubmatchString(&test, k, match, result[k], t)
   483  			}
   484  		}
   485  	}
   486  }
   487  
   488  func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
   489  	switch {
   490  	case test.matches == nil && result == nil:
   491  		// ok
   492  	case test.matches == nil && result != nil:
   493  		t.Errorf("expected no match; got one: %s", test)
   494  	case test.matches != nil && result == nil:
   495  		t.Errorf("expected match; got none: %s", test)
   496  	case len(test.matches) != len(result):
   497  		t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   498  	case test.matches != nil && result != nil:
   499  		for k, match := range test.matches {
   500  			testSubmatchIndices(test, k, match, result[k], t)
   501  		}
   502  	}
   503  }
   504  
   505  func TestFindAllSubmatchIndex(t *testing.T) {
   506  	for _, test := range findTests {
   507  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
   508  	}
   509  }
   510  
   511  func TestFindAllStringSubmatchIndex(t *testing.T) {
   512  	for _, test := range findTests {
   513  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
   514  	}
   515  }