github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/regexp/find_test.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package regexp
     6  
     7  import (
     8  	"github.com/x04/go/src/fmt"
     9  	"github.com/x04/go/src/strings"
    10  	"github.com/x04/go/src/testing"
    11  )
    12  
    13  // For each pattern/text pair, what is the expected output of each function?
    14  // We can derive the textual results from the indexed results, the non-submatch
    15  // results from the submatched results, the single results from the 'all' results,
    16  // and the byte results from the string results. Therefore the table includes
    17  // only the FindAllStringSubmatchIndex result.
    18  type FindTest struct {
    19  	pat	string
    20  	text	string
    21  	matches	[][]int
    22  }
    23  
    24  func (t FindTest) String() string {
    25  	return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
    26  }
    27  
// findTests is the shared table ranged over by the Find tests below. Each
// entry gives a pattern, a text, and the expected index groups built with
// build(n, ...): n matches, each holding the overall match bounds followed by
// the bounds of every capture group (-1, -1 for a group that did not
// participate). A nil matches field means the pattern must not match.
var findTests = []FindTest{
	{``, ``, build(1, 0, 0)},
	{`^abcdefg`, "abcdefg", build(1, 0, 7)},
	{`a+`, "baaab", build(1, 1, 4)},
	{"abcd..", "abcdef", build(1, 0, 6)},
	{`a`, "a", build(1, 0, 1)},
	{`x`, "y", nil},
	{`b`, "abc", build(1, 1, 2)},
	{`.`, "a", build(1, 0, 1)},
	{`.*`, "abcdef", build(1, 0, 6)},
	{`^`, "abcde", build(1, 0, 0)},
	{`$`, "abcde", build(1, 5, 5)},
	{`^abcd$`, "abcd", build(1, 0, 4)},
	{`^bcd'`, "abcdef", nil},
	{`^abcd$`, "abcde", nil},
	{`a+`, "baaab", build(1, 1, 4)},
	{`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
	{`[a-z]+`, "abcd", build(1, 0, 4)},
	{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
	{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
	{`[^\n]+`, "abcd\n", build(1, 0, 4)},
	// The expected offsets are byte indices: each of the runes below
	// occupies three bytes in UTF-8.
	{`[日本語]+`, "日本語日本語", build(1, 0, 18)},
	{`日本語+`, "日本語", build(1, 0, 9)},
	{`日本語+`, "日本語語語語", build(1, 0, 18)},
	{`()`, "", build(1, 0, 0, 0, 0)},
	{`(a)`, "a", build(1, 0, 1, 0, 1)},
	{`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
	{`(.*)`, "", build(1, 0, 0, 0, 0)},
	{`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
	{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
	{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
	{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
	{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
	{`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
	{`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},

	{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
	{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
	{`[.]`, ".", build(1, 0, 1)},
	{`/$`, "/abc/", build(1, 4, 5)},
	{`/$`, "/abc", nil},

	// multiple matches
	{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
	{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
	{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
	{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
	{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},

	// fixed bugs
	{`ab$`, "cab", build(1, 1, 3)},
	{`axxb$`, "axxcb", nil},
	{`data`, "daXY data", build(1, 5, 9)},
	{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
	{`zx+`, "zzx", build(1, 1, 3)},
	{`ab$`, "abcab", build(1, 3, 5)},
	{`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
	{`(?:.|(?:.a))`, "", nil},
	{`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
	{`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
	{`(a){0}`, "", build(1, 0, 0, -1, -1)},
	{`(?-s)(?:(?:^).)`, "\n", nil},
	{`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
	{`(?:(?:^).)`, "\n", nil},
	{`\b`, "x", build(2, 0, 0, 1, 1)},
	{`\b`, "xx", build(2, 0, 0, 2, 2)},
	{`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
	{`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
	{`\B`, "x", nil},
	{`\B`, "xx", build(1, 1, 1)},
	{`\B`, "x y", nil},
	{`\B`, "xx yy", build(2, 1, 1, 4, 4)},

	// RE2 tests
	{`[^\S\s]`, "abcd", nil},
	{`[^\S[:space:]]`, "abcd", nil},
	{`[^\D\d]`, "abcd", nil},
	{`[^\D[:digit:]]`, "abcd", nil},
	{`(?i)\W`, "x", nil},
	{`(?i)\W`, "k", nil},
	{`(?i)\W`, "s", nil},

	// can backslash-escape any punctuation
	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
	{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
	{"\\`", "`", build(1, 0, 1)},
	{"[\\`]+", "`", build(1, 0, 1)},

	// long set of matches (longer than startSize)
	{
		".",
		"qwertyuiopasdfghjklzxcvbnm1234567890",
		build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
			10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
			20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
			30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
	},
}
   128  
   129  // build is a helper to construct a [][]int by extracting n sequences from x.
   130  // This represents n matches with len(x)/n submatches each.
   131  func build(n int, x ...int) [][]int {
   132  	ret := make([][]int, n)
   133  	runLength := len(x) / n
   134  	j := 0
   135  	for i := range ret {
   136  		ret[i] = make([]int, runLength)
   137  		copy(ret[i], x[j:])
   138  		j += runLength
   139  		if j > len(x) {
   140  			panic("invalid build entry")
   141  		}
   142  	}
   143  	return ret
   144  }
   145  
   146  // First the simple cases.
   147  
   148  func TestFind(t *testing.T) {
   149  	for _, test := range findTests {
   150  		re := MustCompile(test.pat)
   151  		if re.String() != test.pat {
   152  			t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
   153  		}
   154  		result := re.Find([]byte(test.text))
   155  		switch {
   156  		case len(test.matches) == 0 && len(result) == 0:
   157  			// ok
   158  		case test.matches == nil && result != nil:
   159  			t.Errorf("expected no match; got one: %s", test)
   160  		case test.matches != nil && result == nil:
   161  			t.Errorf("expected match; got none: %s", test)
   162  		case test.matches != nil && result != nil:
   163  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
   164  			if len(result) != cap(result) {
   165  				t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test)
   166  			}
   167  			if expect != string(result) {
   168  				t.Errorf("expected %q got %q: %s", expect, result, test)
   169  			}
   170  		}
   171  	}
   172  }
   173  
   174  func TestFindString(t *testing.T) {
   175  	for _, test := range findTests {
   176  		result := MustCompile(test.pat).FindString(test.text)
   177  		switch {
   178  		case len(test.matches) == 0 && len(result) == 0:
   179  			// ok
   180  		case test.matches == nil && result != "":
   181  			t.Errorf("expected no match; got one: %s", test)
   182  		case test.matches != nil && result == "":
   183  			// Tricky because an empty result has two meanings: no match or empty match.
   184  			if test.matches[0][0] != test.matches[0][1] {
   185  				t.Errorf("expected match; got none: %s", test)
   186  			}
   187  		case test.matches != nil && result != "":
   188  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
   189  			if expect != result {
   190  				t.Errorf("expected %q got %q: %s", expect, result, test)
   191  			}
   192  		}
   193  	}
   194  }
   195  
   196  func testFindIndex(test *FindTest, result []int, t *testing.T) {
   197  	switch {
   198  	case len(test.matches) == 0 && len(result) == 0:
   199  		// ok
   200  	case test.matches == nil && result != nil:
   201  		t.Errorf("expected no match; got one: %s", test)
   202  	case test.matches != nil && result == nil:
   203  		t.Errorf("expected match; got none: %s", test)
   204  	case test.matches != nil && result != nil:
   205  		expect := test.matches[0]
   206  		if expect[0] != result[0] || expect[1] != result[1] {
   207  			t.Errorf("expected %v got %v: %s", expect, result, test)
   208  		}
   209  	}
   210  }
   211  
   212  func TestFindIndex(t *testing.T) {
   213  	for _, test := range findTests {
   214  		testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
   215  	}
   216  }
   217  
   218  func TestFindStringIndex(t *testing.T) {
   219  	for _, test := range findTests {
   220  		testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
   221  	}
   222  }
   223  
   224  func TestFindReaderIndex(t *testing.T) {
   225  	for _, test := range findTests {
   226  		testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
   227  	}
   228  }
   229  
   230  // Now come the simple All cases.
   231  
   232  func TestFindAll(t *testing.T) {
   233  	for _, test := range findTests {
   234  		result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
   235  		switch {
   236  		case test.matches == nil && result == nil:
   237  			// ok
   238  		case test.matches == nil && result != nil:
   239  			t.Errorf("expected no match; got one: %s", test)
   240  		case test.matches != nil && result == nil:
   241  			t.Fatalf("expected match; got none: %s", test)
   242  		case test.matches != nil && result != nil:
   243  			if len(test.matches) != len(result) {
   244  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   245  				continue
   246  			}
   247  			for k, e := range test.matches {
   248  				got := result[k]
   249  				if len(got) != cap(got) {
   250  					t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test)
   251  				}
   252  				expect := test.text[e[0]:e[1]]
   253  				if expect != string(got) {
   254  					t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test)
   255  				}
   256  			}
   257  		}
   258  	}
   259  }
   260  
   261  func TestFindAllString(t *testing.T) {
   262  	for _, test := range findTests {
   263  		result := MustCompile(test.pat).FindAllString(test.text, -1)
   264  		switch {
   265  		case test.matches == nil && result == nil:
   266  			// ok
   267  		case test.matches == nil && result != nil:
   268  			t.Errorf("expected no match; got one: %s", test)
   269  		case test.matches != nil && result == nil:
   270  			t.Errorf("expected match; got none: %s", test)
   271  		case test.matches != nil && result != nil:
   272  			if len(test.matches) != len(result) {
   273  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   274  				continue
   275  			}
   276  			for k, e := range test.matches {
   277  				expect := test.text[e[0]:e[1]]
   278  				if expect != result[k] {
   279  					t.Errorf("expected %q got %q: %s", expect, result, test)
   280  				}
   281  			}
   282  		}
   283  	}
   284  }
   285  
   286  func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
   287  	switch {
   288  	case test.matches == nil && result == nil:
   289  		// ok
   290  	case test.matches == nil && result != nil:
   291  		t.Errorf("expected no match; got one: %s", test)
   292  	case test.matches != nil && result == nil:
   293  		t.Errorf("expected match; got none: %s", test)
   294  	case test.matches != nil && result != nil:
   295  		if len(test.matches) != len(result) {
   296  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   297  			return
   298  		}
   299  		for k, e := range test.matches {
   300  			if e[0] != result[k][0] || e[1] != result[k][1] {
   301  				t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
   302  			}
   303  		}
   304  	}
   305  }
   306  
   307  func TestFindAllIndex(t *testing.T) {
   308  	for _, test := range findTests {
   309  		testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
   310  	}
   311  }
   312  
   313  func TestFindAllStringIndex(t *testing.T) {
   314  	for _, test := range findTests {
   315  		testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
   316  	}
   317  }
   318  
   319  // Now come the Submatch cases.
   320  
   321  func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
   322  	if len(submatches) != len(result)*2 {
   323  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
   324  		return
   325  	}
   326  	for k := 0; k < len(submatches); k += 2 {
   327  		if submatches[k] == -1 {
   328  			if result[k/2] != nil {
   329  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
   330  			}
   331  			continue
   332  		}
   333  		got := result[k/2]
   334  		if len(got) != cap(got) {
   335  			t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test)
   336  			return
   337  		}
   338  		expect := test.text[submatches[k]:submatches[k+1]]
   339  		if expect != string(got) {
   340  			t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test)
   341  			return
   342  		}
   343  	}
   344  }
   345  
   346  func TestFindSubmatch(t *testing.T) {
   347  	for _, test := range findTests {
   348  		result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
   349  		switch {
   350  		case test.matches == nil && result == nil:
   351  			// ok
   352  		case test.matches == nil && result != nil:
   353  			t.Errorf("expected no match; got one: %s", test)
   354  		case test.matches != nil && result == nil:
   355  			t.Errorf("expected match; got none: %s", test)
   356  		case test.matches != nil && result != nil:
   357  			testSubmatchBytes(&test, 0, test.matches[0], result, t)
   358  		}
   359  	}
   360  }
   361  
   362  func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
   363  	if len(submatches) != len(result)*2 {
   364  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
   365  		return
   366  	}
   367  	for k := 0; k < len(submatches); k += 2 {
   368  		if submatches[k] == -1 {
   369  			if result[k/2] != "" {
   370  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
   371  			}
   372  			continue
   373  		}
   374  		expect := test.text[submatches[k]:submatches[k+1]]
   375  		if expect != result[k/2] {
   376  			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
   377  			return
   378  		}
   379  	}
   380  }
   381  
   382  func TestFindStringSubmatch(t *testing.T) {
   383  	for _, test := range findTests {
   384  		result := MustCompile(test.pat).FindStringSubmatch(test.text)
   385  		switch {
   386  		case test.matches == nil && result == nil:
   387  			// ok
   388  		case test.matches == nil && result != nil:
   389  			t.Errorf("expected no match; got one: %s", test)
   390  		case test.matches != nil && result == nil:
   391  			t.Errorf("expected match; got none: %s", test)
   392  		case test.matches != nil && result != nil:
   393  			testSubmatchString(&test, 0, test.matches[0], result, t)
   394  		}
   395  	}
   396  }
   397  
   398  func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
   399  	if len(expect) != len(result) {
   400  		t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
   401  		return
   402  	}
   403  	for k, e := range expect {
   404  		if e != result[k] {
   405  			t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
   406  		}
   407  	}
   408  }
   409  
   410  func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
   411  	switch {
   412  	case test.matches == nil && result == nil:
   413  		// ok
   414  	case test.matches == nil && result != nil:
   415  		t.Errorf("expected no match; got one: %s", test)
   416  	case test.matches != nil && result == nil:
   417  		t.Errorf("expected match; got none: %s", test)
   418  	case test.matches != nil && result != nil:
   419  		testSubmatchIndices(test, 0, test.matches[0], result, t)
   420  	}
   421  }
   422  
   423  func TestFindSubmatchIndex(t *testing.T) {
   424  	for _, test := range findTests {
   425  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
   426  	}
   427  }
   428  
   429  func TestFindStringSubmatchIndex(t *testing.T) {
   430  	for _, test := range findTests {
   431  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
   432  	}
   433  }
   434  
   435  func TestFindReaderSubmatchIndex(t *testing.T) {
   436  	for _, test := range findTests {
   437  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
   438  	}
   439  }
   440  
   441  // Now come the monster AllSubmatch cases.
   442  
   443  func TestFindAllSubmatch(t *testing.T) {
   444  	for _, test := range findTests {
   445  		result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
   446  		switch {
   447  		case test.matches == nil && result == nil:
   448  			// ok
   449  		case test.matches == nil && result != nil:
   450  			t.Errorf("expected no match; got one: %s", test)
   451  		case test.matches != nil && result == nil:
   452  			t.Errorf("expected match; got none: %s", test)
   453  		case len(test.matches) != len(result):
   454  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   455  		case test.matches != nil && result != nil:
   456  			for k, match := range test.matches {
   457  				testSubmatchBytes(&test, k, match, result[k], t)
   458  			}
   459  		}
   460  	}
   461  }
   462  
   463  func TestFindAllStringSubmatch(t *testing.T) {
   464  	for _, test := range findTests {
   465  		result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
   466  		switch {
   467  		case test.matches == nil && result == nil:
   468  			// ok
   469  		case test.matches == nil && result != nil:
   470  			t.Errorf("expected no match; got one: %s", test)
   471  		case test.matches != nil && result == nil:
   472  			t.Errorf("expected match; got none: %s", test)
   473  		case len(test.matches) != len(result):
   474  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   475  		case test.matches != nil && result != nil:
   476  			for k, match := range test.matches {
   477  				testSubmatchString(&test, k, match, result[k], t)
   478  			}
   479  		}
   480  	}
   481  }
   482  
   483  func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
   484  	switch {
   485  	case test.matches == nil && result == nil:
   486  		// ok
   487  	case test.matches == nil && result != nil:
   488  		t.Errorf("expected no match; got one: %s", test)
   489  	case test.matches != nil && result == nil:
   490  		t.Errorf("expected match; got none: %s", test)
   491  	case len(test.matches) != len(result):
   492  		t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   493  	case test.matches != nil && result != nil:
   494  		for k, match := range test.matches {
   495  			testSubmatchIndices(test, k, match, result[k], t)
   496  		}
   497  	}
   498  }
   499  
   500  func TestFindAllSubmatchIndex(t *testing.T) {
   501  	for _, test := range findTests {
   502  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
   503  	}
   504  }
   505  
   506  func TestFindAllStringSubmatchIndex(t *testing.T) {
   507  	for _, test := range findTests {
   508  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
   509  	}
   510  }