github.com/tcnksm/go@v0.0.0-20141208075154-439b32936367/src/regexp/exec_test.go

github.com/tcnksm/go@v0.0.0-20141208075154-439b32936367/src/regexp/exec_test.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package regexp
     6  
     7  import (
     8  	"bufio"
     9  	"compress/bzip2"
    10  	"fmt"
    11  	"io"
    12  	"os"
    13  	"path/filepath"
    14  	"regexp/syntax"
    15  	"strconv"
    16  	"strings"
    17  	"testing"
    18  	"unicode/utf8"
    19  )
    20  
    21  // TestRE2Search tests this package's regexp API against test cases
    22  // considered during RE2's exhaustive tests, which run all possible
    23  // regexps over a given set of atoms and operators, up to a given
    24  // complexity, over all possible strings over a given alphabet,
    25  // up to a given size.  Rather than try to link with RE2, we read a
    26  // log file containing the test cases and the expected matches.
    27  // The log file, re2.txt, is generated by running 'make exhaustive-log'
    28  // in the open source RE2 distribution.  http://code.google.com/p/re2/
    29  //
    30  // The test file format is a sequence of stanzas like:
    31  //
    32  //	strings
    33  //	"abc"
    34  //	"123x"
    35  //	regexps
    36  //	"[a-z]+"
    37  //	0-3;0-3
    38  //	-;-
    39  //	"([0-9])([0-9])([0-9])"
    40  //	-;-
    41  //	-;0-3 0-1 1-2 2-3
    42  //
    43  // The stanza begins by defining a set of strings, quoted
    44  // using Go double-quote syntax, one per line.  Then the
    45  // regexps section gives a sequence of regexps to run on
    46  // the strings.  In the block that follows a regexp, each line
    47  // gives the semicolon-separated match results of running
    48  // the regexp on the corresponding string.
    49  // Each match result is either a single -, meaning no match, or a
    50  // space-separated sequence of pairs giving the match and
    51  // submatch indices.  An unmatched subexpression formats
    52  // its pair as a single - (not illustrated above).  For now
    53  // each regexp run produces two match results, one for a
    54  // ``full match'' that restricts the regexp to matching the entire
    55  // string or nothing, and one for a ``partial match'' that gives
    56  // the leftmost first match found in the string.
    57  //
    58  // Lines beginning with # are comments.  Lines beginning with
    59  // a capital letter are test names printed during RE2's test suite
    60  // and are echoed into t but otherwise ignored.
    61  //
    62  // At time of writing, re2.txt is 32 MB but compresses to 760 kB,
    63  // so the log may be stored bzip2-compressed: testRE2 decompresses
    64  // any file whose name ends in .bz2 on the fly.
    64  //
    65  func TestRE2Search(t *testing.T) {
    66  	testRE2(t, "testdata/re2-search.txt")
    67  }
    68  
    69  func testRE2(t *testing.T, file string) {
    70  	f, err := os.Open(file)
    71  	if err != nil {
    72  		t.Fatal(err)
    73  	}
    74  	defer f.Close()
    75  	var txt io.Reader
    76  	if strings.HasSuffix(file, ".bz2") {
    77  		z := bzip2.NewReader(f)
    78  		txt = z
    79  		file = file[:len(file)-len(".bz2")] // for error messages
    80  	} else {
    81  		txt = f
    82  	}
    83  	lineno := 0
    84  	scanner := bufio.NewScanner(txt)
    85  	var (
    86  		str       []string
    87  		input     []string
    88  		inStrings bool
    89  		re        *Regexp
    90  		refull    *Regexp
    91  		nfail     int
    92  		ncase     int
    93  	)
    94  	for lineno := 1; scanner.Scan(); lineno++ {
    95  		line := scanner.Text()
    96  		switch {
    97  		case line == "":
    98  			t.Fatalf("%s:%d: unexpected blank line", file, lineno)
    99  		case line[0] == '#':
   100  			continue
   101  		case 'A' <= line[0] && line[0] <= 'Z':
   102  			// Test name.
   103  			t.Logf("%s\n", line)
   104  			continue
   105  		case line == "strings":
   106  			str = str[:0]
   107  			inStrings = true
   108  		case line == "regexps":
   109  			inStrings = false
   110  		case line[0] == '"':
   111  			q, err := strconv.Unquote(line)
   112  			if err != nil {
   113  				// Fatal because we'll get out of sync.
   114  				t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
   115  			}
   116  			if inStrings {
   117  				str = append(str, q)
   118  				continue
   119  			}
   120  			// Is a regexp.
   121  			if len(input) != 0 {
   122  				t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
   123  			}
   124  			re, err = tryCompile(q)
   125  			if err != nil {
   126  				if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
   127  					// We don't and likely never will support \C; keep going.
   128  					continue
   129  				}
   130  				t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
   131  				if nfail++; nfail >= 100 {
   132  					t.Fatalf("stopping after %d errors", nfail)
   133  				}
   134  				continue
   135  			}
   136  			full := `\A(?:` + q + `)\z`
   137  			refull, err = tryCompile(full)
   138  			if err != nil {
   139  				// Fatal because q worked, so this should always work.
   140  				t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
   141  			}
   142  			input = str
   143  		case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
   144  			// A sequence of match results.
   145  			ncase++
   146  			if re == nil {
   147  				// Failed to compile: skip results.
   148  				continue
   149  			}
   150  			if len(input) == 0 {
   151  				t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
   152  			}
   153  			var text string
   154  			text, input = input[0], input[1:]
   155  			if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
   156  				// RE2's \B considers every byte position,
   157  				// so it sees 'not word boundary' in the
   158  				// middle of UTF-8 sequences.  This package
   159  				// only considers the positions between runes,
   160  				// so it disagrees.  Skip those cases.
   161  				continue
   162  			}
   163  			res := strings.Split(line, ";")
   164  			if len(res) != len(run) {
   165  				t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
   166  			}
   167  			for i := range res {
   168  				have, suffix := run[i](re, refull, text)
   169  				want := parseResult(t, file, lineno, res[i])
   170  				if !same(have, want) {
   171  					t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
   172  					if nfail++; nfail >= 100 {
   173  						t.Fatalf("stopping after %d errors", nfail)
   174  					}
   175  					continue
   176  				}
   177  				b, suffix := match[i](re, refull, text)
   178  				if b != (want != nil) {
   179  					t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
   180  					if nfail++; nfail >= 100 {
   181  						t.Fatalf("stopping after %d errors", nfail)
   182  					}
   183  					continue
   184  				}
   185  			}
   186  
   187  		default:
   188  			t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
   189  		}
   190  	}
   191  	if err := scanner.Err(); err != nil {
   192  		t.Fatalf("%s:%d: %v", file, lineno, err)
   193  	}
   194  	if len(input) != 0 {
   195  		t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
   196  	}
   197  	t.Logf("%d cases tested", ncase)
   198  }
   199  
   200  var run = []func(*Regexp, *Regexp, string) ([]int, string){
   201  	runFull,
   202  	runPartial,
   203  	runFullLongest,
   204  	runPartialLongest,
   205  }
   206  
   207  func runFull(re, refull *Regexp, text string) ([]int, string) {
   208  	refull.longest = false
   209  	return refull.FindStringSubmatchIndex(text), "[full]"
   210  }
   211  
   212  func runPartial(re, refull *Regexp, text string) ([]int, string) {
   213  	re.longest = false
   214  	return re.FindStringSubmatchIndex(text), ""
   215  }
   216  
   217  func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
   218  	refull.longest = true
   219  	return refull.FindStringSubmatchIndex(text), "[full,longest]"
   220  }
   221  
   222  func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
   223  	re.longest = true
   224  	return re.FindStringSubmatchIndex(text), "[longest]"
   225  }
   226  
   227  var match = []func(*Regexp, *Regexp, string) (bool, string){
   228  	matchFull,
   229  	matchPartial,
   230  	matchFullLongest,
   231  	matchPartialLongest,
   232  }
   233  
   234  func matchFull(re, refull *Regexp, text string) (bool, string) {
   235  	refull.longest = false
   236  	return refull.MatchString(text), "[full]"
   237  }
   238  
   239  func matchPartial(re, refull *Regexp, text string) (bool, string) {
   240  	re.longest = false
   241  	return re.MatchString(text), ""
   242  }
   243  
   244  func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
   245  	refull.longest = true
   246  	return refull.MatchString(text), "[full,longest]"
   247  }
   248  
   249  func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
   250  	re.longest = true
   251  	return re.MatchString(text), "[longest]"
   252  }
   253  
   254  func isSingleBytes(s string) bool {
   255  	for _, c := range s {
   256  		if c >= utf8.RuneSelf {
   257  			return false
   258  		}
   259  	}
   260  	return true
   261  }
   262  
   263  func tryCompile(s string) (re *Regexp, err error) {
   264  	// Protect against panic during Compile.
   265  	defer func() {
   266  		if r := recover(); r != nil {
   267  			err = fmt.Errorf("panic: %v", r)
   268  		}
   269  	}()
   270  	return Compile(s)
   271  }
   272  
   273  func parseResult(t *testing.T, file string, lineno int, res string) []int {
   274  	// A single - indicates no match.
   275  	if res == "-" {
   276  		return nil
   277  	}
   278  	// Otherwise, a space-separated list of pairs.
   279  	n := 1
   280  	for j := 0; j < len(res); j++ {
   281  		if res[j] == ' ' {
   282  			n++
   283  		}
   284  	}
   285  	out := make([]int, 2*n)
   286  	i := 0
   287  	n = 0
   288  	for j := 0; j <= len(res); j++ {
   289  		if j == len(res) || res[j] == ' ' {
   290  			// Process a single pair.  - means no submatch.
   291  			pair := res[i:j]
   292  			if pair == "-" {
   293  				out[n] = -1
   294  				out[n+1] = -1
   295  			} else {
   296  				k := strings.Index(pair, "-")
   297  				if k < 0 {
   298  					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
   299  				}
   300  				lo, err1 := strconv.Atoi(pair[:k])
   301  				hi, err2 := strconv.Atoi(pair[k+1:])
   302  				if err1 != nil || err2 != nil || lo > hi {
   303  					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
   304  				}
   305  				out[n] = lo
   306  				out[n+1] = hi
   307  			}
   308  			n += 2
   309  			i = j + 1
   310  		}
   311  	}
   312  	return out
   313  }
   314  
   315  func same(x, y []int) bool {
   316  	if len(x) != len(y) {
   317  		return false
   318  	}
   319  	for i, xi := range x {
   320  		if xi != y[i] {
   321  			return false
   322  		}
   323  	}
   324  	return true
   325  }
   326  
   327  // TestFowler runs this package's regexp API against the
   328  // POSIX regular expression tests collected by Glenn Fowler
   329  // at http://www2.research.att.com/~gsf/testregex/.
   330  func TestFowler(t *testing.T) {
   331  	files, err := filepath.Glob("testdata/*.dat")
   332  	if err != nil {
   333  		t.Fatal(err)
   334  	}
   335  	for _, file := range files {
   336  		t.Log(file)
   337  		testFowler(t, file)
   338  	}
   339  }
   340  
   341  var notab = MustCompilePOSIX(`[^\t]+`)
   342  
   343  func testFowler(t *testing.T, file string) {
   344  	f, err := os.Open(file)
   345  	if err != nil {
   346  		t.Error(err)
   347  		return
   348  	}
   349  	defer f.Close()
   350  	b := bufio.NewReader(f)
   351  	lineno := 0
   352  	lastRegexp := ""
   353  Reading:
   354  	for {
   355  		lineno++
   356  		line, err := b.ReadString('\n')
   357  		if err != nil {
   358  			if err != io.EOF {
   359  				t.Errorf("%s:%d: %v", file, lineno, err)
   360  			}
   361  			break Reading
   362  		}
   363  
   364  		// http://www2.research.att.com/~gsf/man/man1/testregex.html
   365  		//
   366  		// INPUT FORMAT
   367  		//   Input lines may be blank, a comment beginning with #, or a test
   368  		//   specification. A specification is five fields separated by one
   369  		//   or more tabs. NULL denotes the empty string and NIL denotes the
   370  		//   0 pointer.
   371  		if line[0] == '#' || line[0] == '\n' {
   372  			continue Reading
   373  		}
   374  		line = line[:len(line)-1]
   375  		field := notab.FindAllString(line, -1)
   376  		for i, f := range field {
   377  			if f == "NULL" {
   378  				field[i] = ""
   379  			}
   380  			if f == "NIL" {
   381  				t.Logf("%s:%d: skip: %s", file, lineno, line)
   382  				continue Reading
   383  			}
   384  		}
   385  		if len(field) == 0 {
   386  			continue Reading
   387  		}
   388  
   389  		//   Field 1: the regex(3) flags to apply, one character per REG_feature
   390  		//   flag. The test is skipped if REG_feature is not supported by the
   391  		//   implementation. If the first character is not [BEASKLP] then the
   392  		//   specification is a global control line. One or more of [BEASKLP] may be
   393  		//   specified; the test will be repeated for each mode.
   394  		//
   395  		//     B 	basic			BRE	(grep, ed, sed)
   396  		//     E 	REG_EXTENDED		ERE	(egrep)
   397  		//     A	REG_AUGMENTED		ARE	(egrep with negation)
   398  		//     S	REG_SHELL		SRE	(sh glob)
   399  		//     K	REG_SHELL|REG_AUGMENTED	KRE	(ksh glob)
   400  		//     L	REG_LITERAL		LRE	(fgrep)
   401  		//
   402  		//     a	REG_LEFT|REG_RIGHT	implicit ^...$
   403  		//     b	REG_NOTBOL		lhs does not match ^
   404  		//     c	REG_COMMENT		ignore space and #...\n
   405  		//     d	REG_SHELL_DOT		explicit leading . match
   406  		//     e	REG_NOTEOL		rhs does not match $
   407  		//     f	REG_MULTIPLE		multiple \n separated patterns
   408  		//     g	FNM_LEADING_DIR		testfnmatch only -- match until /
   409  		//     h	REG_MULTIREF		multiple digit backref
   410  		//     i	REG_ICASE		ignore case
   411  		//     j	REG_SPAN		. matches \n
   412  		//     k	REG_ESCAPE		\ to ecape [...] delimiter
   413  		//     l	REG_LEFT		implicit ^...
   414  		//     m	REG_MINIMAL		minimal match
   415  		//     n	REG_NEWLINE		explicit \n match
   416  		//     o	REG_ENCLOSED		(|&) magic inside [@|&](...)
   417  		//     p	REG_SHELL_PATH		explicit / match
   418  		//     q	REG_DELIMITED		delimited pattern
   419  		//     r	REG_RIGHT		implicit ...$
   420  		//     s	REG_SHELL_ESCAPED	\ not special
   421  		//     t	REG_MUSTDELIM		all delimiters must be specified
   422  		//     u	standard unspecified behavior -- errors not counted
   423  		//     v	REG_CLASS_ESCAPE	\ special inside [...]
   424  		//     w	REG_NOSUB		no subexpression match array
   425  		//     x	REG_LENIENT		let some errors slide
   426  		//     y	REG_LEFT		regexec() implicit ^...
   427  		//     z	REG_NULL		NULL subexpressions ok
   428  		//     $	                        expand C \c escapes in fields 2 and 3
   429  		//     /	                        field 2 is a regsubcomp() expression
   430  		//     =	                        field 3 is a regdecomp() expression
   431  		//
   432  		//   Field 1 control lines:
   433  		//
   434  		//     C		set LC_COLLATE and LC_CTYPE to locale in field 2
   435  		//
   436  		//     ?test ...	output field 5 if passed and != EXPECTED, silent otherwise
   437  		//     &test ...	output field 5 if current and previous passed
   438  		//     |test ...	output field 5 if current passed and previous failed
   439  		//     ; ...	output field 2 if previous failed
   440  		//     {test ...	skip if failed until }
   441  		//     }		end of skip
   442  		//
   443  		//     : comment		comment copied as output NOTE
   444  		//     :comment:test	:comment: ignored
   445  		//     N[OTE] comment	comment copied as output NOTE
   446  		//     T[EST] comment	comment
   447  		//
   448  		//     number		use number for nmatch (20 by default)
   449  		flag := field[0]
   450  		switch flag[0] {
   451  		case '?', '&', '|', ';', '{', '}':
   452  			// Ignore all the control operators.
   453  			// Just run everything.
   454  			flag = flag[1:]
   455  			if flag == "" {
   456  				continue Reading
   457  			}
   458  		case ':':
   459  			i := strings.Index(flag[1:], ":")
   460  			if i < 0 {
   461  				t.Logf("skip: %s", line)
   462  				continue Reading
   463  			}
   464  			flag = flag[1+i+1:]
   465  		case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   466  			t.Logf("skip: %s", line)
   467  			continue Reading
   468  		}
   469  
   470  		// Can check field count now that we've handled the myriad comment formats.
   471  		if len(field) < 4 {
   472  			t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
   473  			continue Reading
   474  		}
   475  
   476  		// Expand C escapes (a.k.a. Go escapes).
   477  		if strings.Contains(flag, "$") {
   478  			f := `"` + field[1] + `"`
   479  			if field[1], err = strconv.Unquote(f); err != nil {
   480  				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
   481  			}
   482  			f = `"` + field[2] + `"`
   483  			if field[2], err = strconv.Unquote(f); err != nil {
   484  				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
   485  			}
   486  		}
   487  
   488  		//   Field 2: the regular expression pattern; SAME uses the pattern from
   489  		//     the previous specification.
   490  		//
   491  		if field[1] == "SAME" {
   492  			field[1] = lastRegexp
   493  		}
   494  		lastRegexp = field[1]
   495  
   496  		//   Field 3: the string to match.
   497  		text := field[2]
   498  
   499  		//   Field 4: the test outcome...
   500  		ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
   501  		if !ok {
   502  			t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
   503  			continue Reading
   504  		}
   505  
   506  		//   Field 5: optional comment appended to the report.
   507  
   508  	Testing:
   509  		// Run test once for each specified capital letter mode that we support.
   510  		for _, c := range flag {
   511  			pattern := field[1]
   512  			syn := syntax.POSIX | syntax.ClassNL
   513  			switch c {
   514  			default:
   515  				continue Testing
   516  			case 'E':
   517  				// extended regexp (what we support)
   518  			case 'L':
   519  				// literal
   520  				pattern = QuoteMeta(pattern)
   521  			}
   522  
   523  			for _, c := range flag {
   524  				switch c {
   525  				case 'i':
   526  					syn |= syntax.FoldCase
   527  				}
   528  			}
   529  
   530  			re, err := compile(pattern, syn, true)
   531  			if err != nil {
   532  				if shouldCompile {
   533  					t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
   534  				}
   535  				continue Testing
   536  			}
   537  			if !shouldCompile {
   538  				t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
   539  				continue Testing
   540  			}
   541  			match := re.MatchString(text)
   542  			if match != shouldMatch {
   543  				t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
   544  				continue Testing
   545  			}
   546  			have := re.FindStringSubmatchIndex(text)
   547  			if (len(have) > 0) != match {
   548  				t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
   549  				continue Testing
   550  			}
   551  			if len(have) > len(pos) {
   552  				have = have[:len(pos)]
   553  			}
   554  			if !same(have, pos) {
   555  				t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
   556  			}
   557  		}
   558  	}
   559  }
   560  
   561  func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
   562  	//   Field 4: the test outcome. This is either one of the posix error
   563  	//     codes (with REG_ omitted) or the match array, a list of (m,n)
   564  	//     entries with m and n being first and last+1 positions in the
   565  	//     field 3 string, or NULL if REG_NOSUB is in effect and success
   566  	//     is expected. BADPAT is acceptable in place of any regcomp(3)
   567  	//     error code. The match[] array is initialized to (-2,-2) before
   568  	//     each test. All array elements from 0 to nmatch-1 must be specified
   569  	//     in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
   570  	//     Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
   571  	//     matched (?{...}) expression, where x is the text enclosed by {...},
   572  	//     o is the expression ordinal counting from 1, and n is the length of
   573  	//     the unmatched portion of the subject string. If x starts with a
   574  	//     number then that is the return value of re_execf(), otherwise 0 is
   575  	//     returned.
   576  	switch {
   577  	case s == "":
   578  		// Match with no position information.
   579  		ok = true
   580  		compiled = true
   581  		matched = true
   582  		return
   583  	case s == "NOMATCH":
   584  		// Match failure.
   585  		ok = true
   586  		compiled = true
   587  		matched = false
   588  		return
   589  	case 'A' <= s[0] && s[0] <= 'Z':
   590  		// All the other error codes are compile errors.
   591  		ok = true
   592  		compiled = false
   593  		return
   594  	}
   595  	compiled = true
   596  
   597  	var x []int
   598  	for s != "" {
   599  		var end byte = ')'
   600  		if len(x)%2 == 0 {
   601  			if s[0] != '(' {
   602  				ok = false
   603  				return
   604  			}
   605  			s = s[1:]
   606  			end = ','
   607  		}
   608  		i := 0
   609  		for i < len(s) && s[i] != end {
   610  			i++
   611  		}
   612  		if i == 0 || i == len(s) {
   613  			ok = false
   614  			return
   615  		}
   616  		var v = -1
   617  		var err error
   618  		if s[:i] != "?" {
   619  			v, err = strconv.Atoi(s[:i])
   620  			if err != nil {
   621  				ok = false
   622  				return
   623  			}
   624  		}
   625  		x = append(x, v)
   626  		s = s[i+1:]
   627  	}
   628  	if len(x)%2 != 0 {
   629  		ok = false
   630  		return
   631  	}
   632  	ok = true
   633  	matched = true
   634  	pos = x
   635  	return
   636  }
   637  
   638  var text []byte
   639  
   640  func makeText(n int) []byte {
   641  	if len(text) >= n {
   642  		return text[:n]
   643  	}
   644  	text = make([]byte, n)
   645  	x := ^uint32(0)
   646  	for i := range text {
   647  		x += x
   648  		x ^= 1
   649  		if int32(x) < 0 {
   650  			x ^= 0x88888eef
   651  		}
   652  		if x%31 == 0 {
   653  			text[i] = '\n'
   654  		} else {
   655  			text[i] = byte(x%(0x7E+1-0x20) + 0x20)
   656  		}
   657  	}
   658  	return text
   659  }
   660  
   661  func benchmark(b *testing.B, re string, n int) {
   662  	r := MustCompile(re)
   663  	t := makeText(n)
   664  	b.ResetTimer()
   665  	b.SetBytes(int64(n))
   666  	for i := 0; i < b.N; i++ {
   667  		if r.Match(t) {
   668  			b.Fatal("match!")
   669  		}
   670  	}
   671  }
   672  
   673  const (
   674  	easy0  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
   675  	easy1  = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
   676  	medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
   677  	hard   = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
   678  	parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
   679  		"(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
   680  )
   681  
   682  func BenchmarkMatchEasy0_32(b *testing.B)   { benchmark(b, easy0, 32<<0) }
   683  func BenchmarkMatchEasy0_1K(b *testing.B)   { benchmark(b, easy0, 1<<10) }
   684  func BenchmarkMatchEasy0_32K(b *testing.B)  { benchmark(b, easy0, 32<<10) }
   685  func BenchmarkMatchEasy0_1M(b *testing.B)   { benchmark(b, easy0, 1<<20) }
   686  func BenchmarkMatchEasy0_32M(b *testing.B)  { benchmark(b, easy0, 32<<20) }
   687  func BenchmarkMatchEasy1_32(b *testing.B)   { benchmark(b, easy1, 32<<0) }
   688  func BenchmarkMatchEasy1_1K(b *testing.B)   { benchmark(b, easy1, 1<<10) }
   689  func BenchmarkMatchEasy1_32K(b *testing.B)  { benchmark(b, easy1, 32<<10) }
   690  func BenchmarkMatchEasy1_1M(b *testing.B)   { benchmark(b, easy1, 1<<20) }
   691  func BenchmarkMatchEasy1_32M(b *testing.B)  { benchmark(b, easy1, 32<<20) }
   692  func BenchmarkMatchMedium_32(b *testing.B)  { benchmark(b, medium, 32<<0) }
   693  func BenchmarkMatchMedium_1K(b *testing.B)  { benchmark(b, medium, 1<<10) }
   694  func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
   695  func BenchmarkMatchMedium_1M(b *testing.B)  { benchmark(b, medium, 1<<20) }
   696  func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
   697  func BenchmarkMatchHard_32(b *testing.B)    { benchmark(b, hard, 32<<0) }
   698  func BenchmarkMatchHard_1K(b *testing.B)    { benchmark(b, hard, 1<<10) }
   699  func BenchmarkMatchHard_32K(b *testing.B)   { benchmark(b, hard, 32<<10) }
   700  func BenchmarkMatchHard_1M(b *testing.B)    { benchmark(b, hard, 1<<20) }
   701  func BenchmarkMatchHard_32M(b *testing.B)   { benchmark(b, hard, 32<<20) }
   702  
   703  func TestLongest(t *testing.T) {
   704  	re, err := Compile(`a(|b)`)
   705  	if err != nil {
   706  		t.Fatal(err)
   707  	}
   708  	if g, w := re.FindString("ab"), "a"; g != w {
   709  		t.Errorf("first match was %q, want %q", g, w)
   710  	}
   711  	re.Longest()
   712  	if g, w := re.FindString("ab"), "ab"; g != w {
   713  		t.Errorf("longest match was %q, want %q", g, w)
   714  	}
   715  }