github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/compile/internal/syntax/scanner_test.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"os"
    11  	"strings"
    12  	"testing"
    13  )
    14  
    15  func TestScanner(t *testing.T) {
    16  	if testing.Short() {
    17  		t.Skip("skipping test in short mode")
    18  	}
    19  
    20  	src, err := os.Open("parser.go")
    21  	if err != nil {
    22  		t.Fatal(err)
    23  	}
    24  	defer src.Close()
    25  
    26  	var s scanner
    27  	s.init(src, nil, 0)
    28  	for {
    29  		s.next()
    30  		if s.tok == _EOF {
    31  			break
    32  		}
    33  		if !testing.Verbose() {
    34  			continue
    35  		}
    36  		switch s.tok {
    37  		case _Name:
    38  			fmt.Println(s.line, s.tok, "=>", s.lit)
    39  		case _Operator:
    40  			fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
    41  		default:
    42  			fmt.Println(s.line, s.tok)
    43  		}
    44  	}
    45  }
    46  
    47  func TestTokens(t *testing.T) {
    48  	// make source
    49  	var buf bytes.Buffer
    50  	for i, s := range sampleTokens {
    51  		buf.WriteString("\t\t\t\t"[:i&3])                          // leading indentation
    52  		buf.WriteString(s.src)                                     // token
    53  		buf.WriteString("        "[:i&7])                          // trailing spaces
    54  		fmt.Fprintf(&buf, "/*line foo:%d */ // bar\n", i+linebase) // comments (don't crash w/o directive handler)
    55  	}
    56  
    57  	// scan source
    58  	var got scanner
    59  	got.init(&buf, func(line, col uint, msg string) {
    60  		t.Fatalf("%d:%d: %s", line, col, msg)
    61  	}, 0)
    62  	got.next()
    63  	for i, want := range sampleTokens {
    64  		nlsemi := false
    65  
    66  		if got.line != uint(i+linebase) {
    67  			t.Errorf("got line %d; want %d", got.line, i+linebase)
    68  		}
    69  
    70  		if got.tok != want.tok {
    71  			t.Errorf("got tok = %s; want %s", got.tok, want.tok)
    72  			continue
    73  		}
    74  
    75  		switch want.tok {
    76  		case _Semi:
    77  			if got.lit != "semicolon" {
    78  				t.Errorf("got %s; want semicolon", got.lit)
    79  			}
    80  
    81  		case _Name, _Literal:
    82  			if got.lit != want.src {
    83  				t.Errorf("got lit = %q; want %q", got.lit, want.src)
    84  				continue
    85  			}
    86  			nlsemi = true
    87  
    88  		case _Operator, _AssignOp, _IncOp:
    89  			if got.op != want.op {
    90  				t.Errorf("got op = %s; want %s", got.op, want.op)
    91  				continue
    92  			}
    93  			if got.prec != want.prec {
    94  				t.Errorf("got prec = %d; want %d", got.prec, want.prec)
    95  				continue
    96  			}
    97  			nlsemi = want.tok == _IncOp
    98  
    99  		case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
   100  			nlsemi = true
   101  		}
   102  
   103  		if nlsemi {
   104  			got.next()
   105  			if got.tok != _Semi {
   106  				t.Errorf("got tok = %s; want ;", got.tok)
   107  				continue
   108  			}
   109  			if got.lit != "newline" {
   110  				t.Errorf("got %s; want newline", got.lit)
   111  			}
   112  		}
   113  
   114  		got.next()
   115  	}
   116  
   117  	if got.tok != _EOF {
   118  		t.Errorf("got %q; want _EOF", got.tok)
   119  	}
   120  }
   121  
   122  var sampleTokens = [...]struct {
   123  	tok  token
   124  	src  string
   125  	op   Operator
   126  	prec int
   127  }{
   128  	// name samples
   129  	{_Name, "x", 0, 0},
   130  	{_Name, "X123", 0, 0},
   131  	{_Name, "foo", 0, 0},
   132  	{_Name, "Foo123", 0, 0},
   133  	{_Name, "foo_bar", 0, 0},
   134  	{_Name, "_", 0, 0},
   135  	{_Name, "_foobar", 0, 0},
   136  	{_Name, "a۰۱۸", 0, 0},
   137  	{_Name, "foo६४", 0, 0},
   138  	{_Name, "bar9876", 0, 0},
   139  	{_Name, "ŝ", 0, 0},
   140  	{_Name, "ŝfoo", 0, 0},
   141  
   142  	// literal samples
   143  	{_Literal, "0", 0, 0},
   144  	{_Literal, "1", 0, 0},
   145  	{_Literal, "12345", 0, 0},
   146  	{_Literal, "123456789012345678890123456789012345678890", 0, 0},
   147  	{_Literal, "01234567", 0, 0},
   148  	{_Literal, "0_1_234_567", 0, 0},
   149  	{_Literal, "0X0", 0, 0},
   150  	{_Literal, "0xcafebabe", 0, 0},
   151  	{_Literal, "0x_cafe_babe", 0, 0},
   152  	{_Literal, "0O0", 0, 0},
   153  	{_Literal, "0o000", 0, 0},
   154  	{_Literal, "0o_000", 0, 0},
   155  	{_Literal, "0B1", 0, 0},
   156  	{_Literal, "0b01100110", 0, 0},
   157  	{_Literal, "0b_0110_0110", 0, 0},
   158  	{_Literal, "0.", 0, 0},
   159  	{_Literal, "0.e0", 0, 0},
   160  	{_Literal, "0.e-1", 0, 0},
   161  	{_Literal, "0.e+123", 0, 0},
   162  	{_Literal, ".0", 0, 0},
   163  	{_Literal, ".0E00", 0, 0},
   164  	{_Literal, ".0E-0123", 0, 0},
   165  	{_Literal, ".0E+12345678901234567890", 0, 0},
   166  	{_Literal, ".45e1", 0, 0},
   167  	{_Literal, "3.14159265", 0, 0},
   168  	{_Literal, "1e0", 0, 0},
   169  	{_Literal, "1e+100", 0, 0},
   170  	{_Literal, "1e-100", 0, 0},
   171  	{_Literal, "2.71828e-1000", 0, 0},
   172  	{_Literal, "0i", 0, 0},
   173  	{_Literal, "1i", 0, 0},
   174  	{_Literal, "012345678901234567889i", 0, 0},
   175  	{_Literal, "123456789012345678890i", 0, 0},
   176  	{_Literal, "0.i", 0, 0},
   177  	{_Literal, ".0i", 0, 0},
   178  	{_Literal, "3.14159265i", 0, 0},
   179  	{_Literal, "1e0i", 0, 0},
   180  	{_Literal, "1e+100i", 0, 0},
   181  	{_Literal, "1e-100i", 0, 0},
   182  	{_Literal, "2.71828e-1000i", 0, 0},
   183  	{_Literal, "'a'", 0, 0},
   184  	{_Literal, "'\\000'", 0, 0},
   185  	{_Literal, "'\\xFF'", 0, 0},
   186  	{_Literal, "'\\uff16'", 0, 0},
   187  	{_Literal, "'\\U0000ff16'", 0, 0},
   188  	{_Literal, "`foobar`", 0, 0},
   189  	{_Literal, "`foo\tbar`", 0, 0},
   190  	{_Literal, "`\r`", 0, 0},
   191  
   192  	// operators
   193  	{_Operator, "||", OrOr, precOrOr},
   194  
   195  	{_Operator, "&&", AndAnd, precAndAnd},
   196  
   197  	{_Operator, "==", Eql, precCmp},
   198  	{_Operator, "!=", Neq, precCmp},
   199  	{_Operator, "<", Lss, precCmp},
   200  	{_Operator, "<=", Leq, precCmp},
   201  	{_Operator, ">", Gtr, precCmp},
   202  	{_Operator, ">=", Geq, precCmp},
   203  
   204  	{_Operator, "+", Add, precAdd},
   205  	{_Operator, "-", Sub, precAdd},
   206  	{_Operator, "|", Or, precAdd},
   207  	{_Operator, "^", Xor, precAdd},
   208  
   209  	{_Star, "*", Mul, precMul},
   210  	{_Operator, "/", Div, precMul},
   211  	{_Operator, "%", Rem, precMul},
   212  	{_Operator, "&", And, precMul},
   213  	{_Operator, "&^", AndNot, precMul},
   214  	{_Operator, "<<", Shl, precMul},
   215  	{_Operator, ">>", Shr, precMul},
   216  
   217  	// assignment operations
   218  	{_AssignOp, "+=", Add, precAdd},
   219  	{_AssignOp, "-=", Sub, precAdd},
   220  	{_AssignOp, "|=", Or, precAdd},
   221  	{_AssignOp, "^=", Xor, precAdd},
   222  
   223  	{_AssignOp, "*=", Mul, precMul},
   224  	{_AssignOp, "/=", Div, precMul},
   225  	{_AssignOp, "%=", Rem, precMul},
   226  	{_AssignOp, "&=", And, precMul},
   227  	{_AssignOp, "&^=", AndNot, precMul},
   228  	{_AssignOp, "<<=", Shl, precMul},
   229  	{_AssignOp, ">>=", Shr, precMul},
   230  
   231  	// other operations
   232  	{_IncOp, "++", Add, precAdd},
   233  	{_IncOp, "--", Sub, precAdd},
   234  	{_Assign, "=", 0, 0},
   235  	{_Define, ":=", 0, 0},
   236  	{_Arrow, "<-", 0, 0},
   237  
   238  	// delimiters
   239  	{_Lparen, "(", 0, 0},
   240  	{_Lbrack, "[", 0, 0},
   241  	{_Lbrace, "{", 0, 0},
   242  	{_Rparen, ")", 0, 0},
   243  	{_Rbrack, "]", 0, 0},
   244  	{_Rbrace, "}", 0, 0},
   245  	{_Comma, ",", 0, 0},
   246  	{_Semi, ";", 0, 0},
   247  	{_Colon, ":", 0, 0},
   248  	{_Dot, ".", 0, 0},
   249  	{_DotDotDot, "...", 0, 0},
   250  
   251  	// keywords
   252  	{_Break, "break", 0, 0},
   253  	{_Case, "case", 0, 0},
   254  	{_Chan, "chan", 0, 0},
   255  	{_Const, "const", 0, 0},
   256  	{_Continue, "continue", 0, 0},
   257  	{_Default, "default", 0, 0},
   258  	{_Defer, "defer", 0, 0},
   259  	{_Else, "else", 0, 0},
   260  	{_Fallthrough, "fallthrough", 0, 0},
   261  	{_For, "for", 0, 0},
   262  	{_Func, "func", 0, 0},
   263  	{_Go, "go", 0, 0},
   264  	{_Goto, "goto", 0, 0},
   265  	{_If, "if", 0, 0},
   266  	{_Import, "import", 0, 0},
   267  	{_Interface, "interface", 0, 0},
   268  	{_Map, "map", 0, 0},
   269  	{_Package, "package", 0, 0},
   270  	{_Range, "range", 0, 0},
   271  	{_Return, "return", 0, 0},
   272  	{_Select, "select", 0, 0},
   273  	{_Struct, "struct", 0, 0},
   274  	{_Switch, "switch", 0, 0},
   275  	{_Type, "type", 0, 0},
   276  	{_Var, "var", 0, 0},
   277  }
   278  
   279  func TestComments(t *testing.T) {
   280  	type comment struct {
   281  		line, col uint // 0-based
   282  		text      string
   283  	}
   284  
   285  	for _, test := range []struct {
   286  		src  string
   287  		want comment
   288  	}{
   289  		// no comments
   290  		{"no comment here", comment{0, 0, ""}},
   291  		{" /", comment{0, 0, ""}},
   292  		{"\n /*/", comment{0, 0, ""}},
   293  
   294  		//-style comments
   295  		{"// line comment\n", comment{0, 0, "// line comment"}},
   296  		{"package p // line comment\n", comment{0, 10, "// line comment"}},
   297  		{"//\n//\n\t// want this one\r\n", comment{2, 1, "// want this one\r"}},
   298  		{"\n\n//\n", comment{2, 0, "//"}},
   299  		{"//", comment{0, 0, "//"}},
   300  
   301  		/*-style comments */
   302  		{"/* regular comment */", comment{0, 0, "/* regular comment */"}},
   303  		{"package p /* regular comment", comment{0, 0, ""}},
   304  		{"\n\n\n/*\n*//* want this one */", comment{4, 2, "/* want this one */"}},
   305  		{"\n\n/**/", comment{2, 0, "/**/"}},
   306  		{"/*", comment{0, 0, ""}},
   307  	} {
   308  		var s scanner
   309  		var got comment
   310  		s.init(strings.NewReader(test.src),
   311  			func(line, col uint, msg string) {
   312  				if msg[0] != '/' {
   313  					// error
   314  					if msg != "comment not terminated" {
   315  						t.Errorf("%q: %s", test.src, msg)
   316  					}
   317  					return
   318  				}
   319  				got = comment{line - linebase, col - colbase, msg} // keep last one
   320  			}, comments)
   321  
   322  		for {
   323  			s.next()
   324  			if s.tok == _EOF {
   325  				break
   326  			}
   327  		}
   328  
   329  		want := test.want
   330  		if got.line != want.line || got.col != want.col {
   331  			t.Errorf("%q: got position %d:%d; want %d:%d", test.src, got.line, got.col, want.line, want.col)
   332  		}
   333  		if got.text != want.text {
   334  			t.Errorf("%q: got %q; want %q", test.src, got.text, want.text)
   335  		}
   336  	}
   337  }
   338  
   339  func TestNumbers(t *testing.T) {
   340  	for _, test := range []struct {
   341  		kind             LitKind
   342  		src, tokens, err string
   343  	}{
   344  		// binaries
   345  		{IntLit, "0b0", "0b0", ""},
   346  		{IntLit, "0b1010", "0b1010", ""},
   347  		{IntLit, "0B1110", "0B1110", ""},
   348  
   349  		{IntLit, "0b", "0b", "binary literal has no digits"},
   350  		{IntLit, "0b0190", "0b0190", "invalid digit '9' in binary literal"},
   351  		{IntLit, "0b01a0", "0b01 a0", ""}, // only accept 0-9
   352  
   353  		{FloatLit, "0b.", "0b.", "invalid radix point in binary literal"},
   354  		{FloatLit, "0b.1", "0b.1", "invalid radix point in binary literal"},
   355  		{FloatLit, "0b1.0", "0b1.0", "invalid radix point in binary literal"},
   356  		{FloatLit, "0b1e10", "0b1e10", "'e' exponent requires decimal mantissa"},
   357  		{FloatLit, "0b1P-1", "0b1P-1", "'P' exponent requires hexadecimal mantissa"},
   358  
   359  		{ImagLit, "0b10i", "0b10i", ""},
   360  		{ImagLit, "0b10.0i", "0b10.0i", "invalid radix point in binary literal"},
   361  
   362  		// octals
   363  		{IntLit, "0o0", "0o0", ""},
   364  		{IntLit, "0o1234", "0o1234", ""},
   365  		{IntLit, "0O1234", "0O1234", ""},
   366  
   367  		{IntLit, "0o", "0o", "octal literal has no digits"},
   368  		{IntLit, "0o8123", "0o8123", "invalid digit '8' in octal literal"},
   369  		{IntLit, "0o1293", "0o1293", "invalid digit '9' in octal literal"},
   370  		{IntLit, "0o12a3", "0o12 a3", ""}, // only accept 0-9
   371  
   372  		{FloatLit, "0o.", "0o.", "invalid radix point in octal literal"},
   373  		{FloatLit, "0o.2", "0o.2", "invalid radix point in octal literal"},
   374  		{FloatLit, "0o1.2", "0o1.2", "invalid radix point in octal literal"},
   375  		{FloatLit, "0o1E+2", "0o1E+2", "'E' exponent requires decimal mantissa"},
   376  		{FloatLit, "0o1p10", "0o1p10", "'p' exponent requires hexadecimal mantissa"},
   377  
   378  		{ImagLit, "0o10i", "0o10i", ""},
   379  		{ImagLit, "0o10e0i", "0o10e0i", "'e' exponent requires decimal mantissa"},
   380  
   381  		// 0-octals
   382  		{IntLit, "0", "0", ""},
   383  		{IntLit, "0123", "0123", ""},
   384  
   385  		{IntLit, "08123", "08123", "invalid digit '8' in octal literal"},
   386  		{IntLit, "01293", "01293", "invalid digit '9' in octal literal"},
   387  		{IntLit, "0F.", "0 F .", ""}, // only accept 0-9
   388  		{IntLit, "0123F.", "0123 F .", ""},
   389  		{IntLit, "0123456x", "0123456 x", ""},
   390  
   391  		// decimals
   392  		{IntLit, "1", "1", ""},
   393  		{IntLit, "1234", "1234", ""},
   394  
   395  		{IntLit, "1f", "1 f", ""}, // only accept 0-9
   396  
   397  		{ImagLit, "0i", "0i", ""},
   398  		{ImagLit, "0678i", "0678i", ""},
   399  
   400  		// decimal floats
   401  		{FloatLit, "0.", "0.", ""},
   402  		{FloatLit, "123.", "123.", ""},
   403  		{FloatLit, "0123.", "0123.", ""},
   404  
   405  		{FloatLit, ".0", ".0", ""},
   406  		{FloatLit, ".123", ".123", ""},
   407  		{FloatLit, ".0123", ".0123", ""},
   408  
   409  		{FloatLit, "0.0", "0.0", ""},
   410  		{FloatLit, "123.123", "123.123", ""},
   411  		{FloatLit, "0123.0123", "0123.0123", ""},
   412  
   413  		{FloatLit, "0e0", "0e0", ""},
   414  		{FloatLit, "123e+0", "123e+0", ""},
   415  		{FloatLit, "0123E-1", "0123E-1", ""},
   416  
   417  		{FloatLit, "0.e+1", "0.e+1", ""},
   418  		{FloatLit, "123.E-10", "123.E-10", ""},
   419  		{FloatLit, "0123.e123", "0123.e123", ""},
   420  
   421  		{FloatLit, ".0e-1", ".0e-1", ""},
   422  		{FloatLit, ".123E+10", ".123E+10", ""},
   423  		{FloatLit, ".0123E123", ".0123E123", ""},
   424  
   425  		{FloatLit, "0.0e1", "0.0e1", ""},
   426  		{FloatLit, "123.123E-10", "123.123E-10", ""},
   427  		{FloatLit, "0123.0123e+456", "0123.0123e+456", ""},
   428  
   429  		{FloatLit, "0e", "0e", "exponent has no digits"},
   430  		{FloatLit, "0E+", "0E+", "exponent has no digits"},
   431  		{FloatLit, "1e+f", "1e+ f", "exponent has no digits"},
   432  		{FloatLit, "0p0", "0p0", "'p' exponent requires hexadecimal mantissa"},
   433  		{FloatLit, "1.0P-1", "1.0P-1", "'P' exponent requires hexadecimal mantissa"},
   434  
   435  		{ImagLit, "0.i", "0.i", ""},
   436  		{ImagLit, ".123i", ".123i", ""},
   437  		{ImagLit, "123.123i", "123.123i", ""},
   438  		{ImagLit, "123e+0i", "123e+0i", ""},
   439  		{ImagLit, "123.E-10i", "123.E-10i", ""},
   440  		{ImagLit, ".123E+10i", ".123E+10i", ""},
   441  
   442  		// hexadecimals
   443  		{IntLit, "0x0", "0x0", ""},
   444  		{IntLit, "0x1234", "0x1234", ""},
   445  		{IntLit, "0xcafef00d", "0xcafef00d", ""},
   446  		{IntLit, "0XCAFEF00D", "0XCAFEF00D", ""},
   447  
   448  		{IntLit, "0x", "0x", "hexadecimal literal has no digits"},
   449  		{IntLit, "0x1g", "0x1 g", ""},
   450  
   451  		{ImagLit, "0xf00i", "0xf00i", ""},
   452  
   453  		// hexadecimal floats
   454  		{FloatLit, "0x0p0", "0x0p0", ""},
   455  		{FloatLit, "0x12efp-123", "0x12efp-123", ""},
   456  		{FloatLit, "0xABCD.p+0", "0xABCD.p+0", ""},
   457  		{FloatLit, "0x.0189P-0", "0x.0189P-0", ""},
   458  		{FloatLit, "0x1.ffffp+1023", "0x1.ffffp+1023", ""},
   459  
   460  		{FloatLit, "0x.", "0x.", "hexadecimal literal has no digits"},
   461  		{FloatLit, "0x0.", "0x0.", "hexadecimal mantissa requires a 'p' exponent"},
   462  		{FloatLit, "0x.0", "0x.0", "hexadecimal mantissa requires a 'p' exponent"},
   463  		{FloatLit, "0x1.1", "0x1.1", "hexadecimal mantissa requires a 'p' exponent"},
   464  		{FloatLit, "0x1.1e0", "0x1.1e0", "hexadecimal mantissa requires a 'p' exponent"},
   465  		{FloatLit, "0x1.2gp1a", "0x1.2 gp1a", "hexadecimal mantissa requires a 'p' exponent"},
   466  		{FloatLit, "0x0p", "0x0p", "exponent has no digits"},
   467  		{FloatLit, "0xeP-", "0xeP-", "exponent has no digits"},
   468  		{FloatLit, "0x1234PAB", "0x1234P AB", "exponent has no digits"},
   469  		{FloatLit, "0x1.2p1a", "0x1.2p1 a", ""},
   470  
   471  		{ImagLit, "0xf00.bap+12i", "0xf00.bap+12i", ""},
   472  
   473  		// separators
   474  		{IntLit, "0b_1000_0001", "0b_1000_0001", ""},
   475  		{IntLit, "0o_600", "0o_600", ""},
   476  		{IntLit, "0_466", "0_466", ""},
   477  		{IntLit, "1_000", "1_000", ""},
   478  		{FloatLit, "1_000.000_1", "1_000.000_1", ""},
   479  		{ImagLit, "10e+1_2_3i", "10e+1_2_3i", ""},
   480  		{IntLit, "0x_f00d", "0x_f00d", ""},
   481  		{FloatLit, "0x_f00d.0p1_2", "0x_f00d.0p1_2", ""},
   482  
   483  		{IntLit, "0b__1000", "0b__1000", "'_' must separate successive digits"},
   484  		{IntLit, "0o60___0", "0o60___0", "'_' must separate successive digits"},
   485  		{IntLit, "0466_", "0466_", "'_' must separate successive digits"},
   486  		{FloatLit, "1_.", "1_.", "'_' must separate successive digits"},
   487  		{FloatLit, "0._1", "0._1", "'_' must separate successive digits"},
   488  		{FloatLit, "2.7_e0", "2.7_e0", "'_' must separate successive digits"},
   489  		{ImagLit, "10e+12_i", "10e+12_i", "'_' must separate successive digits"},
   490  		{IntLit, "0x___0", "0x___0", "'_' must separate successive digits"},
   491  		{FloatLit, "0x1.0_p0", "0x1.0_p0", "'_' must separate successive digits"},
   492  	} {
   493  		var s scanner
   494  		var err string
   495  		s.init(strings.NewReader(test.src), func(_, _ uint, msg string) {
   496  			if err == "" {
   497  				err = msg
   498  			}
   499  		}, 0)
   500  
   501  		for i, want := range strings.Split(test.tokens, " ") {
   502  			err = ""
   503  			s.next()
   504  
   505  			if err != "" && !s.bad {
   506  				t.Errorf("%q: got error but bad not set", test.src)
   507  			}
   508  
   509  			// compute lit where where s.lit is not defined
   510  			var lit string
   511  			switch s.tok {
   512  			case _Name, _Literal:
   513  				lit = s.lit
   514  			case _Dot:
   515  				lit = "."
   516  			}
   517  
   518  			if i == 0 {
   519  				if s.tok != _Literal || s.kind != test.kind {
   520  					t.Errorf("%q: got token %s (kind = %d); want literal (kind = %d)", test.src, s.tok, s.kind, test.kind)
   521  				}
   522  				if err != test.err {
   523  					t.Errorf("%q: got error %q; want %q", test.src, err, test.err)
   524  				}
   525  			}
   526  
   527  			if lit != want {
   528  				t.Errorf("%q: got literal %q (%s); want %s", test.src, lit, s.tok, want)
   529  			}
   530  		}
   531  
   532  		// make sure we read all
   533  		s.next()
   534  		if s.tok == _Semi {
   535  			s.next()
   536  		}
   537  		if s.tok != _EOF {
   538  			t.Errorf("%q: got %s; want EOF", test.src, s.tok)
   539  		}
   540  	}
   541  }
   542  
   543  func TestScanErrors(t *testing.T) {
   544  	for _, test := range []struct {
   545  		src, msg  string
   546  		line, col uint // 0-based
   547  	}{
   548  		// Note: Positions for lexical errors are the earliest position
   549  		// where the error is apparent, not the beginning of the respective
   550  		// token.
   551  
   552  		// rune-level errors
   553  		{"fo\x00o", "invalid NUL character", 0, 2},
   554  		{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0},
   555  		{"foo\n\n\xff    ", "invalid UTF-8 encoding", 2, 0},
   556  
   557  		// token-level errors
   558  		{"\u00BD" /* ½ */, "invalid identifier character U+00BD '½'", 0, 0},
   559  		{"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid identifier character U+00BD '½'", 0, 13 /* byte offset */},
   560  		{"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 0, 0},
   561  		{"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid identifier character U+00BD '½'", 0, 8 /* byte offset */},
   562  
   563  		{"x + ~y", "invalid character U+007E '~'", 0, 4},
   564  		{"foo$bar = 0", "invalid character U+0024 '$'", 0, 3},
   565  		{"0123456789", "invalid digit '8' in octal literal", 0, 8},
   566  		{"0123456789. /* foobar", "comment not terminated", 0, 12},   // valid float constant
   567  		{"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant
   568  		{"var a, b = 09, 07\n", "invalid digit '9' in octal literal", 0, 12},
   569  
   570  		{`''`, "empty character literal or unescaped ' in character literal", 0, 1},
   571  		{"'\n", "newline in character literal", 0, 1},
   572  		{`'\`, "invalid character literal (missing closing ')", 0, 0},
   573  		{`'\'`, "invalid character literal (missing closing ')", 0, 0},
   574  		{`'\x`, "invalid character literal (missing closing ')", 0, 0},
   575  		{`'\x'`, "non-hex character in escape sequence: '", 0, 3},
   576  		{`'\y'`, "unknown escape sequence", 0, 2},
   577  		{`'\x0'`, "non-hex character in escape sequence: '", 0, 4},
   578  		{`'\00'`, "non-octal character in escape sequence: '", 0, 4},
   579  		{`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape
   580  		{`'\378`, "non-octal character in escape sequence: 8", 0, 4},
   581  		{`'\400'`, "octal escape value > 255: 256", 0, 5},
   582  		{`'xx`, "invalid character literal (missing closing ')", 0, 0},
   583  		{`'xx'`, "invalid character literal (more than one character)", 0, 0},
   584  
   585  		{"\"\n", "newline in string", 0, 1},
   586  		{`"`, "string not terminated", 0, 0},
   587  		{`"foo`, "string not terminated", 0, 0},
   588  		{"`", "string not terminated", 0, 0},
   589  		{"`foo", "string not terminated", 0, 0},
   590  		{"/*/", "comment not terminated", 0, 0},
   591  		{"/*\n\nfoo", "comment not terminated", 0, 0},
   592  		{`"\`, "string not terminated", 0, 0},
   593  		{`"\"`, "string not terminated", 0, 0},
   594  		{`"\x`, "string not terminated", 0, 0},
   595  		{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
   596  		{`"\y"`, "unknown escape sequence", 0, 2},
   597  		{`"\x0"`, "non-hex character in escape sequence: \"", 0, 4},
   598  		{`"\00"`, "non-octal character in escape sequence: \"", 0, 4},
   599  		{`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape
   600  		{`"\378"`, "non-octal character in escape sequence: 8", 0, 4},
   601  		{`"\400"`, "octal escape value > 255: 256", 0, 5},
   602  
   603  		{`s := "foo\z"`, "unknown escape sequence", 0, 10},
   604  		{`s := "foo\z00\nbar"`, "unknown escape sequence", 0, 10},
   605  		{`"\x`, "string not terminated", 0, 0},
   606  		{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
   607  		{`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18},
   608  		{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point U+FFFFFFFF", 0, 18},
   609  
   610  		// former problem cases
   611  		{"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},
   612  	} {
   613  		var s scanner
   614  		nerrors := 0
   615  		s.init(strings.NewReader(test.src), func(line, col uint, msg string) {
   616  			nerrors++
   617  			// only check the first error
   618  			if nerrors == 1 {
   619  				if msg != test.msg {
   620  					t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
   621  				}
   622  				if line != test.line+linebase {
   623  					t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase)
   624  				}
   625  				if col != test.col+colbase {
   626  					t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase)
   627  				}
   628  			} else if nerrors > 1 {
   629  				// TODO(gri) make this use position info
   630  				t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
   631  			}
   632  		}, 0)
   633  
   634  		for {
   635  			s.next()
   636  			if s.tok == _EOF {
   637  				break
   638  			}
   639  		}
   640  
   641  		if nerrors == 0 {
   642  			t.Errorf("%q: got no error; want %q", test.src, test.msg)
   643  		}
   644  	}
   645  }
   646  
   647  func TestIssue21938(t *testing.T) {
   648  	s := "/*" + strings.Repeat(" ", 4089) + "*/ .5"
   649  
   650  	var got scanner
   651  	got.init(strings.NewReader(s), nil, 0)
   652  	got.next()
   653  
   654  	if got.tok != _Literal || got.lit != ".5" {
   655  		t.Errorf("got %s %q; want %s %q", got.tok, got.lit, _Literal, ".5")
   656  	}
   657  }
   658  
   659  func TestIssue33961(t *testing.T) {
   660  	literals := `08__ 0b.p 0b_._p 0x.e 0x.p`
   661  	for _, lit := range strings.Split(literals, " ") {
   662  		n := 0
   663  		var got scanner
   664  		got.init(strings.NewReader(lit), func(_, _ uint, msg string) {
   665  			// fmt.Printf("%s: %s\n", lit, msg) // uncomment for debugging
   666  			n++
   667  		}, 0)
   668  		got.next()
   669  
   670  		if n != 1 {
   671  			t.Errorf("%q: got %d errors; want 1", lit, n)
   672  			continue
   673  		}
   674  
   675  		if !got.bad {
   676  			t.Errorf("%q: got error but bad not set", lit)
   677  		}
   678  	}
   679  }