github.com/corona10/go@v0.0.0-20180224231303-7a218942be57/src/cmd/compile/internal/syntax/scanner_test.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"os"
    11  	"strings"
    12  	"testing"
    13  )
    14  
    15  func TestScanner(t *testing.T) {
    16  	if testing.Short() {
    17  		t.Skip("skipping test in short mode")
    18  	}
    19  
    20  	src, err := os.Open("parser.go")
    21  	if err != nil {
    22  		t.Fatal(err)
    23  	}
    24  	defer src.Close()
    25  
    26  	var s scanner
    27  	s.init(src, nil, 0)
    28  	for {
    29  		s.next()
    30  		if s.tok == _EOF {
    31  			break
    32  		}
    33  		switch s.tok {
    34  		case _Name:
    35  			fmt.Println(s.line, s.tok, "=>", s.lit)
    36  		case _Operator:
    37  			fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
    38  		default:
    39  			fmt.Println(s.line, s.tok)
    40  		}
    41  	}
    42  }
    43  
    44  func TestTokens(t *testing.T) {
    45  	// make source
    46  	var buf bytes.Buffer
    47  	for i, s := range sampleTokens {
    48  		buf.WriteString("\t\t\t\t"[:i&3])           // leading indentation
    49  		buf.WriteString(s.src)                      // token
    50  		buf.WriteString("        "[:i&7])           // trailing spaces
    51  		buf.WriteString("/*line foo:1 */ // bar\n") // comments (don't crash w/o directive handler)
    52  	}
    53  
    54  	// scan source
    55  	var got scanner
    56  	got.init(&buf, nil, 0)
    57  	got.next()
    58  	for i, want := range sampleTokens {
    59  		nlsemi := false
    60  
    61  		if got.line != uint(i+linebase) {
    62  			t.Errorf("got line %d; want %d", got.line, i+linebase)
    63  		}
    64  
    65  		if got.tok != want.tok {
    66  			t.Errorf("got tok = %s; want %s", got.tok, want.tok)
    67  			continue
    68  		}
    69  
    70  		switch want.tok {
    71  		case _Semi:
    72  			if got.lit != "semicolon" {
    73  				t.Errorf("got %s; want semicolon", got.lit)
    74  			}
    75  
    76  		case _Name, _Literal:
    77  			if got.lit != want.src {
    78  				t.Errorf("got lit = %q; want %q", got.lit, want.src)
    79  				continue
    80  			}
    81  			nlsemi = true
    82  
    83  		case _Operator, _AssignOp, _IncOp:
    84  			if got.op != want.op {
    85  				t.Errorf("got op = %s; want %s", got.op, want.op)
    86  				continue
    87  			}
    88  			if got.prec != want.prec {
    89  				t.Errorf("got prec = %d; want %d", got.prec, want.prec)
    90  				continue
    91  			}
    92  			nlsemi = want.tok == _IncOp
    93  
    94  		case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
    95  			nlsemi = true
    96  		}
    97  
    98  		if nlsemi {
    99  			got.next()
   100  			if got.tok != _Semi {
   101  				t.Errorf("got tok = %s; want ;", got.tok)
   102  				continue
   103  			}
   104  			if got.lit != "newline" {
   105  				t.Errorf("got %s; want newline", got.lit)
   106  			}
   107  		}
   108  
   109  		got.next()
   110  	}
   111  
   112  	if got.tok != _EOF {
   113  		t.Errorf("got %q; want _EOF", got.tok)
   114  	}
   115  }
   116  
   117  var sampleTokens = [...]struct {
   118  	tok  token
   119  	src  string
   120  	op   Operator
   121  	prec int
   122  }{
   123  	// name samples
   124  	{_Name, "x", 0, 0},
   125  	{_Name, "X123", 0, 0},
   126  	{_Name, "foo", 0, 0},
   127  	{_Name, "Foo123", 0, 0},
   128  	{_Name, "foo_bar", 0, 0},
   129  	{_Name, "_", 0, 0},
   130  	{_Name, "_foobar", 0, 0},
   131  	{_Name, "a۰۱۸", 0, 0},
   132  	{_Name, "foo६४", 0, 0},
   133  	{_Name, "bar9876", 0, 0},
   134  	{_Name, "ŝ", 0, 0},
   135  	{_Name, "ŝfoo", 0, 0},
   136  
   137  	// literal samples
   138  	{_Literal, "0", 0, 0},
   139  	{_Literal, "1", 0, 0},
   140  	{_Literal, "12345", 0, 0},
   141  	{_Literal, "123456789012345678890123456789012345678890", 0, 0},
   142  	{_Literal, "01234567", 0, 0},
   143  	{_Literal, "0x0", 0, 0},
   144  	{_Literal, "0xcafebabe", 0, 0},
   145  	{_Literal, "0.", 0, 0},
   146  	{_Literal, "0.e0", 0, 0},
   147  	{_Literal, "0.e-1", 0, 0},
   148  	{_Literal, "0.e+123", 0, 0},
   149  	{_Literal, ".0", 0, 0},
   150  	{_Literal, ".0E00", 0, 0},
   151  	{_Literal, ".0E-0123", 0, 0},
   152  	{_Literal, ".0E+12345678901234567890", 0, 0},
   153  	{_Literal, ".45e1", 0, 0},
   154  	{_Literal, "3.14159265", 0, 0},
   155  	{_Literal, "1e0", 0, 0},
   156  	{_Literal, "1e+100", 0, 0},
   157  	{_Literal, "1e-100", 0, 0},
   158  	{_Literal, "2.71828e-1000", 0, 0},
   159  	{_Literal, "0i", 0, 0},
   160  	{_Literal, "1i", 0, 0},
   161  	{_Literal, "012345678901234567889i", 0, 0},
   162  	{_Literal, "123456789012345678890i", 0, 0},
   163  	{_Literal, "0.i", 0, 0},
   164  	{_Literal, ".0i", 0, 0},
   165  	{_Literal, "3.14159265i", 0, 0},
   166  	{_Literal, "1e0i", 0, 0},
   167  	{_Literal, "1e+100i", 0, 0},
   168  	{_Literal, "1e-100i", 0, 0},
   169  	{_Literal, "2.71828e-1000i", 0, 0},
   170  	{_Literal, "'a'", 0, 0},
   171  	{_Literal, "'\\000'", 0, 0},
   172  	{_Literal, "'\\xFF'", 0, 0},
   173  	{_Literal, "'\\uff16'", 0, 0},
   174  	{_Literal, "'\\U0000ff16'", 0, 0},
   175  	{_Literal, "`foobar`", 0, 0},
   176  	{_Literal, "`foo\tbar`", 0, 0},
   177  	{_Literal, "`\r`", 0, 0},
   178  
   179  	// operators
   180  	{_Operator, "||", OrOr, precOrOr},
   181  
   182  	{_Operator, "&&", AndAnd, precAndAnd},
   183  
   184  	{_Operator, "==", Eql, precCmp},
   185  	{_Operator, "!=", Neq, precCmp},
   186  	{_Operator, "<", Lss, precCmp},
   187  	{_Operator, "<=", Leq, precCmp},
   188  	{_Operator, ">", Gtr, precCmp},
   189  	{_Operator, ">=", Geq, precCmp},
   190  
   191  	{_Operator, "+", Add, precAdd},
   192  	{_Operator, "-", Sub, precAdd},
   193  	{_Operator, "|", Or, precAdd},
   194  	{_Operator, "^", Xor, precAdd},
   195  
   196  	{_Star, "*", Mul, precMul},
   197  	{_Operator, "/", Div, precMul},
   198  	{_Operator, "%", Rem, precMul},
   199  	{_Operator, "&", And, precMul},
   200  	{_Operator, "&^", AndNot, precMul},
   201  	{_Operator, "<<", Shl, precMul},
   202  	{_Operator, ">>", Shr, precMul},
   203  
   204  	// assignment operations
   205  	{_AssignOp, "+=", Add, precAdd},
   206  	{_AssignOp, "-=", Sub, precAdd},
   207  	{_AssignOp, "|=", Or, precAdd},
   208  	{_AssignOp, "^=", Xor, precAdd},
   209  
   210  	{_AssignOp, "*=", Mul, precMul},
   211  	{_AssignOp, "/=", Div, precMul},
   212  	{_AssignOp, "%=", Rem, precMul},
   213  	{_AssignOp, "&=", And, precMul},
   214  	{_AssignOp, "&^=", AndNot, precMul},
   215  	{_AssignOp, "<<=", Shl, precMul},
   216  	{_AssignOp, ">>=", Shr, precMul},
   217  
   218  	// other operations
   219  	{_IncOp, "++", Add, precAdd},
   220  	{_IncOp, "--", Sub, precAdd},
   221  	{_Assign, "=", 0, 0},
   222  	{_Define, ":=", 0, 0},
   223  	{_Arrow, "<-", 0, 0},
   224  
   225  	// delimiters
   226  	{_Lparen, "(", 0, 0},
   227  	{_Lbrack, "[", 0, 0},
   228  	{_Lbrace, "{", 0, 0},
   229  	{_Rparen, ")", 0, 0},
   230  	{_Rbrack, "]", 0, 0},
   231  	{_Rbrace, "}", 0, 0},
   232  	{_Comma, ",", 0, 0},
   233  	{_Semi, ";", 0, 0},
   234  	{_Colon, ":", 0, 0},
   235  	{_Dot, ".", 0, 0},
   236  	{_DotDotDot, "...", 0, 0},
   237  
   238  	// keywords
   239  	{_Break, "break", 0, 0},
   240  	{_Case, "case", 0, 0},
   241  	{_Chan, "chan", 0, 0},
   242  	{_Const, "const", 0, 0},
   243  	{_Continue, "continue", 0, 0},
   244  	{_Default, "default", 0, 0},
   245  	{_Defer, "defer", 0, 0},
   246  	{_Else, "else", 0, 0},
   247  	{_Fallthrough, "fallthrough", 0, 0},
   248  	{_For, "for", 0, 0},
   249  	{_Func, "func", 0, 0},
   250  	{_Go, "go", 0, 0},
   251  	{_Goto, "goto", 0, 0},
   252  	{_If, "if", 0, 0},
   253  	{_Import, "import", 0, 0},
   254  	{_Interface, "interface", 0, 0},
   255  	{_Map, "map", 0, 0},
   256  	{_Package, "package", 0, 0},
   257  	{_Range, "range", 0, 0},
   258  	{_Return, "return", 0, 0},
   259  	{_Select, "select", 0, 0},
   260  	{_Struct, "struct", 0, 0},
   261  	{_Switch, "switch", 0, 0},
   262  	{_Type, "type", 0, 0},
   263  	{_Var, "var", 0, 0},
   264  }
   265  
   266  func TestComments(t *testing.T) {
   267  	type comment struct {
   268  		line, col uint // 0-based
   269  		text      string
   270  	}
   271  
   272  	for _, test := range []struct {
   273  		src  string
   274  		want comment
   275  	}{
   276  		// no comments
   277  		{"no comment here", comment{0, 0, ""}},
   278  		{" /", comment{0, 0, ""}},
   279  		{"\n /*/", comment{0, 0, ""}},
   280  
   281  		//-style comments
   282  		{"// line comment\n", comment{0, 0, "// line comment"}},
   283  		{"package p // line comment\n", comment{0, 10, "// line comment"}},
   284  		{"//\n//\n\t// want this one\r\n", comment{2, 1, "// want this one\r"}},
   285  		{"\n\n//\n", comment{2, 0, "//"}},
   286  		{"//", comment{0, 0, "//"}},
   287  
   288  		/*-style comments */
   289  		{"/* regular comment */", comment{0, 0, "/* regular comment */"}},
   290  		{"package p /* regular comment", comment{0, 0, ""}},
   291  		{"\n\n\n/*\n*//* want this one */", comment{4, 2, "/* want this one */"}},
   292  		{"\n\n/**/", comment{2, 0, "/**/"}},
   293  		{"/*", comment{0, 0, ""}},
   294  	} {
   295  		var s scanner
   296  		var got comment
   297  		s.init(strings.NewReader(test.src),
   298  			func(line, col uint, msg string) {
   299  				if msg[0] != '/' {
   300  					// error
   301  					if msg != "comment not terminated" {
   302  						t.Errorf("%q: %s", test.src, msg)
   303  					}
   304  					return
   305  				}
   306  				got = comment{line - linebase, col - colbase, msg} // keep last one
   307  			}, comments)
   308  
   309  		for {
   310  			s.next()
   311  			if s.tok == _EOF {
   312  				break
   313  			}
   314  		}
   315  
   316  		want := test.want
   317  		if got.line != want.line || got.col != want.col {
   318  			t.Errorf("%q: got position %d:%d; want %d:%d", test.src, got.line, got.col, want.line, want.col)
   319  		}
   320  		if got.text != want.text {
   321  			t.Errorf("%q: got %q; want %q", test.src, got.text, want.text)
   322  		}
   323  	}
   324  }
   325  
   326  func TestScanErrors(t *testing.T) {
   327  	for _, test := range []struct {
   328  		src, msg  string
   329  		line, col uint // 0-based
   330  	}{
   331  		// Note: Positions for lexical errors are the earliest position
   332  		// where the error is apparent, not the beginning of the respective
   333  		// token.
   334  
   335  		// rune-level errors
   336  		{"fo\x00o", "invalid NUL character", 0, 2},
   337  		{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0},
   338  		{"foo\n\n\xff    ", "invalid UTF-8 encoding", 2, 0},
   339  
   340  		// token-level errors
   341  		{"\u00BD" /* ½ */, "invalid identifier character U+00BD '½'", 0, 0},
   342  		{"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid identifier character U+00BD '½'", 0, 13 /* byte offset */},
   343  		{"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 0, 0},
   344  		{"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid identifier character U+00BD '½'", 0, 8 /* byte offset */},
   345  
   346  		{"x + ~y", "invalid character U+007E '~'", 0, 4},
   347  		{"foo$bar = 0", "invalid character U+0024 '$'", 0, 3},
   348  		{"const x = 0xyz", "malformed hex constant", 0, 12},
   349  		{"0123456789", "malformed octal constant", 0, 10},
   350  		{"0123456789. /* foobar", "comment not terminated", 0, 12},   // valid float constant
   351  		{"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant
   352  		{"var a, b = 08, 07\n", "malformed octal constant", 0, 13},
   353  		{"(x + 1.0e+x)", "malformed floating-point constant exponent", 0, 10},
   354  
   355  		{`''`, "empty character literal or unescaped ' in character literal", 0, 1},
   356  		{"'\n", "newline in character literal", 0, 1},
   357  		{`'\`, "invalid character literal (missing closing ')", 0, 0},
   358  		{`'\'`, "invalid character literal (missing closing ')", 0, 0},
   359  		{`'\x`, "invalid character literal (missing closing ')", 0, 0},
   360  		{`'\x'`, "non-hex character in escape sequence: '", 0, 3},
   361  		{`'\y'`, "unknown escape sequence", 0, 2},
   362  		{`'\x0'`, "non-hex character in escape sequence: '", 0, 4},
   363  		{`'\00'`, "non-octal character in escape sequence: '", 0, 4},
   364  		{`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape
   365  		{`'\378`, "non-octal character in escape sequence: 8", 0, 4},
   366  		{`'\400'`, "octal escape value > 255: 256", 0, 5},
   367  		{`'xx`, "invalid character literal (missing closing ')", 0, 0},
   368  		{`'xx'`, "invalid character literal (more than one character)", 0, 0},
   369  
   370  		{"\"\n", "newline in string", 0, 1},
   371  		{`"`, "string not terminated", 0, 0},
   372  		{`"foo`, "string not terminated", 0, 0},
   373  		{"`", "string not terminated", 0, 0},
   374  		{"`foo", "string not terminated", 0, 0},
   375  		{"/*/", "comment not terminated", 0, 0},
   376  		{"/*\n\nfoo", "comment not terminated", 0, 0},
   377  		{`"\`, "string not terminated", 0, 0},
   378  		{`"\"`, "string not terminated", 0, 0},
   379  		{`"\x`, "string not terminated", 0, 0},
   380  		{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
   381  		{`"\y"`, "unknown escape sequence", 0, 2},
   382  		{`"\x0"`, "non-hex character in escape sequence: \"", 0, 4},
   383  		{`"\00"`, "non-octal character in escape sequence: \"", 0, 4},
   384  		{`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape
   385  		{`"\378"`, "non-octal character in escape sequence: 8", 0, 4},
   386  		{`"\400"`, "octal escape value > 255: 256", 0, 5},
   387  
   388  		{`s := "foo\z"`, "unknown escape sequence", 0, 10},
   389  		{`s := "foo\z00\nbar"`, "unknown escape sequence", 0, 10},
   390  		{`"\x`, "string not terminated", 0, 0},
   391  		{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
   392  		{`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18},
   393  		{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 0, 18},
   394  
   395  		// former problem cases
   396  		{"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},
   397  	} {
   398  		var s scanner
   399  		nerrors := 0
   400  		s.init(strings.NewReader(test.src), func(line, col uint, msg string) {
   401  			nerrors++
   402  			// only check the first error
   403  			if nerrors == 1 {
   404  				if msg != test.msg {
   405  					t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
   406  				}
   407  				if line != test.line+linebase {
   408  					t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase)
   409  				}
   410  				if col != test.col+colbase {
   411  					t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase)
   412  				}
   413  			} else if nerrors > 1 {
   414  				// TODO(gri) make this use position info
   415  				t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
   416  			}
   417  		}, 0)
   418  
   419  		for {
   420  			s.next()
   421  			if s.tok == _EOF {
   422  				break
   423  			}
   424  		}
   425  
   426  		if nerrors == 0 {
   427  			t.Errorf("%q: got no error; want %q", test.src, test.msg)
   428  		}
   429  	}
   430  }
   431  
   432  func TestIssue21938(t *testing.T) {
   433  	s := "/*" + strings.Repeat(" ", 4089) + "*/ .5"
   434  
   435  	var got scanner
   436  	got.init(strings.NewReader(s), nil, 0)
   437  	got.next()
   438  
   439  	if got.tok != _Literal || got.lit != ".5" {
   440  		t.Errorf("got %s %q; want %s %q", got.tok, got.lit, _Literal, ".5")
   441  	}
   442  }