github.com/euank/go@v0.0.0-20160829210321-495514729181/src/cmd/compile/internal/syntax/scanner_test.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"testing"
    11  )
    12  
    13  func TestScanner(t *testing.T) {
    14  	if testing.Short() {
    15  		t.Skip("skipping test in short mode")
    16  	}
    17  
    18  	src, err := os.Open("parser.go")
    19  	if err != nil {
    20  		t.Fatal(err)
    21  	}
    22  	defer src.Close()
    23  
    24  	var s scanner
    25  	s.init(src, nil)
    26  	for {
    27  		s.next()
    28  		if s.tok == _EOF {
    29  			break
    30  		}
    31  		switch s.tok {
    32  		case _Name:
    33  			fmt.Println(s.line, s.tok, "=>", s.lit)
    34  		case _Operator:
    35  			fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
    36  		default:
    37  			fmt.Println(s.line, s.tok)
    38  		}
    39  	}
    40  }
    41  
    42  func TestTokens(t *testing.T) {
    43  	// make source
    44  	var buf []byte
    45  	for i, s := range sampleTokens {
    46  		buf = append(buf, "\t\t\t\t"[:i&3]...)     // leading indentation
    47  		buf = append(buf, s.src...)                // token
    48  		buf = append(buf, "        "[:i&7]...)     // trailing spaces
    49  		buf = append(buf, "/* foo */ // bar\n"...) // comments
    50  	}
    51  
    52  	// scan source
    53  	var got scanner
    54  	got.init(&bytesReader{buf}, nil)
    55  	got.next()
    56  	for i, want := range sampleTokens {
    57  		nlsemi := false
    58  
    59  		if got.line != i+1 {
    60  			t.Errorf("got line %d; want %d", got.line, i+1)
    61  		}
    62  
    63  		if got.tok != want.tok {
    64  			t.Errorf("got tok = %s; want %s", got.tok, want.tok)
    65  			continue
    66  		}
    67  
    68  		switch want.tok {
    69  		case _Name, _Literal:
    70  			if got.lit != want.src {
    71  				t.Errorf("got lit = %q; want %q", got.lit, want.src)
    72  				continue
    73  			}
    74  			nlsemi = true
    75  
    76  		case _Operator, _AssignOp, _IncOp:
    77  			if got.op != want.op {
    78  				t.Errorf("got op = %s; want %s", got.op, want.op)
    79  				continue
    80  			}
    81  			if got.prec != want.prec {
    82  				t.Errorf("got prec = %d; want %d", got.prec, want.prec)
    83  				continue
    84  			}
    85  			nlsemi = want.tok == _IncOp
    86  
    87  		case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
    88  			nlsemi = true
    89  		}
    90  
    91  		if nlsemi {
    92  			got.next()
    93  			if got.tok != _Semi {
    94  				t.Errorf("got tok = %s; want ;", got.tok)
    95  				continue
    96  			}
    97  		}
    98  
    99  		got.next()
   100  	}
   101  
   102  	if got.tok != _EOF {
   103  		t.Errorf("got %q; want _EOF", got.tok)
   104  	}
   105  }
   106  
   107  var sampleTokens = [...]struct {
   108  	tok  token
   109  	src  string
   110  	op   Operator
   111  	prec int
   112  }{
   113  	// name samples
   114  	{_Name, "x", 0, 0},
   115  	{_Name, "X123", 0, 0},
   116  	{_Name, "foo", 0, 0},
   117  	{_Name, "Foo123", 0, 0},
   118  	{_Name, "foo_bar", 0, 0},
   119  	{_Name, "_", 0, 0},
   120  	{_Name, "_foobar", 0, 0},
   121  	{_Name, "a۰۱۸", 0, 0},
   122  	{_Name, "foo६४", 0, 0},
   123  	{_Name, "bar9876", 0, 0},
   124  	{_Name, "ŝ", 0, 0},
   125  	{_Name, "ŝfoo", 0, 0},
   126  
   127  	// literal samples
   128  	{_Literal, "0", 0, 0},
   129  	{_Literal, "1", 0, 0},
   130  	{_Literal, "12345", 0, 0},
   131  	{_Literal, "123456789012345678890123456789012345678890", 0, 0},
   132  	{_Literal, "01234567", 0, 0},
   133  	{_Literal, "0x0", 0, 0},
   134  	{_Literal, "0xcafebabe", 0, 0},
   135  	{_Literal, "0.", 0, 0},
   136  	{_Literal, "0.e0", 0, 0},
   137  	{_Literal, "0.e-1", 0, 0},
   138  	{_Literal, "0.e+123", 0, 0},
   139  	{_Literal, ".0", 0, 0},
   140  	{_Literal, ".0E00", 0, 0},
   141  	{_Literal, ".0E-0123", 0, 0},
   142  	{_Literal, ".0E+12345678901234567890", 0, 0},
   143  	{_Literal, ".45e1", 0, 0},
   144  	{_Literal, "3.14159265", 0, 0},
   145  	{_Literal, "1e0", 0, 0},
   146  	{_Literal, "1e+100", 0, 0},
   147  	{_Literal, "1e-100", 0, 0},
   148  	{_Literal, "2.71828e-1000", 0, 0},
   149  	{_Literal, "0i", 0, 0},
   150  	{_Literal, "1i", 0, 0},
   151  	{_Literal, "012345678901234567889i", 0, 0},
   152  	{_Literal, "123456789012345678890i", 0, 0},
   153  	{_Literal, "0.i", 0, 0},
   154  	{_Literal, ".0i", 0, 0},
   155  	{_Literal, "3.14159265i", 0, 0},
   156  	{_Literal, "1e0i", 0, 0},
   157  	{_Literal, "1e+100i", 0, 0},
   158  	{_Literal, "1e-100i", 0, 0},
   159  	{_Literal, "2.71828e-1000i", 0, 0},
   160  	{_Literal, "'a'", 0, 0},
   161  	{_Literal, "'\\000'", 0, 0},
   162  	{_Literal, "'\\xFF'", 0, 0},
   163  	{_Literal, "'\\uff16'", 0, 0},
   164  	{_Literal, "'\\U0000ff16'", 0, 0},
   165  	{_Literal, "`foobar`", 0, 0},
   166  	{_Literal, "`foo\tbar`", 0, 0},
   167  	{_Literal, "`\r`", 0, 0},
   168  
   169  	// operators
   170  	{_Operator, "||", OrOr, precOrOr},
   171  
   172  	{_Operator, "&&", AndAnd, precAndAnd},
   173  
   174  	{_Operator, "==", Eql, precCmp},
   175  	{_Operator, "!=", Neq, precCmp},
   176  	{_Operator, "<", Lss, precCmp},
   177  	{_Operator, "<=", Leq, precCmp},
   178  	{_Operator, ">", Gtr, precCmp},
   179  	{_Operator, ">=", Geq, precCmp},
   180  
   181  	{_Operator, "+", Add, precAdd},
   182  	{_Operator, "-", Sub, precAdd},
   183  	{_Operator, "|", Or, precAdd},
   184  	{_Operator, "^", Xor, precAdd},
   185  
   186  	{_Star, "*", Mul, precMul},
   187  	{_Operator, "/", Div, precMul},
   188  	{_Operator, "%", Rem, precMul},
   189  	{_Operator, "&", And, precMul},
   190  	{_Operator, "&^", AndNot, precMul},
   191  	{_Operator, "<<", Shl, precMul},
   192  	{_Operator, ">>", Shr, precMul},
   193  
   194  	// assignment operations
   195  	{_AssignOp, "+=", Add, precAdd},
   196  	{_AssignOp, "-=", Sub, precAdd},
   197  	{_AssignOp, "|=", Or, precAdd},
   198  	{_AssignOp, "^=", Xor, precAdd},
   199  
   200  	{_AssignOp, "*=", Mul, precMul},
   201  	{_AssignOp, "/=", Div, precMul},
   202  	{_AssignOp, "%=", Rem, precMul},
   203  	{_AssignOp, "&=", And, precMul},
   204  	{_AssignOp, "&^=", AndNot, precMul},
   205  	{_AssignOp, "<<=", Shl, precMul},
   206  	{_AssignOp, ">>=", Shr, precMul},
   207  
   208  	// other operations
   209  	{_IncOp, "++", Add, precAdd},
   210  	{_IncOp, "--", Sub, precAdd},
   211  	{_Assign, "=", 0, 0},
   212  	{_Define, ":=", 0, 0},
   213  	{_Arrow, "<-", 0, 0},
   214  
   215  	// delimiters
   216  	{_Lparen, "(", 0, 0},
   217  	{_Lbrack, "[", 0, 0},
   218  	{_Lbrace, "{", 0, 0},
   219  	{_Rparen, ")", 0, 0},
   220  	{_Rbrack, "]", 0, 0},
   221  	{_Rbrace, "}", 0, 0},
   222  	{_Comma, ",", 0, 0},
   223  	{_Semi, ";", 0, 0},
   224  	{_Colon, ":", 0, 0},
   225  	{_Dot, ".", 0, 0},
   226  	{_DotDotDot, "...", 0, 0},
   227  
   228  	// keywords
   229  	{_Break, "break", 0, 0},
   230  	{_Case, "case", 0, 0},
   231  	{_Chan, "chan", 0, 0},
   232  	{_Const, "const", 0, 0},
   233  	{_Continue, "continue", 0, 0},
   234  	{_Default, "default", 0, 0},
   235  	{_Defer, "defer", 0, 0},
   236  	{_Else, "else", 0, 0},
   237  	{_Fallthrough, "fallthrough", 0, 0},
   238  	{_For, "for", 0, 0},
   239  	{_Func, "func", 0, 0},
   240  	{_Go, "go", 0, 0},
   241  	{_Goto, "goto", 0, 0},
   242  	{_If, "if", 0, 0},
   243  	{_Import, "import", 0, 0},
   244  	{_Interface, "interface", 0, 0},
   245  	{_Map, "map", 0, 0},
   246  	{_Package, "package", 0, 0},
   247  	{_Range, "range", 0, 0},
   248  	{_Return, "return", 0, 0},
   249  	{_Select, "select", 0, 0},
   250  	{_Struct, "struct", 0, 0},
   251  	{_Switch, "switch", 0, 0},
   252  	{_Type, "type", 0, 0},
   253  	{_Var, "var", 0, 0},
   254  }
   255  
   256  func TestScanErrors(t *testing.T) {
   257  	for _, test := range []struct {
   258  		src, msg  string
   259  		pos, line int
   260  	}{
   261  		// Note: Positions for lexical errors are the earliest position
   262  		// where the error is apparent, not the beginning of the respective
   263  		// token.
   264  
   265  		// rune-level errors
   266  		{"fo\x00o", "invalid NUL character", 2, 1},
   267  		{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 4, 2},
   268  		{"foo\n\n\xff    ", "invalid UTF-8 encoding", 5, 3},
   269  
   270  		// token-level errors
   271  		{"x + ~y", "bitwise complement operator is ^", 4, 1},
   272  		{"foo$bar = 0", "illegal character U+0024 '$'", 3, 1},
   273  		{"const x = 0xyz", "malformed hex constant", 12, 1},
   274  		{"0123456789", "malformed octal constant", 10, 1},
   275  		{"0123456789. /* foobar", "comment not terminated", 12, 1},   // valid float constant
   276  		{"0123456789e0 /*\nfoobar", "comment not terminated", 13, 1}, // valid float constant
   277  		{"var a, b = 08, 07\n", "malformed octal constant", 13, 1},
   278  		{"(x + 1.0e+x)", "malformed floating-point constant exponent", 10, 1},
   279  
   280  		{`''`, "empty character literal or unescaped ' in character literal", 1, 1},
   281  		{"'\n", "newline in character literal", 1, 1},
   282  		{`'\`, "missing '", 2, 1},
   283  		{`'\'`, "missing '", 3, 1},
   284  		{`'\x`, "missing '", 3, 1},
   285  		{`'\x'`, "non-hex character in escape sequence: '", 3, 1},
   286  		{`'\y'`, "unknown escape sequence", 2, 1},
   287  		{`'\x0'`, "non-hex character in escape sequence: '", 4, 1},
   288  		{`'\00'`, "non-octal character in escape sequence: '", 4, 1},
   289  		{`'\377' /*`, "comment not terminated", 7, 1}, // valid octal escape
   290  		{`'\378`, "non-octal character in escape sequence: 8", 4, 1},
   291  		{`'\400'`, "octal escape value > 255: 256", 5, 1},
   292  		{`'xx`, "missing '", 2, 1},
   293  
   294  		{"\"\n", "newline in string", 1, 1},
   295  		{`"`, "string not terminated", 0, 1},
   296  		{`"foo`, "string not terminated", 0, 1},
   297  		{"`", "string not terminated", 0, 1},
   298  		{"`foo", "string not terminated", 0, 1},
   299  		{"/*/", "comment not terminated", 0, 1},
   300  		{"/*\n\nfoo", "comment not terminated", 0, 1},
   301  		{"/*\n\nfoo", "comment not terminated", 0, 1},
   302  		{`"\`, "string not terminated", 0, 1},
   303  		{`"\"`, "string not terminated", 0, 1},
   304  		{`"\x`, "string not terminated", 0, 1},
   305  		{`"\x"`, "non-hex character in escape sequence: \"", 3, 1},
   306  		{`"\y"`, "unknown escape sequence", 2, 1},
   307  		{`"\x0"`, "non-hex character in escape sequence: \"", 4, 1},
   308  		{`"\00"`, "non-octal character in escape sequence: \"", 4, 1},
   309  		{`"\377" /*`, "comment not terminated", 7, 1}, // valid octal escape
   310  		{`"\378"`, "non-octal character in escape sequence: 8", 4, 1},
   311  		{`"\400"`, "octal escape value > 255: 256", 5, 1},
   312  
   313  		{`s := "foo\z"`, "unknown escape sequence", 10, 1},
   314  		{`s := "foo\z00\nbar"`, "unknown escape sequence", 10, 1},
   315  		{`"\x`, "string not terminated", 0, 1},
   316  		{`"\x"`, "non-hex character in escape sequence: \"", 3, 1},
   317  		{`var s string = "\x"`, "non-hex character in escape sequence: \"", 18, 1},
   318  		{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 18, 1},
   319  
   320  		// former problem cases
   321  		{"package p\n\n\xef", "invalid UTF-8 encoding", 11, 3},
   322  	} {
   323  		var s scanner
   324  		nerrors := 0
   325  		s.init(&bytesReader{[]byte(test.src)}, func(pos, line int, msg string) {
   326  			nerrors++
   327  			// only check the first error
   328  			if nerrors == 1 {
   329  				if msg != test.msg {
   330  					t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
   331  				}
   332  				if pos != test.pos {
   333  					t.Errorf("%q: got pos = %d; want %d", test.src, pos, test.pos)
   334  				}
   335  				if line != test.line {
   336  					t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
   337  				}
   338  			} else if nerrors > 1 {
   339  				t.Errorf("%q: got unexpected %q at pos = %d, line = %d", test.src, msg, pos, line)
   340  			}
   341  		})
   342  
   343  		for {
   344  			s.next()
   345  			if s.tok == _EOF {
   346  				break
   347  			}
   348  		}
   349  
   350  		if nerrors == 0 {
   351  			t.Errorf("%q: got no error; want %q", test.src, test.msg)
   352  		}
   353  	}
   354  }