github.com/rakyll/go@v0.0.0-20170216000551-64c02460d703/src/cmd/compile/internal/syntax/scanner_test.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"testing"
    11  )
    12  
    13  func TestScanner(t *testing.T) {
    14  	if testing.Short() {
    15  		t.Skip("skipping test in short mode")
    16  	}
    17  
    18  	src, err := os.Open("parser.go")
    19  	if err != nil {
    20  		t.Fatal(err)
    21  	}
    22  	defer src.Close()
    23  
    24  	var s scanner
    25  	s.init(src, nil, nil)
    26  	for {
    27  		s.next()
    28  		if s.tok == _EOF {
    29  			break
    30  		}
    31  		switch s.tok {
    32  		case _Name:
    33  			fmt.Println(s.line, s.tok, "=>", s.lit)
    34  		case _Operator:
    35  			fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
    36  		default:
    37  			fmt.Println(s.line, s.tok)
    38  		}
    39  	}
    40  }
    41  
    42  func TestTokens(t *testing.T) {
    43  	// make source
    44  	var buf []byte
    45  	for i, s := range sampleTokens {
    46  		buf = append(buf, "\t\t\t\t"[:i&3]...)     // leading indentation
    47  		buf = append(buf, s.src...)                // token
    48  		buf = append(buf, "        "[:i&7]...)     // trailing spaces
    49  		buf = append(buf, "/* foo */ // bar\n"...) // comments
    50  	}
    51  
    52  	// scan source
    53  	var got scanner
    54  	got.init(&bytesReader{buf}, nil, nil)
    55  	got.next()
    56  	for i, want := range sampleTokens {
    57  		nlsemi := false
    58  
    59  		if got.line != uint(i+1) {
    60  			t.Errorf("got line %d; want %d", got.line, i+1)
    61  		}
    62  
    63  		if got.tok != want.tok {
    64  			t.Errorf("got tok = %s; want %s", got.tok, want.tok)
    65  			continue
    66  		}
    67  
    68  		switch want.tok {
    69  		case _Semi:
    70  			if got.lit != "semicolon" {
    71  				t.Errorf("got %s; want semicolon", got.lit)
    72  			}
    73  
    74  		case _Name, _Literal:
    75  			if got.lit != want.src {
    76  				t.Errorf("got lit = %q; want %q", got.lit, want.src)
    77  				continue
    78  			}
    79  			nlsemi = true
    80  
    81  		case _Operator, _AssignOp, _IncOp:
    82  			if got.op != want.op {
    83  				t.Errorf("got op = %s; want %s", got.op, want.op)
    84  				continue
    85  			}
    86  			if got.prec != want.prec {
    87  				t.Errorf("got prec = %d; want %d", got.prec, want.prec)
    88  				continue
    89  			}
    90  			nlsemi = want.tok == _IncOp
    91  
    92  		case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
    93  			nlsemi = true
    94  		}
    95  
    96  		if nlsemi {
    97  			got.next()
    98  			if got.tok != _Semi {
    99  				t.Errorf("got tok = %s; want ;", got.tok)
   100  				continue
   101  			}
   102  			if got.lit != "newline" {
   103  				t.Errorf("got %s; want newline", got.lit)
   104  			}
   105  		}
   106  
   107  		got.next()
   108  	}
   109  
   110  	if got.tok != _EOF {
   111  		t.Errorf("got %q; want _EOF", got.tok)
   112  	}
   113  }
   114  
   115  var sampleTokens = [...]struct {
   116  	tok  token
   117  	src  string
   118  	op   Operator
   119  	prec int
   120  }{
   121  	// name samples
   122  	{_Name, "x", 0, 0},
   123  	{_Name, "X123", 0, 0},
   124  	{_Name, "foo", 0, 0},
   125  	{_Name, "Foo123", 0, 0},
   126  	{_Name, "foo_bar", 0, 0},
   127  	{_Name, "_", 0, 0},
   128  	{_Name, "_foobar", 0, 0},
   129  	{_Name, "a۰۱۸", 0, 0},
   130  	{_Name, "foo६४", 0, 0},
   131  	{_Name, "bar9876", 0, 0},
   132  	{_Name, "ŝ", 0, 0},
   133  	{_Name, "ŝfoo", 0, 0},
   134  
   135  	// literal samples
   136  	{_Literal, "0", 0, 0},
   137  	{_Literal, "1", 0, 0},
   138  	{_Literal, "12345", 0, 0},
   139  	{_Literal, "123456789012345678890123456789012345678890", 0, 0},
   140  	{_Literal, "01234567", 0, 0},
   141  	{_Literal, "0x0", 0, 0},
   142  	{_Literal, "0xcafebabe", 0, 0},
   143  	{_Literal, "0.", 0, 0},
   144  	{_Literal, "0.e0", 0, 0},
   145  	{_Literal, "0.e-1", 0, 0},
   146  	{_Literal, "0.e+123", 0, 0},
   147  	{_Literal, ".0", 0, 0},
   148  	{_Literal, ".0E00", 0, 0},
   149  	{_Literal, ".0E-0123", 0, 0},
   150  	{_Literal, ".0E+12345678901234567890", 0, 0},
   151  	{_Literal, ".45e1", 0, 0},
   152  	{_Literal, "3.14159265", 0, 0},
   153  	{_Literal, "1e0", 0, 0},
   154  	{_Literal, "1e+100", 0, 0},
   155  	{_Literal, "1e-100", 0, 0},
   156  	{_Literal, "2.71828e-1000", 0, 0},
   157  	{_Literal, "0i", 0, 0},
   158  	{_Literal, "1i", 0, 0},
   159  	{_Literal, "012345678901234567889i", 0, 0},
   160  	{_Literal, "123456789012345678890i", 0, 0},
   161  	{_Literal, "0.i", 0, 0},
   162  	{_Literal, ".0i", 0, 0},
   163  	{_Literal, "3.14159265i", 0, 0},
   164  	{_Literal, "1e0i", 0, 0},
   165  	{_Literal, "1e+100i", 0, 0},
   166  	{_Literal, "1e-100i", 0, 0},
   167  	{_Literal, "2.71828e-1000i", 0, 0},
   168  	{_Literal, "'a'", 0, 0},
   169  	{_Literal, "'\\000'", 0, 0},
   170  	{_Literal, "'\\xFF'", 0, 0},
   171  	{_Literal, "'\\uff16'", 0, 0},
   172  	{_Literal, "'\\U0000ff16'", 0, 0},
   173  	{_Literal, "`foobar`", 0, 0},
   174  	{_Literal, "`foo\tbar`", 0, 0},
   175  	{_Literal, "`\r`", 0, 0},
   176  
   177  	// operators
   178  	{_Operator, "||", OrOr, precOrOr},
   179  
   180  	{_Operator, "&&", AndAnd, precAndAnd},
   181  
   182  	{_Operator, "==", Eql, precCmp},
   183  	{_Operator, "!=", Neq, precCmp},
   184  	{_Operator, "<", Lss, precCmp},
   185  	{_Operator, "<=", Leq, precCmp},
   186  	{_Operator, ">", Gtr, precCmp},
   187  	{_Operator, ">=", Geq, precCmp},
   188  
   189  	{_Operator, "+", Add, precAdd},
   190  	{_Operator, "-", Sub, precAdd},
   191  	{_Operator, "|", Or, precAdd},
   192  	{_Operator, "^", Xor, precAdd},
   193  
   194  	{_Star, "*", Mul, precMul},
   195  	{_Operator, "/", Div, precMul},
   196  	{_Operator, "%", Rem, precMul},
   197  	{_Operator, "&", And, precMul},
   198  	{_Operator, "&^", AndNot, precMul},
   199  	{_Operator, "<<", Shl, precMul},
   200  	{_Operator, ">>", Shr, precMul},
   201  
   202  	// assignment operations
   203  	{_AssignOp, "+=", Add, precAdd},
   204  	{_AssignOp, "-=", Sub, precAdd},
   205  	{_AssignOp, "|=", Or, precAdd},
   206  	{_AssignOp, "^=", Xor, precAdd},
   207  
   208  	{_AssignOp, "*=", Mul, precMul},
   209  	{_AssignOp, "/=", Div, precMul},
   210  	{_AssignOp, "%=", Rem, precMul},
   211  	{_AssignOp, "&=", And, precMul},
   212  	{_AssignOp, "&^=", AndNot, precMul},
   213  	{_AssignOp, "<<=", Shl, precMul},
   214  	{_AssignOp, ">>=", Shr, precMul},
   215  
   216  	// other operations
   217  	{_IncOp, "++", Add, precAdd},
   218  	{_IncOp, "--", Sub, precAdd},
   219  	{_Assign, "=", 0, 0},
   220  	{_Define, ":=", 0, 0},
   221  	{_Arrow, "<-", 0, 0},
   222  
   223  	// delimiters
   224  	{_Lparen, "(", 0, 0},
   225  	{_Lbrack, "[", 0, 0},
   226  	{_Lbrace, "{", 0, 0},
   227  	{_Rparen, ")", 0, 0},
   228  	{_Rbrack, "]", 0, 0},
   229  	{_Rbrace, "}", 0, 0},
   230  	{_Comma, ",", 0, 0},
   231  	{_Semi, ";", 0, 0},
   232  	{_Colon, ":", 0, 0},
   233  	{_Dot, ".", 0, 0},
   234  	{_DotDotDot, "...", 0, 0},
   235  
   236  	// keywords
   237  	{_Break, "break", 0, 0},
   238  	{_Case, "case", 0, 0},
   239  	{_Chan, "chan", 0, 0},
   240  	{_Const, "const", 0, 0},
   241  	{_Continue, "continue", 0, 0},
   242  	{_Default, "default", 0, 0},
   243  	{_Defer, "defer", 0, 0},
   244  	{_Else, "else", 0, 0},
   245  	{_Fallthrough, "fallthrough", 0, 0},
   246  	{_For, "for", 0, 0},
   247  	{_Func, "func", 0, 0},
   248  	{_Go, "go", 0, 0},
   249  	{_Goto, "goto", 0, 0},
   250  	{_If, "if", 0, 0},
   251  	{_Import, "import", 0, 0},
   252  	{_Interface, "interface", 0, 0},
   253  	{_Map, "map", 0, 0},
   254  	{_Package, "package", 0, 0},
   255  	{_Range, "range", 0, 0},
   256  	{_Return, "return", 0, 0},
   257  	{_Select, "select", 0, 0},
   258  	{_Struct, "struct", 0, 0},
   259  	{_Switch, "switch", 0, 0},
   260  	{_Type, "type", 0, 0},
   261  	{_Var, "var", 0, 0},
   262  }
   263  
   264  func TestScanErrors(t *testing.T) {
   265  	for _, test := range []struct {
   266  		src, msg  string
   267  		line, col uint
   268  	}{
   269  		// Note: Positions for lexical errors are the earliest position
   270  		// where the error is apparent, not the beginning of the respective
   271  		// token.
   272  
   273  		// rune-level errors
   274  		{"fo\x00o", "invalid NUL character", 1, 2},
   275  		{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 2, 0},
   276  		{"foo\n\n\xff    ", "invalid UTF-8 encoding", 3, 0},
   277  
   278  		// token-level errors
   279  		{"\u00BD" /* ½ */, "invalid identifier character U+00BD '½'", 1, 0},
   280  		{"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid identifier character U+00BD '½'", 1, 13 /* byte offset */},
   281  		{"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 1, 0},
   282  		{"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid identifier character U+00BD '½'", 1, 8 /* byte offset */},
   283  
   284  		{"x + ~y", "bitwise complement operator is ^", 1, 4},
   285  		{"foo$bar = 0", "invalid character U+0024 '$'", 1, 3},
   286  		{"const x = 0xyz", "malformed hex constant", 1, 12},
   287  		{"0123456789", "malformed octal constant", 1, 10},
   288  		{"0123456789. /* foobar", "comment not terminated", 1, 12},   // valid float constant
   289  		{"0123456789e0 /*\nfoobar", "comment not terminated", 1, 13}, // valid float constant
   290  		{"var a, b = 08, 07\n", "malformed octal constant", 1, 13},
   291  		{"(x + 1.0e+x)", "malformed floating-point constant exponent", 1, 10},
   292  
   293  		{`''`, "empty character literal or unescaped ' in character literal", 1, 1},
   294  		{"'\n", "newline in character literal", 1, 1},
   295  		{`'\`, "missing '", 1, 2},
   296  		{`'\'`, "missing '", 1, 3},
   297  		{`'\x`, "missing '", 1, 3},
   298  		{`'\x'`, "non-hex character in escape sequence: '", 1, 3},
   299  		{`'\y'`, "unknown escape sequence", 1, 2},
   300  		{`'\x0'`, "non-hex character in escape sequence: '", 1, 4},
   301  		{`'\00'`, "non-octal character in escape sequence: '", 1, 4},
   302  		{`'\377' /*`, "comment not terminated", 1, 7}, // valid octal escape
   303  		{`'\378`, "non-octal character in escape sequence: 8", 1, 4},
   304  		{`'\400'`, "octal escape value > 255: 256", 1, 5},
   305  		{`'xx`, "missing '", 1, 2},
   306  
   307  		{"\"\n", "newline in string", 1, 1},
   308  		{`"`, "string not terminated", 1, 0},
   309  		{`"foo`, "string not terminated", 1, 0},
   310  		{"`", "string not terminated", 1, 0},
   311  		{"`foo", "string not terminated", 1, 0},
   312  		{"/*/", "comment not terminated", 1, 0},
   313  		{"/*\n\nfoo", "comment not terminated", 1, 0},
   314  		{"/*\n\nfoo", "comment not terminated", 1, 0},
   315  		{`"\`, "string not terminated", 1, 0},
   316  		{`"\"`, "string not terminated", 1, 0},
   317  		{`"\x`, "string not terminated", 1, 0},
   318  		{`"\x"`, "non-hex character in escape sequence: \"", 1, 3},
   319  		{`"\y"`, "unknown escape sequence", 1, 2},
   320  		{`"\x0"`, "non-hex character in escape sequence: \"", 1, 4},
   321  		{`"\00"`, "non-octal character in escape sequence: \"", 1, 4},
   322  		{`"\377" /*`, "comment not terminated", 1, 7}, // valid octal escape
   323  		{`"\378"`, "non-octal character in escape sequence: 8", 1, 4},
   324  		{`"\400"`, "octal escape value > 255: 256", 1, 5},
   325  
   326  		{`s := "foo\z"`, "unknown escape sequence", 1, 10},
   327  		{`s := "foo\z00\nbar"`, "unknown escape sequence", 1, 10},
   328  		{`"\x`, "string not terminated", 1, 0},
   329  		{`"\x"`, "non-hex character in escape sequence: \"", 1, 3},
   330  		{`var s string = "\x"`, "non-hex character in escape sequence: \"", 1, 18},
   331  		{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 1, 18},
   332  
   333  		// former problem cases
   334  		{"package p\n\n\xef", "invalid UTF-8 encoding", 3, 0},
   335  	} {
   336  		var s scanner
   337  		nerrors := 0
   338  		s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) {
   339  			nerrors++
   340  			// only check the first error
   341  			if nerrors == 1 {
   342  				if msg != test.msg {
   343  					t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
   344  				}
   345  				if line != test.line {
   346  					t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
   347  				}
   348  				if col != test.col {
   349  					t.Errorf("%q: got col = %d; want %d", test.src, col, test.col)
   350  				}
   351  			} else if nerrors > 1 {
   352  				// TODO(gri) make this use position info
   353  				t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
   354  			}
   355  		}, nil)
   356  
   357  		for {
   358  			s.next()
   359  			if s.tok == _EOF {
   360  				break
   361  			}
   362  		}
   363  
   364  		if nerrors == 0 {
   365  			t.Errorf("%q: got no error; want %q", test.src, test.msg)
   366  		}
   367  	}
   368  }