github.com/sanprasirt/go@v0.0.0-20170607001320-a027466e4b6d/src/cmd/compile/internal/syntax/scanner_test.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"testing"
    11  )
    12  
    13  func TestScanner(t *testing.T) {
    14  	if testing.Short() {
    15  		t.Skip("skipping test in short mode")
    16  	}
    17  
    18  	src, err := os.Open("parser.go")
    19  	if err != nil {
    20  		t.Fatal(err)
    21  	}
    22  	defer src.Close()
    23  
    24  	var s scanner
    25  	s.init(src, nil, nil)
    26  	for {
    27  		s.next()
    28  		if s.tok == _EOF {
    29  			break
    30  		}
    31  		switch s.tok {
    32  		case _Name:
    33  			fmt.Println(s.line, s.tok, "=>", s.lit)
    34  		case _Operator:
    35  			fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
    36  		default:
    37  			fmt.Println(s.line, s.tok)
    38  		}
    39  	}
    40  }
    41  
    42  func TestTokens(t *testing.T) {
    43  	// make source
    44  	var buf []byte
    45  	for i, s := range sampleTokens {
    46  		buf = append(buf, "\t\t\t\t"[:i&3]...)     // leading indentation
    47  		buf = append(buf, s.src...)                // token
    48  		buf = append(buf, "        "[:i&7]...)     // trailing spaces
    49  		buf = append(buf, "/* foo */ // bar\n"...) // comments
    50  	}
    51  
    52  	// scan source
    53  	var got scanner
    54  	got.init(&bytesReader{buf}, nil, nil)
    55  	got.next()
    56  	for i, want := range sampleTokens {
    57  		nlsemi := false
    58  
    59  		if got.line != uint(i+linebase) {
    60  			t.Errorf("got line %d; want %d", got.line, i+linebase)
    61  		}
    62  
    63  		if got.tok != want.tok {
    64  			t.Errorf("got tok = %s; want %s", got.tok, want.tok)
    65  			continue
    66  		}
    67  
    68  		switch want.tok {
    69  		case _Semi:
    70  			if got.lit != "semicolon" {
    71  				t.Errorf("got %s; want semicolon", got.lit)
    72  			}
    73  
    74  		case _Name, _Literal:
    75  			if got.lit != want.src {
    76  				t.Errorf("got lit = %q; want %q", got.lit, want.src)
    77  				continue
    78  			}
    79  			nlsemi = true
    80  
    81  		case _Operator, _AssignOp, _IncOp:
    82  			if got.op != want.op {
    83  				t.Errorf("got op = %s; want %s", got.op, want.op)
    84  				continue
    85  			}
    86  			if got.prec != want.prec {
    87  				t.Errorf("got prec = %d; want %d", got.prec, want.prec)
    88  				continue
    89  			}
    90  			nlsemi = want.tok == _IncOp
    91  
    92  		case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
    93  			nlsemi = true
    94  		}
    95  
    96  		if nlsemi {
    97  			got.next()
    98  			if got.tok != _Semi {
    99  				t.Errorf("got tok = %s; want ;", got.tok)
   100  				continue
   101  			}
   102  			if got.lit != "newline" {
   103  				t.Errorf("got %s; want newline", got.lit)
   104  			}
   105  		}
   106  
   107  		got.next()
   108  	}
   109  
   110  	if got.tok != _EOF {
   111  		t.Errorf("got %q; want _EOF", got.tok)
   112  	}
   113  }
   114  
   115  var sampleTokens = [...]struct {
   116  	tok  token
   117  	src  string
   118  	op   Operator
   119  	prec int
   120  }{
   121  	// name samples
   122  	{_Name, "x", 0, 0},
   123  	{_Name, "X123", 0, 0},
   124  	{_Name, "foo", 0, 0},
   125  	{_Name, "Foo123", 0, 0},
   126  	{_Name, "foo_bar", 0, 0},
   127  	{_Name, "_", 0, 0},
   128  	{_Name, "_foobar", 0, 0},
   129  	{_Name, "a۰۱۸", 0, 0},
   130  	{_Name, "foo६४", 0, 0},
   131  	{_Name, "bar9876", 0, 0},
   132  	{_Name, "ŝ", 0, 0},
   133  	{_Name, "ŝfoo", 0, 0},
   134  
   135  	// literal samples
   136  	{_Literal, "0", 0, 0},
   137  	{_Literal, "1", 0, 0},
   138  	{_Literal, "12345", 0, 0},
   139  	{_Literal, "123456789012345678890123456789012345678890", 0, 0},
   140  	{_Literal, "01234567", 0, 0},
   141  	{_Literal, "0x0", 0, 0},
   142  	{_Literal, "0xcafebabe", 0, 0},
   143  	{_Literal, "0.", 0, 0},
   144  	{_Literal, "0.e0", 0, 0},
   145  	{_Literal, "0.e-1", 0, 0},
   146  	{_Literal, "0.e+123", 0, 0},
   147  	{_Literal, ".0", 0, 0},
   148  	{_Literal, ".0E00", 0, 0},
   149  	{_Literal, ".0E-0123", 0, 0},
   150  	{_Literal, ".0E+12345678901234567890", 0, 0},
   151  	{_Literal, ".45e1", 0, 0},
   152  	{_Literal, "3.14159265", 0, 0},
   153  	{_Literal, "1e0", 0, 0},
   154  	{_Literal, "1e+100", 0, 0},
   155  	{_Literal, "1e-100", 0, 0},
   156  	{_Literal, "2.71828e-1000", 0, 0},
   157  	{_Literal, "0i", 0, 0},
   158  	{_Literal, "1i", 0, 0},
   159  	{_Literal, "012345678901234567889i", 0, 0},
   160  	{_Literal, "123456789012345678890i", 0, 0},
   161  	{_Literal, "0.i", 0, 0},
   162  	{_Literal, ".0i", 0, 0},
   163  	{_Literal, "3.14159265i", 0, 0},
   164  	{_Literal, "1e0i", 0, 0},
   165  	{_Literal, "1e+100i", 0, 0},
   166  	{_Literal, "1e-100i", 0, 0},
   167  	{_Literal, "2.71828e-1000i", 0, 0},
   168  	{_Literal, "'a'", 0, 0},
   169  	{_Literal, "'\\000'", 0, 0},
   170  	{_Literal, "'\\xFF'", 0, 0},
   171  	{_Literal, "'\\uff16'", 0, 0},
   172  	{_Literal, "'\\U0000ff16'", 0, 0},
   173  	{_Literal, "`foobar`", 0, 0},
   174  	{_Literal, "`foo\tbar`", 0, 0},
   175  	{_Literal, "`\r`", 0, 0},
   176  
   177  	// operators
   178  	{_Operator, "||", OrOr, precOrOr},
   179  
   180  	{_Operator, "&&", AndAnd, precAndAnd},
   181  
   182  	{_Operator, "==", Eql, precCmp},
   183  	{_Operator, "!=", Neq, precCmp},
   184  	{_Operator, "<", Lss, precCmp},
   185  	{_Operator, "<=", Leq, precCmp},
   186  	{_Operator, ">", Gtr, precCmp},
   187  	{_Operator, ">=", Geq, precCmp},
   188  
   189  	{_Operator, "+", Add, precAdd},
   190  	{_Operator, "-", Sub, precAdd},
   191  	{_Operator, "|", Or, precAdd},
   192  	{_Operator, "^", Xor, precAdd},
   193  
   194  	{_Star, "*", Mul, precMul},
   195  	{_Operator, "/", Div, precMul},
   196  	{_Operator, "%", Rem, precMul},
   197  	{_Operator, "&", And, precMul},
   198  	{_Operator, "&^", AndNot, precMul},
   199  	{_Operator, "<<", Shl, precMul},
   200  	{_Operator, ">>", Shr, precMul},
   201  
   202  	// assignment operations
   203  	{_AssignOp, "+=", Add, precAdd},
   204  	{_AssignOp, "-=", Sub, precAdd},
   205  	{_AssignOp, "|=", Or, precAdd},
   206  	{_AssignOp, "^=", Xor, precAdd},
   207  
   208  	{_AssignOp, "*=", Mul, precMul},
   209  	{_AssignOp, "/=", Div, precMul},
   210  	{_AssignOp, "%=", Rem, precMul},
   211  	{_AssignOp, "&=", And, precMul},
   212  	{_AssignOp, "&^=", AndNot, precMul},
   213  	{_AssignOp, "<<=", Shl, precMul},
   214  	{_AssignOp, ">>=", Shr, precMul},
   215  
   216  	// other operations
   217  	{_IncOp, "++", Add, precAdd},
   218  	{_IncOp, "--", Sub, precAdd},
   219  	{_Assign, "=", 0, 0},
   220  	{_Define, ":=", 0, 0},
   221  	{_Arrow, "<-", 0, 0},
   222  
   223  	// delimiters
   224  	{_Lparen, "(", 0, 0},
   225  	{_Lbrack, "[", 0, 0},
   226  	{_Lbrace, "{", 0, 0},
   227  	{_Rparen, ")", 0, 0},
   228  	{_Rbrack, "]", 0, 0},
   229  	{_Rbrace, "}", 0, 0},
   230  	{_Comma, ",", 0, 0},
   231  	{_Semi, ";", 0, 0},
   232  	{_Colon, ":", 0, 0},
   233  	{_Dot, ".", 0, 0},
   234  	{_DotDotDot, "...", 0, 0},
   235  
   236  	// keywords
   237  	{_Break, "break", 0, 0},
   238  	{_Case, "case", 0, 0},
   239  	{_Chan, "chan", 0, 0},
   240  	{_Const, "const", 0, 0},
   241  	{_Continue, "continue", 0, 0},
   242  	{_Default, "default", 0, 0},
   243  	{_Defer, "defer", 0, 0},
   244  	{_Else, "else", 0, 0},
   245  	{_Fallthrough, "fallthrough", 0, 0},
   246  	{_For, "for", 0, 0},
   247  	{_Func, "func", 0, 0},
   248  	{_Go, "go", 0, 0},
   249  	{_Goto, "goto", 0, 0},
   250  	{_If, "if", 0, 0},
   251  	{_Import, "import", 0, 0},
   252  	{_Interface, "interface", 0, 0},
   253  	{_Map, "map", 0, 0},
   254  	{_Package, "package", 0, 0},
   255  	{_Range, "range", 0, 0},
   256  	{_Return, "return", 0, 0},
   257  	{_Select, "select", 0, 0},
   258  	{_Struct, "struct", 0, 0},
   259  	{_Switch, "switch", 0, 0},
   260  	{_Type, "type", 0, 0},
   261  	{_Var, "var", 0, 0},
   262  }
   263  
   264  func TestScanErrors(t *testing.T) {
   265  	for _, test := range []struct {
   266  		src, msg  string
   267  		line, col uint // 0-based
   268  	}{
   269  		// Note: Positions for lexical errors are the earliest position
   270  		// where the error is apparent, not the beginning of the respective
   271  		// token.
   272  
   273  		// rune-level errors
   274  		{"fo\x00o", "invalid NUL character", 0, 2},
   275  		{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0},
   276  		{"foo\n\n\xff    ", "invalid UTF-8 encoding", 2, 0},
   277  
   278  		// token-level errors
   279  		{"\u00BD" /* ½ */, "invalid identifier character U+00BD '½'", 0, 0},
   280  		{"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid identifier character U+00BD '½'", 0, 13 /* byte offset */},
   281  		{"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 0, 0},
   282  		{"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid identifier character U+00BD '½'", 0, 8 /* byte offset */},
   283  
   284  		{"x + ~y", "bitwise complement operator is ^", 0, 4},
   285  		{"foo$bar = 0", "invalid character U+0024 '$'", 0, 3},
   286  		{"const x = 0xyz", "malformed hex constant", 0, 12},
   287  		{"0123456789", "malformed octal constant", 0, 10},
   288  		{"0123456789. /* foobar", "comment not terminated", 0, 12},   // valid float constant
   289  		{"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant
   290  		{"var a, b = 08, 07\n", "malformed octal constant", 0, 13},
   291  		{"(x + 1.0e+x)", "malformed floating-point constant exponent", 0, 10},
   292  
   293  		{`''`, "empty character literal or unescaped ' in character literal", 0, 1},
   294  		{"'\n", "newline in character literal", 0, 1},
   295  		{`'\`, "invalid character literal (missing closing ')", 0, 0},
   296  		{`'\'`, "invalid character literal (missing closing ')", 0, 0},
   297  		{`'\x`, "invalid character literal (missing closing ')", 0, 0},
   298  		{`'\x'`, "non-hex character in escape sequence: '", 0, 3},
   299  		{`'\y'`, "unknown escape sequence", 0, 2},
   300  		{`'\x0'`, "non-hex character in escape sequence: '", 0, 4},
   301  		{`'\00'`, "non-octal character in escape sequence: '", 0, 4},
   302  		{`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape
   303  		{`'\378`, "non-octal character in escape sequence: 8", 0, 4},
   304  		{`'\400'`, "octal escape value > 255: 256", 0, 5},
   305  		{`'xx`, "invalid character literal (missing closing ')", 0, 0},
   306  		{`'xx'`, "invalid character literal (more than one character)", 0, 0},
   307  
   308  		{"\"\n", "newline in string", 0, 1},
   309  		{`"`, "string not terminated", 0, 0},
   310  		{`"foo`, "string not terminated", 0, 0},
   311  		{"`", "string not terminated", 0, 0},
   312  		{"`foo", "string not terminated", 0, 0},
   313  		{"/*/", "comment not terminated", 0, 0},
   314  		{"/*\n\nfoo", "comment not terminated", 0, 0},
   315  		{"/*\n\nfoo", "comment not terminated", 0, 0},
   316  		{`"\`, "string not terminated", 0, 0},
   317  		{`"\"`, "string not terminated", 0, 0},
   318  		{`"\x`, "string not terminated", 0, 0},
   319  		{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
   320  		{`"\y"`, "unknown escape sequence", 0, 2},
   321  		{`"\x0"`, "non-hex character in escape sequence: \"", 0, 4},
   322  		{`"\00"`, "non-octal character in escape sequence: \"", 0, 4},
   323  		{`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape
   324  		{`"\378"`, "non-octal character in escape sequence: 8", 0, 4},
   325  		{`"\400"`, "octal escape value > 255: 256", 0, 5},
   326  
   327  		{`s := "foo\z"`, "unknown escape sequence", 0, 10},
   328  		{`s := "foo\z00\nbar"`, "unknown escape sequence", 0, 10},
   329  		{`"\x`, "string not terminated", 0, 0},
   330  		{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
   331  		{`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18},
   332  		{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 0, 18},
   333  
   334  		// former problem cases
   335  		{"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},
   336  	} {
   337  		var s scanner
   338  		nerrors := 0
   339  		s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) {
   340  			nerrors++
   341  			// only check the first error
   342  			if nerrors == 1 {
   343  				if msg != test.msg {
   344  					t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
   345  				}
   346  				if line != test.line+linebase {
   347  					t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase)
   348  				}
   349  				if col != test.col+colbase {
   350  					t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase)
   351  				}
   352  			} else if nerrors > 1 {
   353  				// TODO(gri) make this use position info
   354  				t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
   355  			}
   356  		}, nil)
   357  
   358  		for {
   359  			s.next()
   360  			if s.tok == _EOF {
   361  				break
   362  			}
   363  		}
   364  
   365  		if nerrors == 0 {
   366  			t.Errorf("%q: got no error; want %q", test.src, test.msg)
   367  		}
   368  	}
   369  }