github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/cmd/compile/internal/syntax/scanner_test.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"strings"
    11  	"testing"
    12  )
    13  
    14  func TestScanner(t *testing.T) {
    15  	if testing.Short() {
    16  		t.Skip("skipping test in short mode")
    17  	}
    18  
    19  	src, err := os.Open("parser.go")
    20  	if err != nil {
    21  		t.Fatal(err)
    22  	}
    23  	defer src.Close()
    24  
    25  	var s scanner
    26  	s.init(src, nil, nil)
    27  	for {
    28  		s.next()
    29  		if s.tok == _EOF {
    30  			break
    31  		}
    32  		switch s.tok {
    33  		case _Name:
    34  			fmt.Println(s.line, s.tok, "=>", s.lit)
    35  		case _Operator:
    36  			fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
    37  		default:
    38  			fmt.Println(s.line, s.tok)
    39  		}
    40  	}
    41  }
    42  
    43  func TestTokens(t *testing.T) {
    44  	// make source
    45  	var buf []byte
    46  	for i, s := range sampleTokens {
    47  		buf = append(buf, "\t\t\t\t"[:i&3]...)     // leading indentation
    48  		buf = append(buf, s.src...)                // token
    49  		buf = append(buf, "        "[:i&7]...)     // trailing spaces
    50  		buf = append(buf, "/* foo */ // bar\n"...) // comments
    51  	}
    52  
    53  	// scan source
    54  	var got scanner
    55  	got.init(&bytesReader{buf}, nil, nil)
    56  	got.next()
    57  	for i, want := range sampleTokens {
    58  		nlsemi := false
    59  
    60  		if got.line != uint(i+linebase) {
    61  			t.Errorf("got line %d; want %d", got.line, i+linebase)
    62  		}
    63  
    64  		if got.tok != want.tok {
    65  			t.Errorf("got tok = %s; want %s", got.tok, want.tok)
    66  			continue
    67  		}
    68  
    69  		switch want.tok {
    70  		case _Semi:
    71  			if got.lit != "semicolon" {
    72  				t.Errorf("got %s; want semicolon", got.lit)
    73  			}
    74  
    75  		case _Name, _Literal:
    76  			if got.lit != want.src {
    77  				t.Errorf("got lit = %q; want %q", got.lit, want.src)
    78  				continue
    79  			}
    80  			nlsemi = true
    81  
    82  		case _Operator, _AssignOp, _IncOp:
    83  			if got.op != want.op {
    84  				t.Errorf("got op = %s; want %s", got.op, want.op)
    85  				continue
    86  			}
    87  			if got.prec != want.prec {
    88  				t.Errorf("got prec = %d; want %d", got.prec, want.prec)
    89  				continue
    90  			}
    91  			nlsemi = want.tok == _IncOp
    92  
    93  		case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
    94  			nlsemi = true
    95  		}
    96  
    97  		if nlsemi {
    98  			got.next()
    99  			if got.tok != _Semi {
   100  				t.Errorf("got tok = %s; want ;", got.tok)
   101  				continue
   102  			}
   103  			if got.lit != "newline" {
   104  				t.Errorf("got %s; want newline", got.lit)
   105  			}
   106  		}
   107  
   108  		got.next()
   109  	}
   110  
   111  	if got.tok != _EOF {
   112  		t.Errorf("got %q; want _EOF", got.tok)
   113  	}
   114  }
   115  
   116  var sampleTokens = [...]struct {
   117  	tok  token
   118  	src  string
   119  	op   Operator
   120  	prec int
   121  }{
   122  	// name samples
   123  	{_Name, "x", 0, 0},
   124  	{_Name, "X123", 0, 0},
   125  	{_Name, "foo", 0, 0},
   126  	{_Name, "Foo123", 0, 0},
   127  	{_Name, "foo_bar", 0, 0},
   128  	{_Name, "_", 0, 0},
   129  	{_Name, "_foobar", 0, 0},
   130  	{_Name, "a۰۱۸", 0, 0},
   131  	{_Name, "foo६४", 0, 0},
   132  	{_Name, "bar9876", 0, 0},
   133  	{_Name, "ŝ", 0, 0},
   134  	{_Name, "ŝfoo", 0, 0},
   135  
   136  	// literal samples
   137  	{_Literal, "0", 0, 0},
   138  	{_Literal, "1", 0, 0},
   139  	{_Literal, "12345", 0, 0},
   140  	{_Literal, "123456789012345678890123456789012345678890", 0, 0},
   141  	{_Literal, "01234567", 0, 0},
   142  	{_Literal, "0x0", 0, 0},
   143  	{_Literal, "0xcafebabe", 0, 0},
   144  	{_Literal, "0.", 0, 0},
   145  	{_Literal, "0.e0", 0, 0},
   146  	{_Literal, "0.e-1", 0, 0},
   147  	{_Literal, "0.e+123", 0, 0},
   148  	{_Literal, ".0", 0, 0},
   149  	{_Literal, ".0E00", 0, 0},
   150  	{_Literal, ".0E-0123", 0, 0},
   151  	{_Literal, ".0E+12345678901234567890", 0, 0},
   152  	{_Literal, ".45e1", 0, 0},
   153  	{_Literal, "3.14159265", 0, 0},
   154  	{_Literal, "1e0", 0, 0},
   155  	{_Literal, "1e+100", 0, 0},
   156  	{_Literal, "1e-100", 0, 0},
   157  	{_Literal, "2.71828e-1000", 0, 0},
   158  	{_Literal, "0i", 0, 0},
   159  	{_Literal, "1i", 0, 0},
   160  	{_Literal, "012345678901234567889i", 0, 0},
   161  	{_Literal, "123456789012345678890i", 0, 0},
   162  	{_Literal, "0.i", 0, 0},
   163  	{_Literal, ".0i", 0, 0},
   164  	{_Literal, "3.14159265i", 0, 0},
   165  	{_Literal, "1e0i", 0, 0},
   166  	{_Literal, "1e+100i", 0, 0},
   167  	{_Literal, "1e-100i", 0, 0},
   168  	{_Literal, "2.71828e-1000i", 0, 0},
   169  	{_Literal, "'a'", 0, 0},
   170  	{_Literal, "'\\000'", 0, 0},
   171  	{_Literal, "'\\xFF'", 0, 0},
   172  	{_Literal, "'\\uff16'", 0, 0},
   173  	{_Literal, "'\\U0000ff16'", 0, 0},
   174  	{_Literal, "`foobar`", 0, 0},
   175  	{_Literal, "`foo\tbar`", 0, 0},
   176  	{_Literal, "`\r`", 0, 0},
   177  
   178  	// operators
   179  	{_Operator, "||", OrOr, precOrOr},
   180  
   181  	{_Operator, "&&", AndAnd, precAndAnd},
   182  
   183  	{_Operator, "==", Eql, precCmp},
   184  	{_Operator, "!=", Neq, precCmp},
   185  	{_Operator, "<", Lss, precCmp},
   186  	{_Operator, "<=", Leq, precCmp},
   187  	{_Operator, ">", Gtr, precCmp},
   188  	{_Operator, ">=", Geq, precCmp},
   189  
   190  	{_Operator, "+", Add, precAdd},
   191  	{_Operator, "-", Sub, precAdd},
   192  	{_Operator, "|", Or, precAdd},
   193  	{_Operator, "^", Xor, precAdd},
   194  
   195  	{_Star, "*", Mul, precMul},
   196  	{_Operator, "/", Div, precMul},
   197  	{_Operator, "%", Rem, precMul},
   198  	{_Operator, "&", And, precMul},
   199  	{_Operator, "&^", AndNot, precMul},
   200  	{_Operator, "<<", Shl, precMul},
   201  	{_Operator, ">>", Shr, precMul},
   202  
   203  	// assignment operations
   204  	{_AssignOp, "+=", Add, precAdd},
   205  	{_AssignOp, "-=", Sub, precAdd},
   206  	{_AssignOp, "|=", Or, precAdd},
   207  	{_AssignOp, "^=", Xor, precAdd},
   208  
   209  	{_AssignOp, "*=", Mul, precMul},
   210  	{_AssignOp, "/=", Div, precMul},
   211  	{_AssignOp, "%=", Rem, precMul},
   212  	{_AssignOp, "&=", And, precMul},
   213  	{_AssignOp, "&^=", AndNot, precMul},
   214  	{_AssignOp, "<<=", Shl, precMul},
   215  	{_AssignOp, ">>=", Shr, precMul},
   216  
   217  	// other operations
   218  	{_IncOp, "++", Add, precAdd},
   219  	{_IncOp, "--", Sub, precAdd},
   220  	{_Assign, "=", 0, 0},
   221  	{_Define, ":=", 0, 0},
   222  	{_Arrow, "<-", 0, 0},
   223  
   224  	// delimiters
   225  	{_Lparen, "(", 0, 0},
   226  	{_Lbrack, "[", 0, 0},
   227  	{_Lbrace, "{", 0, 0},
   228  	{_Rparen, ")", 0, 0},
   229  	{_Rbrack, "]", 0, 0},
   230  	{_Rbrace, "}", 0, 0},
   231  	{_Comma, ",", 0, 0},
   232  	{_Semi, ";", 0, 0},
   233  	{_Colon, ":", 0, 0},
   234  	{_Dot, ".", 0, 0},
   235  	{_DotDotDot, "...", 0, 0},
   236  
   237  	// keywords
   238  	{_Break, "break", 0, 0},
   239  	{_Case, "case", 0, 0},
   240  	{_Chan, "chan", 0, 0},
   241  	{_Const, "const", 0, 0},
   242  	{_Continue, "continue", 0, 0},
   243  	{_Default, "default", 0, 0},
   244  	{_Defer, "defer", 0, 0},
   245  	{_Else, "else", 0, 0},
   246  	{_Fallthrough, "fallthrough", 0, 0},
   247  	{_For, "for", 0, 0},
   248  	{_Func, "func", 0, 0},
   249  	{_Go, "go", 0, 0},
   250  	{_Goto, "goto", 0, 0},
   251  	{_If, "if", 0, 0},
   252  	{_Import, "import", 0, 0},
   253  	{_Interface, "interface", 0, 0},
   254  	{_Map, "map", 0, 0},
   255  	{_Package, "package", 0, 0},
   256  	{_Range, "range", 0, 0},
   257  	{_Return, "return", 0, 0},
   258  	{_Select, "select", 0, 0},
   259  	{_Struct, "struct", 0, 0},
   260  	{_Switch, "switch", 0, 0},
   261  	{_Type, "type", 0, 0},
   262  	{_Var, "var", 0, 0},
   263  }
   264  
   265  func TestScanErrors(t *testing.T) {
   266  	for _, test := range []struct {
   267  		src, msg  string
   268  		line, col uint // 0-based
   269  	}{
   270  		// Note: Positions for lexical errors are the earliest position
   271  		// where the error is apparent, not the beginning of the respective
   272  		// token.
   273  
   274  		// rune-level errors
   275  		{"fo\x00o", "invalid NUL character", 0, 2},
   276  		{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0},
   277  		{"foo\n\n\xff    ", "invalid UTF-8 encoding", 2, 0},
   278  
   279  		// token-level errors
   280  		{"\u00BD" /* ½ */, "invalid identifier character U+00BD '½'", 0, 0},
   281  		{"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid identifier character U+00BD '½'", 0, 13 /* byte offset */},
   282  		{"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 0, 0},
   283  		{"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid identifier character U+00BD '½'", 0, 8 /* byte offset */},
   284  
   285  		{"x + ~y", "bitwise complement operator is ^", 0, 4},
   286  		{"foo$bar = 0", "invalid character U+0024 '$'", 0, 3},
   287  		{"const x = 0xyz", "malformed hex constant", 0, 12},
   288  		{"0123456789", "malformed octal constant", 0, 10},
   289  		{"0123456789. /* foobar", "comment not terminated", 0, 12},   // valid float constant
   290  		{"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant
   291  		{"var a, b = 08, 07\n", "malformed octal constant", 0, 13},
   292  		{"(x + 1.0e+x)", "malformed floating-point constant exponent", 0, 10},
   293  
   294  		{`''`, "empty character literal or unescaped ' in character literal", 0, 1},
   295  		{"'\n", "newline in character literal", 0, 1},
   296  		{`'\`, "invalid character literal (missing closing ')", 0, 0},
   297  		{`'\'`, "invalid character literal (missing closing ')", 0, 0},
   298  		{`'\x`, "invalid character literal (missing closing ')", 0, 0},
   299  		{`'\x'`, "non-hex character in escape sequence: '", 0, 3},
   300  		{`'\y'`, "unknown escape sequence", 0, 2},
   301  		{`'\x0'`, "non-hex character in escape sequence: '", 0, 4},
   302  		{`'\00'`, "non-octal character in escape sequence: '", 0, 4},
   303  		{`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape
   304  		{`'\378`, "non-octal character in escape sequence: 8", 0, 4},
   305  		{`'\400'`, "octal escape value > 255: 256", 0, 5},
   306  		{`'xx`, "invalid character literal (missing closing ')", 0, 0},
   307  		{`'xx'`, "invalid character literal (more than one character)", 0, 0},
   308  
   309  		{"\"\n", "newline in string", 0, 1},
   310  		{`"`, "string not terminated", 0, 0},
   311  		{`"foo`, "string not terminated", 0, 0},
   312  		{"`", "string not terminated", 0, 0},
   313  		{"`foo", "string not terminated", 0, 0},
   314  		{"/*/", "comment not terminated", 0, 0},
   315  		{"/*\n\nfoo", "comment not terminated", 0, 0},
   316  		{"/*\n\nfoo", "comment not terminated", 0, 0},
   317  		{`"\`, "string not terminated", 0, 0},
   318  		{`"\"`, "string not terminated", 0, 0},
   319  		{`"\x`, "string not terminated", 0, 0},
   320  		{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
   321  		{`"\y"`, "unknown escape sequence", 0, 2},
   322  		{`"\x0"`, "non-hex character in escape sequence: \"", 0, 4},
   323  		{`"\00"`, "non-octal character in escape sequence: \"", 0, 4},
   324  		{`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape
   325  		{`"\378"`, "non-octal character in escape sequence: 8", 0, 4},
   326  		{`"\400"`, "octal escape value > 255: 256", 0, 5},
   327  
   328  		{`s := "foo\z"`, "unknown escape sequence", 0, 10},
   329  		{`s := "foo\z00\nbar"`, "unknown escape sequence", 0, 10},
   330  		{`"\x`, "string not terminated", 0, 0},
   331  		{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
   332  		{`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18},
   333  		{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 0, 18},
   334  
   335  		// former problem cases
   336  		{"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},
   337  	} {
   338  		var s scanner
   339  		nerrors := 0
   340  		s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) {
   341  			nerrors++
   342  			// only check the first error
   343  			if nerrors == 1 {
   344  				if msg != test.msg {
   345  					t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
   346  				}
   347  				if line != test.line+linebase {
   348  					t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase)
   349  				}
   350  				if col != test.col+colbase {
   351  					t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase)
   352  				}
   353  			} else if nerrors > 1 {
   354  				// TODO(gri) make this use position info
   355  				t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
   356  			}
   357  		}, nil)
   358  
   359  		for {
   360  			s.next()
   361  			if s.tok == _EOF {
   362  				break
   363  			}
   364  		}
   365  
   366  		if nerrors == 0 {
   367  			t.Errorf("%q: got no error; want %q", test.src, test.msg)
   368  		}
   369  	}
   370  }
   371  
   372  func TestIssue21938(t *testing.T) {
   373  	s := "/*" + strings.Repeat(" ", 4089) + "*/ .5"
   374  
   375  	var got scanner
   376  	got.init(strings.NewReader(s), nil, nil)
   377  	got.next()
   378  
   379  	if got.tok != _Literal || got.lit != ".5" {
   380  		t.Errorf("got %s %q; want %s %q", got.tok, got.lit, _Literal, ".5")
   381  	}
   382  }