github.com/yukk001/go1.10.8@v0.0.0-20190813125351-6df2d3982e20/src/cmd/compile/internal/syntax/scanner_test.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package syntax 6 7 import ( 8 "fmt" 9 "os" 10 "strings" 11 "testing" 12 ) 13 14 func TestScanner(t *testing.T) { 15 if testing.Short() { 16 t.Skip("skipping test in short mode") 17 } 18 19 src, err := os.Open("parser.go") 20 if err != nil { 21 t.Fatal(err) 22 } 23 defer src.Close() 24 25 var s scanner 26 s.init(src, nil, nil) 27 for { 28 s.next() 29 if s.tok == _EOF { 30 break 31 } 32 switch s.tok { 33 case _Name: 34 fmt.Println(s.line, s.tok, "=>", s.lit) 35 case _Operator: 36 fmt.Println(s.line, s.tok, "=>", s.op, s.prec) 37 default: 38 fmt.Println(s.line, s.tok) 39 } 40 } 41 } 42 43 func TestTokens(t *testing.T) { 44 // make source 45 var buf []byte 46 for i, s := range sampleTokens { 47 buf = append(buf, "\t\t\t\t"[:i&3]...) // leading indentation 48 buf = append(buf, s.src...) // token 49 buf = append(buf, " "[:i&7]...) // trailing spaces 50 buf = append(buf, "/* foo */ // bar\n"...) // comments 51 } 52 53 // scan source 54 var got scanner 55 got.init(&bytesReader{buf}, nil, nil) 56 got.next() 57 for i, want := range sampleTokens { 58 nlsemi := false 59 60 if got.line != uint(i+linebase) { 61 t.Errorf("got line %d; want %d", got.line, i+linebase) 62 } 63 64 if got.tok != want.tok { 65 t.Errorf("got tok = %s; want %s", got.tok, want.tok) 66 continue 67 } 68 69 switch want.tok { 70 case _Semi: 71 if got.lit != "semicolon" { 72 t.Errorf("got %s; want semicolon", got.lit) 73 } 74 75 case _Name, _Literal: 76 if got.lit != want.src { 77 t.Errorf("got lit = %q; want %q", got.lit, want.src) 78 continue 79 } 80 nlsemi = true 81 82 case _Operator, _AssignOp, _IncOp: 83 if got.op != want.op { 84 t.Errorf("got op = %s; want %s", got.op, want.op) 85 continue 86 } 87 if got.prec != want.prec { 88 t.Errorf("got prec = %d; want %d", got.prec, want.prec) 89 continue 90 } 91 nlsemi = want.tok == _IncOp 92 93 case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return: 94 nlsemi = true 95 } 96 97 if nlsemi { 98 got.next() 99 if got.tok != _Semi { 100 t.Errorf("got tok = %s; want ;", got.tok) 101 continue 102 } 103 if got.lit != "newline" { 104 t.Errorf("got %s; want newline", got.lit) 105 } 106 } 107 108 got.next() 109 } 110 111 if got.tok != _EOF { 112 t.Errorf("got %q; want _EOF", got.tok) 113 } 114 } 115 116 var sampleTokens = [...]struct { 117 tok token 118 src string 119 op Operator 120 prec int 121 }{ 122 // name samples 123 {_Name, "x", 0, 0}, 124 {_Name, "X123", 0, 0}, 125 {_Name, "foo", 0, 0}, 126 {_Name, "Foo123", 0, 0}, 127 {_Name, "foo_bar", 0, 0}, 128 {_Name, "_", 0, 0}, 129 {_Name, "_foobar", 0, 0}, 130 {_Name, "a۰۱۸", 0, 0}, 131 {_Name, "foo६४", 0, 0}, 132 {_Name, "bar9876", 0, 0}, 133 {_Name, "ŝ", 0, 0}, 134 {_Name, "ŝfoo", 0, 0}, 135 136 // literal samples 137 {_Literal, "0", 0, 0}, 138 {_Literal, "1", 0, 0}, 139 {_Literal, "12345", 0, 0}, 140 {_Literal, "123456789012345678890123456789012345678890", 0, 0}, 141 {_Literal, "01234567", 0, 0}, 142 {_Literal, "0x0", 0, 0}, 143 {_Literal, "0xcafebabe", 0, 0}, 144 {_Literal, "0.", 0, 0}, 145 {_Literal, "0.e0", 0, 0}, 146 {_Literal, "0.e-1", 0, 0}, 147 {_Literal, "0.e+123", 0, 0}, 148 {_Literal, ".0", 0, 0}, 149 {_Literal, ".0E00", 0, 0}, 150 {_Literal, ".0E-0123", 0, 0}, 151 {_Literal, ".0E+12345678901234567890", 0, 0}, 152 {_Literal, ".45e1", 0, 0}, 153 {_Literal, "3.14159265", 0, 0}, 154 {_Literal, "1e0", 0, 0}, 155 {_Literal, "1e+100", 0, 0}, 156 {_Literal, "1e-100", 0, 0}, 157 {_Literal, "2.71828e-1000", 0, 0}, 158 {_Literal, "0i", 0, 0}, 159 {_Literal, "1i", 0, 0}, 160 {_Literal, "012345678901234567889i", 0, 0}, 161 {_Literal, "123456789012345678890i", 0, 0}, 162 {_Literal, "0.i", 0, 0}, 163 {_Literal, ".0i", 0, 0}, 164 {_Literal, "3.14159265i", 0, 0}, 165 {_Literal, "1e0i", 0, 0}, 166 {_Literal, "1e+100i", 0, 0}, 167 {_Literal, "1e-100i", 0, 0}, 168 {_Literal, "2.71828e-1000i", 0, 0}, 169 {_Literal, "'a'", 0, 0}, 170 {_Literal, "'\\000'", 0, 0}, 171 {_Literal, "'\\xFF'", 0, 0}, 172 {_Literal, "'\\uff16'", 0, 0}, 173 {_Literal, "'\\U0000ff16'", 0, 0}, 174 {_Literal, "`foobar`", 0, 0}, 175 {_Literal, "`foo\tbar`", 0, 0}, 176 {_Literal, "`\r`", 0, 0}, 177 178 // operators 179 {_Operator, "||", OrOr, precOrOr}, 180 181 {_Operator, "&&", AndAnd, precAndAnd}, 182 183 {_Operator, "==", Eql, precCmp}, 184 {_Operator, "!=", Neq, precCmp}, 185 {_Operator, "<", Lss, precCmp}, 186 {_Operator, "<=", Leq, precCmp}, 187 {_Operator, ">", Gtr, precCmp}, 188 {_Operator, ">=", Geq, precCmp}, 189 190 {_Operator, "+", Add, precAdd}, 191 {_Operator, "-", Sub, precAdd}, 192 {_Operator, "|", Or, precAdd}, 193 {_Operator, "^", Xor, precAdd}, 194 195 {_Star, "*", Mul, precMul}, 196 {_Operator, "/", Div, precMul}, 197 {_Operator, "%", Rem, precMul}, 198 {_Operator, "&", And, precMul}, 199 {_Operator, "&^", AndNot, precMul}, 200 {_Operator, "<<", Shl, precMul}, 201 {_Operator, ">>", Shr, precMul}, 202 203 // assignment operations 204 {_AssignOp, "+=", Add, precAdd}, 205 {_AssignOp, "-=", Sub, precAdd}, 206 {_AssignOp, "|=", Or, precAdd}, 207 {_AssignOp, "^=", Xor, precAdd}, 208 209 {_AssignOp, "*=", Mul, precMul}, 210 {_AssignOp, "/=", Div, precMul}, 211 {_AssignOp, "%=", Rem, precMul}, 212 {_AssignOp, "&=", And, precMul}, 213 {_AssignOp, "&^=", AndNot, precMul}, 214 {_AssignOp, "<<=", Shl, precMul}, 215 {_AssignOp, ">>=", Shr, precMul}, 216 217 // other operations 218 {_IncOp, "++", Add, precAdd}, 219 {_IncOp, "--", Sub, precAdd}, 220 {_Assign, "=", 0, 0}, 221 {_Define, ":=", 0, 0}, 222 {_Arrow, "<-", 0, 0}, 223 224 // delimiters 225 {_Lparen, "(", 0, 0}, 226 {_Lbrack, "[", 0, 0}, 227 {_Lbrace, "{", 0, 0}, 228 {_Rparen, ")", 0, 0}, 229 {_Rbrack, "]", 0, 0}, 230 {_Rbrace, "}", 0, 0}, 231 {_Comma, ",", 0, 0}, 232 {_Semi, ";", 0, 0}, 233 {_Colon, ":", 0, 0}, 234 {_Dot, ".", 0, 0}, 235 {_DotDotDot, "...", 0, 0}, 236 237 // keywords 238 {_Break, "break", 0, 0}, 239 {_Case, "case", 0, 0}, 240 {_Chan, "chan", 0, 0}, 241 {_Const, "const", 0, 0}, 242 {_Continue, "continue", 0, 0}, 243 {_Default, "default", 0, 0}, 244 {_Defer, "defer", 0, 0}, 245 {_Else, "else", 0, 0}, 246 {_Fallthrough, "fallthrough", 0, 0}, 247 {_For, "for", 0, 0}, 248 {_Func, "func", 0, 0}, 249 {_Go, "go", 0, 0}, 250 {_Goto, "goto", 0, 0}, 251 {_If, "if", 0, 0}, 252 {_Import, "import", 0, 0}, 253 {_Interface, "interface", 0, 0}, 254 {_Map, "map", 0, 0}, 255 {_Package, "package", 0, 0}, 256 {_Range, "range", 0, 0}, 257 {_Return, "return", 0, 0}, 258 {_Select, "select", 0, 0}, 259 {_Struct, "struct", 0, 0}, 260 {_Switch, "switch", 0, 0}, 261 {_Type, "type", 0, 0}, 262 {_Var, "var", 0, 0}, 263 } 264 265 func TestScanErrors(t *testing.T) { 266 for _, test := range []struct { 267 src, msg string 268 line, col uint // 0-based 269 }{ 270 // Note: Positions for lexical errors are the earliest position 271 // where the error is apparent, not the beginning of the respective 272 // token. 273 274 // rune-level errors 275 {"fo\x00o", "invalid NUL character", 0, 2}, 276 {"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0}, 277 {"foo\n\n\xff ", "invalid UTF-8 encoding", 2, 0}, 278 279 // token-level errors 280 {"\u00BD" /* ½ */, "invalid identifier character U+00BD '½'", 0, 0}, 281 {"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid identifier character U+00BD '½'", 0, 13 /* byte offset */}, 282 {"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 0, 0}, 283 {"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid identifier character U+00BD '½'", 0, 8 /* byte offset */}, 284 285 {"x + ~y", "bitwise complement operator is ^", 0, 4}, 286 {"foo$bar = 0", "invalid character U+0024 '$'", 0, 3}, 287 {"const x = 0xyz", "malformed hex constant", 0, 12}, 288 {"0123456789", "malformed octal constant", 0, 10}, 289 {"0123456789. /* foobar", "comment not terminated", 0, 12}, // valid float constant 290 {"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant 291 {"var a, b = 08, 07\n", "malformed octal constant", 0, 13}, 292 {"(x + 1.0e+x)", "malformed floating-point constant exponent", 0, 10}, 293 294 {`''`, "empty character literal or unescaped ' in character literal", 0, 1}, 295 {"'\n", "newline in character literal", 0, 1}, 296 {`'\`, "invalid character literal (missing closing ')", 0, 0}, 297 {`'\'`, "invalid character literal (missing closing ')", 0, 0}, 298 {`'\x`, "invalid character literal (missing closing ')", 0, 0}, 299 {`'\x'`, "non-hex character in escape sequence: '", 0, 3}, 300 {`'\y'`, "unknown escape sequence", 0, 2}, 301 {`'\x0'`, "non-hex character in escape sequence: '", 0, 4}, 302 {`'\00'`, "non-octal character in escape sequence: '", 0, 4}, 303 {`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape 304 {`'\378`, "non-octal character in escape sequence: 8", 0, 4}, 305 {`'\400'`, "octal escape value > 255: 256", 0, 5}, 306 {`'xx`, "invalid character literal (missing closing ')", 0, 0}, 307 {`'xx'`, "invalid character literal (more than one character)", 0, 0}, 308 309 {"\"\n", "newline in string", 0, 1}, 310 {`"`, "string not terminated", 0, 0}, 311 {`"foo`, "string not terminated", 0, 0}, 312 {"`", "string not terminated", 0, 0}, 313 {"`foo", "string not terminated", 0, 0}, 314 {"/*/", "comment not terminated", 0, 0}, 315 {"/*\n\nfoo", "comment not terminated", 0, 0}, 316 {"/*\n\nfoo", "comment not terminated", 0, 0}, 317 {`"\`, "string not terminated", 0, 0}, 318 {`"\"`, "string not terminated", 0, 0}, 319 {`"\x`, "string not terminated", 0, 0}, 320 {`"\x"`, "non-hex character in escape sequence: \"", 0, 3}, 321 {`"\y"`, "unknown escape sequence", 0, 2}, 322 {`"\x0"`, "non-hex character in escape sequence: \"", 0, 4}, 323 {`"\00"`, "non-octal character in escape sequence: \"", 0, 4}, 324 {`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape 325 {`"\378"`, "non-octal character in escape sequence: 8", 0, 4}, 326 {`"\400"`, "octal escape value > 255: 256", 0, 5}, 327 328 {`s := "foo\z"`, "unknown escape sequence", 0, 10}, 329 {`s := "foo\z00\nbar"`, "unknown escape sequence", 0, 10}, 330 {`"\x`, "string not terminated", 0, 0}, 331 {`"\x"`, "non-hex character in escape sequence: \"", 0, 3}, 332 {`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18}, 333 {`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 0, 18}, 334 335 // former problem cases 336 {"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0}, 337 } { 338 var s scanner 339 nerrors := 0 340 s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) { 341 nerrors++ 342 // only check the first error 343 if nerrors == 1 { 344 if msg != test.msg { 345 t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg) 346 } 347 if line != test.line+linebase { 348 t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase) 349 } 350 if col != test.col+colbase { 351 t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase) 352 } 353 } else if nerrors > 1 { 354 // TODO(gri) make this use position info 355 t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line) 356 } 357 }, nil) 358 359 for { 360 s.next() 361 if s.tok == _EOF { 362 break 363 } 364 } 365 366 if nerrors == 0 { 367 t.Errorf("%q: got no error; want %q", test.src, test.msg) 368 } 369 } 370 } 371 372 func TestIssue21938(t *testing.T) { 373 s := "/*" + strings.Repeat(" ", 4089) + "*/ .5" 374 375 var got scanner 376 got.init(strings.NewReader(s), nil, nil) 377 got.next() 378 379 if got.tok != _Literal || got.lit != ".5" { 380 t.Errorf("got %s %q; want %s %q", got.tok, got.lit, _Literal, ".5") 381 } 382 }