github.com/corona10/go@v0.0.0-20180224231303-7a218942be57/src/cmd/compile/internal/syntax/scanner_test.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package syntax 6 7 import ( 8 "bytes" 9 "fmt" 10 "os" 11 "strings" 12 "testing" 13 ) 14 15 func TestScanner(t *testing.T) { 16 if testing.Short() { 17 t.Skip("skipping test in short mode") 18 } 19 20 src, err := os.Open("parser.go") 21 if err != nil { 22 t.Fatal(err) 23 } 24 defer src.Close() 25 26 var s scanner 27 s.init(src, nil, 0) 28 for { 29 s.next() 30 if s.tok == _EOF { 31 break 32 } 33 switch s.tok { 34 case _Name: 35 fmt.Println(s.line, s.tok, "=>", s.lit) 36 case _Operator: 37 fmt.Println(s.line, s.tok, "=>", s.op, s.prec) 38 default: 39 fmt.Println(s.line, s.tok) 40 } 41 } 42 } 43 44 func TestTokens(t *testing.T) { 45 // make source 46 var buf bytes.Buffer 47 for i, s := range sampleTokens { 48 buf.WriteString("\t\t\t\t"[:i&3]) // leading indentation 49 buf.WriteString(s.src) // token 50 buf.WriteString(" "[:i&7]) // trailing spaces 51 buf.WriteString("/*line foo:1 */ // bar\n") // comments (don't crash w/o directive handler) 52 } 53 54 // scan source 55 var got scanner 56 got.init(&buf, nil, 0) 57 got.next() 58 for i, want := range sampleTokens { 59 nlsemi := false 60 61 if got.line != uint(i+linebase) { 62 t.Errorf("got line %d; want %d", got.line, i+linebase) 63 } 64 65 if got.tok != want.tok { 66 t.Errorf("got tok = %s; want %s", got.tok, want.tok) 67 continue 68 } 69 70 switch want.tok { 71 case _Semi: 72 if got.lit != "semicolon" { 73 t.Errorf("got %s; want semicolon", got.lit) 74 } 75 76 case _Name, _Literal: 77 if got.lit != want.src { 78 t.Errorf("got lit = %q; want %q", got.lit, want.src) 79 continue 80 } 81 nlsemi = true 82 83 case _Operator, _AssignOp, _IncOp: 84 if got.op != want.op { 85 t.Errorf("got op = %s; want %s", got.op, want.op) 86 continue 87 } 88 if got.prec != want.prec { 89 t.Errorf("got prec = %d; want %d", got.prec, want.prec) 90 continue 91 } 92 nlsemi = want.tok == _IncOp 93 94 case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return: 95 nlsemi = true 96 } 97 98 if nlsemi { 99 got.next() 100 if got.tok != _Semi { 101 t.Errorf("got tok = %s; want ;", got.tok) 102 continue 103 } 104 if got.lit != "newline" { 105 t.Errorf("got %s; want newline", got.lit) 106 } 107 } 108 109 got.next() 110 } 111 112 if got.tok != _EOF { 113 t.Errorf("got %q; want _EOF", got.tok) 114 } 115 } 116 117 var sampleTokens = [...]struct { 118 tok token 119 src string 120 op Operator 121 prec int 122 }{ 123 // name samples 124 {_Name, "x", 0, 0}, 125 {_Name, "X123", 0, 0}, 126 {_Name, "foo", 0, 0}, 127 {_Name, "Foo123", 0, 0}, 128 {_Name, "foo_bar", 0, 0}, 129 {_Name, "_", 0, 0}, 130 {_Name, "_foobar", 0, 0}, 131 {_Name, "a۰۱۸", 0, 0}, 132 {_Name, "foo६४", 0, 0}, 133 {_Name, "bar9876", 0, 0}, 134 {_Name, "ŝ", 0, 0}, 135 {_Name, "ŝfoo", 0, 0}, 136 137 // literal samples 138 {_Literal, "0", 0, 0}, 139 {_Literal, "1", 0, 0}, 140 {_Literal, "12345", 0, 0}, 141 {_Literal, "123456789012345678890123456789012345678890", 0, 0}, 142 {_Literal, "01234567", 0, 0}, 143 {_Literal, "0x0", 0, 0}, 144 {_Literal, "0xcafebabe", 0, 0}, 145 {_Literal, "0.", 0, 0}, 146 {_Literal, "0.e0", 0, 0}, 147 {_Literal, "0.e-1", 0, 0}, 148 {_Literal, "0.e+123", 0, 0}, 149 {_Literal, ".0", 0, 0}, 150 {_Literal, ".0E00", 0, 0}, 151 {_Literal, ".0E-0123", 0, 0}, 152 {_Literal, ".0E+12345678901234567890", 0, 0}, 153 {_Literal, ".45e1", 0, 0}, 154 {_Literal, "3.14159265", 0, 0}, 155 {_Literal, "1e0", 0, 0}, 156 {_Literal, "1e+100", 0, 0}, 157 {_Literal, "1e-100", 0, 0}, 158 {_Literal, "2.71828e-1000", 0, 0}, 159 {_Literal, "0i", 0, 0}, 160 {_Literal, "1i", 0, 0}, 161 {_Literal, "012345678901234567889i", 0, 0}, 162 {_Literal, "123456789012345678890i", 0, 0}, 163 {_Literal, "0.i", 0, 0}, 164 {_Literal, ".0i", 0, 0}, 165 {_Literal, "3.14159265i", 0, 0}, 166 {_Literal, "1e0i", 0, 0}, 167 {_Literal, "1e+100i", 0, 0}, 168 {_Literal, "1e-100i", 0, 0}, 169 {_Literal, "2.71828e-1000i", 0, 0}, 170 {_Literal, "'a'", 0, 0}, 171 {_Literal, "'\\000'", 0, 0}, 172 {_Literal, "'\\xFF'", 0, 0}, 173 {_Literal, "'\\uff16'", 0, 0}, 174 {_Literal, "'\\U0000ff16'", 0, 0}, 175 {_Literal, "`foobar`", 0, 0}, 176 {_Literal, "`foo\tbar`", 0, 0}, 177 {_Literal, "`\r`", 0, 0}, 178 179 // operators 180 {_Operator, "||", OrOr, precOrOr}, 181 182 {_Operator, "&&", AndAnd, precAndAnd}, 183 184 {_Operator, "==", Eql, precCmp}, 185 {_Operator, "!=", Neq, precCmp}, 186 {_Operator, "<", Lss, precCmp}, 187 {_Operator, "<=", Leq, precCmp}, 188 {_Operator, ">", Gtr, precCmp}, 189 {_Operator, ">=", Geq, precCmp}, 190 191 {_Operator, "+", Add, precAdd}, 192 {_Operator, "-", Sub, precAdd}, 193 {_Operator, "|", Or, precAdd}, 194 {_Operator, "^", Xor, precAdd}, 195 196 {_Star, "*", Mul, precMul}, 197 {_Operator, "/", Div, precMul}, 198 {_Operator, "%", Rem, precMul}, 199 {_Operator, "&", And, precMul}, 200 {_Operator, "&^", AndNot, precMul}, 201 {_Operator, "<<", Shl, precMul}, 202 {_Operator, ">>", Shr, precMul}, 203 204 // assignment operations 205 {_AssignOp, "+=", Add, precAdd}, 206 {_AssignOp, "-=", Sub, precAdd}, 207 {_AssignOp, "|=", Or, precAdd}, 208 {_AssignOp, "^=", Xor, precAdd}, 209 210 {_AssignOp, "*=", Mul, precMul}, 211 {_AssignOp, "/=", Div, precMul}, 212 {_AssignOp, "%=", Rem, precMul}, 213 {_AssignOp, "&=", And, precMul}, 214 {_AssignOp, "&^=", AndNot, precMul}, 215 {_AssignOp, "<<=", Shl, precMul}, 216 {_AssignOp, ">>=", Shr, precMul}, 217 218 // other operations 219 {_IncOp, "++", Add, precAdd}, 220 {_IncOp, "--", Sub, precAdd}, 221 {_Assign, "=", 0, 0}, 222 {_Define, ":=", 0, 0}, 223 {_Arrow, "<-", 0, 0}, 224 225 // delimiters 226 {_Lparen, "(", 0, 0}, 227 {_Lbrack, "[", 0, 0}, 228 {_Lbrace, "{", 0, 0}, 229 {_Rparen, ")", 0, 0}, 230 {_Rbrack, "]", 0, 0}, 231 {_Rbrace, "}", 0, 0}, 232 {_Comma, ",", 0, 0}, 233 {_Semi, ";", 0, 0}, 234 {_Colon, ":", 0, 0}, 235 {_Dot, ".", 0, 0}, 236 {_DotDotDot, "...", 0, 0}, 237 238 // keywords 239 {_Break, "break", 0, 0}, 240 {_Case, "case", 0, 0}, 241 {_Chan, "chan", 0, 0}, 242 {_Const, "const", 0, 0}, 243 {_Continue, "continue", 0, 0}, 244 {_Default, "default", 0, 0}, 245 {_Defer, "defer", 0, 0}, 246 {_Else, "else", 0, 0}, 247 {_Fallthrough, "fallthrough", 0, 0}, 248 {_For, "for", 0, 0}, 249 {_Func, "func", 0, 0}, 250 {_Go, "go", 0, 0}, 251 {_Goto, "goto", 0, 0}, 252 {_If, "if", 0, 0}, 253 {_Import, "import", 0, 0}, 254 {_Interface, "interface", 0, 0}, 255 {_Map, "map", 0, 0}, 256 {_Package, "package", 0, 0}, 257 {_Range, "range", 0, 0}, 258 {_Return, "return", 0, 0}, 259 {_Select, "select", 0, 0}, 260 {_Struct, "struct", 0, 0}, 261 {_Switch, "switch", 0, 0}, 262 {_Type, "type", 0, 0}, 263 {_Var, "var", 0, 0}, 264 } 265 266 func TestComments(t *testing.T) { 267 type comment struct { 268 line, col uint // 0-based 269 text string 270 } 271 272 for _, test := range []struct { 273 src string 274 want comment 275 }{ 276 // no comments 277 {"no comment here", comment{0, 0, ""}}, 278 {" /", comment{0, 0, ""}}, 279 {"\n /*/", comment{0, 0, ""}}, 280 281 //-style comments 282 {"// line comment\n", comment{0, 0, "// line comment"}}, 283 {"package p // line comment\n", comment{0, 10, "// line comment"}}, 284 {"//\n//\n\t// want this one\r\n", comment{2, 1, "// want this one\r"}}, 285 {"\n\n//\n", comment{2, 0, "//"}}, 286 {"//", comment{0, 0, "//"}}, 287 288 /*-style comments */ 289 {"/* regular comment */", comment{0, 0, "/* regular comment */"}}, 290 {"package p /* regular comment", comment{0, 0, ""}}, 291 {"\n\n\n/*\n*//* want this one */", comment{4, 2, "/* want this one */"}}, 292 {"\n\n/**/", comment{2, 0, "/**/"}}, 293 {"/*", comment{0, 0, ""}}, 294 } { 295 var s scanner 296 var got comment 297 s.init(strings.NewReader(test.src), 298 func(line, col uint, msg string) { 299 if msg[0] != '/' { 300 // error 301 if msg != "comment not terminated" { 302 t.Errorf("%q: %s", test.src, msg) 303 } 304 return 305 } 306 got = comment{line - linebase, col - colbase, msg} // keep last one 307 }, comments) 308 309 for { 310 s.next() 311 if s.tok == _EOF { 312 break 313 } 314 } 315 316 want := test.want 317 if got.line != want.line || got.col != want.col { 318 t.Errorf("%q: got position %d:%d; want %d:%d", test.src, got.line, got.col, want.line, want.col) 319 } 320 if got.text != want.text { 321 t.Errorf("%q: got %q; want %q", test.src, got.text, want.text) 322 } 323 } 324 } 325 326 func TestScanErrors(t *testing.T) { 327 for _, test := range []struct { 328 src, msg string 329 line, col uint // 0-based 330 }{ 331 // Note: Positions for lexical errors are the earliest position 332 // where the error is apparent, not the beginning of the respective 333 // token. 334 335 // rune-level errors 336 {"fo\x00o", "invalid NUL character", 0, 2}, 337 {"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0}, 338 {"foo\n\n\xff ", "invalid UTF-8 encoding", 2, 0}, 339 340 // token-level errors 341 {"\u00BD" /* ½ */, "invalid identifier character U+00BD '½'", 0, 0}, 342 {"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid identifier character U+00BD '½'", 0, 13 /* byte offset */}, 343 {"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 0, 0}, 344 {"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid identifier character U+00BD '½'", 0, 8 /* byte offset */}, 345 346 {"x + ~y", "invalid character U+007E '~'", 0, 4}, 347 {"foo$bar = 0", "invalid character U+0024 '$'", 0, 3}, 348 {"const x = 0xyz", "malformed hex constant", 0, 12}, 349 {"0123456789", "malformed octal constant", 0, 10}, 350 {"0123456789. /* foobar", "comment not terminated", 0, 12}, // valid float constant 351 {"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant 352 {"var a, b = 08, 07\n", "malformed octal constant", 0, 13}, 353 {"(x + 1.0e+x)", "malformed floating-point constant exponent", 0, 10}, 354 355 {`''`, "empty character literal or unescaped ' in character literal", 0, 1}, 356 {"'\n", "newline in character literal", 0, 1}, 357 {`'\`, "invalid character literal (missing closing ')", 0, 0}, 358 {`'\'`, "invalid character literal (missing closing ')", 0, 0}, 359 {`'\x`, "invalid character literal (missing closing ')", 0, 0}, 360 {`'\x'`, "non-hex character in escape sequence: '", 0, 3}, 361 {`'\y'`, "unknown escape sequence", 0, 2}, 362 {`'\x0'`, "non-hex character in escape sequence: '", 0, 4}, 363 {`'\00'`, "non-octal character in escape sequence: '", 0, 4}, 364 {`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape 365 {`'\378`, "non-octal character in escape sequence: 8", 0, 4}, 366 {`'\400'`, "octal escape value > 255: 256", 0, 5}, 367 {`'xx`, "invalid character literal (missing closing ')", 0, 0}, 368 {`'xx'`, "invalid character literal (more than one character)", 0, 0}, 369 370 {"\"\n", "newline in string", 0, 1}, 371 {`"`, "string not terminated", 0, 0}, 372 {`"foo`, "string not terminated", 0, 0}, 373 {"`", "string not terminated", 0, 0}, 374 {"`foo", "string not terminated", 0, 0}, 375 {"/*/", "comment not terminated", 0, 0}, 376 {"/*\n\nfoo", "comment not terminated", 0, 0}, 377 {`"\`, "string not terminated", 0, 0}, 378 {`"\"`, "string not terminated", 0, 0}, 379 {`"\x`, "string not terminated", 0, 0}, 380 {`"\x"`, "non-hex character in escape sequence: \"", 0, 3}, 381 {`"\y"`, "unknown escape sequence", 0, 2}, 382 {`"\x0"`, "non-hex character in escape sequence: \"", 0, 4}, 383 {`"\00"`, "non-octal character in escape sequence: \"", 0, 4}, 384 {`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape 385 {`"\378"`, "non-octal character in escape sequence: 8", 0, 4}, 386 {`"\400"`, "octal escape value > 255: 256", 0, 5}, 387 388 {`s := "foo\z"`, "unknown escape sequence", 0, 10}, 389 {`s := "foo\z00\nbar"`, "unknown escape sequence", 0, 10}, 390 {`"\x`, "string not terminated", 0, 0}, 391 {`"\x"`, "non-hex character in escape sequence: \"", 0, 3}, 392 {`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18}, 393 {`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 0, 18}, 394 395 // former problem cases 396 {"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0}, 397 } { 398 var s scanner 399 nerrors := 0 400 s.init(strings.NewReader(test.src), func(line, col uint, msg string) { 401 nerrors++ 402 // only check the first error 403 if nerrors == 1 { 404 if msg != test.msg { 405 t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg) 406 } 407 if line != test.line+linebase { 408 t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase) 409 } 410 if col != test.col+colbase { 411 t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase) 412 } 413 } else if nerrors > 1 { 414 // TODO(gri) make this use position info 415 t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line) 416 } 417 }, 0) 418 419 for { 420 s.next() 421 if s.tok == _EOF { 422 break 423 } 424 } 425 426 if nerrors == 0 { 427 t.Errorf("%q: got no error; want %q", test.src, test.msg) 428 } 429 } 430 } 431 432 func TestIssue21938(t *testing.T) { 433 s := "/*" + strings.Repeat(" ", 4089) + "*/ .5" 434 435 var got scanner 436 got.init(strings.NewReader(s), nil, 0) 437 got.next() 438 439 if got.tok != _Literal || got.lit != ".5" { 440 t.Errorf("got %s %q; want %s %q", got.tok, got.lit, _Literal, ".5") 441 } 442 }