github.com/corona10/go@v0.0.0-20180224231303-7a218942be57/src/go/scanner/scanner_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package scanner 6 7 import ( 8 "go/token" 9 "io/ioutil" 10 "os" 11 "path/filepath" 12 "runtime" 13 "testing" 14 ) 15 16 var fset = token.NewFileSet() 17 18 const /* class */ ( 19 special = iota 20 literal 21 operator 22 keyword 23 ) 24 25 func tokenclass(tok token.Token) int { 26 switch { 27 case tok.IsLiteral(): 28 return literal 29 case tok.IsOperator(): 30 return operator 31 case tok.IsKeyword(): 32 return keyword 33 } 34 return special 35 } 36 37 type elt struct { 38 tok token.Token 39 lit string 40 class int 41 } 42 43 var tokens = [...]elt{ 44 // Special tokens 45 {token.COMMENT, "/* a comment */", special}, 46 {token.COMMENT, "// a comment \n", special}, 47 {token.COMMENT, "/*\r*/", special}, 48 {token.COMMENT, "/**\r/*/", special}, // issue 11151 49 {token.COMMENT, "/**\r\r/*/", special}, 50 {token.COMMENT, "//\r\n", special}, 51 52 // Identifiers and basic type literals 53 {token.IDENT, "foobar", literal}, 54 {token.IDENT, "a۰۱۸", literal}, 55 {token.IDENT, "foo६४", literal}, 56 {token.IDENT, "bar9876", literal}, 57 {token.IDENT, "ŝ", literal}, // was bug (issue 4000) 58 {token.IDENT, "ŝfoo", literal}, // was bug (issue 4000) 59 {token.INT, "0", literal}, 60 {token.INT, "1", literal}, 61 {token.INT, "123456789012345678890", literal}, 62 {token.INT, "01234567", literal}, 63 {token.INT, "0xcafebabe", literal}, 64 {token.FLOAT, "0.", literal}, 65 {token.FLOAT, ".0", literal}, 66 {token.FLOAT, "3.14159265", literal}, 67 {token.FLOAT, "1e0", literal}, 68 {token.FLOAT, "1e+100", literal}, 69 {token.FLOAT, "1e-100", literal}, 70 {token.FLOAT, "2.71828e-1000", literal}, 71 {token.IMAG, "0i", literal}, 72 {token.IMAG, "1i", literal}, 73 {token.IMAG, "012345678901234567889i", literal}, 74 {token.IMAG, "123456789012345678890i", literal}, 75 {token.IMAG, "0.i", literal}, 76 {token.IMAG, ".0i", literal}, 77 {token.IMAG, "3.14159265i", literal}, 78 {token.IMAG, "1e0i", literal}, 79 {token.IMAG, "1e+100i", literal}, 80 {token.IMAG, "1e-100i", literal}, 81 {token.IMAG, "2.71828e-1000i", literal}, 82 {token.CHAR, "'a'", literal}, 83 {token.CHAR, "'\\000'", literal}, 84 {token.CHAR, "'\\xFF'", literal}, 85 {token.CHAR, "'\\uff16'", literal}, 86 {token.CHAR, "'\\U0000ff16'", literal}, 87 {token.STRING, "`foobar`", literal}, 88 {token.STRING, "`" + `foo 89 bar` + 90 "`", 91 literal, 92 }, 93 {token.STRING, "`\r`", literal}, 94 {token.STRING, "`foo\r\nbar`", literal}, 95 96 // Operators and delimiters 97 {token.ADD, "+", operator}, 98 {token.SUB, "-", operator}, 99 {token.MUL, "*", operator}, 100 {token.QUO, "/", operator}, 101 {token.REM, "%", operator}, 102 103 {token.AND, "&", operator}, 104 {token.OR, "|", operator}, 105 {token.XOR, "^", operator}, 106 {token.SHL, "<<", operator}, 107 {token.SHR, ">>", operator}, 108 {token.AND_NOT, "&^", operator}, 109 110 {token.ADD_ASSIGN, "+=", operator}, 111 {token.SUB_ASSIGN, "-=", operator}, 112 {token.MUL_ASSIGN, "*=", operator}, 113 {token.QUO_ASSIGN, "/=", operator}, 114 {token.REM_ASSIGN, "%=", operator}, 115 116 {token.AND_ASSIGN, "&=", operator}, 117 {token.OR_ASSIGN, "|=", operator}, 118 {token.XOR_ASSIGN, "^=", operator}, 119 {token.SHL_ASSIGN, "<<=", operator}, 120 {token.SHR_ASSIGN, ">>=", operator}, 121 {token.AND_NOT_ASSIGN, "&^=", operator}, 122 123 {token.LAND, "&&", operator}, 124 {token.LOR, "||", operator}, 125 {token.ARROW, "<-", operator}, 126 {token.INC, "++", operator}, 127 {token.DEC, "--", operator}, 128 129 {token.EQL, "==", operator}, 130 {token.LSS, "<", operator}, 131 {token.GTR, ">", operator}, 132 {token.ASSIGN, "=", operator}, 133 {token.NOT, "!", operator}, 134 135 {token.NEQ, "!=", operator}, 136 {token.LEQ, "<=", operator}, 137 {token.GEQ, ">=", operator}, 138 {token.DEFINE, ":=", operator}, 139 {token.ELLIPSIS, "...", operator}, 140 141 {token.LPAREN, "(", operator}, 142 {token.LBRACK, "[", operator}, 143 {token.LBRACE, "{", operator}, 144 {token.COMMA, ",", operator}, 145 {token.PERIOD, ".", operator}, 146 147 {token.RPAREN, ")", operator}, 148 {token.RBRACK, "]", operator}, 149 {token.RBRACE, "}", operator}, 150 {token.SEMICOLON, ";", operator}, 151 {token.COLON, ":", operator}, 152 153 // Keywords 154 {token.BREAK, "break", keyword}, 155 {token.CASE, "case", keyword}, 156 {token.CHAN, "chan", keyword}, 157 {token.CONST, "const", keyword}, 158 {token.CONTINUE, "continue", keyword}, 159 160 {token.DEFAULT, "default", keyword}, 161 {token.DEFER, "defer", keyword}, 162 {token.ELSE, "else", keyword}, 163 {token.FALLTHROUGH, "fallthrough", keyword}, 164 {token.FOR, "for", keyword}, 165 166 {token.FUNC, "func", keyword}, 167 {token.GO, "go", keyword}, 168 {token.GOTO, "goto", keyword}, 169 {token.IF, "if", keyword}, 170 {token.IMPORT, "import", keyword}, 171 172 {token.INTERFACE, "interface", keyword}, 173 {token.MAP, "map", keyword}, 174 {token.PACKAGE, "package", keyword}, 175 {token.RANGE, "range", keyword}, 176 {token.RETURN, "return", keyword}, 177 178 {token.SELECT, "select", keyword}, 179 {token.STRUCT, "struct", keyword}, 180 {token.SWITCH, "switch", keyword}, 181 {token.TYPE, "type", keyword}, 182 {token.VAR, "var", keyword}, 183 } 184 185 const whitespace = " \t \n\n\n" // to separate tokens 186 187 var source = func() []byte { 188 var src []byte 189 for _, t := range tokens { 190 src = append(src, t.lit...) 191 src = append(src, whitespace...) 192 } 193 return src 194 }() 195 196 func newlineCount(s string) int { 197 n := 0 198 for i := 0; i < len(s); i++ { 199 if s[i] == '\n' { 200 n++ 201 } 202 } 203 return n 204 } 205 206 func checkPos(t *testing.T, lit string, p token.Pos, expected token.Position) { 207 pos := fset.Position(p) 208 if pos.Filename != expected.Filename { 209 t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename) 210 } 211 if pos.Offset != expected.Offset { 212 t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset) 213 } 214 if pos.Line != expected.Line { 215 t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line) 216 } 217 if pos.Column != expected.Column { 218 t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column) 219 } 220 } 221 222 // Verify that calling Scan() provides the correct results. 223 func TestScan(t *testing.T) { 224 whitespace_linecount := newlineCount(whitespace) 225 226 // error handler 227 eh := func(_ token.Position, msg string) { 228 t.Errorf("error handler called (msg = %s)", msg) 229 } 230 231 // verify scan 232 var s Scanner 233 s.Init(fset.AddFile("", fset.Base(), len(source)), source, eh, ScanComments|dontInsertSemis) 234 235 // set up expected position 236 epos := token.Position{ 237 Filename: "", 238 Offset: 0, 239 Line: 1, 240 Column: 1, 241 } 242 243 index := 0 244 for { 245 pos, tok, lit := s.Scan() 246 247 // check position 248 if tok == token.EOF { 249 // correction for EOF 250 epos.Line = newlineCount(string(source)) 251 epos.Column = 2 252 } 253 checkPos(t, lit, pos, epos) 254 255 // check token 256 e := elt{token.EOF, "", special} 257 if index < len(tokens) { 258 e = tokens[index] 259 index++ 260 } 261 if tok != e.tok { 262 t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok) 263 } 264 265 // check token class 266 if tokenclass(tok) != e.class { 267 t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) 268 } 269 270 // check literal 271 elit := "" 272 switch e.tok { 273 case token.COMMENT: 274 // no CRs in comments 275 elit = string(stripCR([]byte(e.lit), e.lit[1] == '*')) 276 //-style comment literal doesn't contain newline 277 if elit[1] == '/' { 278 elit = elit[0 : len(elit)-1] 279 } 280 case token.IDENT: 281 elit = e.lit 282 case token.SEMICOLON: 283 elit = ";" 284 default: 285 if e.tok.IsLiteral() { 286 // no CRs in raw string literals 287 elit = e.lit 288 if elit[0] == '`' { 289 elit = string(stripCR([]byte(elit), false)) 290 } 291 } else if e.tok.IsKeyword() { 292 elit = e.lit 293 } 294 } 295 if lit != elit { 296 t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit) 297 } 298 299 if tok == token.EOF { 300 break 301 } 302 303 // update position 304 epos.Offset += len(e.lit) + len(whitespace) 305 epos.Line += newlineCount(e.lit) + whitespace_linecount 306 307 } 308 309 if s.ErrorCount != 0 { 310 t.Errorf("found %d errors", s.ErrorCount) 311 } 312 } 313 314 func TestStripCR(t *testing.T) { 315 for _, test := range []struct{ have, want string }{ 316 {"//\n", "//\n"}, 317 {"//\r\n", "//\n"}, 318 {"//\r\r\r\n", "//\n"}, 319 {"//\r*\r/\r\n", "//*/\n"}, 320 {"/**/", "/**/"}, 321 {"/*\r/*/", "/*/*/"}, 322 {"/*\r*/", "/**/"}, 323 {"/**\r/*/", "/**\r/*/"}, 324 {"/*\r/\r*\r/*/", "/*/*\r/*/"}, 325 {"/*\r\r\r\r*/", "/**/"}, 326 } { 327 got := string(stripCR([]byte(test.have), len(test.have) >= 2 && test.have[1] == '*')) 328 if got != test.want { 329 t.Errorf("stripCR(%q) = %q; want %q", test.have, got, test.want) 330 } 331 } 332 } 333 334 func checkSemi(t *testing.T, line string, mode Mode) { 335 var S Scanner 336 file := fset.AddFile("TestSemis", fset.Base(), len(line)) 337 S.Init(file, []byte(line), nil, mode) 338 pos, tok, lit := S.Scan() 339 for tok != token.EOF { 340 if tok == token.ILLEGAL { 341 // the illegal token literal indicates what 342 // kind of semicolon literal to expect 343 semiLit := "\n" 344 if lit[0] == '#' { 345 semiLit = ";" 346 } 347 // next token must be a semicolon 348 semiPos := file.Position(pos) 349 semiPos.Offset++ 350 semiPos.Column++ 351 pos, tok, lit = S.Scan() 352 if tok == token.SEMICOLON { 353 if lit != semiLit { 354 t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit) 355 } 356 checkPos(t, line, pos, semiPos) 357 } else { 358 t.Errorf("bad token for %q: got %s, expected ;", line, tok) 359 } 360 } else if tok == token.SEMICOLON { 361 t.Errorf("bad token for %q: got ;, expected no ;", line) 362 } 363 pos, tok, lit = S.Scan() 364 } 365 } 366 367 var lines = []string{ 368 // # indicates a semicolon present in the source 369 // $ indicates an automatically inserted semicolon 370 "", 371 "\ufeff#;", // first BOM is ignored 372 "#;", 373 "foo$\n", 374 "123$\n", 375 "1.2$\n", 376 "'x'$\n", 377 `"x"` + "$\n", 378 "`x`$\n", 379 380 "+\n", 381 "-\n", 382 "*\n", 383 "/\n", 384 "%\n", 385 386 "&\n", 387 "|\n", 388 "^\n", 389 "<<\n", 390 ">>\n", 391 "&^\n", 392 393 "+=\n", 394 "-=\n", 395 "*=\n", 396 "/=\n", 397 "%=\n", 398 399 "&=\n", 400 "|=\n", 401 "^=\n", 402 "<<=\n", 403 ">>=\n", 404 "&^=\n", 405 406 "&&\n", 407 "||\n", 408 "<-\n", 409 "++$\n", 410 "--$\n", 411 412 "==\n", 413 "<\n", 414 ">\n", 415 "=\n", 416 "!\n", 417 418 "!=\n", 419 "<=\n", 420 ">=\n", 421 ":=\n", 422 "...\n", 423 424 "(\n", 425 "[\n", 426 "{\n", 427 ",\n", 428 ".\n", 429 430 ")$\n", 431 "]$\n", 432 "}$\n", 433 "#;\n", 434 ":\n", 435 436 "break$\n", 437 "case\n", 438 "chan\n", 439 "const\n", 440 "continue$\n", 441 442 "default\n", 443 "defer\n", 444 "else\n", 445 "fallthrough$\n", 446 "for\n", 447 448 "func\n", 449 "go\n", 450 "goto\n", 451 "if\n", 452 "import\n", 453 454 "interface\n", 455 "map\n", 456 "package\n", 457 "range\n", 458 "return$\n", 459 460 "select\n", 461 "struct\n", 462 "switch\n", 463 "type\n", 464 "var\n", 465 466 "foo$//comment\n", 467 "foo$//comment", 468 "foo$/*comment*/\n", 469 "foo$/*\n*/", 470 "foo$/*comment*/ \n", 471 "foo$/*\n*/ ", 472 473 "foo $// comment\n", 474 "foo $// comment", 475 "foo $/*comment*/\n", 476 "foo $/*\n*/", 477 "foo $/* */ /* \n */ bar$/**/\n", 478 "foo $/*0*/ /*1*/ /*2*/\n", 479 480 "foo $/*comment*/ \n", 481 "foo $/*0*/ /*1*/ /*2*/ \n", 482 "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa$\n", 483 "foo $/* an EOF terminates a line */", 484 "foo $/* an EOF terminates a line */ /*", 485 "foo $/* an EOF terminates a line */ //", 486 487 "package main$\n\nfunc main() {\n\tif {\n\t\treturn /* */ }$\n}$\n", 488 "package main$", 489 } 490 491 func TestSemis(t *testing.T) { 492 for _, line := range lines { 493 checkSemi(t, line, 0) 494 checkSemi(t, line, ScanComments) 495 496 // if the input ended in newlines, the input must tokenize the 497 // same with or without those newlines 498 for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- { 499 checkSemi(t, line[0:i], 0) 500 checkSemi(t, line[0:i], ScanComments) 501 } 502 } 503 } 504 505 type segment struct { 506 srcline string // a line of source text 507 filename string // filename for current token 508 line int // line number for current token 509 } 510 511 var segments = []segment{ 512 // exactly one token per line since the test consumes one token per segment 513 {" line1", filepath.Join("dir", "TestLineComments"), 1}, 514 {"\nline2", filepath.Join("dir", "TestLineComments"), 2}, 515 {"\nline3 //line File1.go:100", filepath.Join("dir", "TestLineComments"), 3}, // bad line comment, ignored 516 {"\nline4", filepath.Join("dir", "TestLineComments"), 4}, 517 {"\n//line File1.go:100\n line100", filepath.Join("dir", "File1.go"), 100}, 518 {"\n//line \t :42\n line1", "", 42}, 519 {"\n//line File2.go:200\n line200", filepath.Join("dir", "File2.go"), 200}, 520 {"\n//line foo\t:42\n line42", filepath.Join("dir", "foo"), 42}, 521 {"\n //line foo:42\n line44", filepath.Join("dir", "foo"), 44}, // bad line comment, ignored 522 {"\n//line foo 42\n line46", filepath.Join("dir", "foo"), 46}, // bad line comment, ignored 523 {"\n//line foo:42 extra text\n line48", filepath.Join("dir", "foo"), 48}, // bad line comment, ignored 524 {"\n//line ./foo:42\n line42", filepath.Join("dir", "foo"), 42}, 525 {"\n//line a/b/c/File1.go:100\n line100", filepath.Join("dir", "a", "b", "c", "File1.go"), 100}, 526 } 527 528 var unixsegments = []segment{ 529 {"\n//line /bar:42\n line42", "/bar", 42}, 530 } 531 532 var winsegments = []segment{ 533 {"\n//line c:\\bar:42\n line42", "c:\\bar", 42}, 534 {"\n//line c:\\dir\\File1.go:100\n line100", "c:\\dir\\File1.go", 100}, 535 } 536 537 // Verify that comments of the form "//line filename:line" are interpreted correctly. 538 func TestLineComments(t *testing.T) { 539 segs := segments 540 if runtime.GOOS == "windows" { 541 segs = append(segs, winsegments...) 542 } else { 543 segs = append(segs, unixsegments...) 544 } 545 546 // make source 547 var src string 548 for _, e := range segs { 549 src += e.srcline 550 } 551 552 // verify scan 553 var S Scanner 554 file := fset.AddFile(filepath.Join("dir", "TestLineComments"), fset.Base(), len(src)) 555 S.Init(file, []byte(src), nil, dontInsertSemis) 556 for _, s := range segs { 557 p, _, lit := S.Scan() 558 pos := file.Position(p) 559 checkPos(t, lit, p, token.Position{ 560 Filename: s.filename, 561 Offset: pos.Offset, 562 Line: s.line, 563 Column: pos.Column, 564 }) 565 } 566 567 if S.ErrorCount != 0 { 568 t.Errorf("found %d errors", S.ErrorCount) 569 } 570 } 571 572 // Verify that initializing the same scanner more than once works correctly. 573 func TestInit(t *testing.T) { 574 var s Scanner 575 576 // 1st init 577 src1 := "if true { }" 578 f1 := fset.AddFile("src1", fset.Base(), len(src1)) 579 s.Init(f1, []byte(src1), nil, dontInsertSemis) 580 if f1.Size() != len(src1) { 581 t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1)) 582 } 583 s.Scan() // if 584 s.Scan() // true 585 _, tok, _ := s.Scan() // { 586 if tok != token.LBRACE { 587 t.Errorf("bad token: got %s, expected %s", tok, token.LBRACE) 588 } 589 590 // 2nd init 591 src2 := "go true { ]" 592 f2 := fset.AddFile("src2", fset.Base(), len(src2)) 593 s.Init(f2, []byte(src2), nil, dontInsertSemis) 594 if f2.Size() != len(src2) { 595 t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2)) 596 } 597 _, tok, _ = s.Scan() // go 598 if tok != token.GO { 599 t.Errorf("bad token: got %s, expected %s", tok, token.GO) 600 } 601 602 if s.ErrorCount != 0 { 603 t.Errorf("found %d errors", s.ErrorCount) 604 } 605 } 606 607 func TestStdErrorHander(t *testing.T) { 608 const src = "@\n" + // illegal character, cause an error 609 "@ @\n" + // two errors on the same line 610 "//line File2:20\n" + 611 "@\n" + // different file, but same line 612 "//line File2:1\n" + 613 "@ @\n" + // same file, decreasing line number 614 "//line File1:1\n" + 615 "@ @ @" // original file, line 1 again 616 617 var list ErrorList 618 eh := func(pos token.Position, msg string) { list.Add(pos, msg) } 619 620 var s Scanner 621 s.Init(fset.AddFile("File1", fset.Base(), len(src)), []byte(src), eh, dontInsertSemis) 622 for { 623 if _, tok, _ := s.Scan(); tok == token.EOF { 624 break 625 } 626 } 627 628 if len(list) != s.ErrorCount { 629 t.Errorf("found %d errors, expected %d", len(list), s.ErrorCount) 630 } 631 632 if len(list) != 9 { 633 t.Errorf("found %d raw errors, expected 9", len(list)) 634 PrintError(os.Stderr, list) 635 } 636 637 list.Sort() 638 if len(list) != 9 { 639 t.Errorf("found %d sorted errors, expected 9", len(list)) 640 PrintError(os.Stderr, list) 641 } 642 643 list.RemoveMultiples() 644 if len(list) != 4 { 645 t.Errorf("found %d one-per-line errors, expected 4", len(list)) 646 PrintError(os.Stderr, list) 647 } 648 } 649 650 type errorCollector struct { 651 cnt int // number of errors encountered 652 msg string // last error message encountered 653 pos token.Position // last error position encountered 654 } 655 656 func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) { 657 var s Scanner 658 var h errorCollector 659 eh := func(pos token.Position, msg string) { 660 h.cnt++ 661 h.msg = msg 662 h.pos = pos 663 } 664 s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), eh, ScanComments|dontInsertSemis) 665 _, tok0, lit0 := s.Scan() 666 if tok0 != tok { 667 t.Errorf("%q: got %s, expected %s", src, tok0, tok) 668 } 669 if tok0 != token.ILLEGAL && lit0 != lit { 670 t.Errorf("%q: got literal %q, expected %q", src, lit0, lit) 671 } 672 cnt := 0 673 if err != "" { 674 cnt = 1 675 } 676 if h.cnt != cnt { 677 t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt) 678 } 679 if h.msg != err { 680 t.Errorf("%q: got msg %q, expected %q", src, h.msg, err) 681 } 682 if h.pos.Offset != pos { 683 t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset, pos) 684 } 685 } 686 687 var errors = []struct { 688 src string 689 tok token.Token 690 pos int 691 lit string 692 err string 693 }{ 694 {"\a", token.ILLEGAL, 0, "", "illegal character U+0007"}, 695 {`#`, token.ILLEGAL, 0, "", "illegal character U+0023 '#'"}, 696 {`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"}, 697 {`' '`, token.CHAR, 0, `' '`, ""}, 698 {`''`, token.CHAR, 0, `''`, "illegal rune literal"}, 699 {`'12'`, token.CHAR, 0, `'12'`, "illegal rune literal"}, 700 {`'123'`, token.CHAR, 0, `'123'`, "illegal rune literal"}, 701 {`'\0'`, token.CHAR, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"}, 702 {`'\07'`, token.CHAR, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"}, 703 {`'\8'`, token.CHAR, 2, `'\8'`, "unknown escape sequence"}, 704 {`'\08'`, token.CHAR, 3, `'\08'`, "illegal character U+0038 '8' in escape sequence"}, 705 {`'\x'`, token.CHAR, 3, `'\x'`, "illegal character U+0027 ''' in escape sequence"}, 706 {`'\x0'`, token.CHAR, 4, `'\x0'`, "illegal character U+0027 ''' in escape sequence"}, 707 {`'\x0g'`, token.CHAR, 4, `'\x0g'`, "illegal character U+0067 'g' in escape sequence"}, 708 {`'\u'`, token.CHAR, 3, `'\u'`, "illegal character U+0027 ''' in escape sequence"}, 709 {`'\u0'`, token.CHAR, 4, `'\u0'`, "illegal character U+0027 ''' in escape sequence"}, 710 {`'\u00'`, token.CHAR, 5, `'\u00'`, "illegal character U+0027 ''' in escape sequence"}, 711 {`'\u000'`, token.CHAR, 6, `'\u000'`, "illegal character U+0027 ''' in escape sequence"}, 712 {`'\u000`, token.CHAR, 6, `'\u000`, "escape sequence not terminated"}, 713 {`'\u0000'`, token.CHAR, 0, `'\u0000'`, ""}, 714 {`'\U'`, token.CHAR, 3, `'\U'`, "illegal character U+0027 ''' in escape sequence"}, 715 {`'\U0'`, token.CHAR, 4, `'\U0'`, "illegal character U+0027 ''' in escape sequence"}, 716 {`'\U00'`, token.CHAR, 5, `'\U00'`, "illegal character U+0027 ''' in escape sequence"}, 717 {`'\U000'`, token.CHAR, 6, `'\U000'`, "illegal character U+0027 ''' in escape sequence"}, 718 {`'\U0000'`, token.CHAR, 7, `'\U0000'`, "illegal character U+0027 ''' in escape sequence"}, 719 {`'\U00000'`, token.CHAR, 8, `'\U00000'`, "illegal character U+0027 ''' in escape sequence"}, 720 {`'\U000000'`, token.CHAR, 9, `'\U000000'`, "illegal character U+0027 ''' in escape sequence"}, 721 {`'\U0000000'`, token.CHAR, 10, `'\U0000000'`, "illegal character U+0027 ''' in escape sequence"}, 722 {`'\U0000000`, token.CHAR, 10, `'\U0000000`, "escape sequence not terminated"}, 723 {`'\U00000000'`, token.CHAR, 0, `'\U00000000'`, ""}, 724 {`'\Uffffffff'`, token.CHAR, 2, `'\Uffffffff'`, "escape sequence is invalid Unicode code point"}, 725 {`'`, token.CHAR, 0, `'`, "rune literal not terminated"}, 726 {`'\`, token.CHAR, 2, `'\`, "escape sequence not terminated"}, 727 {"'\n", token.CHAR, 0, "'", "rune literal not terminated"}, 728 {"'\n ", token.CHAR, 0, "'", "rune literal not terminated"}, 729 {`""`, token.STRING, 0, `""`, ""}, 730 {`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"}, 731 {"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"}, 732 {"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"}, 733 {"``", token.STRING, 0, "``", ""}, 734 {"`", token.STRING, 0, "`", "raw string literal not terminated"}, 735 {"/**/", token.COMMENT, 0, "/**/", ""}, 736 {"/*", token.COMMENT, 0, "/*", "comment not terminated"}, 737 {"077", token.INT, 0, "077", ""}, 738 {"078.", token.FLOAT, 0, "078.", ""}, 739 {"07801234567.", token.FLOAT, 0, "07801234567.", ""}, 740 {"078e0", token.FLOAT, 0, "078e0", ""}, 741 {"0E", token.FLOAT, 0, "0E", "illegal floating-point exponent"}, // issue 17621 742 {"078", token.INT, 0, "078", "illegal octal number"}, 743 {"07800000009", token.INT, 0, "07800000009", "illegal octal number"}, 744 {"0x", token.INT, 0, "0x", "illegal hexadecimal number"}, 745 {"0X", token.INT, 0, "0X", "illegal hexadecimal number"}, 746 {"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"}, 747 {"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"}, 748 {"\ufeff\ufeff", token.ILLEGAL, 3, "\ufeff\ufeff", "illegal byte order mark"}, // only first BOM is ignored 749 {"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"}, // only first BOM is ignored 750 {"'\ufeff" + `'`, token.CHAR, 1, "'\ufeff" + `'`, "illegal byte order mark"}, // only first BOM is ignored 751 {`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored 752 } 753 754 func TestScanErrors(t *testing.T) { 755 for _, e := range errors { 756 checkError(t, e.src, e.tok, e.pos, e.lit, e.err) 757 } 758 } 759 760 // Verify that no comments show up as literal values when skipping comments. 761 func TestIssue10213(t *testing.T) { 762 var src = ` 763 var ( 764 A = 1 // foo 765 ) 766 767 var ( 768 B = 2 769 // foo 770 ) 771 772 var C = 3 // foo 773 774 var D = 4 775 // foo 776 777 func anycode() { 778 // foo 779 } 780 ` 781 var s Scanner 782 s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), nil, 0) 783 for { 784 pos, tok, lit := s.Scan() 785 class := tokenclass(tok) 786 if lit != "" && class != keyword && class != literal && tok != token.SEMICOLON { 787 t.Errorf("%s: tok = %s, lit = %q", fset.Position(pos), tok, lit) 788 } 789 if tok <= token.EOF { 790 break 791 } 792 } 793 } 794 795 func BenchmarkScan(b *testing.B) { 796 b.StopTimer() 797 fset := token.NewFileSet() 798 file := fset.AddFile("", fset.Base(), len(source)) 799 var s Scanner 800 b.StartTimer() 801 for i := 0; i < b.N; i++ { 802 s.Init(file, source, nil, ScanComments) 803 for { 804 _, tok, _ := s.Scan() 805 if tok == token.EOF { 806 break 807 } 808 } 809 } 810 } 811 812 func BenchmarkScanFile(b *testing.B) { 813 b.StopTimer() 814 const filename = "scanner.go" 815 src, err := ioutil.ReadFile(filename) 816 if err != nil { 817 panic(err) 818 } 819 fset := token.NewFileSet() 820 file := fset.AddFile(filename, fset.Base(), len(src)) 821 b.SetBytes(int64(len(src))) 822 var s Scanner 823 b.StartTimer() 824 for i := 0; i < b.N; i++ { 825 s.Init(file, src, nil, ScanComments) 826 for { 827 _, tok, _ := s.Scan() 828 if tok == token.EOF { 829 break 830 } 831 } 832 } 833 }