github.com/joomcode/cue@v0.4.4-0.20221111115225-539fe3512047/cue/scanner/scanner_test.go (about) 1 // Copyright 2018 The CUE Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package scanner 16 17 import ( 18 "fmt" 19 "io/ioutil" 20 "os" 21 "path/filepath" 22 "runtime" 23 "strings" 24 "testing" 25 26 "github.com/google/go-cmp/cmp" 27 28 "github.com/joomcode/cue/cue/errors" 29 "github.com/joomcode/cue/cue/token" 30 ) 31 32 const /* class */ ( 33 special = iota 34 literal 35 operator 36 keyword 37 ) 38 39 func tokenclass(tok token.Token) int { 40 switch { 41 case tok.IsLiteral(): 42 return literal 43 case tok.IsOperator(): 44 return operator 45 case tok.IsKeyword(): 46 return keyword 47 } 48 return special 49 } 50 51 type elt struct { 52 tok token.Token 53 lit string 54 class int 55 } 56 57 var testTokens = [...]elt{ 58 // Special tokens 59 {token.COMMENT, "// a comment \n", special}, 60 {token.COMMENT, "//\r\n", special}, 61 62 // Attributes 63 {token.ATTRIBUTE, "@foo()", special}, 64 {token.ATTRIBUTE, "@foo(,,)", special}, 65 {token.ATTRIBUTE, "@foo(a)", special}, 66 {token.ATTRIBUTE, "@foo(aa=b)", special}, 67 {token.ATTRIBUTE, "@foo(,a=b)", special}, 68 {token.ATTRIBUTE, `@foo(",a=b")`, special}, 69 {token.ATTRIBUTE, `@foo(##"\(),a=b"##)`, special}, 70 {token.ATTRIBUTE, `@foo("",a="")`, special}, 71 {token.ATTRIBUTE, `@foo(2,bytes,a.b=c)`, special}, 72 {token.ATTRIBUTE, `@foo([{()}]())`, special}, 73 {token.ATTRIBUTE, `@foo("{")`, special}, 74 75 // Identifiers and basic type literals 76 {token.BOTTOM, "_|_", literal}, 77 78 {token.IDENT, "foobar", literal}, 79 {token.IDENT, "$foobar", literal}, 80 {token.IDENT, "#foobar", literal}, 81 // {token.IDENT, "#0", literal}, 82 {token.IDENT, "#", literal}, 83 {token.IDENT, "_foobar", literal}, 84 {token.IDENT, "__foobar", literal}, 85 {token.IDENT, "#_foobar", literal}, 86 {token.IDENT, "_#foobar", literal}, 87 {token.IDENT, "__#foobar", literal}, 88 {token.IDENT, "a۰۱۸", literal}, 89 {token.IDENT, "foo६४", literal}, 90 {token.IDENT, "bar9876", literal}, 91 {token.IDENT, "ŝ", literal}, 92 {token.IDENT, "ŝfoo", literal}, 93 {token.INT, "0", literal}, 94 {token.INT, "1", literal}, 95 {token.INT, "123456789012345678890", literal}, 96 {token.INT, "12345_67890_12345_6788_90", literal}, 97 {token.INT, "1234567M", literal}, 98 {token.INT, "1234567Mi", literal}, 99 {token.INT, "1234567", literal}, 100 {token.INT, ".3Mi", literal}, 101 {token.INT, "3.3Mi", literal}, 102 {token.INT, "0xcafebabe", literal}, 103 {token.INT, "0b1100_1001", literal}, 104 {token.INT, "0o1234567", literal}, 105 {token.FLOAT, "0.", literal}, 106 {token.FLOAT, ".0", literal}, 107 {token.FLOAT, "3.14159265", literal}, 108 {token.FLOAT, "1e0", literal}, 109 {token.FLOAT, "1e+100", literal}, 110 {token.FLOAT, "1e-100", literal}, 111 {token.FLOAT, "1E+100", literal}, 112 {token.FLOAT, "1E-100", literal}, 113 {token.FLOAT, "0e-5", literal}, 114 {token.FLOAT, "0e+100", literal}, 115 {token.FLOAT, "0e-100", literal}, 116 {token.FLOAT, "0E+100", literal}, 117 {token.FLOAT, "0E-100", literal}, 118 {token.FLOAT, "2.71828e-1000", literal}, 119 {token.STRING, "'a'", literal}, 120 {token.STRING, "'\\000'", literal}, 121 {token.STRING, "'\\xFF'", literal}, 122 {token.STRING, "'\\uff16'", literal}, 123 {token.STRING, "'\\uD801'", literal}, 124 {token.STRING, "'\\U0000ff16'", literal}, 125 {token.STRING, "'foobar'", literal}, 126 {token.STRING, `'foo\/bar'`, literal}, 127 {token.STRING, `#" ""#`, literal}, 128 {token.STRING, `#"foobar"#`, literal}, 129 {token.STRING, `#"\r"#`, literal}, 130 {token.STRING, `#"\("#`, literal}, 131 {token.STRING, `#"\q"#`, literal}, 132 {token.STRING, `###"\##q"###`, literal}, 133 {token.STRING, "'" + `\r` + "'", literal}, 134 {token.STRING, "'foo" + `\r\n` + "bar'", literal}, 135 {token.STRING, `"foobar"`, literal}, 136 {token.STRING, "\"\"\"\n foobar\n \"\"\"", literal}, 137 {token.STRING, "#\"\"\"\n \\(foobar\n \"\"\"#", literal}, 138 // TODO: should we preserve the \r instead and have it removed by the 139 // literal parser? This would allow preserving \r for formatting without 140 // changing the semantics of evaluation. 141 {token.STRING, "#\"\"\"\r\n \\(foobar\n \"\"\"#", literal}, 142 143 // Operators and delimiters 144 {token.ADD, "+", operator}, 145 {token.SUB, "-", operator}, 146 {token.MUL, "*", operator}, 147 {token.QUO, "/", operator}, 148 149 {token.AND, "&", operator}, 150 {token.OR, "|", operator}, 151 152 {token.LAND, "&&", operator}, 153 {token.LOR, "||", operator}, 154 155 {token.EQL, "==", operator}, 156 {token.LSS, "<", operator}, 157 {token.GTR, ">", operator}, 158 {token.BIND, "=", operator}, 159 {token.NOT, "!", operator}, 160 161 {token.NEQ, "!=", operator}, 162 {token.LEQ, "<=", operator}, 163 {token.GEQ, ">=", operator}, 164 {token.ELLIPSIS, "...", operator}, 165 166 {token.MAT, "=~", operator}, 167 {token.NMAT, "!~", operator}, 168 169 {token.LPAREN, "(", operator}, 170 {token.LBRACK, "[", operator}, 171 {token.LBRACE, "{", operator}, 172 {token.COMMA, ",", operator}, 173 {token.PERIOD, ".", operator}, 174 {token.OPTION, "?", operator}, 175 176 {token.RPAREN, ")", operator}, 177 {token.RBRACK, "]", operator}, 178 {token.RBRACE, "}", operator}, 179 {token.COLON, ":", operator}, 180 {token.ISA, "::", operator}, 181 182 // Keywords 183 {token.TRUE, "true", keyword}, 184 {token.FALSE, "false", keyword}, 185 {token.NULL, "null", keyword}, 186 187 {token.FOR, "for", keyword}, 188 {token.IF, "if", keyword}, 189 {token.IN, "in", keyword}, 190 } 191 192 const whitespace = " \t \n\n\n" // to separate tokens 193 194 var source = func() []byte { 195 var src []byte 196 for _, t := range testTokens { 197 src = append(src, t.lit...) 198 src = append(src, whitespace...) 199 } 200 return src 201 }() 202 203 func newlineCount(s string) int { 204 n := 0 205 for i := 0; i < len(s); i++ { 206 if s[i] == '\n' { 207 n++ 208 } 209 } 210 return n 211 } 212 213 func checkPosScan(t *testing.T, lit string, p token.Pos, expected token.Position) { 214 pos := p.Position() 215 if pos.Filename != expected.Filename { 216 t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename) 217 } 218 if pos.Offset != expected.Offset { 219 t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset) 220 } 221 if pos.Line != expected.Line { 222 t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line) 223 } 224 if pos.Column != expected.Column { 225 t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column) 226 } 227 } 228 229 // Verify that calling Scan() provides the correct results. 230 func TestScan(t *testing.T) { 231 whitespace_linecount := newlineCount(whitespace) 232 233 // error handler 234 eh := func(_ token.Pos, msg string, args []interface{}) { 235 t.Errorf("error handler called (msg = %s)", fmt.Sprintf(msg, args...)) 236 } 237 238 // verify scan 239 var s Scanner 240 s.Init(token.NewFile("", 1, len(source)), source, eh, ScanComments|dontInsertCommas) 241 242 // set up expected position 243 epos := token.Position{ 244 Filename: "", 245 Offset: 0, 246 Line: 1, 247 Column: 1, 248 } 249 250 index := 0 251 for { 252 pos, tok, lit := s.Scan() 253 254 // check position 255 if tok == token.EOF { 256 // correction for EOF 257 epos.Line = newlineCount(string(source)) 258 epos.Column = 2 259 } 260 checkPosScan(t, lit, pos, epos) 261 262 // check token 263 e := elt{token.EOF, "", special} 264 if index < len(testTokens) { 265 e = testTokens[index] 266 index++ 267 } 268 if tok != e.tok { 269 t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok) 270 } 271 272 // check token class 273 if tokenclass(tok) != e.class { 274 t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) 275 } 276 277 // check literal 278 elit := "" 279 switch e.tok { 280 case token.COMMENT: 281 // no CRs in comments 282 elit = string(stripCR([]byte(e.lit))) 283 //-style comment literal doesn't contain newline 284 if elit[1] == '/' { 285 elit = elit[0 : len(elit)-1] 286 } 287 case token.ATTRIBUTE: 288 elit = e.lit 289 case token.IDENT: 290 elit = e.lit 291 case token.COMMA: 292 elit = "," 293 default: 294 if e.tok.IsLiteral() { 295 // no CRs in raw string literals 296 elit = e.lit 297 if elit[0] == '`' { 298 elit = string(stripCR([]byte(elit))) 299 } 300 } else if e.tok.IsKeyword() { 301 elit = e.lit 302 } 303 } 304 if lit != elit { 305 t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit) 306 } 307 308 if tok == token.EOF { 309 break 310 } 311 312 // update position 313 epos.Offset += len(e.lit) + len(whitespace) 314 epos.Line += newlineCount(e.lit) + whitespace_linecount 315 316 } 317 318 if s.ErrorCount != 0 { 319 t.Errorf("found %d errors", s.ErrorCount) 320 } 321 } 322 323 func checkComma(t *testing.T, line string, mode Mode) { 324 var S Scanner 325 file := token.NewFile("TestCommas", 1, len(line)) 326 S.Init(file, []byte(line), nil, mode) 327 pos, tok, lit := S.Scan() 328 for tok != token.EOF { 329 if tok == token.ILLEGAL { 330 // the illegal token literal indicates what 331 // kind of semicolon literal to expect 332 commaLit := "\n" 333 if lit[0] == '~' { 334 commaLit = "," 335 } 336 // next token must be a comma 337 commaPos := file.Position(pos) 338 commaPos.Offset++ 339 commaPos.Column++ 340 pos, tok, lit = S.Scan() 341 if tok == token.COMMA { 342 if lit != commaLit { 343 t.Errorf(`bad literal for %q: got %q (%q), expected %q`, line, lit, tok, commaLit) 344 } 345 checkPosScan(t, line, pos, commaPos) 346 } else { 347 t.Errorf("bad token for %q: got %s, expected ','", line, tok) 348 } 349 } else if tok == token.COMMA { 350 t.Errorf("bad token for %q: got ',', expected no ','", line) 351 } 352 pos, tok, lit = S.Scan() 353 } 354 } 355 356 var lines = []string{ 357 // ~ indicates a comma present in the source 358 // ^ indicates an automatically inserted comma 359 "", 360 "\ufeff~,", // first BOM is ignored 361 "~,", 362 "foo^\n", 363 "_foo^\n", 364 "123^\n", 365 "1.2^\n", 366 "'x'^\n", 367 "_|_^\n", 368 "_|_^\n", 369 `"x"` + "^\n", 370 "#'x'#^\n", 371 `""" 372 foo 373 """` + "^\n", 374 // `""" 375 // foo \(bar) 376 // """` + "^\n", 377 `''' 378 foo 379 '''` + "^\n", 380 381 "+\n", 382 "-\n", 383 "*\n", 384 "/\n", 385 386 "&\n", 387 // "&^\n", 388 "|\n", 389 390 "&&\n", 391 "||\n", 392 "<-\n", 393 "->\n", 394 395 "==\n", 396 "<\n", 397 ">\n", 398 "=\n", 399 "!\n", 400 401 "!=\n", 402 "<=\n", 403 ">=\n", 404 ":=\n", 405 "...^\n", 406 407 "(\n", 408 "[\n", 409 "[[\n", 410 "{\n", 411 "{{\n", 412 "~,\n", 413 ".\n", 414 415 ")^\n", 416 "]^\n", 417 "]]^\n", 418 "}^\n", 419 "}}^\n", 420 ":\n", 421 "::\n", 422 ";^\n", 423 424 "true^\n", 425 "false^\n", 426 "null^\n", 427 428 "foo^//comment\n", 429 "foo^//comment", 430 431 "foo ^// comment\n", 432 "foo ^// comment", 433 434 "foo ^", 435 "foo ^//", 436 437 "package main^\n\nfoo: bar^", 438 "package main^", 439 } 440 441 func TestCommas(t *testing.T) { 442 for _, line := range lines { 443 checkComma(t, line, 0) 444 checkComma(t, line, ScanComments) 445 446 // if the input ended in newlines, the input must tokenize the 447 // same with or without those newlines 448 for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- { 449 checkComma(t, line[0:i], 0) 450 checkComma(t, line[0:i], ScanComments) 451 } 452 } 453 } 454 455 func TestRelative(t *testing.T) { 456 test := ` 457 package foo 458 459 // comment 460 a: 1 // a 461 b : 5 462 // line one 463 // line two 464 c 465 : "dfs" 466 , d: "foo" 467 ` 468 want := []string{ 469 `newline IDENT package`, 470 `blank IDENT foo`, 471 "elided , \n", 472 `section COMMENT // comment`, 473 `newline IDENT a`, 474 `nospace : `, 475 `blank INT 1`, 476 "elided , \n", 477 `blank COMMENT // a`, 478 `newline IDENT b`, 479 `blank : `, 480 `blank INT 5`, 481 "elided , \n", 482 "newline COMMENT // line one", 483 "newline COMMENT // line two", 484 `newline IDENT c`, 485 `newline : `, 486 `blank STRING "dfs"`, 487 "newline , ,", 488 "blank IDENT d", 489 "nospace : ", 490 `blank STRING "foo"`, 491 "elided , \n", 492 } 493 var S Scanner 494 f := token.NewFile("TestCommas", 1, len(test)) 495 S.Init(f, []byte(test), nil, ScanComments) 496 pos, tok, lit := S.Scan() 497 got := []string{} 498 for tok != token.EOF { 499 got = append(got, fmt.Sprintf("%-7s %-8s %s", pos.RelPos(), tok, lit)) 500 pos, tok, lit = S.Scan() 501 } 502 if !cmp.Equal(got, want) { 503 t.Error(cmp.Diff(got, want)) 504 } 505 } 506 507 type segment struct { 508 srcline string // a line of source text 509 filename string // filename for current token 510 line int // line number for current token 511 } 512 513 var segments = []segment{ 514 // exactly one token per line since the test consumes one token per segment 515 {" line1", filepath.Join("dir", "TestLineComments"), 1}, 516 {"\nline2", filepath.Join("dir", "TestLineComments"), 2}, 517 {"\nline3 //line File1.go:100", filepath.Join("dir", "TestLineComments"), 3}, // bad line comment, ignored 518 {"\nline4", filepath.Join("dir", "TestLineComments"), 4}, 519 {"\n//line File1.go:100\n line100", filepath.Join("dir", "File1.go"), 100}, 520 {"\n//line \t :42\n line1", "", 42}, 521 {"\n//line File2.go:200\n line200", filepath.Join("dir", "File2.go"), 200}, 522 {"\n//line foo\t:42\n line42", filepath.Join("dir", "foo"), 42}, 523 {"\n //line foo:42\n line44", filepath.Join("dir", "foo"), 44}, // bad line comment, ignored 524 {"\n//line foo 42\n line46", filepath.Join("dir", "foo"), 46}, // bad line comment, ignored 525 {"\n//line foo:42 extra text\n line48", filepath.Join("dir", "foo"), 48}, // bad line comment, ignored 526 {"\n//line ./foo:42\n line42", filepath.Join("dir", "foo"), 42}, 527 {"\n//line a/b/c/File1.go:100\n line100", filepath.Join("dir", "a", "b", "c", "File1.go"), 100}, 528 } 529 530 var unixsegments = []segment{ 531 {"\n//line /bar:42\n line42", "/bar", 42}, 532 } 533 534 var winsegments = []segment{ 535 {"\n//line c:\\bar:42\n line42", "c:\\bar", 42}, 536 {"\n//line c:\\dir\\File1.go:100\n line100", "c:\\dir\\File1.go", 100}, 537 } 538 539 // Verify that comments of the form "//line filename:line" are interpreted correctly. 540 func TestLineComments(t *testing.T) { 541 segs := segments 542 if runtime.GOOS == "windows" { 543 segs = append(segs, winsegments...) 544 } else { 545 segs = append(segs, unixsegments...) 546 } 547 548 // make source 549 var src string 550 for _, e := range segs { 551 src += e.srcline 552 } 553 554 // verify scan 555 var S Scanner 556 f := token.NewFile(filepath.Join("dir", "TestLineComments"), 1, len(src)) 557 S.Init(f, []byte(src), nil, dontInsertCommas) 558 for _, s := range segs { 559 p, _, lit := S.Scan() 560 pos := f.Position(p) 561 checkPosScan(t, lit, p, token.Position{ 562 Filename: s.filename, 563 Offset: pos.Offset, 564 Line: s.line, 565 Column: pos.Column, 566 }) 567 } 568 569 if S.ErrorCount != 0 { 570 t.Errorf("found %d errors", S.ErrorCount) 571 } 572 } 573 574 // Verify that initializing the same scanner more than once works correctly. 575 func TestInit(t *testing.T) { 576 var s Scanner 577 578 // 1st init 579 src1 := "false true { }" 580 f1 := token.NewFile("src1", 1, len(src1)) 581 s.Init(f1, []byte(src1), nil, dontInsertCommas) 582 if f1.Size() != len(src1) { 583 t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1)) 584 } 585 s.Scan() // false 586 s.Scan() // true 587 _, tok, _ := s.Scan() // { 588 if tok != token.LBRACE { 589 t.Errorf("bad token: got %s, expected %s", tok, token.LBRACE) 590 } 591 592 // 2nd init 593 src2 := "null true { ]" 594 f2 := token.NewFile("src2", 1, len(src2)) 595 s.Init(f2, []byte(src2), nil, dontInsertCommas) 596 if f2.Size() != len(src2) { 597 t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2)) 598 } 599 _, tok, _ = s.Scan() // go 600 if tok != token.NULL { 601 t.Errorf("bad token: got %s, expected %s", tok, token.NULL) 602 } 603 604 if s.ErrorCount != 0 { 605 t.Errorf("found %d errors", s.ErrorCount) 606 } 607 } 608 609 func TestScanInterpolation(t *testing.T) { 610 // error handler 611 eh := func(pos token.Pos, msg string, args []interface{}) { 612 msg = fmt.Sprintf(msg, args...) 613 t.Errorf("error handler called (pos = %v, msg = %s)", pos, msg) 614 } 615 trim := func(s string) string { return strings.Trim(s, `#"\()`) } 616 617 sources := []string{ 618 `"first\(first)\\second\(second)"`, 619 `#"first\#(first)\second\#(second)"#`, 620 `"level\( ["foo", "level", level ][2] )end\( end )"`, 621 `##"level\##( ["foo", "level", level ][2] )end\##( end )"##`, 622 `"level\( { "foo": 1, "bar": level } )end\(end)"`, 623 } 624 for i, src := range sources { 625 name := fmt.Sprintf("tsrc%d", i) 626 t.Run(name, func(t *testing.T) { 627 f := token.NewFile(name, 1, len(src)) 628 629 // verify scan 630 var s Scanner 631 s.Init(f, []byte(src), eh, ScanComments) 632 633 count := 0 634 var lit, str string 635 for tok := token.ILLEGAL; tok != token.EOF; { 636 switch tok { 637 case token.LPAREN: 638 count++ 639 case token.RPAREN: 640 if count--; count == 0 { 641 str = trim(s.ResumeInterpolation()) 642 } 643 case token.INTERPOLATION: 644 str = trim(lit) 645 case token.IDENT: 646 if lit != str { 647 t.Errorf("str: got %v; want %v", lit, str) 648 } 649 } 650 _, tok, lit = s.Scan() 651 } 652 }) 653 } 654 } 655 656 func TestStdErrorHander(t *testing.T) { 657 const src = "~\n" + // illegal character, cause an error 658 "~ ~\n" + // two errors on the same line 659 "//line File2:20\n" + 660 "~\n" + // different file, but same line 661 "//line File2:1\n" + 662 "~ ~\n" + // same file, decreasing line number 663 "//line File1:1\n" + 664 "~ ~ ~" // original file, line 1 again 665 666 var list errors.Error 667 eh := func(pos token.Pos, msg string, args []interface{}) { 668 list = errors.Append(list, errors.Newf(pos, msg, args...)) 669 } 670 671 var s Scanner 672 s.Init(token.NewFile("File1", 1, len(src)), []byte(src), eh, dontInsertCommas) 673 for { 674 if _, tok, _ := s.Scan(); tok == token.EOF { 675 break 676 } 677 } 678 679 n := len(errors.Errors(list)) 680 if n != s.ErrorCount { 681 t.Errorf("found %d errors, expected %d", n, s.ErrorCount) 682 } 683 684 if n != 9 { 685 t.Errorf("found %d raw errors, expected 9", n) 686 errors.Print(os.Stderr, list, nil) 687 } 688 689 n = len(errors.Errors(errors.Sanitize(list))) 690 if n != 4 { 691 t.Errorf("found %d one-per-line errors, expected 4", n) 692 errors.Print(os.Stderr, list, nil) 693 } 694 } 695 696 type errorCollector struct { 697 cnt int // number of errors encountered 698 msg string // last error message encountered 699 pos token.Pos // last error position encountered 700 } 701 702 func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) { 703 t.Helper() 704 var s Scanner 705 var h errorCollector 706 eh := func(pos token.Pos, msg string, args []interface{}) { 707 h.cnt++ 708 h.msg = fmt.Sprintf(msg, args...) 709 h.pos = pos 710 } 711 s.Init(token.NewFile("", 1, len(src)), []byte(src), eh, ScanComments|dontInsertCommas) 712 _, tok0, lit0 := s.Scan() 713 if tok0 != tok { 714 t.Errorf("%q: got %s, expected %s", src, tok0, tok) 715 } 716 if tok0 != token.ILLEGAL && lit0 != lit { 717 t.Errorf("%q: got literal %q, expected %q", src, lit0, lit) 718 } 719 cnt := 0 720 if err != "" { 721 cnt = 1 722 } 723 if h.cnt != cnt { 724 t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt) 725 } 726 if h.msg != err { 727 t.Errorf("%q: got msg %q, expected %q", src, h.msg, err) 728 } 729 if h.pos.Offset() != pos { 730 t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset(), pos) 731 } 732 } 733 734 var errorTests = []struct { 735 src string 736 tok token.Token 737 pos int 738 lit string 739 err string 740 }{ 741 {"`", token.ILLEGAL, 0, "", "illegal character U+0060 '`'"}, 742 743 {"\a", token.ILLEGAL, 0, "", "illegal character U+0007"}, 744 {`^`, token.ILLEGAL, 0, "", "illegal character U+005E '^'"}, 745 {`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"}, 746 {`_|`, token.ILLEGAL, 0, "", "illegal token '_|'; expected '_'"}, 747 748 {`@`, token.ATTRIBUTE, 1, `@`, "invalid attribute: expected '('"}, 749 {`@foo`, token.ATTRIBUTE, 4, `@foo`, "invalid attribute: expected '('"}, 750 {`@foo(`, token.ATTRIBUTE, 5, `@foo(`, "attribute missing ')'"}, 751 {`@foo( `, token.ATTRIBUTE, 6, `@foo( `, "attribute missing ')'"}, 752 {`@foo( ""])`, token.ATTRIBUTE, 9, `@foo( ""])`, "unexpected ']'"}, 753 {`@foo(3})`, token.ATTRIBUTE, 7, `@foo(3})`, "unexpected '}'"}, 754 {`@foo(["")])`, token.ATTRIBUTE, 9, `@foo(["")])`, "unexpected ')'"}, 755 {`@foo(""`, token.ATTRIBUTE, 7, `@foo(""`, "attribute missing ')'"}, 756 {`@foo(aa`, token.ATTRIBUTE, 7, `@foo(aa`, "attribute missing ')'"}, 757 {`@foo("\(())")`, token.ATTRIBUTE, 7, `@foo("\(())")`, "interpolation not allowed in attribute"}, 758 759 // {`' '`, STRING, 0, `' '`, ""}, 760 // {"`\0`", STRING, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"}, 761 // {`'\07'`, STRING, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"}, 762 {`"\8"`, token.STRING, 2, `"\8"`, "unknown escape sequence"}, 763 {`"\08"`, token.STRING, 3, `"\08"`, "illegal character U+0038 '8' in escape sequence"}, 764 {`"\x"`, token.STRING, 3, `"\x"`, "illegal character U+0022 '\"' in escape sequence"}, 765 {`"\x0"`, token.STRING, 4, `"\x0"`, "illegal character U+0022 '\"' in escape sequence"}, 766 {`"\x0g"`, token.STRING, 4, `"\x0g"`, "illegal character U+0067 'g' in escape sequence"}, 767 {`"\u"`, token.STRING, 3, `"\u"`, "illegal character U+0022 '\"' in escape sequence"}, 768 {`"\u0"`, token.STRING, 4, `"\u0"`, "illegal character U+0022 '\"' in escape sequence"}, 769 {`"\u00"`, token.STRING, 5, `"\u00"`, "illegal character U+0022 '\"' in escape sequence"}, 770 {`"\u000"`, token.STRING, 6, `"\u000"`, "illegal character U+0022 '\"' in escape sequence"}, 771 // {`"\u000`, token.STRING, 6, `"\u000`, "string literal not terminated"}, two errors 772 {`"\u0000"`, token.STRING, 0, `"\u0000"`, ""}, 773 {`"\U"`, token.STRING, 3, `"\U"`, "illegal character U+0022 '\"' in escape sequence"}, 774 {`"\U0"`, token.STRING, 4, `"\U0"`, "illegal character U+0022 '\"' in escape sequence"}, 775 {`"\U00"`, token.STRING, 5, `"\U00"`, "illegal character U+0022 '\"' in escape sequence"}, 776 {`"\U000"`, token.STRING, 6, `"\U000"`, "illegal character U+0022 '\"' in escape sequence"}, 777 {`"\U0000"`, token.STRING, 7, `"\U0000"`, "illegal character U+0022 '\"' in escape sequence"}, 778 {`"\U00000"`, token.STRING, 8, `"\U00000"`, "illegal character U+0022 '\"' in escape sequence"}, 779 {`"\U000000"`, token.STRING, 9, `"\U000000"`, "illegal character U+0022 '\"' in escape sequence"}, 780 {`"\U0000000"`, token.STRING, 10, `"\U0000000"`, "illegal character U+0022 '\"' in escape sequence"}, 781 // {`"\U0000000`, token.STRING, 10, `"\U0000000`, "string literal not terminated"}, // escape sequence not terminated"}, two errors 782 {`"\U00000000"`, token.STRING, 0, `"\U00000000"`, ""}, 783 {`"\Uffffffff"`, token.STRING, 2, `"\Uffffffff"`, "escape sequence is invalid Unicode code point"}, 784 {`'`, token.STRING, 0, `'`, "string literal not terminated"}, 785 {`"`, token.STRING, 0, `"`, "string literal not terminated"}, 786 {`""`, token.STRING, 0, `""`, ""}, 787 {`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"}, 788 {`""abc`, token.STRING, 0, `""`, ""}, 789 {"\"\"\"\nabc", token.STRING, 0, "\"\"\"\nabc", "string literal not terminated"}, 790 {"'''\nabc", token.STRING, 0, "'''\nabc", "string literal not terminated"}, 791 {"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"}, 792 {"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"}, 793 {"\"abc\r\n ", token.STRING, 0, "\"abc\r", "string literal not terminated"}, 794 {`#""`, token.STRING, 0, `#""`, "string literal not terminated"}, 795 {`#"""`, token.STRING, 0, `#"""`, `expected newline after multiline quote #"""`}, 796 {`#""#`, token.STRING, 0, `#""#`, ""}, 797 // {"$", IDENT, 0, "$", ""}, // TODO: for root of file? 798 {"#'", token.STRING, 0, "#'", "string literal not terminated"}, 799 {"''", token.STRING, 0, "''", ""}, 800 {"'", token.STRING, 0, "'", "string literal not terminated"}, 801 {`"\("`, token.INTERPOLATION, 0, `"\(`, ""}, 802 {`#"\("#`, token.STRING, 0, `#"\("#`, ""}, 803 {`#"\#("#`, token.INTERPOLATION, 0, `#"\#(`, ""}, 804 {`"\q"`, token.STRING, 2, `"\q"`, "unknown escape sequence"}, 805 {`#"\q"#`, token.STRING, 0, `#"\q"#`, ""}, 806 {`#"\#q"#`, token.STRING, 4, `#"\#q"#`, "unknown escape sequence"}, 807 {"0", token.INT, 0, "0", ""}, 808 {"077", token.INT, 0, "077", "illegal integer number"}, 809 {"078.", token.FLOAT, 0, "078.", ""}, 810 {"07801234567.", token.FLOAT, 0, "07801234567.", ""}, 811 {"078e0", token.FLOAT, 0, "078e0", ""}, 812 {"078", token.INT, 0, "078", "illegal integer number"}, 813 {"07800000009", token.INT, 0, "07800000009", "illegal integer number"}, 814 {"0x", token.INT, 0, "0x", "illegal hexadecimal number"}, 815 {"0X", token.INT, 0, "0X", "illegal hexadecimal number"}, 816 {"0Xbeef_", token.INT, 6, "0Xbeef_", "illegal '_' in number"}, 817 {"0Xbeef__beef", token.INT, 7, "0Xbeef__beef", "illegal '_' in number"}, 818 {"0b", token.INT, 0, "0b", "illegal binary number"}, 819 {"0o", token.INT, 0, "0o", "illegal octal number"}, 820 // {"123456789012345678890_i", IMAG, 21, "123456789012345678890_i", "illegal '_' in number"}, 821 {"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"}, 822 {"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"}, 823 {"\ufeff\ufeff", token.ILLEGAL, 3, "\ufeff\ufeff", "illegal byte order mark"}, // only first BOM is ignored 824 {"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"}, // only first BOM is ignored 825 // {"`a\ufeff`", IDENT, 2, "`a\ufeff`", "illegal byte order mark"}, // only first BOM is ignored 826 {`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored 827 } 828 829 func TestScanErrors(t *testing.T) { 830 for _, e := range errorTests { 831 t.Run(e.src, func(t *testing.T) { 832 checkError(t, e.src, e.tok, e.pos, e.lit, e.err) 833 }) 834 } 835 } 836 837 // Verify that no comments show up as literal values when skipping comments. 838 func TestNoLiteralComments(t *testing.T) { 839 var src = ` 840 a: { 841 A: 1 // foo 842 } 843 844 b :: { 845 B: 2 846 // foo 847 } 848 849 c: 3 // foo 850 851 d: 4 852 // foo 853 854 b anycode(): { 855 // foo 856 } 857 ` 858 var s Scanner 859 s.Init(token.NewFile("", 1, len(src)), []byte(src), nil, 0) 860 for { 861 pos, tok, lit := s.Scan() 862 class := tokenclass(tok) 863 if lit != "" && class != keyword && class != literal && tok != token.COMMA { 864 t.Errorf("%s: tok = %s, lit = %q", pos, tok, lit) 865 } 866 if tok <= token.EOF { 867 break 868 } 869 } 870 } 871 872 func BenchmarkScan(b *testing.B) { 873 b.StopTimer() 874 file := token.NewFile("", 1, len(source)) 875 var s Scanner 876 b.StartTimer() 877 for i := 0; i < b.N; i++ { 878 s.Init(file, source, nil, ScanComments) 879 for { 880 _, tok, _ := s.Scan() 881 if tok == token.EOF { 882 break 883 } 884 } 885 } 886 } 887 888 func BenchmarkScanFile(b *testing.B) { 889 b.StopTimer() 890 const filename = "go" 891 src, err := ioutil.ReadFile(filename) 892 if err != nil { 893 panic(err) 894 } 895 file := token.NewFile(filename, 1, len(src)) 896 b.SetBytes(int64(len(src))) 897 var s Scanner 898 b.StartTimer() 899 for i := 0; i < b.N; i++ { 900 s.Init(file, src, nil, ScanComments) 901 for { 902 _, tok, _ := s.Scan() 903 if tok == token.EOF { 904 break 905 } 906 } 907 } 908 }