cuelang.org/go@v0.10.1/cue/scanner/scanner_test.go (about) 1 // Copyright 2018 The CUE Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package scanner 16 17 import ( 18 "fmt" 19 "os" 20 "path/filepath" 21 "runtime" 22 "strings" 23 "testing" 24 25 "github.com/google/go-cmp/cmp" 26 27 "cuelang.org/go/cue/errors" 28 "cuelang.org/go/cue/token" 29 ) 30 31 const /* class */ ( 32 special = iota 33 literal 34 operator 35 keyword 36 ) 37 38 func tokenclass(tok token.Token) int { 39 switch { 40 case tok.IsLiteral(): 41 return literal 42 case tok.IsOperator(): 43 return operator 44 case tok.IsKeyword(): 45 return keyword 46 } 47 return special 48 } 49 50 type elt struct { 51 tok token.Token 52 lit string 53 class int 54 } 55 56 var testTokens = [...]elt{ 57 // Special tokens 58 {token.COMMENT, "// a comment \n", special}, 59 {token.COMMENT, "//\r\n", special}, 60 61 // Attributes 62 {token.ATTRIBUTE, "@foo()", special}, 63 {token.ATTRIBUTE, "@foo(,,)", special}, 64 {token.ATTRIBUTE, "@foo(a)", special}, 65 {token.ATTRIBUTE, "@foo(aa=b)", special}, 66 {token.ATTRIBUTE, "@foo(,a=b)", special}, 67 {token.ATTRIBUTE, `@foo(",a=b")`, special}, 68 {token.ATTRIBUTE, `@foo(##"\(),a=b"##)`, special}, 69 {token.ATTRIBUTE, `@foo("",a="")`, special}, 70 {token.ATTRIBUTE, `@foo(2,bytes,a.b=c)`, special}, 71 {token.ATTRIBUTE, `@foo([{()}]())`, special}, 72 {token.ATTRIBUTE, `@foo("{")`, special}, 73 74 // Identifiers and basic type literals 75 {token.BOTTOM, "_|_", literal}, 76 77 {token.IDENT, "foobar", literal}, 78 {token.IDENT, "$foobar", literal}, 79 {token.IDENT, "#foobar", literal}, 80 // {token.IDENT, "#0", literal}, 81 {token.IDENT, "#", literal}, 82 {token.IDENT, "_foobar", literal}, 83 {token.IDENT, "__foobar", literal}, 84 {token.IDENT, "#_foobar", literal}, 85 {token.IDENT, "_#foobar", literal}, 86 {token.IDENT, "__#foobar", literal}, 87 {token.IDENT, "a۰۱۸", literal}, 88 {token.IDENT, "foo६४", literal}, 89 {token.IDENT, "bar9876", literal}, 90 {token.IDENT, "ŝ", literal}, 91 {token.IDENT, "ŝfoo", literal}, 92 {token.INT, "0", literal}, 93 {token.INT, "1", literal}, 94 {token.INT, "123456789012345678890", literal}, 95 {token.INT, "12345_67890_12345_6788_90", literal}, 96 {token.INT, "1234567M", literal}, 97 {token.INT, "1234567Mi", literal}, 98 {token.INT, "1234567", literal}, 99 {token.INT, ".3Mi", literal}, 100 {token.INT, "3.3Mi", literal}, 101 {token.INT, "0xcafebabe", literal}, 102 {token.INT, "0b1100_1001", literal}, 103 {token.INT, "0o1234567", literal}, 104 {token.FLOAT, "0.", literal}, 105 {token.FLOAT, ".0", literal}, 106 {token.FLOAT, "3.14159265", literal}, 107 {token.FLOAT, "1e0", literal}, 108 {token.FLOAT, "1e+100", literal}, 109 {token.FLOAT, "1e-100", literal}, 110 {token.FLOAT, "1E+100", literal}, 111 {token.FLOAT, "1E-100", literal}, 112 {token.FLOAT, "0e-5", literal}, 113 {token.FLOAT, "0e+100", literal}, 114 {token.FLOAT, "0e-100", literal}, 115 {token.FLOAT, "0E+100", literal}, 116 {token.FLOAT, "0E-100", literal}, 117 {token.FLOAT, "2.71828e-1000", literal}, 118 {token.STRING, "'a'", literal}, 119 {token.STRING, "'\\000'", literal}, 120 {token.STRING, "'\\xFF'", literal}, 121 {token.STRING, "'\\uff16'", literal}, 122 {token.STRING, "'\\uD801'", literal}, 123 {token.STRING, "'\\U0000ff16'", literal}, 124 {token.STRING, "'foobar'", literal}, 125 {token.STRING, `'foo\/bar'`, literal}, 126 {token.STRING, `#" ""#`, literal}, 127 {token.STRING, `#"" "#`, literal}, 128 {token.STRING, `#""hello""#`, literal}, 129 {token.STRING, `##""# "##`, literal}, 130 {token.STRING, `####""###"####`, literal}, 131 {token.STRING, "##\"\"\"\n\"\"\"#\n\"\"\"##", literal}, 132 {token.STRING, `##"####"##`, literal}, 133 {token.STRING, `#"foobar"#`, literal}, 134 {token.STRING, `#" """#`, literal}, 135 {token.STRING, `#"\r"#`, literal}, 136 {token.STRING, `#"\("#`, literal}, 137 {token.STRING, `#"\q"#`, literal}, 138 {token.STRING, `###"\##q"###`, literal}, 139 {token.STRING, "'" + `\r` + "'", literal}, 140 {token.STRING, "'foo" + `\r\n` + "bar'", literal}, 141 {token.STRING, `"foobar"`, literal}, 142 {token.STRING, "\"\"\"\n foobar\n \"\"\"", literal}, 143 {token.STRING, "#\"\"\"\n \\(foobar\n \"\"\"#", literal}, 144 // TODO: should we preserve the \r instead and have it removed by the 145 // literal parser? This would allow preserving \r for formatting without 146 // changing the semantics of evaluation. 147 {token.STRING, "#\"\"\"\r\n \\(foobar\n \"\"\"#", literal}, 148 149 // Operators and delimiters 150 {token.ADD, "+", operator}, 151 {token.SUB, "-", operator}, 152 {token.MUL, "*", operator}, 153 {token.QUO, "/", operator}, 154 155 {token.AND, "&", operator}, 156 {token.OR, "|", operator}, 157 158 {token.LAND, "&&", operator}, 159 {token.LOR, "||", operator}, 160 161 {token.EQL, "==", operator}, 162 {token.LSS, "<", operator}, 163 {token.GTR, ">", operator}, 164 {token.BIND, "=", operator}, 165 {token.NOT, "!", operator}, 166 167 {token.NEQ, "!=", operator}, 168 {token.LEQ, "<=", operator}, 169 {token.GEQ, ">=", operator}, 170 {token.ELLIPSIS, "...", operator}, 171 172 {token.MAT, "=~", operator}, 173 {token.NMAT, "!~", operator}, 174 175 {token.LPAREN, "(", operator}, 176 {token.LBRACK, "[", operator}, 177 {token.LBRACE, "{", operator}, 178 {token.COMMA, ",", operator}, 179 {token.PERIOD, ".", operator}, 180 {token.OPTION, "?", operator}, 181 182 {token.RPAREN, ")", operator}, 183 {token.RBRACK, "]", operator}, 184 {token.RBRACE, "}", operator}, 185 {token.COLON, ":", operator}, 186 187 // Keywords 188 {token.TRUE, "true", keyword}, 189 {token.FALSE, "false", keyword}, 190 {token.NULL, "null", keyword}, 191 192 {token.FOR, "for", keyword}, 193 {token.IF, "if", keyword}, 194 {token.IN, "in", keyword}, 195 } 196 197 const whitespace = " \t \n\n\n" // to separate tokens 198 199 var source = func() []byte { 200 var src []byte 201 for _, t := range testTokens { 202 src = append(src, t.lit...) 203 src = append(src, whitespace...) 204 } 205 return src 206 }() 207 208 func newlineCount(s string) int { 209 n := 0 210 for i := 0; i < len(s); i++ { 211 if s[i] == '\n' { 212 n++ 213 } 214 } 215 return n 216 } 217 218 func checkPosScan(t *testing.T, lit string, p token.Pos, expected token.Position) { 219 pos := p.Position() 220 if pos.Filename != expected.Filename { 221 t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename) 222 } 223 if pos.Offset != expected.Offset { 224 t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset) 225 } 226 if pos.Line != expected.Line { 227 t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line) 228 } 229 if pos.Column != expected.Column { 230 t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column) 231 } 232 } 233 234 // Verify that calling Scan() provides the correct results. 235 func TestScan(t *testing.T) { 236 whitespace_linecount := newlineCount(whitespace) 237 238 // error handler 239 eh := func(_ token.Pos, msg string, args []interface{}) { 240 t.Errorf("error handler called (msg = %s)", fmt.Sprintf(msg, args...)) 241 } 242 243 // verify scan 244 var s Scanner 245 s.Init(token.NewFile("", -1, len(source)), source, eh, ScanComments|DontInsertCommas) 246 247 // set up expected position 248 epos := token.Position{ 249 Filename: "", 250 Offset: 0, 251 Line: 1, 252 Column: 1, 253 } 254 255 index := 0 256 for { 257 pos, tok, lit := s.Scan() 258 259 // check position 260 if tok == token.EOF { 261 // correction for EOF 262 epos.Line = newlineCount(string(source)) 263 epos.Column = 2 264 } 265 checkPosScan(t, lit, pos, epos) 266 267 // check token 268 e := elt{token.EOF, "", special} 269 if index < len(testTokens) { 270 e = testTokens[index] 271 index++ 272 } 273 if tok != e.tok { 274 t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok) 275 } 276 277 // check token class 278 if tokenclass(tok) != e.class { 279 t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) 280 } 281 282 // check literal 283 elit := "" 284 switch e.tok { 285 case token.COMMENT: 286 // no CRs in comments 287 elit = string(stripCR([]byte(e.lit))) 288 //-style comment literal doesn't contain newline 289 if elit[1] == '/' { 290 elit = elit[0 : len(elit)-1] 291 } 292 case token.ATTRIBUTE: 293 elit = e.lit 294 case token.IDENT: 295 elit = e.lit 296 case token.COMMA: 297 elit = "," 298 default: 299 if e.tok.IsLiteral() { 300 // no CRs in raw string literals 301 elit = e.lit 302 if elit[0] == '`' { 303 elit = string(stripCR([]byte(elit))) 304 } 305 } else if e.tok.IsKeyword() { 306 elit = e.lit 307 } 308 } 309 if lit != elit { 310 t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit) 311 } 312 313 if tok == token.EOF { 314 break 315 } 316 317 // update position 318 epos.Offset += len(e.lit) + len(whitespace) 319 epos.Line += newlineCount(e.lit) + whitespace_linecount 320 321 } 322 323 if s.ErrorCount != 0 { 324 t.Errorf("found %d errors", s.ErrorCount) 325 } 326 } 327 328 func checkComma(t *testing.T, line string, mode Mode) { 329 var S Scanner 330 file := token.NewFile("TestCommas", -1, len(line)) 331 S.Init(file, []byte(line), nil, mode) 332 pos, tok, lit := S.Scan() 333 for tok != token.EOF { 334 if tok == token.ILLEGAL { 335 // the illegal token literal indicates what 336 // kind of semicolon literal to expect 337 commaLit := "\n" 338 if lit[0] == '~' { 339 commaLit = "," 340 } 341 // next token must be a comma 342 commaPos := file.Position(pos) 343 commaPos.Offset++ 344 commaPos.Column++ 345 pos, tok, lit = S.Scan() 346 if tok == token.COMMA { 347 if lit != commaLit { 348 t.Errorf(`bad literal for %q: got %q (%q), expected %q`, line, lit, tok, commaLit) 349 } 350 checkPosScan(t, line, pos, commaPos) 351 } else { 352 t.Errorf("bad token for %q: got %s, expected ','", line, tok) 353 } 354 } else if tok == token.COMMA { 355 t.Errorf("bad token for %q: got ',', expected no ','", line) 356 } 357 pos, tok, lit = S.Scan() 358 } 359 } 360 361 var lines = []string{ 362 // ~ indicates a comma present in the source 363 // ^ indicates an automatically inserted comma 364 "", 365 "\ufeff~,", // first BOM is ignored 366 "~,", 367 "foo^\n", 368 "_foo^\n", 369 "123^\n", 370 "1.2^\n", 371 "'x'^\n", 372 "_|_^\n", 373 "_|_^\n", 374 `"x"` + "^\n", 375 "#'x'#^\n", 376 `""" 377 foo 378 """` + "^\n", 379 // `""" 380 // foo \(bar) 381 // """` + "^\n", 382 `''' 383 foo 384 '''` + "^\n", 385 386 "+\n", 387 "-\n", 388 "*\n", 389 "/\n", 390 391 "&\n", 392 // "&^\n", 393 "|\n", 394 395 "&&\n", 396 "||\n", 397 "<-\n", 398 "->\n", 399 400 "==\n", 401 "<\n", 402 ">\n", 403 "=\n", 404 "!\n", 405 406 "!=\n", 407 "<=\n", 408 ">=\n", 409 ":=\n", 410 "...^\n", 411 412 "(\n", 413 "[\n", 414 "[[\n", 415 "{\n", 416 "{{\n", 417 "~,\n", 418 ".\n", 419 420 ")^\n", 421 "]^\n", 422 "]]^\n", 423 "}^\n", 424 "}}^\n", 425 ":\n", 426 "::\n", 427 ";^\n", 428 429 "true^\n", 430 "false^\n", 431 "null^\n", 432 433 "foo^//comment\n", 434 "foo^//comment", 435 436 "foo ^// comment\n", 437 "foo ^// comment", 438 439 "foo ^", 440 "foo ^//", 441 442 "package main^\n\nfoo: bar^", 443 "package main^", 444 } 445 446 func TestCommas(t *testing.T) { 447 for _, line := range lines { 448 checkComma(t, line, 0) 449 checkComma(t, line, ScanComments) 450 451 // if the input ended in newlines, the input must tokenize the 452 // same with or without those newlines 453 for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- { 454 checkComma(t, line[0:i], 0) 455 checkComma(t, line[0:i], ScanComments) 456 } 457 } 458 } 459 460 func TestRelative(t *testing.T) { 461 test := ` 462 package foo 463 464 // comment 465 a: 1 // a 466 b : 5 467 // line one 468 // line two 469 c 470 : "dfs" 471 , d: "foo" 472 ` 473 want := []string{ 474 `newline IDENT package`, 475 `blank IDENT foo`, 476 "elided , \n", 477 `section COMMENT // comment`, 478 `newline IDENT a`, 479 `nospace : `, 480 `blank INT 1`, 481 "elided , \n", 482 `blank COMMENT // a`, 483 `newline IDENT b`, 484 `blank : `, 485 `blank INT 5`, 486 "elided , \n", 487 "newline COMMENT // line one", 488 "newline COMMENT // line two", 489 `newline IDENT c`, 490 `newline : `, 491 `blank STRING "dfs"`, 492 "newline , ,", 493 "blank IDENT d", 494 "nospace : ", 495 `blank STRING "foo"`, 496 "elided , \n", 497 } 498 var S Scanner 499 f := token.NewFile("TestCommas", -1, len(test)) 500 S.Init(f, []byte(test), nil, ScanComments) 501 pos, tok, lit := S.Scan() 502 got := []string{} 503 for tok != token.EOF { 504 got = append(got, fmt.Sprintf("%-7s %-8s %s", pos.RelPos(), tok, lit)) 505 pos, tok, lit = S.Scan() 506 } 507 if diff := cmp.Diff(got, want); diff != "" { 508 t.Error(diff) 509 } 510 } 511 512 type segment struct { 513 srcline string // a line of source text 514 filename string // filename for current token 515 line int // line number for current token 516 } 517 518 var segments = []segment{ 519 // exactly one token per line since the test consumes one token per segment 520 {" line1", filepath.Join("dir", "TestLineComments"), 1}, 521 {"\nline2", filepath.Join("dir", "TestLineComments"), 2}, 522 {"\nline3 //line File1.go:100", filepath.Join("dir", "TestLineComments"), 3}, // bad line comment, ignored 523 {"\nline4", filepath.Join("dir", "TestLineComments"), 4}, 524 {"\n//line File1.go:100\n line100", filepath.Join("dir", "File1.go"), 100}, 525 {"\n//line \t :42\n line1", "", 42}, 526 {"\n//line File2.go:200\n line200", filepath.Join("dir", "File2.go"), 200}, 527 {"\n//line foo\t:42\n line42", filepath.Join("dir", "foo"), 42}, 528 {"\n //line foo:42\n line44", filepath.Join("dir", "foo"), 44}, // bad line comment, ignored 529 {"\n//line foo 42\n line46", filepath.Join("dir", "foo"), 46}, // bad line comment, ignored 530 {"\n//line foo:42 extra text\n line48", filepath.Join("dir", "foo"), 48}, // bad line comment, ignored 531 {"\n//line ./foo:42\n line42", filepath.Join("dir", "foo"), 42}, 532 {"\n//line a/b/c/File1.go:100\n line100", filepath.Join("dir", "a", "b", "c", "File1.go"), 100}, 533 } 534 535 var unixsegments = []segment{ 536 {"\n//line /bar:42\n line42", "/bar", 42}, 537 } 538 539 var winsegments = []segment{ 540 {"\n//line c:\\bar:42\n line42", "c:\\bar", 42}, 541 {"\n//line c:\\dir\\File1.go:100\n line100", "c:\\dir\\File1.go", 100}, 542 } 543 544 // Verify that comments of the form "//line filename:line" are interpreted correctly. 545 func TestLineComments(t *testing.T) { 546 segs := segments 547 if runtime.GOOS == "windows" { 548 segs = append(segs, winsegments...) 549 } else { 550 segs = append(segs, unixsegments...) 551 } 552 553 // make source 554 var src string 555 for _, e := range segs { 556 src += e.srcline 557 } 558 559 // verify scan 560 var S Scanner 561 f := token.NewFile(filepath.Join("dir", "TestLineComments"), -1, len(src)) 562 S.Init(f, []byte(src), nil, DontInsertCommas) 563 for _, s := range segs { 564 p, _, lit := S.Scan() 565 pos := f.Position(p) 566 checkPosScan(t, lit, p, token.Position{ 567 Filename: s.filename, 568 Offset: pos.Offset, 569 Line: s.line, 570 Column: pos.Column, 571 }) 572 } 573 574 if S.ErrorCount != 0 { 575 t.Errorf("found %d errors", S.ErrorCount) 576 } 577 } 578 579 // Verify that initializing the same scanner more than once works correctly. 580 func TestInit(t *testing.T) { 581 var s Scanner 582 583 // 1st init 584 src1 := "false true { }" 585 f1 := token.NewFile("src1", -1, len(src1)) 586 s.Init(f1, []byte(src1), nil, DontInsertCommas) 587 if f1.Size() != len(src1) { 588 t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1)) 589 } 590 s.Scan() // false 591 s.Scan() // true 592 _, tok, _ := s.Scan() // { 593 if tok != token.LBRACE { 594 t.Errorf("bad token: got %s, expected %s", tok, token.LBRACE) 595 } 596 597 // 2nd init 598 src2 := "null true { ]" 599 f2 := token.NewFile("src2", -1, len(src2)) 600 s.Init(f2, []byte(src2), nil, DontInsertCommas) 601 if f2.Size() != len(src2) { 602 t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2)) 603 } 604 _, tok, _ = s.Scan() // go 605 if tok != token.NULL { 606 t.Errorf("bad token: got %s, expected %s", tok, token.NULL) 607 } 608 609 if s.ErrorCount != 0 { 610 t.Errorf("found %d errors", s.ErrorCount) 611 } 612 } 613 614 func TestScanInterpolation(t *testing.T) { 615 // error handler 616 eh := func(pos token.Pos, msg string, args []interface{}) { 617 msg = fmt.Sprintf(msg, args...) 618 t.Errorf("error handler called (pos = %v, msg = %s)", pos, msg) 619 } 620 trim := func(s string) string { return strings.Trim(s, `#"\()`) } 621 622 sources := []string{ 623 `"first\(first)\\second\(second)"`, 624 `#"first\#(first)\second\#(second)"#`, 625 `"level\( ["foo", "level", level ][2] )end\( end )"`, 626 `##"level\##( ["foo", "level", level ][2] )end\##( end )"##`, 627 `"level\( { "foo": 1, "bar": level } )end\(end)"`, 628 } 629 for i, src := range sources { 630 name := fmt.Sprintf("tsrc%d", i) 631 t.Run(name, func(t *testing.T) { 632 f := token.NewFile(name, -1, len(src)) 633 634 // verify scan 635 var s Scanner 636 s.Init(f, []byte(src), eh, ScanComments) 637 638 count := 0 639 var lit, str string 640 for tok := token.ILLEGAL; tok != token.EOF; { 641 switch tok { 642 case token.LPAREN: 643 count++ 644 case token.RPAREN: 645 if count--; count == 0 { 646 str = trim(s.ResumeInterpolation()) 647 } 648 case token.INTERPOLATION: 649 str = trim(lit) 650 case token.IDENT: 651 if lit != str { 652 t.Errorf("str: got %v; want %v", lit, str) 653 } 654 } 655 _, tok, lit = s.Scan() 656 } 657 }) 658 } 659 } 660 661 func TestStdErrorHander(t *testing.T) { 662 const src = "~\n" + // illegal character, cause an error 663 "~ ~\n" + // two errors on the same line 664 "//line File2:20\n" + 665 "~\n" + // different file, but same line 666 "//line File2:1\n" + 667 "~ ~\n" + // same file, decreasing line number 668 "//line File1:1\n" + 669 "~ ~ ~" // original file, line 1 again 670 671 var list errors.Error 672 eh := func(pos token.Pos, msg string, args []interface{}) { 673 list = errors.Append(list, errors.Newf(pos, msg, args...)) 674 } 675 676 var s Scanner 677 s.Init(token.NewFile("File1", -1, len(src)), []byte(src), eh, DontInsertCommas) 678 for { 679 if _, tok, _ := s.Scan(); tok == token.EOF { 680 break 681 } 682 } 683 684 n := len(errors.Errors(list)) 685 if n != s.ErrorCount { 686 t.Errorf("found %d errors, expected %d", n, s.ErrorCount) 687 } 688 689 if n != 9 { 690 t.Errorf("found %d raw errors, expected 9", n) 691 errors.Print(os.Stderr, list, nil) 692 } 693 694 n = len(errors.Errors(errors.Sanitize(list))) 695 if n != 8 { 696 t.Errorf("found %d one-per-line errors, expected 8", n) 697 errors.Print(os.Stderr, list, nil) 698 } 699 } 700 701 type errorCollector struct { 702 cnt int // number of errors encountered 703 msg string // last error message encountered 704 pos token.Pos // last error position encountered 705 } 706 707 func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) { 708 t.Helper() 709 var s Scanner 710 var h errorCollector 711 eh := func(pos token.Pos, msg string, args []interface{}) { 712 h.cnt++ 713 h.msg = fmt.Sprintf(msg, args...) 714 h.pos = pos 715 } 716 s.Init(token.NewFile("", -1, len(src)), []byte(src), eh, ScanComments|DontInsertCommas) 717 _, tok0, lit0 := s.Scan() 718 if tok0 != tok { 719 t.Errorf("%q: got %s, expected %s", src, tok0, tok) 720 } 721 if tok0 != token.ILLEGAL && lit0 != lit { 722 t.Errorf("%q: got literal %q, expected %q", src, lit0, lit) 723 } 724 cnt := 0 725 if err != "" { 726 cnt = 1 727 } 728 if h.cnt != cnt { 729 t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt) 730 } 731 if h.msg != err { 732 t.Errorf("%q: got msg %q, expected %q", src, h.msg, err) 733 } 734 if h.pos.Offset() != pos { 735 t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset(), pos) 736 } 737 } 738 739 var errorTests = []struct { 740 src string 741 tok token.Token 742 pos int 743 lit string 744 err string 745 }{ 746 {"`", token.ILLEGAL, 0, "", "illegal character U+0060 '`'"}, 747 748 {"\a", token.ILLEGAL, 0, "", "illegal character U+0007"}, 749 {`^`, token.ILLEGAL, 0, "", "illegal character U+005E '^'"}, 750 {`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"}, 751 {`_|`, token.ILLEGAL, 0, "", "illegal token '_|'; expected '_'"}, 752 753 {`@`, token.ATTRIBUTE, 1, `@`, "invalid attribute: expected '('"}, 754 {`@foo`, token.ATTRIBUTE, 4, `@foo`, "invalid attribute: expected '('"}, 755 {`@foo(`, token.ATTRIBUTE, 5, `@foo(`, "attribute missing ')'"}, 756 {`@foo( `, token.ATTRIBUTE, 6, `@foo( `, "attribute missing ')'"}, 757 {`@foo( ""])`, token.ATTRIBUTE, 9, `@foo( ""])`, "unexpected ']'"}, 758 {`@foo(3})`, token.ATTRIBUTE, 7, `@foo(3})`, "unexpected '}'"}, 759 {`@foo(["")])`, token.ATTRIBUTE, 9, `@foo(["")])`, "unexpected ')'"}, 760 {`@foo(""`, token.ATTRIBUTE, 7, `@foo(""`, "attribute missing ')'"}, 761 {`@foo(aa`, token.ATTRIBUTE, 7, `@foo(aa`, "attribute missing ')'"}, 762 {`@foo("\(())")`, token.ATTRIBUTE, 7, `@foo("\(())")`, "interpolation not allowed in attribute"}, 763 764 // {`' '`, STRING, 0, `' '`, ""}, 765 // {"`\0`", STRING, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"}, 766 // {`'\07'`, STRING, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"}, 767 {`"\8"`, token.STRING, 2, `"\8"`, "unknown escape sequence"}, 768 {`"\08"`, token.STRING, 3, `"\08"`, "illegal character U+0038 '8' in escape sequence"}, 769 {`"\x"`, token.STRING, 3, `"\x"`, "illegal character U+0022 '\"' in escape sequence"}, 770 {`"\x0"`, token.STRING, 4, `"\x0"`, "illegal character U+0022 '\"' in escape sequence"}, 771 {`"\x0g"`, token.STRING, 4, `"\x0g"`, "illegal character U+0067 'g' in escape sequence"}, 772 {`"\u"`, token.STRING, 3, `"\u"`, "illegal character U+0022 '\"' in escape sequence"}, 773 {`"\u0"`, token.STRING, 4, `"\u0"`, "illegal character U+0022 '\"' in escape sequence"}, 774 {`"\u00"`, token.STRING, 5, `"\u00"`, "illegal character U+0022 '\"' in escape sequence"}, 775 {`"\u000"`, token.STRING, 6, `"\u000"`, "illegal character U+0022 '\"' in escape sequence"}, 776 // {`"\u000`, token.STRING, 6, `"\u000`, "string literal not terminated"}, two errors 777 {`"\u0000"`, token.STRING, 0, `"\u0000"`, ""}, 778 {`"\U"`, token.STRING, 3, `"\U"`, "illegal character U+0022 '\"' in escape sequence"}, 779 {`"\U0"`, token.STRING, 4, `"\U0"`, "illegal character U+0022 '\"' in escape sequence"}, 780 {`"\U00"`, token.STRING, 5, `"\U00"`, "illegal character U+0022 '\"' in escape sequence"}, 781 {`"\U000"`, token.STRING, 6, `"\U000"`, "illegal character U+0022 '\"' in escape sequence"}, 782 {`"\U0000"`, token.STRING, 7, `"\U0000"`, "illegal character U+0022 '\"' in escape sequence"}, 783 {`"\U00000"`, token.STRING, 8, `"\U00000"`, "illegal character U+0022 '\"' in escape sequence"}, 784 {`"\U000000"`, token.STRING, 9, `"\U000000"`, "illegal character U+0022 '\"' in escape sequence"}, 785 {`"\U0000000"`, token.STRING, 10, `"\U0000000"`, "illegal character U+0022 '\"' in escape sequence"}, 786 // {`"\U0000000`, token.STRING, 10, `"\U0000000`, "string literal not terminated"}, // escape sequence not terminated"}, two errors 787 {`"\U00000000"`, token.STRING, 0, `"\U00000000"`, ""}, 788 {`"\Uffffffff"`, token.STRING, 2, `"\Uffffffff"`, "escape sequence is invalid Unicode code point"}, 789 {`'`, token.STRING, 0, `'`, "string literal not terminated"}, 790 {`"`, token.STRING, 0, `"`, "string literal not terminated"}, 791 {`""`, token.STRING, 0, `""`, ""}, 792 {`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"}, 793 {`""abc`, token.STRING, 0, `""`, ""}, 794 {"\"\"\"\nabc", token.STRING, 0, "\"\"\"\nabc", "string literal not terminated"}, 795 {"'''\nabc", token.STRING, 0, "'''\nabc", "string literal not terminated"}, 796 {"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"}, 797 {"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"}, 798 {"\"abc\r\n ", token.STRING, 0, "\"abc\r", "string literal not terminated"}, 799 {`#""`, token.STRING, 0, `#""`, "string literal not terminated"}, 800 {`#"""`, token.STRING, 0, `#"""`, `expected newline after multiline quote #"""`}, 801 {`#""#`, token.STRING, 0, `#""#`, ""}, 802 // {"$", IDENT, 0, "$", ""}, // TODO: for root of file? 803 {"#'", token.STRING, 0, "#'", "string literal not terminated"}, 804 {"''", token.STRING, 0, "''", ""}, 805 {"'", token.STRING, 0, "'", "string literal not terminated"}, 806 {`"\("`, token.INTERPOLATION, 0, `"\(`, ""}, 807 {`#"\("#`, token.STRING, 0, `#"\("#`, ""}, 808 {`#"\#("#`, token.INTERPOLATION, 0, `#"\#(`, ""}, 809 {`"\q"`, token.STRING, 2, `"\q"`, "unknown escape sequence"}, 810 {`#"\q"#`, token.STRING, 0, `#"\q"#`, ""}, 811 {`#"\#q"#`, token.STRING, 4, `#"\#q"#`, "unknown escape sequence"}, 812 {"0", token.INT, 0, "0", ""}, 813 {"077", token.INT, 0, "077", "illegal integer number"}, 814 {"078.", token.FLOAT, 0, "078.", ""}, 815 {"07801234567.", token.FLOAT, 0, "07801234567.", ""}, 816 {"078e0", token.FLOAT, 0, "078e0", ""}, 817 {"078", token.INT, 0, "078", "illegal integer number"}, 818 {"07800000009", token.INT, 0, "07800000009", "illegal integer number"}, 819 {"0x", token.INT, 0, "0x", "illegal hexadecimal number"}, 820 {"0X", token.INT, 0, "0X", "illegal hexadecimal number"}, 821 {"0Xbeef_", token.INT, 6, "0Xbeef_", "illegal '_' in number"}, 822 {"0Xbeef__beef", token.INT, 7, "0Xbeef__beef", "illegal '_' in number"}, 823 {"0b", token.INT, 0, "0b", "illegal binary number"}, 824 {"0o", token.INT, 0, "0o", "illegal octal number"}, 825 // {"123456789012345678890_i", IMAG, 21, "123456789012345678890_i", "illegal '_' in number"}, 826 {"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"}, 827 {"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"}, 828 {"\ufeff\ufeff", token.ILLEGAL, 3, "\ufeff\ufeff", "illegal byte order mark"}, // only first BOM is ignored 829 {"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"}, // only first BOM is ignored 830 // {"`a\ufeff`", IDENT, 2, "`a\ufeff`", "illegal byte order mark"}, // only first BOM is ignored 831 {`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored 832 } 833 834 func TestScanErrors(t *testing.T) { 835 for _, e := range errorTests { 836 t.Run(e.src, func(t *testing.T) { 837 checkError(t, e.src, e.tok, e.pos, e.lit, e.err) 838 }) 839 } 840 } 841 842 // Verify that no comments show up as literal values when skipping comments. 843 func TestNoLiteralComments(t *testing.T) { 844 var src = ` 845 a: { 846 A: 1 // foo 847 } 848 849 #b: { 850 B: 2 851 // foo 852 } 853 854 c: 3 // foo 855 856 d: 4 857 // foo 858 859 b anycode(): { 860 // foo 861 } 862 ` 863 var s Scanner 864 s.Init(token.NewFile("", -1, len(src)), []byte(src), nil, 0) 865 for { 866 pos, tok, lit := s.Scan() 867 class := tokenclass(tok) 868 if lit != "" && class != keyword && class != literal && tok != token.COMMA { 869 t.Errorf("%s: tok = %s, lit = %q", pos, tok, lit) 870 } 871 if tok <= token.EOF { 872 break 873 } 874 } 875 } 876 877 func BenchmarkScan(b *testing.B) { 878 b.StopTimer() 879 file := token.NewFile("", -1, len(source)) 880 var s Scanner 881 b.StartTimer() 882 for i := 0; i < b.N; i++ { 883 s.Init(file, source, nil, ScanComments) 884 for { 885 _, tok, _ := s.Scan() 886 if tok == token.EOF { 887 break 888 } 889 } 890 } 891 } 892 893 func BenchmarkScanFile(b *testing.B) { 894 b.StopTimer() 895 const filename = "go" 896 src, err := os.ReadFile(filename) 897 if err != nil { 898 panic(err) 899 } 900 file := token.NewFile(filename, -1, len(src)) 901 b.SetBytes(int64(len(src))) 902 var s Scanner 903 b.StartTimer() 904 for i := 0; i < b.N; i++ { 905 s.Init(file, src, nil, ScanComments) 906 for { 907 _, tok, _ := s.Scan() 908 if tok == token.EOF { 909 break 910 } 911 } 912 } 913 }