cuelang.org/go@v0.13.0/cue/scanner/scanner_test.go (about) 1 // Copyright 2018 The CUE Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package scanner 16 17 import ( 18 "fmt" 19 "os" 20 "strings" 21 "testing" 22 23 "github.com/google/go-cmp/cmp" 24 25 "cuelang.org/go/cue/errors" 26 "cuelang.org/go/cue/token" 27 ) 28 29 const /* class */ ( 30 special = iota 31 literal 32 operator 33 keyword 34 ) 35 36 func tokenclass(tok token.Token) int { 37 switch { 38 case tok.IsLiteral(): 39 return literal 40 case tok.IsOperator(): 41 return operator 42 case tok.IsKeyword(): 43 return keyword 44 } 45 return special 46 } 47 48 type elt struct { 49 tok token.Token 50 lit string 51 class int 52 } 53 54 var testTokens = [...]elt{ 55 // Special tokens 56 {token.COMMENT, "// a comment \n", special}, 57 {token.COMMENT, "//\r\n", special}, 58 59 // Attributes 60 {token.ATTRIBUTE, "@foo()", special}, 61 {token.ATTRIBUTE, "@foo(,,)", special}, 62 {token.ATTRIBUTE, "@foo(a)", special}, 63 {token.ATTRIBUTE, "@foo(aa=b)", special}, 64 {token.ATTRIBUTE, "@foo(,a=b)", special}, 65 {token.ATTRIBUTE, `@foo(",a=b")`, special}, 66 {token.ATTRIBUTE, `@foo(##"\(),a=b"##)`, special}, 67 {token.ATTRIBUTE, `@foo("",a="")`, special}, 68 {token.ATTRIBUTE, `@foo(2,bytes,a.b=c)`, special}, 69 {token.ATTRIBUTE, `@foo([{()}]())`, special}, 70 {token.ATTRIBUTE, `@foo("{")`, special}, 71 72 // Identifiers and basic type literals 73 {token.BOTTOM, "_|_", literal}, 74 75 {token.IDENT, "foobar", literal}, 76 {token.IDENT, "$foobar", literal}, 77 {token.IDENT, "#foobar", literal}, 78 // {token.IDENT, "#0", literal}, 79 {token.IDENT, "#", literal}, 80 {token.IDENT, "_foobar", literal}, 81 {token.IDENT, "__foobar", literal}, 82 {token.IDENT, "#_foobar", literal}, 83 {token.IDENT, "_#foobar", literal}, 84 {token.IDENT, "__#foobar", literal}, 85 {token.IDENT, "a۰۱۸", literal}, 86 {token.IDENT, "foo६४", literal}, 87 {token.IDENT, "bar9876", literal}, 88 {token.IDENT, "ŝ", literal}, 89 {token.IDENT, "ŝfoo", literal}, 90 {token.INT, "0", literal}, 91 {token.INT, "1", literal}, 92 {token.INT, "123456789012345678890", literal}, 93 {token.INT, "12345_67890_12345_6788_90", literal}, 94 {token.INT, "1234567M", literal}, 95 {token.INT, "1234567Mi", literal}, 96 {token.INT, "1234567", literal}, 97 {token.INT, ".3Mi", literal}, 98 {token.INT, "3.3Mi", literal}, 99 {token.INT, "0xcafebabe", literal}, 100 {token.INT, "0b1100_1001", literal}, 101 {token.INT, "0o1234567", literal}, 102 {token.FLOAT, "0.", literal}, 103 {token.FLOAT, ".0", literal}, 104 {token.FLOAT, "3.14159265", literal}, 105 {token.FLOAT, "1e0", literal}, 106 {token.FLOAT, "1e+100", literal}, 107 {token.FLOAT, "1e-100", literal}, 108 {token.FLOAT, "1E+100", literal}, 109 {token.FLOAT, "1E-100", literal}, 110 {token.FLOAT, "0e-5", literal}, 111 {token.FLOAT, "0e+100", literal}, 112 {token.FLOAT, "0e-100", literal}, 113 {token.FLOAT, "0E+100", literal}, 114 {token.FLOAT, "0E-100", literal}, 115 {token.FLOAT, "2.71828e-1000", literal}, 116 {token.STRING, "'a'", literal}, 117 {token.STRING, "'\\000'", literal}, 118 {token.STRING, "'\\xFF'", literal}, 119 {token.STRING, "'\\uff16'", literal}, 120 {token.STRING, "'\\uD801'", literal}, 121 {token.STRING, "'\\U0000ff16'", literal}, 122 {token.STRING, "'foobar'", literal}, 123 {token.STRING, `'foo\/bar'`, literal}, 124 {token.STRING, `#" ""#`, literal}, 125 {token.STRING, `#"" "#`, literal}, 126 {token.STRING, `#""hello""#`, literal}, 127 {token.STRING, `##""# "##`, literal}, 128 {token.STRING, `####""###"####`, literal}, 129 {token.STRING, "##\"\"\"\n\"\"\"#\n\"\"\"##", literal}, 130 {token.STRING, `##"####"##`, literal}, 131 {token.STRING, `#"foobar"#`, literal}, 132 {token.STRING, `#" """#`, literal}, 133 {token.STRING, `#"\r"#`, literal}, 134 {token.STRING, `#"\("#`, literal}, 135 {token.STRING, `#"\q"#`, literal}, 136 {token.STRING, `###"\##q"###`, literal}, 137 {token.STRING, "'" + `\r` + "'", literal}, 138 {token.STRING, "'foo" + `\r\n` + "bar'", literal}, 139 {token.STRING, `"foobar"`, literal}, 140 {token.STRING, "\"\"\"\n foobar\n \"\"\"", literal}, 141 {token.STRING, "#\"\"\"\n \\(foobar\n \"\"\"#", literal}, 142 // TODO: should we preserve the \r instead and have it removed by the 143 // literal parser? This would allow preserving \r for formatting without 144 // changing the semantics of evaluation. 145 {token.STRING, "#\"\"\"\r\n \\(foobar\n \"\"\"#", literal}, 146 147 // Operators and delimiters 148 {token.ADD, "+", operator}, 149 {token.SUB, "-", operator}, 150 {token.MUL, "*", operator}, 151 {token.QUO, "/", operator}, 152 153 {token.AND, "&", operator}, 154 {token.OR, "|", operator}, 155 156 {token.LAND, "&&", operator}, 157 {token.LOR, "||", operator}, 158 159 {token.EQL, "==", operator}, 160 {token.LSS, "<", operator}, 161 {token.GTR, ">", operator}, 162 {token.BIND, "=", operator}, 163 {token.NOT, "!", operator}, 164 165 {token.NEQ, "!=", operator}, 166 {token.LEQ, "<=", operator}, 167 {token.GEQ, ">=", operator}, 168 {token.ELLIPSIS, "...", operator}, 169 170 {token.MAT, "=~", operator}, 171 {token.NMAT, "!~", operator}, 172 173 {token.LPAREN, "(", operator}, 174 {token.LBRACK, "[", operator}, 175 {token.LBRACE, "{", operator}, 176 {token.COMMA, ",", operator}, 177 {token.PERIOD, ".", operator}, 178 {token.OPTION, "?", operator}, 179 180 {token.RPAREN, ")", operator}, 181 {token.RBRACK, "]", operator}, 182 {token.RBRACE, "}", operator}, 183 {token.COLON, ":", operator}, 184 185 // Keywords 186 {token.TRUE, "true", keyword}, 187 {token.FALSE, "false", keyword}, 188 {token.NULL, "null", keyword}, 189 190 {token.FOR, "for", keyword}, 191 {token.IF, "if", keyword}, 192 {token.IN, "in", keyword}, 193 } 194 195 const whitespace = " \t \n\n\n" // to separate tokens 196 197 var source = func() []byte { 198 var src []byte 199 for _, t := range testTokens { 200 src = append(src, t.lit...) 201 src = append(src, whitespace...) 202 } 203 return src 204 }() 205 206 func newlineCount(s string) int { 207 n := 0 208 for i := 0; i < len(s); i++ { 209 if s[i] == '\n' { 210 n++ 211 } 212 } 213 return n 214 } 215 216 func checkPosScan(t *testing.T, lit string, p token.Pos, expected token.Position) { 217 pos := p.Position() 218 if pos.Filename != expected.Filename { 219 t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename) 220 } 221 if pos.Offset != expected.Offset { 222 t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset) 223 } 224 if pos.Line != expected.Line { 225 t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line) 226 } 227 if pos.Column != expected.Column { 228 t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column) 229 } 230 } 231 232 // Verify that calling Scan() provides the correct results. 233 func TestScan(t *testing.T) { 234 whitespace_linecount := newlineCount(whitespace) 235 236 // error handler 237 eh := func(_ token.Pos, msg string, args []interface{}) { 238 t.Errorf("error handler called (msg = %s)", fmt.Sprintf(msg, args...)) 239 } 240 241 // verify scan 242 var s Scanner 243 s.Init(token.NewFile("", -1, len(source)), source, eh, ScanComments|DontInsertCommas) 244 245 // set up expected position 246 epos := token.Position{ 247 Filename: "", 248 Offset: 0, 249 Line: 1, 250 Column: 1, 251 } 252 253 index := 0 254 for { 255 pos, tok, lit := s.Scan() 256 257 // check position 258 if tok == token.EOF { 259 // correction for EOF 260 epos.Line = newlineCount(string(source)) 261 epos.Column = 2 262 } 263 checkPosScan(t, lit, pos, epos) 264 265 // check token 266 e := elt{token.EOF, "", special} 267 if index < len(testTokens) { 268 e = testTokens[index] 269 index++ 270 } 271 if tok != e.tok { 272 t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok) 273 } 274 275 // check token class 276 if tokenclass(tok) != e.class { 277 t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) 278 } 279 280 // check literal 281 elit := "" 282 switch e.tok { 283 case token.COMMENT: 284 // no CRs in comments 285 elit = string(stripCR([]byte(e.lit))) 286 //-style comment literal doesn't contain newline 287 if elit[1] == '/' { 288 elit = elit[0 : len(elit)-1] 289 } 290 case token.ATTRIBUTE: 291 elit = e.lit 292 case token.IDENT: 293 elit = e.lit 294 case token.COMMA: 295 elit = "," 296 default: 297 if e.tok.IsLiteral() { 298 // no CRs in raw string literals 299 elit = e.lit 300 if elit[0] == '`' { 301 elit = string(stripCR([]byte(elit))) 302 } 303 } else if e.tok.IsKeyword() { 304 elit = e.lit 305 } 306 } 307 if lit != elit { 308 t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit) 309 } 310 311 if tok == token.EOF { 312 break 313 } 314 315 // update position 316 epos.Offset += len(e.lit) + len(whitespace) 317 epos.Line += newlineCount(e.lit) + whitespace_linecount 318 319 } 320 321 if s.ErrorCount != 0 { 322 t.Errorf("found %d errors", s.ErrorCount) 323 } 324 } 325 326 func checkComma(t *testing.T, line string, mode Mode) { 327 var S Scanner 328 file := token.NewFile("TestCommas", -1, len(line)) 329 S.Init(file, []byte(line), nil, mode) 330 pos, tok, lit := S.Scan() 331 for tok != token.EOF { 332 if tok == token.ILLEGAL { 333 // the illegal token literal indicates what 334 // kind of semicolon literal to expect 335 commaLit := "\n" 336 if lit[0] == '~' { 337 commaLit = "," 338 } 339 // next token must be a comma 340 commaPos := file.Position(pos) 341 commaPos.Offset++ 342 commaPos.Column++ 343 pos, tok, lit = S.Scan() 344 if tok == token.COMMA { 345 if lit != commaLit { 346 t.Errorf(`bad literal for %q: got %q (%q), expected %q`, line, lit, tok, commaLit) 347 } 348 checkPosScan(t, line, pos, commaPos) 349 } else { 350 t.Errorf("bad token for %q: got %s, expected ','", line, tok) 351 } 352 } else if tok == token.COMMA { 353 t.Errorf("bad token for %q: got ',', expected no ','", line) 354 } 355 pos, tok, lit = S.Scan() 356 } 357 } 358 359 var lines = []string{ 360 // ~ indicates a comma present in the source 361 // ^ indicates an automatically inserted comma 362 "", 363 "\ufeff~,", // first BOM is ignored 364 "~,", 365 "foo^\n", 366 "_foo^\n", 367 "123^\n", 368 "1.2^\n", 369 "'x'^\n", 370 "_|_^\n", 371 "_|_^\n", 372 `"x"` + "^\n", 373 "#'x'#^\n", 374 `""" 375 foo 376 """` + "^\n", 377 // `""" 378 // foo \(bar) 379 // """` + "^\n", 380 `''' 381 foo 382 '''` + "^\n", 383 384 "+\n", 385 "-\n", 386 "*\n", 387 "/\n", 388 389 "&\n", 390 // "&^\n", 391 "|\n", 392 393 "&&\n", 394 "||\n", 395 "<-\n", 396 "->\n", 397 398 "==\n", 399 "<\n", 400 ">\n", 401 "=\n", 402 "!\n", 403 404 "!=\n", 405 "<=\n", 406 ">=\n", 407 ":=\n", 408 "...^\n", 409 410 "(\n", 411 "[\n", 412 "[[\n", 413 "{\n", 414 "{{\n", 415 "~,\n", 416 ".\n", 417 418 ")^\n", 419 "]^\n", 420 "]]^\n", 421 "}^\n", 422 "}}^\n", 423 ":\n", 424 "::\n", 425 ";^\n", 426 427 "true^\n", 428 "false^\n", 429 "null^\n", 430 431 "foo^//comment\n", 432 "foo^//comment", 433 434 "foo ^// comment\n", 435 "foo ^// comment", 436 437 "foo ^", 438 "foo ^//", 439 440 "package main^\n\nfoo: bar^", 441 "package main^", 442 } 443 444 func TestCommas(t *testing.T) { 445 for _, line := range lines { 446 checkComma(t, line, 0) 447 checkComma(t, line, ScanComments) 448 449 // if the input ended in newlines, the input must tokenize the 450 // same with or without those newlines 451 for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- { 452 checkComma(t, line[0:i], 0) 453 checkComma(t, line[0:i], ScanComments) 454 } 455 } 456 } 457 458 func TestRelative(t *testing.T) { 459 test := ` 460 package foo 461 462 // comment 463 a: 1 // a 464 b : 5 465 // line one 466 // line two 467 c 468 : "dfs" 469 , d: "foo" 470 ` 471 want := []string{ 472 `newline IDENT package`, 473 `blank IDENT foo`, 474 "elided , \n", 475 `section COMMENT // comment`, 476 `newline IDENT a`, 477 `nospace : `, 478 `blank INT 1`, 479 "elided , \n", 480 `blank COMMENT // a`, 481 `newline IDENT b`, 482 `blank : `, 483 `blank INT 5`, 484 "elided , \n", 485 "newline COMMENT // line one", 486 "newline COMMENT // line two", 487 `newline IDENT c`, 488 `newline : `, 489 `blank STRING "dfs"`, 490 "newline , ,", 491 "blank IDENT d", 492 "nospace : ", 493 `blank STRING "foo"`, 494 "elided , \n", 495 } 496 var S Scanner 497 f := token.NewFile("TestCommas", -1, len(test)) 498 S.Init(f, []byte(test), nil, ScanComments) 499 pos, tok, lit := S.Scan() 500 got := []string{} 501 for tok != token.EOF { 502 got = append(got, fmt.Sprintf("%-7s %-8s %s", pos.RelPos(), tok, lit)) 503 pos, tok, lit = S.Scan() 504 } 505 if diff := cmp.Diff(got, want); diff != "" { 506 t.Error(diff) 507 } 508 } 509 510 // Verify that initializing the same scanner more than once works correctly. 511 func TestInit(t *testing.T) { 512 var s Scanner 513 514 // 1st init 515 src1 := "false true { }" 516 f1 := token.NewFile("src1", -1, len(src1)) 517 s.Init(f1, []byte(src1), nil, DontInsertCommas) 518 if f1.Size() != len(src1) { 519 t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1)) 520 } 521 s.Scan() // false 522 s.Scan() // true 523 _, tok, _ := s.Scan() // { 524 if tok != token.LBRACE { 525 t.Errorf("bad token: got %s, expected %s", tok, token.LBRACE) 526 } 527 528 // 2nd init 529 src2 := "null true { ]" 530 f2 := token.NewFile("src2", -1, len(src2)) 531 s.Init(f2, []byte(src2), nil, DontInsertCommas) 532 if f2.Size() != len(src2) { 533 t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2)) 534 } 535 _, tok, _ = s.Scan() // go 536 if tok != token.NULL { 537 t.Errorf("bad token: got %s, expected %s", tok, token.NULL) 538 } 539 540 if s.ErrorCount != 0 { 541 t.Errorf("found %d errors", s.ErrorCount) 542 } 543 } 544 545 func TestScanInterpolation(t *testing.T) { 546 // error handler 547 eh := func(pos token.Pos, msg string, args []interface{}) { 548 msg = fmt.Sprintf(msg, args...) 549 t.Errorf("error handler called (pos = %v, msg = %s)", pos, msg) 550 } 551 trim := func(s string) string { return strings.Trim(s, `#"\()`) } 552 553 sources := []string{ 554 `"first\(first)\\second\(second)"`, 555 `#"first\#(first)\second\#(second)"#`, 556 `"level\( ["foo", "level", level ][2] )end\( end )"`, 557 `##"level\##( ["foo", "level", level ][2] )end\##( end )"##`, 558 `"level\( { "foo": 1, "bar": level } )end\(end)"`, 559 } 560 for i, src := range sources { 561 name := fmt.Sprintf("tsrc%d", i) 562 t.Run(name, func(t *testing.T) { 563 f := token.NewFile(name, -1, len(src)) 564 565 // verify scan 566 var s Scanner 567 s.Init(f, []byte(src), eh, ScanComments) 568 569 count := 0 570 var lit, str string 571 for tok := token.ILLEGAL; tok != token.EOF; { 572 switch tok { 573 case token.LPAREN: 574 count++ 575 case token.RPAREN: 576 if count--; count == 0 { 577 str = trim(s.ResumeInterpolation()) 578 } 579 case token.INTERPOLATION: 580 str = trim(lit) 581 case token.IDENT: 582 if lit != str { 583 t.Errorf("str: got %v; want %v", lit, str) 584 } 585 } 586 _, tok, lit = s.Scan() 587 } 588 }) 589 } 590 } 591 592 func TestStdErrorHander(t *testing.T) { 593 const src = "~\n" + // illegal character, cause an error 594 "~ ~\n" + // two errors on the same line 595 "//line File2:20\n" + 596 "~\n" + // different file, but same line 597 "//line File2:1\n" + 598 "~ ~\n" + // same file, decreasing line number 599 "//line File1:1\n" + 600 "~ ~ ~" // original file, line 1 again 601 602 var list errors.Error 603 eh := func(pos token.Pos, msg string, args []interface{}) { 604 list = errors.Append(list, errors.Newf(pos, msg, args...)) 605 } 606 607 var s Scanner 608 s.Init(token.NewFile("File1", -1, len(src)), []byte(src), eh, DontInsertCommas) 609 for { 610 if _, tok, _ := s.Scan(); tok == token.EOF { 611 break 612 } 613 } 614 615 n := len(errors.Errors(list)) 616 if n != s.ErrorCount { 617 t.Errorf("found %d errors, expected %d", n, s.ErrorCount) 618 } 619 620 if n != 9 { 621 t.Errorf("found %d raw errors, expected 9", n) 622 errors.Print(os.Stderr, list, nil) 623 } 624 625 // Note that this is 9 errors when sanitized, and not 8, 626 // as we currently don't support //line comment directives. 627 n = len(errors.Errors(errors.Sanitize(list))) 628 if n != 9 { 629 t.Errorf("found %d one-per-line errors, expected 9", n) 630 errors.Print(os.Stderr, list, nil) 631 } 632 } 633 634 type errorCollector struct { 635 cnt int // number of errors encountered 636 msg string // last error message encountered 637 pos token.Pos // last error position encountered 638 } 639 640 func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) { 641 t.Helper() 642 var s Scanner 643 var h errorCollector 644 eh := func(pos token.Pos, msg string, args []interface{}) { 645 h.cnt++ 646 h.msg = fmt.Sprintf(msg, args...) 647 h.pos = pos 648 } 649 s.Init(token.NewFile("", -1, len(src)), []byte(src), eh, ScanComments|DontInsertCommas) 650 _, tok0, lit0 := s.Scan() 651 if tok0 != tok { 652 t.Errorf("%q: got %s, expected %s", src, tok0, tok) 653 } 654 if tok0 != token.ILLEGAL && lit0 != lit { 655 t.Errorf("%q: got literal %q, expected %q", src, lit0, lit) 656 } 657 cnt := 0 658 if err != "" { 659 cnt = 1 660 } 661 if h.cnt != cnt { 662 t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt) 663 } 664 if h.msg != err { 665 t.Errorf("%q: got msg %q, expected %q", src, h.msg, err) 666 } 667 if h.pos.Offset() != pos { 668 t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset(), pos) 669 } 670 } 671 672 var errorTests = []struct { 673 src string 674 tok token.Token 675 pos int 676 lit string 677 err string 678 }{ 679 {"`", token.ILLEGAL, 0, "", "illegal character U+0060 '`'"}, 680 681 {"\a", token.ILLEGAL, 0, "", "illegal character U+0007"}, 682 {`^`, token.ILLEGAL, 0, "", "illegal character U+005E '^'"}, 683 {`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"}, 684 {`_|`, token.ILLEGAL, 0, "", "illegal token '_|'; expected '_'"}, 685 686 {`@`, token.ATTRIBUTE, 1, `@`, "invalid attribute: expected '('"}, 687 {`@foo`, token.ATTRIBUTE, 4, `@foo`, "invalid attribute: expected '('"}, 688 {`@foo(`, token.ATTRIBUTE, 5, `@foo(`, "attribute missing ')'"}, 689 {`@foo( `, token.ATTRIBUTE, 6, `@foo( `, "attribute missing ')'"}, 690 {`@foo( ""])`, token.ATTRIBUTE, 9, `@foo( ""])`, "unexpected ']'"}, 691 {`@foo(3})`, token.ATTRIBUTE, 7, `@foo(3})`, "unexpected '}'"}, 692 {`@foo(["")])`, token.ATTRIBUTE, 9, `@foo(["")])`, "unexpected ')'"}, 693 {`@foo(""`, token.ATTRIBUTE, 7, `@foo(""`, "attribute missing ')'"}, 694 {`@foo(aa`, token.ATTRIBUTE, 7, `@foo(aa`, "attribute missing ')'"}, 695 {`@foo("\(())")`, token.ATTRIBUTE, 7, `@foo("\(())")`, "interpolation not allowed in attribute"}, 696 697 // {`' '`, STRING, 0, `' '`, ""}, 698 // {"`\0`", STRING, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"}, 699 // {`'\07'`, STRING, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"}, 700 {`"\8"`, token.STRING, 2, `"\8"`, "unknown escape sequence"}, 701 {`"\08"`, token.STRING, 3, `"\08"`, "illegal character U+0038 '8' in escape sequence"}, 702 {`"\x"`, token.STRING, 3, `"\x"`, "illegal character U+0022 '\"' in escape sequence"}, 703 {`"\x0"`, token.STRING, 4, `"\x0"`, "illegal character U+0022 '\"' in escape sequence"}, 704 {`"\x0g"`, token.STRING, 4, `"\x0g"`, "illegal character U+0067 'g' in escape sequence"}, 705 {`"\u"`, token.STRING, 3, `"\u"`, "illegal character U+0022 '\"' in escape sequence"}, 706 {`"\u0"`, token.STRING, 4, `"\u0"`, "illegal character U+0022 '\"' in escape sequence"}, 707 {`"\u00"`, token.STRING, 5, `"\u00"`, "illegal character U+0022 '\"' in escape sequence"}, 708 {`"\u000"`, token.STRING, 6, `"\u000"`, "illegal character U+0022 '\"' in escape sequence"}, 709 // {`"\u000`, token.STRING, 6, `"\u000`, "string literal not terminated"}, two errors 710 {`"\u0000"`, token.STRING, 0, `"\u0000"`, ""}, 711 {`"\U"`, token.STRING, 3, `"\U"`, "illegal character U+0022 '\"' in escape sequence"}, 712 {`"\U0"`, token.STRING, 4, `"\U0"`, "illegal character U+0022 '\"' in escape sequence"}, 713 {`"\U00"`, token.STRING, 5, `"\U00"`, "illegal character U+0022 '\"' in escape sequence"}, 714 {`"\U000"`, token.STRING, 6, `"\U000"`, "illegal character U+0022 '\"' in escape sequence"}, 715 {`"\U0000"`, token.STRING, 7, `"\U0000"`, "illegal character U+0022 '\"' in escape sequence"}, 716 {`"\U00000"`, token.STRING, 8, `"\U00000"`, "illegal character U+0022 '\"' in escape sequence"}, 717 {`"\U000000"`, token.STRING, 9, `"\U000000"`, "illegal character U+0022 '\"' in escape sequence"}, 718 {`"\U0000000"`, token.STRING, 10, `"\U0000000"`, "illegal character U+0022 '\"' in escape sequence"}, 719 // {`"\U0000000`, token.STRING, 10, `"\U0000000`, "string literal not terminated"}, // escape sequence not terminated"}, two errors 720 {`"\U00000000"`, token.STRING, 0, `"\U00000000"`, ""}, 721 {`"\Uffffffff"`, token.STRING, 2, `"\Uffffffff"`, "escape sequence is invalid Unicode code point"}, 722 {`'`, token.STRING, 0, `'`, "string literal not terminated"}, 723 {`"`, token.STRING, 0, `"`, "string literal not terminated"}, 724 {`""`, token.STRING, 0, `""`, ""}, 725 {`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"}, 726 {`""abc`, token.STRING, 0, `""`, ""}, 727 {"\"\"\"\nabc", token.STRING, 0, "\"\"\"\nabc", "string literal not terminated"}, 728 {"'''\nabc", token.STRING, 0, "'''\nabc", "string literal not terminated"}, 729 {"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"}, 730 {"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"}, 731 {"\"abc\r\n ", token.STRING, 0, "\"abc\r", "string literal not terminated"}, 732 {`#""`, token.STRING, 0, `#""`, "string literal not terminated"}, 733 {`#"""`, token.STRING, 0, `#"""`, `expected newline after multiline quote #"""`}, 734 {`#""#`, token.STRING, 0, `#""#`, ""}, 735 // {"$", IDENT, 0, "$", ""}, // TODO: for root of file? 736 {"#'", token.STRING, 0, "#'", "string literal not terminated"}, 737 {"''", token.STRING, 0, "''", ""}, 738 {"'", token.STRING, 0, "'", "string literal not terminated"}, 739 {`"\("`, token.INTERPOLATION, 0, `"\(`, ""}, 740 {`#"\("#`, token.STRING, 0, `#"\("#`, ""}, 741 {`#"\#("#`, token.INTERPOLATION, 0, `#"\#(`, ""}, 742 {`"\q"`, token.STRING, 2, `"\q"`, "unknown escape sequence"}, 743 {`#"\q"#`, token.STRING, 0, `#"\q"#`, ""}, 744 {`#"\#q"#`, token.STRING, 4, `#"\#q"#`, "unknown escape sequence"}, 745 {"0", token.INT, 0, "0", ""}, 746 {"077", token.INT, 0, "077", "illegal integer number"}, 747 {"078.", token.FLOAT, 0, "078.", ""}, 748 {"07801234567.", token.FLOAT, 0, "07801234567.", ""}, 749 {"078e0", token.FLOAT, 0, "078e0", ""}, 750 {"078", token.INT, 0, "078", "illegal integer number"}, 751 {"07800000009", token.INT, 0, "07800000009", "illegal integer number"}, 752 {"0x", token.INT, 0, "0x", "illegal hexadecimal number"}, 753 {"0X", token.INT, 0, "0X", "illegal hexadecimal number"}, 754 {"0Xbeef_", token.INT, 6, "0Xbeef_", "illegal '_' in number"}, 755 {"0Xbeef__beef", token.INT, 7, "0Xbeef__beef", "illegal '_' in number"}, 756 {"0b", token.INT, 0, "0b", "illegal binary number"}, 757 {"0o", token.INT, 0, "0o", "illegal octal number"}, 758 // {"123456789012345678890_i", IMAG, 21, "123456789012345678890_i", "illegal '_' in number"}, 759 {"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"}, 760 {"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"}, 761 {"\ufeff\ufeff", token.ILLEGAL, 3, "\ufeff\ufeff", "illegal byte order mark"}, // only first BOM is ignored 762 {"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"}, // only first BOM is ignored 763 // {"`a\ufeff`", IDENT, 2, "`a\ufeff`", "illegal byte order mark"}, // only first BOM is ignored 764 {`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored 765 } 766 767 func TestScanErrors(t *testing.T) { 768 for _, e := range errorTests { 769 t.Run(e.src, func(t *testing.T) { 770 checkError(t, e.src, e.tok, e.pos, e.lit, e.err) 771 }) 772 } 773 } 774 775 // Verify that no comments show up as literal values when skipping comments. 776 func TestNoLiteralComments(t *testing.T) { 777 var src = ` 778 a: { 779 A: 1 // foo 780 } 781 782 #b: { 783 B: 2 784 // foo 785 } 786 787 c: 3 // foo 788 789 d: 4 790 // foo 791 792 b anycode(): { 793 // foo 794 } 795 ` 796 var s Scanner 797 s.Init(token.NewFile("", -1, len(src)), []byte(src), nil, 0) 798 for { 799 pos, tok, lit := s.Scan() 800 class := tokenclass(tok) 801 if lit != "" && class != keyword && class != literal && tok != token.COMMA { 802 t.Errorf("%s: tok = %s, lit = %q", pos, tok, lit) 803 } 804 if tok <= token.EOF { 805 break 806 } 807 } 808 } 809 810 func BenchmarkScan(b *testing.B) { 811 b.StopTimer() 812 file := token.NewFile("", -1, len(source)) 813 var s Scanner 814 b.StartTimer() 815 for i := 0; i < b.N; i++ { 816 s.Init(file, source, nil, ScanComments) 817 for { 818 _, tok, _ := s.Scan() 819 if tok == token.EOF { 820 break 821 } 822 } 823 } 824 } 825 826 func BenchmarkScanFile(b *testing.B) { 827 b.StopTimer() 828 const filename = "go" 829 src, err := os.ReadFile(filename) 830 if err != nil { 831 panic(err) 832 } 833 file := token.NewFile(filename, -1, len(src)) 834 b.SetBytes(int64(len(src))) 835 var s Scanner 836 b.StartTimer() 837 for i := 0; i < b.N; i++ { 838 s.Init(file, src, nil, ScanComments) 839 for { 840 _, tok, _ := s.Scan() 841 if tok == token.EOF { 842 break 843 } 844 } 845 } 846 }