// github.com/evanw/esbuild@v0.21.4/internal/js_lexer/js_lexer.go

package js_lexer

// The lexer converts a source file to a stream of tokens. Unlike many
// compilers, esbuild does not run the lexer to completion before the parser is
// started. Instead, the lexer is called repeatedly by the parser as the parser
// parses the file. This is because many tokens are context-sensitive and need
// high-level information from the parser. Examples are regular expression
// literals and JSX elements.
//
// For efficiency, the text associated with textual tokens is stored in two
// separate ways depending on the token. Identifiers use UTF-8 encoding which
// allows them to be slices of the input file without allocating extra memory.
// Strings use UTF-16 encoding so they can represent unicode surrogates
// accurately.

import (
	"fmt"
	"strconv"
	"strings"
	"unicode/utf8"

	"github.com/evanw/esbuild/internal/ast"
	"github.com/evanw/esbuild/internal/config"
	"github.com/evanw/esbuild/internal/helpers"
	"github.com/evanw/esbuild/internal/js_ast"
	"github.com/evanw/esbuild/internal/logger"
)

type T uint8

// If you add a new token, remember to add it to "tokenToString" too
const (
	TEndOfFile T = iota
	TSyntaxError

	// "#!/usr/bin/env node"
	THashbang

	// Literals
	TNoSubstitutionTemplateLiteral // Contents are in lexer.StringLiteral ([]uint16)
	TNumericLiteral                // Contents are in lexer.Number (float64)
	TStringLiteral                 // Contents are in lexer.StringLiteral ([]uint16)
	TBigIntegerLiteral             // Contents are in lexer.Identifier (string)

	// Pseudo-literals
	TTemplateHead   // Contents are in lexer.StringLiteral ([]uint16)
	TTemplateMiddle // Contents are in lexer.StringLiteral ([]uint16)
	TTemplateTail   // Contents are in lexer.StringLiteral ([]uint16)

	// Punctuation
	TAmpersand
	TAmpersandAmpersand
	TAsterisk
	TAsteriskAsterisk
	TAt
	TBar
	TBarBar
	TCaret
	TCloseBrace
	TCloseBracket
	TCloseParen
	TColon
	TComma
	TDot
	TDotDotDot
	TEqualsEquals
	TEqualsEqualsEquals
	TEqualsGreaterThan
	TExclamation
	TExclamationEquals
	TExclamationEqualsEquals
	TGreaterThan
	TGreaterThanEquals
	TGreaterThanGreaterThan
	TGreaterThanGreaterThanGreaterThan
	TLessThan
	TLessThanEquals
	TLessThanLessThan
	TMinus
	TMinusMinus
	TOpenBrace
	TOpenBracket
	TOpenParen
	TPercent
	TPlus
	TPlusPlus
	TQuestion
	TQuestionDot
	TQuestionQuestion
	TSemicolon
	TSlash
	TTilde

	// Assignments (keep in sync with IsAssign() below)
	TAmpersandAmpersandEquals
	TAmpersandEquals
	TAsteriskAsteriskEquals
	TAsteriskEquals
	TBarBarEquals
	TBarEquals
	TCaretEquals
	TEquals
	TGreaterThanGreaterThanEquals
	TGreaterThanGreaterThanGreaterThanEquals
	TLessThanLessThanEquals
	TMinusEquals
	TPercentEquals
	TPlusEquals
	TQuestionQuestionEquals
	TSlashEquals

	// Class-private fields and methods
	TPrivateIdentifier

	// Identifiers
	TIdentifier     // Contents are in lexer.Identifier (string)
	TEscapedKeyword // A keyword that has been escaped as an identifier

	// Reserved words
	TBreak
	TCase
	TCatch
	TClass
	TConst
	TContinue
	TDebugger
	TDefault
	TDelete
	TDo
	TElse
	TEnum
	TExport
	TExtends
	TFalse
	TFinally
	TFor
	TFunction
	TIf
	TImport
	TIn
	TInstanceof
	TNew
	TNull
	TReturn
	TSuper
	TSwitch
	TThis
	TThrow
	TTrue
	TTry
	TTypeof
	TVar
	TVoid
	TWhile
	TWith
)

func (t T) IsAssign() bool {
	return t >= TAmpersandAmpersandEquals && t <= TSlashEquals
}

var Keywords = map[string]T{
	// Reserved words
	"break":      TBreak,
	"case":       TCase,
	"catch":      TCatch,
	"class":      TClass,
	"const":      TConst,
	"continue":   TContinue,
	"debugger":   TDebugger,
	"default":    TDefault,
	"delete":     TDelete,
	"do":         TDo,
	"else":       TElse,
	"enum":       TEnum,
	"export":     TExport,
	"extends":    TExtends,
	"false":      TFalse,
	"finally":    TFinally,
	"for":        TFor,
	"function":   TFunction,
	"if":         TIf,
	"import":     TImport,
	"in":         TIn,
	"instanceof": TInstanceof,
	"new":        TNew,
	"null":       TNull,
	"return":     TReturn,
	"super":      TSuper,
	"switch":     TSwitch,
	"this":       TThis,
	"throw":      TThrow,
	"true":       TTrue,
	"try":        TTry,
	"typeof":     TTypeof,
	"var":        TVar,
	"void":       TVoid,
	"while":      TWhile,
	"with":       TWith,
}

var StrictModeReservedWords = map[string]bool{
	"implements": true,
	"interface":  true,
	"let":        true,
	"package":    true,
	"private":    true,
	"protected":  true,
	"public":     true,
	"static":     true,
	"yield":      true,
}

// This represents a string that is maybe a substring of the current file's
// "source.Contents" string. The point of doing this is that if it is a
// substring (the common case), then we can represent it more efficiently.
//
// For compactness and performance, the JS AST represents identifiers as a
// symbol reference instead of as a string. However, we need to track the
// string between the first pass and the second pass because the string is only
// resolved to a symbol in the second pass. To avoid allocating extra memory
// to store the string, we instead use an index+length slice of the original JS
// source code. That index is what "Start" represents here. The length is just
// "len(String)".
//
// Set "Start" to invalid (the zero value) if "String" is not a substring of
// "source.Contents". This is the case for escaped identifiers. For example,
// the identifier "fo\u006f" would be "MaybeSubstring{String: "foo"}". It's
// critical that any code changing the "String" also set "Start" to the zero
// value, which is best done by just overwriting the whole "MaybeSubstring".
//
// The substring range used to be recovered automatically from the string but
// that relied on the Go "unsafe" package which can hypothetically break under
// certain Go compiler optimization passes, so it has been removed and replaced
// with this more error-prone approach that doesn't use "unsafe".
type MaybeSubstring struct {
	String string
	Start  ast.Index32
}

type Lexer struct {
	LegalCommentsBeforeToken     []logger.Range
	CommentsBeforeToken          []logger.Range
	AllComments                  []logger.Range
	Identifier                   MaybeSubstring
	log                          logger.Log
	source                       logger.Source
	JSXFactoryPragmaComment      logger.Span
	JSXFragmentPragmaComment     logger.Span
	JSXRuntimePragmaComment      logger.Span
	JSXImportSourcePragmaComment logger.Span
	SourceMappingURL             logger.Span
	BadArrowInTSXSuggestion      string

	// Escape sequences in string literals are decoded lazily because they are
	// not interpreted inside tagged templates, and tagged templates can contain
	// invalid escape sequences.
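	// For example, "tag`\unicode`" is legal JavaScript whose cooked value is
	// simply undefined, so escape sequences cannot be eagerly validated here.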
	// If the decoded array is nil, the encoded value should be passed to
	// "tryToDecodeEscapeSequences" first.
	decodedStringLiteralOrNil []uint16
	encodedStringLiteralText  string

	errorSuffix string
	tracker     logger.LineColumnTracker

	encodedStringLiteralStart int

	Number                          float64
	current                         int
	start                           int
	end                             int
	ApproximateNewlineCount         int
	CouldBeBadArrowInTSX            int
	BadArrowInTSXRange              logger.Range
	LegacyOctalLoc                  logger.Loc
	AwaitKeywordLoc                 logger.Loc
	FnOrArrowStartLoc               logger.Loc
	PreviousBackslashQuoteInJSX     logger.Range
	LegacyHTMLCommentRange          logger.Range
	codePoint                       rune
	prevErrorLoc                    logger.Loc
	json                            JSONFlavor
	Token                           T
	ts                              config.TSOptions
	HasNewlineBefore                bool
	HasCommentBefore                CommentBefore
	IsLegacyOctalLiteral            bool
	PrevTokenWasAwaitKeyword        bool
	rescanCloseBraceAsTemplateToken bool
	forGlobalName                   bool

	// The log is disabled during speculative scans that may backtrack
	IsLogDisabled bool
}

type CommentBefore uint8

const (
	PureCommentBefore CommentBefore = 1 << iota
	KeyCommentBefore
	NoSideEffectsCommentBefore
)

type LexerPanic struct{}

func NewLexer(log logger.Log, source logger.Source, ts config.TSOptions) Lexer {
	lexer := Lexer{
		log:               log,
		source:            source,
		tracker:           logger.MakeLineColumnTracker(&source),
		prevErrorLoc:      logger.Loc{Start: -1},
		FnOrArrowStartLoc: logger.Loc{Start: -1},
		ts:                ts,
		json:              NotJSON,
	}
	lexer.step()
	lexer.Next()
	return lexer
}

func NewLexerGlobalName(log logger.Log, source logger.Source) Lexer {
	lexer := Lexer{
		log:               log,
		source:            source,
		tracker:           logger.MakeLineColumnTracker(&source),
		prevErrorLoc:      logger.Loc{Start: -1},
		FnOrArrowStartLoc: logger.Loc{Start: -1},
		forGlobalName:     true,
		json:              NotJSON,
	}
	lexer.step()
	lexer.Next()
	return lexer
}

type JSONFlavor uint8

const (
	// Specification: https://json.org/
	JSON JSONFlavor = iota

	// TypeScript's JSON superset is not documented but appears to allow:
	// - Comments: https://github.com/microsoft/TypeScript/issues/4987
	// - Trailing commas
	// - Full JS number syntax
	TSConfigJSON

	// This is used by the JavaScript lexer
	NotJSON
)

func NewLexerJSON(log logger.Log, source logger.Source, json JSONFlavor, errorSuffix string) Lexer {
	lexer := Lexer{
		log:               log,
		source:            source,
		tracker:           logger.MakeLineColumnTracker(&source),
		prevErrorLoc:      logger.Loc{Start: -1},
		FnOrArrowStartLoc: logger.Loc{Start: -1},
		errorSuffix:       errorSuffix,
		json:              json,
	}
	lexer.step()
	lexer.Next()
	return lexer
}

func (lexer *Lexer) Loc() logger.Loc {
	return logger.Loc{Start: int32(lexer.start)}
}

func (lexer *Lexer) Range() logger.Range {
	return logger.Range{Loc: logger.Loc{Start: int32(lexer.start)}, Len: int32(lexer.end - lexer.start)}
}

func (lexer *Lexer) Raw() string {
	return lexer.source.Contents[lexer.start:lexer.end]
}

func (lexer *Lexer) rawIdentifier() MaybeSubstring {
	return MaybeSubstring{lexer.Raw(), ast.MakeIndex32(uint32(lexer.start))}
}

func (lexer *Lexer) StringLiteral() []uint16 {
	if lexer.decodedStringLiteralOrNil == nil {
		// Lazily decode escape sequences if needed
		if decoded, ok, end := lexer.tryToDecodeEscapeSequences(lexer.encodedStringLiteralStart,
			lexer.encodedStringLiteralText, true /* reportErrors */); !ok {
			lexer.end = end
			lexer.SyntaxError()
		} else {
			lexer.decodedStringLiteralOrNil = decoded
		}
	}
	return lexer.decodedStringLiteralOrNil
}

func (lexer *Lexer) CookedAndRawTemplateContents() ([]uint16, string) {
	var raw string

	switch lexer.Token {
	case TNoSubstitutionTemplateLiteral, TTemplateTail:
		// "`x`" or "}x`"
		raw = lexer.source.Contents[lexer.start+1 : lexer.end-1]

	case TTemplateHead, TTemplateMiddle:
		// "`x${" or "}x${"
		raw = lexer.source.Contents[lexer.start+1 : lexer.end-2]
	}

	if strings.IndexByte(raw, '\r') != -1 {
		// From the specification:
		//
		// 11.8.6.1 Static Semantics: TV and TRV
		//
		// TV excludes the code units of LineContinuation while TRV includes
		// them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
		// <LF> for both TV and TRV. An explicit EscapeSequence is needed to
		// include a <CR> or <CR><LF> sequence.

		bytes := []byte(raw)
		end := 0
		i := 0

		for i < len(bytes) {
			c := bytes[i]
			i++

			if c == '\r' {
				// Convert '\r\n' into '\n'
				if i < len(bytes) && bytes[i] == '\n' {
					i++
				}

				// Convert '\r' into '\n'
				c = '\n'
			}

			bytes[end] = c
			end++
		}

		raw = string(bytes[:end])
	}

	// This will return nil on failure, which will become "undefined" for the tag
	cooked, _, _ := lexer.tryToDecodeEscapeSequences(lexer.start+1, raw, false /* reportErrors */)
	return cooked, raw
}

func (lexer *Lexer) IsIdentifierOrKeyword() bool {
	return lexer.Token >= TIdentifier
}

func (lexer *Lexer) IsContextualKeyword(text string) bool {
	return lexer.Token == TIdentifier && lexer.Raw() == text
}

func (lexer *Lexer) ExpectContextualKeyword(text string) {
	if !lexer.IsContextualKeyword(text) {
		lexer.ExpectedString(fmt.Sprintf("%q", text))
	}
	lexer.Next()
}

func (lexer *Lexer) SyntaxError() {
	loc := logger.Loc{Start: int32(lexer.end)}
	message := "Unexpected end of file"
	if lexer.end < len(lexer.source.Contents) {
		c, _ := utf8.DecodeRuneInString(lexer.source.Contents[lexer.end:])
		if c < 0x20 {
			message = fmt.Sprintf("Syntax error \"\\x%02X\"", c)
		} else if c >= 0x80 {
			message = fmt.Sprintf("Syntax error \"\\u{%x}\"", c)
		} else if c != '"' {
			message = fmt.Sprintf("Syntax error \"%c\"", c)
		} else {
			message = "Syntax error '\"'"
		}
	}
	lexer.addRangeError(logger.Range{Loc: loc}, message)
	panic(LexerPanic{})
}

func (lexer *Lexer) ExpectedString(text string) {
	// Provide a friendly error message about "await" without "async"
	if lexer.PrevTokenWasAwaitKeyword {
		var notes []logger.MsgData
		if lexer.FnOrArrowStartLoc.Start != -1 {
			note := lexer.tracker.MsgData(logger.Range{Loc: lexer.FnOrArrowStartLoc},
				"Consider adding the \"async\" keyword here:")
			note.Location.Suggestion = "async"
			notes = []logger.MsgData{note}
		}
		lexer.AddRangeErrorWithNotes(RangeOfIdentifier(lexer.source, lexer.AwaitKeywordLoc),
			"\"await\" can only be used inside an \"async\" function",
			notes)
		panic(LexerPanic{})
	}

	found := fmt.Sprintf("%q", lexer.Raw())
	if lexer.start == len(lexer.source.Contents) {
		found = "end of file"
	}

	suggestion := ""
	if strings.HasPrefix(text, "\"") && strings.HasSuffix(text, "\"") {
		suggestion = text[1 : len(text)-1]
	}

	lexer.addRangeErrorWithSuggestion(lexer.Range(), fmt.Sprintf("Expected %s%s but found %s", text, lexer.errorSuffix, found), suggestion)
	panic(LexerPanic{})
}

func (lexer *Lexer) Expected(token T) {
	if text, ok := tokenToString[token]; ok {
		lexer.ExpectedString(text)
	} else {
		lexer.Unexpected()
	}
}

func (lexer *Lexer) Unexpected() {
	found := fmt.Sprintf("%q", lexer.Raw())
	if lexer.start == len(lexer.source.Contents) {
		found = "end of file"
	}
	lexer.addRangeError(lexer.Range(), fmt.Sprintf("Unexpected %s%s", found, lexer.errorSuffix))
	panic(LexerPanic{})
}

func (lexer *Lexer) Expect(token T) {
	if lexer.Token != token {
		lexer.Expected(token)
	}
	lexer.Next()
}

func (lexer *Lexer) ExpectOrInsertSemicolon() {
	if lexer.Token == TSemicolon || (!lexer.HasNewlineBefore &&
		lexer.Token != TCloseBrace && lexer.Token != TEndOfFile) {
		lexer.Expect(TSemicolon)
	}
}

// This parses a single "<" token. If that is the first part of a longer token,
// this function splits off the first "<" and leaves the remainder of the
// current token as another, smaller token. For example, "<<=" becomes "<=".
func (lexer *Lexer) ExpectLessThan(isInsideJSXElement bool) {
	switch lexer.Token {
	case TLessThan:
		if isInsideJSXElement {
			lexer.NextInsideJSXElement()
		} else {
			lexer.Next()
		}

	case TLessThanEquals:
		lexer.Token = TEquals
		lexer.start++
		lexer.maybeExpandEquals()

	case TLessThanLessThan:
		lexer.Token = TLessThan
		lexer.start++

	case TLessThanLessThanEquals:
		lexer.Token = TLessThanEquals
		lexer.start++

	default:
		lexer.Expected(TLessThan)
	}
}

// This parses a single ">" token. If that is the first part of a longer token,
// this function splits off the first ">" and leaves the remainder of the
// current token as another, smaller token. For example, ">>=" becomes ">=".
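//
// The parser needs this when something like "Map<string, Set<T>>" closes two
// type argument lists with what the lexer originally scanned as one ">>" token.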
func (lexer *Lexer) ExpectGreaterThan(isInsideJSXElement bool) {
	switch lexer.Token {
	case TGreaterThan:
		if isInsideJSXElement {
			lexer.NextInsideJSXElement()
		} else {
			lexer.Next()
		}

	case TGreaterThanEquals:
		lexer.Token = TEquals
		lexer.start++
		lexer.maybeExpandEquals()

	case TGreaterThanGreaterThan:
		lexer.Token = TGreaterThan
		lexer.start++

	case TGreaterThanGreaterThanEquals:
		lexer.Token = TGreaterThanEquals
		lexer.start++

	case TGreaterThanGreaterThanGreaterThan:
		lexer.Token = TGreaterThanGreaterThan
		lexer.start++

	case TGreaterThanGreaterThanGreaterThanEquals:
		lexer.Token = TGreaterThanGreaterThanEquals
		lexer.start++

	default:
		lexer.Expected(TGreaterThan)
	}
}

func (lexer *Lexer) maybeExpandEquals() {
	switch lexer.codePoint {
	case '>':
		// "=" + ">" = "=>"
		lexer.Token = TEqualsGreaterThan
		lexer.step()

	case '=':
		// "=" + "=" = "=="
		lexer.Token = TEqualsEquals
		lexer.step()

		if lexer.codePoint == '=' {
			// "=" + "==" = "==="
			lexer.Token = TEqualsEqualsEquals
			lexer.step()
		}
	}
}

func RangeOfIdentifier(source logger.Source, loc logger.Loc) logger.Range {
	text := source.Contents[loc.Start:]
	if len(text) == 0 {
		return logger.Range{Loc: loc, Len: 0}
	}

	i := 0
	c, _ := utf8.DecodeRuneInString(text[i:])

	// Handle private names
	if c == '#' {
		i++
		c, _ = utf8.DecodeRuneInString(text[i:])
	}

	if js_ast.IsIdentifierStart(c) || c == '\\' {
		// Search for the end of the identifier
		for i < len(text) {
			c2, width2 := utf8.DecodeRuneInString(text[i:])
			if c2 == '\\' {
				i += width2

				// Skip over bracketed unicode escapes such as "\u{10000}"
				if i+2 < len(text) && text[i] == 'u' && text[i+1] == '{' {
					i += 2
					for i < len(text) {
						if text[i] == '}' {
							i++
							break
						}
						i++
					}
				}
			} else if !js_ast.IsIdentifierContinue(c2) {
				return logger.Range{Loc: loc, Len: int32(i)}
			} else {
				i += width2
			}
		}
	}

	// When minifying, this identifier may have originally been a string
	return source.RangeOfString(loc)
}

type KeyOrValue uint8

const (
	KeyRange KeyOrValue = iota
	ValueRange
	KeyAndValueRange
)

func RangeOfImportAssertOrWith(source logger.Source, assertOrWith ast.AssertOrWithEntry, which KeyOrValue) logger.Range {
	if which == KeyRange {
		return RangeOfIdentifier(source, assertOrWith.KeyLoc)
	}
	if which == ValueRange {
		return source.RangeOfString(assertOrWith.ValueLoc)
	}
	loc := RangeOfIdentifier(source, assertOrWith.KeyLoc).Loc
	return logger.Range{Loc: loc, Len: source.RangeOfString(assertOrWith.ValueLoc).End() - loc.Start}
}

func (lexer *Lexer) ExpectJSXElementChild(token T) {
	if lexer.Token != token {
		lexer.Expected(token)
	}
	lexer.NextJSXElementChild()
}

func (lexer *Lexer) NextJSXElementChild() {
	lexer.HasNewlineBefore = false
	originalStart := lexer.end

	for {
		lexer.start = lexer.end
		lexer.Token = 0

		switch lexer.codePoint {
		case -1: // This indicates the end of the file
			lexer.Token = TEndOfFile

		case '{':
			lexer.step()
			lexer.Token = TOpenBrace

		case '<':
			lexer.step()
			lexer.Token = TLessThan

		default:
			needsFixing := false

		stringLiteral:
			for {
				switch lexer.codePoint {
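				// Plain JSX text runs until the next "{", "<", or the end of
				// the file. Anything containing an entity such as "&amp;", a
				// newline, or non-ASCII text takes the slow path below.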
				case -1, '{', '<':
					// Stop when the string ends
					break stringLiteral

				case '&', '\r', '\n', '\u2028', '\u2029':
					// This needs fixing if it has an entity or if it's a multi-line string
					needsFixing = true
					lexer.step()

				case '}', '>':
					// These technically aren't valid JSX: https://facebook.github.io/jsx/
					//
					// JSXTextCharacter :
					//   * SourceCharacter but not one of {, <, > or }
					//
					var replacement string
					if lexer.codePoint == '}' {
						replacement = "{'}'}"
					} else {
						replacement = "{'>'}"
					}
					msg := logger.Msg{
						Kind: logger.Error,
						Data: lexer.tracker.MsgData(logger.Range{Loc: logger.Loc{Start: int32(lexer.end)}, Len: 1},
							fmt.Sprintf("The character \"%c\" is not valid inside a JSX element", lexer.codePoint)),
					}

					// Attempt to provide a better error message if this looks like an arrow function
					if lexer.CouldBeBadArrowInTSX > 0 && lexer.codePoint == '>' && lexer.source.Contents[lexer.end-1] == '=' {
						msg.Notes = []logger.MsgData{lexer.tracker.MsgData(lexer.BadArrowInTSXRange,
							"TypeScript's TSX syntax interprets arrow functions with a single generic type parameter as an opening JSX element. "+
								"If you want it to be interpreted as an arrow function instead, you need to add a trailing comma after the type parameter to disambiguate:")}
						msg.Notes[0].Location.Suggestion = lexer.BadArrowInTSXSuggestion
					} else {
						msg.Notes = []logger.MsgData{{Text: fmt.Sprintf("Did you mean to escape it as %q instead?", replacement)}}
						msg.Data.Location.Suggestion = replacement
						if !lexer.ts.Parse {
							// TypeScript treats this as an error but Babel doesn't treat this
							// as an error yet, so allow this in JS for now. Babel version 8
							// was supposed to be released in 2021 but was never released. If
							// it's released in the future, this can be changed to an error too.
							//
							// More context:
							// * TypeScript change: https://github.com/microsoft/TypeScript/issues/36341
							// * Babel 8 change: https://github.com/babel/babel/issues/11042
							// * Babel 8 release: https://github.com/babel/babel/issues/10746
							//
							msg.Kind = logger.Warning
						}
					}

					lexer.log.AddMsg(msg)
					lexer.step()

				default:
					// Non-ASCII strings need the slow path
					if lexer.codePoint >= 0x80 {
						needsFixing = true
					}
					lexer.step()
				}
			}

			lexer.Token = TStringLiteral
			text := lexer.source.Contents[originalStart:lexer.end]

			if needsFixing {
				// Slow path
				lexer.decodedStringLiteralOrNil = fixWhitespaceAndDecodeJSXEntities(text)

				// Skip this token if it turned out to be empty after trimming
				if len(lexer.decodedStringLiteralOrNil) == 0 {
					lexer.HasNewlineBefore = true
					continue
				}
			} else {
				// Fast path
				n := len(text)
				copy := make([]uint16, n)
				for i := 0; i < n; i++ {
					copy[i] = uint16(text[i])
				}
				lexer.decodedStringLiteralOrNil = copy
			}
		}

		break
	}
}

func (lexer *Lexer) ExpectInsideJSXElement(token T) {
	if lexer.Token != token {
		lexer.Expected(token)
	}
	lexer.NextInsideJSXElement()
}

func (lexer *Lexer) NextInsideJSXElement() {
	lexer.HasNewlineBefore = false

	for {
		lexer.start = lexer.end
		lexer.Token = 0

		switch lexer.codePoint {
		case -1: // This indicates the end of the file
			lexer.Token = TEndOfFile

		case '\r', '\n', '\u2028', '\u2029':
			lexer.step()
			lexer.HasNewlineBefore = true
			continue

		case '\t', ' ':
			lexer.step()
			continue

		case '.':
			lexer.step()
			lexer.Token = TDot

		case ':':
			lexer.step()
			lexer.Token = TColon

		case '=':
			lexer.step()
			lexer.Token = TEquals

		case '{':
			lexer.step()
			lexer.Token = TOpenBrace

		case '}':
			lexer.step()
			lexer.Token = TCloseBrace

		case '<':
			lexer.step()
			lexer.Token = TLessThan

		case '>':
			lexer.step()
			lexer.Token = TGreaterThan

		case '/':
			// '/' or '//' or '/* ... */'
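			// Inside a JSX element a bare "/" is a real token, as in "</div>"
			// or "/>", so anything that isn't a comment falls through to TSlash.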
			lexer.step()
			switch lexer.codePoint {
			case '/':
			singleLineComment:
				for {
					lexer.step()
					switch lexer.codePoint {
					case '\r', '\n', '\u2028', '\u2029':
						break singleLineComment

					case -1: // This indicates the end of the file
						break singleLineComment
					}
				}
				continue

			case '*':
				lexer.step()
				startRange := lexer.Range()
			multiLineComment:
				for {
					switch lexer.codePoint {
					case '*':
						lexer.step()
						if lexer.codePoint == '/' {
							lexer.step()
							break multiLineComment
						}

					case '\r', '\n', '\u2028', '\u2029':
						lexer.step()
						lexer.HasNewlineBefore = true

					case -1: // This indicates the end of the file
						lexer.start = lexer.end
						lexer.AddRangeErrorWithNotes(logger.Range{Loc: lexer.Loc()}, "Expected \"*/\" to terminate multi-line comment",
							[]logger.MsgData{lexer.tracker.MsgData(startRange, "The multi-line comment starts here:")})
						panic(LexerPanic{})

					default:
						lexer.step()
					}
				}
				continue

			default:
				lexer.Token = TSlash
			}

		case '\'', '"':
			var backslash logger.Range
			quote := lexer.codePoint
			needsDecode := false
			lexer.step()

		stringLiteral:
			for {
				switch lexer.codePoint {
				case -1: // This indicates the end of the file
					lexer.SyntaxError()

				case '&':
					needsDecode = true
					lexer.step()

				case '\\':
					backslash = logger.Range{Loc: logger.Loc{Start: int32(lexer.end)}, Len: 1}
					lexer.step()
					continue

				case quote:
					if backslash.Len > 0 {
						backslash.Len++
						lexer.PreviousBackslashQuoteInJSX = backslash
					}
					lexer.step()
					break stringLiteral

				default:
					// Non-ASCII strings need the slow path
					if lexer.codePoint >= 0x80 {
						needsDecode = true
					}
					lexer.step()
				}
				backslash = logger.Range{}
			}

			lexer.Token = TStringLiteral
			text := lexer.source.Contents[lexer.start+1 : lexer.end-1]

			if needsDecode {
				// Slow path
				lexer.decodedStringLiteralOrNil = decodeJSXEntities([]uint16{}, text)
			} else {
				// Fast path
				n := len(text)
				copy := make([]uint16, n)
				for i := 0; i < n; i++ {
					copy[i] = uint16(text[i])
				}
				lexer.decodedStringLiteralOrNil = copy
			}

		default:
			// Check for unusual whitespace characters
			if js_ast.IsWhitespace(lexer.codePoint) {
				lexer.step()
				continue
			}

			if js_ast.IsIdentifierStart(lexer.codePoint) {
				lexer.step()
				for js_ast.IsIdentifierContinue(lexer.codePoint) || lexer.codePoint == '-' {
					lexer.step()
				}

				lexer.Identifier = lexer.rawIdentifier()
				lexer.Token = TIdentifier
				break
			}

			lexer.end = lexer.current
			lexer.Token = TSyntaxError
		}

		return
	}
}

func (lexer *Lexer) Next() {
	lexer.HasNewlineBefore = lexer.end == 0
	lexer.HasCommentBefore = 0
	lexer.PrevTokenWasAwaitKeyword = false
	lexer.LegalCommentsBeforeToken = lexer.LegalCommentsBeforeToken[:0]
	lexer.CommentsBeforeToken = lexer.CommentsBeforeToken[:0]

	for {
		lexer.start = lexer.end
		lexer.Token = 0

		switch lexer.codePoint {
		case -1: // This indicates the end of the file
			lexer.Token = TEndOfFile

		case '#':
			if lexer.start == 0 && strings.HasPrefix(lexer.source.Contents, "#!") {
				// "#!/usr/bin/env node"
				lexer.Token = THashbang
			hashbang:
				for {
					lexer.step()
					switch lexer.codePoint {
					case '\r', '\n', '\u2028', '\u2029':
						break hashbang

					case -1: // This indicates the end of the file
						break hashbang
					}
				}
				lexer.Identifier = lexer.rawIdentifier()
			} else {
				// "#foo"
				lexer.step()
				if lexer.codePoint == '\\' {
					lexer.Identifier, _ = lexer.scanIdentifierWithEscapes(privateIdentifier)
				} else {
					if !js_ast.IsIdentifierStart(lexer.codePoint) {
						lexer.SyntaxError()
					}
					lexer.step()
					for js_ast.IsIdentifierContinue(lexer.codePoint) {
						lexer.step()
					}
					if lexer.codePoint == '\\' {
						lexer.Identifier, _ = lexer.scanIdentifierWithEscapes(privateIdentifier)
					} else {
						lexer.Identifier = lexer.rawIdentifier()
					}
				}
				lexer.Token = TPrivateIdentifier
			}

		case '\r', '\n', '\u2028', '\u2029':
			lexer.step()
			lexer.HasNewlineBefore = true
			continue

		case '\t', ' ':
			lexer.step()
			continue

		case '(':
			lexer.step()
			lexer.Token = TOpenParen

		case ')':
			lexer.step()
			lexer.Token = TCloseParen

		case '[':
			lexer.step()
			lexer.Token = TOpenBracket

		case ']':
			lexer.step()
			lexer.Token = TCloseBracket

		case '{':
			lexer.step()
			lexer.Token = TOpenBrace

		case '}':
			lexer.step()
			lexer.Token = TCloseBrace

		case ',':
			lexer.step()
			lexer.Token = TComma

		case ':':
			lexer.step()
			lexer.Token = TColon

		case ';':
			lexer.step()
			lexer.Token = TSemicolon

		case '@':
			lexer.step()
			lexer.Token = TAt

		case '~':
			lexer.step()
			lexer.Token = TTilde

		case '?':
			// '?' or '?.' or '??' or '??='
			lexer.step()
			switch lexer.codePoint {
			case '?':
				lexer.step()
				switch lexer.codePoint {
				case '=':
					lexer.step()
					lexer.Token = TQuestionQuestionEquals
				default:
					lexer.Token = TQuestionQuestion
				}
			case '.':
				lexer.Token = TQuestion
				current := lexer.current
				contents := lexer.source.Contents

				// Lookahead to disambiguate with 'a?.1:b'
				if current < len(contents) {
					c := contents[current]
					if c < '0' || c > '9' {
						lexer.step()
						lexer.Token = TQuestionDot
					}
				}
			default:
				lexer.Token = TQuestion
			}

		case '%':
			// '%' or '%='
			lexer.step()
			switch lexer.codePoint {
			case '=':
				lexer.step()
				lexer.Token = TPercentEquals
			default:
				lexer.Token = TPercent
			}

		case '&':
			// '&' or '&=' or '&&' or '&&='
			lexer.step()
			switch lexer.codePoint {
			case '=':
				lexer.step()
				lexer.Token = TAmpersandEquals
			case '&':
				lexer.step()
				switch lexer.codePoint {
				case '=':
					lexer.step()
					lexer.Token = TAmpersandAmpersandEquals
				default:
					lexer.Token = TAmpersandAmpersand
				}
			default:
				lexer.Token = TAmpersand
			}

		case '|':
			// '|' or '|=' or '||' or '||='
			lexer.step()
			switch lexer.codePoint {
			case '=':
				lexer.step()
				lexer.Token = TBarEquals
			case '|':
				lexer.step()
				switch lexer.codePoint {
				case '=':
					lexer.step()
					lexer.Token = TBarBarEquals
				default:
					lexer.Token = TBarBar
				}
			default:
				lexer.Token = TBar
			}

		case '^':
			// '^' or '^='
			lexer.step()
			switch lexer.codePoint {
			case '=':
				lexer.step()
				lexer.Token = TCaretEquals
			default:
				lexer.Token = TCaret
			}

		case '+':
			// '+' or '+=' or '++'
			lexer.step()
			switch lexer.codePoint {
			case '=':
				lexer.step()
				lexer.Token = TPlusEquals
			case '+':
				lexer.step()
				lexer.Token = TPlusPlus
			default:
				lexer.Token = TPlus
			}

		case '-':
			// '-' or '-=' or '--' or '-->'
			lexer.step()
			switch lexer.codePoint {
			case '=':
				lexer.step()
				lexer.Token = TMinusEquals
			case '-':
				lexer.step()

				// Handle legacy HTML-style comments
				if lexer.codePoint == '>' && lexer.HasNewlineBefore {
					lexer.step()
					lexer.LegacyHTMLCommentRange = lexer.Range()
					lexer.log.AddID(logger.MsgID_JS_HTMLCommentInJS, logger.Warning, &lexer.tracker, lexer.Range(),
						"Treating \"-->\" as the start of a legacy HTML single-line comment")
				singleLineHTMLCloseComment:
					for {
						switch lexer.codePoint {
						case '\r', '\n', '\u2028', '\u2029':
							break singleLineHTMLCloseComment

						case -1: // This indicates the end of the file
							break singleLineHTMLCloseComment
						}
						lexer.step()
					}
					continue
				}

				lexer.Token = TMinusMinus
			default:
				lexer.Token = TMinus
				if lexer.json == JSON && lexer.codePoint != '.' && (lexer.codePoint < '0' || lexer.codePoint > '9') {
					lexer.Unexpected()
				}
			}

		case '*':
			// '*' or '*=' or '**' or '**='
			lexer.step()
			switch lexer.codePoint {
			case '=':
				lexer.step()
				lexer.Token = TAsteriskEquals

			case '*':
				lexer.step()
				switch lexer.codePoint {
				case '=':
					lexer.step()
					lexer.Token = TAsteriskAsteriskEquals

				default:
					lexer.Token = TAsteriskAsterisk
				}

			default:
				lexer.Token = TAsterisk
			}

		case '/':
			// '/' or '/=' or '//' or '/* ... */'
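			// Note that this never scans a regular expression. Regular
			// expression literals are context-sensitive, so the parser calls
			// "ScanRegExp" explicitly when a "/" in this position can begin one.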
			lexer.step()
			if lexer.forGlobalName {
				lexer.Token = TSlash
				break
			}
			switch lexer.codePoint {
			case '=':
				lexer.step()
				lexer.Token = TSlashEquals

			case '/':
			singleLineComment:
				for {
					lexer.step()
					switch lexer.codePoint {
					case '\r', '\n', '\u2028', '\u2029':
						break singleLineComment

					case -1: // This indicates the end of the file
						break singleLineComment
					}
				}
				if lexer.json == JSON {
					lexer.addRangeError(lexer.Range(), "JSON does not support comments")
				}
				lexer.scanCommentText()
				continue

			case '*':
				lexer.step()
				startRange := lexer.Range()
			multiLineComment:
				for {
					switch lexer.codePoint {
					case '*':
						lexer.step()
						if lexer.codePoint == '/' {
							lexer.step()
							break multiLineComment
						}

					case '\r', '\n', '\u2028', '\u2029':
						lexer.step()
						lexer.HasNewlineBefore = true

					case -1: // This indicates the end of the file
						lexer.start = lexer.end
						lexer.AddRangeErrorWithNotes(logger.Range{Loc: lexer.Loc()}, "Expected \"*/\" to terminate multi-line comment",
							[]logger.MsgData{lexer.tracker.MsgData(startRange, "The multi-line comment starts here:")})
						panic(LexerPanic{})

					default:
						lexer.step()
					}
				}
				if lexer.json == JSON {
					lexer.addRangeError(lexer.Range(), "JSON does not support comments")
				}
				lexer.scanCommentText()
				continue

			default:
				lexer.Token = TSlash
			}

		case '=':
			// '=' or '=>' or '==' or '==='
			lexer.step()
			switch lexer.codePoint {
			case '>':
				lexer.step()
				lexer.Token = TEqualsGreaterThan
			case '=':
				lexer.step()
				switch lexer.codePoint {
				case '=':
					lexer.step()
					lexer.Token = TEqualsEqualsEquals
				default:
					lexer.Token = TEqualsEquals
				}
			default:
				lexer.Token = TEquals
			}

		case '<':
			// '<' or '<<' or '<=' or '<<=' or '<!--'
			lexer.step()
			switch lexer.codePoint {
			case '=':
				lexer.step()
				lexer.Token = TLessThanEquals
			case '<':
				lexer.step()
				switch lexer.codePoint {
				case '=':
					lexer.step()
					lexer.Token = TLessThanLessThanEquals
				default:
					lexer.Token = TLessThanLessThan
				}

			// Handle legacy HTML-style comments
			case '!':
				if strings.HasPrefix(lexer.source.Contents[lexer.start:], "<!--") {
					lexer.step()
					lexer.step()
					lexer.step()
					lexer.LegacyHTMLCommentRange = lexer.Range()
					lexer.log.AddID(logger.MsgID_JS_HTMLCommentInJS, logger.Warning, &lexer.tracker, lexer.Range(),
						"Treating \"<!--\" as the start of a legacy HTML single-line comment")
				singleLineHTMLOpenComment:
					for {
						switch lexer.codePoint {
						case '\r', '\n', '\u2028', '\u2029':
							break singleLineHTMLOpenComment

						case -1: // This indicates the end of the file
							break singleLineHTMLOpenComment
						}
						lexer.step()
					}
					continue
				}

				lexer.Token = TLessThan

			default:
				lexer.Token = TLessThan
			}

		case '>':
			// '>' or '>>' or '>>>' or '>=' or '>>=' or '>>>='
			lexer.step()
			switch lexer.codePoint {
			case '=':
				lexer.step()
				lexer.Token = TGreaterThanEquals
			case '>':
				lexer.step()
				switch lexer.codePoint {
				case '=':
					lexer.step()
					lexer.Token = TGreaterThanGreaterThanEquals
				case '>':
					lexer.step()
					switch lexer.codePoint {
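					// After ">>>", a trailing "=" forms ">>>="; otherwise
					// the token is ">>>"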
					case '=':
						lexer.step()
						lexer.Token = TGreaterThanGreaterThanGreaterThanEquals
					default:
						lexer.Token = TGreaterThanGreaterThanGreaterThan
					}
				default:
					lexer.Token = TGreaterThanGreaterThan
				}
			default:
				lexer.Token = TGreaterThan
			}

		case '!':
			// '!' or '!=' or '!=='
			lexer.step()
			switch lexer.codePoint {
			case '=':
				lexer.step()
				switch lexer.codePoint {
				case '=':
					lexer.step()
					lexer.Token = TExclamationEqualsEquals
				default:
					lexer.Token = TExclamationEquals
				}
			default:
				lexer.Token = TExclamation
			}

		case '\'', '"', '`':
			quote := lexer.codePoint
			needsSlowPath := false
			suffixLen := 1

			if quote != '`' {
				lexer.Token = TStringLiteral
			} else if lexer.rescanCloseBraceAsTemplateToken {
				lexer.Token = TTemplateTail
			} else {
				lexer.Token = TNoSubstitutionTemplateLiteral
			}
			lexer.step()

		stringLiteral:
			for {
				switch lexer.codePoint {
				case '\\':
					needsSlowPath = true
					lexer.step()

					// Handle Windows CRLF
					if lexer.codePoint == '\r' && lexer.json != JSON {
						lexer.step()
						if lexer.codePoint == '\n' {
							lexer.step()
						}
						continue
					}

				case -1: // This indicates the end of the file
					lexer.addRangeError(logger.Range{Loc: logger.Loc{Start: int32(lexer.end)}}, "Unterminated string literal")
					panic(LexerPanic{})

				case '\r':
					if quote != '`' {
						lexer.addRangeError(logger.Range{Loc: logger.Loc{Start: int32(lexer.end)}}, "Unterminated string literal")
						panic(LexerPanic{})
					}

					// Template literals require newline normalization
					needsSlowPath = true

				case '\n':
					if quote != '`' {
						lexer.addRangeError(logger.Range{Loc: logger.Loc{Start: int32(lexer.end)}}, "Unterminated string literal")
						panic(LexerPanic{})
					}

				case '$':
					if quote == '`' {
						lexer.step()
						if lexer.codePoint == '{' {
							suffixLen = 2
							lexer.step()
							if lexer.rescanCloseBraceAsTemplateToken {
								lexer.Token = TTemplateMiddle
							} else {
								lexer.Token = TTemplateHead
							}
							break stringLiteral
						}
						continue stringLiteral
					}

				case quote:
					lexer.step()
					break stringLiteral

				default:
					// Non-ASCII strings need the slow path
					if lexer.codePoint >= 0x80 {
						needsSlowPath = true
					} else if lexer.json == JSON && lexer.codePoint < 0x20 {
						lexer.SyntaxError()
					}
				}
				lexer.step()
			}

			text := lexer.source.Contents[lexer.start+1 : lexer.end-suffixLen]

			if needsSlowPath {
				// Slow path
				lexer.decodedStringLiteralOrNil = nil
				lexer.encodedStringLiteralStart = lexer.start + 1
				lexer.encodedStringLiteralText = text
			} else {
				// Fast path
				n := len(text)
				copy := make([]uint16, n)
				for i := 0; i < n; i++ {
					copy[i] = uint16(text[i])
				}
				lexer.decodedStringLiteralOrNil = copy
			}

			if quote == '\'' && (lexer.json == JSON || lexer.json == TSConfigJSON) {
				lexer.addRangeError(lexer.Range(), "JSON strings must use double quotes")
			}

		// Note: This case is hot in profiles
		case '_', '$',
			'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
			'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
			'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
			'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
			// This is a fast path for long ASCII identifiers. Doing this in a loop
			// first instead of doing "step()" and "js_ast.IsIdentifierContinue()" like we
			// do after this is noticeably faster in the common case of ASCII-only
			// text. For example, doing this sped up end-to-end consuming of a large
			// TypeScript type declaration file from 97ms to 79ms (around 20% faster).
			contents := lexer.source.Contents
			n := len(contents)
			i := lexer.current
			for i < n {
				c := contents[i]
				if (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && (c < '0' || c > '9') && c != '_' && c != '$' {
					break
				}
				i++
			}
			lexer.current = i

			// Now do the slow path for any remaining non-ASCII identifier characters
			lexer.step()
			if lexer.codePoint >= 0x80 {
				for js_ast.IsIdentifierContinue(lexer.codePoint) {
					lexer.step()
				}
			}

			// If there's a slash, then we're in the extra-slow (and extra-rare) case
			// where the identifier has embedded escapes
			if lexer.codePoint == '\\' {
				lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier)
				break
			}

			// Otherwise (if there was no escape) we can slice the code verbatim
			lexer.Identifier = lexer.rawIdentifier()
			lexer.Token = Keywords[lexer.Raw()]
			if lexer.Token == 0 {
				lexer.Token = TIdentifier
			}

		case '\\':
			lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier)

		case '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			lexer.parseNumericLiteralOrDot()

		default:
			// Check for unusual whitespace characters
			if js_ast.IsWhitespace(lexer.codePoint) {
				lexer.step()
				continue
			}

			if js_ast.IsIdentifierStart(lexer.codePoint) {
				lexer.step()
				for js_ast.IsIdentifierContinue(lexer.codePoint) {
					lexer.step()
				}
				if lexer.codePoint == '\\' {
					lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier)
				} else {
					lexer.Token = TIdentifier
					lexer.Identifier = lexer.rawIdentifier()
				}
				break
			}

			lexer.end = lexer.current
			lexer.Token = TSyntaxError
		}

		return
	}
}

type identifierKind uint8

const (
	normalIdentifier identifierKind = iota
	privateIdentifier
)

// This is an edge case that doesn't really exist in the wild, so it doesn't
// need to be as fast as possible.
func (lexer *Lexer) scanIdentifierWithEscapes(kind identifierKind) (MaybeSubstring, T) {
	// First pass: scan over the identifier to see how long it is
	for {
		// Scan a unicode escape sequence. There is at least one because that's
		// what caused us to get on this slow path in the first place.
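		// For example, "\u0066oo" and "\u{66}oo" are both escaped spellings
		// of the identifier "foo".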
		if lexer.codePoint == '\\' {
			lexer.step()
			if lexer.codePoint != 'u' {
				lexer.SyntaxError()
			}
			lexer.step()
			if lexer.codePoint == '{' {
				// Variable-length
				lexer.step()
				for lexer.codePoint != '}' {
					switch lexer.codePoint {
					case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
						'a', 'b', 'c', 'd', 'e', 'f',
						'A', 'B', 'C', 'D', 'E', 'F':
						lexer.step()
					default:
						lexer.SyntaxError()
					}
				}
				lexer.step()
			} else {
				// Fixed-length
				for j := 0; j < 4; j++ {
					switch lexer.codePoint {
					case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
						'a', 'b', 'c', 'd', 'e', 'f',
						'A', 'B', 'C', 'D', 'E', 'F':
						lexer.step()
					default:
						lexer.SyntaxError()
					}
				}
			}
			continue
		}

		// Stop when we reach the end of the identifier
		if !js_ast.IsIdentifierContinue(lexer.codePoint) {
			break
		}
		lexer.step()
	}

	// Second pass: re-use our existing escape sequence parser
	decoded, ok, end := lexer.tryToDecodeEscapeSequences(lexer.start, lexer.Raw(), true /* reportErrors */)
	if !ok {
		lexer.end = end
		lexer.SyntaxError()
	}
	text := string(helpers.UTF16ToString(decoded))

	// Even though it was escaped, it must still be a valid identifier
	identifier := text
	if kind == privateIdentifier {
		identifier = identifier[1:] // Skip over the "#"
	}
	if !js_ast.IsIdentifier(identifier) {
		lexer.addRangeError(logger.Range{Loc: logger.Loc{Start: int32(lexer.start)}, Len: int32(lexer.end - lexer.start)},
			fmt.Sprintf("Invalid identifier: %q", text))
	}

	// Escaped keywords are not allowed to work as actual keywords, but they are
	// allowed wherever we allow identifiers or keywords. For example:
	//
	//   // This is an error (equivalent to "var var;")
	//   var \u0076\u0061\u0072;
	//
	//   // This is an error (equivalent to "var foo;" except for this rule)
	//   \u0076\u0061\u0072 foo;
	//
	//   // This is fine (equivalent to "foo.var;")
	//   foo.\u0076\u0061\u0072;
	//
	if Keywords[text] != 0 {
		return MaybeSubstring{String: text}, TEscapedKeyword
	} else {
		return MaybeSubstring{String: text}, TIdentifier
	}
}

func (lexer *Lexer) parseNumericLiteralOrDot() {
	// Number or dot
	first := lexer.codePoint
	lexer.step()

	// Dot without a digit after it
	if first == '.' && (lexer.codePoint < '0' || lexer.codePoint > '9') {
		// "..."
		if lexer.codePoint == '.' &&
			lexer.current < len(lexer.source.Contents) &&
			lexer.source.Contents[lexer.current] == '.' {
			lexer.step()
			lexer.step()
			lexer.Token = TDotDotDot
			return
		}

		// "."
		lexer.Token = TDot
		return
	}

	underscoreCount := 0
	lastUnderscoreEnd := 0
	hasDotOrExponent := first == '.'
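	// A literal containing a dot or an exponent can never be a bigint
	// (e.g. "1.0n" is a syntax error), which is checked further down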
	isMissingDigitAfterDot := false
	base := 0.0
	lexer.IsLegacyOctalLiteral = false

	// Assume this is a number, but potentially change to a bigint later
	lexer.Token = TNumericLiteral

	// Check for binary, octal, or hexadecimal literal
	if first == '0' {
		switch lexer.codePoint {
		case 'b', 'B':
			base = 2

		case 'o', 'O':
			base = 8

		case 'x', 'X':
			base = 16

		case '0', '1', '2', '3', '4', '5', '6', '7', '_':
			base = 8
			lexer.IsLegacyOctalLiteral = true

		case '8', '9':
			lexer.IsLegacyOctalLiteral = true
		}
	}

	if base != 0 {
		// Integer literal
		isFirst := true
		isInvalidLegacyOctalLiteral := false
		lexer.Number = 0
		if !lexer.IsLegacyOctalLiteral {
			lexer.step()
		}

	integerLiteral:
		for {
			switch lexer.codePoint {
			case '_':
				// Cannot have multiple underscores in a row
				if lastUnderscoreEnd > 0 && lexer.end == lastUnderscoreEnd+1 {
					lexer.SyntaxError()
				}

				// The first digit must exist
				if isFirst || lexer.IsLegacyOctalLiteral {
					lexer.SyntaxError()
				}

				lastUnderscoreEnd = lexer.end
				underscoreCount++

			case '0', '1':
				lexer.Number = lexer.Number*base + float64(lexer.codePoint-'0')

			case '2', '3', '4', '5', '6', '7':
				if base == 2 {
					lexer.SyntaxError()
				}
				lexer.Number = lexer.Number*base + float64(lexer.codePoint-'0')

			case '8', '9':
				if lexer.IsLegacyOctalLiteral {
					isInvalidLegacyOctalLiteral = true
				} else if base < 10 {
					lexer.SyntaxError()
				}
				lexer.Number = lexer.Number*base + float64(lexer.codePoint-'0')

			case 'A', 'B', 'C', 'D', 'E', 'F':
				if base != 16 {
					lexer.SyntaxError()
				}
				lexer.Number = lexer.Number*base + float64(lexer.codePoint+10-'A')

			case 'a', 'b', 'c', 'd', 'e', 'f':
				if base != 16 {
					lexer.SyntaxError()
				}
				lexer.Number = lexer.Number*base + float64(lexer.codePoint+10-'a')

			default:
				// The first digit must exist
				if isFirst {
					lexer.SyntaxError()
				}

				break integerLiteral
			}

			lexer.step()
			isFirst = false
		}

		isBigIntegerLiteral := lexer.codePoint == 'n' && !hasDotOrExponent

		// Slow path: do we need to re-scan the input as text?
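		// Bigints must be kept as text because a float64 would lose precision,
		// and an invalid legacy octal literal such as "0189" must be re-parsed
		// as a base 10 literal.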
		if isBigIntegerLiteral || isInvalidLegacyOctalLiteral {
			text := lexer.rawIdentifier()

			// Can't use a leading zero for bigint literals
			if isBigIntegerLiteral && lexer.IsLegacyOctalLiteral {
				lexer.SyntaxError()
			}

			// Filter out underscores
			if underscoreCount > 0 {
				bytes := make([]byte, 0, len(text.String)-underscoreCount)
				for i := 0; i < len(text.String); i++ {
					c := text.String[i]
					if c != '_' {
						bytes = append(bytes, c)
					}
				}
				text = MaybeSubstring{String: string(bytes)}
			}

			// Store bigints as text to avoid precision loss
			if isBigIntegerLiteral {
				lexer.Identifier = text
			} else if isInvalidLegacyOctalLiteral {
				// Legacy octal literals may turn out to be a base 10 literal after all
				value, _ := strconv.ParseFloat(text.String, 64)
				lexer.Number = value
			}
		}
	} else {
		// Floating-point literal
		isInvalidLegacyOctalLiteral := first == '0' && (lexer.codePoint == '8' || lexer.codePoint == '9')

		// Initial digits
		for {
			if lexer.codePoint < '0' || lexer.codePoint > '9' {
				if lexer.codePoint != '_' {
					break
				}

				// Cannot have multiple underscores in a row
				if lastUnderscoreEnd > 0 && lexer.end == lastUnderscoreEnd+1 {
					lexer.SyntaxError()
				}

				// The specification forbids underscores in this case
				if isInvalidLegacyOctalLiteral {
					lexer.SyntaxError()
				}

				lastUnderscoreEnd = lexer.end
				underscoreCount++
			}
			lexer.step()
		}

		// Fractional digits
		if first != '.' && lexer.codePoint == '.' {
			// An underscore must not come last
			if lastUnderscoreEnd > 0 && lexer.end == lastUnderscoreEnd+1 {
				lexer.end--
				lexer.SyntaxError()
			}

			hasDotOrExponent = true
			lexer.step()
			if lexer.codePoint == '_' {
				lexer.SyntaxError()
			}
			isMissingDigitAfterDot = true
			for {
				if lexer.codePoint >= '0' && lexer.codePoint <= '9' {
					isMissingDigitAfterDot = false
				} else {
					if lexer.codePoint != '_' {
						break
					}

					// Cannot have multiple underscores in a row
					if lastUnderscoreEnd > 0 && lexer.end == lastUnderscoreEnd+1 {
						lexer.SyntaxError()
					}

					lastUnderscoreEnd = lexer.end
					underscoreCount++
				}
				lexer.step()
			}
		}

		// Exponent
		if lexer.codePoint == 'e' || lexer.codePoint == 'E' {
			// An underscore must not come last
			if lastUnderscoreEnd > 0 && lexer.end == lastUnderscoreEnd+1 {
				lexer.end--
				lexer.SyntaxError()
			}

			hasDotOrExponent = true
			lexer.step()
			if lexer.codePoint == '+' || lexer.codePoint == '-' {
				lexer.step()
			}
			if lexer.codePoint < '0' || lexer.codePoint > '9' {
				lexer.SyntaxError()
			}
			for {
				if lexer.codePoint < '0' || lexer.codePoint > '9' {
					if lexer.codePoint != '_' {
						break
					}

					// Cannot have multiple underscores in a row
					if lastUnderscoreEnd > 0 && lexer.end == lastUnderscoreEnd+1 {
						lexer.SyntaxError()
					}

					lastUnderscoreEnd = lexer.end
					underscoreCount++
				}
				lexer.step()
			}
		}

		// Take a slice of the text to parse
		text := lexer.rawIdentifier()

		// Filter out underscores
		if underscoreCount > 0 {
			bytes := make([]byte, 0, len(text.String)-underscoreCount)
			for i := 0; i < len(text.String); i++ {
				c := text.String[i]
				if c != '_' {
					bytes = append(bytes, c)
				}
			}
			text = MaybeSubstring{String: string(bytes)}
		}

		if lexer.codePoint == 'n' && !hasDotOrExponent {
			// The only bigint literal that can start with 0 is "0n"
			if len(text.String) > 1 && first == '0' {
				lexer.SyntaxError()
			}

			// Store bigints as text to avoid precision loss
			lexer.Identifier = text
		} else if !hasDotOrExponent && lexer.end-lexer.start < 10 {
			// Parse a 32-bit integer (very fast path)
			var number uint32 = 0
			for _, c := range text.String {
				number = number*10 + uint32(c-'0')
			}
			lexer.Number = float64(number)
		} else {
			// Parse a double-precision floating-point number
			value, _ := strconv.ParseFloat(text.String, 64)
			lexer.Number = value
		}
	}

	// An underscore must not come last
	if lastUnderscoreEnd > 0 && lexer.end == lastUnderscoreEnd+1 {
		lexer.end--
		lexer.SyntaxError()
	}

	// Handle bigint literals after the underscore-at-end check above
	if lexer.codePoint == 'n' && !hasDotOrExponent {
		lexer.Token = TBigIntegerLiteral
		lexer.step()
	}

	// Identifiers can't occur immediately after numbers
	if js_ast.IsIdentifierStart(lexer.codePoint) {
		lexer.SyntaxError()
	}

	// None of these are allowed in JSON
	if lexer.json == JSON && (first == '.' || base != 0 || underscoreCount > 0 || isMissingDigitAfterDot) {
		lexer.Unexpected()
	}
}

func (lexer *Lexer) ScanRegExp() {
	validateAndStep := func() {
		if lexer.codePoint == '\\' {
			lexer.step()
		}

		switch lexer.codePoint {
		case -1, // This indicates the end of the file
			'\r', '\n', 0x2028, 0x2029: // Newlines aren't allowed in regular expressions
			lexer.addRangeError(logger.Range{Loc: logger.Loc{Start: int32(lexer.end)}}, "Unterminated regular expression")
			panic(LexerPanic{})

		default:
			lexer.step()
		}
	}

	for {
		switch lexer.codePoint {
		case '/':
			lexer.step()
			bits := uint32(0)
			for js_ast.IsIdentifierContinue(lexer.codePoint) {
				switch lexer.codePoint {
				case 'd', 'g', 'i', 'm', 's', 'u', 'v', 'y':
					bit := uint32(1) << uint32(lexer.codePoint-'a')
					if (bit & bits) != 0 {
						// Reject duplicate flags
						r1 := logger.Range{Loc: logger.Loc{Start: int32(lexer.start)}, Len: 1}
						r2 := logger.Range{Loc: logger.Loc{Start: int32(lexer.end)}, Len: 1}
						for r1.Loc.Start < r2.Loc.Start && lexer.source.Contents[r1.Loc.Start] != byte(lexer.codePoint) {
							r1.Loc.Start++
						}
						lexer.log.AddErrorWithNotes(&lexer.tracker, r2,
							fmt.Sprintf("Duplicate flag \"%c\" in regular expression", lexer.codePoint),
							[]logger.MsgData{lexer.tracker.MsgData(r1,
								fmt.Sprintf("The first \"%c\" was here:", lexer.codePoint))})
					} else {
						bits |= bit
					}
					lexer.step()

				default:
					lexer.SyntaxError()
				}
			}
			return

		case '[':
			lexer.step()
			for lexer.codePoint != ']' {
				validateAndStep()
			}
			lexer.step()

		default:
			validateAndStep()
		}
	}
}

func decodeJSXEntities(decoded []uint16, text string) []uint16 {
	i := 0

	for i < len(text) {
		c, width := utf8.DecodeRuneInString(text[i:])
		i += width

		if c == '&' {
			length := strings.IndexByte(text[i:], ';')
			if length > 0 {
				entity := text[i : i+length]
				if entity[0] == '#' {
					number := entity[1:]
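					// Numeric entities may be decimal ("&#65;") or
					// hexadecimal ("&#x41;"); both of these decode to "A"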
					base := 10
					if len(number) > 1 && number[0] == 'x' {
						number = number[1:]
						base = 16
					}
					if value, err := strconv.ParseInt(number, base, 32); err == nil {
						c = rune(value)
						i += length + 1
					}
				} else if value, ok := jsxEntity[entity]; ok {
					c = value
					i += length + 1
				}
			}
		}

		if c <= 0xFFFF {
			decoded = append(decoded, uint16(c))
		} else {
			c -= 0x10000
			decoded = append(decoded, uint16(0xD800+((c>>10)&0x3FF)), uint16(0xDC00+(c&0x3FF)))
		}
	}

	return decoded
}

func fixWhitespaceAndDecodeJSXEntities(text string) []uint16 {
	afterLastNonWhitespace := -1
	decoded := []uint16{}
	i := 0

	// Trim whitespace off the end of the first line
	firstNonWhitespace := 0

	// Split into lines
	for i < len(text) {
		c, width := utf8.DecodeRuneInString(text[i:])

		switch c {
		case '\r', '\n', '\u2028', '\u2029':
			// Newline
			if firstNonWhitespace != -1 && afterLastNonWhitespace != -1 {
				if len(decoded) > 0 {
					decoded = append(decoded, ' ')
				}

				// Trim whitespace off the start and end of lines in the middle
				decoded = decodeJSXEntities(decoded, text[firstNonWhitespace:afterLastNonWhitespace])
			}

			// Reset for the next line
			firstNonWhitespace = -1

		case '\t', ' ':
			// Whitespace

		default:
			// Check for unusual whitespace characters
			if !js_ast.IsWhitespace(c) {
				afterLastNonWhitespace = i + width
				if firstNonWhitespace == -1 {
					firstNonWhitespace = i
				}
			}
		}

		i += width
	}

	if firstNonWhitespace != -1 {
		if len(decoded) > 0 {
			decoded = append(decoded, ' ')
		}

		// Trim whitespace off the start of the last line
		decoded = decodeJSXEntities(decoded, text[firstNonWhitespace:])
	}

	return decoded
}

// If this fails, this returns "nil, false, end" where "end" is the value to
// store to "lexer.end" before calling "lexer.SyntaxError()" if relevant
func (lexer *Lexer) tryToDecodeEscapeSequences(start int, text string, reportErrors bool) ([]uint16, bool, int) {
	decoded := []uint16{}
	i := 0

	for i < len(text) {
		c, width := utf8.DecodeRuneInString(text[i:])
		i += width

		switch c {
		case '\r':
			// From the specification:
			//
			// 11.8.6.1 Static Semantics: TV and TRV
			//
			// TV excludes the code units of LineContinuation while TRV includes
			// them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
			// <LF> for both TV and TRV. An explicit EscapeSequence is needed to
			// include a <CR> or <CR><LF> sequence.
// If this fails, this returns "nil, false, end" where "end" is the value to
// store to "lexer.end" before calling "lexer.SyntaxError()" if relevant
func (lexer *Lexer) tryToDecodeEscapeSequences(start int, text string, reportErrors bool) ([]uint16, bool, int) {
	decoded := []uint16{}
	i := 0

	for i < len(text) {
		c, width := utf8.DecodeRuneInString(text[i:])
		i += width

		switch c {
		case '\r':
			// From the specification:
			//
			// 11.8.6.1 Static Semantics: TV and TRV
			//
			// TV excludes the code units of LineContinuation while TRV includes
			// them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
			// <LF> for both TV and TRV. An explicit EscapeSequence is needed to
			// include a <CR> or <CR><LF> sequence.

			// Convert '\r\n' into '\n'
			if i < len(text) && text[i] == '\n' {
				i++
			}

			// Convert '\r' into '\n'
			decoded = append(decoded, '\n')
			continue

		case '\\':
			c2, width2 := utf8.DecodeRuneInString(text[i:])
			i += width2

			switch c2 {
			case 'b':
				decoded = append(decoded, '\b')
				continue

			case 'f':
				decoded = append(decoded, '\f')
				continue

			case 'n':
				decoded = append(decoded, '\n')
				continue

			case 'r':
				decoded = append(decoded, '\r')
				continue

			case 't':
				decoded = append(decoded, '\t')
				continue

			case 'v':
				if lexer.json == JSON {
					return nil, false, start + i - width2
				}

				decoded = append(decoded, '\v')
				continue

			case '0', '1', '2', '3', '4', '5', '6', '7':
				octalStart := i - 2
				if lexer.json == JSON {
					return nil, false, start + i - width2
				}

				// 1-3 digit octal
				isBad := false
				value := c2 - '0'
				c3, width3 := utf8.DecodeRuneInString(text[i:])
				switch c3 {
				case '0', '1', '2', '3', '4', '5', '6', '7':
					value = value*8 + c3 - '0'
					i += width3
					c4, width4 := utf8.DecodeRuneInString(text[i:])
					switch c4 {
					case '0', '1', '2', '3', '4', '5', '6', '7':
						temp := value*8 + c4 - '0'
						if temp < 256 {
							value = temp
							i += width4
						}
					case '8', '9':
						isBad = true
					}
				case '8', '9':
					isBad = true
				}
				c = value

				// Forbid the use of octal literals other than "\0"
				if isBad || text[octalStart:i] != "\\0" {
					lexer.LegacyOctalLoc = logger.Loc{Start: int32(start + octalStart)}
				}

			case '8', '9':
				c = c2

				// Forbid the invalid octal literals "\8" and "\9"
				lexer.LegacyOctalLoc = logger.Loc{Start: int32(start + i - 2)}

			case 'x':
				if lexer.json == JSON {
					return nil, false, start + i - width2
				}

				// 2-digit hexadecimal
				value := '\000'
				for j := 0; j < 2; j++ {
					c3, width3 := utf8.DecodeRuneInString(text[i:])
					i += width3
					switch c3 {
					case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
						value = value*16 | (c3 - '0')
					case 'a', 'b', 'c', 'd', 'e', 'f':
						value = value*16 | (c3 + 10 - 'a')
					case 'A', 'B', 'C', 'D', 'E', 'F':
						value = value*16 | (c3 + 10 - 'A')
					default:
						return nil, false, start + i - width3
					}
				}
				c = value

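			// Added commentary (not in the original source): the "\u" case
			// below handles both the fixed-length form "\uXXXX" and the
			// variable-length form "\u{...}". JSON only permits the
			// fixed-length form, so the "{" branch fails when lexing JSON.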
			case 'u':
				// Unicode
				value := '\000'

				// Check the first character
				c3, width3 := utf8.DecodeRuneInString(text[i:])
				i += width3

				if c3 == '{' {
					if lexer.json == JSON {
						return nil, false, start + i - width2
					}

					// Variable-length
					hexStart := i - width - width2 - width3
					isFirst := true
					isOutOfRange := false
				variableLength:
					for {
						c3, width3 = utf8.DecodeRuneInString(text[i:])
						i += width3

						switch c3 {
						case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
							value = value*16 | (c3 - '0')
						case 'a', 'b', 'c', 'd', 'e', 'f':
							value = value*16 | (c3 + 10 - 'a')
						case 'A', 'B', 'C', 'D', 'E', 'F':
							value = value*16 | (c3 + 10 - 'A')
						case '}':
							if isFirst {
								return nil, false, start + i - width3
							}
							break variableLength
						default:
							return nil, false, start + i - width3
						}

						if value > utf8.MaxRune {
							isOutOfRange = true
						}

						isFirst = false
					}

					if isOutOfRange && reportErrors {
						lexer.addRangeError(logger.Range{Loc: logger.Loc{Start: int32(start + hexStart)}, Len: int32(i - hexStart)},
							"Unicode escape sequence is out of range")
						panic(LexerPanic{})
					}
				} else {
					// Fixed-length
					for j := 0; j < 4; j++ {
						switch c3 {
						case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
							value = value*16 | (c3 - '0')
						case 'a', 'b', 'c', 'd', 'e', 'f':
							value = value*16 | (c3 + 10 - 'a')
						case 'A', 'B', 'C', 'D', 'E', 'F':
							value = value*16 | (c3 + 10 - 'A')
						default:
							return nil, false, start + i - width3
						}

						if j < 3 {
							c3, width3 = utf8.DecodeRuneInString(text[i:])
							i += width3
						}
					}
				}
				c = value

			case '\r':
				if lexer.json == JSON {
					return nil, false, start + i - width2
				}

				// Ignore line continuations. A line continuation is not an escaped newline.
				if i < len(text) && text[i] == '\n' {
					// Make sure Windows CRLF counts as a single newline
					i++
				}
				continue

			case '\n', '\u2028', '\u2029':
				if lexer.json == JSON {
					return nil, false, start + i - width2
				}

				// Ignore line continuations. A line continuation is not an escaped newline.
				continue

			default:
				if lexer.json == JSON {
					switch c2 {
					case '"', '\\', '/':

					default:
						return nil, false, start + i - width2
					}
				}

				c = c2
			}
		}

		if c <= 0xFFFF {
			decoded = append(decoded, uint16(c))
		} else {
			c -= 0x10000
			decoded = append(decoded, uint16(0xD800+((c>>10)&0x3FF)), uint16(0xDC00+(c&0x3FF)))
		}
	}

	return decoded, true, 0
}

func (lexer *Lexer) RescanCloseBraceAsTemplateToken() {
	if lexer.Token != TCloseBrace {
		lexer.Expected(TCloseBrace)
	}

	lexer.rescanCloseBraceAsTemplateToken = true
	lexer.codePoint = '`'
	lexer.current = lexer.end
	lexer.end -= 1
	lexer.Next()
	lexer.rescanCloseBraceAsTemplateToken = false
}

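// NOTE: Added commentary, not part of the original source. step() below is
// the single place where the lexer advances through the source text. It
// relies on utf8.DecodeRuneInString returning a width of 0 only for empty
// input, which it maps to the sentinel code point -1 meaning end-of-file.
// A minimal standalone sketch of that convention:
//
//	func nextCodePoint(contents string, current int) (rune, int) {
//		c, width := utf8.DecodeRuneInString(contents[current:])
//		if width == 0 {
//			return -1, current // end of file
//		}
//		return c, current + width
//	}
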
func (lexer *Lexer) step() {
	codePoint, width := utf8.DecodeRuneInString(lexer.source.Contents[lexer.current:])

	// Use -1 to indicate the end of the file
	if width == 0 {
		codePoint = -1
	}

	// Track the approximate number of newlines in the file so we can preallocate
	// the line offset table in the printer for source maps. The line offset table
	// is the #1 highest allocation in the heap profile, so this is worth doing.
	// This count is approximate because it handles "\n" and "\r\n" (the common
	// cases) but not "\r" or "\u2028" or "\u2029". Getting this wrong is harmless
	// because it's only a preallocation. The array will just grow if it's too small.
	if codePoint == '\n' {
		lexer.ApproximateNewlineCount++
	}

	lexer.codePoint = codePoint
	lexer.end = lexer.current
	lexer.current += width
}

func (lexer *Lexer) addRangeError(r logger.Range, text string) {
	// Don't report multiple errors in the same spot
	if r.Loc == lexer.prevErrorLoc {
		return
	}
	lexer.prevErrorLoc = r.Loc

	if !lexer.IsLogDisabled {
		lexer.log.AddError(&lexer.tracker, r, text)
	}
}

func (lexer *Lexer) addRangeErrorWithSuggestion(r logger.Range, text string, suggestion string) {
	// Don't report multiple errors in the same spot
	if r.Loc == lexer.prevErrorLoc {
		return
	}
	lexer.prevErrorLoc = r.Loc

	if !lexer.IsLogDisabled {
		data := lexer.tracker.MsgData(r, text)
		data.Location.Suggestion = suggestion
		lexer.log.AddMsg(logger.Msg{Kind: logger.Error, Data: data})
	}
}

func (lexer *Lexer) AddRangeErrorWithNotes(r logger.Range, text string, notes []logger.MsgData) {
	// Don't report multiple errors in the same spot
	if r.Loc == lexer.prevErrorLoc {
		return
	}
	lexer.prevErrorLoc = r.Loc

	if !lexer.IsLogDisabled {
		lexer.log.AddErrorWithNotes(&lexer.tracker, r, text, notes)
	}
}

func hasPrefixWithWordBoundary(text string, prefix string) bool {
	t := len(text)
	p := len(prefix)
	if t >= p && text[0:p] == prefix {
		if t == p {
			return true
		}
		c, _ := utf8.DecodeRuneInString(text[p:])
		if !js_ast.IsIdentifierContinue(c) {
			return true
		}
	}
	return false
}

type pragmaArg uint8

const (
	pragmaNoSpaceFirst pragmaArg = iota
	pragmaSkipSpaceFirst
)

func scanForPragmaArg(kind pragmaArg, start int, pragma string, text string) (logger.Span, bool) {
	text = text[len(pragma):]
	start += len(pragma)

	if text == "" {
		return logger.Span{}, false
	}

	// One or more whitespace characters
	c, width := utf8.DecodeRuneInString(text)
	if kind == pragmaSkipSpaceFirst {
		if !js_ast.IsWhitespace(c) {
			return logger.Span{}, false
		}
		for js_ast.IsWhitespace(c) {
			text = text[width:]
			start += width
			if text == "" {
				return logger.Span{}, false
			}
			c, width = utf8.DecodeRuneInString(text)
		}
	}

	// One or more non-whitespace characters
	i := 0
	for !js_ast.IsWhitespace(c) {
		i += width
		if i >= len(text) {
			break
		}
		c, width = utf8.DecodeRuneInString(text[i:])
		if js_ast.IsWhitespace(c) {
			break
		}
	}

	return logger.Span{
		Text: text[:i],
		Range: logger.Range{
			Loc: logger.Loc{Start: int32(start)},
			Len: int32(i),
		},
	}, true
}

func isUpperASCII(c byte) bool {
	return c >= 'A' && c <= 'Z'
}

func isLetterASCII(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
}

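// NOTE: Added commentary, not part of the original source. A hypothetical
// worked example of how scanCommentText below uses scanForPragmaArg: for the
// comment "/* @jsx React.createElement */", "rest" is the comment text after
// the "@" with the trailing "*/" already sliced off, so the call is roughly
//
//	scanForPragmaArg(pragmaSkipSpaceFirst, start, "jsx", "jsx React.createElement ")
//
// which skips the pragma name plus the whitespace after it and returns a
// span whose Text is "React.createElement" and whose Range points back into
// the original source for error reporting.
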
func (lexer *Lexer) scanCommentText() {
	text := lexer.source.Contents[lexer.start:lexer.end]
	hasLegalAnnotation := len(text) > 2 && text[2] == '!'
	isMultiLineComment := text[1] == '*'
	omitFromGeneralCommentPreservation := false

	// Save the original comment text so we can subtract comments from the
	// character frequency analysis used by symbol minification
	lexer.AllComments = append(lexer.AllComments, lexer.Range())

	// Omit the trailing "*/" from the checks below
	endOfCommentText := len(text)
	if isMultiLineComment {
		endOfCommentText -= 2
	}

	for i, n := 0, len(text); i < n; i++ {
		switch text[i] {
		case '#':
			rest := text[i+1 : endOfCommentText]
			if hasPrefixWithWordBoundary(rest, "__PURE__") {
				omitFromGeneralCommentPreservation = true
				lexer.HasCommentBefore |= PureCommentBefore
			} else if hasPrefixWithWordBoundary(rest, "__KEY__") {
				omitFromGeneralCommentPreservation = true
				lexer.HasCommentBefore |= KeyCommentBefore
			} else if hasPrefixWithWordBoundary(rest, "__NO_SIDE_EFFECTS__") {
				omitFromGeneralCommentPreservation = true
				lexer.HasCommentBefore |= NoSideEffectsCommentBefore
			} else if i == 2 && strings.HasPrefix(rest, " sourceMappingURL=") {
				if arg, ok := scanForPragmaArg(pragmaNoSpaceFirst, lexer.start+i+1, " sourceMappingURL=", rest); ok {
					omitFromGeneralCommentPreservation = true
					lexer.SourceMappingURL = arg
				}
			}

		case '@':
			rest := text[i+1 : endOfCommentText]
			if hasPrefixWithWordBoundary(rest, "__PURE__") {
				omitFromGeneralCommentPreservation = true
				lexer.HasCommentBefore |= PureCommentBefore
			} else if hasPrefixWithWordBoundary(rest, "__KEY__") {
				omitFromGeneralCommentPreservation = true
				lexer.HasCommentBefore |= KeyCommentBefore
			} else if hasPrefixWithWordBoundary(rest, "__NO_SIDE_EFFECTS__") {
				omitFromGeneralCommentPreservation = true
				lexer.HasCommentBefore |= NoSideEffectsCommentBefore
			} else if hasPrefixWithWordBoundary(rest, "preserve") || hasPrefixWithWordBoundary(rest, "license") {
				hasLegalAnnotation = true
			} else if hasPrefixWithWordBoundary(rest, "jsx") {
				if arg, ok := scanForPragmaArg(pragmaSkipSpaceFirst, lexer.start+i+1, "jsx", rest); ok {
					lexer.JSXFactoryPragmaComment = arg
				}
			} else if hasPrefixWithWordBoundary(rest, "jsxFrag") {
				if arg, ok := scanForPragmaArg(pragmaSkipSpaceFirst, lexer.start+i+1, "jsxFrag", rest); ok {
					lexer.JSXFragmentPragmaComment = arg
				}
			} else if hasPrefixWithWordBoundary(rest, "jsxRuntime") {
				if arg, ok := scanForPragmaArg(pragmaSkipSpaceFirst, lexer.start+i+1, "jsxRuntime", rest); ok {
					lexer.JSXRuntimePragmaComment = arg
				}
			} else if hasPrefixWithWordBoundary(rest, "jsxImportSource") {
				if arg, ok := scanForPragmaArg(pragmaSkipSpaceFirst, lexer.start+i+1, "jsxImportSource", rest); ok {
					lexer.JSXImportSourcePragmaComment = arg
				}
			} else if i == 2 && strings.HasPrefix(rest, " sourceMappingURL=") {
				if arg, ok := scanForPragmaArg(pragmaNoSpaceFirst, lexer.start+i+1, " sourceMappingURL=", rest); ok {
					omitFromGeneralCommentPreservation = true
					lexer.SourceMappingURL = arg
				}
			}
		}
	}

	if hasLegalAnnotation {
		lexer.LegalCommentsBeforeToken = append(lexer.LegalCommentsBeforeToken, lexer.Range())
	}

	if !omitFromGeneralCommentPreservation {
		lexer.CommentsBeforeToken = append(lexer.CommentsBeforeToken, lexer.Range())
	}
}
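
// NOTE: Added commentary, not part of the original source. scanCommentText
// runs once per scanned comment. Comments that exist only to carry tool
// pragmas (such as "/* @__PURE__ */" or "//# sourceMappingURL=...") set
// omitFromGeneralCommentPreservation so they are excluded from the general
// CommentsBeforeToken list, while "@preserve"/"@license" annotations and
// "/*!"-style comments are recorded in LegalCommentsBeforeToken so that
// legal-comment handling can preserve them in the output.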