github.com/dop251/goja@v0.0.0-20240220182346-e401ed450204/parser/lexer.go (about) 1 package parser 2 3 import ( 4 "errors" 5 "fmt" 6 "strconv" 7 "strings" 8 "unicode" 9 "unicode/utf16" 10 "unicode/utf8" 11 12 "golang.org/x/text/unicode/rangetable" 13 14 "github.com/dop251/goja/file" 15 "github.com/dop251/goja/token" 16 "github.com/dop251/goja/unistring" 17 ) 18 19 var ( 20 unicodeRangeIdNeg = rangetable.Merge(unicode.Pattern_Syntax, unicode.Pattern_White_Space) 21 unicodeRangeIdStartPos = rangetable.Merge(unicode.Letter, unicode.Nl, unicode.Other_ID_Start) 22 unicodeRangeIdContPos = rangetable.Merge(unicodeRangeIdStartPos, unicode.Mn, unicode.Mc, unicode.Nd, unicode.Pc, unicode.Other_ID_Continue) 23 ) 24 25 func isDecimalDigit(chr rune) bool { 26 return '0' <= chr && chr <= '9' 27 } 28 29 func IsIdentifier(s string) bool { 30 if s == "" { 31 return false 32 } 33 r, size := utf8.DecodeRuneInString(s) 34 if !isIdentifierStart(r) { 35 return false 36 } 37 for _, r := range s[size:] { 38 if !isIdentifierPart(r) { 39 return false 40 } 41 } 42 return true 43 } 44 45 func digitValue(chr rune) int { 46 switch { 47 case '0' <= chr && chr <= '9': 48 return int(chr - '0') 49 case 'a' <= chr && chr <= 'f': 50 return int(chr - 'a' + 10) 51 case 'A' <= chr && chr <= 'F': 52 return int(chr - 'A' + 10) 53 } 54 return 16 // Larger than any legal digit value 55 } 56 57 func isDigit(chr rune, base int) bool { 58 return digitValue(chr) < base 59 } 60 61 func isIdStartUnicode(r rune) bool { 62 return unicode.Is(unicodeRangeIdStartPos, r) && !unicode.Is(unicodeRangeIdNeg, r) 63 } 64 65 func isIdPartUnicode(r rune) bool { 66 return unicode.Is(unicodeRangeIdContPos, r) && !unicode.Is(unicodeRangeIdNeg, r) || r == '\u200C' || r == '\u200D' 67 } 68 69 func isIdentifierStart(chr rune) bool { 70 return chr == '$' || chr == '_' || chr == '\\' || 71 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' || 72 chr >= utf8.RuneSelf && isIdStartUnicode(chr) 73 } 74 75 func isIdentifierPart(chr rune) bool { 76 return chr == '$' || chr == '_' || chr == '\\' || 77 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' || 78 '0' <= chr && chr <= '9' || 79 chr >= utf8.RuneSelf && isIdPartUnicode(chr) 80 } 81 82 func (self *_parser) scanIdentifier() (string, unistring.String, bool, string) { 83 offset := self.chrOffset 84 hasEscape := false 85 isUnicode := false 86 length := 0 87 for isIdentifierPart(self.chr) { 88 r := self.chr 89 length++ 90 if r == '\\' { 91 hasEscape = true 92 distance := self.chrOffset - offset 93 self.read() 94 if self.chr != 'u' { 95 return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr)) 96 } 97 var value rune 98 if self._peek() == '{' { 99 self.read() 100 value = -1 101 for value <= utf8.MaxRune { 102 self.read() 103 if self.chr == '}' { 104 break 105 } 106 decimal, ok := hex2decimal(byte(self.chr)) 107 if !ok { 108 return "", "", false, "Invalid Unicode escape sequence" 109 } 110 if value == -1 { 111 value = decimal 112 } else { 113 value = value<<4 | decimal 114 } 115 } 116 if value == -1 { 117 return "", "", false, "Invalid Unicode escape sequence" 118 } 119 } else { 120 for j := 0; j < 4; j++ { 121 self.read() 122 decimal, ok := hex2decimal(byte(self.chr)) 123 if !ok { 124 return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr)) 125 } 126 value = value<<4 | decimal 127 } 128 } 129 if value == '\\' { 130 return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value)) 131 } else if distance == 0 { 132 if !isIdentifierStart(value) { 133 return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value)) 134 } 135 } else if distance > 0 { 136 if !isIdentifierPart(value) { 137 return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value)) 138 } 139 } 140 r = value 141 } 142 if r >= utf8.RuneSelf { 143 isUnicode = true 144 if r > 0xFFFF { 145 length++ 146 } 147 } 148 self.read() 149 } 150 151 literal := self.str[offset:self.chrOffset] 152 var parsed unistring.String 153 if hasEscape || isUnicode { 154 var err string 155 // TODO strict 156 parsed, err = parseStringLiteral(literal, length, isUnicode, false) 157 if err != "" { 158 return "", "", false, err 159 } 160 } else { 161 parsed = unistring.String(literal) 162 } 163 164 return literal, parsed, hasEscape, "" 165 } 166 167 // 7.2 168 func isLineWhiteSpace(chr rune) bool { 169 switch chr { 170 case '\u0009', '\u000b', '\u000c', '\u0020', '\u00a0', '\ufeff': 171 return true 172 case '\u000a', '\u000d', '\u2028', '\u2029': 173 return false 174 case '\u0085': 175 return false 176 } 177 return unicode.IsSpace(chr) 178 } 179 180 // 7.3 181 func isLineTerminator(chr rune) bool { 182 switch chr { 183 case '\u000a', '\u000d', '\u2028', '\u2029': 184 return true 185 } 186 return false 187 } 188 189 type parserState struct { 190 idx file.Idx 191 tok token.Token 192 literal string 193 parsedLiteral unistring.String 194 implicitSemicolon, insertSemicolon bool 195 chr rune 196 chrOffset, offset int 197 errorCount int 198 } 199 200 func (self *_parser) mark(state *parserState) *parserState { 201 if state == nil { 202 state = &parserState{} 203 } 204 state.idx, state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset = 205 self.idx, self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset 206 207 state.errorCount = len(self.errors) 208 return state 209 } 210 211 func (self *_parser) restore(state *parserState) { 212 self.idx, self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset = 213 state.idx, state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset 214 self.errors = self.errors[:state.errorCount] 215 } 216 217 func (self *_parser) peek() token.Token { 218 implicitSemicolon, insertSemicolon, chr, chrOffset, offset := self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset 219 tok, _, _, _ := self.scan() 220 self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset = implicitSemicolon, insertSemicolon, chr, chrOffset, offset 221 return tok 222 } 223 224 func (self *_parser) scan() (tkn token.Token, literal string, parsedLiteral unistring.String, idx file.Idx) { 225 226 self.implicitSemicolon = false 227 228 for { 229 self.skipWhiteSpace() 230 231 idx = self.idxOf(self.chrOffset) 232 insertSemicolon := false 233 234 switch chr := self.chr; { 235 case isIdentifierStart(chr): 236 var err string 237 var hasEscape bool 238 literal, parsedLiteral, hasEscape, err = self.scanIdentifier() 239 if err != "" { 240 tkn = token.ILLEGAL 241 break 242 } 243 if len(parsedLiteral) > 1 { 244 // Keywords are longer than 1 character, avoid lookup otherwise 245 var strict bool 246 tkn, strict = token.IsKeyword(string(parsedLiteral)) 247 if hasEscape { 248 self.insertSemicolon = true 249 if tkn == 0 || self.isBindingId(tkn) { 250 tkn = token.IDENTIFIER 251 } else { 252 tkn = token.ESCAPED_RESERVED_WORD 253 } 254 return 255 } 256 switch tkn { 257 case 0: // Not a keyword 258 // no-op 259 case token.KEYWORD: 260 if strict { 261 // TODO If strict and in strict mode, then this is not a break 262 break 263 } 264 return 265 266 case 267 token.BOOLEAN, 268 token.NULL, 269 token.THIS, 270 token.BREAK, 271 token.THROW, // A newline after a throw is not allowed, but we need to detect it 272 token.YIELD, 273 token.RETURN, 274 token.CONTINUE, 275 token.DEBUGGER: 276 self.insertSemicolon = true 277 return 278 279 case token.ASYNC: 280 // async only has special meaning if not followed by a LineTerminator 281 if self.skipWhiteSpaceCheckLineTerminator() { 282 self.insertSemicolon = true 283 tkn = token.IDENTIFIER 284 } 285 return 286 default: 287 return 288 289 } 290 } 291 self.insertSemicolon = true 292 tkn = token.IDENTIFIER 293 return 294 case '0' <= chr && chr <= '9': 295 self.insertSemicolon = true 296 tkn, literal = self.scanNumericLiteral(false) 297 return 298 default: 299 self.read() 300 switch chr { 301 case -1: 302 if self.insertSemicolon { 303 self.insertSemicolon = false 304 self.implicitSemicolon = true 305 } 306 tkn = token.EOF 307 case '\r', '\n', '\u2028', '\u2029': 308 self.insertSemicolon = false 309 self.implicitSemicolon = true 310 continue 311 case ':': 312 tkn = token.COLON 313 case '.': 314 if digitValue(self.chr) < 10 { 315 insertSemicolon = true 316 tkn, literal = self.scanNumericLiteral(true) 317 } else { 318 if self.chr == '.' { 319 self.read() 320 if self.chr == '.' { 321 self.read() 322 tkn = token.ELLIPSIS 323 } else { 324 tkn = token.ILLEGAL 325 } 326 } else { 327 tkn = token.PERIOD 328 } 329 } 330 case ',': 331 tkn = token.COMMA 332 case ';': 333 tkn = token.SEMICOLON 334 case '(': 335 tkn = token.LEFT_PARENTHESIS 336 case ')': 337 tkn = token.RIGHT_PARENTHESIS 338 insertSemicolon = true 339 case '[': 340 tkn = token.LEFT_BRACKET 341 case ']': 342 tkn = token.RIGHT_BRACKET 343 insertSemicolon = true 344 case '{': 345 tkn = token.LEFT_BRACE 346 case '}': 347 tkn = token.RIGHT_BRACE 348 insertSemicolon = true 349 case '+': 350 tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT) 351 if tkn == token.INCREMENT { 352 insertSemicolon = true 353 } 354 case '-': 355 tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT) 356 if tkn == token.DECREMENT { 357 insertSemicolon = true 358 } 359 case '*': 360 if self.chr == '*' { 361 self.read() 362 tkn = self.switch2(token.EXPONENT, token.EXPONENT_ASSIGN) 363 } else { 364 tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN) 365 } 366 case '/': 367 if self.chr == '/' { 368 self.skipSingleLineComment() 369 continue 370 } else if self.chr == '*' { 371 if self.skipMultiLineComment() { 372 self.insertSemicolon = false 373 self.implicitSemicolon = true 374 } 375 continue 376 } else { 377 // Could be division, could be RegExp literal 378 tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN) 379 insertSemicolon = true 380 } 381 case '%': 382 tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN) 383 case '^': 384 tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN) 385 case '<': 386 tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN) 387 case '>': 388 tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN) 389 case '=': 390 if self.chr == '>' { 391 self.read() 392 if self.implicitSemicolon { 393 tkn = token.ILLEGAL 394 } else { 395 tkn = token.ARROW 396 } 397 } else { 398 tkn = self.switch2(token.ASSIGN, token.EQUAL) 399 if tkn == token.EQUAL && self.chr == '=' { 400 self.read() 401 tkn = token.STRICT_EQUAL 402 } 403 } 404 case '!': 405 tkn = self.switch2(token.NOT, token.NOT_EQUAL) 406 if tkn == token.NOT_EQUAL && self.chr == '=' { 407 self.read() 408 tkn = token.STRICT_NOT_EQUAL 409 } 410 case '&': 411 tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND) 412 case '|': 413 tkn = self.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR) 414 case '~': 415 tkn = token.BITWISE_NOT 416 case '?': 417 if self.chr == '.' && !isDecimalDigit(self._peek()) { 418 self.read() 419 tkn = token.QUESTION_DOT 420 } else if self.chr == '?' { 421 self.read() 422 tkn = token.COALESCE 423 } else { 424 tkn = token.QUESTION_MARK 425 } 426 case '"', '\'': 427 insertSemicolon = true 428 tkn = token.STRING 429 var err string 430 literal, parsedLiteral, err = self.scanString(self.chrOffset-1, true) 431 if err != "" { 432 tkn = token.ILLEGAL 433 } 434 case '`': 435 tkn = token.BACKTICK 436 case '#': 437 if self.chrOffset == 1 && self.chr == '!' { 438 self.skipSingleLineComment() 439 continue 440 } 441 442 var err string 443 literal, parsedLiteral, _, err = self.scanIdentifier() 444 if err != "" || literal == "" { 445 tkn = token.ILLEGAL 446 break 447 } 448 self.insertSemicolon = true 449 tkn = token.PRIVATE_IDENTIFIER 450 return 451 default: 452 self.errorUnexpected(idx, chr) 453 tkn = token.ILLEGAL 454 } 455 } 456 self.insertSemicolon = insertSemicolon 457 return 458 } 459 } 460 461 func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token { 462 if self.chr == '=' { 463 self.read() 464 return tkn1 465 } 466 return tkn0 467 } 468 469 func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token { 470 if self.chr == '=' { 471 self.read() 472 return tkn1 473 } 474 if self.chr == chr2 { 475 self.read() 476 return tkn2 477 } 478 return tkn0 479 } 480 481 func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token { 482 if self.chr == '=' { 483 self.read() 484 return tkn1 485 } 486 if self.chr == chr2 { 487 self.read() 488 if self.chr == '=' { 489 self.read() 490 return tkn3 491 } 492 return tkn2 493 } 494 return tkn0 495 } 496 497 func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token { 498 if self.chr == '=' { 499 self.read() 500 return tkn1 501 } 502 if self.chr == chr2 { 503 self.read() 504 if self.chr == '=' { 505 self.read() 506 return tkn3 507 } 508 if self.chr == chr3 { 509 self.read() 510 if self.chr == '=' { 511 self.read() 512 return tkn5 513 } 514 return tkn4 515 } 516 return tkn2 517 } 518 return tkn0 519 } 520 521 func (self *_parser) _peek() rune { 522 if self.offset < self.length { 523 return rune(self.str[self.offset]) 524 } 525 return -1 526 } 527 528 func (self *_parser) read() { 529 if self.offset < self.length { 530 self.chrOffset = self.offset 531 chr, width := rune(self.str[self.offset]), 1 532 if chr >= utf8.RuneSelf { // !ASCII 533 chr, width = utf8.DecodeRuneInString(self.str[self.offset:]) 534 if chr == utf8.RuneError && width == 1 { 535 self.error(self.chrOffset, "Invalid UTF-8 character") 536 } 537 } 538 self.offset += width 539 self.chr = chr 540 } else { 541 self.chrOffset = self.length 542 self.chr = -1 // EOF 543 } 544 } 545 546 func (self *_parser) skipSingleLineComment() { 547 for self.chr != -1 { 548 self.read() 549 if isLineTerminator(self.chr) { 550 return 551 } 552 } 553 } 554 555 func (self *_parser) skipMultiLineComment() (hasLineTerminator bool) { 556 self.read() 557 for self.chr >= 0 { 558 chr := self.chr 559 if chr == '\r' || chr == '\n' || chr == '\u2028' || chr == '\u2029' { 560 hasLineTerminator = true 561 break 562 } 563 self.read() 564 if chr == '*' && self.chr == '/' { 565 self.read() 566 return 567 } 568 } 569 for self.chr >= 0 { 570 chr := self.chr 571 self.read() 572 if chr == '*' && self.chr == '/' { 573 self.read() 574 return 575 } 576 } 577 578 self.errorUnexpected(0, self.chr) 579 return 580 } 581 582 func (self *_parser) skipWhiteSpaceCheckLineTerminator() bool { 583 for { 584 switch self.chr { 585 case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff': 586 self.read() 587 continue 588 case '\r': 589 if self._peek() == '\n' { 590 self.read() 591 } 592 fallthrough 593 case '\u2028', '\u2029', '\n': 594 return true 595 } 596 if self.chr >= utf8.RuneSelf { 597 if unicode.IsSpace(self.chr) { 598 self.read() 599 continue 600 } 601 } 602 break 603 } 604 return false 605 } 606 607 func (self *_parser) skipWhiteSpace() { 608 for { 609 switch self.chr { 610 case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff': 611 self.read() 612 continue 613 case '\r': 614 if self._peek() == '\n' { 615 self.read() 616 } 617 fallthrough 618 case '\u2028', '\u2029', '\n': 619 if self.insertSemicolon { 620 return 621 } 622 self.read() 623 continue 624 } 625 if self.chr >= utf8.RuneSelf { 626 if unicode.IsSpace(self.chr) { 627 self.read() 628 continue 629 } 630 } 631 break 632 } 633 } 634 635 func (self *_parser) scanMantissa(base int) { 636 for digitValue(self.chr) < base { 637 self.read() 638 } 639 } 640 641 func (self *_parser) scanEscape(quote rune) (int, bool) { 642 643 var length, base uint32 644 chr := self.chr 645 switch chr { 646 case '0', '1', '2', '3', '4', '5', '6', '7': 647 // Octal: 648 length, base = 3, 8 649 case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'': 650 self.read() 651 return 1, false 652 case '\r': 653 self.read() 654 if self.chr == '\n' { 655 self.read() 656 return 2, false 657 } 658 return 1, false 659 case '\n': 660 self.read() 661 return 1, false 662 case '\u2028', '\u2029': 663 self.read() 664 return 1, true 665 case 'x': 666 self.read() 667 length, base = 2, 16 668 case 'u': 669 self.read() 670 if self.chr == '{' { 671 self.read() 672 length, base = 0, 16 673 } else { 674 length, base = 4, 16 675 } 676 default: 677 self.read() // Always make progress 678 } 679 680 if base > 0 { 681 var value uint32 682 if length > 0 { 683 for ; length > 0 && self.chr != quote && self.chr >= 0; length-- { 684 digit := uint32(digitValue(self.chr)) 685 if digit >= base { 686 break 687 } 688 value = value*base + digit 689 self.read() 690 } 691 } else { 692 for self.chr != quote && self.chr >= 0 && value < utf8.MaxRune { 693 if self.chr == '}' { 694 self.read() 695 break 696 } 697 digit := uint32(digitValue(self.chr)) 698 if digit >= base { 699 break 700 } 701 value = value*base + digit 702 self.read() 703 } 704 } 705 chr = rune(value) 706 } 707 if chr >= utf8.RuneSelf { 708 if chr > 0xFFFF { 709 return 2, true 710 } 711 return 1, true 712 } 713 return 1, false 714 } 715 716 func (self *_parser) scanString(offset int, parse bool) (literal string, parsed unistring.String, err string) { 717 // " ' / 718 quote := rune(self.str[offset]) 719 length := 0 720 isUnicode := false 721 for self.chr != quote { 722 chr := self.chr 723 if chr == '\n' || chr == '\r' || chr < 0 { 724 goto newline 725 } 726 if quote == '/' && (self.chr == '\u2028' || self.chr == '\u2029') { 727 goto newline 728 } 729 self.read() 730 if chr == '\\' { 731 if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 { 732 if quote == '/' { 733 goto newline 734 } 735 self.scanNewline() 736 } else { 737 l, u := self.scanEscape(quote) 738 length += l 739 if u { 740 isUnicode = true 741 } 742 } 743 continue 744 } else if chr == '[' && quote == '/' { 745 // Allow a slash (/) in a bracket character class ([...]) 746 // TODO Fix this, this is hacky... 747 quote = -1 748 } else if chr == ']' && quote == -1 { 749 quote = '/' 750 } 751 if chr >= utf8.RuneSelf { 752 isUnicode = true 753 if chr > 0xFFFF { 754 length++ 755 } 756 } 757 length++ 758 } 759 760 // " ' / 761 self.read() 762 literal = self.str[offset:self.chrOffset] 763 if parse { 764 // TODO strict 765 parsed, err = parseStringLiteral(literal[1:len(literal)-1], length, isUnicode, false) 766 } 767 return 768 769 newline: 770 self.scanNewline() 771 errStr := "String not terminated" 772 if quote == '/' { 773 errStr = "Invalid regular expression: missing /" 774 self.error(self.idxOf(offset), errStr) 775 } 776 return "", "", errStr 777 } 778 779 func (self *_parser) scanNewline() { 780 if self.chr == '\u2028' || self.chr == '\u2029' { 781 self.read() 782 return 783 } 784 if self.chr == '\r' { 785 self.read() 786 if self.chr != '\n' { 787 return 788 } 789 } 790 self.read() 791 } 792 793 func (self *_parser) parseTemplateCharacters() (literal string, parsed unistring.String, finished bool, parseErr, err string) { 794 offset := self.chrOffset 795 var end int 796 length := 0 797 isUnicode := false 798 hasCR := false 799 for { 800 chr := self.chr 801 if chr < 0 { 802 goto unterminated 803 } 804 self.read() 805 if chr == '`' { 806 finished = true 807 end = self.chrOffset - 1 808 break 809 } 810 if chr == '\\' { 811 if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 { 812 if self.chr == '\r' { 813 hasCR = true 814 } 815 self.scanNewline() 816 } else { 817 if self.chr == '8' || self.chr == '9' { 818 if parseErr == "" { 819 parseErr = "\\8 and \\9 are not allowed in template strings." 820 } 821 } 822 l, u := self.scanEscape('`') 823 length += l 824 if u { 825 isUnicode = true 826 } 827 } 828 continue 829 } 830 if chr == '$' && self.chr == '{' { 831 self.read() 832 end = self.chrOffset - 2 833 break 834 } 835 if chr >= utf8.RuneSelf { 836 isUnicode = true 837 if chr > 0xFFFF { 838 length++ 839 } 840 } else if chr == '\r' { 841 hasCR = true 842 if self.chr == '\n' { 843 length-- 844 } 845 } 846 length++ 847 } 848 literal = self.str[offset:end] 849 if hasCR { 850 literal = normaliseCRLF(literal) 851 } 852 if parseErr == "" { 853 parsed, parseErr = parseStringLiteral(literal, length, isUnicode, true) 854 } 855 self.insertSemicolon = true 856 return 857 unterminated: 858 err = err_UnexpectedEndOfInput 859 finished = true 860 return 861 } 862 863 func normaliseCRLF(s string) string { 864 var buf strings.Builder 865 buf.Grow(len(s)) 866 for i := 0; i < len(s); i++ { 867 if s[i] == '\r' { 868 buf.WriteByte('\n') 869 if i < len(s)-1 && s[i+1] == '\n' { 870 i++ 871 } 872 } else { 873 buf.WriteByte(s[i]) 874 } 875 } 876 return buf.String() 877 } 878 879 func hex2decimal(chr byte) (value rune, ok bool) { 880 { 881 chr := rune(chr) 882 switch { 883 case '0' <= chr && chr <= '9': 884 return chr - '0', true 885 case 'a' <= chr && chr <= 'f': 886 return chr - 'a' + 10, true 887 case 'A' <= chr && chr <= 'F': 888 return chr - 'A' + 10, true 889 } 890 return 891 } 892 } 893 894 func parseNumberLiteral(literal string) (value interface{}, err error) { 895 // TODO Is Uint okay? What about -MAX_UINT 896 value, err = strconv.ParseInt(literal, 0, 64) 897 if err == nil { 898 return 899 } 900 901 parseIntErr := err // Save this first error, just in case 902 903 value, err = strconv.ParseFloat(literal, 64) 904 if err == nil { 905 return 906 } else if err.(*strconv.NumError).Err == strconv.ErrRange { 907 // Infinity, etc. 908 return value, nil 909 } 910 911 err = parseIntErr 912 913 if err.(*strconv.NumError).Err == strconv.ErrRange { 914 if len(literal) > 2 && literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') { 915 // Could just be a very large number (e.g. 0x8000000000000000) 916 var value float64 917 literal = literal[2:] 918 for _, chr := range literal { 919 digit := digitValue(chr) 920 if digit >= 16 { 921 goto error 922 } 923 value = value*16 + float64(digit) 924 } 925 return value, nil 926 } 927 } 928 929 error: 930 return nil, errors.New("Illegal numeric literal") 931 } 932 933 func parseStringLiteral(literal string, length int, unicode, strict bool) (unistring.String, string) { 934 var sb strings.Builder 935 var chars []uint16 936 if unicode { 937 chars = make([]uint16, 1, length+1) 938 chars[0] = unistring.BOM 939 } else { 940 sb.Grow(length) 941 } 942 str := literal 943 for len(str) > 0 { 944 switch chr := str[0]; { 945 // We do not explicitly handle the case of the quote 946 // value, which can be: " ' / 947 // This assumes we're already passed a partially well-formed literal 948 case chr >= utf8.RuneSelf: 949 chr, size := utf8.DecodeRuneInString(str) 950 if chr <= 0xFFFF { 951 chars = append(chars, uint16(chr)) 952 } else { 953 first, second := utf16.EncodeRune(chr) 954 chars = append(chars, uint16(first), uint16(second)) 955 } 956 str = str[size:] 957 continue 958 case chr != '\\': 959 if unicode { 960 chars = append(chars, uint16(chr)) 961 } else { 962 sb.WriteByte(chr) 963 } 964 str = str[1:] 965 continue 966 } 967 968 if len(str) <= 1 { 969 panic("len(str) <= 1") 970 } 971 chr := str[1] 972 var value rune 973 if chr >= utf8.RuneSelf { 974 str = str[1:] 975 var size int 976 value, size = utf8.DecodeRuneInString(str) 977 str = str[size:] // \ + <character> 978 if value == '\u2028' || value == '\u2029' { 979 continue 980 } 981 } else { 982 str = str[2:] // \<character> 983 switch chr { 984 case 'b': 985 value = '\b' 986 case 'f': 987 value = '\f' 988 case 'n': 989 value = '\n' 990 case 'r': 991 value = '\r' 992 case 't': 993 value = '\t' 994 case 'v': 995 value = '\v' 996 case 'x', 'u': 997 size := 0 998 switch chr { 999 case 'x': 1000 size = 2 1001 case 'u': 1002 if str == "" || str[0] != '{' { 1003 size = 4 1004 } 1005 } 1006 if size > 0 { 1007 if len(str) < size { 1008 return "", fmt.Sprintf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size) 1009 } 1010 for j := 0; j < size; j++ { 1011 decimal, ok := hex2decimal(str[j]) 1012 if !ok { 1013 return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size]) 1014 } 1015 value = value<<4 | decimal 1016 } 1017 } else { 1018 str = str[1:] 1019 var val rune 1020 value = -1 1021 for ; size < len(str); size++ { 1022 if str[size] == '}' { 1023 if size == 0 { 1024 return "", fmt.Sprintf("invalid escape: \\%s", string(chr)) 1025 } 1026 size++ 1027 value = val 1028 break 1029 } 1030 decimal, ok := hex2decimal(str[size]) 1031 if !ok { 1032 return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size+1]) 1033 } 1034 val = val<<4 | decimal 1035 if val > utf8.MaxRune { 1036 return "", fmt.Sprintf("undefined Unicode code-point: %q", str[:size+1]) 1037 } 1038 } 1039 if value == -1 { 1040 return "", fmt.Sprintf("unterminated \\u{: %q", str) 1041 } 1042 } 1043 str = str[size:] 1044 if chr == 'x' { 1045 break 1046 } 1047 if value > utf8.MaxRune { 1048 panic("value > utf8.MaxRune") 1049 } 1050 case '0': 1051 if len(str) == 0 || '0' > str[0] || str[0] > '7' { 1052 value = 0 1053 break 1054 } 1055 fallthrough 1056 case '1', '2', '3', '4', '5', '6', '7': 1057 if strict { 1058 return "", "Octal escape sequences are not allowed in this context" 1059 } 1060 value = rune(chr) - '0' 1061 j := 0 1062 for ; j < 2; j++ { 1063 if len(str) < j+1 { 1064 break 1065 } 1066 chr := str[j] 1067 if '0' > chr || chr > '7' { 1068 break 1069 } 1070 decimal := rune(str[j]) - '0' 1071 value = (value << 3) | decimal 1072 } 1073 str = str[j:] 1074 case '\\': 1075 value = '\\' 1076 case '\'', '"': 1077 value = rune(chr) 1078 case '\r': 1079 if len(str) > 0 { 1080 if str[0] == '\n' { 1081 str = str[1:] 1082 } 1083 } 1084 fallthrough 1085 case '\n': 1086 continue 1087 default: 1088 value = rune(chr) 1089 } 1090 } 1091 if unicode { 1092 if value <= 0xFFFF { 1093 chars = append(chars, uint16(value)) 1094 } else { 1095 first, second := utf16.EncodeRune(value) 1096 chars = append(chars, uint16(first), uint16(second)) 1097 } 1098 } else { 1099 if value >= utf8.RuneSelf { 1100 return "", "Unexpected unicode character" 1101 } 1102 sb.WriteByte(byte(value)) 1103 } 1104 } 1105 1106 if unicode { 1107 if len(chars) != length+1 { 1108 panic(fmt.Errorf("unexpected unicode length while parsing '%s'", literal)) 1109 } 1110 return unistring.FromUtf16(chars), "" 1111 } 1112 if sb.Len() != length { 1113 panic(fmt.Errorf("unexpected length while parsing '%s'", literal)) 1114 } 1115 return unistring.String(sb.String()), "" 1116 } 1117 1118 func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) { 1119 1120 offset := self.chrOffset 1121 tkn := token.NUMBER 1122 1123 if decimalPoint { 1124 offset-- 1125 self.scanMantissa(10) 1126 } else { 1127 if self.chr == '0' { 1128 self.read() 1129 base := 0 1130 switch self.chr { 1131 case 'x', 'X': 1132 base = 16 1133 case 'o', 'O': 1134 base = 8 1135 case 'b', 'B': 1136 base = 2 1137 case '.', 'e', 'E': 1138 // no-op 1139 default: 1140 // legacy octal 1141 self.scanMantissa(8) 1142 goto end 1143 } 1144 if base > 0 { 1145 self.read() 1146 if !isDigit(self.chr, base) { 1147 return token.ILLEGAL, self.str[offset:self.chrOffset] 1148 } 1149 self.scanMantissa(base) 1150 goto end 1151 } 1152 } else { 1153 self.scanMantissa(10) 1154 } 1155 if self.chr == '.' { 1156 self.read() 1157 self.scanMantissa(10) 1158 } 1159 } 1160 1161 if self.chr == 'e' || self.chr == 'E' { 1162 self.read() 1163 if self.chr == '-' || self.chr == '+' { 1164 self.read() 1165 } 1166 if isDecimalDigit(self.chr) { 1167 self.read() 1168 self.scanMantissa(10) 1169 } else { 1170 return token.ILLEGAL, self.str[offset:self.chrOffset] 1171 } 1172 } 1173 end: 1174 if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) { 1175 return token.ILLEGAL, self.str[offset:self.chrOffset] 1176 } 1177 1178 return tkn, self.str[offset:self.chrOffset] 1179 }