go.ketch.com/lib/goja@v0.0.1/parser/lexer.go (about) 1 package parser 2 3 import ( 4 "errors" 5 "fmt" 6 "strconv" 7 "strings" 8 "unicode" 9 "unicode/utf16" 10 "unicode/utf8" 11 12 "golang.org/x/text/unicode/rangetable" 13 14 "go.ketch.com/lib/goja/file" 15 "go.ketch.com/lib/goja/token" 16 "go.ketch.com/lib/goja/unistring" 17 ) 18 19 var ( 20 unicodeRangeIdNeg = rangetable.Merge(unicode.Pattern_Syntax, unicode.Pattern_White_Space) 21 unicodeRangeIdStartPos = rangetable.Merge(unicode.Letter, unicode.Nl, unicode.Other_ID_Start) 22 unicodeRangeIdContPos = rangetable.Merge(unicodeRangeIdStartPos, unicode.Mn, unicode.Mc, unicode.Nd, unicode.Pc, unicode.Other_ID_Continue) 23 ) 24 25 func isDecimalDigit(chr rune) bool { 26 return '0' <= chr && chr <= '9' 27 } 28 29 func IsIdentifier(s string) bool { 30 if s == "" { 31 return false 32 } 33 r, size := utf8.DecodeRuneInString(s) 34 if !isIdentifierStart(r) { 35 return false 36 } 37 for _, r := range s[size:] { 38 if !isIdentifierPart(r) { 39 return false 40 } 41 } 42 return true 43 } 44 45 func digitValue(chr rune) int { 46 switch { 47 case '0' <= chr && chr <= '9': 48 return int(chr - '0') 49 case 'a' <= chr && chr <= 'f': 50 return int(chr - 'a' + 10) 51 case 'A' <= chr && chr <= 'F': 52 return int(chr - 'A' + 10) 53 } 54 return 16 // Larger than any legal digit value 55 } 56 57 func isDigit(chr rune, base int) bool { 58 return digitValue(chr) < base 59 } 60 61 func isIdStartUnicode(r rune) bool { 62 return unicode.Is(unicodeRangeIdStartPos, r) && !unicode.Is(unicodeRangeIdNeg, r) 63 } 64 65 func isIdPartUnicode(r rune) bool { 66 return unicode.Is(unicodeRangeIdContPos, r) && !unicode.Is(unicodeRangeIdNeg, r) || r == '\u200C' || r == '\u200D' 67 } 68 69 func isIdentifierStart(chr rune) bool { 70 return chr == '$' || chr == '_' || chr == '\\' || 71 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' || 72 chr >= utf8.RuneSelf && isIdStartUnicode(chr) 73 } 74 75 func isIdentifierPart(chr rune) bool { 76 return chr == '$' || chr == '_' || chr == '\\' || 77 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' || 78 '0' <= chr && chr <= '9' || 79 chr >= utf8.RuneSelf && isIdPartUnicode(chr) 80 } 81 82 func (self *_parser) scanIdentifier() (string, unistring.String, bool, string) { 83 offset := self.chrOffset 84 hasEscape := false 85 isUnicode := false 86 length := 0 87 for isIdentifierPart(self.chr) { 88 r := self.chr 89 length++ 90 if r == '\\' { 91 hasEscape = true 92 distance := self.chrOffset - offset 93 self.read() 94 if self.chr != 'u' { 95 return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr)) 96 } 97 var value rune 98 if self._peek() == '{' { 99 self.read() 100 value = -1 101 for value <= utf8.MaxRune { 102 self.read() 103 if self.chr == '}' { 104 break 105 } 106 decimal, ok := hex2decimal(byte(self.chr)) 107 if !ok { 108 return "", "", false, "Invalid Unicode escape sequence" 109 } 110 if value == -1 { 111 value = decimal 112 } else { 113 value = value<<4 | decimal 114 } 115 } 116 if value == -1 { 117 return "", "", false, "Invalid Unicode escape sequence" 118 } 119 } else { 120 for j := 0; j < 4; j++ { 121 self.read() 122 decimal, ok := hex2decimal(byte(self.chr)) 123 if !ok { 124 return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr)) 125 } 126 value = value<<4 | decimal 127 } 128 } 129 if value == '\\' { 130 return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value)) 131 } else if distance == 0 { 132 if !isIdentifierStart(value) { 133 return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value)) 134 } 135 } else if distance > 0 { 136 if !isIdentifierPart(value) { 137 return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value)) 138 } 139 } 140 r = value 141 } 142 if r >= utf8.RuneSelf { 143 isUnicode = true 144 if r > 0xFFFF { 145 length++ 146 } 147 } 148 self.read() 149 } 150 151 literal := self.str[offset:self.chrOffset] 152 var parsed unistring.String 153 if hasEscape || isUnicode { 154 var err string 155 // TODO strict 156 parsed, err = parseStringLiteral(literal, length, isUnicode, false) 157 if err != "" { 158 return "", "", false, err 159 } 160 } else { 161 parsed = unistring.String(literal) 162 } 163 164 return literal, parsed, hasEscape, "" 165 } 166 167 // 7.2 168 func isLineWhiteSpace(chr rune) bool { 169 switch chr { 170 case '\u0009', '\u000b', '\u000c', '\u0020', '\u00a0', '\ufeff': 171 return true 172 case '\u000a', '\u000d', '\u2028', '\u2029': 173 return false 174 case '\u0085': 175 return false 176 } 177 return unicode.IsSpace(chr) 178 } 179 180 // 7.3 181 func isLineTerminator(chr rune) bool { 182 switch chr { 183 case '\u000a', '\u000d', '\u2028', '\u2029': 184 return true 185 } 186 return false 187 } 188 189 type parserState struct { 190 tok token.Token 191 literal string 192 parsedLiteral unistring.String 193 implicitSemicolon, insertSemicolon bool 194 chr rune 195 chrOffset, offset int 196 errorCount int 197 } 198 199 func (self *_parser) mark(state *parserState) *parserState { 200 if state == nil { 201 state = &parserState{} 202 } 203 state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset = 204 self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset 205 206 state.errorCount = len(self.errors) 207 return state 208 } 209 210 func (self *_parser) restore(state *parserState) { 211 self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset = 212 state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset 213 self.errors = self.errors[:state.errorCount] 214 } 215 216 func (self *_parser) peek() token.Token { 217 implicitSemicolon, insertSemicolon, chr, chrOffset, offset := self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset 218 tok, _, _, _ := self.scan() 219 self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset = implicitSemicolon, insertSemicolon, chr, chrOffset, offset 220 return tok 221 } 222 223 func (self *_parser) scan() (tkn token.Token, literal string, parsedLiteral unistring.String, idx file.Idx) { 224 225 self.implicitSemicolon = false 226 227 for { 228 self.skipWhiteSpace() 229 230 idx = self.idxOf(self.chrOffset) 231 insertSemicolon := false 232 233 switch chr := self.chr; { 234 case isIdentifierStart(chr): 235 var err string 236 var hasEscape bool 237 literal, parsedLiteral, hasEscape, err = self.scanIdentifier() 238 if err != "" { 239 tkn = token.ILLEGAL 240 break 241 } 242 if len(parsedLiteral) > 1 { 243 // Keywords are longer than 1 character, avoid lookup otherwise 244 var strict bool 245 tkn, strict = token.IsKeyword(string(parsedLiteral)) 246 if hasEscape { 247 self.insertSemicolon = true 248 if tkn == 0 || token.IsUnreservedWord(tkn) { 249 tkn = token.IDENTIFIER 250 } else { 251 tkn = token.ESCAPED_RESERVED_WORD 252 } 253 return 254 } 255 switch tkn { 256 case 0: // Not a keyword 257 // no-op 258 case token.KEYWORD: 259 if strict { 260 // TODO If strict and in strict mode, then this is not a break 261 break 262 } 263 return 264 265 case 266 token.BOOLEAN, 267 token.NULL, 268 token.THIS, 269 token.BREAK, 270 token.THROW, // A newline after a throw is not allowed, but we need to detect it 271 token.RETURN, 272 token.CONTINUE, 273 token.DEBUGGER: 274 self.insertSemicolon = true 275 return 276 277 default: 278 return 279 280 } 281 } 282 self.insertSemicolon = true 283 tkn = token.IDENTIFIER 284 return 285 case '0' <= chr && chr <= '9': 286 self.insertSemicolon = true 287 tkn, literal = self.scanNumericLiteral(false) 288 return 289 default: 290 self.read() 291 switch chr { 292 case -1: 293 if self.insertSemicolon { 294 self.insertSemicolon = false 295 self.implicitSemicolon = true 296 } 297 tkn = token.EOF 298 case '\r', '\n', '\u2028', '\u2029': 299 self.insertSemicolon = false 300 self.implicitSemicolon = true 301 continue 302 case ':': 303 tkn = token.COLON 304 case '.': 305 if digitValue(self.chr) < 10 { 306 insertSemicolon = true 307 tkn, literal = self.scanNumericLiteral(true) 308 } else { 309 if self.chr == '.' { 310 self.read() 311 if self.chr == '.' { 312 self.read() 313 tkn = token.ELLIPSIS 314 } else { 315 tkn = token.ILLEGAL 316 } 317 } else { 318 tkn = token.PERIOD 319 } 320 } 321 case ',': 322 tkn = token.COMMA 323 case ';': 324 tkn = token.SEMICOLON 325 case '(': 326 tkn = token.LEFT_PARENTHESIS 327 case ')': 328 tkn = token.RIGHT_PARENTHESIS 329 insertSemicolon = true 330 case '[': 331 tkn = token.LEFT_BRACKET 332 case ']': 333 tkn = token.RIGHT_BRACKET 334 insertSemicolon = true 335 case '{': 336 tkn = token.LEFT_BRACE 337 case '}': 338 tkn = token.RIGHT_BRACE 339 insertSemicolon = true 340 case '+': 341 tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT) 342 if tkn == token.INCREMENT { 343 insertSemicolon = true 344 } 345 case '-': 346 tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT) 347 if tkn == token.DECREMENT { 348 insertSemicolon = true 349 } 350 case '*': 351 if self.chr == '*' { 352 self.read() 353 tkn = self.switch2(token.EXPONENT, token.EXPONENT_ASSIGN) 354 } else { 355 tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN) 356 } 357 case '/': 358 if self.chr == '/' { 359 self.skipSingleLineComment() 360 continue 361 } else if self.chr == '*' { 362 if self.skipMultiLineComment() { 363 self.insertSemicolon = false 364 self.implicitSemicolon = true 365 } 366 continue 367 } else { 368 // Could be division, could be RegExp literal 369 tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN) 370 insertSemicolon = true 371 } 372 case '%': 373 tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN) 374 case '^': 375 tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN) 376 case '<': 377 tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN) 378 case '>': 379 tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN) 380 case '=': 381 if self.chr == '>' { 382 self.read() 383 if self.implicitSemicolon { 384 tkn = token.ILLEGAL 385 } else { 386 tkn = token.ARROW 387 } 388 } else { 389 tkn = self.switch2(token.ASSIGN, token.EQUAL) 390 if tkn == token.EQUAL && self.chr == '=' { 391 self.read() 392 tkn = token.STRICT_EQUAL 393 } 394 } 395 case '!': 396 tkn = self.switch2(token.NOT, token.NOT_EQUAL) 397 if tkn == token.NOT_EQUAL && self.chr == '=' { 398 self.read() 399 tkn = token.STRICT_NOT_EQUAL 400 } 401 case '&': 402 tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND) 403 case '|': 404 tkn = self.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR) 405 case '~': 406 tkn = token.BITWISE_NOT 407 case '?': 408 if self.chr == '.' && !isDecimalDigit(self._peek()) { 409 self.read() 410 tkn = token.QUESTION_DOT 411 } else if self.chr == '?' { 412 self.read() 413 tkn = token.COALESCE 414 } else { 415 tkn = token.QUESTION_MARK 416 } 417 case '"', '\'': 418 insertSemicolon = true 419 tkn = token.STRING 420 var err string 421 literal, parsedLiteral, err = self.scanString(self.chrOffset-1, true) 422 if err != "" { 423 tkn = token.ILLEGAL 424 } 425 case '`': 426 tkn = token.BACKTICK 427 case '#': 428 if self.chrOffset == 1 && self.chr == '!' { 429 self.skipSingleLineComment() 430 continue 431 } 432 433 var err string 434 literal, parsedLiteral, _, err = self.scanIdentifier() 435 if err != "" || literal == "" { 436 tkn = token.ILLEGAL 437 break 438 } 439 self.insertSemicolon = true 440 tkn = token.PRIVATE_IDENTIFIER 441 return 442 default: 443 self.errorUnexpected(idx, chr) 444 tkn = token.ILLEGAL 445 } 446 } 447 self.insertSemicolon = insertSemicolon 448 return 449 } 450 } 451 452 func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token { 453 if self.chr == '=' { 454 self.read() 455 return tkn1 456 } 457 return tkn0 458 } 459 460 func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token { 461 if self.chr == '=' { 462 self.read() 463 return tkn1 464 } 465 if self.chr == chr2 { 466 self.read() 467 return tkn2 468 } 469 return tkn0 470 } 471 472 func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token { 473 if self.chr == '=' { 474 self.read() 475 return tkn1 476 } 477 if self.chr == chr2 { 478 self.read() 479 if self.chr == '=' { 480 self.read() 481 return tkn3 482 } 483 return tkn2 484 } 485 return tkn0 486 } 487 488 func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token { 489 if self.chr == '=' { 490 self.read() 491 return tkn1 492 } 493 if self.chr == chr2 { 494 self.read() 495 if self.chr == '=' { 496 self.read() 497 return tkn3 498 } 499 if self.chr == chr3 { 500 self.read() 501 if self.chr == '=' { 502 self.read() 503 return tkn5 504 } 505 return tkn4 506 } 507 return tkn2 508 } 509 return tkn0 510 } 511 512 func (self *_parser) _peek() rune { 513 if self.offset < self.length { 514 return rune(self.str[self.offset]) 515 } 516 return -1 517 } 518 519 func (self *_parser) read() { 520 if self.offset < self.length { 521 self.chrOffset = self.offset 522 chr, width := rune(self.str[self.offset]), 1 523 if chr >= utf8.RuneSelf { // !ASCII 524 chr, width = utf8.DecodeRuneInString(self.str[self.offset:]) 525 if chr == utf8.RuneError && width == 1 { 526 self.error(self.chrOffset, "Invalid UTF-8 character") 527 } 528 } 529 self.offset += width 530 self.chr = chr 531 } else { 532 self.chrOffset = self.length 533 self.chr = -1 // EOF 534 } 535 } 536 537 func (self *_parser) skipSingleLineComment() { 538 for self.chr != -1 { 539 self.read() 540 if isLineTerminator(self.chr) { 541 return 542 } 543 } 544 } 545 546 func (self *_parser) skipMultiLineComment() (hasLineTerminator bool) { 547 self.read() 548 for self.chr >= 0 { 549 chr := self.chr 550 if chr == '\r' || chr == '\n' || chr == '\u2028' || chr == '\u2029' { 551 hasLineTerminator = true 552 break 553 } 554 self.read() 555 if chr == '*' && self.chr == '/' { 556 self.read() 557 return 558 } 559 } 560 for self.chr >= 0 { 561 chr := self.chr 562 self.read() 563 if chr == '*' && self.chr == '/' { 564 self.read() 565 return 566 } 567 } 568 569 self.errorUnexpected(0, self.chr) 570 return 571 } 572 573 func (self *_parser) skipWhiteSpace() { 574 for { 575 switch self.chr { 576 case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff': 577 self.read() 578 continue 579 case '\r': 580 if self._peek() == '\n' { 581 self.read() 582 } 583 fallthrough 584 case '\u2028', '\u2029', '\n': 585 if self.insertSemicolon { 586 return 587 } 588 self.read() 589 continue 590 } 591 if self.chr >= utf8.RuneSelf { 592 if unicode.IsSpace(self.chr) { 593 self.read() 594 continue 595 } 596 } 597 break 598 } 599 } 600 601 func (self *_parser) scanMantissa(base int) { 602 for digitValue(self.chr) < base { 603 self.read() 604 } 605 } 606 607 func (self *_parser) scanEscape(quote rune) (int, bool) { 608 609 var length, base uint32 610 chr := self.chr 611 switch chr { 612 case '0', '1', '2', '3', '4', '5', '6', '7': 613 // Octal: 614 length, base = 3, 8 615 case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'': 616 self.read() 617 return 1, false 618 case '\r': 619 self.read() 620 if self.chr == '\n' { 621 self.read() 622 return 2, false 623 } 624 return 1, false 625 case '\n': 626 self.read() 627 return 1, false 628 case '\u2028', '\u2029': 629 self.read() 630 return 1, true 631 case 'x': 632 self.read() 633 length, base = 2, 16 634 case 'u': 635 self.read() 636 if self.chr == '{' { 637 self.read() 638 length, base = 0, 16 639 } else { 640 length, base = 4, 16 641 } 642 default: 643 self.read() // Always make progress 644 } 645 646 if base > 0 { 647 var value uint32 648 if length > 0 { 649 for ; length > 0 && self.chr != quote && self.chr >= 0; length-- { 650 digit := uint32(digitValue(self.chr)) 651 if digit >= base { 652 break 653 } 654 value = value*base + digit 655 self.read() 656 } 657 } else { 658 for self.chr != quote && self.chr >= 0 && value < utf8.MaxRune { 659 if self.chr == '}' { 660 self.read() 661 break 662 } 663 digit := uint32(digitValue(self.chr)) 664 if digit >= base { 665 break 666 } 667 value = value*base + digit 668 self.read() 669 } 670 } 671 chr = rune(value) 672 } 673 if chr >= utf8.RuneSelf { 674 if chr > 0xFFFF { 675 return 2, true 676 } 677 return 1, true 678 } 679 return 1, false 680 } 681 682 func (self *_parser) scanString(offset int, parse bool) (literal string, parsed unistring.String, err string) { 683 // " ' / 684 quote := rune(self.str[offset]) 685 length := 0 686 isUnicode := false 687 for self.chr != quote { 688 chr := self.chr 689 if chr == '\n' || chr == '\r' || chr < 0 { 690 goto newline 691 } 692 if quote == '/' && (self.chr == '\u2028' || self.chr == '\u2029') { 693 goto newline 694 } 695 self.read() 696 if chr == '\\' { 697 if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 { 698 if quote == '/' { 699 goto newline 700 } 701 self.scanNewline() 702 } else { 703 l, u := self.scanEscape(quote) 704 length += l 705 if u { 706 isUnicode = true 707 } 708 } 709 continue 710 } else if chr == '[' && quote == '/' { 711 // Allow a slash (/) in a bracket character class ([...]) 712 // TODO Fix this, this is hacky... 713 quote = -1 714 } else if chr == ']' && quote == -1 { 715 quote = '/' 716 } 717 if chr >= utf8.RuneSelf { 718 isUnicode = true 719 if chr > 0xFFFF { 720 length++ 721 } 722 } 723 length++ 724 } 725 726 // " ' / 727 self.read() 728 literal = self.str[offset:self.chrOffset] 729 if parse { 730 // TODO strict 731 parsed, err = parseStringLiteral(literal[1:len(literal)-1], length, isUnicode, false) 732 } 733 return 734 735 newline: 736 self.scanNewline() 737 errStr := "String not terminated" 738 if quote == '/' { 739 errStr = "Invalid regular expression: missing /" 740 self.error(self.idxOf(offset), errStr) 741 } 742 return "", "", errStr 743 } 744 745 func (self *_parser) scanNewline() { 746 if self.chr == '\u2028' || self.chr == '\u2029' { 747 self.read() 748 return 749 } 750 if self.chr == '\r' { 751 self.read() 752 if self.chr != '\n' { 753 return 754 } 755 } 756 self.read() 757 } 758 759 func (self *_parser) parseTemplateCharacters() (literal string, parsed unistring.String, finished bool, parseErr, err string) { 760 offset := self.chrOffset 761 var end int 762 length := 0 763 isUnicode := false 764 hasCR := false 765 for { 766 chr := self.chr 767 if chr < 0 { 768 goto unterminated 769 } 770 self.read() 771 if chr == '`' { 772 finished = true 773 end = self.chrOffset - 1 774 break 775 } 776 if chr == '\\' { 777 if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 { 778 if self.chr == '\r' { 779 hasCR = true 780 } 781 self.scanNewline() 782 } else { 783 if self.chr == '8' || self.chr == '9' { 784 if parseErr == "" { 785 parseErr = "\\8 and \\9 are not allowed in template strings." 786 } 787 } 788 l, u := self.scanEscape('`') 789 length += l 790 if u { 791 isUnicode = true 792 } 793 } 794 continue 795 } 796 if chr == '$' && self.chr == '{' { 797 self.read() 798 end = self.chrOffset - 2 799 break 800 } 801 if chr >= utf8.RuneSelf { 802 isUnicode = true 803 if chr > 0xFFFF { 804 length++ 805 } 806 } else if chr == '\r' { 807 hasCR = true 808 if self.chr == '\n' { 809 length-- 810 } 811 } 812 length++ 813 } 814 literal = self.str[offset:end] 815 if hasCR { 816 literal = normaliseCRLF(literal) 817 } 818 if parseErr == "" { 819 parsed, parseErr = parseStringLiteral(literal, length, isUnicode, true) 820 } 821 self.insertSemicolon = true 822 return 823 unterminated: 824 err = err_UnexpectedEndOfInput 825 finished = true 826 return 827 } 828 829 func normaliseCRLF(s string) string { 830 var buf strings.Builder 831 buf.Grow(len(s)) 832 for i := 0; i < len(s); i++ { 833 if s[i] == '\r' { 834 buf.WriteByte('\n') 835 if i < len(s)-1 && s[i+1] == '\n' { 836 i++ 837 } 838 } else { 839 buf.WriteByte(s[i]) 840 } 841 } 842 return buf.String() 843 } 844 845 func hex2decimal(chr byte) (value rune, ok bool) { 846 { 847 chr := rune(chr) 848 switch { 849 case '0' <= chr && chr <= '9': 850 return chr - '0', true 851 case 'a' <= chr && chr <= 'f': 852 return chr - 'a' + 10, true 853 case 'A' <= chr && chr <= 'F': 854 return chr - 'A' + 10, true 855 } 856 return 857 } 858 } 859 860 func parseNumberLiteral(literal string) (value interface{}, err error) { 861 // TODO Is Uint okay? What about -MAX_UINT 862 value, err = strconv.ParseInt(literal, 0, 64) 863 if err == nil { 864 return 865 } 866 867 parseIntErr := err // Save this first error, just in case 868 869 value, err = strconv.ParseFloat(literal, 64) 870 if err == nil { 871 return 872 } else if err.(*strconv.NumError).Err == strconv.ErrRange { 873 // Infinity, etc. 874 return value, nil 875 } 876 877 err = parseIntErr 878 879 if err.(*strconv.NumError).Err == strconv.ErrRange { 880 if len(literal) > 2 && literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') { 881 // Could just be a very large number (e.g. 0x8000000000000000) 882 var value float64 883 literal = literal[2:] 884 for _, chr := range literal { 885 digit := digitValue(chr) 886 if digit >= 16 { 887 goto error 888 } 889 value = value*16 + float64(digit) 890 } 891 return value, nil 892 } 893 } 894 895 error: 896 return nil, errors.New("Illegal numeric literal") 897 } 898 899 func parseStringLiteral(literal string, length int, unicode, strict bool) (unistring.String, string) { 900 var sb strings.Builder 901 var chars []uint16 902 if unicode { 903 chars = make([]uint16, 1, length+1) 904 chars[0] = unistring.BOM 905 } else { 906 sb.Grow(length) 907 } 908 str := literal 909 for len(str) > 0 { 910 switch chr := str[0]; { 911 // We do not explicitly handle the case of the quote 912 // value, which can be: " ' / 913 // This assumes we're already passed a partially well-formed literal 914 case chr >= utf8.RuneSelf: 915 chr, size := utf8.DecodeRuneInString(str) 916 if chr <= 0xFFFF { 917 chars = append(chars, uint16(chr)) 918 } else { 919 first, second := utf16.EncodeRune(chr) 920 chars = append(chars, uint16(first), uint16(second)) 921 } 922 str = str[size:] 923 continue 924 case chr != '\\': 925 if unicode { 926 chars = append(chars, uint16(chr)) 927 } else { 928 sb.WriteByte(chr) 929 } 930 str = str[1:] 931 continue 932 } 933 934 if len(str) <= 1 { 935 panic("len(str) <= 1") 936 } 937 chr := str[1] 938 var value rune 939 if chr >= utf8.RuneSelf { 940 str = str[1:] 941 var size int 942 value, size = utf8.DecodeRuneInString(str) 943 str = str[size:] // \ + <character> 944 if value == '\u2028' || value == '\u2029' { 945 continue 946 } 947 } else { 948 str = str[2:] // \<character> 949 switch chr { 950 case 'b': 951 value = '\b' 952 case 'f': 953 value = '\f' 954 case 'n': 955 value = '\n' 956 case 'r': 957 value = '\r' 958 case 't': 959 value = '\t' 960 case 'v': 961 value = '\v' 962 case 'x', 'u': 963 size := 0 964 switch chr { 965 case 'x': 966 size = 2 967 case 'u': 968 if str == "" || str[0] != '{' { 969 size = 4 970 } 971 } 972 if size > 0 { 973 if len(str) < size { 974 return "", fmt.Sprintf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size) 975 } 976 for j := 0; j < size; j++ { 977 decimal, ok := hex2decimal(str[j]) 978 if !ok { 979 return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size]) 980 } 981 value = value<<4 | decimal 982 } 983 } else { 984 str = str[1:] 985 var val rune 986 value = -1 987 for ; size < len(str); size++ { 988 if str[size] == '}' { 989 if size == 0 { 990 return "", fmt.Sprintf("invalid escape: \\%s", string(chr)) 991 } 992 size++ 993 value = val 994 break 995 } 996 decimal, ok := hex2decimal(str[size]) 997 if !ok { 998 return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size+1]) 999 } 1000 val = val<<4 | decimal 1001 if val > utf8.MaxRune { 1002 return "", fmt.Sprintf("undefined Unicode code-point: %q", str[:size+1]) 1003 } 1004 } 1005 if value == -1 { 1006 return "", fmt.Sprintf("unterminated \\u{: %q", str) 1007 } 1008 } 1009 str = str[size:] 1010 if chr == 'x' { 1011 break 1012 } 1013 if value > utf8.MaxRune { 1014 panic("value > utf8.MaxRune") 1015 } 1016 case '0': 1017 if len(str) == 0 || '0' > str[0] || str[0] > '7' { 1018 value = 0 1019 break 1020 } 1021 fallthrough 1022 case '1', '2', '3', '4', '5', '6', '7': 1023 if strict { 1024 return "", "Octal escape sequences are not allowed in this context" 1025 } 1026 value = rune(chr) - '0' 1027 j := 0 1028 for ; j < 2; j++ { 1029 if len(str) < j+1 { 1030 break 1031 } 1032 chr := str[j] 1033 if '0' > chr || chr > '7' { 1034 break 1035 } 1036 decimal := rune(str[j]) - '0' 1037 value = (value << 3) | decimal 1038 } 1039 str = str[j:] 1040 case '\\': 1041 value = '\\' 1042 case '\'', '"': 1043 value = rune(chr) 1044 case '\r': 1045 if len(str) > 0 { 1046 if str[0] == '\n' { 1047 str = str[1:] 1048 } 1049 } 1050 fallthrough 1051 case '\n': 1052 continue 1053 default: 1054 value = rune(chr) 1055 } 1056 } 1057 if unicode { 1058 if value <= 0xFFFF { 1059 chars = append(chars, uint16(value)) 1060 } else { 1061 first, second := utf16.EncodeRune(value) 1062 chars = append(chars, uint16(first), uint16(second)) 1063 } 1064 } else { 1065 if value >= utf8.RuneSelf { 1066 return "", "Unexpected unicode character" 1067 } 1068 sb.WriteByte(byte(value)) 1069 } 1070 } 1071 1072 if unicode { 1073 if len(chars) != length+1 { 1074 panic(fmt.Errorf("unexpected unicode length while parsing '%s'", literal)) 1075 } 1076 return unistring.FromUtf16(chars), "" 1077 } 1078 if sb.Len() != length { 1079 panic(fmt.Errorf("unexpected length while parsing '%s'", literal)) 1080 } 1081 return unistring.String(sb.String()), "" 1082 } 1083 1084 func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) { 1085 1086 offset := self.chrOffset 1087 tkn := token.NUMBER 1088 1089 if decimalPoint { 1090 offset-- 1091 self.scanMantissa(10) 1092 } else { 1093 if self.chr == '0' { 1094 self.read() 1095 base := 0 1096 switch self.chr { 1097 case 'x', 'X': 1098 base = 16 1099 case 'o', 'O': 1100 base = 8 1101 case 'b', 'B': 1102 base = 2 1103 case '.', 'e', 'E': 1104 // no-op 1105 default: 1106 // legacy octal 1107 self.scanMantissa(8) 1108 goto end 1109 } 1110 if base > 0 { 1111 self.read() 1112 if !isDigit(self.chr, base) { 1113 return token.ILLEGAL, self.str[offset:self.chrOffset] 1114 } 1115 self.scanMantissa(base) 1116 goto end 1117 } 1118 } else { 1119 self.scanMantissa(10) 1120 } 1121 if self.chr == '.' { 1122 self.read() 1123 self.scanMantissa(10) 1124 } 1125 } 1126 1127 if self.chr == 'e' || self.chr == 'E' { 1128 self.read() 1129 if self.chr == '-' || self.chr == '+' { 1130 self.read() 1131 } 1132 if isDecimalDigit(self.chr) { 1133 self.read() 1134 self.scanMantissa(10) 1135 } else { 1136 return token.ILLEGAL, self.str[offset:self.chrOffset] 1137 } 1138 } 1139 end: 1140 if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) { 1141 return token.ILLEGAL, self.str[offset:self.chrOffset] 1142 } 1143 1144 return tkn, self.str[offset:self.chrOffset] 1145 }