package css_lexer

import (
	"strings"
	"unicode/utf8"

	"github.com/evanw/esbuild/internal/logger"
)

// The lexer converts a source file to a stream of tokens. Unlike esbuild's
// JavaScript lexer, this CSS lexer runs to completion before the CSS parser
// begins, resulting in a single array of all tokens in the file.

// T identifies the kind of a CSS token.
type T uint8

// eof is the sentinel code point stored in "lexer.codePoint" once the end of
// the file has been reached (it cannot collide with any real rune).
const eof = -1

const (
	TEndOfFile T = iota

	TAtKeyword
	TUnterminatedString
	TBadURL
	TCDC // "-->"
	TCDO // "<!--"
	TCloseBrace
	TCloseBracket
	TCloseParen
	TColon
	TComma
	TDelim
	TDelimAmpersand
	TDelimAsterisk
	TDelimBar
	TDelimCaret
	TDelimDollar
	TDelimDot
	TDelimEquals
	TDelimExclamation
	TDelimGreaterThan
	TDelimMinus
	TDelimPlus
	TDelimSlash
	TDelimTilde
	TDimension
	TFunction
	THash
	TIdent
	TNumber
	TOpenBrace
	TOpenBracket
	TOpenParen
	TPercentage
	TSemicolon
	TString
	TURL
	TWhitespace

	// This is never something that the lexer generates directly. Instead this is
	// an esbuild-specific token for global/local names that "TIdent" tokens may
	// be changed into.
	TSymbol
)

// tokenToString maps each token kind to a human-readable description for use
// in diagnostics. It is indexed by the "T" value, so the entries here must
// stay in exactly the same order as the constant block above.
var tokenToString = []string{
	"end of file",
	"@-keyword",
	"bad string token",
	"bad URL token",
	"\"-->\"",
	"\"<!--\"",
	"\"}\"",
	"\"]\"",
	"\")\"",
	"\":\"",
	"\",\"",
	"delimiter",
	"\"&\"",
	"\"*\"",
	"\"|\"",
	"\"^\"",
	"\"$\"",
	"\".\"",
	"\"=\"",
	"\"!\"",
	"\">\"",
	"\"-\"",
	"\"+\"",
	"\"/\"",
	"\"~\"",
	"dimension",
	"function token",
	"hash token",
	"identifier",
	"number",
	"\"{\"",
	"\"[\"",
	"\"(\"",
	"percentage",
	"\";\"",
	"string token",
	"URL token",
	"whitespace",

	// TSymbol is described the same way as TIdent in diagnostics
	"identifier",
}

// String returns the human-readable description of the token kind.
func (t T) String() string {
	return tokenToString[t]
}

// IsNumeric returns true for the token kinds that carry a numeric value
// (plain numbers, percentages, and dimensions such as "10px").
func (t T) IsNumeric() bool {
	return t == TNumber || t == TPercentage || t == TDimension
}

// TokenFlags is a bit set of extra per-token information.
type TokenFlags uint8

const (
	// IsID is set on "THash" tokens when the text after "#" would start an
	// identifier (i.e. the hash can be used as an ID selector).
	IsID TokenFlags = 1 << iota

	// DidWarnAboutSingleLineComment is set on the token at which a warning
	// about a JavaScript-style "//" comment was emitted.
	DidWarnAboutSingleLineComment
)
// This token struct is designed to be memory-efficient. It just references a
// range in the input file instead of directly containing the substring of text
// since a range takes up less memory than a string.
type Token struct {
	Range      logger.Range // 8 bytes
	UnitOffset uint16       // 2 bytes; for TDimension: offset of the unit within the token text
	Kind       T            // 1 byte
	Flags      TokenFlags   // 1 byte
}

// DecodedText returns the decoded text for this token given the full source
// contents: surrounding delimiters ("@", "#", quotes, "url(...)", a trailing
// "(") are stripped and backslash escapes are decoded. Token kinds not listed
// in the switch are returned as their raw source text.
func (token Token) DecodedText(contents string) string {
	raw := contents[token.Range.Loc.Start:token.Range.End()]

	switch token.Kind {
	case TIdent, TDimension:
		return decodeEscapesInToken(raw)

	case TAtKeyword, THash:
		// Strip the leading "@" or "#"
		return decodeEscapesInToken(raw[1:])

	case TFunction:
		// Strip the trailing "("
		return decodeEscapesInToken(raw[:len(raw)-1])

	case TString:
		// Strip the surrounding quotes
		return decodeEscapesInToken(raw[1 : len(raw)-1])

	case TURL:
		start := 4 // Skip the leading "url("
		end := len(raw)

		// Note: URL tokens with syntax errors may not have a trailing ")"
		if raw[end-1] == ')' {
			end--
		}

		// Trim leading and trailing whitespace
		for start < end && isWhitespace(rune(raw[start])) {
			start++
		}
		for start < end && isWhitespace(rune(raw[end-1])) {
			end--
		}

		return decodeEscapesInToken(raw[start:end])
	}

	return raw
}

// lexer holds all mutable state for a single tokenization pass.
type lexer struct {
	Options
	log                     logger.Log
	source                  logger.Source
	allComments             []logger.Range // filled only when RecordAllComments is set
	legalCommentsBefore     []Comment      // legal comments not yet attached to a token index
	sourceMappingURL        logger.Span    // contents of the "sourceMappingURL=" comment, if any
	tracker                 logger.LineColumnTracker
	approximateNewlineCount int
	current                 int        // byte offset just past "codePoint"
	oldSingleLineCommentEnd logger.Loc // end of the last "//" comment we warned about (suppresses duplicate warnings)
	codePoint               rune       // the current code point, or "eof"
	Token                   Token      // the token currently being built
}

// Comment is a legal comment that must be preserved in the output.
type Comment struct {
	Text            string
	Loc             logger.Loc
	TokenIndexAfter uint32 // index of the token that follows this comment
}

// TokenizeResult is everything produced by a single call to Tokenize.
type TokenizeResult struct {
	Tokens           []Token
	AllComments      []logger.Range
	LegalComments    []Comment
	SourceMapComment logger.Span
	ApproximateLineCount int32
}

// Options configures tokenization.
type Options struct {
	RecordAllComments bool
}

// Tokenize scans the entire source file up front and returns all tokens plus
// comment and source-map metadata. Errors and warnings are reported to "log".
func Tokenize(log logger.Log, source logger.Source, options Options) TokenizeResult {
	lexer := lexer{
		Options: options,
		log:     log,
		source:  source,
		tracker: logger.MakeLineColumnTracker(&source),
	}
	lexer.step()

	// The U+FEFF character is usually a zero-width non-breaking space. However,
	// when it's used at the start of a text stream it is called a BOM (byte order
	// mark) instead and indicates that the text stream is UTF-8 encoded. This is
	// problematic for us because CSS does not treat U+FEFF as whitespace. Only
	// " \t\r\n\f" characters are treated as whitespace. Skip over the BOM if it
	// is present so it doesn't cause us trouble when we try to parse it.
	if lexer.codePoint == '\uFEFF' {
		lexer.step()
	}

	lexer.next()
	var tokens []Token
	var legalComments []Comment
	for lexer.Token.Kind != TEndOfFile {
		// Attach any legal comments seen since the previous token to the index
		// of the token we are about to append
		if lexer.legalCommentsBefore != nil {
			for _, comment := range lexer.legalCommentsBefore {
				comment.TokenIndexAfter = uint32(len(tokens))
				legalComments = append(legalComments, comment)
			}
			lexer.legalCommentsBefore = nil
		}
		tokens = append(tokens, lexer.Token)
		lexer.next()
	}
	// Trailing legal comments attach to one past the last token
	if lexer.legalCommentsBefore != nil {
		for _, comment := range lexer.legalCommentsBefore {
			comment.TokenIndexAfter = uint32(len(tokens))
			legalComments = append(legalComments, comment)
		}
		lexer.legalCommentsBefore = nil
	}
	return TokenizeResult{
		Tokens:               tokens,
		AllComments:          lexer.allComments,
		LegalComments:        legalComments,
		ApproximateLineCount: int32(lexer.approximateNewlineCount) + 1,
		SourceMapComment:     lexer.sourceMappingURL,
	}
}

// step advances the lexer by one code point, updating "codePoint", "current",
// and the length of the token currently being built.
func (lexer *lexer) step() {
	codePoint, width := utf8.DecodeRuneInString(lexer.source.Contents[lexer.current:])

	// Use -1 to indicate the end of the file
	if width == 0 {
		codePoint = eof
	}

	// Track the approximate number of newlines in the file so we can preallocate
	// the line offset table in the printer for source maps. The line offset table
	// is the #1 highest allocation in the heap profile, so this is worth doing.
	// This count is approximate because it handles "\n" and "\r\n" (the common
	// cases) but not "\r" or "\u2028" or "\u2029". Getting this wrong is harmless
	// because it's only a preallocation. The array will just grow if it's too small.
	if codePoint == '\n' {
		lexer.approximateNewlineCount++
	}

	lexer.codePoint = codePoint
	lexer.Token.Range.Len = int32(lexer.current) - lexer.Token.Range.Loc.Start
	lexer.current += width
}

// next scans the next token into "lexer.Token". Multi-line comments are not
// tokens; they are recorded as a side effect and skipped via "continue".
func (lexer *lexer) next() {
	// Reference: https://www.w3.org/TR/css-syntax-3/

	for {
		// Start the new token where the previous one ended
		lexer.Token = Token{Range: logger.Range{Loc: logger.Loc{Start: lexer.Token.Range.End()}}}

		switch lexer.codePoint {
		case eof:
			lexer.Token.Kind = TEndOfFile

		case '/':
			lexer.step()
			switch lexer.codePoint {
			case '*':
				lexer.step()
				lexer.consumeToEndOfMultiLineComment(lexer.Token.Range)
				continue
			case '/':
				// Warn when people use "//" comments, which are invalid in CSS.
				// Note that the "//" is still tokenized as a "/" delimiter below;
				// the check against "oldSingleLineCommentEnd" suppresses repeated
				// warnings for further slashes inside the same commented-out line.
				loc := lexer.Token.Range.Loc
				if loc.Start >= lexer.oldSingleLineCommentEnd.Start {
					contents := lexer.source.Contents
					end := lexer.current
					for end < len(contents) && !isNewline(rune(contents[end])) {
						end++
					}
					lexer.log.AddID(logger.MsgID_CSS_JSCommentInCSS, logger.Warning, &lexer.tracker, logger.Range{Loc: loc, Len: 2},
						"Comments in CSS use \"/* ... */\" instead of \"//\"")
					lexer.oldSingleLineCommentEnd.Start = int32(end)
					lexer.Token.Flags |= DidWarnAboutSingleLineComment
				}
			}
			lexer.Token.Kind = TDelimSlash

		case ' ', '\t', '\n', '\r', '\f':
			lexer.step()
			for {
				if isWhitespace(lexer.codePoint) {
					lexer.step()
				} else if lexer.codePoint == '/' && lexer.current < len(lexer.source.Contents) && lexer.source.Contents[lexer.current] == '*' {
					// Multi-line comments surrounded by whitespace are folded
					// into the whitespace token
					startRange := logger.Range{Loc: logger.Loc{Start: lexer.Token.Range.End()}, Len: 2}
					lexer.step()
					lexer.step()
					lexer.consumeToEndOfMultiLineComment(startRange)
				} else {
					break
				}
			}
			lexer.Token.Kind = TWhitespace

		case '"', '\'':
			lexer.Token.Kind = lexer.consumeString()

		case '#':
			lexer.step()
			if IsNameContinue(lexer.codePoint) || lexer.isValidEscape() {
				lexer.Token.Kind = THash
				// The IsID flag records whether the name after "#" would also
				// be a valid identifier
				if lexer.wouldStartIdentifier() {
					lexer.Token.Flags |= IsID
				}
				lexer.consumeName()
			} else {
				lexer.Token.Kind = TDelim
			}

		case '(':
			lexer.step()
			lexer.Token.Kind = TOpenParen

		case ')':
			lexer.step()
			lexer.Token.Kind = TCloseParen

		case '[':
			lexer.step()
			lexer.Token.Kind = TOpenBracket

		case ']':
			lexer.step()
			lexer.Token.Kind = TCloseBracket

		case '{':
			lexer.step()
			lexer.Token.Kind = TOpenBrace

		case '}':
			lexer.step()
			lexer.Token.Kind = TCloseBrace

		case ',':
			lexer.step()
			lexer.Token.Kind = TComma

		case ':':
			lexer.step()
			lexer.Token.Kind = TColon

		case ';':
			lexer.step()
			lexer.Token.Kind = TSemicolon

		case '+':
			if lexer.wouldStartNumber() {
				lexer.Token.Kind = lexer.consumeNumeric()
			} else {
				lexer.step()
				lexer.Token.Kind = TDelimPlus
			}

		case '.':
			if lexer.wouldStartNumber() {
				lexer.Token.Kind = lexer.consumeNumeric()
			} else {
				lexer.step()
				lexer.Token.Kind = TDelimDot
			}

		case '-':
			if lexer.wouldStartNumber() {
				lexer.Token.Kind = lexer.consumeNumeric()
			} else if lexer.current+2 <= len(lexer.source.Contents) && lexer.source.Contents[lexer.current:lexer.current+2] == "->" {
				// "-->" is a CDC token
				lexer.step()
				lexer.step()
				lexer.step()
				lexer.Token.Kind = TCDC
			} else if lexer.wouldStartIdentifier() {
				lexer.Token.Kind = lexer.consumeIdentLike()
			} else {
				lexer.step()
				lexer.Token.Kind = TDelimMinus
			}

		case '<':
			if lexer.current+3 <= len(lexer.source.Contents) && lexer.source.Contents[lexer.current:lexer.current+3] == "!--" {
				// "<!--" is a CDO token
				lexer.step()
				lexer.step()
				lexer.step()
				lexer.step()
				lexer.Token.Kind = TCDO
			} else {
				lexer.step()
				lexer.Token.Kind = TDelim
			}

		case '@':
			lexer.step()
			if lexer.wouldStartIdentifier() {
				lexer.consumeName()
				lexer.Token.Kind = TAtKeyword
			} else {
				lexer.Token.Kind = TDelim
			}

		case '\\':
			if lexer.isValidEscape() {
				lexer.Token.Kind = lexer.consumeIdentLike()
			} else {
				lexer.step()
				lexer.log.AddError(&lexer.tracker, lexer.Token.Range, "Invalid escape")
				lexer.Token.Kind = TDelim
			}

		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			lexer.Token.Kind = lexer.consumeNumeric()

		case '>':
			lexer.step()
			lexer.Token.Kind = TDelimGreaterThan

		case '~':
			lexer.step()
			lexer.Token.Kind = TDelimTilde

		case '&':
			lexer.step()
			lexer.Token.Kind = TDelimAmpersand

		case '*':
			lexer.step()
			lexer.Token.Kind = TDelimAsterisk

		case '|':
			lexer.step()
			lexer.Token.Kind = TDelimBar

		case '!':
			lexer.step()
			lexer.Token.Kind = TDelimExclamation

		case '=':
			lexer.step()
			lexer.Token.Kind = TDelimEquals

		case '^':
			lexer.step()
			lexer.Token.Kind = TDelimCaret

		case '$':
			lexer.step()
			lexer.Token.Kind = TDelimDollar

		default:
			if IsNameStart(lexer.codePoint) {
				lexer.Token.Kind = lexer.consumeIdentLike()
			} else {
				lexer.step()
				lexer.Token.Kind = TDelim
			}
		}

		return
	}
}

// consumeToEndOfMultiLineComment scans until "*/" (or end of file, which is an
// error), recording "sourceMappingURL=" comments, all comments (when enabled),
// and legal comments as side effects. "startRange" covers the opening "/*".
func (lexer *lexer) consumeToEndOfMultiLineComment(startRange logger.Range) {
	startOfSourceMappingURL := 0
	isLegalComment := false

	switch lexer.codePoint {
	case '#', '@':
		// Keep track of the contents of the "sourceMappingURL=" comment
		if strings.HasPrefix(lexer.source.Contents[lexer.current:], " sourceMappingURL=") {
			startOfSourceMappingURL = lexer.current + len(" sourceMappingURL=")
		}

	case '!':
		// Remember if this is a legal comment
		isLegalComment = true
	}

	for {
		switch lexer.codePoint {
		case '*':
			endOfSourceMappingURL := lexer.current - 1
			lexer.step()
			if lexer.codePoint == '/' {
				commentEnd := lexer.current
				lexer.step()

				// Record the source mapping URL (which runs up to the first
				// whitespace character, or the "*" of the closing "*/")
				if startOfSourceMappingURL != 0 {
					r := logger.Range{Loc: logger.Loc{Start: int32(startOfSourceMappingURL)}}
					text := lexer.source.Contents[startOfSourceMappingURL:endOfSourceMappingURL]
					for int(r.Len) < len(text) && !isWhitespace(rune(text[r.Len])) {
						r.Len++
					}
					lexer.sourceMappingURL = logger.Span{Text: text[:r.Len], Range: r}
				}

				// Record all comments
				commentRange := logger.Range{Loc: startRange.Loc, Len: int32(commentEnd) - startRange.Loc.Start}
				if lexer.RecordAllComments {
					lexer.allComments = append(lexer.allComments, commentRange)
				}

				// Record legal comments ("/*!" or containing "@preserve"/"@license")
				if text := lexer.source.Contents[startRange.Loc.Start:commentEnd]; isLegalComment || containsAtPreserveOrAtLicense(text) {
					text = lexer.source.CommentTextWithoutIndent(commentRange)
					lexer.legalCommentsBefore = append(lexer.legalCommentsBefore, Comment{Loc: startRange.Loc, Text: text})
				}
				return
			}

		case eof: // This indicates the end of the file
			lexer.log.AddErrorWithNotes(&lexer.tracker, logger.Range{Loc: logger.Loc{Start: lexer.Token.Range.End()}},
				"Expected \"*/\" to terminate multi-line comment",
				[]logger.MsgData{lexer.tracker.MsgData(startRange, "The multi-line comment starts here:")})
			return

		default:
			lexer.step()
		}
	}
}

// containsAtPreserveOrAtLicense returns true if the text contains "@preserve"
// or "@license", which marks a comment as a legal comment.
func containsAtPreserveOrAtLicense(text string) bool {
	for i, c := range text {
		if c == '@' && (strings.HasPrefix(text[i+1:], "preserve") || strings.HasPrefix(text[i+1:], "license")) {
			return true
		}
	}
	return false
}

// isValidEscape returns true if the current code point is a backslash that is
// not followed by a newline (i.e. it begins a valid escape sequence).
func (lexer *lexer) isValidEscape() bool {
	if lexer.codePoint != '\\' {
		return false
	}
	c, _ := utf8.DecodeRuneInString(lexer.source.Contents[lexer.current:])
	return !isNewline(c)
}

// wouldStartIdentifier returns true if an identifier starts at the current
// position: a name-start code point, a "-" followed by a name-start code
// point, another "-", or a valid escape, or a valid escape itself.
func (lexer *lexer) wouldStartIdentifier() bool {
	if IsNameStart(lexer.codePoint) {
		return true
	}

	if lexer.codePoint == '-' {
		c, width := utf8.DecodeRuneInString(lexer.source.Contents[lexer.current:])
		if c == utf8.RuneError && width <= 1 {
			return false // Decoding error
		}
		if IsNameStart(c) || c == '-' {
			return true
		}
		if c == '\\' {
			c2, _ := utf8.DecodeRuneInString(lexer.source.Contents[lexer.current+width:])
			return !isNewline(c2)
		}
		return false
	}

	return lexer.isValidEscape()
}

// WouldStartIdentifierWithoutEscapes is like "wouldStartIdentifier" but
// operates on a plain string and does not consider escape sequences.
func WouldStartIdentifierWithoutEscapes(text string) bool {
	c, width := utf8.DecodeRuneInString(text)
	if c == utf8.RuneError && width <= 1 {
		return false // Decoding error
	}
	if IsNameStart(c) {
		return true
	}

	if c == '-' {
		c2, width2 := utf8.DecodeRuneInString(text[width:])
		if c2 == utf8.RuneError && width2 <= 1 {
			return false // Decoding error
		}
		if IsNameStart(c2) || c2 == '-' {
			return true
		}
	}
	return false
}

// RangeOfIdentifier returns the source range of the identifier (including
// escape sequences) starting at "loc". Used for error reporting.
func RangeOfIdentifier(source logger.Source, loc logger.Loc) logger.Range {
	text := source.Contents[loc.Start:]
	if len(text) == 0 {
		return logger.Range{Loc: loc, Len: 0}
	}

	i := 0
	n := len(text)

	for {
		c, width := utf8.DecodeRuneInString(text[i:])
		if IsNameContinue(c) {
			i += width
			continue
		}

		// Handle an escape
		if c == '\\' && i+1 < n && !isNewline(rune(text[i+1])) {
			i += width // Skip the backslash
			c, width = utf8.DecodeRuneInString(text[i:])
			if _, ok := isHex(c); ok {
				// A hex escape is up to six hex digits plus one optional
				// trailing whitespace character
				i += width
				c, width = utf8.DecodeRuneInString(text[i:])
				for j := 0; j < 5; j++ {
					if _, ok := isHex(c); !ok {
						break
					}
					i += width
					c, width = utf8.DecodeRuneInString(text[i:])
				}
				if isWhitespace(c) {
					i += width
				}
			}
			continue
		}

		break
	}

	// Don't end with a whitespace
	if i > 0 && isWhitespace(rune(text[i-1])) {
		i--
	}

	return logger.Range{Loc: loc, Len: int32(i)}
}

// wouldStartNumber returns true if a number starts at the current position:
// a digit, "." followed by a digit, or "+"/"-" followed by a digit or by "."
// and a digit.
func (lexer *lexer) wouldStartNumber() bool {
	if lexer.codePoint >= '0' && lexer.codePoint <= '9' {
		return true
	} else if lexer.codePoint == '.' {
		contents := lexer.source.Contents
		if lexer.current < len(contents) {
			c := contents[lexer.current]
			return c >= '0' && c <= '9'
		}
	} else if lexer.codePoint == '+' || lexer.codePoint == '-' {
		contents := lexer.source.Contents
		n := len(contents)
		if lexer.current < n {
			c := contents[lexer.current]
			if c >= '0' && c <= '9' {
				return true
			}
			if c == '.' && lexer.current+1 < n {
				c = contents[lexer.current+1]
				return c >= '0' && c <= '9'
			}
		}
	}
	return false
}

// consumeName consumes a name (identifier) and returns its decoded text,
// which starts at the current token's start offset.
// Note: This function is hot in profiles
func (lexer *lexer) consumeName() string {
	// Common case: no escapes, identifier is a substring of the input. Doing this
	// in a tight loop that avoids UTF-8 decoding and that increments a single
	// number instead of doing "step()" is noticeably faster. For example, doing
	// this sped up end-to-end parsing and printing of a large CSS file from 97ms
	// to 84ms (around 15% faster).
	contents := lexer.source.Contents
	if IsNameContinue(lexer.codePoint) {
		n := len(contents)
		i := lexer.current
		for i < n && IsNameContinue(rune(contents[i])) {
			i++
		}
		lexer.current = i
		lexer.step()
	}
	raw := contents[lexer.Token.Range.Loc.Start:lexer.Token.Range.End()]
	if !lexer.isValidEscape() {
		return raw
	}

	// Uncommon case: escapes, identifier is allocated
	sb := strings.Builder{}
	sb.WriteString(raw)
	sb.WriteRune(lexer.consumeEscape())
	for {
		if IsNameContinue(lexer.codePoint) {
			sb.WriteRune(lexer.codePoint)
			lexer.step()
		} else if lexer.isValidEscape() {
			sb.WriteRune(lexer.consumeEscape())
		} else {
			break
		}
	}
	return sb.String()
}

// consumeEscape consumes a backslash escape sequence (the current code point
// must be the backslash) and returns the code point it denotes. The null code
// point, surrogates, and out-of-range values decode to U+FFFD.
func (lexer *lexer) consumeEscape() rune {
	lexer.step() // Skip the backslash
	c := lexer.codePoint

	if hex, ok := isHex(c); ok {
		// Up to six hex digits, then one optional whitespace character
		lexer.step()
		for i := 0; i < 5; i++ {
			if next, ok := isHex(lexer.codePoint); ok {
				lexer.step()
				hex = hex*16 + next
			} else {
				break
			}
		}
		if isWhitespace(lexer.codePoint) {
			lexer.step()
		}
		if hex == 0 || (hex >= 0xD800 && hex <= 0xDFFF) || hex > 0x10FFFF {
			return utf8.RuneError
		}
		return rune(hex)
	}

	if c == eof {
		return utf8.RuneError
	}

	lexer.step()
	return c
}

// consumeIdentLike consumes an identifier and classifies it as TIdent,
// TFunction (name followed by "("), or a URL token (the special "url(" form
// without a quoted argument).
func (lexer *lexer) consumeIdentLike() T {
	name := lexer.consumeName()

	if lexer.codePoint == '(' {
		matchingLoc := logger.Loc{Start: lexer.Token.Range.End()}
		lexer.step()
		if len(name) == 3 {
			u, r, l := name[0], name[1], name[2]
			if (u == 'u' || u == 'U') && (r == 'r' || r == 'R') && (l == 'l' || l == 'L') {
				// Save state
				approximateNewlineCount := lexer.approximateNewlineCount
				codePoint := lexer.codePoint
				tokenRangeLen := lexer.Token.Range.Len
				current := lexer.current

				// Check to see if this is a URL token instead of a function.
				// A quoted argument means "url(...)" is a plain function token.
				for isWhitespace(lexer.codePoint) {
					lexer.step()
				}
				if lexer.codePoint != '"' && lexer.codePoint != '\'' {
					return lexer.consumeURL(matchingLoc)
				}

				// Restore state (i.e. backtrack)
				lexer.approximateNewlineCount = approximateNewlineCount
				lexer.codePoint = codePoint
				lexer.Token.Range.Len = tokenRangeLen
				lexer.current = current
			}
		}
		return TFunction
	}

	return TIdent
}

// consumeURL consumes the body of an unquoted "url(...)" token up to the
// closing ")". On a syntax error it emits a warning, skips the remnants, and
// returns TBadURL. "matchingLoc" points at the unbalanced "(" for notes.
func (lexer *lexer) consumeURL(matchingLoc logger.Loc) T {
validURL:
	for {
		switch lexer.codePoint {
		case ')':
			lexer.step()
			return TURL

		case eof:
			loc := logger.Loc{Start: lexer.Token.Range.End()}
			lexer.log.AddIDWithNotes(logger.MsgID_CSS_CSSSyntaxError, logger.Warning, &lexer.tracker, logger.Range{Loc: loc}, "Expected \")\" to end URL token",
				[]logger.MsgData{lexer.tracker.MsgData(logger.Range{Loc: matchingLoc, Len: 1}, "The unbalanced \"(\" is here:")})
			return TURL

		case ' ', '\t', '\n', '\r', '\f':
			// Whitespace is only allowed immediately before the closing ")"
			lexer.step()
			for isWhitespace(lexer.codePoint) {
				lexer.step()
			}
			if lexer.codePoint != ')' {
				loc := logger.Loc{Start: lexer.Token.Range.End()}
				lexer.log.AddIDWithNotes(logger.MsgID_CSS_CSSSyntaxError, logger.Warning, &lexer.tracker, logger.Range{Loc: loc}, "Expected \")\" to end URL token",
					[]logger.MsgData{lexer.tracker.MsgData(logger.Range{Loc: matchingLoc, Len: 1}, "The unbalanced \"(\" is here:")})
				if lexer.codePoint == eof {
					return TURL
				}
				break validURL
			}
			lexer.step()
			return TURL

		case '"', '\'', '(':
			// Quotes and "(" are not allowed inside an unquoted URL
			r := logger.Range{Loc: logger.Loc{Start: lexer.Token.Range.End()}, Len: 1}
			lexer.log.AddIDWithNotes(logger.MsgID_CSS_CSSSyntaxError, logger.Warning, &lexer.tracker, r, "Expected \")\" to end URL token",
				[]logger.MsgData{lexer.tracker.MsgData(logger.Range{Loc: matchingLoc, Len: 1}, "The unbalanced \"(\" is here:")})
			break validURL

		case '\\':
			if !lexer.isValidEscape() {
				r := logger.Range{Loc: logger.Loc{Start: lexer.Token.Range.End()}, Len: 1}
				lexer.log.AddID(logger.MsgID_CSS_CSSSyntaxError, logger.Warning, &lexer.tracker, r, "Invalid escape")
				break validURL
			}
			lexer.consumeEscape()

		default:
			if isNonPrintable(lexer.codePoint) {
				r := logger.Range{Loc: logger.Loc{Start: lexer.Token.Range.End()}, Len: 1}
				lexer.log.AddID(logger.MsgID_CSS_CSSSyntaxError, logger.Warning, &lexer.tracker, r, "Unexpected non-printable character in URL token")
				break validURL
			}
			lexer.step()
		}
	}

	// Consume the remnants of a bad url
	for {
		switch lexer.codePoint {
		case ')', eof:
			lexer.step()
			return TBadURL

		case '\\':
			if lexer.isValidEscape() {
				lexer.consumeEscape()
			}
		}
		lexer.step()
	}
}

// consumeString consumes a quoted string (the current code point must be the
// opening quote). An unescaped newline or end of file before the closing
// quote produces a TUnterminatedString token and a warning.
func (lexer *lexer) consumeString() T {
	quote := lexer.codePoint
	lexer.step()

	for {
		switch lexer.codePoint {
		case '\\':
			lexer.step()

			// Handle Windows CRLF
			if lexer.codePoint == '\r' {
				lexer.step()
				if lexer.codePoint == '\n' {
					lexer.step()
				}
				continue
			}

			// Otherwise, fall through to ignore the character after the backslash

		case eof, '\n', '\r', '\f':
			lexer.log.AddID(logger.MsgID_CSS_CSSSyntaxError, logger.Warning, &lexer.tracker,
				logger.Range{Loc: logger.Loc{Start: lexer.Token.Range.End()}},
				"Unterminated string token")
			return TUnterminatedString

		case quote:
			lexer.step()
			return TString
		}
		lexer.step()
	}
}

// consumeNumeric consumes a number and classifies it as TNumber, TPercentage
// (trailing "%"), or TDimension (trailing unit name, recorded in UnitOffset).
func (lexer *lexer) consumeNumeric() T {
	// Skip over leading sign
	if lexer.codePoint == '+' || lexer.codePoint == '-' {
		lexer.step()
	}

	// Skip over leading digits
	for lexer.codePoint >= '0' && lexer.codePoint <= '9' {
		lexer.step()
	}

	// Skip over digits after dot
	if lexer.codePoint == '.' {
		lexer.step()
		for lexer.codePoint >= '0' && lexer.codePoint <= '9' {
			lexer.step()
		}
	}

	// Skip over exponent
	if lexer.codePoint == 'e' || lexer.codePoint == 'E' {
		contents := lexer.source.Contents

		// Look ahead before advancing to make sure this is an exponent, not a unit
		if lexer.current < len(contents) {
			c := contents[lexer.current]
			if (c == '+' || c == '-') && lexer.current+1 < len(contents) {
				c = contents[lexer.current+1]
			}

			// Only consume this if it's an exponent
			if c >= '0' && c <= '9' {
				lexer.step()
				if lexer.codePoint == '+' || lexer.codePoint == '-' {
					lexer.step()
				}
				for lexer.codePoint >= '0' && lexer.codePoint <= '9' {
					lexer.step()
				}
			}
		}
	}

	// Determine the numeric type
	if lexer.wouldStartIdentifier() {
		// Remember where the unit begins within the token
		lexer.Token.UnitOffset = uint16(lexer.Token.Range.Len)
		lexer.consumeName()
		return TDimension
	}
	if lexer.codePoint == '%' {
		lexer.step()
		return TPercentage
	}
	return TNumber
}

// IsNameStart returns true if "c" can begin a name. Note that U+0000 counts
// as a name code point here; it is replaced with U+FFFD when the token text
// is decoded (see decodeEscapesInToken).
func IsNameStart(c rune) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c >= 0x80 || c == '\x00'
}

// IsNameContinue returns true if "c" can appear inside a name.
func IsNameContinue(c rune) bool {
	return IsNameStart(c) || (c >= '0' && c <= '9') || c == '-'
}

// isNewline returns true for the code points CSS treats as newlines.
func isNewline(c rune) bool {
	switch c {
	case '\n', '\r', '\f':
		return true
	}
	return false
}

// isWhitespace returns true for the code points CSS treats as whitespace.
// Note that U+FEFF is deliberately not included (see Tokenize).
func isWhitespace(c rune) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f':
		return true
	}
	return false
}

// isHex returns the numeric value of a hex digit and whether "c" is one.
func isHex(c rune) (int, bool) {
	if c >= '0' && c <= '9' {
		return int(c - '0'), true
	}
	if c >= 'a' && c <= 'f' {
		return int(c + (10 - 'a')), true
	}
	if c >= 'A' && c <= 'F' {
		return int(c + (10 - 'A')), true
	}
	return 0, false
}

// isNonPrintable returns true for code points that are not allowed to appear
// unescaped inside an unquoted URL token.
func isNonPrintable(c rune) bool {
	return c <= 0x08 || c == 0x0B || (c >= 0x0E && c <= 0x1F) || c == 0x7F
}
func decodeEscapesInToken(inner string) string { 996 i := 0 997 998 for i < len(inner) { 999 if c := inner[i]; c == '\\' || c == '\x00' { 1000 break 1001 } 1002 i++ 1003 } 1004 1005 if i == len(inner) { 1006 return inner 1007 } 1008 1009 sb := strings.Builder{} 1010 sb.WriteString(inner[:i]) 1011 inner = inner[i:] 1012 1013 for len(inner) > 0 { 1014 c, width := utf8.DecodeRuneInString(inner) 1015 inner = inner[width:] 1016 1017 if c != '\\' { 1018 if c == '\x00' { 1019 c = utf8.RuneError 1020 } 1021 sb.WriteRune(c) 1022 continue 1023 } 1024 1025 if len(inner) == 0 { 1026 sb.WriteRune(utf8.RuneError) 1027 continue 1028 } 1029 1030 c, width = utf8.DecodeRuneInString(inner) 1031 inner = inner[width:] 1032 hex, ok := isHex(c) 1033 1034 if !ok { 1035 if c == '\n' || c == '\f' { 1036 continue 1037 } 1038 1039 // Handle Windows CRLF 1040 if c == '\r' { 1041 c, width = utf8.DecodeRuneInString(inner) 1042 if c == '\n' { 1043 inner = inner[width:] 1044 } 1045 continue 1046 } 1047 1048 // If we get here, this is not a valid escape. However, this is still 1049 // allowed. In this case the backslash is just ignored. 1050 sb.WriteRune(c) 1051 continue 1052 } 1053 1054 // Parse up to five additional hex characters (so six in total) 1055 for i := 0; i < 5 && len(inner) > 0; i++ { 1056 c, width = utf8.DecodeRuneInString(inner) 1057 if next, ok := isHex(c); ok { 1058 inner = inner[width:] 1059 hex = hex*16 + next 1060 } else { 1061 break 1062 } 1063 } 1064 1065 if len(inner) > 0 { 1066 c, width = utf8.DecodeRuneInString(inner) 1067 if isWhitespace(c) { 1068 inner = inner[width:] 1069 } 1070 } 1071 1072 if hex == 0 || (hex >= 0xD800 && hex <= 0xDFFF) || hex > 0x10FFFF { 1073 sb.WriteRune(utf8.RuneError) 1074 continue 1075 } 1076 1077 sb.WriteRune(rune(hex)) 1078 } 1079 1080 return sb.String() 1081 }