github.com/anakojm/hugo-katex@v0.0.0-20231023141351-42d6f5de9c0b/tpl/internal/go_templates/htmltemplate/transition.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "strings" 10 ) 11 12 // transitionFunc is the array of context transition functions for text nodes. 13 // A transition function takes a context and template text input, and returns 14 // the updated context and the number of bytes consumed from the front of the 15 // input. 16 var transitionFunc = [...]func(context, []byte) (context, int){ 17 stateText: tText, 18 stateTag: tTag, 19 stateAttrName: tAttrName, 20 stateAfterName: tAfterName, 21 stateBeforeValue: tBeforeValue, 22 stateHTMLCmt: tHTMLCmt, 23 stateRCDATA: tSpecialTagEnd, 24 stateAttr: tAttr, 25 stateURL: tURL, 26 stateSrcset: tURL, 27 stateJS: tJS, 28 stateJSDqStr: tJSDelimited, 29 stateJSSqStr: tJSDelimited, 30 stateJSBqStr: tJSDelimited, 31 stateJSRegexp: tJSDelimited, 32 stateJSBlockCmt: tBlockCmt, 33 stateJSLineCmt: tLineCmt, 34 stateJSHTMLOpenCmt: tLineCmt, 35 stateJSHTMLCloseCmt: tLineCmt, 36 stateCSS: tCSS, 37 stateCSSDqStr: tCSSStr, 38 stateCSSSqStr: tCSSStr, 39 stateCSSDqURL: tCSSStr, 40 stateCSSSqURL: tCSSStr, 41 stateCSSURL: tCSSStr, 42 stateCSSBlockCmt: tBlockCmt, 43 stateCSSLineCmt: tLineCmt, 44 stateError: tError, 45 } 46 47 var commentStart = []byte("<!--") 48 var commentEnd = []byte("-->") 49 50 // tText is the context transition function for the text state. 51 func tText(c context, s []byte) (context, int) { 52 k := 0 53 for { 54 i := k + bytes.IndexByte(s[k:], '<') 55 if i < k || i+1 == len(s) { 56 return c, len(s) 57 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) { 58 return context{state: stateHTMLCmt}, i + 4 59 } 60 i++ 61 end := false 62 if s[i] == '/' { 63 if i+1 == len(s) { 64 return c, len(s) 65 } 66 end, i = true, i+1 67 } 68 j, e := eatTagName(s, i) 69 if j != i { 70 if end { 71 e = elementNone 72 } 73 // We've found an HTML tag. 74 return context{state: stateTag, element: e}, j 75 } 76 k = j 77 } 78 } 79 80 var elementContentType = [...]state{ 81 elementNone: stateText, 82 elementScript: stateJS, 83 elementStyle: stateCSS, 84 elementTextarea: stateRCDATA, 85 elementTitle: stateRCDATA, 86 } 87 88 // tTag is the context transition function for the tag state. 89 func tTag(c context, s []byte) (context, int) { 90 // Find the attribute name. 91 i := eatWhiteSpace(s, 0) 92 if i == len(s) { 93 return c, len(s) 94 } 95 if s[i] == '>' { 96 return context{ 97 state: elementContentType[c.element], 98 element: c.element, 99 }, i + 1 100 } 101 j, err := eatAttrName(s, i) 102 if err != nil { 103 return context{state: stateError, err: err}, len(s) 104 } 105 state, attr := stateTag, attrNone 106 if i == j { 107 return context{ 108 state: stateError, 109 err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]), 110 }, len(s) 111 } 112 113 attrName := strings.ToLower(string(s[i:j])) 114 if c.element == elementScript && attrName == "type" { 115 attr = attrScriptType 116 } else { 117 switch attrType(attrName) { 118 case contentTypeURL: 119 attr = attrURL 120 case contentTypeCSS: 121 attr = attrStyle 122 case contentTypeJS: 123 attr = attrScript 124 case contentTypeSrcset: 125 attr = attrSrcset 126 } 127 } 128 129 if j == len(s) { 130 state = stateAttrName 131 } else { 132 state = stateAfterName 133 } 134 return context{state: state, element: c.element, attr: attr}, j 135 } 136 137 // tAttrName is the context transition function for stateAttrName. 138 func tAttrName(c context, s []byte) (context, int) { 139 i, err := eatAttrName(s, 0) 140 if err != nil { 141 return context{state: stateError, err: err}, len(s) 142 } else if i != len(s) { 143 c.state = stateAfterName 144 } 145 return c, i 146 } 147 148 // tAfterName is the context transition function for stateAfterName. 149 func tAfterName(c context, s []byte) (context, int) { 150 // Look for the start of the value. 151 i := eatWhiteSpace(s, 0) 152 if i == len(s) { 153 return c, len(s) 154 } else if s[i] != '=' { 155 // Occurs due to tag ending '>', and valueless attribute. 156 c.state = stateTag 157 return c, i 158 } 159 c.state = stateBeforeValue 160 // Consume the "=". 161 return c, i + 1 162 } 163 164 var attrStartStates = [...]state{ 165 attrNone: stateAttr, 166 attrScript: stateJS, 167 attrScriptType: stateAttr, 168 attrStyle: stateCSS, 169 attrURL: stateURL, 170 attrSrcset: stateSrcset, 171 } 172 173 // tBeforeValue is the context transition function for stateBeforeValue. 174 func tBeforeValue(c context, s []byte) (context, int) { 175 i := eatWhiteSpace(s, 0) 176 if i == len(s) { 177 return c, len(s) 178 } 179 // Find the attribute delimiter. 180 delim := delimSpaceOrTagEnd 181 switch s[i] { 182 case '\'': 183 delim, i = delimSingleQuote, i+1 184 case '"': 185 delim, i = delimDoubleQuote, i+1 186 } 187 c.state, c.delim = attrStartStates[c.attr], delim 188 return c, i 189 } 190 191 // tHTMLCmt is the context transition function for stateHTMLCmt. 192 func tHTMLCmt(c context, s []byte) (context, int) { 193 if i := bytes.Index(s, commentEnd); i != -1 { 194 return context{}, i + 3 195 } 196 return c, len(s) 197 } 198 199 // specialTagEndMarkers maps element types to the character sequence that 200 // case-insensitively signals the end of the special tag body. 201 var specialTagEndMarkers = [...][]byte{ 202 elementScript: []byte("script"), 203 elementStyle: []byte("style"), 204 elementTextarea: []byte("textarea"), 205 elementTitle: []byte("title"), 206 } 207 208 var ( 209 specialTagEndPrefix = []byte("</") 210 tagEndSeparators = []byte("> \t\n\f/") 211 ) 212 213 // tSpecialTagEnd is the context transition function for raw text and RCDATA 214 // element states. 215 func tSpecialTagEnd(c context, s []byte) (context, int) { 216 if c.element != elementNone { 217 // script end tags ("</script") within script literals are ignored, so that 218 // we can properly escape them. 219 if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) { 220 return c, len(s) 221 } 222 if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 { 223 return context{}, i 224 } 225 } 226 return c, len(s) 227 } 228 229 // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1 230 func indexTagEnd(s []byte, tag []byte) int { 231 res := 0 232 plen := len(specialTagEndPrefix) 233 for len(s) > 0 { 234 // Try to find the tag end prefix first 235 i := bytes.Index(s, specialTagEndPrefix) 236 if i == -1 { 237 return i 238 } 239 s = s[i+plen:] 240 // Try to match the actual tag if there is still space for it 241 if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) { 242 s = s[len(tag):] 243 // Check the tag is followed by a proper separator 244 if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 { 245 return res + i 246 } 247 res += len(tag) 248 } 249 res += i + plen 250 } 251 return -1 252 } 253 254 // tAttr is the context transition function for the attribute state. 255 func tAttr(c context, s []byte) (context, int) { 256 return c, len(s) 257 } 258 259 // tURL is the context transition function for the URL state. 260 func tURL(c context, s []byte) (context, int) { 261 if bytes.ContainsAny(s, "#?") { 262 c.urlPart = urlPartQueryOrFrag 263 } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone { 264 // HTML5 uses "Valid URL potentially surrounded by spaces" for 265 // attrs: https://www.w3.org/TR/html5/index.html#attributes-1 266 c.urlPart = urlPartPreQuery 267 } 268 return c, len(s) 269 } 270 271 // tJS is the context transition function for the JS state. 272 func tJS(c context, s []byte) (context, int) { 273 i := bytes.IndexAny(s, "\"`'/<-#") 274 if i == -1 { 275 // Entire input is non string, comment, regexp tokens. 276 c.jsCtx = nextJSCtx(s, c.jsCtx) 277 return c, len(s) 278 } 279 c.jsCtx = nextJSCtx(s[:i], c.jsCtx) 280 switch s[i] { 281 case '"': 282 c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp 283 case '\'': 284 c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp 285 case '`': 286 c.state, c.jsCtx = stateJSBqStr, jsCtxRegexp 287 case '/': 288 switch { 289 case i+1 < len(s) && s[i+1] == '/': 290 c.state, i = stateJSLineCmt, i+1 291 case i+1 < len(s) && s[i+1] == '*': 292 c.state, i = stateJSBlockCmt, i+1 293 case c.jsCtx == jsCtxRegexp: 294 c.state = stateJSRegexp 295 case c.jsCtx == jsCtxDivOp: 296 c.jsCtx = jsCtxRegexp 297 default: 298 return context{ 299 state: stateError, 300 err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]), 301 }, len(s) 302 } 303 // ECMAScript supports HTML style comments for legacy reasons, see Appendix 304 // B.1.1 "HTML-like Comments". The handling of these comments is somewhat 305 // confusing. Multi-line comments are not supported, i.e. anything on lines 306 // between the opening and closing tokens is not considered a comment, but 307 // anything following the opening or closing token, on the same line, is 308 // ignored. As such we simply treat any line prefixed with "<!--" or "-->" 309 // as if it were actually prefixed with "//" and move on. 310 case '<': 311 if i+3 < len(s) && bytes.Equal(commentStart, s[i:i+4]) { 312 c.state, i = stateJSHTMLOpenCmt, i+3 313 } 314 case '-': 315 if i+2 < len(s) && bytes.Equal(commentEnd, s[i:i+3]) { 316 c.state, i = stateJSHTMLCloseCmt, i+2 317 } 318 // ECMAScript also supports "hashbang" comment lines, see Section 12.5. 319 case '#': 320 if i+1 < len(s) && s[i+1] == '!' { 321 c.state, i = stateJSLineCmt, i+1 322 } 323 default: 324 panic("unreachable") 325 } 326 return c, i + 1 327 } 328 329 // tJSDelimited is the context transition function for the JS string and regexp 330 // states. 331 func tJSDelimited(c context, s []byte) (context, int) { 332 specials := `\"` 333 switch c.state { 334 case stateJSSqStr: 335 specials = `\'` 336 case stateJSBqStr: 337 specials = "`\\" 338 case stateJSRegexp: 339 specials = `\/[]` 340 } 341 342 k, inCharset := 0, false 343 for { 344 i := k + bytes.IndexAny(s[k:], specials) 345 if i < k { 346 break 347 } 348 switch s[i] { 349 case '\\': 350 i++ 351 if i == len(s) { 352 return context{ 353 state: stateError, 354 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s), 355 }, len(s) 356 } 357 case '[': 358 inCharset = true 359 case ']': 360 inCharset = false 361 case '/': 362 // If "</script" appears in a regex literal, the '/' should not 363 // close the regex literal, and it will later be escaped to 364 // "\x3C/script" in escapeText. 365 if i > 0 && i+7 <= len(s) && bytes.Compare(bytes.ToLower(s[i-1:i+7]), []byte("</script")) == 0 { 366 i++ 367 } else if !inCharset { 368 c.state, c.jsCtx = stateJS, jsCtxDivOp 369 return c, i + 1 370 } 371 default: 372 // end delimiter 373 if !inCharset { 374 c.state, c.jsCtx = stateJS, jsCtxDivOp 375 return c, i + 1 376 } 377 } 378 k = i + 1 379 } 380 381 if inCharset { 382 // This can be fixed by making context richer if interpolation 383 // into charsets is desired. 384 return context{ 385 state: stateError, 386 err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s), 387 }, len(s) 388 } 389 390 return c, len(s) 391 } 392 393 var blockCommentEnd = []byte("*/") 394 395 // tBlockCmt is the context transition function for /*comment*/ states. 396 func tBlockCmt(c context, s []byte) (context, int) { 397 i := bytes.Index(s, blockCommentEnd) 398 if i == -1 { 399 return c, len(s) 400 } 401 switch c.state { 402 case stateJSBlockCmt: 403 c.state = stateJS 404 case stateCSSBlockCmt: 405 c.state = stateCSS 406 default: 407 panic(c.state.String()) 408 } 409 return c, i + 2 410 } 411 412 // tLineCmt is the context transition function for //comment states, and the JS HTML-like comment state. 413 func tLineCmt(c context, s []byte) (context, int) { 414 var lineTerminators string 415 var endState state 416 switch c.state { 417 case stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt: 418 lineTerminators, endState = "\n\r\u2028\u2029", stateJS 419 case stateCSSLineCmt: 420 lineTerminators, endState = "\n\f\r", stateCSS 421 // Line comments are not part of any published CSS standard but 422 // are supported by the 4 major browsers. 423 // This defines line comments as 424 // LINECOMMENT ::= "//" [^\n\f\d]* 425 // since https://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines 426 // newlines: 427 // nl ::= #xA | #xD #xA | #xD | #xC 428 default: 429 panic(c.state.String()) 430 } 431 432 i := bytes.IndexAny(s, lineTerminators) 433 if i == -1 { 434 return c, len(s) 435 } 436 c.state = endState 437 // Per section 7.4 of EcmaScript 5 : https://es5.github.io/#x7.4 438 // "However, the LineTerminator at the end of the line is not 439 // considered to be part of the single-line comment; it is 440 // recognized separately by the lexical grammar and becomes part 441 // of the stream of input elements for the syntactic grammar." 442 return c, i 443 } 444 445 // tCSS is the context transition function for the CSS state. 446 func tCSS(c context, s []byte) (context, int) { 447 // CSS quoted strings are almost never used except for: 448 // (1) URLs as in background: "/foo.png" 449 // (2) Multiword font-names as in font-family: "Times New Roman" 450 // (3) List separators in content values as in inline-lists: 451 // <style> 452 // ul.inlineList { list-style: none; padding:0 } 453 // ul.inlineList > li { display: inline } 454 // ul.inlineList > li:before { content: ", " } 455 // ul.inlineList > li:first-child:before { content: "" } 456 // </style> 457 // <ul class=inlineList><li>One<li>Two<li>Three</ul> 458 // (4) Attribute value selectors as in a[href="http://example.com/"] 459 // 460 // We conservatively treat all strings as URLs, but make some 461 // allowances to avoid confusion. 462 // 463 // In (1), our conservative assumption is justified. 464 // In (2), valid font names do not contain ':', '?', or '#', so our 465 // conservative assumption is fine since we will never transition past 466 // urlPartPreQuery. 467 // In (3), our protocol heuristic should not be tripped, and there 468 // should not be non-space content after a '?' or '#', so as long as 469 // we only %-encode RFC 3986 reserved characters we are ok. 470 // In (4), we should URL escape for URL attributes, and for others we 471 // have the attribute name available if our conservative assumption 472 // proves problematic for real code. 473 474 k := 0 475 for { 476 i := k + bytes.IndexAny(s[k:], `("'/`) 477 if i < k { 478 return c, len(s) 479 } 480 switch s[i] { 481 case '(': 482 // Look for url to the left. 483 p := bytes.TrimRight(s[:i], "\t\n\f\r ") 484 if endsWithCSSKeyword(p, "url") { 485 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r ")) 486 switch { 487 case j != len(s) && s[j] == '"': 488 c.state, j = stateCSSDqURL, j+1 489 case j != len(s) && s[j] == '\'': 490 c.state, j = stateCSSSqURL, j+1 491 default: 492 c.state = stateCSSURL 493 } 494 return c, j 495 } 496 case '/': 497 if i+1 < len(s) { 498 switch s[i+1] { 499 case '/': 500 c.state = stateCSSLineCmt 501 return c, i + 2 502 case '*': 503 c.state = stateCSSBlockCmt 504 return c, i + 2 505 } 506 } 507 case '"': 508 c.state = stateCSSDqStr 509 return c, i + 1 510 case '\'': 511 c.state = stateCSSSqStr 512 return c, i + 1 513 } 514 k = i + 1 515 } 516 } 517 518 // tCSSStr is the context transition function for the CSS string and URL states. 519 func tCSSStr(c context, s []byte) (context, int) { 520 var endAndEsc string 521 switch c.state { 522 case stateCSSDqStr, stateCSSDqURL: 523 endAndEsc = `\"` 524 case stateCSSSqStr, stateCSSSqURL: 525 endAndEsc = `\'` 526 case stateCSSURL: 527 // Unquoted URLs end with a newline or close parenthesis. 528 // The below includes the wc (whitespace character) and nl. 529 endAndEsc = "\\\t\n\f\r )" 530 default: 531 panic(c.state.String()) 532 } 533 534 k := 0 535 for { 536 i := k + bytes.IndexAny(s[k:], endAndEsc) 537 if i < k { 538 c, nread := tURL(c, decodeCSS(s[k:])) 539 return c, k + nread 540 } 541 if s[i] == '\\' { 542 i++ 543 if i == len(s) { 544 return context{ 545 state: stateError, 546 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s), 547 }, len(s) 548 } 549 } else { 550 c.state = stateCSS 551 return c, i + 1 552 } 553 c, _ = tURL(c, decodeCSS(s[:i+1])) 554 k = i + 1 555 } 556 } 557 558 // tError is the context transition function for the error state. 559 func tError(c context, s []byte) (context, int) { 560 return c, len(s) 561 } 562 563 // eatAttrName returns the largest j such that s[i:j] is an attribute name. 564 // It returns an error if s[i:] does not look like it begins with an 565 // attribute name, such as encountering a quote mark without a preceding 566 // equals sign. 567 func eatAttrName(s []byte, i int) (int, *Error) { 568 for j := i; j < len(s); j++ { 569 switch s[j] { 570 case ' ', '\t', '\n', '\f', '\r', '=', '>': 571 return j, nil 572 case '\'', '"', '<': 573 // These result in a parse warning in HTML5 and are 574 // indicative of serious problems if seen in an attr 575 // name in a template. 576 return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s) 577 default: 578 // No-op. 579 } 580 } 581 return len(s), nil 582 } 583 584 var elementNameMap = map[string]element{ 585 "script": elementScript, 586 "style": elementStyle, 587 "textarea": elementTextarea, 588 "title": elementTitle, 589 } 590 591 // asciiAlpha reports whether c is an ASCII letter. 592 func asciiAlpha(c byte) bool { 593 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' 594 } 595 596 // asciiAlphaNum reports whether c is an ASCII letter or digit. 597 func asciiAlphaNum(c byte) bool { 598 return asciiAlpha(c) || '0' <= c && c <= '9' 599 } 600 601 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type. 602 func eatTagName(s []byte, i int) (int, element) { 603 if i == len(s) || !asciiAlpha(s[i]) { 604 return i, elementNone 605 } 606 j := i + 1 607 for j < len(s) { 608 x := s[j] 609 if asciiAlphaNum(x) { 610 j++ 611 continue 612 } 613 // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y". 614 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) { 615 j += 2 616 continue 617 } 618 break 619 } 620 return j, elementNameMap[strings.ToLower(string(s[i:j]))] 621 } 622 623 // eatWhiteSpace returns the largest j such that s[i:j] is white space. 624 func eatWhiteSpace(s []byte, i int) int { 625 for j := i; j < len(s); j++ { 626 switch s[j] { 627 case ' ', '\t', '\n', '\f', '\r': 628 // No-op. 629 default: 630 return j 631 } 632 } 633 return len(s) 634 }