// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package template

import (
	"bytes"
	"strings"
)

// transitionFunc is the array of context transition functions for text nodes.
// A transition function takes a context and template text input, and returns
// the updated context and the number of bytes consumed from the front of the
// input.
//
// The array is indexed by state constant. Several states share one function:
// the URL and srcset states both use tURL, the JS string and regexp states
// all use tJSDelimited, the CSS string and URL states all use tCSSStr, and
// the JS and CSS comment states share tBlockCmt and tLineCmt.
var transitionFunc = [...]func(context, []byte) (context, int){
	stateText:        tText,
	stateTag:         tTag,
	stateAttrName:    tAttrName,
	stateAfterName:   tAfterName,
	stateBeforeValue: tBeforeValue,
	stateHTMLCmt:     tHTMLCmt,
	stateRCDATA:      tSpecialTagEnd,
	stateAttr:        tAttr,
	stateURL:         tURL,
	stateSrcset:      tURL,
	stateJS:          tJS,
	stateJSDqStr:     tJSDelimited,
	stateJSSqStr:     tJSDelimited,
	stateJSBqStr:     tJSDelimited,
	stateJSRegexp:    tJSDelimited,
	stateJSBlockCmt:  tBlockCmt,
	stateJSLineCmt:   tLineCmt,
	stateCSS:         tCSS,
	stateCSSDqStr:    tCSSStr,
	stateCSSSqStr:    tCSSStr,
	stateCSSDqURL:    tCSSStr,
	stateCSSSqURL:    tCSSStr,
	stateCSSURL:      tCSSStr,
	stateCSSBlockCmt: tBlockCmt,
	stateCSSLineCmt:  tLineCmt,
	stateError:       tError,
}

// commentStart is the byte sequence that opens an HTML comment.
var commentStart = []byte("<!--")

// commentEnd is the byte sequence that closes an HTML comment.
var commentEnd = []byte("-->")

// tText is the context transition function for the text state.
49 func tText(c context, s []byte) (context, int) { 50 k := 0 51 for { 52 i := k + bytes.IndexByte(s[k:], '<') 53 if i < k || i+1 == len(s) { 54 return c, len(s) 55 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) { 56 return context{state: stateHTMLCmt}, i + 4 57 } 58 i++ 59 end := false 60 if s[i] == '/' { 61 if i+1 == len(s) { 62 return c, len(s) 63 } 64 end, i = true, i+1 65 } 66 j, e := eatTagName(s, i) 67 if j != i { 68 if end { 69 e = elementNone 70 } 71 // We've found an HTML tag. 72 return context{state: stateTag, element: e}, j 73 } 74 k = j 75 } 76 } 77 78 var elementContentType = [...]state{ 79 elementNone: stateText, 80 elementScript: stateJS, 81 elementStyle: stateCSS, 82 elementTextarea: stateRCDATA, 83 elementTitle: stateRCDATA, 84 } 85 86 // tTag is the context transition function for the tag state. 87 func tTag(c context, s []byte) (context, int) { 88 // Find the attribute name. 89 i := eatWhiteSpace(s, 0) 90 if i == len(s) { 91 return c, len(s) 92 } 93 if s[i] == '>' { 94 return context{ 95 state: elementContentType[c.element], 96 element: c.element, 97 }, i + 1 98 } 99 j, err := eatAttrName(s, i) 100 if err != nil { 101 return context{state: stateError, err: err}, len(s) 102 } 103 state, attr := stateTag, attrNone 104 if i == j { 105 return context{ 106 state: stateError, 107 err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]), 108 }, len(s) 109 } 110 111 attrName := strings.ToLower(string(s[i:j])) 112 if c.element == elementScript && attrName == "type" { 113 attr = attrScriptType 114 } else { 115 switch attrType(attrName) { 116 case contentTypeURL: 117 attr = attrURL 118 case contentTypeCSS: 119 attr = attrStyle 120 case contentTypeJS: 121 attr = attrScript 122 case contentTypeSrcset: 123 attr = attrSrcset 124 } 125 } 126 127 if j == len(s) { 128 state = stateAttrName 129 } else { 130 state = stateAfterName 131 } 132 return context{state: state, element: c.element, attr: attr}, j 133 } 
134 135 // tAttrName is the context transition function for stateAttrName. 136 func tAttrName(c context, s []byte) (context, int) { 137 i, err := eatAttrName(s, 0) 138 if err != nil { 139 return context{state: stateError, err: err}, len(s) 140 } else if i != len(s) { 141 c.state = stateAfterName 142 } 143 return c, i 144 } 145 146 // tAfterName is the context transition function for stateAfterName. 147 func tAfterName(c context, s []byte) (context, int) { 148 // Look for the start of the value. 149 i := eatWhiteSpace(s, 0) 150 if i == len(s) { 151 return c, len(s) 152 } else if s[i] != '=' { 153 // Occurs due to tag ending '>', and valueless attribute. 154 c.state = stateTag 155 return c, i 156 } 157 c.state = stateBeforeValue 158 // Consume the "=". 159 return c, i + 1 160 } 161 162 var attrStartStates = [...]state{ 163 attrNone: stateAttr, 164 attrScript: stateJS, 165 attrScriptType: stateAttr, 166 attrStyle: stateCSS, 167 attrURL: stateURL, 168 attrSrcset: stateSrcset, 169 } 170 171 // tBeforeValue is the context transition function for stateBeforeValue. 172 func tBeforeValue(c context, s []byte) (context, int) { 173 i := eatWhiteSpace(s, 0) 174 if i == len(s) { 175 return c, len(s) 176 } 177 // Find the attribute delimiter. 178 delim := delimSpaceOrTagEnd 179 switch s[i] { 180 case '\'': 181 delim, i = delimSingleQuote, i+1 182 case '"': 183 delim, i = delimDoubleQuote, i+1 184 } 185 c.state, c.delim = attrStartStates[c.attr], delim 186 return c, i 187 } 188 189 // tHTMLCmt is the context transition function for stateHTMLCmt. 190 func tHTMLCmt(c context, s []byte) (context, int) { 191 if i := bytes.Index(s, commentEnd); i != -1 { 192 return context{}, i + 3 193 } 194 return c, len(s) 195 } 196 197 // specialTagEndMarkers maps element types to the character sequence that 198 // case-insensitively signals the end of the special tag body. 
199 var specialTagEndMarkers = [...][]byte{ 200 elementScript: []byte("script"), 201 elementStyle: []byte("style"), 202 elementTextarea: []byte("textarea"), 203 elementTitle: []byte("title"), 204 } 205 206 var ( 207 specialTagEndPrefix = []byte("</") 208 tagEndSeparators = []byte("> \t\n\f/") 209 ) 210 211 // tSpecialTagEnd is the context transition function for raw text and RCDATA 212 // element states. 213 func tSpecialTagEnd(c context, s []byte) (context, int) { 214 if c.element != elementNone { 215 if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 { 216 return context{}, i 217 } 218 } 219 return c, len(s) 220 } 221 222 // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1 223 func indexTagEnd(s []byte, tag []byte) int { 224 res := 0 225 plen := len(specialTagEndPrefix) 226 for len(s) > 0 { 227 // Try to find the tag end prefix first 228 i := bytes.Index(s, specialTagEndPrefix) 229 if i == -1 { 230 return i 231 } 232 s = s[i+plen:] 233 // Try to match the actual tag if there is still space for it 234 if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) { 235 s = s[len(tag):] 236 // Check the tag is followed by a proper separator 237 if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 { 238 return res + i 239 } 240 res += len(tag) 241 } 242 res += i + plen 243 } 244 return -1 245 } 246 247 // tAttr is the context transition function for the attribute state. 248 func tAttr(c context, s []byte) (context, int) { 249 return c, len(s) 250 } 251 252 // tURL is the context transition function for the URL state. 
253 func tURL(c context, s []byte) (context, int) { 254 if bytes.ContainsAny(s, "#?") { 255 c.urlPart = urlPartQueryOrFrag 256 } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone { 257 // HTML5 uses "Valid URL potentially surrounded by spaces" for 258 // attrs: https://www.w3.org/TR/html5/index.html#attributes-1 259 c.urlPart = urlPartPreQuery 260 } 261 return c, len(s) 262 } 263 264 // tJS is the context transition function for the JS state. 265 func tJS(c context, s []byte) (context, int) { 266 i := bytes.IndexAny(s, "\"`'/") 267 if i == -1 { 268 // Entire input is non string, comment, regexp tokens. 269 c.jsCtx = nextJSCtx(s, c.jsCtx) 270 return c, len(s) 271 } 272 c.jsCtx = nextJSCtx(s[:i], c.jsCtx) 273 switch s[i] { 274 case '"': 275 c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp 276 case '\'': 277 c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp 278 case '`': 279 c.state, c.jsCtx = stateJSBqStr, jsCtxRegexp 280 case '/': 281 switch { 282 case i+1 < len(s) && s[i+1] == '/': 283 c.state, i = stateJSLineCmt, i+1 284 case i+1 < len(s) && s[i+1] == '*': 285 c.state, i = stateJSBlockCmt, i+1 286 case c.jsCtx == jsCtxRegexp: 287 c.state = stateJSRegexp 288 case c.jsCtx == jsCtxDivOp: 289 c.jsCtx = jsCtxRegexp 290 default: 291 return context{ 292 state: stateError, 293 err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]), 294 }, len(s) 295 } 296 default: 297 panic("unreachable") 298 } 299 return c, i + 1 300 } 301 302 // tJSDelimited is the context transition function for the JS string and regexp 303 // states. 
304 func tJSDelimited(c context, s []byte) (context, int) { 305 specials := `\"` 306 switch c.state { 307 case stateJSSqStr: 308 specials = `\'` 309 case stateJSBqStr: 310 specials = "`\\" 311 case stateJSRegexp: 312 specials = `\/[]` 313 } 314 315 k, inCharset := 0, false 316 for { 317 i := k + bytes.IndexAny(s[k:], specials) 318 if i < k { 319 break 320 } 321 switch s[i] { 322 case '\\': 323 i++ 324 if i == len(s) { 325 return context{ 326 state: stateError, 327 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s), 328 }, len(s) 329 } 330 case '[': 331 inCharset = true 332 case ']': 333 inCharset = false 334 default: 335 // end delimiter 336 if !inCharset { 337 c.state, c.jsCtx = stateJS, jsCtxDivOp 338 return c, i + 1 339 } 340 } 341 k = i + 1 342 } 343 344 if inCharset { 345 // This can be fixed by making context richer if interpolation 346 // into charsets is desired. 347 return context{ 348 state: stateError, 349 err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s), 350 }, len(s) 351 } 352 353 return c, len(s) 354 } 355 356 var blockCommentEnd = []byte("*/") 357 358 // tBlockCmt is the context transition function for /*comment*/ states. 359 func tBlockCmt(c context, s []byte) (context, int) { 360 i := bytes.Index(s, blockCommentEnd) 361 if i == -1 { 362 return c, len(s) 363 } 364 switch c.state { 365 case stateJSBlockCmt: 366 c.state = stateJS 367 case stateCSSBlockCmt: 368 c.state = stateCSS 369 default: 370 panic(c.state.String()) 371 } 372 return c, i + 2 373 } 374 375 // tLineCmt is the context transition function for //comment states. 
376 func tLineCmt(c context, s []byte) (context, int) { 377 var lineTerminators string 378 var endState state 379 switch c.state { 380 case stateJSLineCmt: 381 lineTerminators, endState = "\n\r\u2028\u2029", stateJS 382 case stateCSSLineCmt: 383 lineTerminators, endState = "\n\f\r", stateCSS 384 // Line comments are not part of any published CSS standard but 385 // are supported by the 4 major browsers. 386 // This defines line comments as 387 // LINECOMMENT ::= "//" [^\n\f\d]* 388 // since https://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines 389 // newlines: 390 // nl ::= #xA | #xD #xA | #xD | #xC 391 default: 392 panic(c.state.String()) 393 } 394 395 i := bytes.IndexAny(s, lineTerminators) 396 if i == -1 { 397 return c, len(s) 398 } 399 c.state = endState 400 // Per section 7.4 of EcmaScript 5 : https://es5.github.io/#x7.4 401 // "However, the LineTerminator at the end of the line is not 402 // considered to be part of the single-line comment; it is 403 // recognized separately by the lexical grammar and becomes part 404 // of the stream of input elements for the syntactic grammar." 405 return c, i 406 } 407 408 // tCSS is the context transition function for the CSS state. 409 func tCSS(c context, s []byte) (context, int) { 410 // CSS quoted strings are almost never used except for: 411 // (1) URLs as in background: "/foo.png" 412 // (2) Multiword font-names as in font-family: "Times New Roman" 413 // (3) List separators in content values as in inline-lists: 414 // <style> 415 // ul.inlineList { list-style: none; padding:0 } 416 // ul.inlineList > li { display: inline } 417 // ul.inlineList > li:before { content: ", " } 418 // ul.inlineList > li:first-child:before { content: "" } 419 // </style> 420 // <ul class=inlineList><li>One<li>Two<li>Three</ul> 421 // (4) Attribute value selectors as in a[href="http://example.com/"] 422 // 423 // We conservatively treat all strings as URLs, but make some 424 // allowances to avoid confusion. 
425 // 426 // In (1), our conservative assumption is justified. 427 // In (2), valid font names do not contain ':', '?', or '#', so our 428 // conservative assumption is fine since we will never transition past 429 // urlPartPreQuery. 430 // In (3), our protocol heuristic should not be tripped, and there 431 // should not be non-space content after a '?' or '#', so as long as 432 // we only %-encode RFC 3986 reserved characters we are ok. 433 // In (4), we should URL escape for URL attributes, and for others we 434 // have the attribute name available if our conservative assumption 435 // proves problematic for real code. 436 437 k := 0 438 for { 439 i := k + bytes.IndexAny(s[k:], `("'/`) 440 if i < k { 441 return c, len(s) 442 } 443 switch s[i] { 444 case '(': 445 // Look for url to the left. 446 p := bytes.TrimRight(s[:i], "\t\n\f\r ") 447 if endsWithCSSKeyword(p, "url") { 448 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r ")) 449 switch { 450 case j != len(s) && s[j] == '"': 451 c.state, j = stateCSSDqURL, j+1 452 case j != len(s) && s[j] == '\'': 453 c.state, j = stateCSSSqURL, j+1 454 default: 455 c.state = stateCSSURL 456 } 457 return c, j 458 } 459 case '/': 460 if i+1 < len(s) { 461 switch s[i+1] { 462 case '/': 463 c.state = stateCSSLineCmt 464 return c, i + 2 465 case '*': 466 c.state = stateCSSBlockCmt 467 return c, i + 2 468 } 469 } 470 case '"': 471 c.state = stateCSSDqStr 472 return c, i + 1 473 case '\'': 474 c.state = stateCSSSqStr 475 return c, i + 1 476 } 477 k = i + 1 478 } 479 } 480 481 // tCSSStr is the context transition function for the CSS string and URL states. 482 func tCSSStr(c context, s []byte) (context, int) { 483 var endAndEsc string 484 switch c.state { 485 case stateCSSDqStr, stateCSSDqURL: 486 endAndEsc = `\"` 487 case stateCSSSqStr, stateCSSSqURL: 488 endAndEsc = `\'` 489 case stateCSSURL: 490 // Unquoted URLs end with a newline or close parenthesis. 491 // The below includes the wc (whitespace character) and nl. 
492 endAndEsc = "\\\t\n\f\r )" 493 default: 494 panic(c.state.String()) 495 } 496 497 k := 0 498 for { 499 i := k + bytes.IndexAny(s[k:], endAndEsc) 500 if i < k { 501 c, nread := tURL(c, decodeCSS(s[k:])) 502 return c, k + nread 503 } 504 if s[i] == '\\' { 505 i++ 506 if i == len(s) { 507 return context{ 508 state: stateError, 509 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s), 510 }, len(s) 511 } 512 } else { 513 c.state = stateCSS 514 return c, i + 1 515 } 516 c, _ = tURL(c, decodeCSS(s[:i+1])) 517 k = i + 1 518 } 519 } 520 521 // tError is the context transition function for the error state. 522 func tError(c context, s []byte) (context, int) { 523 return c, len(s) 524 } 525 526 // eatAttrName returns the largest j such that s[i:j] is an attribute name. 527 // It returns an error if s[i:] does not look like it begins with an 528 // attribute name, such as encountering a quote mark without a preceding 529 // equals sign. 530 func eatAttrName(s []byte, i int) (int, *Error) { 531 for j := i; j < len(s); j++ { 532 switch s[j] { 533 case ' ', '\t', '\n', '\f', '\r', '=', '>': 534 return j, nil 535 case '\'', '"', '<': 536 // These result in a parse warning in HTML5 and are 537 // indicative of serious problems if seen in an attr 538 // name in a template. 539 return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s) 540 default: 541 // No-op. 542 } 543 } 544 return len(s), nil 545 } 546 547 var elementNameMap = map[string]element{ 548 "script": elementScript, 549 "style": elementStyle, 550 "textarea": elementTextarea, 551 "title": elementTitle, 552 } 553 554 // asciiAlpha reports whether c is an ASCII letter. 555 func asciiAlpha(c byte) bool { 556 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' 557 } 558 559 // asciiAlphaNum reports whether c is an ASCII letter or digit. 
560 func asciiAlphaNum(c byte) bool { 561 return asciiAlpha(c) || '0' <= c && c <= '9' 562 } 563 564 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type. 565 func eatTagName(s []byte, i int) (int, element) { 566 if i == len(s) || !asciiAlpha(s[i]) { 567 return i, elementNone 568 } 569 j := i + 1 570 for j < len(s) { 571 x := s[j] 572 if asciiAlphaNum(x) { 573 j++ 574 continue 575 } 576 // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y". 577 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) { 578 j += 2 579 continue 580 } 581 break 582 } 583 return j, elementNameMap[strings.ToLower(string(s[i:j]))] 584 } 585 586 // eatWhiteSpace returns the largest j such that s[i:j] is white space. 587 func eatWhiteSpace(s []byte, i int) int { 588 for j := i; j < len(s); j++ { 589 switch s[j] { 590 case ' ', '\t', '\n', '\f', '\r': 591 // No-op. 592 default: 593 return j 594 } 595 } 596 return len(s) 597 }