github.com/ccccaoqing/test@v0.0.0-20220510085219-3985d23445c0/src/html/template/transition.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "strings" 10 ) 11 12 // transitionFunc is the array of context transition functions for text nodes. 13 // A transition function takes a context and template text input, and returns 14 // the updated context and the number of bytes consumed from the front of the 15 // input. 16 var transitionFunc = [...]func(context, []byte) (context, int){ 17 stateText: tText, 18 stateTag: tTag, 19 stateAttrName: tAttrName, 20 stateAfterName: tAfterName, 21 stateBeforeValue: tBeforeValue, 22 stateHTMLCmt: tHTMLCmt, 23 stateRCDATA: tSpecialTagEnd, 24 stateAttr: tAttr, 25 stateURL: tURL, 26 stateJS: tJS, 27 stateJSDqStr: tJSDelimited, 28 stateJSSqStr: tJSDelimited, 29 stateJSRegexp: tJSDelimited, 30 stateJSBlockCmt: tBlockCmt, 31 stateJSLineCmt: tLineCmt, 32 stateCSS: tCSS, 33 stateCSSDqStr: tCSSStr, 34 stateCSSSqStr: tCSSStr, 35 stateCSSDqURL: tCSSStr, 36 stateCSSSqURL: tCSSStr, 37 stateCSSURL: tCSSStr, 38 stateCSSBlockCmt: tBlockCmt, 39 stateCSSLineCmt: tLineCmt, 40 stateError: tError, 41 } 42 43 var commentStart = []byte("<!--") 44 var commentEnd = []byte("-->") 45 46 // tText is the context transition function for the text state. 47 func tText(c context, s []byte) (context, int) { 48 k := 0 49 for { 50 i := k + bytes.IndexByte(s[k:], '<') 51 if i < k || i+1 == len(s) { 52 return c, len(s) 53 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) { 54 return context{state: stateHTMLCmt}, i + 4 55 } 56 i++ 57 end := false 58 if s[i] == '/' { 59 if i+1 == len(s) { 60 return c, len(s) 61 } 62 end, i = true, i+1 63 } 64 j, e := eatTagName(s, i) 65 if j != i { 66 if end { 67 e = elementNone 68 } 69 // We've found an HTML tag. 70 return context{state: stateTag, element: e}, j 71 } 72 k = j 73 } 74 } 75 76 var elementContentType = [...]state{ 77 elementNone: stateText, 78 elementScript: stateJS, 79 elementStyle: stateCSS, 80 elementTextarea: stateRCDATA, 81 elementTitle: stateRCDATA, 82 } 83 84 // tTag is the context transition function for the tag state. 85 func tTag(c context, s []byte) (context, int) { 86 // Find the attribute name. 87 i := eatWhiteSpace(s, 0) 88 if i == len(s) { 89 return c, len(s) 90 } 91 if s[i] == '>' { 92 return context{ 93 state: elementContentType[c.element], 94 element: c.element, 95 }, i + 1 96 } 97 j, err := eatAttrName(s, i) 98 if err != nil { 99 return context{state: stateError, err: err}, len(s) 100 } 101 state, attr := stateTag, attrNone 102 if i == j { 103 return context{ 104 state: stateError, 105 err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]), 106 }, len(s) 107 } 108 switch attrType(string(s[i:j])) { 109 case contentTypeURL: 110 attr = attrURL 111 case contentTypeCSS: 112 attr = attrStyle 113 case contentTypeJS: 114 attr = attrScript 115 } 116 if j == len(s) { 117 state = stateAttrName 118 } else { 119 state = stateAfterName 120 } 121 return context{state: state, element: c.element, attr: attr}, j 122 } 123 124 // tAttrName is the context transition function for stateAttrName. 125 func tAttrName(c context, s []byte) (context, int) { 126 i, err := eatAttrName(s, 0) 127 if err != nil { 128 return context{state: stateError, err: err}, len(s) 129 } else if i != len(s) { 130 c.state = stateAfterName 131 } 132 return c, i 133 } 134 135 // tAfterName is the context transition function for stateAfterName. 136 func tAfterName(c context, s []byte) (context, int) { 137 // Look for the start of the value. 138 i := eatWhiteSpace(s, 0) 139 if i == len(s) { 140 return c, len(s) 141 } else if s[i] != '=' { 142 // Occurs due to tag ending '>', and valueless attribute. 143 c.state = stateTag 144 return c, i 145 } 146 c.state = stateBeforeValue 147 // Consume the "=". 148 return c, i + 1 149 } 150 151 var attrStartStates = [...]state{ 152 attrNone: stateAttr, 153 attrScript: stateJS, 154 attrStyle: stateCSS, 155 attrURL: stateURL, 156 } 157 158 // tBeforeValue is the context transition function for stateBeforeValue. 159 func tBeforeValue(c context, s []byte) (context, int) { 160 i := eatWhiteSpace(s, 0) 161 if i == len(s) { 162 return c, len(s) 163 } 164 // Find the attribute delimiter. 165 delim := delimSpaceOrTagEnd 166 switch s[i] { 167 case '\'': 168 delim, i = delimSingleQuote, i+1 169 case '"': 170 delim, i = delimDoubleQuote, i+1 171 } 172 c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone 173 return c, i 174 } 175 176 // tHTMLCmt is the context transition function for stateHTMLCmt. 177 func tHTMLCmt(c context, s []byte) (context, int) { 178 if i := bytes.Index(s, commentEnd); i != -1 { 179 return context{}, i + 3 180 } 181 return c, len(s) 182 } 183 184 // specialTagEndMarkers maps element types to the character sequence that 185 // case-insensitively signals the end of the special tag body. 186 var specialTagEndMarkers = [...]string{ 187 elementScript: "</script", 188 elementStyle: "</style", 189 elementTextarea: "</textarea", 190 elementTitle: "</title", 191 } 192 193 // tSpecialTagEnd is the context transition function for raw text and RCDATA 194 // element states. 195 func tSpecialTagEnd(c context, s []byte) (context, int) { 196 if c.element != elementNone { 197 if i := strings.Index(strings.ToLower(string(s)), specialTagEndMarkers[c.element]); i != -1 { 198 return context{}, i 199 } 200 } 201 return c, len(s) 202 } 203 204 // tAttr is the context transition function for the attribute state. 205 func tAttr(c context, s []byte) (context, int) { 206 return c, len(s) 207 } 208 209 // tURL is the context transition function for the URL state. 210 func tURL(c context, s []byte) (context, int) { 211 if bytes.IndexAny(s, "#?") >= 0 { 212 c.urlPart = urlPartQueryOrFrag 213 } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone { 214 // HTML5 uses "Valid URL potentially surrounded by spaces" for 215 // attrs: http://www.w3.org/TR/html5/index.html#attributes-1 216 c.urlPart = urlPartPreQuery 217 } 218 return c, len(s) 219 } 220 221 // tJS is the context transition function for the JS state. 222 func tJS(c context, s []byte) (context, int) { 223 i := bytes.IndexAny(s, `"'/`) 224 if i == -1 { 225 // Entire input is non string, comment, regexp tokens. 226 c.jsCtx = nextJSCtx(s, c.jsCtx) 227 return c, len(s) 228 } 229 c.jsCtx = nextJSCtx(s[:i], c.jsCtx) 230 switch s[i] { 231 case '"': 232 c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp 233 case '\'': 234 c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp 235 case '/': 236 switch { 237 case i+1 < len(s) && s[i+1] == '/': 238 c.state, i = stateJSLineCmt, i+1 239 case i+1 < len(s) && s[i+1] == '*': 240 c.state, i = stateJSBlockCmt, i+1 241 case c.jsCtx == jsCtxRegexp: 242 c.state = stateJSRegexp 243 case c.jsCtx == jsCtxDivOp: 244 c.jsCtx = jsCtxRegexp 245 default: 246 return context{ 247 state: stateError, 248 err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]), 249 }, len(s) 250 } 251 default: 252 panic("unreachable") 253 } 254 return c, i + 1 255 } 256 257 // tJSDelimited is the context transition function for the JS string and regexp 258 // states. 259 func tJSDelimited(c context, s []byte) (context, int) { 260 specials := `\"` 261 switch c.state { 262 case stateJSSqStr: 263 specials = `\'` 264 case stateJSRegexp: 265 specials = `\/[]` 266 } 267 268 k, inCharset := 0, false 269 for { 270 i := k + bytes.IndexAny(s[k:], specials) 271 if i < k { 272 break 273 } 274 switch s[i] { 275 case '\\': 276 i++ 277 if i == len(s) { 278 return context{ 279 state: stateError, 280 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s), 281 }, len(s) 282 } 283 case '[': 284 inCharset = true 285 case ']': 286 inCharset = false 287 default: 288 // end delimiter 289 if !inCharset { 290 c.state, c.jsCtx = stateJS, jsCtxDivOp 291 return c, i + 1 292 } 293 } 294 k = i + 1 295 } 296 297 if inCharset { 298 // This can be fixed by making context richer if interpolation 299 // into charsets is desired. 300 return context{ 301 state: stateError, 302 err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s), 303 }, len(s) 304 } 305 306 return c, len(s) 307 } 308 309 var blockCommentEnd = []byte("*/") 310 311 // tBlockCmt is the context transition function for /*comment*/ states. 312 func tBlockCmt(c context, s []byte) (context, int) { 313 i := bytes.Index(s, blockCommentEnd) 314 if i == -1 { 315 return c, len(s) 316 } 317 switch c.state { 318 case stateJSBlockCmt: 319 c.state = stateJS 320 case stateCSSBlockCmt: 321 c.state = stateCSS 322 default: 323 panic(c.state.String()) 324 } 325 return c, i + 2 326 } 327 328 // tLineCmt is the context transition function for //comment states. 329 func tLineCmt(c context, s []byte) (context, int) { 330 var lineTerminators string 331 var endState state 332 switch c.state { 333 case stateJSLineCmt: 334 lineTerminators, endState = "\n\r\u2028\u2029", stateJS 335 case stateCSSLineCmt: 336 lineTerminators, endState = "\n\f\r", stateCSS 337 // Line comments are not part of any published CSS standard but 338 // are supported by the 4 major browsers. 339 // This defines line comments as 340 // LINECOMMENT ::= "//" [^\n\f\d]* 341 // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines 342 // newlines: 343 // nl ::= #xA | #xD #xA | #xD | #xC 344 default: 345 panic(c.state.String()) 346 } 347 348 i := bytes.IndexAny(s, lineTerminators) 349 if i == -1 { 350 return c, len(s) 351 } 352 c.state = endState 353 // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4 354 // "However, the LineTerminator at the end of the line is not 355 // considered to be part of the single-line comment; it is 356 // recognized separately by the lexical grammar and becomes part 357 // of the stream of input elements for the syntactic grammar." 358 return c, i 359 } 360 361 // tCSS is the context transition function for the CSS state. 362 func tCSS(c context, s []byte) (context, int) { 363 // CSS quoted strings are almost never used except for: 364 // (1) URLs as in background: "/foo.png" 365 // (2) Multiword font-names as in font-family: "Times New Roman" 366 // (3) List separators in content values as in inline-lists: 367 // <style> 368 // ul.inlineList { list-style: none; padding:0 } 369 // ul.inlineList > li { display: inline } 370 // ul.inlineList > li:before { content: ", " } 371 // ul.inlineList > li:first-child:before { content: "" } 372 // </style> 373 // <ul class=inlineList><li>One<li>Two<li>Three</ul> 374 // (4) Attribute value selectors as in a[href="http://example.com/"] 375 // 376 // We conservatively treat all strings as URLs, but make some 377 // allowances to avoid confusion. 378 // 379 // In (1), our conservative assumption is justified. 380 // In (2), valid font names do not contain ':', '?', or '#', so our 381 // conservative assumption is fine since we will never transition past 382 // urlPartPreQuery. 383 // In (3), our protocol heuristic should not be tripped, and there 384 // should not be non-space content after a '?' or '#', so as long as 385 // we only %-encode RFC 3986 reserved characters we are ok. 386 // In (4), we should URL escape for URL attributes, and for others we 387 // have the attribute name available if our conservative assumption 388 // proves problematic for real code. 389 390 k := 0 391 for { 392 i := k + bytes.IndexAny(s[k:], `("'/`) 393 if i < k { 394 return c, len(s) 395 } 396 switch s[i] { 397 case '(': 398 // Look for url to the left. 399 p := bytes.TrimRight(s[:i], "\t\n\f\r ") 400 if endsWithCSSKeyword(p, "url") { 401 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r ")) 402 switch { 403 case j != len(s) && s[j] == '"': 404 c.state, j = stateCSSDqURL, j+1 405 case j != len(s) && s[j] == '\'': 406 c.state, j = stateCSSSqURL, j+1 407 default: 408 c.state = stateCSSURL 409 } 410 return c, j 411 } 412 case '/': 413 if i+1 < len(s) { 414 switch s[i+1] { 415 case '/': 416 c.state = stateCSSLineCmt 417 return c, i + 2 418 case '*': 419 c.state = stateCSSBlockCmt 420 return c, i + 2 421 } 422 } 423 case '"': 424 c.state = stateCSSDqStr 425 return c, i + 1 426 case '\'': 427 c.state = stateCSSSqStr 428 return c, i + 1 429 } 430 k = i + 1 431 } 432 } 433 434 // tCSSStr is the context transition function for the CSS string and URL states. 435 func tCSSStr(c context, s []byte) (context, int) { 436 var endAndEsc string 437 switch c.state { 438 case stateCSSDqStr, stateCSSDqURL: 439 endAndEsc = `\"` 440 case stateCSSSqStr, stateCSSSqURL: 441 endAndEsc = `\'` 442 case stateCSSURL: 443 // Unquoted URLs end with a newline or close parenthesis. 444 // The below includes the wc (whitespace character) and nl. 445 endAndEsc = "\\\t\n\f\r )" 446 default: 447 panic(c.state.String()) 448 } 449 450 k := 0 451 for { 452 i := k + bytes.IndexAny(s[k:], endAndEsc) 453 if i < k { 454 c, nread := tURL(c, decodeCSS(s[k:])) 455 return c, k + nread 456 } 457 if s[i] == '\\' { 458 i++ 459 if i == len(s) { 460 return context{ 461 state: stateError, 462 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s), 463 }, len(s) 464 } 465 } else { 466 c.state = stateCSS 467 return c, i + 1 468 } 469 c, _ = tURL(c, decodeCSS(s[:i+1])) 470 k = i + 1 471 } 472 } 473 474 // tError is the context transition function for the error state. 475 func tError(c context, s []byte) (context, int) { 476 return c, len(s) 477 } 478 479 // eatAttrName returns the largest j such that s[i:j] is an attribute name. 480 // It returns an error if s[i:] does not look like it begins with an 481 // attribute name, such as encountering a quote mark without a preceding 482 // equals sign. 483 func eatAttrName(s []byte, i int) (int, *Error) { 484 for j := i; j < len(s); j++ { 485 switch s[j] { 486 case ' ', '\t', '\n', '\f', '\r', '=', '>': 487 return j, nil 488 case '\'', '"', '<': 489 // These result in a parse warning in HTML5 and are 490 // indicative of serious problems if seen in an attr 491 // name in a template. 492 return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s) 493 default: 494 // No-op. 495 } 496 } 497 return len(s), nil 498 } 499 500 var elementNameMap = map[string]element{ 501 "script": elementScript, 502 "style": elementStyle, 503 "textarea": elementTextarea, 504 "title": elementTitle, 505 } 506 507 // asciiAlpha reports whether c is an ASCII letter. 508 func asciiAlpha(c byte) bool { 509 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' 510 } 511 512 // asciiAlphaNum reports whether c is an ASCII letter or digit. 513 func asciiAlphaNum(c byte) bool { 514 return asciiAlpha(c) || '0' <= c && c <= '9' 515 } 516 517 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type. 518 func eatTagName(s []byte, i int) (int, element) { 519 if i == len(s) || !asciiAlpha(s[i]) { 520 return i, elementNone 521 } 522 j := i + 1 523 for j < len(s) { 524 x := s[j] 525 if asciiAlphaNum(x) { 526 j++ 527 continue 528 } 529 // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y". 530 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) { 531 j += 2 532 continue 533 } 534 break 535 } 536 return j, elementNameMap[strings.ToLower(string(s[i:j]))] 537 } 538 539 // eatWhiteSpace returns the largest j such that s[i:j] is white space. 540 func eatWhiteSpace(s []byte, i int) int { 541 for j := i; j < len(s); j++ { 542 switch s[j] { 543 case ' ', '\t', '\n', '\f', '\r': 544 // No-op. 545 default: 546 return j 547 } 548 } 549 return len(s) 550 }