github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/html/template/transition.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "strings" 10 ) 11 12 // transitionFunc is the array of context transition functions for text nodes. 13 // A transition function takes a context and template text input, and returns 14 // the updated context and the number of bytes consumed from the front of the 15 // input. 16 var transitionFunc = [...]func(context, []byte) (context, int){ 17 stateText: tText, 18 stateTag: tTag, 19 stateAttrName: tAttrName, 20 stateAfterName: tAfterName, 21 stateBeforeValue: tBeforeValue, 22 stateHTMLCmt: tHTMLCmt, 23 stateRCDATA: tSpecialTagEnd, 24 stateAttr: tAttr, 25 stateURL: tURL, 26 stateJS: tJS, 27 stateJSDqStr: tJSDelimited, 28 stateJSSqStr: tJSDelimited, 29 stateJSRegexp: tJSDelimited, 30 stateJSBlockCmt: tBlockCmt, 31 stateJSLineCmt: tLineCmt, 32 stateCSS: tCSS, 33 stateCSSDqStr: tCSSStr, 34 stateCSSSqStr: tCSSStr, 35 stateCSSDqURL: tCSSStr, 36 stateCSSSqURL: tCSSStr, 37 stateCSSURL: tCSSStr, 38 stateCSSBlockCmt: tBlockCmt, 39 stateCSSLineCmt: tLineCmt, 40 stateError: tError, 41 } 42 43 var commentStart = []byte("<!--") 44 var commentEnd = []byte("-->") 45 46 // tText is the context transition function for the text state. 47 func tText(c context, s []byte) (context, int) { 48 k := 0 49 for { 50 i := k + bytes.IndexByte(s[k:], '<') 51 if i < k || i+1 == len(s) { 52 return c, len(s) 53 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) { 54 return context{state: stateHTMLCmt}, i + 4 55 } 56 i++ 57 end := false 58 if s[i] == '/' { 59 if i+1 == len(s) { 60 return c, len(s) 61 } 62 end, i = true, i+1 63 } 64 j, e := eatTagName(s, i) 65 if j != i { 66 if end { 67 e = elementNone 68 } 69 // We've found an HTML tag. 70 return context{state: stateTag, element: e}, j 71 } 72 k = j 73 } 74 } 75 76 var elementContentType = [...]state{ 77 elementNone: stateText, 78 elementScript: stateJS, 79 elementStyle: stateCSS, 80 elementTextarea: stateRCDATA, 81 elementTitle: stateRCDATA, 82 } 83 84 // tTag is the context transition function for the tag state. 85 func tTag(c context, s []byte) (context, int) { 86 // Find the attribute name. 87 i := eatWhiteSpace(s, 0) 88 if i == len(s) { 89 return c, len(s) 90 } 91 if s[i] == '>' { 92 return context{ 93 state: elementContentType[c.element], 94 element: c.element, 95 }, i + 1 96 } 97 j, err := eatAttrName(s, i) 98 if err != nil { 99 return context{state: stateError, err: err}, len(s) 100 } 101 state, attr := stateTag, attrNone 102 if i == j { 103 return context{ 104 state: stateError, 105 err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]), 106 }, len(s) 107 } 108 109 attrName := strings.ToLower(string(s[i:j])) 110 if c.element == elementScript && attrName == "type" { 111 attr = attrScriptType 112 } else { 113 switch attrType(attrName) { 114 case contentTypeURL: 115 attr = attrURL 116 case contentTypeCSS: 117 attr = attrStyle 118 case contentTypeJS: 119 attr = attrScript 120 } 121 } 122 123 if j == len(s) { 124 state = stateAttrName 125 } else { 126 state = stateAfterName 127 } 128 return context{state: state, element: c.element, attr: attr}, j 129 } 130 131 // tAttrName is the context transition function for stateAttrName. 132 func tAttrName(c context, s []byte) (context, int) { 133 i, err := eatAttrName(s, 0) 134 if err != nil { 135 return context{state: stateError, err: err}, len(s) 136 } else if i != len(s) { 137 c.state = stateAfterName 138 } 139 return c, i 140 } 141 142 // tAfterName is the context transition function for stateAfterName. 143 func tAfterName(c context, s []byte) (context, int) { 144 // Look for the start of the value. 145 i := eatWhiteSpace(s, 0) 146 if i == len(s) { 147 return c, len(s) 148 } else if s[i] != '=' { 149 // Occurs due to tag ending '>', and valueless attribute. 150 c.state = stateTag 151 return c, i 152 } 153 c.state = stateBeforeValue 154 // Consume the "=". 155 return c, i + 1 156 } 157 158 var attrStartStates = [...]state{ 159 attrNone: stateAttr, 160 attrScript: stateJS, 161 attrScriptType: stateAttr, 162 attrStyle: stateCSS, 163 attrURL: stateURL, 164 } 165 166 // tBeforeValue is the context transition function for stateBeforeValue. 167 func tBeforeValue(c context, s []byte) (context, int) { 168 i := eatWhiteSpace(s, 0) 169 if i == len(s) { 170 return c, len(s) 171 } 172 // Find the attribute delimiter. 173 delim := delimSpaceOrTagEnd 174 switch s[i] { 175 case '\'': 176 delim, i = delimSingleQuote, i+1 177 case '"': 178 delim, i = delimDoubleQuote, i+1 179 } 180 c.state, c.delim = attrStartStates[c.attr], delim 181 return c, i 182 } 183 184 // tHTMLCmt is the context transition function for stateHTMLCmt. 185 func tHTMLCmt(c context, s []byte) (context, int) { 186 if i := bytes.Index(s, commentEnd); i != -1 { 187 return context{}, i + 3 188 } 189 return c, len(s) 190 } 191 192 // specialTagEndMarkers maps element types to the character sequence that 193 // case-insensitively signals the end of the special tag body. 194 var specialTagEndMarkers = [...][]byte{ 195 elementScript: []byte("script"), 196 elementStyle: []byte("style"), 197 elementTextarea: []byte("textarea"), 198 elementTitle: []byte("title"), 199 } 200 201 var ( 202 specialTagEndPrefix = []byte("</") 203 tagEndSeparators = []byte("> \t\n\f/") 204 ) 205 206 // tSpecialTagEnd is the context transition function for raw text and RCDATA 207 // element states. 208 func tSpecialTagEnd(c context, s []byte) (context, int) { 209 if c.element != elementNone { 210 if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 { 211 return context{}, i 212 } 213 } 214 return c, len(s) 215 } 216 217 // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1 218 func indexTagEnd(s []byte, tag []byte) int { 219 res := 0 220 plen := len(specialTagEndPrefix) 221 for len(s) > 0 { 222 // Try to find the tag end prefix first 223 i := bytes.Index(s, specialTagEndPrefix) 224 if i == -1 { 225 return i 226 } 227 s = s[i+plen:] 228 // Try to match the actual tag if there is still space for it 229 if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) { 230 s = s[len(tag):] 231 // Check the tag is followed by a proper separator 232 if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 { 233 return res + i 234 } 235 res += len(tag) 236 } 237 res += i + plen 238 } 239 return -1 240 } 241 242 // tAttr is the context transition function for the attribute state. 243 func tAttr(c context, s []byte) (context, int) { 244 return c, len(s) 245 } 246 247 // tURL is the context transition function for the URL state. 248 func tURL(c context, s []byte) (context, int) { 249 if bytes.ContainsAny(s, "#?") { 250 c.urlPart = urlPartQueryOrFrag 251 } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone { 252 // HTML5 uses "Valid URL potentially surrounded by spaces" for 253 // attrs: http://www.w3.org/TR/html5/index.html#attributes-1 254 c.urlPart = urlPartPreQuery 255 } 256 return c, len(s) 257 } 258 259 // tJS is the context transition function for the JS state. 260 func tJS(c context, s []byte) (context, int) { 261 i := bytes.IndexAny(s, `"'/`) 262 if i == -1 { 263 // Entire input is non string, comment, regexp tokens. 264 c.jsCtx = nextJSCtx(s, c.jsCtx) 265 return c, len(s) 266 } 267 c.jsCtx = nextJSCtx(s[:i], c.jsCtx) 268 switch s[i] { 269 case '"': 270 c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp 271 case '\'': 272 c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp 273 case '/': 274 switch { 275 case i+1 < len(s) && s[i+1] == '/': 276 c.state, i = stateJSLineCmt, i+1 277 case i+1 < len(s) && s[i+1] == '*': 278 c.state, i = stateJSBlockCmt, i+1 279 case c.jsCtx == jsCtxRegexp: 280 c.state = stateJSRegexp 281 case c.jsCtx == jsCtxDivOp: 282 c.jsCtx = jsCtxRegexp 283 default: 284 return context{ 285 state: stateError, 286 err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]), 287 }, len(s) 288 } 289 default: 290 panic("unreachable") 291 } 292 return c, i + 1 293 } 294 295 // tJSDelimited is the context transition function for the JS string and regexp 296 // states. 297 func tJSDelimited(c context, s []byte) (context, int) { 298 specials := `\"` 299 switch c.state { 300 case stateJSSqStr: 301 specials = `\'` 302 case stateJSRegexp: 303 specials = `\/[]` 304 } 305 306 k, inCharset := 0, false 307 for { 308 i := k + bytes.IndexAny(s[k:], specials) 309 if i < k { 310 break 311 } 312 switch s[i] { 313 case '\\': 314 i++ 315 if i == len(s) { 316 return context{ 317 state: stateError, 318 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s), 319 }, len(s) 320 } 321 case '[': 322 inCharset = true 323 case ']': 324 inCharset = false 325 default: 326 // end delimiter 327 if !inCharset { 328 c.state, c.jsCtx = stateJS, jsCtxDivOp 329 return c, i + 1 330 } 331 } 332 k = i + 1 333 } 334 335 if inCharset { 336 // This can be fixed by making context richer if interpolation 337 // into charsets is desired. 338 return context{ 339 state: stateError, 340 err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s), 341 }, len(s) 342 } 343 344 return c, len(s) 345 } 346 347 var blockCommentEnd = []byte("*/") 348 349 // tBlockCmt is the context transition function for /*comment*/ states. 350 func tBlockCmt(c context, s []byte) (context, int) { 351 i := bytes.Index(s, blockCommentEnd) 352 if i == -1 { 353 return c, len(s) 354 } 355 switch c.state { 356 case stateJSBlockCmt: 357 c.state = stateJS 358 case stateCSSBlockCmt: 359 c.state = stateCSS 360 default: 361 panic(c.state.String()) 362 } 363 return c, i + 2 364 } 365 366 // tLineCmt is the context transition function for //comment states. 367 func tLineCmt(c context, s []byte) (context, int) { 368 var lineTerminators string 369 var endState state 370 switch c.state { 371 case stateJSLineCmt: 372 lineTerminators, endState = "\n\r\u2028\u2029", stateJS 373 case stateCSSLineCmt: 374 lineTerminators, endState = "\n\f\r", stateCSS 375 // Line comments are not part of any published CSS standard but 376 // are supported by the 4 major browsers. 377 // This defines line comments as 378 // LINECOMMENT ::= "//" [^\n\f\d]* 379 // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines 380 // newlines: 381 // nl ::= #xA | #xD #xA | #xD | #xC 382 default: 383 panic(c.state.String()) 384 } 385 386 i := bytes.IndexAny(s, lineTerminators) 387 if i == -1 { 388 return c, len(s) 389 } 390 c.state = endState 391 // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4 392 // "However, the LineTerminator at the end of the line is not 393 // considered to be part of the single-line comment; it is 394 // recognized separately by the lexical grammar and becomes part 395 // of the stream of input elements for the syntactic grammar." 396 return c, i 397 } 398 399 // tCSS is the context transition function for the CSS state. 400 func tCSS(c context, s []byte) (context, int) { 401 // CSS quoted strings are almost never used except for: 402 // (1) URLs as in background: "/foo.png" 403 // (2) Multiword font-names as in font-family: "Times New Roman" 404 // (3) List separators in content values as in inline-lists: 405 // <style> 406 // ul.inlineList { list-style: none; padding:0 } 407 // ul.inlineList > li { display: inline } 408 // ul.inlineList > li:before { content: ", " } 409 // ul.inlineList > li:first-child:before { content: "" } 410 // </style> 411 // <ul class=inlineList><li>One<li>Two<li>Three</ul> 412 // (4) Attribute value selectors as in a[href="http://example.com/"] 413 // 414 // We conservatively treat all strings as URLs, but make some 415 // allowances to avoid confusion. 416 // 417 // In (1), our conservative assumption is justified. 418 // In (2), valid font names do not contain ':', '?', or '#', so our 419 // conservative assumption is fine since we will never transition past 420 // urlPartPreQuery. 421 // In (3), our protocol heuristic should not be tripped, and there 422 // should not be non-space content after a '?' or '#', so as long as 423 // we only %-encode RFC 3986 reserved characters we are ok. 424 // In (4), we should URL escape for URL attributes, and for others we 425 // have the attribute name available if our conservative assumption 426 // proves problematic for real code. 427 428 k := 0 429 for { 430 i := k + bytes.IndexAny(s[k:], `("'/`) 431 if i < k { 432 return c, len(s) 433 } 434 switch s[i] { 435 case '(': 436 // Look for url to the left. 437 p := bytes.TrimRight(s[:i], "\t\n\f\r ") 438 if endsWithCSSKeyword(p, "url") { 439 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r ")) 440 switch { 441 case j != len(s) && s[j] == '"': 442 c.state, j = stateCSSDqURL, j+1 443 case j != len(s) && s[j] == '\'': 444 c.state, j = stateCSSSqURL, j+1 445 default: 446 c.state = stateCSSURL 447 } 448 return c, j 449 } 450 case '/': 451 if i+1 < len(s) { 452 switch s[i+1] { 453 case '/': 454 c.state = stateCSSLineCmt 455 return c, i + 2 456 case '*': 457 c.state = stateCSSBlockCmt 458 return c, i + 2 459 } 460 } 461 case '"': 462 c.state = stateCSSDqStr 463 return c, i + 1 464 case '\'': 465 c.state = stateCSSSqStr 466 return c, i + 1 467 } 468 k = i + 1 469 } 470 } 471 472 // tCSSStr is the context transition function for the CSS string and URL states. 473 func tCSSStr(c context, s []byte) (context, int) { 474 var endAndEsc string 475 switch c.state { 476 case stateCSSDqStr, stateCSSDqURL: 477 endAndEsc = `\"` 478 case stateCSSSqStr, stateCSSSqURL: 479 endAndEsc = `\'` 480 case stateCSSURL: 481 // Unquoted URLs end with a newline or close parenthesis. 482 // The below includes the wc (whitespace character) and nl. 483 endAndEsc = "\\\t\n\f\r )" 484 default: 485 panic(c.state.String()) 486 } 487 488 k := 0 489 for { 490 i := k + bytes.IndexAny(s[k:], endAndEsc) 491 if i < k { 492 c, nread := tURL(c, decodeCSS(s[k:])) 493 return c, k + nread 494 } 495 if s[i] == '\\' { 496 i++ 497 if i == len(s) { 498 return context{ 499 state: stateError, 500 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s), 501 }, len(s) 502 } 503 } else { 504 c.state = stateCSS 505 return c, i + 1 506 } 507 c, _ = tURL(c, decodeCSS(s[:i+1])) 508 k = i + 1 509 } 510 } 511 512 // tError is the context transition function for the error state. 513 func tError(c context, s []byte) (context, int) { 514 return c, len(s) 515 } 516 517 // eatAttrName returns the largest j such that s[i:j] is an attribute name. 518 // It returns an error if s[i:] does not look like it begins with an 519 // attribute name, such as encountering a quote mark without a preceding 520 // equals sign. 521 func eatAttrName(s []byte, i int) (int, *Error) { 522 for j := i; j < len(s); j++ { 523 switch s[j] { 524 case ' ', '\t', '\n', '\f', '\r', '=', '>': 525 return j, nil 526 case '\'', '"', '<': 527 // These result in a parse warning in HTML5 and are 528 // indicative of serious problems if seen in an attr 529 // name in a template. 530 return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s) 531 default: 532 // No-op. 533 } 534 } 535 return len(s), nil 536 } 537 538 var elementNameMap = map[string]element{ 539 "script": elementScript, 540 "style": elementStyle, 541 "textarea": elementTextarea, 542 "title": elementTitle, 543 } 544 545 // asciiAlpha reports whether c is an ASCII letter. 546 func asciiAlpha(c byte) bool { 547 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' 548 } 549 550 // asciiAlphaNum reports whether c is an ASCII letter or digit. 551 func asciiAlphaNum(c byte) bool { 552 return asciiAlpha(c) || '0' <= c && c <= '9' 553 } 554 555 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type. 556 func eatTagName(s []byte, i int) (int, element) { 557 if i == len(s) || !asciiAlpha(s[i]) { 558 return i, elementNone 559 } 560 j := i + 1 561 for j < len(s) { 562 x := s[j] 563 if asciiAlphaNum(x) { 564 j++ 565 continue 566 } 567 // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y". 568 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) { 569 j += 2 570 continue 571 } 572 break 573 } 574 return j, elementNameMap[strings.ToLower(string(s[i:j]))] 575 } 576 577 // eatWhiteSpace returns the largest j such that s[i:j] is white space. 578 func eatWhiteSpace(s []byte, i int) int { 579 for j := i; j < len(s); j++ { 580 switch s[j] { 581 case ' ', '\t', '\n', '\f', '\r': 582 // No-op. 583 default: 584 return j 585 } 586 } 587 return len(s) 588 }