github.com/huandu/go@v0.0.0-20151114150818-04e615e41150/src/html/template/transition.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "strings" 10 ) 11 12 // transitionFunc is the array of context transition functions for text nodes. 13 // A transition function takes a context and template text input, and returns 14 // the updated context and the number of bytes consumed from the front of the 15 // input. 16 var transitionFunc = [...]func(context, []byte) (context, int){ 17 stateText: tText, 18 stateTag: tTag, 19 stateAttrName: tAttrName, 20 stateAfterName: tAfterName, 21 stateBeforeValue: tBeforeValue, 22 stateHTMLCmt: tHTMLCmt, 23 stateRCDATA: tSpecialTagEnd, 24 stateAttr: tAttr, 25 stateURL: tURL, 26 stateJS: tJS, 27 stateJSDqStr: tJSDelimited, 28 stateJSSqStr: tJSDelimited, 29 stateJSRegexp: tJSDelimited, 30 stateJSBlockCmt: tBlockCmt, 31 stateJSLineCmt: tLineCmt, 32 stateCSS: tCSS, 33 stateCSSDqStr: tCSSStr, 34 stateCSSSqStr: tCSSStr, 35 stateCSSDqURL: tCSSStr, 36 stateCSSSqURL: tCSSStr, 37 stateCSSURL: tCSSStr, 38 stateCSSBlockCmt: tBlockCmt, 39 stateCSSLineCmt: tLineCmt, 40 stateError: tError, 41 } 42 43 var commentStart = []byte("<!--") 44 var commentEnd = []byte("-->") 45 46 // tText is the context transition function for the text state. 47 func tText(c context, s []byte) (context, int) { 48 k := 0 49 for { 50 i := k + bytes.IndexByte(s[k:], '<') 51 if i < k || i+1 == len(s) { 52 return c, len(s) 53 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) { 54 return context{state: stateHTMLCmt}, i + 4 55 } 56 i++ 57 end := false 58 if s[i] == '/' { 59 if i+1 == len(s) { 60 return c, len(s) 61 } 62 end, i = true, i+1 63 } 64 j, e := eatTagName(s, i) 65 if j != i { 66 if end { 67 e = elementNone 68 } 69 // We've found an HTML tag. 70 return context{state: stateTag, element: e}, j 71 } 72 k = j 73 } 74 } 75 76 var elementContentType = [...]state{ 77 elementNone: stateText, 78 elementScript: stateJS, 79 elementStyle: stateCSS, 80 elementTextarea: stateRCDATA, 81 elementTitle: stateRCDATA, 82 } 83 84 // tTag is the context transition function for the tag state. 85 func tTag(c context, s []byte) (context, int) { 86 // Find the attribute name. 87 i := eatWhiteSpace(s, 0) 88 if i == len(s) { 89 return c, len(s) 90 } 91 if s[i] == '>' { 92 return context{ 93 state: elementContentType[c.element], 94 element: c.element, 95 }, i + 1 96 } 97 j, err := eatAttrName(s, i) 98 if err != nil { 99 return context{state: stateError, err: err}, len(s) 100 } 101 state, attr := stateTag, attrNone 102 if i == j { 103 return context{ 104 state: stateError, 105 err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]), 106 }, len(s) 107 } 108 switch attrType(string(s[i:j])) { 109 case contentTypeURL: 110 attr = attrURL 111 case contentTypeCSS: 112 attr = attrStyle 113 case contentTypeJS: 114 attr = attrScript 115 } 116 if j == len(s) { 117 state = stateAttrName 118 } else { 119 state = stateAfterName 120 } 121 return context{state: state, element: c.element, attr: attr}, j 122 } 123 124 // tAttrName is the context transition function for stateAttrName. 125 func tAttrName(c context, s []byte) (context, int) { 126 i, err := eatAttrName(s, 0) 127 if err != nil { 128 return context{state: stateError, err: err}, len(s) 129 } else if i != len(s) { 130 c.state = stateAfterName 131 } 132 return c, i 133 } 134 135 // tAfterName is the context transition function for stateAfterName. 136 func tAfterName(c context, s []byte) (context, int) { 137 // Look for the start of the value. 138 i := eatWhiteSpace(s, 0) 139 if i == len(s) { 140 return c, len(s) 141 } else if s[i] != '=' { 142 // Occurs due to tag ending '>', and valueless attribute. 143 c.state = stateTag 144 return c, i 145 } 146 c.state = stateBeforeValue 147 // Consume the "=". 148 return c, i + 1 149 } 150 151 var attrStartStates = [...]state{ 152 attrNone: stateAttr, 153 attrScript: stateJS, 154 attrStyle: stateCSS, 155 attrURL: stateURL, 156 } 157 158 // tBeforeValue is the context transition function for stateBeforeValue. 159 func tBeforeValue(c context, s []byte) (context, int) { 160 i := eatWhiteSpace(s, 0) 161 if i == len(s) { 162 return c, len(s) 163 } 164 // Find the attribute delimiter. 165 delim := delimSpaceOrTagEnd 166 switch s[i] { 167 case '\'': 168 delim, i = delimSingleQuote, i+1 169 case '"': 170 delim, i = delimDoubleQuote, i+1 171 } 172 c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone 173 return c, i 174 } 175 176 // tHTMLCmt is the context transition function for stateHTMLCmt. 177 func tHTMLCmt(c context, s []byte) (context, int) { 178 if i := bytes.Index(s, commentEnd); i != -1 { 179 return context{}, i + 3 180 } 181 return c, len(s) 182 } 183 184 // specialTagEndMarkers maps element types to the character sequence that 185 // case-insensitively signals the end of the special tag body. 186 var specialTagEndMarkers = [...][]byte{ 187 elementScript: []byte("script"), 188 elementStyle: []byte("style"), 189 elementTextarea: []byte("textarea"), 190 elementTitle: []byte("title"), 191 } 192 193 var ( 194 specialTagEndPrefix = []byte("</") 195 tagEndSeparators = []byte("> \t\n\f/") 196 ) 197 198 // tSpecialTagEnd is the context transition function for raw text and RCDATA 199 // element states. 200 func tSpecialTagEnd(c context, s []byte) (context, int) { 201 if c.element != elementNone { 202 if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 { 203 return context{}, i 204 } 205 } 206 return c, len(s) 207 } 208 209 // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1 210 func indexTagEnd(s []byte, tag []byte) int { 211 res := 0 212 plen := len(specialTagEndPrefix) 213 for len(s) > 0 { 214 // Try to find the tag end prefix first 215 i := bytes.Index(s, specialTagEndPrefix) 216 if i == -1 { 217 return i 218 } 219 s = s[i+plen:] 220 // Try to match the actual tag if there is still space for it 221 if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) { 222 s = s[len(tag):] 223 // Check the tag is followed by a proper separator 224 if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 { 225 return res + i 226 } 227 res += len(tag) 228 } 229 res += i + plen 230 } 231 return -1 232 } 233 234 // tAttr is the context transition function for the attribute state. 235 func tAttr(c context, s []byte) (context, int) { 236 return c, len(s) 237 } 238 239 // tURL is the context transition function for the URL state. 240 func tURL(c context, s []byte) (context, int) { 241 if bytes.IndexAny(s, "#?") >= 0 { 242 c.urlPart = urlPartQueryOrFrag 243 } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone { 244 // HTML5 uses "Valid URL potentially surrounded by spaces" for 245 // attrs: http://www.w3.org/TR/html5/index.html#attributes-1 246 c.urlPart = urlPartPreQuery 247 } 248 return c, len(s) 249 } 250 251 // tJS is the context transition function for the JS state. 252 func tJS(c context, s []byte) (context, int) { 253 i := bytes.IndexAny(s, `"'/`) 254 if i == -1 { 255 // Entire input is non string, comment, regexp tokens. 256 c.jsCtx = nextJSCtx(s, c.jsCtx) 257 return c, len(s) 258 } 259 c.jsCtx = nextJSCtx(s[:i], c.jsCtx) 260 switch s[i] { 261 case '"': 262 c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp 263 case '\'': 264 c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp 265 case '/': 266 switch { 267 case i+1 < len(s) && s[i+1] == '/': 268 c.state, i = stateJSLineCmt, i+1 269 case i+1 < len(s) && s[i+1] == '*': 270 c.state, i = stateJSBlockCmt, i+1 271 case c.jsCtx == jsCtxRegexp: 272 c.state = stateJSRegexp 273 case c.jsCtx == jsCtxDivOp: 274 c.jsCtx = jsCtxRegexp 275 default: 276 return context{ 277 state: stateError, 278 err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]), 279 }, len(s) 280 } 281 default: 282 panic("unreachable") 283 } 284 return c, i + 1 285 } 286 287 // tJSDelimited is the context transition function for the JS string and regexp 288 // states. 289 func tJSDelimited(c context, s []byte) (context, int) { 290 specials := `\"` 291 switch c.state { 292 case stateJSSqStr: 293 specials = `\'` 294 case stateJSRegexp: 295 specials = `\/[]` 296 } 297 298 k, inCharset := 0, false 299 for { 300 i := k + bytes.IndexAny(s[k:], specials) 301 if i < k { 302 break 303 } 304 switch s[i] { 305 case '\\': 306 i++ 307 if i == len(s) { 308 return context{ 309 state: stateError, 310 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s), 311 }, len(s) 312 } 313 case '[': 314 inCharset = true 315 case ']': 316 inCharset = false 317 default: 318 // end delimiter 319 if !inCharset { 320 c.state, c.jsCtx = stateJS, jsCtxDivOp 321 return c, i + 1 322 } 323 } 324 k = i + 1 325 } 326 327 if inCharset { 328 // This can be fixed by making context richer if interpolation 329 // into charsets is desired. 330 return context{ 331 state: stateError, 332 err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s), 333 }, len(s) 334 } 335 336 return c, len(s) 337 } 338 339 var blockCommentEnd = []byte("*/") 340 341 // tBlockCmt is the context transition function for /*comment*/ states. 342 func tBlockCmt(c context, s []byte) (context, int) { 343 i := bytes.Index(s, blockCommentEnd) 344 if i == -1 { 345 return c, len(s) 346 } 347 switch c.state { 348 case stateJSBlockCmt: 349 c.state = stateJS 350 case stateCSSBlockCmt: 351 c.state = stateCSS 352 default: 353 panic(c.state.String()) 354 } 355 return c, i + 2 356 } 357 358 // tLineCmt is the context transition function for //comment states. 359 func tLineCmt(c context, s []byte) (context, int) { 360 var lineTerminators string 361 var endState state 362 switch c.state { 363 case stateJSLineCmt: 364 lineTerminators, endState = "\n\r\u2028\u2029", stateJS 365 case stateCSSLineCmt: 366 lineTerminators, endState = "\n\f\r", stateCSS 367 // Line comments are not part of any published CSS standard but 368 // are supported by the 4 major browsers. 369 // This defines line comments as 370 // LINECOMMENT ::= "//" [^\n\f\d]* 371 // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines 372 // newlines: 373 // nl ::= #xA | #xD #xA | #xD | #xC 374 default: 375 panic(c.state.String()) 376 } 377 378 i := bytes.IndexAny(s, lineTerminators) 379 if i == -1 { 380 return c, len(s) 381 } 382 c.state = endState 383 // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4 384 // "However, the LineTerminator at the end of the line is not 385 // considered to be part of the single-line comment; it is 386 // recognized separately by the lexical grammar and becomes part 387 // of the stream of input elements for the syntactic grammar." 388 return c, i 389 } 390 391 // tCSS is the context transition function for the CSS state. 392 func tCSS(c context, s []byte) (context, int) { 393 // CSS quoted strings are almost never used except for: 394 // (1) URLs as in background: "/foo.png" 395 // (2) Multiword font-names as in font-family: "Times New Roman" 396 // (3) List separators in content values as in inline-lists: 397 // <style> 398 // ul.inlineList { list-style: none; padding:0 } 399 // ul.inlineList > li { display: inline } 400 // ul.inlineList > li:before { content: ", " } 401 // ul.inlineList > li:first-child:before { content: "" } 402 // </style> 403 // <ul class=inlineList><li>One<li>Two<li>Three</ul> 404 // (4) Attribute value selectors as in a[href="http://example.com/"] 405 // 406 // We conservatively treat all strings as URLs, but make some 407 // allowances to avoid confusion. 408 // 409 // In (1), our conservative assumption is justified. 410 // In (2), valid font names do not contain ':', '?', or '#', so our 411 // conservative assumption is fine since we will never transition past 412 // urlPartPreQuery. 413 // In (3), our protocol heuristic should not be tripped, and there 414 // should not be non-space content after a '?' or '#', so as long as 415 // we only %-encode RFC 3986 reserved characters we are ok. 416 // In (4), we should URL escape for URL attributes, and for others we 417 // have the attribute name available if our conservative assumption 418 // proves problematic for real code. 419 420 k := 0 421 for { 422 i := k + bytes.IndexAny(s[k:], `("'/`) 423 if i < k { 424 return c, len(s) 425 } 426 switch s[i] { 427 case '(': 428 // Look for url to the left. 429 p := bytes.TrimRight(s[:i], "\t\n\f\r ") 430 if endsWithCSSKeyword(p, "url") { 431 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r ")) 432 switch { 433 case j != len(s) && s[j] == '"': 434 c.state, j = stateCSSDqURL, j+1 435 case j != len(s) && s[j] == '\'': 436 c.state, j = stateCSSSqURL, j+1 437 default: 438 c.state = stateCSSURL 439 } 440 return c, j 441 } 442 case '/': 443 if i+1 < len(s) { 444 switch s[i+1] { 445 case '/': 446 c.state = stateCSSLineCmt 447 return c, i + 2 448 case '*': 449 c.state = stateCSSBlockCmt 450 return c, i + 2 451 } 452 } 453 case '"': 454 c.state = stateCSSDqStr 455 return c, i + 1 456 case '\'': 457 c.state = stateCSSSqStr 458 return c, i + 1 459 } 460 k = i + 1 461 } 462 } 463 464 // tCSSStr is the context transition function for the CSS string and URL states. 465 func tCSSStr(c context, s []byte) (context, int) { 466 var endAndEsc string 467 switch c.state { 468 case stateCSSDqStr, stateCSSDqURL: 469 endAndEsc = `\"` 470 case stateCSSSqStr, stateCSSSqURL: 471 endAndEsc = `\'` 472 case stateCSSURL: 473 // Unquoted URLs end with a newline or close parenthesis. 474 // The below includes the wc (whitespace character) and nl. 475 endAndEsc = "\\\t\n\f\r )" 476 default: 477 panic(c.state.String()) 478 } 479 480 k := 0 481 for { 482 i := k + bytes.IndexAny(s[k:], endAndEsc) 483 if i < k { 484 c, nread := tURL(c, decodeCSS(s[k:])) 485 return c, k + nread 486 } 487 if s[i] == '\\' { 488 i++ 489 if i == len(s) { 490 return context{ 491 state: stateError, 492 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s), 493 }, len(s) 494 } 495 } else { 496 c.state = stateCSS 497 return c, i + 1 498 } 499 c, _ = tURL(c, decodeCSS(s[:i+1])) 500 k = i + 1 501 } 502 } 503 504 // tError is the context transition function for the error state. 505 func tError(c context, s []byte) (context, int) { 506 return c, len(s) 507 } 508 509 // eatAttrName returns the largest j such that s[i:j] is an attribute name. 510 // It returns an error if s[i:] does not look like it begins with an 511 // attribute name, such as encountering a quote mark without a preceding 512 // equals sign. 513 func eatAttrName(s []byte, i int) (int, *Error) { 514 for j := i; j < len(s); j++ { 515 switch s[j] { 516 case ' ', '\t', '\n', '\f', '\r', '=', '>': 517 return j, nil 518 case '\'', '"', '<': 519 // These result in a parse warning in HTML5 and are 520 // indicative of serious problems if seen in an attr 521 // name in a template. 522 return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s) 523 default: 524 // No-op. 525 } 526 } 527 return len(s), nil 528 } 529 530 var elementNameMap = map[string]element{ 531 "script": elementScript, 532 "style": elementStyle, 533 "textarea": elementTextarea, 534 "title": elementTitle, 535 } 536 537 // asciiAlpha reports whether c is an ASCII letter. 538 func asciiAlpha(c byte) bool { 539 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' 540 } 541 542 // asciiAlphaNum reports whether c is an ASCII letter or digit. 543 func asciiAlphaNum(c byte) bool { 544 return asciiAlpha(c) || '0' <= c && c <= '9' 545 } 546 547 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type. 548 func eatTagName(s []byte, i int) (int, element) { 549 if i == len(s) || !asciiAlpha(s[i]) { 550 return i, elementNone 551 } 552 j := i + 1 553 for j < len(s) { 554 x := s[j] 555 if asciiAlphaNum(x) { 556 j++ 557 continue 558 } 559 // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y". 560 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) { 561 j += 2 562 continue 563 } 564 break 565 } 566 return j, elementNameMap[strings.ToLower(string(s[i:j]))] 567 } 568 569 // eatWhiteSpace returns the largest j such that s[i:j] is white space. 570 func eatWhiteSpace(s []byte, i int) int { 571 for j := i; j < len(s); j++ { 572 switch s[j] { 573 case ' ', '\t', '\n', '\f', '\r': 574 // No-op. 575 default: 576 return j 577 } 578 } 579 return len(s) 580 }