github.com/sanprasirt/go@v0.0.0-20170607001320-a027466e4b6d/src/html/template/escape.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "fmt" 10 "html" 11 "io" 12 "text/template" 13 "text/template/parse" 14 ) 15 16 // escapeTemplate rewrites the named template, which must be 17 // associated with t, to guarantee that the output of any of the named 18 // templates is properly escaped. If no error is returned, then the named templates have 19 // been modified. Otherwise the named templates have been rendered 20 // unusable. 21 func escapeTemplate(tmpl *Template, node parse.Node, name string) error { 22 e := newEscaper(tmpl) 23 c, _ := e.escapeTree(context{}, node, name, 0) 24 var err error 25 if c.err != nil { 26 err, c.err.Name = c.err, name 27 } else if c.state != stateText { 28 err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)} 29 } 30 if err != nil { 31 // Prevent execution of unsafe templates. 32 if t := tmpl.set[name]; t != nil { 33 t.escapeErr = err 34 t.text.Tree = nil 35 t.Tree = nil 36 } 37 return err 38 } 39 e.commit() 40 if t := tmpl.set[name]; t != nil { 41 t.escapeErr = escapeOK 42 t.Tree = t.text.Tree 43 } 44 return nil 45 } 46 47 // evalArgs formats the list of arguments into a string. It is equivalent to 48 // fmt.Sprint(args...), except that it deferences all pointers. 49 func evalArgs(args ...interface{}) string { 50 // Optimization for simple common case of a single string argument. 51 if len(args) == 1 { 52 if s, ok := args[0].(string); ok { 53 return s 54 } 55 } 56 for i, arg := range args { 57 args[i] = indirectToStringerOrError(arg) 58 } 59 return fmt.Sprint(args...) 60 } 61 62 // funcMap maps command names to functions that render their inputs safe. 63 var funcMap = template.FuncMap{ 64 "_html_template_attrescaper": attrEscaper, 65 "_html_template_commentescaper": commentEscaper, 66 "_html_template_cssescaper": cssEscaper, 67 "_html_template_cssvaluefilter": cssValueFilter, 68 "_html_template_htmlnamefilter": htmlNameFilter, 69 "_html_template_htmlescaper": htmlEscaper, 70 "_html_template_jsregexpescaper": jsRegexpEscaper, 71 "_html_template_jsstrescaper": jsStrEscaper, 72 "_html_template_jsvalescaper": jsValEscaper, 73 "_html_template_nospaceescaper": htmlNospaceEscaper, 74 "_html_template_rcdataescaper": rcdataEscaper, 75 "_html_template_urlescaper": urlEscaper, 76 "_html_template_urlfilter": urlFilter, 77 "_html_template_urlnormalizer": urlNormalizer, 78 "_eval_args_": evalArgs, 79 } 80 81 // escaper collects type inferences about templates and changes needed to make 82 // templates injection safe. 83 type escaper struct { 84 tmpl *Template 85 // output[templateName] is the output context for a templateName that 86 // has been mangled to include its input context. 87 output map[string]context 88 // derived[c.mangle(name)] maps to a template derived from the template 89 // named name templateName for the start context c. 90 derived map[string]*template.Template 91 // called[templateName] is a set of called mangled template names. 92 called map[string]bool 93 // xxxNodeEdits are the accumulated edits to apply during commit. 94 // Such edits are not applied immediately in case a template set 95 // executes a given template in different escaping contexts. 96 actionNodeEdits map[*parse.ActionNode][]string 97 templateNodeEdits map[*parse.TemplateNode]string 98 textNodeEdits map[*parse.TextNode][]byte 99 } 100 101 // newEscaper creates a blank escaper for the given set. 102 func newEscaper(t *Template) *escaper { 103 return &escaper{ 104 t, 105 map[string]context{}, 106 map[string]*template.Template{}, 107 map[string]bool{}, 108 map[*parse.ActionNode][]string{}, 109 map[*parse.TemplateNode]string{}, 110 map[*parse.TextNode][]byte{}, 111 } 112 } 113 114 // filterFailsafe is an innocuous word that is emitted in place of unsafe values 115 // by sanitizer functions. It is not a keyword in any programming language, 116 // contains no special characters, is not empty, and when it appears in output 117 // it is distinct enough that a developer can find the source of the problem 118 // via a search engine. 119 const filterFailsafe = "ZgotmplZ" 120 121 // escape escapes a template node. 122 func (e *escaper) escape(c context, n parse.Node) context { 123 switch n := n.(type) { 124 case *parse.ActionNode: 125 return e.escapeAction(c, n) 126 case *parse.IfNode: 127 return e.escapeBranch(c, &n.BranchNode, "if") 128 case *parse.ListNode: 129 return e.escapeList(c, n) 130 case *parse.RangeNode: 131 return e.escapeBranch(c, &n.BranchNode, "range") 132 case *parse.TemplateNode: 133 return e.escapeTemplate(c, n) 134 case *parse.TextNode: 135 return e.escapeText(c, n) 136 case *parse.WithNode: 137 return e.escapeBranch(c, &n.BranchNode, "with") 138 } 139 panic("escaping " + n.String() + " is unimplemented") 140 } 141 142 // allIdents returns the names of the identifiers under the Ident field of the node, 143 // which might be a singleton (Identifier) or a slice (Field or Chain). 144 func allIdents(node parse.Node) []string { 145 switch node := node.(type) { 146 case *parse.IdentifierNode: 147 return []string{node.Ident} 148 case *parse.FieldNode: 149 return node.Ident 150 case *parse.ChainNode: 151 return node.Field 152 } 153 return nil 154 } 155 156 // escapeAction escapes an action template node. 157 func (e *escaper) escapeAction(c context, n *parse.ActionNode) context { 158 if len(n.Pipe.Decl) != 0 { 159 // A local variable assignment, not an interpolation. 160 return c 161 } 162 c = nudge(c) 163 // Check for disallowed use of predefined escapers in the pipeline. 164 for pos, idNode := range n.Pipe.Cmds { 165 for _, ident := range allIdents(idNode.Args[0]) { 166 if _, ok := predefinedEscapers[ident]; ok { 167 if pos < len(n.Pipe.Cmds)-1 || 168 c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" { 169 return context{ 170 state: stateError, 171 err: errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident), 172 } 173 } 174 } 175 } 176 } 177 s := make([]string, 0, 3) 178 switch c.state { 179 case stateError: 180 return c 181 case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL: 182 switch c.urlPart { 183 case urlPartNone: 184 s = append(s, "_html_template_urlfilter") 185 fallthrough 186 case urlPartPreQuery: 187 switch c.state { 188 case stateCSSDqStr, stateCSSSqStr: 189 s = append(s, "_html_template_cssescaper") 190 default: 191 s = append(s, "_html_template_urlnormalizer") 192 } 193 case urlPartQueryOrFrag: 194 s = append(s, "_html_template_urlescaper") 195 case urlPartUnknown: 196 return context{ 197 state: stateError, 198 err: errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n), 199 } 200 default: 201 panic(c.urlPart.String()) 202 } 203 case stateJS: 204 s = append(s, "_html_template_jsvalescaper") 205 // A slash after a value starts a div operator. 206 c.jsCtx = jsCtxDivOp 207 case stateJSDqStr, stateJSSqStr: 208 s = append(s, "_html_template_jsstrescaper") 209 case stateJSRegexp: 210 s = append(s, "_html_template_jsregexpescaper") 211 case stateCSS: 212 s = append(s, "_html_template_cssvaluefilter") 213 case stateText: 214 s = append(s, "_html_template_htmlescaper") 215 case stateRCDATA: 216 s = append(s, "_html_template_rcdataescaper") 217 case stateAttr: 218 // Handled below in delim check. 219 case stateAttrName, stateTag: 220 c.state = stateAttrName 221 s = append(s, "_html_template_htmlnamefilter") 222 default: 223 if isComment(c.state) { 224 s = append(s, "_html_template_commentescaper") 225 } else { 226 panic("unexpected state " + c.state.String()) 227 } 228 } 229 switch c.delim { 230 case delimNone: 231 // No extra-escaping needed for raw text content. 232 case delimSpaceOrTagEnd: 233 s = append(s, "_html_template_nospaceescaper") 234 default: 235 s = append(s, "_html_template_attrescaper") 236 } 237 e.editActionNode(n, s) 238 return c 239 } 240 241 // ensurePipelineContains ensures that the pipeline ends with the commands with 242 // the identifiers in s in order. If the pipeline ends with a predefined escaper 243 // (i.e. "html" or "urlquery"), merge it with the identifiers in s. 244 func ensurePipelineContains(p *parse.PipeNode, s []string) { 245 if len(s) == 0 { 246 // Do not rewrite pipeline if we have no escapers to insert. 247 return 248 } 249 // Precondition: p.Cmds contains at most one predefined escaper and the 250 // escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is 251 // always true because of the checks in escapeAction. 252 pipelineLen := len(p.Cmds) 253 if pipelineLen > 0 { 254 lastCmd := p.Cmds[pipelineLen-1] 255 if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok { 256 if esc := idNode.Ident; predefinedEscapers[esc] { 257 // Pipeline ends with a predefined escaper. 258 if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 { 259 // Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }}, 260 // where esc is the predefined escaper, and arg1...argN are its arguments. 261 // Convert this into the equivalent form 262 // {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily 263 // merged with the escapers in s. 264 lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position()) 265 p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position())) 266 pipelineLen++ 267 } 268 // If any of the commands in s that we are about to insert is equivalent 269 // to the predefined escaper, use the predefined escaper instead. 270 dup := false 271 for i, escaper := range s { 272 if escFnsEq(esc, escaper) { 273 s[i] = idNode.Ident 274 dup = true 275 } 276 } 277 if dup { 278 // The predefined escaper will already be inserted along with the 279 // escapers in s, so do not copy it to the rewritten pipeline. 280 pipelineLen-- 281 } 282 } 283 } 284 } 285 // Rewrite the pipeline, creating the escapers in s at the end of the pipeline. 286 newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s)) 287 copy(newCmds, p.Cmds) 288 for _, name := range s { 289 newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position())) 290 } 291 p.Cmds = newCmds 292 } 293 294 // predefinedEscapers contains template predefined escapers that are equivalent 295 // to some contextual escapers. Keep in sync with equivEscapers. 296 var predefinedEscapers = map[string]bool{ 297 "html": true, 298 "urlquery": true, 299 } 300 301 // equivEscapers matches contextual escapers to equivalent predefined 302 // template escapers. 303 var equivEscapers = map[string]string{ 304 // The following pairs of HTML escapers provide equivalent security 305 // guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'. 306 "_html_template_attrescaper": "html", 307 "_html_template_htmlescaper": "html", 308 "_html_template_rcdataescaper": "html", 309 // These two URL escapers produce URLs safe for embedding in a URL query by 310 // percent-encoding all the reserved characters specified in RFC 3986 Section 311 // 2.2 312 "_html_template_urlescaper": "urlquery", 313 // These two functions are not actually equivalent; urlquery is stricter as it 314 // escapes reserved characters (e.g. '#'), while _html_template_urlnormalizer 315 // does not. It is therefore only safe to replace _html_template_urlnormalizer 316 // with urlquery (this happens in ensurePipelineContains), but not the otherI've 317 // way around. We keep this entry around to preserve the behavior of templates 318 // written before Go 1.9, which might depend on this substitution taking place. 319 "_html_template_urlnormalizer": "urlquery", 320 } 321 322 // escFnsEq reports whether the two escaping functions are equivalent. 323 func escFnsEq(a, b string) bool { 324 if e := equivEscapers[a]; e != "" { 325 a = e 326 } 327 if e := equivEscapers[b]; e != "" { 328 b = e 329 } 330 return a == b 331 } 332 333 // redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x) 334 // for all x. 335 var redundantFuncs = map[string]map[string]bool{ 336 "_html_template_commentescaper": { 337 "_html_template_attrescaper": true, 338 "_html_template_nospaceescaper": true, 339 "_html_template_htmlescaper": true, 340 }, 341 "_html_template_cssescaper": { 342 "_html_template_attrescaper": true, 343 }, 344 "_html_template_jsregexpescaper": { 345 "_html_template_attrescaper": true, 346 }, 347 "_html_template_jsstrescaper": { 348 "_html_template_attrescaper": true, 349 }, 350 "_html_template_urlescaper": { 351 "_html_template_urlnormalizer": true, 352 }, 353 } 354 355 // appendCmd appends the given command to the end of the command pipeline 356 // unless it is redundant with the last command. 357 func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode { 358 if n := len(cmds); n != 0 { 359 last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode) 360 next, okNext := cmd.Args[0].(*parse.IdentifierNode) 361 if okLast && okNext && redundantFuncs[last.Ident][next.Ident] { 362 return cmds 363 } 364 } 365 return append(cmds, cmd) 366 } 367 368 // indexOfStr is the first i such that eq(s, strs[i]) or -1 if s was not found. 369 func indexOfStr(s string, strs []string, eq func(a, b string) bool) int { 370 for i, t := range strs { 371 if eq(s, t) { 372 return i 373 } 374 } 375 return -1 376 } 377 378 // newIdentCmd produces a command containing a single identifier node. 379 func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode { 380 return &parse.CommandNode{ 381 NodeType: parse.NodeCommand, 382 Args: []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree. 383 } 384 } 385 386 // nudge returns the context that would result from following empty string 387 // transitions from the input context. 388 // For example, parsing: 389 // `<a href=` 390 // will end in context{stateBeforeValue, attrURL}, but parsing one extra rune: 391 // `<a href=x` 392 // will end in context{stateURL, delimSpaceOrTagEnd, ...}. 393 // There are two transitions that happen when the 'x' is seen: 394 // (1) Transition from a before-value state to a start-of-value state without 395 // consuming any character. 396 // (2) Consume 'x' and transition past the first value character. 397 // In this case, nudging produces the context after (1) happens. 398 func nudge(c context) context { 399 switch c.state { 400 case stateTag: 401 // In `<foo {{.}}`, the action should emit an attribute. 402 c.state = stateAttrName 403 case stateBeforeValue: 404 // In `<foo bar={{.}}`, the action is an undelimited value. 405 c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone 406 case stateAfterName: 407 // In `<foo bar {{.}}`, the action is an attribute name. 408 c.state, c.attr = stateAttrName, attrNone 409 } 410 return c 411 } 412 413 // join joins the two contexts of a branch template node. The result is an 414 // error context if either of the input contexts are error contexts, or if the 415 // the input contexts differ. 416 func join(a, b context, node parse.Node, nodeName string) context { 417 if a.state == stateError { 418 return a 419 } 420 if b.state == stateError { 421 return b 422 } 423 if a.eq(b) { 424 return a 425 } 426 427 c := a 428 c.urlPart = b.urlPart 429 if c.eq(b) { 430 // The contexts differ only by urlPart. 431 c.urlPart = urlPartUnknown 432 return c 433 } 434 435 c = a 436 c.jsCtx = b.jsCtx 437 if c.eq(b) { 438 // The contexts differ only by jsCtx. 439 c.jsCtx = jsCtxUnknown 440 return c 441 } 442 443 // Allow a nudged context to join with an unnudged one. 444 // This means that 445 // <p title={{if .C}}{{.}}{{end}} 446 // ends in an unquoted value state even though the else branch 447 // ends in stateBeforeValue. 448 if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) { 449 if e := join(c, d, node, nodeName); e.state != stateError { 450 return e 451 } 452 } 453 454 return context{ 455 state: stateError, 456 err: errorf(ErrBranchEnd, node, 0, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b), 457 } 458 } 459 460 // escapeBranch escapes a branch template node: "if", "range" and "with". 461 func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context { 462 c0 := e.escapeList(c, n.List) 463 if nodeName == "range" && c0.state != stateError { 464 // The "true" branch of a "range" node can execute multiple times. 465 // We check that executing n.List once results in the same context 466 // as executing n.List twice. 467 c1, _ := e.escapeListConditionally(c0, n.List, nil) 468 c0 = join(c0, c1, n, nodeName) 469 if c0.state == stateError { 470 // Make clear that this is a problem on loop re-entry 471 // since developers tend to overlook that branch when 472 // debugging templates. 473 c0.err.Line = n.Line 474 c0.err.Description = "on range loop re-entry: " + c0.err.Description 475 return c0 476 } 477 } 478 c1 := e.escapeList(c, n.ElseList) 479 return join(c0, c1, n, nodeName) 480 } 481 482 // escapeList escapes a list template node. 483 func (e *escaper) escapeList(c context, n *parse.ListNode) context { 484 if n == nil { 485 return c 486 } 487 for _, m := range n.Nodes { 488 c = e.escape(c, m) 489 } 490 return c 491 } 492 493 // escapeListConditionally escapes a list node but only preserves edits and 494 // inferences in e if the inferences and output context satisfy filter. 495 // It returns the best guess at an output context, and the result of the filter 496 // which is the same as whether e was updated. 497 func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) { 498 e1 := newEscaper(e.tmpl) 499 // Make type inferences available to f. 500 for k, v := range e.output { 501 e1.output[k] = v 502 } 503 c = e1.escapeList(c, n) 504 ok := filter != nil && filter(e1, c) 505 if ok { 506 // Copy inferences and edits from e1 back into e. 507 for k, v := range e1.output { 508 e.output[k] = v 509 } 510 for k, v := range e1.derived { 511 e.derived[k] = v 512 } 513 for k, v := range e1.called { 514 e.called[k] = v 515 } 516 for k, v := range e1.actionNodeEdits { 517 e.editActionNode(k, v) 518 } 519 for k, v := range e1.templateNodeEdits { 520 e.editTemplateNode(k, v) 521 } 522 for k, v := range e1.textNodeEdits { 523 e.editTextNode(k, v) 524 } 525 } 526 return c, ok 527 } 528 529 // escapeTemplate escapes a {{template}} call node. 530 func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context { 531 c, name := e.escapeTree(c, n, n.Name, n.Line) 532 if name != n.Name { 533 e.editTemplateNode(n, name) 534 } 535 return c 536 } 537 538 // escapeTree escapes the named template starting in the given context as 539 // necessary and returns its output context. 540 func (e *escaper) escapeTree(c context, node parse.Node, name string, line int) (context, string) { 541 // Mangle the template name with the input context to produce a reliable 542 // identifier. 543 dname := c.mangle(name) 544 e.called[dname] = true 545 if out, ok := e.output[dname]; ok { 546 // Already escaped. 547 return out, dname 548 } 549 t := e.template(name) 550 if t == nil { 551 // Two cases: The template exists but is empty, or has never been mentioned at 552 // all. Distinguish the cases in the error messages. 553 if e.tmpl.set[name] != nil { 554 return context{ 555 state: stateError, 556 err: errorf(ErrNoSuchTemplate, node, line, "%q is an incomplete or empty template", name), 557 }, dname 558 } 559 return context{ 560 state: stateError, 561 err: errorf(ErrNoSuchTemplate, node, line, "no such template %q", name), 562 }, dname 563 } 564 if dname != name { 565 // Use any template derived during an earlier call to escapeTemplate 566 // with different top level templates, or clone if necessary. 567 dt := e.template(dname) 568 if dt == nil { 569 dt = template.New(dname) 570 dt.Tree = &parse.Tree{Name: dname, Root: t.Root.CopyList()} 571 e.derived[dname] = dt 572 } 573 t = dt 574 } 575 return e.computeOutCtx(c, t), dname 576 } 577 578 // computeOutCtx takes a template and its start context and computes the output 579 // context while storing any inferences in e. 580 func (e *escaper) computeOutCtx(c context, t *template.Template) context { 581 // Propagate context over the body. 582 c1, ok := e.escapeTemplateBody(c, t) 583 if !ok { 584 // Look for a fixed point by assuming c1 as the output context. 585 if c2, ok2 := e.escapeTemplateBody(c1, t); ok2 { 586 c1, ok = c2, true 587 } 588 // Use c1 as the error context if neither assumption worked. 589 } 590 if !ok && c1.state != stateError { 591 return context{ 592 state: stateError, 593 err: errorf(ErrOutputContext, t.Tree.Root, 0, "cannot compute output context for template %s", t.Name()), 594 } 595 } 596 return c1 597 } 598 599 // escapeTemplateBody escapes the given template assuming the given output 600 // context, and returns the best guess at the output context and whether the 601 // assumption was correct. 602 func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) { 603 filter := func(e1 *escaper, c1 context) bool { 604 if c1.state == stateError { 605 // Do not update the input escaper, e. 606 return false 607 } 608 if !e1.called[t.Name()] { 609 // If t is not recursively called, then c1 is an 610 // accurate output context. 611 return true 612 } 613 // c1 is accurate if it matches our assumed output context. 614 return c.eq(c1) 615 } 616 // We need to assume an output context so that recursive template calls 617 // take the fast path out of escapeTree instead of infinitely recursing. 618 // Naively assuming that the input context is the same as the output 619 // works >90% of the time. 620 e.output[t.Name()] = c 621 return e.escapeListConditionally(c, t.Tree.Root, filter) 622 } 623 624 // delimEnds maps each delim to a string of characters that terminate it. 625 var delimEnds = [...]string{ 626 delimDoubleQuote: `"`, 627 delimSingleQuote: "'", 628 // Determined empirically by running the below in various browsers. 629 // var div = document.createElement("DIV"); 630 // for (var i = 0; i < 0x10000; ++i) { 631 // div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>"; 632 // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) 633 // document.write("<p>U+" + i.toString(16)); 634 // } 635 delimSpaceOrTagEnd: " \t\n\f\r>", 636 } 637 638 var doctypeBytes = []byte("<!DOCTYPE") 639 640 // escapeText escapes a text template node. 641 func (e *escaper) escapeText(c context, n *parse.TextNode) context { 642 s, written, i, b := n.Text, 0, 0, new(bytes.Buffer) 643 for i != len(s) { 644 c1, nread := contextAfterText(c, s[i:]) 645 i1 := i + nread 646 if c.state == stateText || c.state == stateRCDATA { 647 end := i1 648 if c1.state != c.state { 649 for j := end - 1; j >= i; j-- { 650 if s[j] == '<' { 651 end = j 652 break 653 } 654 } 655 } 656 for j := i; j < end; j++ { 657 if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) { 658 b.Write(s[written:j]) 659 b.WriteString("<") 660 written = j + 1 661 } 662 } 663 } else if isComment(c.state) && c.delim == delimNone { 664 switch c.state { 665 case stateJSBlockCmt: 666 // http://es5.github.com/#x7.4: 667 // "Comments behave like white space and are 668 // discarded except that, if a MultiLineComment 669 // contains a line terminator character, then 670 // the entire comment is considered to be a 671 // LineTerminator for purposes of parsing by 672 // the syntactic grammar." 673 if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") { 674 b.WriteByte('\n') 675 } else { 676 b.WriteByte(' ') 677 } 678 case stateCSSBlockCmt: 679 b.WriteByte(' ') 680 } 681 written = i1 682 } 683 if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone { 684 // Preserve the portion between written and the comment start. 685 cs := i1 - 2 686 if c1.state == stateHTMLCmt { 687 // "<!--" instead of "/*" or "//" 688 cs -= 2 689 } 690 b.Write(s[written:cs]) 691 written = i1 692 } 693 if i == i1 && c.state == c1.state { 694 panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:])) 695 } 696 c, i = c1, i1 697 } 698 699 if written != 0 && c.state != stateError { 700 if !isComment(c.state) || c.delim != delimNone { 701 b.Write(n.Text[written:]) 702 } 703 e.editTextNode(n, b.Bytes()) 704 } 705 return c 706 } 707 708 // contextAfterText starts in context c, consumes some tokens from the front of 709 // s, then returns the context after those tokens and the unprocessed suffix. 710 func contextAfterText(c context, s []byte) (context, int) { 711 if c.delim == delimNone { 712 c1, i := tSpecialTagEnd(c, s) 713 if i == 0 { 714 // A special end tag (`</script>`) has been seen and 715 // all content preceding it has been consumed. 716 return c1, 0 717 } 718 // Consider all content up to any end tag. 719 return transitionFunc[c.state](c, s[:i]) 720 } 721 722 // We are at the beginning of an attribute value. 723 724 i := bytes.IndexAny(s, delimEnds[c.delim]) 725 if i == -1 { 726 i = len(s) 727 } 728 if c.delim == delimSpaceOrTagEnd { 729 // http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state 730 // lists the runes below as error characters. 731 // Error out because HTML parsers may differ on whether 732 // "<a id= onclick=f(" ends inside id's or onclick's value, 733 // "<a class=`foo " ends inside a value, 734 // "<a style=font:'Arial'" needs open-quote fixup. 735 // IE treats '`' as a quotation character. 736 if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 { 737 return context{ 738 state: stateError, 739 err: errorf(ErrBadHTML, nil, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]), 740 }, len(s) 741 } 742 } 743 if i == len(s) { 744 // Remain inside the attribute. 745 // Decode the value so non-HTML rules can easily handle 746 // <button onclick="alert("Hi!")"> 747 // without having to entity decode token boundaries. 748 for u := []byte(html.UnescapeString(string(s))); len(u) != 0; { 749 c1, i1 := transitionFunc[c.state](c, u) 750 c, u = c1, u[i1:] 751 } 752 return c, len(s) 753 } 754 755 element := c.element 756 757 // If this is a non-JS "type" attribute inside "script" tag, do not treat the contents as JS. 758 if c.state == stateAttr && c.element == elementScript && c.attr == attrScriptType && !isJSType(string(s[:i])) { 759 element = elementNone 760 } 761 762 if c.delim != delimSpaceOrTagEnd { 763 // Consume any quote. 764 i++ 765 } 766 // On exiting an attribute, we discard all state information 767 // except the state and element. 768 return context{state: stateTag, element: element}, i 769 } 770 771 // editActionNode records a change to an action pipeline for later commit. 772 func (e *escaper) editActionNode(n *parse.ActionNode, cmds []string) { 773 if _, ok := e.actionNodeEdits[n]; ok { 774 panic(fmt.Sprintf("node %s shared between templates", n)) 775 } 776 e.actionNodeEdits[n] = cmds 777 } 778 779 // editTemplateNode records a change to a {{template}} callee for later commit. 780 func (e *escaper) editTemplateNode(n *parse.TemplateNode, callee string) { 781 if _, ok := e.templateNodeEdits[n]; ok { 782 panic(fmt.Sprintf("node %s shared between templates", n)) 783 } 784 e.templateNodeEdits[n] = callee 785 } 786 787 // editTextNode records a change to a text node for later commit. 788 func (e *escaper) editTextNode(n *parse.TextNode, text []byte) { 789 if _, ok := e.textNodeEdits[n]; ok { 790 panic(fmt.Sprintf("node %s shared between templates", n)) 791 } 792 e.textNodeEdits[n] = text 793 } 794 795 // commit applies changes to actions and template calls needed to contextually 796 // autoescape content and adds any derived templates to the set. 797 func (e *escaper) commit() { 798 for name := range e.output { 799 e.template(name).Funcs(funcMap) 800 } 801 for _, t := range e.derived { 802 if _, err := e.tmpl.text.AddParseTree(t.Name(), t.Tree); err != nil { 803 panic("error adding derived template") 804 } 805 } 806 for n, s := range e.actionNodeEdits { 807 ensurePipelineContains(n.Pipe, s) 808 } 809 for n, name := range e.templateNodeEdits { 810 n.Name = name 811 } 812 for n, s := range e.textNodeEdits { 813 n.Text = s 814 } 815 } 816 817 // template returns the named template given a mangled template name. 818 func (e *escaper) template(name string) *template.Template { 819 t := e.tmpl.text.Lookup(name) 820 if t == nil { 821 t = e.derived[name] 822 } 823 return t 824 } 825 826 // Forwarding functions so that clients need only import this package 827 // to reach the general escaping functions of text/template. 828 829 // HTMLEscape writes to w the escaped HTML equivalent of the plain text data b. 830 func HTMLEscape(w io.Writer, b []byte) { 831 template.HTMLEscape(w, b) 832 } 833 834 // HTMLEscapeString returns the escaped HTML equivalent of the plain text data s. 835 func HTMLEscapeString(s string) string { 836 return template.HTMLEscapeString(s) 837 } 838 839 // HTMLEscaper returns the escaped HTML equivalent of the textual 840 // representation of its arguments. 841 func HTMLEscaper(args ...interface{}) string { 842 return template.HTMLEscaper(args...) 843 } 844 845 // JSEscape writes to w the escaped JavaScript equivalent of the plain text data b. 846 func JSEscape(w io.Writer, b []byte) { 847 template.JSEscape(w, b) 848 } 849 850 // JSEscapeString returns the escaped JavaScript equivalent of the plain text data s. 851 func JSEscapeString(s string) string { 852 return template.JSEscapeString(s) 853 } 854 855 // JSEscaper returns the escaped JavaScript equivalent of the textual 856 // representation of its arguments. 857 func JSEscaper(args ...interface{}) string { 858 return template.JSEscaper(args...) 859 } 860 861 // URLQueryEscaper returns the escaped value of the textual representation of 862 // its arguments in a form suitable for embedding in a URL query. 863 func URLQueryEscaper(args ...interface{}) string { 864 return template.URLQueryEscaper(args...) 865 }