github.com/flyinox/gosm@v0.0.0-20171117061539-16768cb62077/src/html/template/escape.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "fmt" 10 "html" 11 "io" 12 "text/template" 13 "text/template/parse" 14 ) 15 16 // escapeTemplate rewrites the named template, which must be 17 // associated with t, to guarantee that the output of any of the named 18 // templates is properly escaped. If no error is returned, then the named templates have 19 // been modified. Otherwise the named templates have been rendered 20 // unusable. 21 func escapeTemplate(tmpl *Template, node parse.Node, name string) error { 22 c, _ := tmpl.esc.escapeTree(context{}, node, name, 0) 23 var err error 24 if c.err != nil { 25 err, c.err.Name = c.err, name 26 } else if c.state != stateText { 27 err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)} 28 } 29 if err != nil { 30 // Prevent execution of unsafe templates. 31 if t := tmpl.set[name]; t != nil { 32 t.escapeErr = err 33 t.text.Tree = nil 34 t.Tree = nil 35 } 36 return err 37 } 38 tmpl.esc.commit() 39 if t := tmpl.set[name]; t != nil { 40 t.escapeErr = escapeOK 41 t.Tree = t.text.Tree 42 } 43 return nil 44 } 45 46 // evalArgs formats the list of arguments into a string. It is equivalent to 47 // fmt.Sprint(args...), except that it deferences all pointers. 48 func evalArgs(args ...interface{}) string { 49 // Optimization for simple common case of a single string argument. 50 if len(args) == 1 { 51 if s, ok := args[0].(string); ok { 52 return s 53 } 54 } 55 for i, arg := range args { 56 args[i] = indirectToStringerOrError(arg) 57 } 58 return fmt.Sprint(args...) 59 } 60 61 // funcMap maps command names to functions that render their inputs safe. 
62 var funcMap = template.FuncMap{ 63 "_html_template_attrescaper": attrEscaper, 64 "_html_template_commentescaper": commentEscaper, 65 "_html_template_cssescaper": cssEscaper, 66 "_html_template_cssvaluefilter": cssValueFilter, 67 "_html_template_htmlnamefilter": htmlNameFilter, 68 "_html_template_htmlescaper": htmlEscaper, 69 "_html_template_jsregexpescaper": jsRegexpEscaper, 70 "_html_template_jsstrescaper": jsStrEscaper, 71 "_html_template_jsvalescaper": jsValEscaper, 72 "_html_template_nospaceescaper": htmlNospaceEscaper, 73 "_html_template_rcdataescaper": rcdataEscaper, 74 "_html_template_urlescaper": urlEscaper, 75 "_html_template_urlfilter": urlFilter, 76 "_html_template_urlnormalizer": urlNormalizer, 77 "_eval_args_": evalArgs, 78 } 79 80 // escaper collects type inferences about templates and changes needed to make 81 // templates injection safe. 82 type escaper struct { 83 // ns is the nameSpace that this escaper is associated with. 84 ns *nameSpace 85 // output[templateName] is the output context for a templateName that 86 // has been mangled to include its input context. 87 output map[string]context 88 // derived[c.mangle(name)] maps to a template derived from the template 89 // named name templateName for the start context c. 90 derived map[string]*template.Template 91 // called[templateName] is a set of called mangled template names. 92 called map[string]bool 93 // xxxNodeEdits are the accumulated edits to apply during commit. 94 // Such edits are not applied immediately in case a template set 95 // executes a given template in different escaping contexts. 96 actionNodeEdits map[*parse.ActionNode][]string 97 templateNodeEdits map[*parse.TemplateNode]string 98 textNodeEdits map[*parse.TextNode][]byte 99 } 100 101 // makeEscaper creates a blank escaper for the given set. 
102 func makeEscaper(n *nameSpace) escaper { 103 return escaper{ 104 n, 105 map[string]context{}, 106 map[string]*template.Template{}, 107 map[string]bool{}, 108 map[*parse.ActionNode][]string{}, 109 map[*parse.TemplateNode]string{}, 110 map[*parse.TextNode][]byte{}, 111 } 112 } 113 114 // filterFailsafe is an innocuous word that is emitted in place of unsafe values 115 // by sanitizer functions. It is not a keyword in any programming language, 116 // contains no special characters, is not empty, and when it appears in output 117 // it is distinct enough that a developer can find the source of the problem 118 // via a search engine. 119 const filterFailsafe = "ZgotmplZ" 120 121 // escape escapes a template node. 122 func (e *escaper) escape(c context, n parse.Node) context { 123 switch n := n.(type) { 124 case *parse.ActionNode: 125 return e.escapeAction(c, n) 126 case *parse.IfNode: 127 return e.escapeBranch(c, &n.BranchNode, "if") 128 case *parse.ListNode: 129 return e.escapeList(c, n) 130 case *parse.RangeNode: 131 return e.escapeBranch(c, &n.BranchNode, "range") 132 case *parse.TemplateNode: 133 return e.escapeTemplate(c, n) 134 case *parse.TextNode: 135 return e.escapeText(c, n) 136 case *parse.WithNode: 137 return e.escapeBranch(c, &n.BranchNode, "with") 138 } 139 panic("escaping " + n.String() + " is unimplemented") 140 } 141 142 // escapeAction escapes an action template node. 143 func (e *escaper) escapeAction(c context, n *parse.ActionNode) context { 144 if len(n.Pipe.Decl) != 0 { 145 // A local variable assignment, not an interpolation. 146 return c 147 } 148 c = nudge(c) 149 // Check for disallowed use of predefined escapers in the pipeline. 150 for pos, idNode := range n.Pipe.Cmds { 151 node, ok := idNode.Args[0].(*parse.IdentifierNode) 152 if !ok { 153 // A predefined escaper "esc" will never be found as an identifier in a 154 // Chain or Field node, since: 155 // - "esc.x ..." 
is invalid, since predefined escapers return strings, and 156 // strings do not have methods, keys or fields. 157 // - "... .esc" is invalid, since predefined escapers are global functions, 158 // not methods or fields of any types. 159 // Therefore, it is safe to ignore these two node types. 160 continue 161 } 162 ident := node.Ident 163 if _, ok := predefinedEscapers[ident]; ok { 164 if pos < len(n.Pipe.Cmds)-1 || 165 c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" { 166 return context{ 167 state: stateError, 168 err: errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident), 169 } 170 } 171 } 172 } 173 s := make([]string, 0, 3) 174 switch c.state { 175 case stateError: 176 return c 177 case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL: 178 switch c.urlPart { 179 case urlPartNone: 180 s = append(s, "_html_template_urlfilter") 181 fallthrough 182 case urlPartPreQuery: 183 switch c.state { 184 case stateCSSDqStr, stateCSSSqStr: 185 s = append(s, "_html_template_cssescaper") 186 default: 187 s = append(s, "_html_template_urlnormalizer") 188 } 189 case urlPartQueryOrFrag: 190 s = append(s, "_html_template_urlescaper") 191 case urlPartUnknown: 192 return context{ 193 state: stateError, 194 err: errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n), 195 } 196 default: 197 panic(c.urlPart.String()) 198 } 199 case stateJS: 200 s = append(s, "_html_template_jsvalescaper") 201 // A slash after a value starts a div operator. 
202 c.jsCtx = jsCtxDivOp 203 case stateJSDqStr, stateJSSqStr: 204 s = append(s, "_html_template_jsstrescaper") 205 case stateJSRegexp: 206 s = append(s, "_html_template_jsregexpescaper") 207 case stateCSS: 208 s = append(s, "_html_template_cssvaluefilter") 209 case stateText: 210 s = append(s, "_html_template_htmlescaper") 211 case stateRCDATA: 212 s = append(s, "_html_template_rcdataescaper") 213 case stateAttr: 214 // Handled below in delim check. 215 case stateAttrName, stateTag: 216 c.state = stateAttrName 217 s = append(s, "_html_template_htmlnamefilter") 218 default: 219 if isComment(c.state) { 220 s = append(s, "_html_template_commentescaper") 221 } else { 222 panic("unexpected state " + c.state.String()) 223 } 224 } 225 switch c.delim { 226 case delimNone: 227 // No extra-escaping needed for raw text content. 228 case delimSpaceOrTagEnd: 229 s = append(s, "_html_template_nospaceescaper") 230 default: 231 s = append(s, "_html_template_attrescaper") 232 } 233 e.editActionNode(n, s) 234 return c 235 } 236 237 // ensurePipelineContains ensures that the pipeline ends with the commands with 238 // the identifiers in s in order. If the pipeline ends with a predefined escaper 239 // (i.e. "html" or "urlquery"), merge it with the identifiers in s. 240 func ensurePipelineContains(p *parse.PipeNode, s []string) { 241 if len(s) == 0 { 242 // Do not rewrite pipeline if we have no escapers to insert. 243 return 244 } 245 // Precondition: p.Cmds contains at most one predefined escaper and the 246 // escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is 247 // always true because of the checks in escapeAction. 248 pipelineLen := len(p.Cmds) 249 if pipelineLen > 0 { 250 lastCmd := p.Cmds[pipelineLen-1] 251 if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok { 252 if esc := idNode.Ident; predefinedEscapers[esc] { 253 // Pipeline ends with a predefined escaper. 
254 if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 { 255 // Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }}, 256 // where esc is the predefined escaper, and arg1...argN are its arguments. 257 // Convert this into the equivalent form 258 // {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily 259 // merged with the escapers in s. 260 lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position()) 261 p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position())) 262 pipelineLen++ 263 } 264 // If any of the commands in s that we are about to insert is equivalent 265 // to the predefined escaper, use the predefined escaper instead. 266 dup := false 267 for i, escaper := range s { 268 if escFnsEq(esc, escaper) { 269 s[i] = idNode.Ident 270 dup = true 271 } 272 } 273 if dup { 274 // The predefined escaper will already be inserted along with the 275 // escapers in s, so do not copy it to the rewritten pipeline. 276 pipelineLen-- 277 } 278 } 279 } 280 } 281 // Rewrite the pipeline, creating the escapers in s at the end of the pipeline. 282 newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s)) 283 copy(newCmds, p.Cmds) 284 for _, name := range s { 285 newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position())) 286 } 287 p.Cmds = newCmds 288 } 289 290 // predefinedEscapers contains template predefined escapers that are equivalent 291 // to some contextual escapers. Keep in sync with equivEscapers. 292 var predefinedEscapers = map[string]bool{ 293 "html": true, 294 "urlquery": true, 295 } 296 297 // equivEscapers matches contextual escapers to equivalent predefined 298 // template escapers. 299 var equivEscapers = map[string]string{ 300 // The following pairs of HTML escapers provide equivalent security 301 // guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'. 
302 "_html_template_attrescaper": "html", 303 "_html_template_htmlescaper": "html", 304 "_html_template_rcdataescaper": "html", 305 // These two URL escapers produce URLs safe for embedding in a URL query by 306 // percent-encoding all the reserved characters specified in RFC 3986 Section 307 // 2.2 308 "_html_template_urlescaper": "urlquery", 309 // These two functions are not actually equivalent; urlquery is stricter as it 310 // escapes reserved characters (e.g. '#'), while _html_template_urlnormalizer 311 // does not. It is therefore only safe to replace _html_template_urlnormalizer 312 // with urlquery (this happens in ensurePipelineContains), but not the otherI've 313 // way around. We keep this entry around to preserve the behavior of templates 314 // written before Go 1.9, which might depend on this substitution taking place. 315 "_html_template_urlnormalizer": "urlquery", 316 } 317 318 // escFnsEq reports whether the two escaping functions are equivalent. 319 func escFnsEq(a, b string) bool { 320 if e := equivEscapers[a]; e != "" { 321 a = e 322 } 323 if e := equivEscapers[b]; e != "" { 324 b = e 325 } 326 return a == b 327 } 328 329 // redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x) 330 // for all x. 331 var redundantFuncs = map[string]map[string]bool{ 332 "_html_template_commentescaper": { 333 "_html_template_attrescaper": true, 334 "_html_template_nospaceescaper": true, 335 "_html_template_htmlescaper": true, 336 }, 337 "_html_template_cssescaper": { 338 "_html_template_attrescaper": true, 339 }, 340 "_html_template_jsregexpescaper": { 341 "_html_template_attrescaper": true, 342 }, 343 "_html_template_jsstrescaper": { 344 "_html_template_attrescaper": true, 345 }, 346 "_html_template_urlescaper": { 347 "_html_template_urlnormalizer": true, 348 }, 349 } 350 351 // appendCmd appends the given command to the end of the command pipeline 352 // unless it is redundant with the last command. 
353 func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode { 354 if n := len(cmds); n != 0 { 355 last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode) 356 next, okNext := cmd.Args[0].(*parse.IdentifierNode) 357 if okLast && okNext && redundantFuncs[last.Ident][next.Ident] { 358 return cmds 359 } 360 } 361 return append(cmds, cmd) 362 } 363 364 // indexOfStr is the first i such that eq(s, strs[i]) or -1 if s was not found. 365 func indexOfStr(s string, strs []string, eq func(a, b string) bool) int { 366 for i, t := range strs { 367 if eq(s, t) { 368 return i 369 } 370 } 371 return -1 372 } 373 374 // newIdentCmd produces a command containing a single identifier node. 375 func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode { 376 return &parse.CommandNode{ 377 NodeType: parse.NodeCommand, 378 Args: []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree. 379 } 380 } 381 382 // nudge returns the context that would result from following empty string 383 // transitions from the input context. 384 // For example, parsing: 385 // `<a href=` 386 // will end in context{stateBeforeValue, attrURL}, but parsing one extra rune: 387 // `<a href=x` 388 // will end in context{stateURL, delimSpaceOrTagEnd, ...}. 389 // There are two transitions that happen when the 'x' is seen: 390 // (1) Transition from a before-value state to a start-of-value state without 391 // consuming any character. 392 // (2) Consume 'x' and transition past the first value character. 393 // In this case, nudging produces the context after (1) happens. 394 func nudge(c context) context { 395 switch c.state { 396 case stateTag: 397 // In `<foo {{.}}`, the action should emit an attribute. 398 c.state = stateAttrName 399 case stateBeforeValue: 400 // In `<foo bar={{.}}`, the action is an undelimited value. 
401 c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone 402 case stateAfterName: 403 // In `<foo bar {{.}}`, the action is an attribute name. 404 c.state, c.attr = stateAttrName, attrNone 405 } 406 return c 407 } 408 409 // join joins the two contexts of a branch template node. The result is an 410 // error context if either of the input contexts are error contexts, or if the 411 // the input contexts differ. 412 func join(a, b context, node parse.Node, nodeName string) context { 413 if a.state == stateError { 414 return a 415 } 416 if b.state == stateError { 417 return b 418 } 419 if a.eq(b) { 420 return a 421 } 422 423 c := a 424 c.urlPart = b.urlPart 425 if c.eq(b) { 426 // The contexts differ only by urlPart. 427 c.urlPart = urlPartUnknown 428 return c 429 } 430 431 c = a 432 c.jsCtx = b.jsCtx 433 if c.eq(b) { 434 // The contexts differ only by jsCtx. 435 c.jsCtx = jsCtxUnknown 436 return c 437 } 438 439 // Allow a nudged context to join with an unnudged one. 440 // This means that 441 // <p title={{if .C}}{{.}}{{end}} 442 // ends in an unquoted value state even though the else branch 443 // ends in stateBeforeValue. 444 if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) { 445 if e := join(c, d, node, nodeName); e.state != stateError { 446 return e 447 } 448 } 449 450 return context{ 451 state: stateError, 452 err: errorf(ErrBranchEnd, node, 0, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b), 453 } 454 } 455 456 // escapeBranch escapes a branch template node: "if", "range" and "with". 457 func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context { 458 c0 := e.escapeList(c, n.List) 459 if nodeName == "range" && c0.state != stateError { 460 // The "true" branch of a "range" node can execute multiple times. 461 // We check that executing n.List once results in the same context 462 // as executing n.List twice. 
463 c1, _ := e.escapeListConditionally(c0, n.List, nil) 464 c0 = join(c0, c1, n, nodeName) 465 if c0.state == stateError { 466 // Make clear that this is a problem on loop re-entry 467 // since developers tend to overlook that branch when 468 // debugging templates. 469 c0.err.Line = n.Line 470 c0.err.Description = "on range loop re-entry: " + c0.err.Description 471 return c0 472 } 473 } 474 c1 := e.escapeList(c, n.ElseList) 475 return join(c0, c1, n, nodeName) 476 } 477 478 // escapeList escapes a list template node. 479 func (e *escaper) escapeList(c context, n *parse.ListNode) context { 480 if n == nil { 481 return c 482 } 483 for _, m := range n.Nodes { 484 c = e.escape(c, m) 485 } 486 return c 487 } 488 489 // escapeListConditionally escapes a list node but only preserves edits and 490 // inferences in e if the inferences and output context satisfy filter. 491 // It returns the best guess at an output context, and the result of the filter 492 // which is the same as whether e was updated. 493 func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) { 494 e1 := makeEscaper(e.ns) 495 // Make type inferences available to f. 496 for k, v := range e.output { 497 e1.output[k] = v 498 } 499 c = e1.escapeList(c, n) 500 ok := filter != nil && filter(&e1, c) 501 if ok { 502 // Copy inferences and edits from e1 back into e. 503 for k, v := range e1.output { 504 e.output[k] = v 505 } 506 for k, v := range e1.derived { 507 e.derived[k] = v 508 } 509 for k, v := range e1.called { 510 e.called[k] = v 511 } 512 for k, v := range e1.actionNodeEdits { 513 e.editActionNode(k, v) 514 } 515 for k, v := range e1.templateNodeEdits { 516 e.editTemplateNode(k, v) 517 } 518 for k, v := range e1.textNodeEdits { 519 e.editTextNode(k, v) 520 } 521 } 522 return c, ok 523 } 524 525 // escapeTemplate escapes a {{template}} call node. 
526 func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context { 527 c, name := e.escapeTree(c, n, n.Name, n.Line) 528 if name != n.Name { 529 e.editTemplateNode(n, name) 530 } 531 return c 532 } 533 534 // escapeTree escapes the named template starting in the given context as 535 // necessary and returns its output context. 536 func (e *escaper) escapeTree(c context, node parse.Node, name string, line int) (context, string) { 537 // Mangle the template name with the input context to produce a reliable 538 // identifier. 539 dname := c.mangle(name) 540 e.called[dname] = true 541 if out, ok := e.output[dname]; ok { 542 // Already escaped. 543 return out, dname 544 } 545 t := e.template(name) 546 if t == nil { 547 // Two cases: The template exists but is empty, or has never been mentioned at 548 // all. Distinguish the cases in the error messages. 549 if e.ns.set[name] != nil { 550 return context{ 551 state: stateError, 552 err: errorf(ErrNoSuchTemplate, node, line, "%q is an incomplete or empty template", name), 553 }, dname 554 } 555 return context{ 556 state: stateError, 557 err: errorf(ErrNoSuchTemplate, node, line, "no such template %q", name), 558 }, dname 559 } 560 if dname != name { 561 // Use any template derived during an earlier call to escapeTemplate 562 // with different top level templates, or clone if necessary. 563 dt := e.template(dname) 564 if dt == nil { 565 dt = template.New(dname) 566 dt.Tree = &parse.Tree{Name: dname, Root: t.Root.CopyList()} 567 e.derived[dname] = dt 568 } 569 t = dt 570 } 571 return e.computeOutCtx(c, t), dname 572 } 573 574 // computeOutCtx takes a template and its start context and computes the output 575 // context while storing any inferences in e. 576 func (e *escaper) computeOutCtx(c context, t *template.Template) context { 577 // Propagate context over the body. 578 c1, ok := e.escapeTemplateBody(c, t) 579 if !ok { 580 // Look for a fixed point by assuming c1 as the output context. 
581 if c2, ok2 := e.escapeTemplateBody(c1, t); ok2 { 582 c1, ok = c2, true 583 } 584 // Use c1 as the error context if neither assumption worked. 585 } 586 if !ok && c1.state != stateError { 587 return context{ 588 state: stateError, 589 err: errorf(ErrOutputContext, t.Tree.Root, 0, "cannot compute output context for template %s", t.Name()), 590 } 591 } 592 return c1 593 } 594 595 // escapeTemplateBody escapes the given template assuming the given output 596 // context, and returns the best guess at the output context and whether the 597 // assumption was correct. 598 func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) { 599 filter := func(e1 *escaper, c1 context) bool { 600 if c1.state == stateError { 601 // Do not update the input escaper, e. 602 return false 603 } 604 if !e1.called[t.Name()] { 605 // If t is not recursively called, then c1 is an 606 // accurate output context. 607 return true 608 } 609 // c1 is accurate if it matches our assumed output context. 610 return c.eq(c1) 611 } 612 // We need to assume an output context so that recursive template calls 613 // take the fast path out of escapeTree instead of infinitely recursing. 614 // Naively assuming that the input context is the same as the output 615 // works >90% of the time. 616 e.output[t.Name()] = c 617 return e.escapeListConditionally(c, t.Tree.Root, filter) 618 } 619 620 // delimEnds maps each delim to a string of characters that terminate it. 621 var delimEnds = [...]string{ 622 delimDoubleQuote: `"`, 623 delimSingleQuote: "'", 624 // Determined empirically by running the below in various browsers. 
625 // var div = document.createElement("DIV"); 626 // for (var i = 0; i < 0x10000; ++i) { 627 // div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>"; 628 // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) 629 // document.write("<p>U+" + i.toString(16)); 630 // } 631 delimSpaceOrTagEnd: " \t\n\f\r>", 632 } 633 634 var doctypeBytes = []byte("<!DOCTYPE") 635 636 // escapeText escapes a text template node. 637 func (e *escaper) escapeText(c context, n *parse.TextNode) context { 638 s, written, i, b := n.Text, 0, 0, new(bytes.Buffer) 639 for i != len(s) { 640 c1, nread := contextAfterText(c, s[i:]) 641 i1 := i + nread 642 if c.state == stateText || c.state == stateRCDATA { 643 end := i1 644 if c1.state != c.state { 645 for j := end - 1; j >= i; j-- { 646 if s[j] == '<' { 647 end = j 648 break 649 } 650 } 651 } 652 for j := i; j < end; j++ { 653 if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) { 654 b.Write(s[written:j]) 655 b.WriteString("<") 656 written = j + 1 657 } 658 } 659 } else if isComment(c.state) && c.delim == delimNone { 660 switch c.state { 661 case stateJSBlockCmt: 662 // http://es5.github.com/#x7.4: 663 // "Comments behave like white space and are 664 // discarded except that, if a MultiLineComment 665 // contains a line terminator character, then 666 // the entire comment is considered to be a 667 // LineTerminator for purposes of parsing by 668 // the syntactic grammar." 669 if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") { 670 b.WriteByte('\n') 671 } else { 672 b.WriteByte(' ') 673 } 674 case stateCSSBlockCmt: 675 b.WriteByte(' ') 676 } 677 written = i1 678 } 679 if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone { 680 // Preserve the portion between written and the comment start. 
681 cs := i1 - 2 682 if c1.state == stateHTMLCmt { 683 // "<!--" instead of "/*" or "//" 684 cs -= 2 685 } 686 b.Write(s[written:cs]) 687 written = i1 688 } 689 if i == i1 && c.state == c1.state { 690 panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:])) 691 } 692 c, i = c1, i1 693 } 694 695 if written != 0 && c.state != stateError { 696 if !isComment(c.state) || c.delim != delimNone { 697 b.Write(n.Text[written:]) 698 } 699 e.editTextNode(n, b.Bytes()) 700 } 701 return c 702 } 703 704 // contextAfterText starts in context c, consumes some tokens from the front of 705 // s, then returns the context after those tokens and the unprocessed suffix. 706 func contextAfterText(c context, s []byte) (context, int) { 707 if c.delim == delimNone { 708 c1, i := tSpecialTagEnd(c, s) 709 if i == 0 { 710 // A special end tag (`</script>`) has been seen and 711 // all content preceding it has been consumed. 712 return c1, 0 713 } 714 // Consider all content up to any end tag. 715 return transitionFunc[c.state](c, s[:i]) 716 } 717 718 // We are at the beginning of an attribute value. 719 720 i := bytes.IndexAny(s, delimEnds[c.delim]) 721 if i == -1 { 722 i = len(s) 723 } 724 if c.delim == delimSpaceOrTagEnd { 725 // http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state 726 // lists the runes below as error characters. 727 // Error out because HTML parsers may differ on whether 728 // "<a id= onclick=f(" ends inside id's or onclick's value, 729 // "<a class=`foo " ends inside a value, 730 // "<a style=font:'Arial'" needs open-quote fixup. 731 // IE treats '`' as a quotation character. 732 if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 { 733 return context{ 734 state: stateError, 735 err: errorf(ErrBadHTML, nil, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]), 736 }, len(s) 737 } 738 } 739 if i == len(s) { 740 // Remain inside the attribute. 
741 // Decode the value so non-HTML rules can easily handle 742 // <button onclick="alert("Hi!")"> 743 // without having to entity decode token boundaries. 744 for u := []byte(html.UnescapeString(string(s))); len(u) != 0; { 745 c1, i1 := transitionFunc[c.state](c, u) 746 c, u = c1, u[i1:] 747 } 748 return c, len(s) 749 } 750 751 element := c.element 752 753 // If this is a non-JS "type" attribute inside "script" tag, do not treat the contents as JS. 754 if c.state == stateAttr && c.element == elementScript && c.attr == attrScriptType && !isJSType(string(s[:i])) { 755 element = elementNone 756 } 757 758 if c.delim != delimSpaceOrTagEnd { 759 // Consume any quote. 760 i++ 761 } 762 // On exiting an attribute, we discard all state information 763 // except the state and element. 764 return context{state: stateTag, element: element}, i 765 } 766 767 // editActionNode records a change to an action pipeline for later commit. 768 func (e *escaper) editActionNode(n *parse.ActionNode, cmds []string) { 769 if _, ok := e.actionNodeEdits[n]; ok { 770 panic(fmt.Sprintf("node %s shared between templates", n)) 771 } 772 e.actionNodeEdits[n] = cmds 773 } 774 775 // editTemplateNode records a change to a {{template}} callee for later commit. 776 func (e *escaper) editTemplateNode(n *parse.TemplateNode, callee string) { 777 if _, ok := e.templateNodeEdits[n]; ok { 778 panic(fmt.Sprintf("node %s shared between templates", n)) 779 } 780 e.templateNodeEdits[n] = callee 781 } 782 783 // editTextNode records a change to a text node for later commit. 784 func (e *escaper) editTextNode(n *parse.TextNode, text []byte) { 785 if _, ok := e.textNodeEdits[n]; ok { 786 panic(fmt.Sprintf("node %s shared between templates", n)) 787 } 788 e.textNodeEdits[n] = text 789 } 790 791 // commit applies changes to actions and template calls needed to contextually 792 // autoescape content and adds any derived templates to the set. 
793 func (e *escaper) commit() { 794 for name := range e.output { 795 e.template(name).Funcs(funcMap) 796 } 797 // Any template from the name space associated with this escaper can be used 798 // to add derived templates to the underlying text/template name space. 799 tmpl := e.arbitraryTemplate() 800 for _, t := range e.derived { 801 if _, err := tmpl.text.AddParseTree(t.Name(), t.Tree); err != nil { 802 panic("error adding derived template") 803 } 804 } 805 for n, s := range e.actionNodeEdits { 806 ensurePipelineContains(n.Pipe, s) 807 } 808 for n, name := range e.templateNodeEdits { 809 n.Name = name 810 } 811 for n, s := range e.textNodeEdits { 812 n.Text = s 813 } 814 // Reset state that is specific to this commit so that the same changes are 815 // not re-applied to the template on subsequent calls to commit. 816 e.called = make(map[string]bool) 817 e.actionNodeEdits = make(map[*parse.ActionNode][]string) 818 e.templateNodeEdits = make(map[*parse.TemplateNode]string) 819 e.textNodeEdits = make(map[*parse.TextNode][]byte) 820 } 821 822 // template returns the named template given a mangled template name. 823 func (e *escaper) template(name string) *template.Template { 824 // Any template from the name space associated with this escaper can be used 825 // to look up templates in the underlying text/template name space. 826 t := e.arbitraryTemplate().text.Lookup(name) 827 if t == nil { 828 t = e.derived[name] 829 } 830 return t 831 } 832 833 // arbitraryTemplate returns an arbitrary template from the name space 834 // associated with e and panics if no templates are found. 835 func (e *escaper) arbitraryTemplate() *Template { 836 for _, t := range e.ns.set { 837 return t 838 } 839 panic("no templates in name space") 840 } 841 842 // Forwarding functions so that clients need only import this package 843 // to reach the general escaping functions of text/template. 844 845 // HTMLEscape writes to w the escaped HTML equivalent of the plain text data b. 
func HTMLEscape(w io.Writer, b []byte) {
	template.HTMLEscape(w, b)
}

// HTMLEscapeString forwards to text/template and returns the HTML-escaped
// form of the plain text s.
func HTMLEscapeString(s string) string {
	escaped := template.HTMLEscapeString(s)
	return escaped
}

// HTMLEscaper forwards to text/template and returns the HTML-escaped form of
// the textual representation of args.
func HTMLEscaper(args ...interface{}) string {
	escaped := template.HTMLEscaper(args...)
	return escaped
}

// JSEscape forwards to text/template, writing to w the JavaScript-escaped
// form of the plain text b.
func JSEscape(w io.Writer, b []byte) {
	template.JSEscape(w, b)
}

// JSEscapeString forwards to text/template and returns the JavaScript-escaped
// form of the plain text s.
func JSEscapeString(s string) string {
	escaped := template.JSEscapeString(s)
	return escaped
}

// JSEscaper forwards to text/template and returns the JavaScript-escaped form
// of the textual representation of args.
func JSEscaper(args ...interface{}) string {
	escaped := template.JSEscaper(args...)
	return escaped
}

// URLQueryEscaper forwards to text/template and returns the textual
// representation of args escaped for embedding in a URL query.
func URLQueryEscaper(args ...interface{}) string {
	escaped := template.URLQueryEscaper(args...)
	return escaped
}