github.com/anakojm/hugo-katex@v0.0.0-20231023141351-42d6f5de9c0b/tpl/internal/go_templates/htmltemplate/escape.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "fmt" 10 "html" 11 12 //"internal/godebug" 13 "io" 14 "regexp" 15 16 template "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate" 17 "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse" 18 ) 19 20 // escapeTemplate rewrites the named template, which must be 21 // associated with t, to guarantee that the output of any of the named 22 // templates is properly escaped. If no error is returned, then the named templates have 23 // been modified. Otherwise the named templates have been rendered 24 // unusable. 25 func escapeTemplate(tmpl *Template, node parse.Node, name string) error { 26 c, _ := tmpl.esc.escapeTree(context{}, node, name, 0) 27 var err error 28 if c.err != nil { 29 err, c.err.Name = c.err, name 30 } else if c.state != stateText { 31 err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)} 32 } 33 if err != nil { 34 // Prevent execution of unsafe templates. 35 if t := tmpl.set[name]; t != nil { 36 t.escapeErr = err 37 t.text.Tree = nil 38 t.Tree = nil 39 } 40 return err 41 } 42 tmpl.esc.commit() 43 if t := tmpl.set[name]; t != nil { 44 t.escapeErr = escapeOK 45 t.Tree = t.text.Tree 46 } 47 return nil 48 } 49 50 // evalArgs formats the list of arguments into a string. It is equivalent to 51 // fmt.Sprint(args...), except that it dereferences all pointers. 52 func evalArgs(args ...any) string { 53 // Optimization for simple common case of a single string argument. 54 if len(args) == 1 { 55 if s, ok := args[0].(string); ok { 56 return s 57 } 58 } 59 for i, arg := range args { 60 args[i] = indirectToStringerOrError(arg) 61 } 62 return fmt.Sprint(args...) 63 } 64 65 // funcMap maps command names to functions that render their inputs safe. 66 var funcMap = template.FuncMap{ 67 "_html_template_attrescaper": attrEscaper, 68 "_html_template_commentescaper": commentEscaper, 69 "_html_template_cssescaper": cssEscaper, 70 "_html_template_cssvaluefilter": cssValueFilter, 71 "_html_template_htmlnamefilter": htmlNameFilter, 72 "_html_template_htmlescaper": htmlEscaper, 73 "_html_template_jsregexpescaper": jsRegexpEscaper, 74 "_html_template_jsstrescaper": jsStrEscaper, 75 "_html_template_jsvalescaper": jsValEscaper, 76 "_html_template_nospaceescaper": htmlNospaceEscaper, 77 "_html_template_rcdataescaper": rcdataEscaper, 78 "_html_template_srcsetescaper": srcsetFilterAndEscaper, 79 "_html_template_urlescaper": urlEscaper, 80 "_html_template_urlfilter": urlFilter, 81 "_html_template_urlnormalizer": urlNormalizer, 82 "_eval_args_": evalArgs, 83 } 84 85 // escaper collects type inferences about templates and changes needed to make 86 // templates injection safe. 87 type escaper struct { 88 // ns is the nameSpace that this escaper is associated with. 89 ns *nameSpace 90 // output[templateName] is the output context for a templateName that 91 // has been mangled to include its input context. 92 output map[string]context 93 // derived[c.mangle(name)] maps to a template derived from the template 94 // named name templateName for the start context c. 95 derived map[string]*template.Template 96 // called[templateName] is a set of called mangled template names. 97 called map[string]bool 98 // xxxNodeEdits are the accumulated edits to apply during commit. 99 // Such edits are not applied immediately in case a template set 100 // executes a given template in different escaping contexts. 101 actionNodeEdits map[*parse.ActionNode][]string 102 templateNodeEdits map[*parse.TemplateNode]string 103 textNodeEdits map[*parse.TextNode][]byte 104 // rangeContext holds context about the current range loop. 105 rangeContext *rangeContext 106 } 107 108 // rangeContext holds information about the current range loop. 109 type rangeContext struct { 110 outer *rangeContext // outer loop 111 breaks []context // context at each break action 112 continues []context // context at each continue action 113 } 114 115 // makeEscaper creates a blank escaper for the given set. 116 func makeEscaper(n *nameSpace) escaper { 117 return escaper{ 118 n, 119 map[string]context{}, 120 map[string]*template.Template{}, 121 map[string]bool{}, 122 map[*parse.ActionNode][]string{}, 123 map[*parse.TemplateNode]string{}, 124 map[*parse.TextNode][]byte{}, 125 nil, 126 } 127 } 128 129 // filterFailsafe is an innocuous word that is emitted in place of unsafe values 130 // by sanitizer functions. It is not a keyword in any programming language, 131 // contains no special characters, is not empty, and when it appears in output 132 // it is distinct enough that a developer can find the source of the problem 133 // via a search engine. 134 const filterFailsafe = "ZgotmplZ" 135 136 // escape escapes a template node. 137 func (e *escaper) escape(c context, n parse.Node) context { 138 switch n := n.(type) { 139 case *parse.ActionNode: 140 return e.escapeAction(c, n) 141 case *parse.BreakNode: 142 c.n = n 143 e.rangeContext.breaks = append(e.rangeContext.breaks, c) 144 return context{state: stateDead} 145 case *parse.CommentNode: 146 return c 147 case *parse.ContinueNode: 148 c.n = n 149 e.rangeContext.continues = append(e.rangeContext.breaks, c) 150 return context{state: stateDead} 151 case *parse.IfNode: 152 return e.escapeBranch(c, &n.BranchNode, "if") 153 case *parse.ListNode: 154 return e.escapeList(c, n) 155 case *parse.RangeNode: 156 return e.escapeBranch(c, &n.BranchNode, "range") 157 case *parse.TemplateNode: 158 return e.escapeTemplate(c, n) 159 case *parse.TextNode: 160 return e.escapeText(c, n) 161 case *parse.WithNode: 162 return e.escapeBranch(c, &n.BranchNode, "with") 163 } 164 panic("escaping " + n.String() + " is unimplemented") 165 } 166 167 // Modified by Hugo. 168 // var debugAllowActionJSTmpl = godebug.New("jstmpllitinterp") 169 170 // escapeAction escapes an action template node. 171 func (e *escaper) escapeAction(c context, n *parse.ActionNode) context { 172 if len(n.Pipe.Decl) != 0 { 173 // A local variable assignment, not an interpolation. 174 return c 175 } 176 c = nudge(c) 177 // Check for disallowed use of predefined escapers in the pipeline. 178 for pos, idNode := range n.Pipe.Cmds { 179 node, ok := idNode.Args[0].(*parse.IdentifierNode) 180 if !ok { 181 // A predefined escaper "esc" will never be found as an identifier in a 182 // Chain or Field node, since: 183 // - "esc.x ..." is invalid, since predefined escapers return strings, and 184 // strings do not have methods, keys or fields. 185 // - "... .esc" is invalid, since predefined escapers are global functions, 186 // not methods or fields of any types. 187 // Therefore, it is safe to ignore these two node types. 188 continue 189 } 190 ident := node.Ident 191 if _, ok := predefinedEscapers[ident]; ok { 192 if pos < len(n.Pipe.Cmds)-1 || 193 c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" { 194 return context{ 195 state: stateError, 196 err: errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident), 197 } 198 } 199 } 200 } 201 s := make([]string, 0, 3) 202 switch c.state { 203 case stateError: 204 return c 205 case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL: 206 switch c.urlPart { 207 case urlPartNone: 208 s = append(s, "_html_template_urlfilter") 209 fallthrough 210 case urlPartPreQuery: 211 switch c.state { 212 case stateCSSDqStr, stateCSSSqStr: 213 s = append(s, "_html_template_cssescaper") 214 default: 215 s = append(s, "_html_template_urlnormalizer") 216 } 217 case urlPartQueryOrFrag: 218 s = append(s, "_html_template_urlescaper") 219 case urlPartUnknown: 220 return context{ 221 state: stateError, 222 err: errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n), 223 } 224 default: 225 panic(c.urlPart.String()) 226 } 227 case stateJS: 228 s = append(s, "_html_template_jsvalescaper") 229 // A slash after a value starts a div operator. 230 c.jsCtx = jsCtxDivOp 231 case stateJSDqStr, stateJSSqStr: 232 s = append(s, "_html_template_jsstrescaper") 233 case stateJSBqStr: 234 if SecurityAllowActionJSTmpl.Load() { 235 // debugAllowActionJSTmpl.IncNonDefault() 236 s = append(s, "_html_template_jsstrescaper") 237 } else { 238 return context{ 239 state: stateError, 240 err: errorf(ErrJSTemplate, n, n.Line, "%s appears in a JS template literal", n), 241 } 242 } 243 case stateJSRegexp: 244 s = append(s, "_html_template_jsregexpescaper") 245 case stateCSS: 246 s = append(s, "_html_template_cssvaluefilter") 247 case stateText: 248 s = append(s, "_html_template_htmlescaper") 249 case stateRCDATA: 250 s = append(s, "_html_template_rcdataescaper") 251 case stateAttr: 252 // Handled below in delim check. 253 case stateAttrName, stateTag: 254 c.state = stateAttrName 255 s = append(s, "_html_template_htmlnamefilter") 256 case stateSrcset: 257 s = append(s, "_html_template_srcsetescaper") 258 default: 259 if isComment(c.state) { 260 s = append(s, "_html_template_commentescaper") 261 } else { 262 panic("unexpected state " + c.state.String()) 263 } 264 } 265 switch c.delim { 266 case delimNone: 267 // No extra-escaping needed for raw text content. 268 case delimSpaceOrTagEnd: 269 s = append(s, "_html_template_nospaceescaper") 270 default: 271 s = append(s, "_html_template_attrescaper") 272 } 273 e.editActionNode(n, s) 274 return c 275 } 276 277 // ensurePipelineContains ensures that the pipeline ends with the commands with 278 // the identifiers in s in order. If the pipeline ends with a predefined escaper 279 // (i.e. "html" or "urlquery"), merge it with the identifiers in s. 280 func ensurePipelineContains(p *parse.PipeNode, s []string) { 281 if len(s) == 0 { 282 // Do not rewrite pipeline if we have no escapers to insert. 283 return 284 } 285 // Precondition: p.Cmds contains at most one predefined escaper and the 286 // escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is 287 // always true because of the checks in escapeAction. 288 pipelineLen := len(p.Cmds) 289 if pipelineLen > 0 { 290 lastCmd := p.Cmds[pipelineLen-1] 291 if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok { 292 if esc := idNode.Ident; predefinedEscapers[esc] { 293 // Pipeline ends with a predefined escaper. 294 if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 { 295 // Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }}, 296 // where esc is the predefined escaper, and arg1...argN are its arguments. 297 // Convert this into the equivalent form 298 // {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily 299 // merged with the escapers in s. 300 lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position()) 301 p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position())) 302 pipelineLen++ 303 } 304 // If any of the commands in s that we are about to insert is equivalent 305 // to the predefined escaper, use the predefined escaper instead. 306 dup := false 307 for i, escaper := range s { 308 if escFnsEq(esc, escaper) { 309 s[i] = idNode.Ident 310 dup = true 311 } 312 } 313 if dup { 314 // The predefined escaper will already be inserted along with the 315 // escapers in s, so do not copy it to the rewritten pipeline. 316 pipelineLen-- 317 } 318 } 319 } 320 } 321 // Rewrite the pipeline, creating the escapers in s at the end of the pipeline. 322 newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s)) 323 insertedIdents := make(map[string]bool) 324 for i := 0; i < pipelineLen; i++ { 325 cmd := p.Cmds[i] 326 newCmds[i] = cmd 327 if idNode, ok := cmd.Args[0].(*parse.IdentifierNode); ok { 328 insertedIdents[normalizeEscFn(idNode.Ident)] = true 329 } 330 } 331 for _, name := range s { 332 if !insertedIdents[normalizeEscFn(name)] { 333 // When two templates share an underlying parse tree via the use of 334 // AddParseTree and one template is executed after the other, this check 335 // ensures that escapers that were already inserted into the pipeline on 336 // the first escaping pass do not get inserted again. 337 newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position())) 338 } 339 } 340 p.Cmds = newCmds 341 } 342 343 // predefinedEscapers contains template predefined escapers that are equivalent 344 // to some contextual escapers. Keep in sync with equivEscapers. 345 var predefinedEscapers = map[string]bool{ 346 "html": true, 347 "urlquery": true, 348 } 349 350 // equivEscapers matches contextual escapers to equivalent predefined 351 // template escapers. 352 var equivEscapers = map[string]string{ 353 // The following pairs of HTML escapers provide equivalent security 354 // guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'. 355 "_html_template_attrescaper": "html", 356 "_html_template_htmlescaper": "html", 357 "_html_template_rcdataescaper": "html", 358 // These two URL escapers produce URLs safe for embedding in a URL query by 359 // percent-encoding all the reserved characters specified in RFC 3986 Section 360 // 2.2 361 "_html_template_urlescaper": "urlquery", 362 // These two functions are not actually equivalent; urlquery is stricter as it 363 // escapes reserved characters (e.g. '#'), while _html_template_urlnormalizer 364 // does not. It is therefore only safe to replace _html_template_urlnormalizer 365 // with urlquery (this happens in ensurePipelineContains), but not the otherI've 366 // way around. We keep this entry around to preserve the behavior of templates 367 // written before Go 1.9, which might depend on this substitution taking place. 368 "_html_template_urlnormalizer": "urlquery", 369 } 370 371 // escFnsEq reports whether the two escaping functions are equivalent. 372 func escFnsEq(a, b string) bool { 373 return normalizeEscFn(a) == normalizeEscFn(b) 374 } 375 376 // normalizeEscFn(a) is equal to normalizeEscFn(b) for any pair of names of 377 // escaper functions a and b that are equivalent. 378 func normalizeEscFn(e string) string { 379 if norm := equivEscapers[e]; norm != "" { 380 return norm 381 } 382 return e 383 } 384 385 // redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x) 386 // for all x. 387 var redundantFuncs = map[string]map[string]bool{ 388 "_html_template_commentescaper": { 389 "_html_template_attrescaper": true, 390 "_html_template_htmlescaper": true, 391 }, 392 "_html_template_cssescaper": { 393 "_html_template_attrescaper": true, 394 }, 395 "_html_template_jsregexpescaper": { 396 "_html_template_attrescaper": true, 397 }, 398 "_html_template_jsstrescaper": { 399 "_html_template_attrescaper": true, 400 }, 401 "_html_template_urlescaper": { 402 "_html_template_urlnormalizer": true, 403 }, 404 } 405 406 // appendCmd appends the given command to the end of the command pipeline 407 // unless it is redundant with the last command. 408 func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode { 409 if n := len(cmds); n != 0 { 410 last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode) 411 next, okNext := cmd.Args[0].(*parse.IdentifierNode) 412 if okLast && okNext && redundantFuncs[last.Ident][next.Ident] { 413 return cmds 414 } 415 } 416 return append(cmds, cmd) 417 } 418 419 // newIdentCmd produces a command containing a single identifier node. 420 func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode { 421 return &parse.CommandNode{ 422 NodeType: parse.NodeCommand, 423 Args: []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree. 424 } 425 } 426 427 // nudge returns the context that would result from following empty string 428 // transitions from the input context. 429 // For example, parsing: 430 // 431 // `<a href=` 432 // 433 // will end in context{stateBeforeValue, attrURL}, but parsing one extra rune: 434 // 435 // `<a href=x` 436 // 437 // will end in context{stateURL, delimSpaceOrTagEnd, ...}. 438 // There are two transitions that happen when the 'x' is seen: 439 // (1) Transition from a before-value state to a start-of-value state without 440 // 441 // consuming any character. 442 // 443 // (2) Consume 'x' and transition past the first value character. 444 // In this case, nudging produces the context after (1) happens. 445 func nudge(c context) context { 446 switch c.state { 447 case stateTag: 448 // In `<foo {{.}}`, the action should emit an attribute. 449 c.state = stateAttrName 450 case stateBeforeValue: 451 // In `<foo bar={{.}}`, the action is an undelimited value. 452 c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone 453 case stateAfterName: 454 // In `<foo bar {{.}}`, the action is an attribute name. 455 c.state, c.attr = stateAttrName, attrNone 456 } 457 return c 458 } 459 460 // join joins the two contexts of a branch template node. The result is an 461 // error context if either of the input contexts are error contexts, or if the 462 // input contexts differ. 463 func join(a, b context, node parse.Node, nodeName string) context { 464 if a.state == stateError { 465 return a 466 } 467 if b.state == stateError { 468 return b 469 } 470 if a.state == stateDead { 471 return b 472 } 473 if b.state == stateDead { 474 return a 475 } 476 if a.eq(b) { 477 return a 478 } 479 480 c := a 481 c.urlPart = b.urlPart 482 if c.eq(b) { 483 // The contexts differ only by urlPart. 484 c.urlPart = urlPartUnknown 485 return c 486 } 487 488 c = a 489 c.jsCtx = b.jsCtx 490 if c.eq(b) { 491 // The contexts differ only by jsCtx. 492 c.jsCtx = jsCtxUnknown 493 return c 494 } 495 496 // Allow a nudged context to join with an unnudged one. 497 // This means that 498 // <p title={{if .C}}{{.}}{{end}} 499 // ends in an unquoted value state even though the else branch 500 // ends in stateBeforeValue. 501 if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) { 502 if e := join(c, d, node, nodeName); e.state != stateError { 503 return e 504 } 505 } 506 507 return context{ 508 state: stateError, 509 err: errorf(ErrBranchEnd, node, 0, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b), 510 } 511 } 512 513 // escapeBranch escapes a branch template node: "if", "range" and "with". 514 func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context { 515 if nodeName == "range" { 516 e.rangeContext = &rangeContext{outer: e.rangeContext} 517 } 518 c0 := e.escapeList(c, n.List) 519 if nodeName == "range" { 520 if c0.state != stateError { 521 c0 = joinRange(c0, e.rangeContext) 522 } 523 e.rangeContext = e.rangeContext.outer 524 if c0.state == stateError { 525 return c0 526 } 527 528 // The "true" branch of a "range" node can execute multiple times. 529 // We check that executing n.List once results in the same context 530 // as executing n.List twice. 531 e.rangeContext = &rangeContext{outer: e.rangeContext} 532 c1, _ := e.escapeListConditionally(c0, n.List, nil) 533 c0 = join(c0, c1, n, nodeName) 534 if c0.state == stateError { 535 e.rangeContext = e.rangeContext.outer 536 // Make clear that this is a problem on loop re-entry 537 // since developers tend to overlook that branch when 538 // debugging templates. 539 c0.err.Line = n.Line 540 c0.err.Description = "on range loop re-entry: " + c0.err.Description 541 return c0 542 } 543 c0 = joinRange(c0, e.rangeContext) 544 e.rangeContext = e.rangeContext.outer 545 if c0.state == stateError { 546 return c0 547 } 548 } 549 c1 := e.escapeList(c, n.ElseList) 550 return join(c0, c1, n, nodeName) 551 } 552 553 func joinRange(c0 context, rc *rangeContext) context { 554 // Merge contexts at break and continue statements into overall body context. 555 // In theory we could treat breaks differently from continues, but for now it is 556 // enough to treat them both as going back to the start of the loop (which may then stop). 557 for _, c := range rc.breaks { 558 c0 = join(c0, c, c.n, "range") 559 if c0.state == stateError { 560 c0.err.Line = c.n.(*parse.BreakNode).Line 561 c0.err.Description = "at range loop break: " + c0.err.Description 562 return c0 563 } 564 } 565 for _, c := range rc.continues { 566 c0 = join(c0, c, c.n, "range") 567 if c0.state == stateError { 568 c0.err.Line = c.n.(*parse.ContinueNode).Line 569 c0.err.Description = "at range loop continue: " + c0.err.Description 570 return c0 571 } 572 } 573 return c0 574 } 575 576 // escapeList escapes a list template node. 577 func (e *escaper) escapeList(c context, n *parse.ListNode) context { 578 if n == nil { 579 return c 580 } 581 for _, m := range n.Nodes { 582 c = e.escape(c, m) 583 if c.state == stateDead { 584 break 585 } 586 } 587 return c 588 } 589 590 // escapeListConditionally escapes a list node but only preserves edits and 591 // inferences in e if the inferences and output context satisfy filter. 592 // It returns the best guess at an output context, and the result of the filter 593 // which is the same as whether e was updated. 594 func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) { 595 e1 := makeEscaper(e.ns) 596 e1.rangeContext = e.rangeContext 597 // Make type inferences available to f. 598 for k, v := range e.output { 599 e1.output[k] = v 600 } 601 c = e1.escapeList(c, n) 602 ok := filter != nil && filter(&e1, c) 603 if ok { 604 // Copy inferences and edits from e1 back into e. 605 for k, v := range e1.output { 606 e.output[k] = v 607 } 608 for k, v := range e1.derived { 609 e.derived[k] = v 610 } 611 for k, v := range e1.called { 612 e.called[k] = v 613 } 614 for k, v := range e1.actionNodeEdits { 615 e.editActionNode(k, v) 616 } 617 for k, v := range e1.templateNodeEdits { 618 e.editTemplateNode(k, v) 619 } 620 for k, v := range e1.textNodeEdits { 621 e.editTextNode(k, v) 622 } 623 } 624 return c, ok 625 } 626 627 // escapeTemplate escapes a {{template}} call node. 628 func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context { 629 c, name := e.escapeTree(c, n, n.Name, n.Line) 630 if name != n.Name { 631 e.editTemplateNode(n, name) 632 } 633 return c 634 } 635 636 // escapeTree escapes the named template starting in the given context as 637 // necessary and returns its output context. 638 func (e *escaper) escapeTree(c context, node parse.Node, name string, line int) (context, string) { 639 // Mangle the template name with the input context to produce a reliable 640 // identifier. 641 dname := c.mangle(name) 642 e.called[dname] = true 643 if out, ok := e.output[dname]; ok { 644 // Already escaped. 645 return out, dname 646 } 647 t := e.template(name) 648 if t == nil { 649 // Two cases: The template exists but is empty, or has never been mentioned at 650 // all. Distinguish the cases in the error messages. 651 if e.ns.set[name] != nil { 652 return context{ 653 state: stateError, 654 err: errorf(ErrNoSuchTemplate, node, line, "%q is an incomplete or empty template", name), 655 }, dname 656 } 657 return context{ 658 state: stateError, 659 err: errorf(ErrNoSuchTemplate, node, line, "no such template %q", name), 660 }, dname 661 } 662 if dname != name { 663 // Use any template derived during an earlier call to escapeTemplate 664 // with different top level templates, or clone if necessary. 665 dt := e.template(dname) 666 if dt == nil { 667 dt = template.New(dname) 668 dt.Tree = &parse.Tree{Name: dname, Root: t.Root.CopyList()} 669 e.derived[dname] = dt 670 } 671 t = dt 672 } 673 return e.computeOutCtx(c, t), dname 674 } 675 676 // computeOutCtx takes a template and its start context and computes the output 677 // context while storing any inferences in e. 678 func (e *escaper) computeOutCtx(c context, t *template.Template) context { 679 // Propagate context over the body. 680 c1, ok := e.escapeTemplateBody(c, t) 681 if !ok { 682 // Look for a fixed point by assuming c1 as the output context. 683 if c2, ok2 := e.escapeTemplateBody(c1, t); ok2 { 684 c1, ok = c2, true 685 } 686 // Use c1 as the error context if neither assumption worked. 687 } 688 if !ok && c1.state != stateError { 689 return context{ 690 state: stateError, 691 err: errorf(ErrOutputContext, t.Tree.Root, 0, "cannot compute output context for template %s", t.Name()), 692 } 693 } 694 return c1 695 } 696 697 // escapeTemplateBody escapes the given template assuming the given output 698 // context, and returns the best guess at the output context and whether the 699 // assumption was correct. 700 func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) { 701 filter := func(e1 *escaper, c1 context) bool { 702 if c1.state == stateError { 703 // Do not update the input escaper, e. 704 return false 705 } 706 if !e1.called[t.Name()] { 707 // If t is not recursively called, then c1 is an 708 // accurate output context. 709 return true 710 } 711 // c1 is accurate if it matches our assumed output context. 712 return c.eq(c1) 713 } 714 // We need to assume an output context so that recursive template calls 715 // take the fast path out of escapeTree instead of infinitely recurring. 716 // Naively assuming that the input context is the same as the output 717 // works >90% of the time. 718 e.output[t.Name()] = c 719 return e.escapeListConditionally(c, t.Tree.Root, filter) 720 } 721 722 // delimEnds maps each delim to a string of characters that terminate it. 723 var delimEnds = [...]string{ 724 delimDoubleQuote: `"`, 725 delimSingleQuote: "'", 726 // Determined empirically by running the below in various browsers. 727 // var div = document.createElement("DIV"); 728 // for (var i = 0; i < 0x10000; ++i) { 729 // div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>"; 730 // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) 731 // document.write("<p>U+" + i.toString(16)); 732 // } 733 delimSpaceOrTagEnd: " \t\n\f\r>", 734 } 735 736 var ( 737 // Per WHATWG HTML specification, section 4.12.1.3, there are extremely 738 // complicated rules for how to handle the set of opening tags <!--, 739 // <script, and </script when they appear in JS literals (i.e. strings, 740 // regexs, and comments). The specification suggests a simple solution, 741 // rather than implementing the arcane ABNF, which involves simply escaping 742 // the opening bracket with \x3C. We use the below regex for this, since it 743 // makes doing the case-insensitive find-replace much simpler. 744 specialScriptTagRE = regexp.MustCompile("(?i)<(script|/script|!--)") 745 specialScriptTagReplacement = []byte("\\x3C$1") 746 ) 747 748 func containsSpecialScriptTag(s []byte) bool { 749 return specialScriptTagRE.Match(s) 750 } 751 752 func escapeSpecialScriptTags(s []byte) []byte { 753 return specialScriptTagRE.ReplaceAll(s, specialScriptTagReplacement) 754 } 755 756 var doctypeBytes = []byte("<!DOCTYPE") 757 758 // escapeText escapes a text template node. 759 func (e *escaper) escapeText(c context, n *parse.TextNode) context { 760 s, written, i, b := n.Text, 0, 0, new(bytes.Buffer) 761 for i != len(s) { 762 c1, nread := contextAfterText(c, s[i:]) 763 i1 := i + nread 764 if c.state == stateText || c.state == stateRCDATA { 765 end := i1 766 if c1.state != c.state { 767 for j := end - 1; j >= i; j-- { 768 if s[j] == '<' { 769 end = j 770 break 771 } 772 } 773 } 774 for j := i; j < end; j++ { 775 if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) { 776 b.Write(s[written:j]) 777 b.WriteString("<") 778 written = j + 1 779 } 780 } 781 } else if isComment(c.state) && c.delim == delimNone { 782 switch c.state { 783 case stateJSBlockCmt: 784 // https://es5.github.io/#x7.4: 785 // "Comments behave like white space and are 786 // discarded except that, if a MultiLineComment 787 // contains a line terminator character, then 788 // the entire comment is considered to be a 789 // LineTerminator for purposes of parsing by 790 // the syntactic grammar." 791 if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") { 792 b.WriteByte('\n') 793 } else { 794 b.WriteByte(' ') 795 } 796 case stateCSSBlockCmt: 797 b.WriteByte(' ') 798 } 799 written = i1 800 } 801 if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone { 802 // Preserve the portion between written and the comment start. 803 cs := i1 - 2 804 if c1.state == stateHTMLCmt || c1.state == stateJSHTMLOpenCmt { 805 // "<!--" instead of "/*" or "//" 806 cs -= 2 807 } else if c1.state == stateJSHTMLCloseCmt { 808 // "-->" instead of "/*" or "//" 809 cs -= 1 810 } 811 b.Write(s[written:cs]) 812 written = i1 813 } 814 if isInScriptLiteral(c.state) && containsSpecialScriptTag(s[i:i1]) { 815 b.Write(s[written:i]) 816 b.Write(escapeSpecialScriptTags(s[i:i1])) 817 written = i1 818 } 819 if i == i1 && c.state == c1.state { 820 panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:])) 821 } 822 c, i = c1, i1 823 } 824 825 if written != 0 && c.state != stateError { 826 if !isComment(c.state) || c.delim != delimNone { 827 b.Write(n.Text[written:]) 828 } 829 e.editTextNode(n, b.Bytes()) 830 } 831 return c 832 } 833 834 // contextAfterText starts in context c, consumes some tokens from the front of 835 // s, then returns the context after those tokens and the unprocessed suffix. 836 func contextAfterText(c context, s []byte) (context, int) { 837 if c.delim == delimNone { 838 c1, i := tSpecialTagEnd(c, s) 839 if i == 0 { 840 // A special end tag (`</script>`) has been seen and 841 // all content preceding it has been consumed. 842 return c1, 0 843 } 844 // Consider all content up to any end tag. 845 return transitionFunc[c.state](c, s[:i]) 846 } 847 848 // We are at the beginning of an attribute value. 849 850 i := bytes.IndexAny(s, delimEnds[c.delim]) 851 if i == -1 { 852 i = len(s) 853 } 854 if c.delim == delimSpaceOrTagEnd { 855 // https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state 856 // lists the runes below as error characters. 857 // Error out because HTML parsers may differ on whether 858 // "<a id= onclick=f(" ends inside id's or onclick's value, 859 // "<a class=`foo " ends inside a value, 860 // "<a style=font:'Arial'" needs open-quote fixup. 861 // IE treats '`' as a quotation character. 862 if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 { 863 return context{ 864 state: stateError, 865 err: errorf(ErrBadHTML, nil, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]), 866 }, len(s) 867 } 868 } 869 if i == len(s) { 870 // Remain inside the attribute. 871 // Decode the value so non-HTML rules can easily handle 872 // <button onclick="alert("Hi!")"> 873 // without having to entity decode token boundaries. 874 for u := []byte(html.UnescapeString(string(s))); len(u) != 0; { 875 c1, i1 := transitionFunc[c.state](c, u) 876 c, u = c1, u[i1:] 877 } 878 return c, len(s) 879 } 880 881 element := c.element 882 883 // If this is a non-JS "type" attribute inside "script" tag, do not treat the contents as JS. 884 if c.state == stateAttr && c.element == elementScript && c.attr == attrScriptType && !isJSType(string(s[:i])) { 885 element = elementNone 886 } 887 888 if c.delim != delimSpaceOrTagEnd { 889 // Consume any quote. 890 i++ 891 } 892 // On exiting an attribute, we discard all state information 893 // except the state and element. 894 return context{state: stateTag, element: element}, i 895 } 896 897 // editActionNode records a change to an action pipeline for later commit. 898 func (e *escaper) editActionNode(n *parse.ActionNode, cmds []string) { 899 if _, ok := e.actionNodeEdits[n]; ok { 900 panic(fmt.Sprintf("node %s shared between templates", n)) 901 } 902 e.actionNodeEdits[n] = cmds 903 } 904 905 // editTemplateNode records a change to a {{template}} callee for later commit. 906 func (e *escaper) editTemplateNode(n *parse.TemplateNode, callee string) { 907 if _, ok := e.templateNodeEdits[n]; ok { 908 panic(fmt.Sprintf("node %s shared between templates", n)) 909 } 910 e.templateNodeEdits[n] = callee 911 } 912 913 // editTextNode records a change to a text node for later commit. 914 func (e *escaper) editTextNode(n *parse.TextNode, text []byte) { 915 if _, ok := e.textNodeEdits[n]; ok { 916 panic(fmt.Sprintf("node %s shared between templates", n)) 917 } 918 e.textNodeEdits[n] = text 919 } 920 921 // commit applies changes to actions and template calls needed to contextually 922 // autoescape content and adds any derived templates to the set. 923 func (e *escaper) commit() { 924 for name := range e.output { 925 e.template(name).Funcs(funcMap) 926 } 927 // Any template from the name space associated with this escaper can be used 928 // to add derived templates to the underlying text/template name space. 929 tmpl := e.arbitraryTemplate() 930 for _, t := range e.derived { 931 if _, err := tmpl.text.AddParseTree(t.Name(), t.Tree); err != nil { 932 panic("error adding derived template") 933 } 934 } 935 for n, s := range e.actionNodeEdits { 936 ensurePipelineContains(n.Pipe, s) 937 } 938 for n, name := range e.templateNodeEdits { 939 n.Name = name 940 } 941 for n, s := range e.textNodeEdits { 942 n.Text = s 943 } 944 // Reset state that is specific to this commit so that the same changes are 945 // not re-applied to the template on subsequent calls to commit. 946 e.called = make(map[string]bool) 947 e.actionNodeEdits = make(map[*parse.ActionNode][]string) 948 e.templateNodeEdits = make(map[*parse.TemplateNode]string) 949 e.textNodeEdits = make(map[*parse.TextNode][]byte) 950 } 951 952 // template returns the named template given a mangled template name. 953 func (e *escaper) template(name string) *template.Template { 954 // Any template from the name space associated with this escaper can be used 955 // to look up templates in the underlying text/template name space. 956 t := e.arbitraryTemplate().text.Lookup(name) 957 if t == nil { 958 t = e.derived[name] 959 } 960 return t 961 } 962 963 // arbitraryTemplate returns an arbitrary template from the name space 964 // associated with e and panics if no templates are found. 965 func (e *escaper) arbitraryTemplate() *Template { 966 for _, t := range e.ns.set { 967 return t 968 } 969 panic("no templates in name space") 970 } 971 972 // Forwarding functions so that clients need only import this package 973 // to reach the general escaping functions of text/template. 974 975 // HTMLEscape writes to w the escaped HTML equivalent of the plain text data b. 976 func HTMLEscape(w io.Writer, b []byte) { 977 template.HTMLEscape(w, b) 978 } 979 980 // HTMLEscapeString returns the escaped HTML equivalent of the plain text data s. 981 func HTMLEscapeString(s string) string { 982 return template.HTMLEscapeString(s) 983 } 984 985 // HTMLEscaper returns the escaped HTML equivalent of the textual 986 // representation of its arguments. 987 func HTMLEscaper(args ...any) string { 988 return template.HTMLEscaper(args...) 989 } 990 991 // JSEscape writes to w the escaped JavaScript equivalent of the plain text data b. 992 func JSEscape(w io.Writer, b []byte) { 993 template.JSEscape(w, b) 994 } 995 996 // JSEscapeString returns the escaped JavaScript equivalent of the plain text data s. 997 func JSEscapeString(s string) string { 998 return template.JSEscapeString(s) 999 } 1000 1001 // JSEscaper returns the escaped JavaScript equivalent of the textual 1002 // representation of its arguments. 1003 func JSEscaper(args ...any) string { 1004 return template.JSEscaper(args...) 1005 } 1006 1007 // URLQueryEscaper returns the escaped value of the textual representation of 1008 // its arguments in a form suitable for embedding in a URL query. 1009 func URLQueryEscaper(args ...any) string { 1010 return template.URLQueryEscaper(args...) 1011 }