golang.org/x/net@v0.25.1-0.20240516223405-c87a5b62e243/html/render.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package html 6 7 import ( 8 "bufio" 9 "errors" 10 "fmt" 11 "io" 12 "strings" 13 ) 14 15 type writer interface { 16 io.Writer 17 io.ByteWriter 18 WriteString(string) (int, error) 19 } 20 21 // Render renders the parse tree n to the given writer. 22 // 23 // Rendering is done on a 'best effort' basis: calling Parse on the output of 24 // Render will always result in something similar to the original tree, but it 25 // is not necessarily an exact clone unless the original tree was 'well-formed'. 26 // 'Well-formed' is not easily specified; the HTML5 specification is 27 // complicated. 28 // 29 // Calling Parse on arbitrary input typically results in a 'well-formed' parse 30 // tree. However, it is possible for Parse to yield a 'badly-formed' parse tree. 31 // For example, in a 'well-formed' parse tree, no <a> element is a child of 32 // another <a> element: parsing "<a><a>" results in two sibling elements. 33 // Similarly, in a 'well-formed' parse tree, no <a> element is a child of a 34 // <table> element: parsing "<p><table><a>" results in a <p> with two sibling 35 // children; the <a> is reparented to the <table>'s parent. However, calling 36 // Parse on "<a><table><a>" does not return an error, but the result has an <a> 37 // element with an <a> child, and is therefore not 'well-formed'. 38 // 39 // Programmatically constructed trees are typically also 'well-formed', but it 40 // is possible to construct a tree that looks innocuous but, when rendered and 41 // re-parsed, results in a different tree. A simple example is that a solitary 42 // text node would become a tree containing <html>, <head> and <body> elements. 
43 // Another example is that the programmatic equivalent of "a<head>b</head>c" 44 // becomes "<html><head><head/><body>abc</body></html>". 45 func Render(w io.Writer, n *Node) error { 46 if x, ok := w.(writer); ok { 47 return render(x, n) 48 } 49 buf := bufio.NewWriter(w) 50 if err := render(buf, n); err != nil { 51 return err 52 } 53 return buf.Flush() 54 } 55 56 // plaintextAbort is returned from render1 when a <plaintext> element 57 // has been rendered. No more end tags should be rendered after that. 58 var plaintextAbort = errors.New("html: internal error (plaintext abort)") 59 60 func render(w writer, n *Node) error { 61 err := render1(w, n) 62 if err == plaintextAbort { 63 err = nil 64 } 65 return err 66 } 67 68 func render1(w writer, n *Node) error { 69 // Render non-element nodes; these are the easy cases. 70 switch n.Type { 71 case ErrorNode: 72 return errors.New("html: cannot render an ErrorNode node") 73 case TextNode: 74 return escape(w, n.Data) 75 case DocumentNode: 76 for c := n.FirstChild; c != nil; c = c.NextSibling { 77 if err := render1(w, c); err != nil { 78 return err 79 } 80 } 81 return nil 82 case ElementNode: 83 // No-op. 
84 case CommentNode: 85 if _, err := w.WriteString("<!--"); err != nil { 86 return err 87 } 88 if err := escapeComment(w, n.Data); err != nil { 89 return err 90 } 91 if _, err := w.WriteString("-->"); err != nil { 92 return err 93 } 94 return nil 95 case DoctypeNode: 96 if _, err := w.WriteString("<!DOCTYPE "); err != nil { 97 return err 98 } 99 if err := escape(w, n.Data); err != nil { 100 return err 101 } 102 if n.Attr != nil { 103 var p, s string 104 for _, a := range n.Attr { 105 switch a.Key { 106 case "public": 107 p = a.Val 108 case "system": 109 s = a.Val 110 } 111 } 112 if p != "" { 113 if _, err := w.WriteString(" PUBLIC "); err != nil { 114 return err 115 } 116 if err := writeQuoted(w, p); err != nil { 117 return err 118 } 119 if s != "" { 120 if err := w.WriteByte(' '); err != nil { 121 return err 122 } 123 if err := writeQuoted(w, s); err != nil { 124 return err 125 } 126 } 127 } else if s != "" { 128 if _, err := w.WriteString(" SYSTEM "); err != nil { 129 return err 130 } 131 if err := writeQuoted(w, s); err != nil { 132 return err 133 } 134 } 135 } 136 return w.WriteByte('>') 137 case RawNode: 138 _, err := w.WriteString(n.Data) 139 return err 140 default: 141 return errors.New("html: unknown node type") 142 } 143 144 // Render the <xxx> opening tag. 
145 if err := w.WriteByte('<'); err != nil { 146 return err 147 } 148 if _, err := w.WriteString(n.Data); err != nil { 149 return err 150 } 151 for _, a := range n.Attr { 152 if err := w.WriteByte(' '); err != nil { 153 return err 154 } 155 if a.Namespace != "" { 156 if _, err := w.WriteString(a.Namespace); err != nil { 157 return err 158 } 159 if err := w.WriteByte(':'); err != nil { 160 return err 161 } 162 } 163 if _, err := w.WriteString(a.Key); err != nil { 164 return err 165 } 166 if _, err := w.WriteString(`="`); err != nil { 167 return err 168 } 169 if err := escape(w, a.Val); err != nil { 170 return err 171 } 172 if err := w.WriteByte('"'); err != nil { 173 return err 174 } 175 } 176 if voidElements[n.Data] { 177 if n.FirstChild != nil { 178 return fmt.Errorf("html: void element <%s> has child nodes", n.Data) 179 } 180 _, err := w.WriteString("/>") 181 return err 182 } 183 if err := w.WriteByte('>'); err != nil { 184 return err 185 } 186 187 // Add initial newline where there is danger of a newline beging ignored. 188 if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") { 189 switch n.Data { 190 case "pre", "listing", "textarea": 191 if err := w.WriteByte('\n'); err != nil { 192 return err 193 } 194 } 195 } 196 197 // Render any child nodes 198 if childTextNodesAreLiteral(n) { 199 for c := n.FirstChild; c != nil; c = c.NextSibling { 200 if c.Type == TextNode { 201 if _, err := w.WriteString(c.Data); err != nil { 202 return err 203 } 204 } else { 205 if err := render1(w, c); err != nil { 206 return err 207 } 208 } 209 } 210 if n.Data == "plaintext" { 211 // Don't render anything else. <plaintext> must be the 212 // last element in the file, with no closing tag. 213 return plaintextAbort 214 } 215 } else { 216 for c := n.FirstChild; c != nil; c = c.NextSibling { 217 if err := render1(w, c); err != nil { 218 return err 219 } 220 } 221 } 222 223 // Render the </xxx> closing tag. 
224 if _, err := w.WriteString("</"); err != nil { 225 return err 226 } 227 if _, err := w.WriteString(n.Data); err != nil { 228 return err 229 } 230 return w.WriteByte('>') 231 } 232 233 func childTextNodesAreLiteral(n *Node) bool { 234 // Per WHATWG HTML 13.3, if the parent of the current node is a style, 235 // script, xmp, iframe, noembed, noframes, or plaintext element, and the 236 // current node is a text node, append the value of the node's data 237 // literally. The specification is not explicit about it, but we only 238 // enforce this if we are in the HTML namespace (i.e. when the namespace is 239 // ""). 240 // NOTE: we also always include noscript elements, although the 241 // specification states that they should only be rendered as such if 242 // scripting is enabled for the node (which is not something we track). 243 if n.Namespace != "" { 244 return false 245 } 246 switch n.Data { 247 case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": 248 return true 249 default: 250 return false 251 } 252 } 253 254 // writeQuoted writes s to w surrounded by quotes. Normally it will use double 255 // quotes, but if s contains a double quote, it will use single quotes. 256 // It is used for writing the identifiers in a doctype declaration. 257 // In valid HTML, they can't contain both types of quotes. 258 func writeQuoted(w writer, s string) error { 259 var q byte = '"' 260 if strings.Contains(s, `"`) { 261 q = '\'' 262 } 263 if err := w.WriteByte(q); err != nil { 264 return err 265 } 266 if _, err := w.WriteString(s); err != nil { 267 return err 268 } 269 if err := w.WriteByte(q); err != nil { 270 return err 271 } 272 return nil 273 } 274 275 // Section 12.1.2, "Elements", gives this list of void elements. Void elements 276 // are those that can't have any contents. 
// The keys are lower-case element names. render1 writes an element found
// here as a single self-closing tag and reports an error if it has children.
var voidElements = map[string]bool{
	"area":   true,
	"base":   true,
	"br":     true,
	"col":    true,
	"embed":  true,
	"hr":     true,
	"img":    true,
	"input":  true,
	"keygen": true, // "keygen" has been removed from the spec, but is kept here for backwards compatibility.
	"link":   true,
	"meta":   true,
	"param":  true,
	"source": true,
	"track":  true,
	"wbr":    true,
}