github.com/Andyfoo/golang/x/net@v0.0.0-20190901054642-57c1bf301704/html/render.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package html 6 7 import ( 8 "bufio" 9 "errors" 10 "fmt" 11 "io" 12 "strings" 13 ) 14 15 type writer interface { 16 io.Writer 17 io.ByteWriter 18 WriteString(string) (int, error) 19 } 20 21 // Render renders the parse tree n to the given writer. 22 // 23 // Rendering is done on a 'best effort' basis: calling Parse on the output of 24 // Render will always result in something similar to the original tree, but it 25 // is not necessarily an exact clone unless the original tree was 'well-formed'. 26 // 'Well-formed' is not easily specified; the HTML5 specification is 27 // complicated. 28 // 29 // Calling Parse on arbitrary input typically results in a 'well-formed' parse 30 // tree. However, it is possible for Parse to yield a 'badly-formed' parse tree. 31 // For example, in a 'well-formed' parse tree, no <a> element is a child of 32 // another <a> element: parsing "<a><a>" results in two sibling elements. 33 // Similarly, in a 'well-formed' parse tree, no <a> element is a child of a 34 // <table> element: parsing "<p><table><a>" results in a <p> with two sibling 35 // children; the <a> is reparented to the <table>'s parent. However, calling 36 // Parse on "<a><table><a>" does not return an error, but the result has an <a> 37 // element with an <a> child, and is therefore not 'well-formed'. 38 // 39 // Programmatically constructed trees are typically also 'well-formed', but it 40 // is possible to construct a tree that looks innocuous but, when rendered and 41 // re-parsed, results in a different tree. A simple example is that a solitary 42 // text node would become a tree containing <html>, <head> and <body> elements. 43 // Another example is that the programmatic equivalent of "a<head>b</head>c" 44 // becomes "<html><head><head/><body>abc</body></html>". 45 func Render(w io.Writer, n *Node) error { 46 if x, ok := w.(writer); ok { 47 return render(x, n) 48 } 49 buf := bufio.NewWriter(w) 50 if err := render(buf, n); err != nil { 51 return err 52 } 53 return buf.Flush() 54 } 55 56 // plaintextAbort is returned from render1 when a <plaintext> element 57 // has been rendered. No more end tags should be rendered after that. 58 var plaintextAbort = errors.New("html: internal error (plaintext abort)") 59 60 func render(w writer, n *Node) error { 61 err := render1(w, n) 62 if err == plaintextAbort { 63 err = nil 64 } 65 return err 66 } 67 68 func render1(w writer, n *Node) error { 69 // Render non-element nodes; these are the easy cases. 70 switch n.Type { 71 case ErrorNode: 72 return errors.New("html: cannot render an ErrorNode node") 73 case TextNode: 74 return escape(w, n.Data) 75 case DocumentNode: 76 for c := n.FirstChild; c != nil; c = c.NextSibling { 77 if err := render1(w, c); err != nil { 78 return err 79 } 80 } 81 return nil 82 case ElementNode: 83 // No-op. 84 case CommentNode: 85 if _, err := w.WriteString("<!--"); err != nil { 86 return err 87 } 88 if _, err := w.WriteString(n.Data); err != nil { 89 return err 90 } 91 if _, err := w.WriteString("-->"); err != nil { 92 return err 93 } 94 return nil 95 case DoctypeNode: 96 if _, err := w.WriteString("<!DOCTYPE "); err != nil { 97 return err 98 } 99 if _, err := w.WriteString(n.Data); err != nil { 100 return err 101 } 102 if n.Attr != nil { 103 var p, s string 104 for _, a := range n.Attr { 105 switch a.Key { 106 case "public": 107 p = a.Val 108 case "system": 109 s = a.Val 110 } 111 } 112 if p != "" { 113 if _, err := w.WriteString(" PUBLIC "); err != nil { 114 return err 115 } 116 if err := writeQuoted(w, p); err != nil { 117 return err 118 } 119 if s != "" { 120 if err := w.WriteByte(' '); err != nil { 121 return err 122 } 123 if err := writeQuoted(w, s); err != nil { 124 return err 125 } 126 } 127 } else if s != "" { 128 if _, err := w.WriteString(" SYSTEM "); err != nil { 129 return err 130 } 131 if err := writeQuoted(w, s); err != nil { 132 return err 133 } 134 } 135 } 136 return w.WriteByte('>') 137 default: 138 return errors.New("html: unknown node type") 139 } 140 141 // Render the <xxx> opening tag. 142 if err := w.WriteByte('<'); err != nil { 143 return err 144 } 145 if _, err := w.WriteString(n.Data); err != nil { 146 return err 147 } 148 for _, a := range n.Attr { 149 if err := w.WriteByte(' '); err != nil { 150 return err 151 } 152 if a.Namespace != "" { 153 if _, err := w.WriteString(a.Namespace); err != nil { 154 return err 155 } 156 if err := w.WriteByte(':'); err != nil { 157 return err 158 } 159 } 160 if _, err := w.WriteString(a.Key); err != nil { 161 return err 162 } 163 if _, err := w.WriteString(`="`); err != nil { 164 return err 165 } 166 if err := escape(w, a.Val); err != nil { 167 return err 168 } 169 if err := w.WriteByte('"'); err != nil { 170 return err 171 } 172 } 173 if voidElements[n.Data] { 174 if n.FirstChild != nil { 175 return fmt.Errorf("html: void element <%s> has child nodes", n.Data) 176 } 177 _, err := w.WriteString("/>") 178 return err 179 } 180 if err := w.WriteByte('>'); err != nil { 181 return err 182 } 183 184 // Add initial newline where there is danger of a newline beging ignored. 185 if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") { 186 switch n.Data { 187 case "pre", "listing", "textarea": 188 if err := w.WriteByte('\n'); err != nil { 189 return err 190 } 191 } 192 } 193 194 // Render any child nodes. 195 switch n.Data { 196 case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": 197 for c := n.FirstChild; c != nil; c = c.NextSibling { 198 if c.Type == TextNode { 199 if _, err := w.WriteString(c.Data); err != nil { 200 return err 201 } 202 } else { 203 if err := render1(w, c); err != nil { 204 return err 205 } 206 } 207 } 208 if n.Data == "plaintext" { 209 // Don't render anything else. <plaintext> must be the 210 // last element in the file, with no closing tag. 211 return plaintextAbort 212 } 213 default: 214 for c := n.FirstChild; c != nil; c = c.NextSibling { 215 if err := render1(w, c); err != nil { 216 return err 217 } 218 } 219 } 220 221 // Render the </xxx> closing tag. 222 if _, err := w.WriteString("</"); err != nil { 223 return err 224 } 225 if _, err := w.WriteString(n.Data); err != nil { 226 return err 227 } 228 return w.WriteByte('>') 229 } 230 231 // writeQuoted writes s to w surrounded by quotes. Normally it will use double 232 // quotes, but if s contains a double quote, it will use single quotes. 233 // It is used for writing the identifiers in a doctype declaration. 234 // In valid HTML, they can't contain both types of quotes. 235 func writeQuoted(w writer, s string) error { 236 var q byte = '"' 237 if strings.Contains(s, `"`) { 238 q = '\'' 239 } 240 if err := w.WriteByte(q); err != nil { 241 return err 242 } 243 if _, err := w.WriteString(s); err != nil { 244 return err 245 } 246 if err := w.WriteByte(q); err != nil { 247 return err 248 } 249 return nil 250 } 251 252 // Section 12.1.2, "Elements", gives this list of void elements. Void elements 253 // are those that can't have any contents. 254 var voidElements = map[string]bool{ 255 "area": true, 256 "base": true, 257 "br": true, 258 "col": true, 259 "command": true, 260 "embed": true, 261 "hr": true, 262 "img": true, 263 "input": true, 264 "keygen": true, 265 "link": true, 266 "meta": true, 267 "param": true, 268 "source": true, 269 "track": true, 270 "wbr": true, 271 }