github.com/Andyfoo/golang/x/net@v0.0.0-20190901054642-57c1bf301704/html/render.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package html
     6  
     7  import (
     8  	"bufio"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"strings"
    13  )
    14  
    15  type writer interface {
    16  	io.Writer
    17  	io.ByteWriter
    18  	WriteString(string) (int, error)
    19  }
    20  
    21  // Render renders the parse tree n to the given writer.
    22  //
    23  // Rendering is done on a 'best effort' basis: calling Parse on the output of
    24  // Render will always result in something similar to the original tree, but it
    25  // is not necessarily an exact clone unless the original tree was 'well-formed'.
    26  // 'Well-formed' is not easily specified; the HTML5 specification is
    27  // complicated.
    28  //
    29  // Calling Parse on arbitrary input typically results in a 'well-formed' parse
    30  // tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
    31  // For example, in a 'well-formed' parse tree, no <a> element is a child of
    32  // another <a> element: parsing "<a><a>" results in two sibling elements.
    33  // Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
    34  // <table> element: parsing "<p><table><a>" results in a <p> with two sibling
    35  // children; the <a> is reparented to the <table>'s parent. However, calling
    36  // Parse on "<a><table><a>" does not return an error, but the result has an <a>
    37  // element with an <a> child, and is therefore not 'well-formed'.
    38  //
    39  // Programmatically constructed trees are typically also 'well-formed', but it
    40  // is possible to construct a tree that looks innocuous but, when rendered and
    41  // re-parsed, results in a different tree. A simple example is that a solitary
    42  // text node would become a tree containing <html>, <head> and <body> elements.
    43  // Another example is that the programmatic equivalent of "a<head>b</head>c"
    44  // becomes "<html><head><head/><body>abc</body></html>".
    45  func Render(w io.Writer, n *Node) error {
    46  	if x, ok := w.(writer); ok {
    47  		return render(x, n)
    48  	}
    49  	buf := bufio.NewWriter(w)
    50  	if err := render(buf, n); err != nil {
    51  		return err
    52  	}
    53  	return buf.Flush()
    54  }
    55  
    56  // plaintextAbort is returned from render1 when a <plaintext> element
    57  // has been rendered. No more end tags should be rendered after that.
    58  var plaintextAbort = errors.New("html: internal error (plaintext abort)")
    59  
    60  func render(w writer, n *Node) error {
    61  	err := render1(w, n)
    62  	if err == plaintextAbort {
    63  		err = nil
    64  	}
    65  	return err
    66  }
    67  
    68  func render1(w writer, n *Node) error {
    69  	// Render non-element nodes; these are the easy cases.
    70  	switch n.Type {
    71  	case ErrorNode:
    72  		return errors.New("html: cannot render an ErrorNode node")
    73  	case TextNode:
    74  		return escape(w, n.Data)
    75  	case DocumentNode:
    76  		for c := n.FirstChild; c != nil; c = c.NextSibling {
    77  			if err := render1(w, c); err != nil {
    78  				return err
    79  			}
    80  		}
    81  		return nil
    82  	case ElementNode:
    83  		// No-op.
    84  	case CommentNode:
    85  		if _, err := w.WriteString("<!--"); err != nil {
    86  			return err
    87  		}
    88  		if _, err := w.WriteString(n.Data); err != nil {
    89  			return err
    90  		}
    91  		if _, err := w.WriteString("-->"); err != nil {
    92  			return err
    93  		}
    94  		return nil
    95  	case DoctypeNode:
    96  		if _, err := w.WriteString("<!DOCTYPE "); err != nil {
    97  			return err
    98  		}
    99  		if _, err := w.WriteString(n.Data); err != nil {
   100  			return err
   101  		}
   102  		if n.Attr != nil {
   103  			var p, s string
   104  			for _, a := range n.Attr {
   105  				switch a.Key {
   106  				case "public":
   107  					p = a.Val
   108  				case "system":
   109  					s = a.Val
   110  				}
   111  			}
   112  			if p != "" {
   113  				if _, err := w.WriteString(" PUBLIC "); err != nil {
   114  					return err
   115  				}
   116  				if err := writeQuoted(w, p); err != nil {
   117  					return err
   118  				}
   119  				if s != "" {
   120  					if err := w.WriteByte(' '); err != nil {
   121  						return err
   122  					}
   123  					if err := writeQuoted(w, s); err != nil {
   124  						return err
   125  					}
   126  				}
   127  			} else if s != "" {
   128  				if _, err := w.WriteString(" SYSTEM "); err != nil {
   129  					return err
   130  				}
   131  				if err := writeQuoted(w, s); err != nil {
   132  					return err
   133  				}
   134  			}
   135  		}
   136  		return w.WriteByte('>')
   137  	default:
   138  		return errors.New("html: unknown node type")
   139  	}
   140  
   141  	// Render the <xxx> opening tag.
   142  	if err := w.WriteByte('<'); err != nil {
   143  		return err
   144  	}
   145  	if _, err := w.WriteString(n.Data); err != nil {
   146  		return err
   147  	}
   148  	for _, a := range n.Attr {
   149  		if err := w.WriteByte(' '); err != nil {
   150  			return err
   151  		}
   152  		if a.Namespace != "" {
   153  			if _, err := w.WriteString(a.Namespace); err != nil {
   154  				return err
   155  			}
   156  			if err := w.WriteByte(':'); err != nil {
   157  				return err
   158  			}
   159  		}
   160  		if _, err := w.WriteString(a.Key); err != nil {
   161  			return err
   162  		}
   163  		if _, err := w.WriteString(`="`); err != nil {
   164  			return err
   165  		}
   166  		if err := escape(w, a.Val); err != nil {
   167  			return err
   168  		}
   169  		if err := w.WriteByte('"'); err != nil {
   170  			return err
   171  		}
   172  	}
   173  	if voidElements[n.Data] {
   174  		if n.FirstChild != nil {
   175  			return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
   176  		}
   177  		_, err := w.WriteString("/>")
   178  		return err
   179  	}
   180  	if err := w.WriteByte('>'); err != nil {
   181  		return err
   182  	}
   183  
   184  	// Add initial newline where there is danger of a newline beging ignored.
   185  	if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
   186  		switch n.Data {
   187  		case "pre", "listing", "textarea":
   188  			if err := w.WriteByte('\n'); err != nil {
   189  				return err
   190  			}
   191  		}
   192  	}
   193  
   194  	// Render any child nodes.
   195  	switch n.Data {
   196  	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
   197  		for c := n.FirstChild; c != nil; c = c.NextSibling {
   198  			if c.Type == TextNode {
   199  				if _, err := w.WriteString(c.Data); err != nil {
   200  					return err
   201  				}
   202  			} else {
   203  				if err := render1(w, c); err != nil {
   204  					return err
   205  				}
   206  			}
   207  		}
   208  		if n.Data == "plaintext" {
   209  			// Don't render anything else. <plaintext> must be the
   210  			// last element in the file, with no closing tag.
   211  			return plaintextAbort
   212  		}
   213  	default:
   214  		for c := n.FirstChild; c != nil; c = c.NextSibling {
   215  			if err := render1(w, c); err != nil {
   216  				return err
   217  			}
   218  		}
   219  	}
   220  
   221  	// Render the </xxx> closing tag.
   222  	if _, err := w.WriteString("</"); err != nil {
   223  		return err
   224  	}
   225  	if _, err := w.WriteString(n.Data); err != nil {
   226  		return err
   227  	}
   228  	return w.WriteByte('>')
   229  }
   230  
   231  // writeQuoted writes s to w surrounded by quotes. Normally it will use double
   232  // quotes, but if s contains a double quote, it will use single quotes.
   233  // It is used for writing the identifiers in a doctype declaration.
   234  // In valid HTML, they can't contain both types of quotes.
   235  func writeQuoted(w writer, s string) error {
   236  	var q byte = '"'
   237  	if strings.Contains(s, `"`) {
   238  		q = '\''
   239  	}
   240  	if err := w.WriteByte(q); err != nil {
   241  		return err
   242  	}
   243  	if _, err := w.WriteString(s); err != nil {
   244  		return err
   245  	}
   246  	if err := w.WriteByte(q); err != nil {
   247  		return err
   248  	}
   249  	return nil
   250  }
   251  
   252  // Section 12.1.2, "Elements", gives this list of void elements. Void elements
   253  // are those that can't have any contents.
   254  var voidElements = map[string]bool{
   255  	"area":    true,
   256  	"base":    true,
   257  	"br":      true,
   258  	"col":     true,
   259  	"command": true,
   260  	"embed":   true,
   261  	"hr":      true,
   262  	"img":     true,
   263  	"input":   true,
   264  	"keygen":  true,
   265  	"link":    true,
   266  	"meta":    true,
   267  	"param":   true,
   268  	"source":  true,
   269  	"track":   true,
   270  	"wbr":     true,
   271  }