github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/docgen/extract/xhtml.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package extract
    12  
import (
	"bytes"
	"fmt"
	"io"
	"strings"

	"github.com/PuerkitoBio/goquery"
	"golang.org/x/net/html"
)
    21  
    22  // XHTMLtoHTML converts the XHTML railroad diagrams to HTML.
    23  func XHTMLtoHTML(r io.Reader) (string, error) {
    24  	b := new(bytes.Buffer)
    25  	z := html.NewTokenizer(r)
    26  	for {
    27  		tt := z.Next()
    28  		if tt == html.ErrorToken {
    29  			err := z.Err()
    30  			if err == io.EOF {
    31  				break
    32  			}
    33  			return "", z.Err()
    34  		}
    35  		t := z.Token()
    36  		switch t.Type {
    37  		case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
    38  			idx := strings.IndexByte(t.Data, ':')
    39  			t.Data = t.Data[idx+1:]
    40  		}
    41  		var na []html.Attribute
    42  		for _, a := range t.Attr {
    43  			if strings.HasPrefix(a.Key, "xmlns") {
    44  				continue
    45  			}
    46  			na = append(na, a)
    47  		}
    48  		t.Attr = na
    49  		b.WriteString(t.String())
    50  	}
    51  
    52  	doc, err := goquery.NewDocumentFromReader(b)
    53  	if err != nil {
    54  		return "", err
    55  	}
    56  	defs := doc.Find("defs")
    57  	dhtml, err := defs.First().Html()
    58  	if err != nil {
    59  		return "", err
    60  	}
    61  	doc.Find("head").AppendHtml(dhtml)
    62  	defs.Remove()
    63  	doc.Find("svg").First().Remove()
    64  	doc.Find("meta[http-equiv]").Remove()
    65  	doc.Find("head").PrependHtml(`<meta charset="UTF-8">`)
    66  	doc.Find("a[name]:not([href])").Each(func(_ int, s *goquery.Selection) {
    67  		name, exists := s.Attr("name")
    68  		if !exists {
    69  			return
    70  		}
    71  		s.SetAttr("href", "#"+name)
    72  	})
    73  	s, err := doc.Find("html").Html()
    74  	s = "<!DOCTYPE html><html>" + s + "</html>"
    75  	return s, err
    76  }
    77  
    78  // Tag returns the tag contents of r.
    79  func Tag(r io.Reader, tag string) (string, error) {
    80  	doc, err := goquery.NewDocumentFromReader(r)
    81  	if err != nil {
    82  		return "", err
    83  	}
    84  	node := doc.Find(tag).Get(0)
    85  	var b bytes.Buffer
    86  	if err := html.Render(&b, node); err != nil {
    87  		return "", err
    88  	}
    89  	return b.String(), nil
    90  }
    91  
    92  // InnerTag returns the inner contents of <tag> from r.
    93  func InnerTag(r io.Reader, tag string) (string, error) {
    94  	doc, err := goquery.NewDocumentFromReader(r)
    95  	if err != nil {
    96  		return "", err
    97  	}
    98  	return doc.Find(tag).Html()
    99  }