github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/docgen/extract/xhtml.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package extract 12 13 import ( 14 "bytes" 15 "io" 16 "strings" 17 18 "github.com/PuerkitoBio/goquery" 19 "golang.org/x/net/html" 20 ) 21 22 // XHTMLtoHTML converts the XHTML railroad diagrams to HTML. 23 func XHTMLtoHTML(r io.Reader) (string, error) { 24 b := new(bytes.Buffer) 25 z := html.NewTokenizer(r) 26 for { 27 tt := z.Next() 28 if tt == html.ErrorToken { 29 err := z.Err() 30 if err == io.EOF { 31 break 32 } 33 return "", z.Err() 34 } 35 t := z.Token() 36 switch t.Type { 37 case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken: 38 idx := strings.IndexByte(t.Data, ':') 39 t.Data = t.Data[idx+1:] 40 } 41 var na []html.Attribute 42 for _, a := range t.Attr { 43 if strings.HasPrefix(a.Key, "xmlns") { 44 continue 45 } 46 na = append(na, a) 47 } 48 t.Attr = na 49 b.WriteString(t.String()) 50 } 51 52 doc, err := goquery.NewDocumentFromReader(b) 53 if err != nil { 54 return "", err 55 } 56 defs := doc.Find("defs") 57 dhtml, err := defs.First().Html() 58 if err != nil { 59 return "", err 60 } 61 doc.Find("head").AppendHtml(dhtml) 62 defs.Remove() 63 doc.Find("svg").First().Remove() 64 doc.Find("meta[http-equiv]").Remove() 65 doc.Find("head").PrependHtml(`<meta charset="UTF-8">`) 66 doc.Find("a[name]:not([href])").Each(func(_ int, s *goquery.Selection) { 67 name, exists := s.Attr("name") 68 if !exists { 69 return 70 } 71 s.SetAttr("href", "#"+name) 72 }) 73 s, err := doc.Find("html").Html() 74 s = "<!DOCTYPE html><html>" + s + "</html>" 75 return s, err 76 } 77 78 // Tag returns the tag contents of r. 79 func Tag(r io.Reader, tag string) (string, error) { 80 doc, err := goquery.NewDocumentFromReader(r) 81 if err != nil { 82 return "", err 83 } 84 node := doc.Find(tag).Get(0) 85 var b bytes.Buffer 86 if err := html.Render(&b, node); err != nil { 87 return "", err 88 } 89 return b.String(), nil 90 } 91 92 // InnerTag returns the inner contents of <tag> from r. 93 func InnerTag(r io.Reader, tag string) (string, error) { 94 doc, err := goquery.NewDocumentFromReader(r) 95 if err != nil { 96 return "", err 97 } 98 return doc.Find(tag).Html() 99 }