src.elv.sh@v0.21.0-dev.0.20240515223629-06979efb9a2a/pkg/md/html.go (about)

     1  package md
     2  
     3  import (
     4  	"fmt"
     5  	"strconv"
     6  	"strings"
     7  )
     8  
     9  var (
    10  	escapeHTML = strings.NewReplacer(
    11  		"&", "&amp;", `"`, "&quot;", "<", "&lt;", ">", "&gt;",
    12  		// No need to escape single quotes, since attributes in the output
    13  		// always use double quotes.
    14  	).Replace
    15  	// Modern browsers will happily accept almost anything in a URL attribute,
    16  	// except for the quote used by the attribute and space. But we try to be
    17  	// conservative and escape some characters, mostly following
    18  	// https://url.spec.whatwg.org/#url-code-points.
    19  	//
    20  	// We don't bother escaping control characters as they are unlikely to
    21  	// appear in Markdown text.
    22  	escapeURL = strings.NewReplacer(
    23  		`"`, "%22", `\`, "%5C", " ", "%20", "`", "%60",
    24  		"[", "%5B", "]", "%5D", "<", "%3C", ">", "%3E").Replace
    25  )
    26  
    27  // HTMLCodec converts markdown to HTML.
    28  type HTMLCodec struct {
    29  	strings.Builder
    30  	// If non-nil, will be called for each code block. The return value is
    31  	// inserted into the HTML output and should be properly escaped.
    32  	ConvertCodeBlock func(info, code string) string
    33  }
    34  
    35  var tags = []string{
    36  	OpThematicBreak: "<hr />\n",
    37  
    38  	OpBlockquoteStart: "<blockquote>\n", OpBlockquoteEnd: "</blockquote>\n",
    39  	OpListItemStart: "<li>\n", OpListItemEnd: "</li>\n",
    40  	OpBulletListStart: "<ul>\n", OpBulletListEnd: "</ul>\n",
    41  	OpOrderedListEnd: "</ol>\n",
    42  }
    43  
    44  func (c *HTMLCodec) Do(op Op) {
    45  	switch op.Type {
    46  	case OpHeading:
    47  		var attrs attrBuilder
    48  		if op.Info != "" {
    49  			// Only support #id since that's the only thing used in Elvish's
    50  			// Markdown right now. More can be added if needed.
    51  			if op.Info[0] == '#' {
    52  				attrs.set("id", op.Info[1:])
    53  			}
    54  		}
    55  		fmt.Fprintf(c, "<h%d%s>", op.Number, &attrs)
    56  		RenderInlineContentToHTML(&c.Builder, op.Content)
    57  		fmt.Fprintf(c, "</h%d>\n", op.Number)
    58  	case OpCodeBlock:
    59  		var attrs attrBuilder
    60  		language := ""
    61  		if op.Info != "" {
    62  			language, _, _ = strings.Cut(op.Info, " ")
    63  			attrs.set("class", "language-"+language)
    64  		}
    65  		fmt.Fprintf(c, "<pre><code%s>", &attrs)
    66  		if c.ConvertCodeBlock != nil {
    67  			c.WriteString(c.ConvertCodeBlock(op.Info, strings.Join(op.Lines, "\n")+"\n"))
    68  		} else {
    69  			for _, line := range op.Lines {
    70  				c.WriteString(escapeHTML(line))
    71  				c.WriteByte('\n')
    72  			}
    73  		}
    74  		c.WriteString("</code></pre>\n")
    75  	case OpHTMLBlock:
    76  		for _, line := range op.Lines {
    77  			c.WriteString(line)
    78  			c.WriteByte('\n')
    79  		}
    80  	case OpParagraph:
    81  		c.WriteString("<p>")
    82  		RenderInlineContentToHTML(&c.Builder, op.Content)
    83  		c.WriteString("</p>\n")
    84  	case OpOrderedListStart:
    85  		var attrs attrBuilder
    86  		if op.Number != 1 {
    87  			attrs.set("start", strconv.Itoa(op.Number))
    88  		}
    89  		fmt.Fprintf(c, "<ol%s>\n", &attrs)
    90  	default:
    91  		c.WriteString(tags[op.Type])
    92  	}
    93  }
    94  
    95  var inlineTags = []string{
    96  	OpNewLine:       "\n",
    97  	OpEmphasisStart: "<em>", OpEmphasisEnd: "</em>",
    98  	OpStrongEmphasisStart: "<strong>", OpStrongEmphasisEnd: "</strong>",
    99  	OpLinkEnd:       "</a>",
   100  	OpHardLineBreak: "<br />",
   101  }
   102  
   103  // RenderInlineContentToHTML renders inline content to HTML, writing to a
   104  // [strings.Builder]. This is useful for implementing an alternative
   105  // HTML-outputting [Codec].
   106  func RenderInlineContentToHTML(sb *strings.Builder, ops []InlineOp) {
   107  	for _, op := range ops {
   108  		doInline(sb, op)
   109  	}
   110  }
   111  
   112  func doInline(sb *strings.Builder, op InlineOp) {
   113  	switch op.Type {
   114  	case OpText:
   115  		sb.WriteString(escapeHTML(op.Text))
   116  	case OpCodeSpan:
   117  		sb.WriteString("<code>")
   118  		sb.WriteString(escapeHTML(op.Text))
   119  		sb.WriteString("</code>")
   120  	case OpRawHTML:
   121  		sb.WriteString(op.Text)
   122  	case OpLinkStart:
   123  		var attrs attrBuilder
   124  		attrs.set("href", escapeURL(op.Dest))
   125  		if op.Text != "" {
   126  			attrs.set("title", op.Text)
   127  		}
   128  		fmt.Fprintf(sb, "<a%s>", &attrs)
   129  	case OpImage:
   130  		var attrs attrBuilder
   131  		attrs.set("src", escapeURL(op.Dest))
   132  		attrs.set("alt", op.Alt)
   133  		if op.Text != "" {
   134  			attrs.set("title", op.Text)
   135  		}
   136  		fmt.Fprintf(sb, "<img%s />", &attrs)
   137  	case OpAutolink:
   138  		var attrs attrBuilder
   139  		attrs.set("href", escapeURL(op.Dest))
   140  		fmt.Fprintf(sb, "<a%s>%s</a>", &attrs, escapeHTML(op.Text))
   141  	default:
   142  		sb.WriteString(inlineTags[op.Type])
   143  	}
   144  }
   145  
   146  type attrBuilder struct{ strings.Builder }
   147  
   148  func (a *attrBuilder) set(k, v string) { fmt.Fprintf(a, ` %s="%s"`, k, escapeHTML(v)) }