github.com/brandur/modulir@v0.0.0-20240305213423-94ee82929cbd/modules/mmarkdownext/mmarkdownext.go (about)

     1  // Package mmarkdownext provides an extended version of Markdown that does
     2  // several passes to add additional niceties like adding footnotes and allowing
     3  // Go template helpers to be used.
     4  package mmarkdownext
     5  
     6  import (
     7  	"bytes"
     8  	"fmt"
     9  	"path/filepath"
    10  	"regexp"
    11  	"strings"
    12  	"text/template"
    13  
    14  	"golang.org/x/xerrors"
    15  	"gopkg.in/russross/blackfriday.v2"
    16  
    17  	"github.com/brandur/modulir/modules/mtemplate"
    18  )
    19  
    20  //////////////////////////////////////////////////////////////////////////////
    21  //
    22  //
    23  //
    24  // Public
    25  //
    26  //
    27  //
    28  //////////////////////////////////////////////////////////////////////////////
    29  
    30  // FuncMap is the map of helper functions that will be used when passing the
    31  // Markdown through a Go template step.
    32  var FuncMap = template.FuncMap{}
    33  
    34  // RenderOptions customizes a single Render call; every step also accepts nil.
    35  type RenderOptions struct {
    36  	// AbsoluteURL is the absolute URL of the final site. If non-empty, it is
    37  	// prepended to site-relative image sources and link targets (those that
    38  	// start with `/`) in the rendered HTML so that they become absolute.
    39  	AbsoluteURL string
    40  
    41  	// NoFollow adds `rel="nofollow"` to links whose href starts with `http`.
    42  	NoFollow bool
    43  
    44  	// NoFootnoteLinks renders footnote numbers as plain bold superscripts instead of links.
    45  	NoFootnoteLinks bool
    46  
    47  	// NoHeaderLinks renders headers as plain `<hN>` tags with no id or permalink.
    48  	NoHeaderLinks bool
    49  
    50  	// NoRetina skips adding a `2x` srcset to rendered images.
    51  	NoRetina bool
    52  
    53  	// TemplateData is the data value passed when the source is executed as a Go template.
    54  	TemplateData interface{}
    55  }
    56  
    57  // Render a Markdown string to HTML while applying all custom project-specific
    58  // filters including footnotes and stable header links.
    59  func Render(s string, options *RenderOptions) (string, error) {
    60  	var err error
    61  	for _, f := range renderStack {
    62  		s, err = f(s, options)
    63  		if err != nil {
    64  			return "", err
    65  		}
    66  	}
    67  	return s, nil
    68  }
    69  
    70  //////////////////////////////////////////////////////////////////////////////
    71  //
    72  //
    73  //
    74  // Private
    75  //
    76  //
    77  //
    78  //////////////////////////////////////////////////////////////////////////////
    79  
    80  // renderStack is the full set of functions that we'll run on an input string
    81  // to get our fully rendered Markdown. This includes the rendering itself, but
    82  // also a number of custom transformation options.
    83  var renderStack = []func(string, *RenderOptions) (string, error){
    84  	//
    85  	// Pre-transformation functions
    86  	//
    87  
    88  	transformGoTemplate,
    89  	transformHeaders,
    90  
    91  	// DEPRECATED: Use Go template helpers instead.
    92  	transformFigures,
    93  
    94  	// The actual Blackfriday rendering
    95  	func(source string, _ *RenderOptions) (string, error) {
    96  		return string(blackfriday.Run([]byte(source))), nil
    97  	},
    98  
    99  	//
   100  	// Post-transformation functions
   101  	//
   102  
   103  	// DEPRECATED: Find a different way to do this.
   104  	transformCodeWithLanguagePrefix,
   105  
   106  	transformFootnotes,
   107  
   108  	// Should come before `transformImagesAndLinksToAbsoluteURLs` so that
   109  	// relative links that are later converted to absolute aren't tagged with
   110  	// `rel="nofollow"`.
   111  	transformLinksToNoFollow,
   112  
   113  	transformImagesAndLinksToAbsoluteURLs,
   114  	transformImagesToRetina,
   115  }
   116  
   117  // Look for any whitespace between HTML tags.
   118  var whitespaceRE = regexp.MustCompile(`>\s+<`)
   119  
   120  // Simply collapses certain HTML snippets by removing newlines and whitespace
   121  // between tags. This is mainline used to make HTML snippets readable as
   122  // constants, but then to make them fit a little more nicely into the rendered
   123  // markup.
   124  func collapseHTML(html string) string {
   125  	html = strings.ReplaceAll(html, "\n", "")
   126  	html = whitespaceRE.ReplaceAllString(html, "><")
   127  	html = strings.TrimSpace(html)
   128  	return html
   129  }
   130  
   131  var codeRE = regexp.MustCompile(`<code class="(\w+)">`)
   132  
   133  func transformCodeWithLanguagePrefix(source string, options *RenderOptions) (string, error) {
   134  	return codeRE.ReplaceAllString(source, `<code class="language-$1">`), nil
   135  }
   136  
   137  const figureHTML = `
   138  <figure>
   139    <p><a href="%s"><img src="%s" class="overflowing"></a></p>
   140    <figcaption>%s</figcaption>
   141  </figure>
   142  `
   143  
   144  var figureRE = regexp.MustCompile(`!fig src="(.*)" caption="(.*)"`)
   145  
   146  func transformFigures(source string, options *RenderOptions) (string, error) {
   147  	return figureRE.ReplaceAllStringFunc(source, func(figure string) string {
   148  		matches := figureRE.FindStringSubmatch(figure)
   149  		src := matches[1]
   150  
   151  		link := src
   152  		extension := filepath.Ext(link)
   153  		if extension != "" && extension != ".svg" {
   154  			link = link[0:len(src)-len(extension)] + "@2x" + extension
   155  		}
   156  
   157  		// This is a really ugly hack in that it relies on the regex above
   158  		// being greedy about quotes, but meh, I'll make it better when there's
   159  		// a good reason to.
   160  		caption := strings.ReplaceAll(matches[2], `\"`, `"`)
   161  
   162  		return fmt.Sprintf(figureHTML, link, src, caption)
   163  	}), nil
   164  }
   165  
   166  // Note that this should come early as we currently rely on a later step to
   167  // give images a retina srcset.
   168  func transformGoTemplate(source string, options *RenderOptions) (string, error) {
   169  	// Skip this step if it doesn't look like there's any Go template code
   170  	// contained in the source. (This may be a premature optimization.)
   171  	if !strings.Contains(source, "{{") {
   172  		return source, nil
   173  	}
   174  
   175  	tmpl, err := template.New("fmarkdownTemp").Funcs(FuncMap).Parse(source)
   176  	if err != nil {
   177  		return "", xerrors.Errorf("error parsing template: %w", err)
   178  	}
   179  
   180  	var templateData interface{}
   181  	if options != nil {
   182  		templateData = options.TemplateData
   183  	}
   184  
   185  	// Run the template to verify the output.
   186  	var b bytes.Buffer
   187  	err = tmpl.Execute(&b, templateData)
   188  	if err != nil {
   189  		return "", xerrors.Errorf("error executing template: %w", err)
   190  	}
   191  
   192  	// fmt.Printf("output in = %v ...\n", b.String())
   193  	return b.String(), nil
   194  }
   195  
   196  const headerHTML = `
   197  <h%v id="%s" class="link">
   198  	<a href="#%s">%s</a>
   199  </h%v>
   200  `
   201  
   202  const headerHTMLNoLink = `
   203  <h%v>%s</h%v>
   204  `
   205  
   206  // Matches one of the following:
   207  //
   208  //	# header
   209  //	# header (#header-id)
   210  //
   211  // For now, only match ## or more so as to remove code comments from
   212  // matches. We need a better way of doing that though.
   213  var headerRE = regexp.MustCompile(`(?m:^(#{2,})\s+(.*?)(\s+\(#(.*)\))?$)`)
   214  
   215  func transformHeaders(source string, options *RenderOptions) (string, error) {
   216  	headerNum := 0
   217  
   218  	// Tracks previously assigned headers so that we can detect duplicates.
   219  	headers := make(map[string]int)
   220  
   221  	source = headerRE.ReplaceAllStringFunc(source, func(header string) string {
   222  		matches := headerRE.FindStringSubmatch(header)
   223  
   224  		level := len(matches[1])
   225  		title := matches[2]
   226  		id := matches[4]
   227  
   228  		var newID string
   229  
   230  		if id == "" {
   231  			// Header with no name, assign a prefixed number.
   232  			newID = fmt.Sprintf("section-%v", headerNum)
   233  		} else {
   234  			occurrence, ok := headers[id]
   235  
   236  			if ok {
   237  				// Give duplicate IDs a suffix.
   238  				newID = fmt.Sprintf("%s-%d", id, occurrence)
   239  				headers[id]++
   240  			} else {
   241  				// Otherwise this is the first such ID we've seen.
   242  				newID = id
   243  				headers[id] = 1
   244  			}
   245  		}
   246  
   247  		headerNum++
   248  
   249  		// Replace the Markdown header with HTML equivalent.
   250  		if options != nil && options.NoHeaderLinks {
   251  			return collapseHTML(fmt.Sprintf(headerHTMLNoLink, level, title, level))
   252  		}
   253  
   254  		return collapseHTML(fmt.Sprintf(headerHTML, level, newID, newID, title, level))
   255  	})
   256  
   257  	return source, nil
   258  }
   259  
   260  // A layer that we wrap the entire footer section in for styling purposes.
   261  const footerWrapper = `
   262  <div class="footnotes">
   263    %s
   264  </div>
   265  `
   266  
   267  // HTML for a footnote within the document.
   268  const footnoteAnchorHTML = `
   269  <sup id="footnote-%s">
   270    <a href="#footnote-%s-source">%s</a>
   271  </sup>
   272  `
   273  
   274  // Same as footnoteAnchorHTML but without a link(this is used when sending
   275  // emails).
   276  const footnoteAnchorHTMLWithoutLink = `<sup><strong>%s</strong></sup>`
   277  
   278  // HTML for a reference to a footnote within the document.
   279  //
   280  // Make sure there's a single space before the <sup> because we're replacing
   281  // one as part of our search.
   282  const footnoteReferenceHTML = `
   283  <sup id="footnote-%s-source">
   284    <a href="#footnote-%s">%s</a>
   285  </sup>
   286  `
   287  
   288  // Same as footnoteReferenceHTML but without a link (this is used when sending
   289  // emails).
   290  //
   291  // Make sure there's a single space before the <sup> because we're replacing
   292  // one as part of our search.
   293  const footnoteReferenceHTMLWithoutLink = `<sup><strong>%s</strong></sup>`
   294  
   295  // Look for the section the section at the bottom of the page that looks like
   296  // <p>[1] (the paragraph tag is there because Markdown will have already
   297  // wrapped it by this point).
   298  var footerRE = regexp.MustCompile(`(?ms:^<p>\[\d+\].*)`)
   299  
   300  // Look for a single footnote within the footer.
   301  var footnoteRE = regexp.MustCompile(`\[(\d+)\](\s+.*)`)
   302  
   303  // Note that this must be a post-transform filter. If it wasn't, our Markdown
   304  // renderer would not render the Markdown inside the footnotes layer because it
   305  // would already be wrapped in HTML.
   306  func transformFootnotes(source string, options *RenderOptions) (string, error) {
   307  	footer := footerRE.FindString(source)
   308  
   309  	if footer != "" {
   310  		// remove the footer for now
   311  		source = strings.Replace(source, footer, "", 1)
   312  
   313  		footer = footnoteRE.ReplaceAllStringFunc(footer, func(footnote string) string {
   314  			// first create a footnote with an anchor that links can target
   315  			matches := footnoteRE.FindStringSubmatch(footnote)
   316  			number := matches[1]
   317  
   318  			var anchor string
   319  			if options != nil && options.NoFootnoteLinks {
   320  				anchor = fmt.Sprintf(footnoteAnchorHTMLWithoutLink, number) + matches[2]
   321  			} else {
   322  				anchor = fmt.Sprintf(footnoteAnchorHTML, number, number, number) + matches[2]
   323  			}
   324  
   325  			// Then replace all references in the body to this footnote.
   326  			//
   327  			// Note the leading space before ` [%s]`. This is a little hacky,
   328  			// but is there to try and ensure that we don't try to replace
   329  			// strings that look like footnote references, but aren't.
   330  			// `KEYS[1]` from `/redis-cluster` is an example of one of these
   331  			// strings that might be a false positive.
   332  			var reference string
   333  			if options != nil && options.NoFootnoteLinks {
   334  				reference = fmt.Sprintf(footnoteReferenceHTMLWithoutLink, number)
   335  			} else {
   336  				reference = fmt.Sprintf(footnoteReferenceHTML, number, number, number)
   337  			}
   338  			source = strings.ReplaceAll(source,
   339  				fmt.Sprintf(` [%s]`, number),
   340  				" "+collapseHTML(reference))
   341  
   342  			return collapseHTML(anchor)
   343  		})
   344  
   345  		// and wrap the whole footer section in a layer for styling
   346  		footer = fmt.Sprintf(footerWrapper, footer)
   347  		source += footer
   348  	}
   349  
   350  	return source, nil
   351  }
   352  
   353  var imageRE = regexp.MustCompile(`<img src="([^"]+)"([^>]*)`)
   354  
   355  func transformImagesToRetina(source string, options *RenderOptions) (string, error) {
   356  	if options != nil && options.NoRetina {
   357  		return source, nil
   358  	}
   359  
   360  	// The basic idea here is that we give every image a `srcset` that includes
   361  	// 2x so that browsers will replace it with a retina version.
   362  	return imageRE.ReplaceAllStringFunc(source, func(img string) string {
   363  		matches := imageRE.FindStringSubmatch(img)
   364  
   365  		// SVGs are resolution-agnostic and don't need replacing.
   366  		if filepath.Ext(matches[1]) == ".svg" {
   367  			return fmt.Sprintf(`<img src="%s"%s`, matches[1], matches[2])
   368  		}
   369  
   370  		// If the image already has a srcset, do nothing.
   371  		if strings.Contains(matches[2], "srcset") {
   372  			return fmt.Sprintf(`<img src="%s"%s`, matches[1], matches[2])
   373  		}
   374  
   375  		return fmt.Sprintf(`<img src="%s" srcset="%s 2x, %s 1x"%s`,
   376  			matches[1],
   377  			mtemplate.To2X(matches[1]),
   378  			matches[1],
   379  			matches[2],
   380  		)
   381  	}), nil
   382  }
   383  
   384  var relativeImageRE = regexp.MustCompile(`<img src="/`)
   385  
   386  var relativeLinkRE = regexp.MustCompile(`<a href="/`)
   387  
   388  func transformImagesAndLinksToAbsoluteURLs(source string, options *RenderOptions) (string, error) {
   389  	if options == nil || options.AbsoluteURL == "" {
   390  		return source, nil
   391  	}
   392  
   393  	source = relativeImageRE.ReplaceAllStringFunc(source, func(img string) string {
   394  		return `<img src="` + options.AbsoluteURL + `/`
   395  	})
   396  
   397  	source = relativeLinkRE.ReplaceAllStringFunc(source, func(img string) string {
   398  		return `<a href="` + options.AbsoluteURL + `/`
   399  	})
   400  
   401  	return source, nil
   402  }
   403  
   404  var absoluteLinkRE = regexp.MustCompile(`<a href="http[^"]+"`)
   405  
   406  func transformLinksToNoFollow(source string, options *RenderOptions) (string, error) {
   407  	if options == nil || !options.NoFollow {
   408  		return source, nil
   409  	}
   410  
   411  	return absoluteLinkRE.ReplaceAllStringFunc(source, func(link string) string {
   412  		return fmt.Sprintf(`%s rel="nofollow"`, link)
   413  	}), nil
   414  }