github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/markup/html.go (about)

     1  // Copyright 2023 The GitBundle Inc. All rights reserved.
     2  // Copyright 2017 The Gitea Authors. All rights reserved.
     3  // Use of this source code is governed by a MIT-style
     4  // license that can be found in the LICENSE file.
     5  
     6  package markup
     7  
     8  import (
     9  	"bytes"
    10  	"io"
    11  	"net/url"
    12  	"path"
    13  	"path/filepath"
    14  	"regexp"
    15  	"strings"
    16  	"sync"
    17  
    18  	"github.com/gitbundle/modules/base"
    19  	"github.com/gitbundle/modules/emoji"
    20  	"github.com/gitbundle/modules/git"
    21  	"github.com/gitbundle/modules/log"
    22  	"github.com/gitbundle/modules/markup/common"
    23  	"github.com/gitbundle/modules/references"
    24  	"github.com/gitbundle/modules/regexplru"
    25  	"github.com/gitbundle/modules/setting"
    26  	vars "github.com/gitbundle/modules/template_vars"
    27  	"github.com/gitbundle/modules/util"
    28  
    29  	"golang.org/x/net/html"
    30  	"golang.org/x/net/html/atom"
    31  	"mvdan.cc/xurls/v2"
    32  )
    33  
// Issue name styles.
//
// These are the values compared against ctx.Metas["style"] by
// issueIndexPatternProcessor to choose how issue references are detected.
const (
	IssueNameStyleNumeric      = "numeric"
	IssueNameStyleAlphanumeric = "alphanumeric"
	IssueNameStyleRegexp       = "regexp"
)
    40  
var (
	// NOTE: None of the regex matching below performs any extra validation.
	// Thus a link is produced even if the linked entity does not exist.
	// While fast, this is also incorrect and leads to false positives.
	// TODO: fix invalid linking issue

	// valid chars in encoded path and parameter: [-+~_%.a-zA-Z0-9/]

	// sha1CurrentPattern matches a string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
	// Although SHA1 hashes are 40 chars long, the regex matches hashes from 7 to 40 chars in length
	// so that abbreviated hash links can be used as well. This matches git and GitHub usability.
	sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|[.,](\s|$))`)

	// shortLinkPattern matches the short but difficult to parse [[name|link|arg=test]] syntax.
	// Group 1 is the text between the brackets, group 2 any word characters directly after "]]".
	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)

	// anySHA1Pattern splits a URL containing a full 40-char SHA into parts:
	// the SHA, an optional path after it, and an optional #fragment.
	anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)

	// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash".
	// The separator may be two or three dots, and COMMIT2 and the #hash are optional.
	comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,40})(\.\.\.?)([0-9a-f]{7,40})?(#[-+~_%.a-zA-Z0-9]+)?`)

	// validLinksPattern matches strings that start with a lowercase scheme followed by "://".
	validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)

	// While this email regex is definitely not perfect and I'm sure you can come up
	// with edge cases, it is still accepted by the CommonMark specification, as
	// well as the HTML5 spec:
	//   http://spec.commonmark.org/0.28/#email-address
	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
	emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|\\.(\\s|$))")

	// blackfriday extensions create IDs like fn:user-content-footnote
	blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)

	// EmojiShortCodeRegex finds emoji by alias, like :smile:
	EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
)
    78  
// keywordClass is the CSS class for action keywords (e.g. "closes: #1").
// createKeyword sets it on the span it builds.
const keywordClass = "issue-keyword"
    81  
    82  // IsLink reports whether link fits valid format.
    83  func IsLink(link []byte) bool {
    84  	return isLink(link)
    85  }
    86  
    87  // isLink reports whether link fits valid format.
    88  func isLink(link []byte) bool {
    89  	return validLinksPattern.Match(link)
    90  }
    91  
    92  func isLinkStr(link string) bool {
    93  	return validLinksPattern.MatchString(link)
    94  }
    95  
    96  // regexp for full links to issues/pulls
    97  var issueFullPattern *regexp.Regexp
    98  
    99  // Once for to prevent races
   100  var issueFullPatternOnce sync.Once
   101  
   102  func getIssueFullPattern() *regexp.Regexp {
   103  	issueFullPatternOnce.Do(func() {
   104  		issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) +
   105  			`[\w_.-]+/[\w_.-]+/(?:issues|pulls)/((?:\w{1,10}-)?[1-9][0-9]*)([\?|#](\S+)?)?\b`)
   106  	})
   107  	return issueFullPattern
   108  }
   109  
   110  // CustomLinkURLSchemes allows for additional schemes to be detected when parsing links within text
   111  func CustomLinkURLSchemes(schemes []string) {
   112  	schemes = append(schemes, "http", "https")
   113  	withAuth := make([]string, 0, len(schemes))
   114  	validScheme := regexp.MustCompile(`^[a-z]+$`)
   115  	for _, s := range schemes {
   116  		if !validScheme.MatchString(s) {
   117  			continue
   118  		}
   119  		without := false
   120  		for _, sna := range xurls.SchemesNoAuthority {
   121  			if s == sna {
   122  				without = true
   123  				break
   124  			}
   125  		}
   126  		if without {
   127  			s += ":"
   128  		} else {
   129  			s += "://"
   130  		}
   131  		withAuth = append(withAuth, s)
   132  	}
   133  	common.LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|"))
   134  }
   135  
   136  // IsSameDomain checks if given url string has the same hostname as current GitBundle instance
   137  func IsSameDomain(s string) bool {
   138  	if strings.HasPrefix(s, "/") {
   139  		return true
   140  	}
   141  	if uapp, err := url.Parse(setting.AppURL); err == nil {
   142  		if u, err := url.Parse(s); err == nil {
   143  			return u.Host == uapp.Host
   144  		}
   145  		return false
   146  	}
   147  	return false
   148  }
   149  
   150  type postProcessError struct {
   151  	context string
   152  	err     error
   153  }
   154  
   155  func (p *postProcessError) Error() string {
   156  	return "PostProcess: " + p.context + ", " + p.err.Error()
   157  }
   158  
// processor is a single post-processing step. It receives the render context
// and a node; textNode calls each configured processor on text nodes.
type processor func(ctx *RenderContext, node *html.Node)
   160  
// defaultProcessors is the processor list used by PostProcess. textNode runs
// the processors in slice order.
var defaultProcessors = []processor{
	fullIssuePatternProcessor,
	comparePatternProcessor,
	fullSha1PatternProcessor,
	shortLinkProcessor,
	linkProcessor,
	mentionProcessor,
	issueIndexPatternProcessor,
	sha1CurrentPatternProcessor,
	emailAddressProcessor,
	emojiProcessor,
	emojiShortCodeProcessor,
}
   174  
   175  // PostProcess does the final required transformations to the passed raw HTML
   176  // data, and ensures its validity. Transformations include: replacing links and
   177  // emails with HTML links, parsing shortlinks in the format of [[Link]], like
   178  // MediaWiki, linking issues in the format #ID, and mentions in the format
   179  // @user, and others.
   180  func PostProcess(
   181  	ctx *RenderContext,
   182  	input io.Reader,
   183  	output io.Writer,
   184  ) error {
   185  	return postProcess(ctx, defaultProcessors, input, output)
   186  }
   187  
// commitMessageProcessors is the base processor list used by
// RenderCommitMessage. Compared with defaultProcessors it omits
// shortLinkProcessor.
var commitMessageProcessors = []processor{
	fullIssuePatternProcessor,
	comparePatternProcessor,
	fullSha1PatternProcessor,
	linkProcessor,
	mentionProcessor,
	issueIndexPatternProcessor,
	sha1CurrentPatternProcessor,
	emailAddressProcessor,
	emojiProcessor,
	emojiShortCodeProcessor,
}
   200  
   201  // RenderCommitMessage will use the same logic as PostProcess, but will disable
   202  // the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is
   203  // set, which changes every text node into a link to the passed default link.
   204  func RenderCommitMessage(
   205  	ctx *RenderContext,
   206  	content string,
   207  ) (string, error) {
   208  	procs := commitMessageProcessors
   209  	if ctx.DefaultLink != "" {
   210  		// we don't have to fear data races, because being
   211  		// commitMessageProcessors of fixed len and cap, every time we append
   212  		// something to it the slice is realloc+copied, so append always
   213  		// generates the slice ex-novo.
   214  		procs = append(procs, genDefaultLinkProcessor(ctx.DefaultLink))
   215  	}
   216  	return renderProcessString(ctx, procs, content)
   217  }
   218  
// commitMessageSubjectProcessors is the base processor list used by
// RenderCommitMessageSubject. Compared with commitMessageProcessors it omits
// emailAddressProcessor.
var commitMessageSubjectProcessors = []processor{
	fullIssuePatternProcessor,
	comparePatternProcessor,
	fullSha1PatternProcessor,
	linkProcessor,
	mentionProcessor,
	issueIndexPatternProcessor,
	sha1CurrentPatternProcessor,
	emojiShortCodeProcessor,
	emojiProcessor,
}
   230  
// emojiProcessors only handles emoji. It is used by RenderEmoji, and
// visitNode switches to it for text inside <a> elements.
var emojiProcessors = []processor{
	emojiShortCodeProcessor,
	emojiProcessor,
}
   235  
   236  // RenderCommitMessageSubject will use the same logic as PostProcess and
   237  // RenderCommitMessage, but will disable the shortLinkProcessor and
   238  // emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set,
   239  // which changes every text node into a link to the passed default link.
   240  func RenderCommitMessageSubject(
   241  	ctx *RenderContext,
   242  	content string,
   243  ) (string, error) {
   244  	procs := commitMessageSubjectProcessors
   245  	if ctx.DefaultLink != "" {
   246  		// we don't have to fear data races, because being
   247  		// commitMessageSubjectProcessors of fixed len and cap, every time we
   248  		// append something to it the slice is realloc+copied, so append always
   249  		// generates the slice ex-novo.
   250  		procs = append(procs, genDefaultLinkProcessor(ctx.DefaultLink))
   251  	}
   252  	return renderProcessString(ctx, procs, content)
   253  }
   254  
   255  // RenderIssueTitle to process title on individual issue/pull page
   256  func RenderIssueTitle(
   257  	ctx *RenderContext,
   258  	title string,
   259  ) (string, error) {
   260  	return renderProcessString(ctx, []processor{
   261  		issueIndexPatternProcessor,
   262  		sha1CurrentPatternProcessor,
   263  		emojiShortCodeProcessor,
   264  		emojiProcessor,
   265  	}, title)
   266  }
   267  
   268  func renderProcessString(ctx *RenderContext, procs []processor, content string) (string, error) {
   269  	var buf strings.Builder
   270  	if err := postProcess(ctx, procs, strings.NewReader(content), &buf); err != nil {
   271  		return "", err
   272  	}
   273  	return buf.String(), nil
   274  }
   275  
   276  // RenderDescriptionHTML will use similar logic as PostProcess, but will
   277  // use a single special linkProcessor.
   278  func RenderDescriptionHTML(
   279  	ctx *RenderContext,
   280  	content string,
   281  ) (string, error) {
   282  	return renderProcessString(ctx, []processor{
   283  		descriptionLinkProcessor,
   284  		emojiShortCodeProcessor,
   285  		emojiProcessor,
   286  	}, content)
   287  }
   288  
   289  // RenderEmoji for when we want to just process emoji and shortcodes
   290  // in various places it isn't already run through the normal markdown processor
   291  func RenderEmoji(
   292  	content string,
   293  ) (string, error) {
   294  	return renderProcessString(&RenderContext{}, emojiProcessors, content)
   295  }
   296  
var (
	// tagCleaner matches a "<" that opens one of: a tag name containing a
	// slash, a name wrapped in slashes, or an (optionally closing) html/head
	// tag in any letter case. postProcess rewrites each match to "&lt;$1",
	// turning the "<" into an entity before the HTML is parsed.
	tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`)
	// nulCleaner removes NUL characters; postProcess applies it to the raw input.
	nulCleaner = strings.NewReplacer("\000", "")
)
   301  
   302  func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output io.Writer) error {
   303  	defer ctx.Cancel()
   304  	// FIXME: don't read all content to memory
   305  	rawHTML, err := io.ReadAll(input)
   306  	if err != nil {
   307  		return err
   308  	}
   309  
   310  	res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50))
   311  	// prepend "<html><body>"
   312  	_, _ = res.WriteString("<html><body>")
   313  
   314  	// Strip out nuls - they're always invalid
   315  	_, _ = res.Write(tagCleaner.ReplaceAll([]byte(nulCleaner.Replace(string(rawHTML))), []byte("&lt;$1")))
   316  
   317  	// close the tags
   318  	_, _ = res.WriteString("</body></html>")
   319  
   320  	// parse the HTML
   321  	node, err := html.Parse(res)
   322  	if err != nil {
   323  		return &postProcessError{"invalid HTML", err}
   324  	}
   325  
   326  	if node.Type == html.DocumentNode {
   327  		node = node.FirstChild
   328  	}
   329  
   330  	visitNode(ctx, procs, procs, node)
   331  
   332  	newNodes := make([]*html.Node, 0, 5)
   333  
   334  	if node.Data == "html" {
   335  		node = node.FirstChild
   336  		for node != nil && node.Data != "body" {
   337  			node = node.NextSibling
   338  		}
   339  	}
   340  	if node != nil {
   341  		if node.Data == "body" {
   342  			child := node.FirstChild
   343  			for child != nil {
   344  				newNodes = append(newNodes, child)
   345  				child = child.NextSibling
   346  			}
   347  		} else {
   348  			newNodes = append(newNodes, node)
   349  		}
   350  	}
   351  
   352  	// Render everything to buf.
   353  	for _, node := range newNodes {
   354  		if err := html.Render(output, node); err != nil {
   355  			return &postProcessError{"error rendering processed HTML", err}
   356  		}
   357  	}
   358  	return nil
   359  }
   360  
   361  func visitNode(ctx *RenderContext, procs, textProcs []processor, node *html.Node) {
   362  	// Add user-content- to IDs if they don't already have them
   363  	for idx, attr := range node.Attr {
   364  		if attr.Key == "id" && !(strings.HasPrefix(attr.Val, "user-content-") || blackfridayExtRegex.MatchString(attr.Val)) {
   365  			node.Attr[idx].Val = "user-content-" + attr.Val
   366  		}
   367  
   368  		if attr.Key == "class" && attr.Val == "emoji" {
   369  			textProcs = nil
   370  		}
   371  	}
   372  
   373  	// We ignore code and pre.
   374  	switch node.Type {
   375  	case html.TextNode:
   376  		textNode(ctx, textProcs, node)
   377  	case html.ElementNode:
   378  		if node.Data == "img" {
   379  			for i, attr := range node.Attr {
   380  				if attr.Key != "src" {
   381  					continue
   382  				}
   383  				if len(attr.Val) > 0 && !isLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") {
   384  					prefix := ctx.URLPrefix
   385  					if ctx.IsWiki {
   386  						prefix = util.URLJoin(prefix, "wiki", "raw")
   387  					}
   388  					prefix = strings.Replace(prefix, "/src/", "/media/", 1)
   389  
   390  					attr.Val = util.URLJoin(prefix, attr.Val)
   391  				}
   392  				attr.Val = camoHandleLink(attr.Val)
   393  				node.Attr[i] = attr
   394  			}
   395  		} else if node.Data == "a" {
   396  			// Restrict text in links to emojis
   397  			textProcs = emojiProcessors
   398  		} else if node.Data == "code" || node.Data == "pre" {
   399  			return
   400  		} else if node.Data == "i" {
   401  			for _, attr := range node.Attr {
   402  				if attr.Key != "class" {
   403  					continue
   404  				}
   405  				classes := strings.Split(attr.Val, " ")
   406  				for i, class := range classes {
   407  					if class == "icon" {
   408  						classes[0], classes[i] = classes[i], classes[0]
   409  						attr.Val = strings.Join(classes, " ")
   410  
   411  						// Remove all children of icons
   412  						child := node.FirstChild
   413  						for child != nil {
   414  							node.RemoveChild(child)
   415  							child = node.FirstChild
   416  						}
   417  						break
   418  					}
   419  				}
   420  			}
   421  		}
   422  		for n := node.FirstChild; n != nil; n = n.NextSibling {
   423  			visitNode(ctx, procs, textProcs, n)
   424  		}
   425  	}
   426  	// ignore everything else
   427  }
   428  
   429  // textNode runs the passed node through various processors, in order to handle
   430  // all kinds of special links handled by the post-processing.
   431  func textNode(ctx *RenderContext, procs []processor, node *html.Node) {
   432  	for _, processor := range procs {
   433  		processor(ctx, node)
   434  	}
   435  }
   436  
   437  // createKeyword() renders a highlighted version of an action keyword
   438  func createKeyword(content string) *html.Node {
   439  	span := &html.Node{
   440  		Type: html.ElementNode,
   441  		Data: atom.Span.String(),
   442  		Attr: []html.Attribute{},
   443  	}
   444  	span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass})
   445  
   446  	text := &html.Node{
   447  		Type: html.TextNode,
   448  		Data: content,
   449  	}
   450  	span.AppendChild(text)
   451  
   452  	return span
   453  }
   454  
   455  func createEmoji(content, class, name string) *html.Node {
   456  	span := &html.Node{
   457  		Type: html.ElementNode,
   458  		Data: atom.Span.String(),
   459  		Attr: []html.Attribute{},
   460  	}
   461  	if class != "" {
   462  		span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class})
   463  	}
   464  	if name != "" {
   465  		span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name})
   466  	}
   467  
   468  	text := &html.Node{
   469  		Type: html.TextNode,
   470  		Data: content,
   471  	}
   472  
   473  	span.AppendChild(text)
   474  	return span
   475  }
   476  
   477  func createCustomEmoji(alias string) *html.Node {
   478  	span := &html.Node{
   479  		Type: html.ElementNode,
   480  		Data: atom.Span.String(),
   481  		Attr: []html.Attribute{},
   482  	}
   483  	span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"})
   484  	span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias})
   485  
   486  	img := &html.Node{
   487  		Type:     html.ElementNode,
   488  		DataAtom: atom.Img,
   489  		Data:     "img",
   490  		Attr:     []html.Attribute{},
   491  	}
   492  	img.Attr = append(img.Attr, html.Attribute{Key: "alt", Val: ":" + alias + ":"})
   493  	img.Attr = append(img.Attr, html.Attribute{Key: "src", Val: setting.StaticURLPrefix + "/assets/img/emoji/" + alias + ".png"})
   494  
   495  	span.AppendChild(img)
   496  	return span
   497  }
   498  
   499  func createLink(href, content, class string) *html.Node {
   500  	a := &html.Node{
   501  		Type: html.ElementNode,
   502  		Data: atom.A.String(),
   503  		Attr: []html.Attribute{{Key: "href", Val: href}},
   504  	}
   505  
   506  	if class != "" {
   507  		a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class})
   508  	}
   509  
   510  	text := &html.Node{
   511  		Type: html.TextNode,
   512  		Data: content,
   513  	}
   514  
   515  	a.AppendChild(text)
   516  	return a
   517  }
   518  
   519  func createCodeLink(href, content, class string) *html.Node {
   520  	a := &html.Node{
   521  		Type: html.ElementNode,
   522  		Data: atom.A.String(),
   523  		Attr: []html.Attribute{{Key: "href", Val: href}},
   524  	}
   525  
   526  	if class != "" {
   527  		a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class})
   528  	}
   529  
   530  	text := &html.Node{
   531  		Type: html.TextNode,
   532  		Data: content,
   533  	}
   534  
   535  	code := &html.Node{
   536  		Type: html.ElementNode,
   537  		Data: atom.Code.String(),
   538  		Attr: []html.Attribute{{Key: "class", Val: "nohighlight"}},
   539  	}
   540  
   541  	code.AppendChild(text)
   542  	a.AppendChild(code)
   543  	return a
   544  }
   545  
   546  // replaceContent takes text node, and in its content it replaces a section of
   547  // it with the specified newNode.
   548  func replaceContent(node *html.Node, i, j int, newNode *html.Node) {
   549  	replaceContentList(node, i, j, []*html.Node{newNode})
   550  }
   551  
   552  // replaceContentList takes text node, and in its content it replaces a section of
   553  // it with the specified newNodes. An example to visualize how this can work can
   554  // be found here: https://play.golang.org/p/5zP8NnHZ03s
   555  func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
   556  	// get the data before and after the match
   557  	before := node.Data[:i]
   558  	after := node.Data[j:]
   559  
   560  	// Replace in the current node the text, so that it is only what it is
   561  	// supposed to have.
   562  	node.Data = before
   563  
   564  	// Get the current next sibling, before which we place the replaced data,
   565  	// and after that we place the new text node.
   566  	nextSibling := node.NextSibling
   567  	for _, n := range newNodes {
   568  		node.Parent.InsertBefore(n, nextSibling)
   569  	}
   570  	if after != "" {
   571  		node.Parent.InsertBefore(&html.Node{
   572  			Type: html.TextNode,
   573  			Data: after,
   574  		}, nextSibling)
   575  	}
   576  }
   577  
   578  func mentionProcessor(ctx *RenderContext, node *html.Node) {
   579  	start := 0
   580  	next := node.NextSibling
   581  	for node != nil && node != next && start < len(node.Data) {
   582  		// We replace only the first mention; other mentions will be addressed later
   583  		found, loc := references.FindFirstMentionBytes([]byte(node.Data[start:]))
   584  		if !found {
   585  			return
   586  		}
   587  		loc.Start += start
   588  		loc.End += start
   589  		mention := node.Data[loc.Start:loc.End]
   590  		var teams string
   591  		teams, ok := ctx.Metas["teams"]
   592  		// FIXME: util.URLJoin may not be necessary here:
   593  		// - setting.AppURL is defined to have a terminal '/' so unless mention[1:]
   594  		// is an AppSubURL link we can probably fallback to concatenation.
   595  		// team mention should follow @orgName/teamName style
   596  		if ok && strings.Contains(mention, "/") {
   597  			mentionOrgAndTeam := strings.Split(mention, "/")
   598  			if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
   599  				replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention"))
   600  				node = node.NextSibling.NextSibling
   601  				start = 0
   602  				continue
   603  			}
   604  			start = loc.End
   605  			continue
   606  		}
   607  		replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
   608  		node = node.NextSibling.NextSibling
   609  		start = 0
   610  	}
   611  }
   612  
   613  func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
   614  	shortLinkProcessorFull(ctx, node, false)
   615  }
   616  
   617  func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
   618  	next := node.NextSibling
   619  	for node != nil && node != next {
   620  		m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
   621  		if m == nil {
   622  			return
   623  		}
   624  
   625  		content := node.Data[m[2]:m[3]]
   626  		tail := node.Data[m[4]:m[5]]
   627  		props := make(map[string]string)
   628  
   629  		// MediaWiki uses [[link|text]], while GitHub uses [[text|link]]
   630  		// It makes page handling terrible, but we prefer GitHub syntax
   631  		// And fall back to MediaWiki only when it is obvious from the look
   632  		// Of text and link contents
   633  		sl := strings.Split(content, "|")
   634  		for _, v := range sl {
   635  			if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
   636  				// There is no equal in this argument; this is a mandatory arg
   637  				if props["name"] == "" {
   638  					if isLinkStr(v) {
   639  						// If we clearly see it is a link, we save it so
   640  
   641  						// But first we need to ensure, that if both mandatory args provided
   642  						// look like links, we stick to GitHub syntax
   643  						if props["link"] != "" {
   644  							props["name"] = props["link"]
   645  						}
   646  
   647  						props["link"] = strings.TrimSpace(v)
   648  					} else {
   649  						props["name"] = v
   650  					}
   651  				} else {
   652  					props["link"] = strings.TrimSpace(v)
   653  				}
   654  			} else {
   655  				// There is an equal; optional argument.
   656  
   657  				sep := strings.IndexByte(v, '=')
   658  				key, val := v[:sep], html.UnescapeString(v[sep+1:])
   659  
   660  				// When parsing HTML, x/net/html will change all quotes which are
   661  				// not used for syntax into UTF-8 quotes. So checking val[0] won't
   662  				// be enough, since that only checks a single byte.
   663  				if len(val) > 1 {
   664  					if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) ||
   665  						(strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) {
   666  						const lenQuote = len("‘")
   667  						val = val[lenQuote : len(val)-lenQuote]
   668  					} else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) ||
   669  						(strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) {
   670  						val = val[1 : len(val)-1]
   671  					} else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") {
   672  						const lenQuote = len("‘")
   673  						val = val[1 : len(val)-lenQuote]
   674  					}
   675  				}
   676  				props[key] = val
   677  			}
   678  		}
   679  
   680  		var name, link string
   681  		if props["link"] != "" {
   682  			link = props["link"]
   683  		} else if props["name"] != "" {
   684  			link = props["name"]
   685  		}
   686  		if props["title"] != "" {
   687  			name = props["title"]
   688  		} else if props["name"] != "" {
   689  			name = props["name"]
   690  		} else {
   691  			name = link
   692  		}
   693  
   694  		name += tail
   695  		image := false
   696  		switch ext := filepath.Ext(link); ext {
   697  		// fast path: empty string, ignore
   698  		case "":
   699  			// leave image as false
   700  		case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
   701  			image = true
   702  		}
   703  
   704  		childNode := &html.Node{}
   705  		linkNode := &html.Node{
   706  			FirstChild: childNode,
   707  			LastChild:  childNode,
   708  			Type:       html.ElementNode,
   709  			Data:       "a",
   710  			DataAtom:   atom.A,
   711  		}
   712  		childNode.Parent = linkNode
   713  		absoluteLink := isLinkStr(link)
   714  		if !absoluteLink {
   715  			if image {
   716  				link = strings.ReplaceAll(link, " ", "+")
   717  			} else {
   718  				link = strings.ReplaceAll(link, " ", "-")
   719  			}
   720  			if !strings.Contains(link, "/") {
   721  				link = url.PathEscape(link)
   722  			}
   723  		}
   724  		urlPrefix := ctx.URLPrefix
   725  		if image {
   726  			if !absoluteLink {
   727  				if IsSameDomain(urlPrefix) {
   728  					urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1)
   729  				}
   730  				if ctx.IsWiki {
   731  					link = util.URLJoin("wiki", "raw", link)
   732  				}
   733  				link = util.URLJoin(urlPrefix, link)
   734  			}
   735  			title := props["title"]
   736  			if title == "" {
   737  				title = props["alt"]
   738  			}
   739  			if title == "" {
   740  				title = path.Base(name)
   741  			}
   742  			alt := props["alt"]
   743  			if alt == "" {
   744  				alt = name
   745  			}
   746  
   747  			// make the childNode an image - if we can, we also place the alt
   748  			childNode.Type = html.ElementNode
   749  			childNode.Data = "img"
   750  			childNode.DataAtom = atom.Img
   751  			childNode.Attr = []html.Attribute{
   752  				{Key: "src", Val: link},
   753  				{Key: "title", Val: title},
   754  				{Key: "alt", Val: alt},
   755  			}
   756  			if alt == "" {
   757  				childNode.Attr = childNode.Attr[:2]
   758  			}
   759  		} else {
   760  			if !absoluteLink {
   761  				if ctx.IsWiki {
   762  					link = util.URLJoin("wiki", link)
   763  				}
   764  				link = util.URLJoin(urlPrefix, link)
   765  			}
   766  			childNode.Type = html.TextNode
   767  			childNode.Data = name
   768  		}
   769  		if noLink {
   770  			linkNode = childNode
   771  		} else {
   772  			linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
   773  		}
   774  		replaceContent(node, m[0], m[1], linkNode)
   775  		node = node.NextSibling.NextSibling
   776  	}
   777  }
   778  
   779  func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
   780  	if ctx.Metas == nil {
   781  		return
   782  	}
   783  
   784  	next := node.NextSibling
   785  	for node != nil && node != next {
   786  		m := getIssueFullPattern().FindStringSubmatchIndex(node.Data)
   787  		if m == nil {
   788  			return
   789  		}
   790  		link := node.Data[m[0]:m[1]]
   791  		id := "#" + node.Data[m[2]:m[3]]
   792  
   793  		// extract repo and org name from matched link like
   794  		// http://localhost:3000/gituser/myrepo/issues/1
   795  		linkParts := strings.Split(link, "/")
   796  		matchOrg := linkParts[len(linkParts)-4]
   797  		matchRepo := linkParts[len(linkParts)-3]
   798  
   799  		if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] {
   800  			// TODO if m[4]:m[5] is not nil, then link is to a comment,
   801  			// and we should indicate that in the text somehow
   802  			replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue"))
   803  		} else {
   804  			orgRepoID := matchOrg + "/" + matchRepo + id
   805  			replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue"))
   806  		}
   807  		node = node.NextSibling.NextSibling
   808  	}
   809  }
   810  
   811  func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
   812  	if ctx.Metas == nil {
   813  		return
   814  	}
   815  	var (
   816  		found bool
   817  		ref   *references.RenderizableReference
   818  	)
   819  
   820  	next := node.NextSibling
   821  
   822  	for node != nil && node != next {
   823  		_, hasExtTrackFormat := ctx.Metas["format"]
   824  
   825  		// Repos with external issue trackers might still need to reference local PRs
   826  		// We need to concern with the first one that shows up in the text, whichever it is
   827  		isNumericStyle := ctx.Metas["style"] == "" || ctx.Metas["style"] == IssueNameStyleNumeric
   828  		foundNumeric, refNumeric := references.FindRenderizableReferenceNumeric(node.Data, hasExtTrackFormat && !isNumericStyle)
   829  
   830  		switch ctx.Metas["style"] {
   831  		case "", IssueNameStyleNumeric:
   832  			found, ref = foundNumeric, refNumeric
   833  		case IssueNameStyleAlphanumeric:
   834  			found, ref = references.FindRenderizableReferenceAlphanumeric(node.Data)
   835  		case IssueNameStyleRegexp:
   836  			pattern, err := regexplru.GetCompiled(ctx.Metas["regexp"])
   837  			if err != nil {
   838  				return
   839  			}
   840  			found, ref = references.FindRenderizableReferenceRegexp(node.Data, pattern)
   841  		}
   842  
   843  		// Repos with external issue trackers might still need to reference local PRs
   844  		// We need to concern with the first one that shows up in the text, whichever it is
   845  		if hasExtTrackFormat && !isNumericStyle && refNumeric != nil {
   846  			// If numeric (PR) was found, and it was BEFORE the non-numeric pattern, use that
   847  			// Allow a free-pass when non-numeric pattern wasn't found.
   848  			if found && (ref == nil || refNumeric.RefLocation.Start < ref.RefLocation.Start) {
   849  				found = foundNumeric
   850  				ref = refNumeric
   851  			}
   852  		}
   853  		if !found {
   854  			return
   855  		}
   856  
   857  		var link *html.Node
   858  		reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End]
   859  		if hasExtTrackFormat && !ref.IsPull {
   860  			ctx.Metas["index"] = ref.Issue
   861  
   862  			res, err := vars.Expand(ctx.Metas["format"], ctx.Metas)
   863  			if err != nil {
   864  				// here we could just log the error and continue the rendering
   865  				log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err)
   866  			}
   867  
   868  			link = createLink(res, reftext, "ref-issue ref-external-issue")
   869  		} else {
   870  			// Path determines the type of link that will be rendered. It's unknown at this point whether
   871  			// the linked item is actually a PR or an issue. Luckily it's of no real consequence because
   872  			// GitBundle will redirect on click as appropriate.
   873  			path := "issues"
   874  			if ref.IsPull {
   875  				path = "pulls"
   876  			}
   877  			if ref.Owner == "" {
   878  				link = createLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], path, ref.Issue), reftext, "ref-issue")
   879  			} else {
   880  				link = createLink(util.URLJoin(setting.AppURL, ref.Owner, ref.Name, path, ref.Issue), reftext, "ref-issue")
   881  			}
   882  		}
   883  
   884  		if ref.Action == references.XRefActionNone {
   885  			replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
   886  			node = node.NextSibling.NextSibling
   887  			continue
   888  		}
   889  
   890  		// Decorate action keywords if actionable
   891  		var keyword *html.Node
   892  		if references.IsXrefActionable(ref, hasExtTrackFormat) {
   893  			keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End])
   894  		} else {
   895  			keyword = &html.Node{
   896  				Type: html.TextNode,
   897  				Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End],
   898  			}
   899  		}
   900  		spaces := &html.Node{
   901  			Type: html.TextNode,
   902  			Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
   903  		}
   904  		replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
   905  		node = node.NextSibling.NextSibling.NextSibling.NextSibling
   906  	}
   907  }
   908  
   909  // fullSha1PatternProcessor renders SHA containing URLs
   910  func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) {
   911  	if ctx.Metas == nil {
   912  		return
   913  	}
   914  
   915  	next := node.NextSibling
   916  	for node != nil && node != next {
   917  		m := anySHA1Pattern.FindStringSubmatchIndex(node.Data)
   918  		if m == nil {
   919  			return
   920  		}
   921  
   922  		urlFull := node.Data[m[0]:m[1]]
   923  		text := base.ShortSha(node.Data[m[2]:m[3]])
   924  
   925  		// 3rd capture group matches a optional path
   926  		subpath := ""
   927  		if m[5] > 0 {
   928  			subpath = node.Data[m[4]:m[5]]
   929  		}
   930  
   931  		// 4th capture group matches a optional url hash
   932  		hash := ""
   933  		if m[7] > 0 {
   934  			hash = node.Data[m[6]:m[7]][1:]
   935  		}
   936  
   937  		start := m[0]
   938  		end := m[1]
   939  
   940  		// If url ends in '.', it's very likely that it is not part of the
   941  		// actual url but used to finish a sentence.
   942  		if strings.HasSuffix(urlFull, ".") {
   943  			end--
   944  			urlFull = urlFull[:len(urlFull)-1]
   945  			if hash != "" {
   946  				hash = hash[:len(hash)-1]
   947  			} else if subpath != "" {
   948  				subpath = subpath[:len(subpath)-1]
   949  			}
   950  		}
   951  
   952  		if subpath != "" {
   953  			text += subpath
   954  		}
   955  
   956  		if hash != "" {
   957  			text += " (" + hash + ")"
   958  		}
   959  		replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
   960  		node = node.NextSibling.NextSibling
   961  	}
   962  }
   963  
   964  func comparePatternProcessor(ctx *RenderContext, node *html.Node) {
   965  	if ctx.Metas == nil {
   966  		return
   967  	}
   968  
   969  	next := node.NextSibling
   970  	for node != nil && node != next {
   971  		m := comparePattern.FindStringSubmatchIndex(node.Data)
   972  		if m == nil {
   973  			return
   974  		}
   975  
   976  		// Ensure that every group (m[0]...m[7]) has a match
   977  		for i := 0; i < 8; i++ {
   978  			if m[i] == -1 {
   979  				return
   980  			}
   981  		}
   982  
   983  		urlFull := node.Data[m[0]:m[1]]
   984  		text1 := base.ShortSha(node.Data[m[2]:m[3]])
   985  		textDots := base.ShortSha(node.Data[m[4]:m[5]])
   986  		text2 := base.ShortSha(node.Data[m[6]:m[7]])
   987  
   988  		hash := ""
   989  		if m[9] > 0 {
   990  			hash = node.Data[m[8]:m[9]][1:]
   991  		}
   992  
   993  		start := m[0]
   994  		end := m[1]
   995  
   996  		// If url ends in '.', it's very likely that it is not part of the
   997  		// actual url but used to finish a sentence.
   998  		if strings.HasSuffix(urlFull, ".") {
   999  			end--
  1000  			urlFull = urlFull[:len(urlFull)-1]
  1001  			if hash != "" {
  1002  				hash = hash[:len(hash)-1]
  1003  			} else if text2 != "" {
  1004  				text2 = text2[:len(text2)-1]
  1005  			}
  1006  		}
  1007  
  1008  		text := text1 + textDots + text2
  1009  		if hash != "" {
  1010  			text += " (" + hash + ")"
  1011  		}
  1012  		replaceContent(node, start, end, createCodeLink(urlFull, text, "compare"))
  1013  		node = node.NextSibling.NextSibling
  1014  	}
  1015  }
  1016  
  1017  // emojiShortCodeProcessor for rendering text like :smile: into emoji
  1018  func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
  1019  	start := 0
  1020  	next := node.NextSibling
  1021  	for node != nil && node != next && start < len(node.Data) {
  1022  		m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
  1023  		if m == nil {
  1024  			return
  1025  		}
  1026  		m[0] += start
  1027  		m[1] += start
  1028  
  1029  		start = m[1]
  1030  
  1031  		alias := node.Data[m[0]:m[1]]
  1032  		alias = strings.ReplaceAll(alias, ":", "")
  1033  		converted := emoji.FromAlias(alias)
  1034  		if converted == nil {
  1035  			// check if this is a custom reaction
  1036  			if _, exist := setting.UI.CustomEmojisMap[alias]; exist {
  1037  				replaceContent(node, m[0], m[1], createCustomEmoji(alias))
  1038  				node = node.NextSibling.NextSibling
  1039  				start = 0
  1040  				continue
  1041  			}
  1042  			continue
  1043  		}
  1044  
  1045  		replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description))
  1046  		node = node.NextSibling.NextSibling
  1047  		start = 0
  1048  	}
  1049  }
  1050  
  1051  // emoji processor to match emoji and add emoji class
  1052  func emojiProcessor(ctx *RenderContext, node *html.Node) {
  1053  	start := 0
  1054  	next := node.NextSibling
  1055  	for node != nil && node != next && start < len(node.Data) {
  1056  		m := emoji.FindEmojiSubmatchIndex(node.Data[start:])
  1057  		if m == nil {
  1058  			return
  1059  		}
  1060  		m[0] += start
  1061  		m[1] += start
  1062  
  1063  		codepoint := node.Data[m[0]:m[1]]
  1064  		start = m[1]
  1065  		val := emoji.FromCode(codepoint)
  1066  		if val != nil {
  1067  			replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description))
  1068  			node = node.NextSibling.NextSibling
  1069  			start = 0
  1070  		}
  1071  	}
  1072  }
  1073  
  1074  // sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that
  1075  // are assumed to be in the same repository.
  1076  func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
  1077  	if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" {
  1078  		return
  1079  	}
  1080  
  1081  	start := 0
  1082  	next := node.NextSibling
  1083  	if ctx.ShaExistCache == nil {
  1084  		ctx.ShaExistCache = make(map[string]bool)
  1085  	}
  1086  	for node != nil && node != next && start < len(node.Data) {
  1087  		m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:])
  1088  		if m == nil {
  1089  			return
  1090  		}
  1091  		m[2] += start
  1092  		m[3] += start
  1093  
  1094  		hash := node.Data[m[2]:m[3]]
  1095  		// The regex does not lie, it matches the hash pattern.
  1096  		// However, a regex cannot know if a hash actually exists or not.
  1097  		// We could assume that a SHA1 hash should probably contain alphas AND numerics
  1098  		// but that is not always the case.
  1099  		// Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash
  1100  		// as used by git and github for linking and thus we have to do similar.
  1101  		// Because of this, we check to make sure that a matched hash is actually
  1102  		// a commit in the repository before making it a link.
  1103  
  1104  		// check cache first
  1105  		exist, inCache := ctx.ShaExistCache[hash]
  1106  		if !inCache {
  1107  			if ctx.GitRepo == nil {
  1108  				var err error
  1109  				ctx.GitRepo, err = git.OpenRepository(ctx.Ctx, ctx.Metas["repoPath"])
  1110  				if err != nil {
  1111  					log.Error("unable to open repository: %s Error: %v", ctx.Metas["repoPath"], err)
  1112  					return
  1113  				}
  1114  				ctx.AddCancel(func() {
  1115  					ctx.GitRepo.Close()
  1116  					ctx.GitRepo = nil
  1117  				})
  1118  			}
  1119  
  1120  			exist = ctx.GitRepo.IsObjectExist(hash)
  1121  			ctx.ShaExistCache[hash] = exist
  1122  		}
  1123  
  1124  		if !exist {
  1125  			start = m[3]
  1126  			continue
  1127  		}
  1128  
  1129  		link := util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash)
  1130  		replaceContent(node, m[2], m[3], createCodeLink(link, base.ShortSha(hash), "commit"))
  1131  		start = 0
  1132  		node = node.NextSibling.NextSibling
  1133  	}
  1134  }
  1135  
  1136  // emailAddressProcessor replaces raw email addresses with a mailto: link.
  1137  func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
  1138  	next := node.NextSibling
  1139  	for node != nil && node != next {
  1140  		m := emailRegex.FindStringSubmatchIndex(node.Data)
  1141  		if m == nil {
  1142  			return
  1143  		}
  1144  
  1145  		mail := node.Data[m[2]:m[3]]
  1146  		replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto"))
  1147  		node = node.NextSibling.NextSibling
  1148  	}
  1149  }
  1150  
  1151  // linkProcessor creates links for any HTTP or HTTPS URL not captured by
  1152  // markdown.
  1153  func linkProcessor(ctx *RenderContext, node *html.Node) {
  1154  	next := node.NextSibling
  1155  	for node != nil && node != next {
  1156  		m := common.LinkRegex.FindStringIndex(node.Data)
  1157  		if m == nil {
  1158  			return
  1159  		}
  1160  
  1161  		uri := node.Data[m[0]:m[1]]
  1162  		replaceContent(node, m[0], m[1], createLink(uri, uri, "link"))
  1163  		node = node.NextSibling.NextSibling
  1164  	}
  1165  }
  1166  
  1167  func genDefaultLinkProcessor(defaultLink string) processor {
  1168  	return func(ctx *RenderContext, node *html.Node) {
  1169  		ch := &html.Node{
  1170  			Parent: node,
  1171  			Type:   html.TextNode,
  1172  			Data:   node.Data,
  1173  		}
  1174  
  1175  		node.Type = html.ElementNode
  1176  		node.Data = "a"
  1177  		node.DataAtom = atom.A
  1178  		node.Attr = []html.Attribute{
  1179  			{Key: "href", Val: defaultLink},
  1180  			{Key: "class", Val: "default-link"},
  1181  		}
  1182  		node.FirstChild, node.LastChild = ch, ch
  1183  	}
  1184  }
  1185  
  1186  // descriptionLinkProcessor creates links for DescriptionHTML
  1187  func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) {
  1188  	next := node.NextSibling
  1189  	for node != nil && node != next {
  1190  		m := common.LinkRegex.FindStringIndex(node.Data)
  1191  		if m == nil {
  1192  			return
  1193  		}
  1194  
  1195  		uri := node.Data[m[0]:m[1]]
  1196  		replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri))
  1197  		node = node.NextSibling.NextSibling
  1198  	}
  1199  }
  1200  
  1201  func createDescriptionLink(href, content string) *html.Node {
  1202  	textNode := &html.Node{
  1203  		Type: html.TextNode,
  1204  		Data: content,
  1205  	}
  1206  	linkNode := &html.Node{
  1207  		FirstChild: textNode,
  1208  		LastChild:  textNode,
  1209  		Type:       html.ElementNode,
  1210  		Data:       "a",
  1211  		DataAtom:   atom.A,
  1212  		Attr: []html.Attribute{
  1213  			{Key: "href", Val: href},
  1214  			{Key: "target", Val: "_blank"},
  1215  			{Key: "rel", Val: "noopener noreferrer"},
  1216  		},
  1217  	}
  1218  	textNode.Parent = linkNode
  1219  	return linkNode
  1220  }