github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/markup/html.go (about) 1 // Copyright 2023 The GitBundle Inc. All rights reserved. 2 // Copyright 2017 The Gitea Authors. All rights reserved. 3 // Use of this source code is governed by a MIT-style 4 // license that can be found in the LICENSE file. 5 6 package markup 7 8 import ( 9 "bytes" 10 "io" 11 "net/url" 12 "path" 13 "path/filepath" 14 "regexp" 15 "strings" 16 "sync" 17 18 "github.com/gitbundle/modules/base" 19 "github.com/gitbundle/modules/emoji" 20 "github.com/gitbundle/modules/git" 21 "github.com/gitbundle/modules/log" 22 "github.com/gitbundle/modules/markup/common" 23 "github.com/gitbundle/modules/references" 24 "github.com/gitbundle/modules/regexplru" 25 "github.com/gitbundle/modules/setting" 26 vars "github.com/gitbundle/modules/template_vars" 27 "github.com/gitbundle/modules/util" 28 29 "golang.org/x/net/html" 30 "golang.org/x/net/html/atom" 31 "mvdan.cc/xurls/v2" 32 ) 33 34 // Issue name styles 35 const ( 36 IssueNameStyleNumeric = "numeric" 37 IssueNameStyleAlphanumeric = "alphanumeric" 38 IssueNameStyleRegexp = "regexp" 39 ) 40 41 var ( 42 // NOTE: All below regex matching do not perform any extra validation. 43 // Thus a link is produced even if the linked entity does not exist. 44 // While fast, this is also incorrect and lead to false positives. 45 // TODO: fix invalid linking issue 46 47 // valid chars in encoded path and parameter: [-+~_%.a-zA-Z0-9/] 48 49 // sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae 50 // Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length 51 // so that abbreviated hash links can be used as well. This matches git and GitHub usability. 52 sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|[.,](\s|$))`) 53 54 // shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax 55 shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) 56 57 // anySHA1Pattern splits url containing SHA into parts 58 anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`) 59 60 // comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash" 61 comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,40})(\.\.\.?)([0-9a-f]{7,40})?(#[-+~_%.a-zA-Z0-9]+)?`) 62 63 validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`) 64 65 // While this email regex is definitely not perfect and I'm sure you can come up 66 // with edge cases, it is still accepted by the CommonMark specification, as 67 // well as the HTML5 spec: 68 // http://spec.commonmark.org/0.28/#email-address 69 // https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) 70 emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|\\.(\\s|$))") 71 72 // blackfriday extensions create IDs like fn:user-content-footnote 73 blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`) 74 75 // EmojiShortCodeRegex find emoji by alias like :smile: 76 EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`) 77 ) 78 79 // CSS class for action keywords (e.g. "closes: #1") 80 const keywordClass = "issue-keyword" 81 82 // IsLink reports whether link fits valid format. 83 func IsLink(link []byte) bool { 84 return isLink(link) 85 } 86 87 // isLink reports whether link fits valid format. 88 func isLink(link []byte) bool { 89 return validLinksPattern.Match(link) 90 } 91 92 func isLinkStr(link string) bool { 93 return validLinksPattern.MatchString(link) 94 } 95 96 // regexp for full links to issues/pulls 97 var issueFullPattern *regexp.Regexp 98 99 // Once for to prevent races 100 var issueFullPatternOnce sync.Once 101 102 func getIssueFullPattern() *regexp.Regexp { 103 issueFullPatternOnce.Do(func() { 104 issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) + 105 `[\w_.-]+/[\w_.-]+/(?:issues|pulls)/((?:\w{1,10}-)?[1-9][0-9]*)([\?|#](\S+)?)?\b`) 106 }) 107 return issueFullPattern 108 } 109 110 // CustomLinkURLSchemes allows for additional schemes to be detected when parsing links within text 111 func CustomLinkURLSchemes(schemes []string) { 112 schemes = append(schemes, "http", "https") 113 withAuth := make([]string, 0, len(schemes)) 114 validScheme := regexp.MustCompile(`^[a-z]+$`) 115 for _, s := range schemes { 116 if !validScheme.MatchString(s) { 117 continue 118 } 119 without := false 120 for _, sna := range xurls.SchemesNoAuthority { 121 if s == sna { 122 without = true 123 break 124 } 125 } 126 if without { 127 s += ":" 128 } else { 129 s += "://" 130 } 131 withAuth = append(withAuth, s) 132 } 133 common.LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) 134 } 135 136 // IsSameDomain checks if given url string has the same hostname as current GitBundle instance 137 func IsSameDomain(s string) bool { 138 if strings.HasPrefix(s, "/") { 139 return true 140 } 141 if uapp, err := url.Parse(setting.AppURL); err == nil { 142 if u, err := url.Parse(s); err == nil { 143 return u.Host == uapp.Host 144 } 145 return false 146 } 147 return false 148 } 149 150 type postProcessError struct { 151 context string 152 err error 153 } 154 155 func (p *postProcessError) Error() string { 156 return "PostProcess: " + p.context + ", " + p.err.Error() 157 } 158 159 type processor func(ctx *RenderContext, node *html.Node) 160 161 var defaultProcessors = []processor{ 162 fullIssuePatternProcessor, 163 comparePatternProcessor, 164 fullSha1PatternProcessor, 165 shortLinkProcessor, 166 linkProcessor, 167 mentionProcessor, 168 issueIndexPatternProcessor, 169 sha1CurrentPatternProcessor, 170 emailAddressProcessor, 171 emojiProcessor, 172 emojiShortCodeProcessor, 173 } 174 175 // PostProcess does the final required transformations to the passed raw HTML 176 // data, and ensures its validity. Transformations include: replacing links and 177 // emails with HTML links, parsing shortlinks in the format of [[Link]], like 178 // MediaWiki, linking issues in the format #ID, and mentions in the format 179 // @user, and others. 180 func PostProcess( 181 ctx *RenderContext, 182 input io.Reader, 183 output io.Writer, 184 ) error { 185 return postProcess(ctx, defaultProcessors, input, output) 186 } 187 188 var commitMessageProcessors = []processor{ 189 fullIssuePatternProcessor, 190 comparePatternProcessor, 191 fullSha1PatternProcessor, 192 linkProcessor, 193 mentionProcessor, 194 issueIndexPatternProcessor, 195 sha1CurrentPatternProcessor, 196 emailAddressProcessor, 197 emojiProcessor, 198 emojiShortCodeProcessor, 199 } 200 201 // RenderCommitMessage will use the same logic as PostProcess, but will disable 202 // the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is 203 // set, which changes every text node into a link to the passed default link. 204 func RenderCommitMessage( 205 ctx *RenderContext, 206 content string, 207 ) (string, error) { 208 procs := commitMessageProcessors 209 if ctx.DefaultLink != "" { 210 // we don't have to fear data races, because being 211 // commitMessageProcessors of fixed len and cap, every time we append 212 // something to it the slice is realloc+copied, so append always 213 // generates the slice ex-novo. 214 procs = append(procs, genDefaultLinkProcessor(ctx.DefaultLink)) 215 } 216 return renderProcessString(ctx, procs, content) 217 } 218 219 var commitMessageSubjectProcessors = []processor{ 220 fullIssuePatternProcessor, 221 comparePatternProcessor, 222 fullSha1PatternProcessor, 223 linkProcessor, 224 mentionProcessor, 225 issueIndexPatternProcessor, 226 sha1CurrentPatternProcessor, 227 emojiShortCodeProcessor, 228 emojiProcessor, 229 } 230 231 var emojiProcessors = []processor{ 232 emojiShortCodeProcessor, 233 emojiProcessor, 234 } 235 236 // RenderCommitMessageSubject will use the same logic as PostProcess and 237 // RenderCommitMessage, but will disable the shortLinkProcessor and 238 // emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set, 239 // which changes every text node into a link to the passed default link. 240 func RenderCommitMessageSubject( 241 ctx *RenderContext, 242 content string, 243 ) (string, error) { 244 procs := commitMessageSubjectProcessors 245 if ctx.DefaultLink != "" { 246 // we don't have to fear data races, because being 247 // commitMessageSubjectProcessors of fixed len and cap, every time we 248 // append something to it the slice is realloc+copied, so append always 249 // generates the slice ex-novo. 250 procs = append(procs, genDefaultLinkProcessor(ctx.DefaultLink)) 251 } 252 return renderProcessString(ctx, procs, content) 253 } 254 255 // RenderIssueTitle to process title on individual issue/pull page 256 func RenderIssueTitle( 257 ctx *RenderContext, 258 title string, 259 ) (string, error) { 260 return renderProcessString(ctx, []processor{ 261 issueIndexPatternProcessor, 262 sha1CurrentPatternProcessor, 263 emojiShortCodeProcessor, 264 emojiProcessor, 265 }, title) 266 } 267 268 func renderProcessString(ctx *RenderContext, procs []processor, content string) (string, error) { 269 var buf strings.Builder 270 if err := postProcess(ctx, procs, strings.NewReader(content), &buf); err != nil { 271 return "", err 272 } 273 return buf.String(), nil 274 } 275 276 // RenderDescriptionHTML will use similar logic as PostProcess, but will 277 // use a single special linkProcessor. 278 func RenderDescriptionHTML( 279 ctx *RenderContext, 280 content string, 281 ) (string, error) { 282 return renderProcessString(ctx, []processor{ 283 descriptionLinkProcessor, 284 emojiShortCodeProcessor, 285 emojiProcessor, 286 }, content) 287 } 288 289 // RenderEmoji for when we want to just process emoji and shortcodes 290 // in various places it isn't already run through the normal markdown processor 291 func RenderEmoji( 292 content string, 293 ) (string, error) { 294 return renderProcessString(&RenderContext{}, emojiProcessors, content) 295 } 296 297 var ( 298 tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`) 299 nulCleaner = strings.NewReplacer("\000", "") 300 ) 301 302 func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output io.Writer) error { 303 defer ctx.Cancel() 304 // FIXME: don't read all content to memory 305 rawHTML, err := io.ReadAll(input) 306 if err != nil { 307 return err 308 } 309 310 res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50)) 311 // prepend "<html><body>" 312 _, _ = res.WriteString("<html><body>") 313 314 // Strip out nuls - they're always invalid 315 _, _ = res.Write(tagCleaner.ReplaceAll([]byte(nulCleaner.Replace(string(rawHTML))), []byte("<$1"))) 316 317 // close the tags 318 _, _ = res.WriteString("</body></html>") 319 320 // parse the HTML 321 node, err := html.Parse(res) 322 if err != nil { 323 return &postProcessError{"invalid HTML", err} 324 } 325 326 if node.Type == html.DocumentNode { 327 node = node.FirstChild 328 } 329 330 visitNode(ctx, procs, procs, node) 331 332 newNodes := make([]*html.Node, 0, 5) 333 334 if node.Data == "html" { 335 node = node.FirstChild 336 for node != nil && node.Data != "body" { 337 node = node.NextSibling 338 } 339 } 340 if node != nil { 341 if node.Data == "body" { 342 child := node.FirstChild 343 for child != nil { 344 newNodes = append(newNodes, child) 345 child = child.NextSibling 346 } 347 } else { 348 newNodes = append(newNodes, node) 349 } 350 } 351 352 // Render everything to buf. 353 for _, node := range newNodes { 354 if err := html.Render(output, node); err != nil { 355 return &postProcessError{"error rendering processed HTML", err} 356 } 357 } 358 return nil 359 } 360 361 func visitNode(ctx *RenderContext, procs, textProcs []processor, node *html.Node) { 362 // Add user-content- to IDs if they don't already have them 363 for idx, attr := range node.Attr { 364 if attr.Key == "id" && !(strings.HasPrefix(attr.Val, "user-content-") || blackfridayExtRegex.MatchString(attr.Val)) { 365 node.Attr[idx].Val = "user-content-" + attr.Val 366 } 367 368 if attr.Key == "class" && attr.Val == "emoji" { 369 textProcs = nil 370 } 371 } 372 373 // We ignore code and pre. 374 switch node.Type { 375 case html.TextNode: 376 textNode(ctx, textProcs, node) 377 case html.ElementNode: 378 if node.Data == "img" { 379 for i, attr := range node.Attr { 380 if attr.Key != "src" { 381 continue 382 } 383 if len(attr.Val) > 0 && !isLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") { 384 prefix := ctx.URLPrefix 385 if ctx.IsWiki { 386 prefix = util.URLJoin(prefix, "wiki", "raw") 387 } 388 prefix = strings.Replace(prefix, "/src/", "/media/", 1) 389 390 attr.Val = util.URLJoin(prefix, attr.Val) 391 } 392 attr.Val = camoHandleLink(attr.Val) 393 node.Attr[i] = attr 394 } 395 } else if node.Data == "a" { 396 // Restrict text in links to emojis 397 textProcs = emojiProcessors 398 } else if node.Data == "code" || node.Data == "pre" { 399 return 400 } else if node.Data == "i" { 401 for _, attr := range node.Attr { 402 if attr.Key != "class" { 403 continue 404 } 405 classes := strings.Split(attr.Val, " ") 406 for i, class := range classes { 407 if class == "icon" { 408 classes[0], classes[i] = classes[i], classes[0] 409 attr.Val = strings.Join(classes, " ") 410 411 // Remove all children of icons 412 child := node.FirstChild 413 for child != nil { 414 node.RemoveChild(child) 415 child = node.FirstChild 416 } 417 break 418 } 419 } 420 } 421 } 422 for n := node.FirstChild; n != nil; n = n.NextSibling { 423 visitNode(ctx, procs, textProcs, n) 424 } 425 } 426 // ignore everything else 427 } 428 429 // textNode runs the passed node through various processors, in order to handle 430 // all kinds of special links handled by the post-processing. 431 func textNode(ctx *RenderContext, procs []processor, node *html.Node) { 432 for _, processor := range procs { 433 processor(ctx, node) 434 } 435 } 436 437 // createKeyword() renders a highlighted version of an action keyword 438 func createKeyword(content string) *html.Node { 439 span := &html.Node{ 440 Type: html.ElementNode, 441 Data: atom.Span.String(), 442 Attr: []html.Attribute{}, 443 } 444 span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass}) 445 446 text := &html.Node{ 447 Type: html.TextNode, 448 Data: content, 449 } 450 span.AppendChild(text) 451 452 return span 453 } 454 455 func createEmoji(content, class, name string) *html.Node { 456 span := &html.Node{ 457 Type: html.ElementNode, 458 Data: atom.Span.String(), 459 Attr: []html.Attribute{}, 460 } 461 if class != "" { 462 span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class}) 463 } 464 if name != "" { 465 span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name}) 466 } 467 468 text := &html.Node{ 469 Type: html.TextNode, 470 Data: content, 471 } 472 473 span.AppendChild(text) 474 return span 475 } 476 477 func createCustomEmoji(alias string) *html.Node { 478 span := &html.Node{ 479 Type: html.ElementNode, 480 Data: atom.Span.String(), 481 Attr: []html.Attribute{}, 482 } 483 span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"}) 484 span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias}) 485 486 img := &html.Node{ 487 Type: html.ElementNode, 488 DataAtom: atom.Img, 489 Data: "img", 490 Attr: []html.Attribute{}, 491 } 492 img.Attr = append(img.Attr, html.Attribute{Key: "alt", Val: ":" + alias + ":"}) 493 img.Attr = append(img.Attr, html.Attribute{Key: "src", Val: setting.StaticURLPrefix + "/assets/img/emoji/" + alias + ".png"}) 494 495 span.AppendChild(img) 496 return span 497 } 498 499 func createLink(href, content, class string) *html.Node { 500 a := &html.Node{ 501 Type: html.ElementNode, 502 Data: atom.A.String(), 503 Attr: []html.Attribute{{Key: "href", Val: href}}, 504 } 505 506 if class != "" { 507 a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class}) 508 } 509 510 text := &html.Node{ 511 Type: html.TextNode, 512 Data: content, 513 } 514 515 a.AppendChild(text) 516 return a 517 } 518 519 func createCodeLink(href, content, class string) *html.Node { 520 a := &html.Node{ 521 Type: html.ElementNode, 522 Data: atom.A.String(), 523 Attr: []html.Attribute{{Key: "href", Val: href}}, 524 } 525 526 if class != "" { 527 a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class}) 528 } 529 530 text := &html.Node{ 531 Type: html.TextNode, 532 Data: content, 533 } 534 535 code := &html.Node{ 536 Type: html.ElementNode, 537 Data: atom.Code.String(), 538 Attr: []html.Attribute{{Key: "class", Val: "nohighlight"}}, 539 } 540 541 code.AppendChild(text) 542 a.AppendChild(code) 543 return a 544 } 545 546 // replaceContent takes text node, and in its content it replaces a section of 547 // it with the specified newNode. 548 func replaceContent(node *html.Node, i, j int, newNode *html.Node) { 549 replaceContentList(node, i, j, []*html.Node{newNode}) 550 } 551 552 // replaceContentList takes text node, and in its content it replaces a section of 553 // it with the specified newNodes. An example to visualize how this can work can 554 // be found here: https://play.golang.org/p/5zP8NnHZ03s 555 func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) { 556 // get the data before and after the match 557 before := node.Data[:i] 558 after := node.Data[j:] 559 560 // Replace in the current node the text, so that it is only what it is 561 // supposed to have. 562 node.Data = before 563 564 // Get the current next sibling, before which we place the replaced data, 565 // and after that we place the new text node. 566 nextSibling := node.NextSibling 567 for _, n := range newNodes { 568 node.Parent.InsertBefore(n, nextSibling) 569 } 570 if after != "" { 571 node.Parent.InsertBefore(&html.Node{ 572 Type: html.TextNode, 573 Data: after, 574 }, nextSibling) 575 } 576 } 577 578 func mentionProcessor(ctx *RenderContext, node *html.Node) { 579 start := 0 580 next := node.NextSibling 581 for node != nil && node != next && start < len(node.Data) { 582 // We replace only the first mention; other mentions will be addressed later 583 found, loc := references.FindFirstMentionBytes([]byte(node.Data[start:])) 584 if !found { 585 return 586 } 587 loc.Start += start 588 loc.End += start 589 mention := node.Data[loc.Start:loc.End] 590 var teams string 591 teams, ok := ctx.Metas["teams"] 592 // FIXME: util.URLJoin may not be necessary here: 593 // - setting.AppURL is defined to have a terminal '/' so unless mention[1:] 594 // is an AppSubURL link we can probably fallback to concatenation. 595 // team mention should follow @orgName/teamName style 596 if ok && strings.Contains(mention, "/") { 597 mentionOrgAndTeam := strings.Split(mention, "/") 598 if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") { 599 replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention")) 600 node = node.NextSibling.NextSibling 601 start = 0 602 continue 603 } 604 start = loc.End 605 continue 606 } 607 replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention")) 608 node = node.NextSibling.NextSibling 609 start = 0 610 } 611 } 612 613 func shortLinkProcessor(ctx *RenderContext, node *html.Node) { 614 shortLinkProcessorFull(ctx, node, false) 615 } 616 617 func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) { 618 next := node.NextSibling 619 for node != nil && node != next { 620 m := shortLinkPattern.FindStringSubmatchIndex(node.Data) 621 if m == nil { 622 return 623 } 624 625 content := node.Data[m[2]:m[3]] 626 tail := node.Data[m[4]:m[5]] 627 props := make(map[string]string) 628 629 // MediaWiki uses [[link|text]], while GitHub uses [[text|link]] 630 // It makes page handling terrible, but we prefer GitHub syntax 631 // And fall back to MediaWiki only when it is obvious from the look 632 // Of text and link contents 633 sl := strings.Split(content, "|") 634 for _, v := range sl { 635 if equalPos := strings.IndexByte(v, '='); equalPos == -1 { 636 // There is no equal in this argument; this is a mandatory arg 637 if props["name"] == "" { 638 if isLinkStr(v) { 639 // If we clearly see it is a link, we save it so 640 641 // But first we need to ensure, that if both mandatory args provided 642 // look like links, we stick to GitHub syntax 643 if props["link"] != "" { 644 props["name"] = props["link"] 645 } 646 647 props["link"] = strings.TrimSpace(v) 648 } else { 649 props["name"] = v 650 } 651 } else { 652 props["link"] = strings.TrimSpace(v) 653 } 654 } else { 655 // There is an equal; optional argument. 656 657 sep := strings.IndexByte(v, '=') 658 key, val := v[:sep], html.UnescapeString(v[sep+1:]) 659 660 // When parsing HTML, x/net/html will change all quotes which are 661 // not used for syntax into UTF-8 quotes. So checking val[0] won't 662 // be enough, since that only checks a single byte. 663 if len(val) > 1 { 664 if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) || 665 (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) { 666 const lenQuote = len("‘") 667 val = val[lenQuote : len(val)-lenQuote] 668 } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) || 669 (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) { 670 val = val[1 : len(val)-1] 671 } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") { 672 const lenQuote = len("‘") 673 val = val[1 : len(val)-lenQuote] 674 } 675 } 676 props[key] = val 677 } 678 } 679 680 var name, link string 681 if props["link"] != "" { 682 link = props["link"] 683 } else if props["name"] != "" { 684 link = props["name"] 685 } 686 if props["title"] != "" { 687 name = props["title"] 688 } else if props["name"] != "" { 689 name = props["name"] 690 } else { 691 name = link 692 } 693 694 name += tail 695 image := false 696 switch ext := filepath.Ext(link); ext { 697 // fast path: empty string, ignore 698 case "": 699 // leave image as false 700 case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg": 701 image = true 702 } 703 704 childNode := &html.Node{} 705 linkNode := &html.Node{ 706 FirstChild: childNode, 707 LastChild: childNode, 708 Type: html.ElementNode, 709 Data: "a", 710 DataAtom: atom.A, 711 } 712 childNode.Parent = linkNode 713 absoluteLink := isLinkStr(link) 714 if !absoluteLink { 715 if image { 716 link = strings.ReplaceAll(link, " ", "+") 717 } else { 718 link = strings.ReplaceAll(link, " ", "-") 719 } 720 if !strings.Contains(link, "/") { 721 link = url.PathEscape(link) 722 } 723 } 724 urlPrefix := ctx.URLPrefix 725 if image { 726 if !absoluteLink { 727 if IsSameDomain(urlPrefix) { 728 urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1) 729 } 730 if ctx.IsWiki { 731 link = util.URLJoin("wiki", "raw", link) 732 } 733 link = util.URLJoin(urlPrefix, link) 734 } 735 title := props["title"] 736 if title == "" { 737 title = props["alt"] 738 } 739 if title == "" { 740 title = path.Base(name) 741 } 742 alt := props["alt"] 743 if alt == "" { 744 alt = name 745 } 746 747 // make the childNode an image - if we can, we also place the alt 748 childNode.Type = html.ElementNode 749 childNode.Data = "img" 750 childNode.DataAtom = atom.Img 751 childNode.Attr = []html.Attribute{ 752 {Key: "src", Val: link}, 753 {Key: "title", Val: title}, 754 {Key: "alt", Val: alt}, 755 } 756 if alt == "" { 757 childNode.Attr = childNode.Attr[:2] 758 } 759 } else { 760 if !absoluteLink { 761 if ctx.IsWiki { 762 link = util.URLJoin("wiki", link) 763 } 764 link = util.URLJoin(urlPrefix, link) 765 } 766 childNode.Type = html.TextNode 767 childNode.Data = name 768 } 769 if noLink { 770 linkNode = childNode 771 } else { 772 linkNode.Attr = []html.Attribute{{Key: "href", Val: link}} 773 } 774 replaceContent(node, m[0], m[1], linkNode) 775 node = node.NextSibling.NextSibling 776 } 777 } 778 779 func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) { 780 if ctx.Metas == nil { 781 return 782 } 783 784 next := node.NextSibling 785 for node != nil && node != next { 786 m := getIssueFullPattern().FindStringSubmatchIndex(node.Data) 787 if m == nil { 788 return 789 } 790 link := node.Data[m[0]:m[1]] 791 id := "#" + node.Data[m[2]:m[3]] 792 793 // extract repo and org name from matched link like 794 // http://localhost:3000/gituser/myrepo/issues/1 795 linkParts := strings.Split(link, "/") 796 matchOrg := linkParts[len(linkParts)-4] 797 matchRepo := linkParts[len(linkParts)-3] 798 799 if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] { 800 // TODO if m[4]:m[5] is not nil, then link is to a comment, 801 // and we should indicate that in the text somehow 802 replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue")) 803 } else { 804 orgRepoID := matchOrg + "/" + matchRepo + id 805 replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue")) 806 } 807 node = node.NextSibling.NextSibling 808 } 809 } 810 811 func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) { 812 if ctx.Metas == nil { 813 return 814 } 815 var ( 816 found bool 817 ref *references.RenderizableReference 818 ) 819 820 next := node.NextSibling 821 822 for node != nil && node != next { 823 _, hasExtTrackFormat := ctx.Metas["format"] 824 825 // Repos with external issue trackers might still need to reference local PRs 826 // We need to concern with the first one that shows up in the text, whichever it is 827 isNumericStyle := ctx.Metas["style"] == "" || ctx.Metas["style"] == IssueNameStyleNumeric 828 foundNumeric, refNumeric := references.FindRenderizableReferenceNumeric(node.Data, hasExtTrackFormat && !isNumericStyle) 829 830 switch ctx.Metas["style"] { 831 case "", IssueNameStyleNumeric: 832 found, ref = foundNumeric, refNumeric 833 case IssueNameStyleAlphanumeric: 834 found, ref = references.FindRenderizableReferenceAlphanumeric(node.Data) 835 case IssueNameStyleRegexp: 836 pattern, err := regexplru.GetCompiled(ctx.Metas["regexp"]) 837 if err != nil { 838 return 839 } 840 found, ref = references.FindRenderizableReferenceRegexp(node.Data, pattern) 841 } 842 843 // Repos with external issue trackers might still need to reference local PRs 844 // We need to concern with the first one that shows up in the text, whichever it is 845 if hasExtTrackFormat && !isNumericStyle && refNumeric != nil { 846 // If numeric (PR) was found, and it was BEFORE the non-numeric pattern, use that 847 // Allow a free-pass when non-numeric pattern wasn't found. 848 if found && (ref == nil || refNumeric.RefLocation.Start < ref.RefLocation.Start) { 849 found = foundNumeric 850 ref = refNumeric 851 } 852 } 853 if !found { 854 return 855 } 856 857 var link *html.Node 858 reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End] 859 if hasExtTrackFormat && !ref.IsPull { 860 ctx.Metas["index"] = ref.Issue 861 862 res, err := vars.Expand(ctx.Metas["format"], ctx.Metas) 863 if err != nil { 864 // here we could just log the error and continue the rendering 865 log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err) 866 } 867 868 link = createLink(res, reftext, "ref-issue ref-external-issue") 869 } else { 870 // Path determines the type of link that will be rendered. It's unknown at this point whether 871 // the linked item is actually a PR or an issue. Luckily it's of no real consequence because 872 // GitBundle will redirect on click as appropriate. 873 path := "issues" 874 if ref.IsPull { 875 path = "pulls" 876 } 877 if ref.Owner == "" { 878 link = createLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], path, ref.Issue), reftext, "ref-issue") 879 } else { 880 link = createLink(util.URLJoin(setting.AppURL, ref.Owner, ref.Name, path, ref.Issue), reftext, "ref-issue") 881 } 882 } 883 884 if ref.Action == references.XRefActionNone { 885 replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) 886 node = node.NextSibling.NextSibling 887 continue 888 } 889 890 // Decorate action keywords if actionable 891 var keyword *html.Node 892 if references.IsXrefActionable(ref, hasExtTrackFormat) { 893 keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) 894 } else { 895 keyword = &html.Node{ 896 Type: html.TextNode, 897 Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End], 898 } 899 } 900 spaces := &html.Node{ 901 Type: html.TextNode, 902 Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start], 903 } 904 replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link}) 905 node = node.NextSibling.NextSibling.NextSibling.NextSibling 906 } 907 } 908 909 // fullSha1PatternProcessor renders SHA containing URLs 910 func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) { 911 if ctx.Metas == nil { 912 return 913 } 914 915 next := node.NextSibling 916 for node != nil && node != next { 917 m := anySHA1Pattern.FindStringSubmatchIndex(node.Data) 918 if m == nil { 919 return 920 } 921 922 urlFull := node.Data[m[0]:m[1]] 923 text := base.ShortSha(node.Data[m[2]:m[3]]) 924 925 // 3rd capture group matches a optional path 926 subpath := "" 927 if m[5] > 0 { 928 subpath = node.Data[m[4]:m[5]] 929 } 930 931 // 4th capture group matches a optional url hash 932 hash := "" 933 if m[7] > 0 { 934 hash = node.Data[m[6]:m[7]][1:] 935 } 936 937 start := m[0] 938 end := m[1] 939 940 // If url ends in '.', it's very likely that it is not part of the 941 // actual url but used to finish a sentence. 942 if strings.HasSuffix(urlFull, ".") { 943 end-- 944 urlFull = urlFull[:len(urlFull)-1] 945 if hash != "" { 946 hash = hash[:len(hash)-1] 947 } else if subpath != "" { 948 subpath = subpath[:len(subpath)-1] 949 } 950 } 951 952 if subpath != "" { 953 text += subpath 954 } 955 956 if hash != "" { 957 text += " (" + hash + ")" 958 } 959 replaceContent(node, start, end, createCodeLink(urlFull, text, "commit")) 960 node = node.NextSibling.NextSibling 961 } 962 } 963 964 func comparePatternProcessor(ctx *RenderContext, node *html.Node) { 965 if ctx.Metas == nil { 966 return 967 } 968 969 next := node.NextSibling 970 for node != nil && node != next { 971 m := comparePattern.FindStringSubmatchIndex(node.Data) 972 if m == nil { 973 return 974 } 975 976 // Ensure that every group (m[0]...m[7]) has a match 977 for i := 0; i < 8; i++ { 978 if m[i] == -1 { 979 return 980 } 981 } 982 983 urlFull := node.Data[m[0]:m[1]] 984 text1 := base.ShortSha(node.Data[m[2]:m[3]]) 985 textDots := base.ShortSha(node.Data[m[4]:m[5]]) 986 text2 := base.ShortSha(node.Data[m[6]:m[7]]) 987 988 hash := "" 989 if m[9] > 0 { 990 hash = node.Data[m[8]:m[9]][1:] 991 } 992 993 start := m[0] 994 end := m[1] 995 996 // If url ends in '.', it's very likely that it is not part of the 997 // actual url but used to finish a sentence. 998 if strings.HasSuffix(urlFull, ".") { 999 end-- 1000 urlFull = urlFull[:len(urlFull)-1] 1001 if hash != "" { 1002 hash = hash[:len(hash)-1] 1003 } else if text2 != "" { 1004 text2 = text2[:len(text2)-1] 1005 } 1006 } 1007 1008 text := text1 + textDots + text2 1009 if hash != "" { 1010 text += " (" + hash + ")" 1011 } 1012 replaceContent(node, start, end, createCodeLink(urlFull, text, "compare")) 1013 node = node.NextSibling.NextSibling 1014 } 1015 } 1016 1017 // emojiShortCodeProcessor for rendering text like :smile: into emoji 1018 func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { 1019 start := 0 1020 next := node.NextSibling 1021 for node != nil && node != next && start < len(node.Data) { 1022 m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:]) 1023 if m == nil { 1024 return 1025 } 1026 m[0] += start 1027 m[1] += start 1028 1029 start = m[1] 1030 1031 alias := node.Data[m[0]:m[1]] 1032 alias = strings.ReplaceAll(alias, ":", "") 1033 converted := emoji.FromAlias(alias) 1034 if converted == nil { 1035 // check if this is a custom reaction 1036 if _, exist := setting.UI.CustomEmojisMap[alias]; exist { 1037 replaceContent(node, m[0], m[1], createCustomEmoji(alias)) 1038 node = node.NextSibling.NextSibling 1039 start = 0 1040 continue 1041 } 1042 continue 1043 } 1044 1045 replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description)) 1046 node = node.NextSibling.NextSibling 1047 start = 0 1048 } 1049 } 1050 1051 // emoji processor to match emoji and add emoji class 1052 func emojiProcessor(ctx *RenderContext, node *html.Node) { 1053 start := 0 1054 next := node.NextSibling 1055 for node != nil && node != next && start < len(node.Data) { 1056 m := emoji.FindEmojiSubmatchIndex(node.Data[start:]) 1057 if m == nil { 1058 return 1059 } 1060 m[0] += start 1061 m[1] += start 1062 1063 codepoint := node.Data[m[0]:m[1]] 1064 start = m[1] 1065 val := emoji.FromCode(codepoint) 1066 if val != nil { 1067 replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description)) 1068 node = node.NextSibling.NextSibling 1069 start = 0 1070 } 1071 } 1072 } 1073 1074 // sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that 1075 // are assumed to be in the same repository. 1076 func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) { 1077 if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" { 1078 return 1079 } 1080 1081 start := 0 1082 next := node.NextSibling 1083 if ctx.ShaExistCache == nil { 1084 ctx.ShaExistCache = make(map[string]bool) 1085 } 1086 for node != nil && node != next && start < len(node.Data) { 1087 m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:]) 1088 if m == nil { 1089 return 1090 } 1091 m[2] += start 1092 m[3] += start 1093 1094 hash := node.Data[m[2]:m[3]] 1095 // The regex does not lie, it matches the hash pattern. 1096 // However, a regex cannot know if a hash actually exists or not. 1097 // We could assume that a SHA1 hash should probably contain alphas AND numerics 1098 // but that is not always the case. 1099 // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash 1100 // as used by git and github for linking and thus we have to do similar. 1101 // Because of this, we check to make sure that a matched hash is actually 1102 // a commit in the repository before making it a link. 1103 1104 // check cache first 1105 exist, inCache := ctx.ShaExistCache[hash] 1106 if !inCache { 1107 if ctx.GitRepo == nil { 1108 var err error 1109 ctx.GitRepo, err = git.OpenRepository(ctx.Ctx, ctx.Metas["repoPath"]) 1110 if err != nil { 1111 log.Error("unable to open repository: %s Error: %v", ctx.Metas["repoPath"], err) 1112 return 1113 } 1114 ctx.AddCancel(func() { 1115 ctx.GitRepo.Close() 1116 ctx.GitRepo = nil 1117 }) 1118 } 1119 1120 exist = ctx.GitRepo.IsObjectExist(hash) 1121 ctx.ShaExistCache[hash] = exist 1122 } 1123 1124 if !exist { 1125 start = m[3] 1126 continue 1127 } 1128 1129 link := util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash) 1130 replaceContent(node, m[2], m[3], createCodeLink(link, base.ShortSha(hash), "commit")) 1131 start = 0 1132 node = node.NextSibling.NextSibling 1133 } 1134 } 1135 1136 // emailAddressProcessor replaces raw email addresses with a mailto: link. 1137 func emailAddressProcessor(ctx *RenderContext, node *html.Node) { 1138 next := node.NextSibling 1139 for node != nil && node != next { 1140 m := emailRegex.FindStringSubmatchIndex(node.Data) 1141 if m == nil { 1142 return 1143 } 1144 1145 mail := node.Data[m[2]:m[3]] 1146 replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto")) 1147 node = node.NextSibling.NextSibling 1148 } 1149 } 1150 1151 // linkProcessor creates links for any HTTP or HTTPS URL not captured by 1152 // markdown. 1153 func linkProcessor(ctx *RenderContext, node *html.Node) { 1154 next := node.NextSibling 1155 for node != nil && node != next { 1156 m := common.LinkRegex.FindStringIndex(node.Data) 1157 if m == nil { 1158 return 1159 } 1160 1161 uri := node.Data[m[0]:m[1]] 1162 replaceContent(node, m[0], m[1], createLink(uri, uri, "link")) 1163 node = node.NextSibling.NextSibling 1164 } 1165 } 1166 1167 func genDefaultLinkProcessor(defaultLink string) processor { 1168 return func(ctx *RenderContext, node *html.Node) { 1169 ch := &html.Node{ 1170 Parent: node, 1171 Type: html.TextNode, 1172 Data: node.Data, 1173 } 1174 1175 node.Type = html.ElementNode 1176 node.Data = "a" 1177 node.DataAtom = atom.A 1178 node.Attr = []html.Attribute{ 1179 {Key: "href", Val: defaultLink}, 1180 {Key: "class", Val: "default-link"}, 1181 } 1182 node.FirstChild, node.LastChild = ch, ch 1183 } 1184 } 1185 1186 // descriptionLinkProcessor creates links for DescriptionHTML 1187 func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) { 1188 next := node.NextSibling 1189 for node != nil && node != next { 1190 m := common.LinkRegex.FindStringIndex(node.Data) 1191 if m == nil { 1192 return 1193 } 1194 1195 uri := node.Data[m[0]:m[1]] 1196 replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri)) 1197 node = node.NextSibling.NextSibling 1198 } 1199 } 1200 1201 func createDescriptionLink(href, content string) *html.Node { 1202 textNode := &html.Node{ 1203 Type: html.TextNode, 1204 Data: content, 1205 } 1206 linkNode := &html.Node{ 1207 FirstChild: textNode, 1208 LastChild: textNode, 1209 Type: html.ElementNode, 1210 Data: "a", 1211 DataAtom: atom.A, 1212 Attr: []html.Attribute{ 1213 {Key: "href", Val: href}, 1214 {Key: "target", Val: "_blank"}, 1215 {Key: "rel", Val: "noopener noreferrer"}, 1216 }, 1217 } 1218 textNode.Parent = linkNode 1219 return linkNode 1220 }