// Copyright 2017 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package markup

import (
	"bytes"
	"io"
	"net/url"
	"path"
	"path/filepath"
	"regexp"
	"strings"
	"sync"

	"code.gitea.io/gitea/modules/base"
	"code.gitea.io/gitea/modules/emoji"
	"code.gitea.io/gitea/modules/git"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/markup/common"
	"code.gitea.io/gitea/modules/references"
	"code.gitea.io/gitea/modules/regexplru"
	"code.gitea.io/gitea/modules/setting"
	"code.gitea.io/gitea/modules/templates/vars"
	"code.gitea.io/gitea/modules/util"

	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
	"mvdan.cc/xurls/v2"
)

// Issue name styles, as compared against ctx.Metas["style"] when rendering
// issue references.
const (
	IssueNameStyleNumeric      = "numeric"
	IssueNameStyleAlphanumeric = "alphanumeric"
	IssueNameStyleRegexp       = "regexp"
)

var (
	// NOTE: None of the regex matching below performs any extra validation.
	// Thus a link is produced even if the linked entity does not exist.
	// While fast, this is also incorrect and leads to false positives.
	// TODO: fix invalid linking issue

	// valid chars in encoded path and parameter: [-+~_%.a-zA-Z0-9/]

	// sha1CurrentPattern matches a string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
	// Although SHA1 hashes are 40 chars long, the regex matches hashes from 7 to 40 chars in length
	// so that abbreviated hash links can be used as well. This matches git and GitHub usability.
	// The hash itself is capture group 1; the surrounding groups only anchor it
	// to whitespace, brackets, sentence punctuation or the text boundaries.
	sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|[.,](\s|$))`)

	// shortLinkPattern matches the short but difficult to parse [[name|link|arg=test]] syntax.
	// Group 1 is the content between the brackets, group 2 any word characters
	// directly following the closing brackets.
	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)

	// anySHA1Pattern splits a URL containing a full 40-char SHA into parts:
	// group 1 is the SHA, group 2 an optional sub-path, group 3 an optional "#fragment".
	anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)

	// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash".
	// Groups: 1 = first commit, 2 = the ".." / "..." separator, 3 = optional
	// second commit, 4 = optional "#fragment".
	comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,40})(\.\.\.?)([0-9a-f]{7,40})?(#[-+~_%.a-zA-Z0-9]+)?`)

	// validLinksPattern matches text that starts with a lowercase URL scheme
	// followed by "://".
	validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)

	// While this email regex is definitely not perfect and I'm sure you can come up
	// with edge cases, it is still accepted by the CommonMark specification, as
	// well as the HTML5 spec:
	//   http://spec.commonmark.org/0.28/#email-address
	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
	emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|\\.(\\s|$))")

	// blackfriday extensions create IDs like fn:user-content-footnote
	blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)

	// EmojiShortCodeRegex finds emoji by alias like :smile:
	EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
)

// keywordClass is the CSS class for action keywords (e.g. "closes: #1")
const keywordClass = "issue-keyword"

// IsLink reports whether link fits valid format.
// It is the exported wrapper around isLink.
func IsLink(link []byte) bool {
	return isLink(link)
}

// isLink reports whether link fits valid format, i.e. whether it starts with
// a URL scheme as described by validLinksPattern.
86 func isLink(link []byte) bool { 87 return validLinksPattern.Match(link) 88 } 89 90 func isLinkStr(link string) bool { 91 return validLinksPattern.MatchString(link) 92 } 93 94 // regexp for full links to issues/pulls 95 var issueFullPattern *regexp.Regexp 96 97 // Once for to prevent races 98 var issueFullPatternOnce sync.Once 99 100 func getIssueFullPattern() *regexp.Regexp { 101 issueFullPatternOnce.Do(func() { 102 issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) + 103 `[\w_.-]+/[\w_.-]+/(?:issues|pulls)/((?:\w{1,10}-)?[1-9][0-9]*)([\?|#](\S+)?)?\b`) 104 }) 105 return issueFullPattern 106 } 107 108 // CustomLinkURLSchemes allows for additional schemes to be detected when parsing links within text 109 func CustomLinkURLSchemes(schemes []string) { 110 schemes = append(schemes, "http", "https") 111 withAuth := make([]string, 0, len(schemes)) 112 validScheme := regexp.MustCompile(`^[a-z]+$`) 113 for _, s := range schemes { 114 if !validScheme.MatchString(s) { 115 continue 116 } 117 without := false 118 for _, sna := range xurls.SchemesNoAuthority { 119 if s == sna { 120 without = true 121 break 122 } 123 } 124 if without { 125 s += ":" 126 } else { 127 s += "://" 128 } 129 withAuth = append(withAuth, s) 130 } 131 common.LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) 132 } 133 134 // IsSameDomain checks if given url string has the same hostname as current Gitea instance 135 func IsSameDomain(s string) bool { 136 if strings.HasPrefix(s, "/") { 137 return true 138 } 139 if uapp, err := url.Parse(setting.AppURL); err == nil { 140 if u, err := url.Parse(s); err == nil { 141 return u.Host == uapp.Host 142 } 143 return false 144 } 145 return false 146 } 147 148 type postProcessError struct { 149 context string 150 err error 151 } 152 153 func (p *postProcessError) Error() string { 154 return "PostProcess: " + p.context + ", " + p.err.Error() 155 } 156 157 type processor func(ctx *RenderContext, node *html.Node) 158 159 var 
defaultProcessors = []processor{ 160 fullIssuePatternProcessor, 161 comparePatternProcessor, 162 fullSha1PatternProcessor, 163 shortLinkProcessor, 164 linkProcessor, 165 mentionProcessor, 166 issueIndexPatternProcessor, 167 commitCrossReferencePatternProcessor, 168 sha1CurrentPatternProcessor, 169 emailAddressProcessor, 170 emojiProcessor, 171 emojiShortCodeProcessor, 172 } 173 174 // PostProcess does the final required transformations to the passed raw HTML 175 // data, and ensures its validity. Transformations include: replacing links and 176 // emails with HTML links, parsing shortlinks in the format of [[Link]], like 177 // MediaWiki, linking issues in the format #ID, and mentions in the format 178 // @user, and others. 179 func PostProcess( 180 ctx *RenderContext, 181 input io.Reader, 182 output io.Writer, 183 ) error { 184 return postProcess(ctx, defaultProcessors, input, output) 185 } 186 187 var commitMessageProcessors = []processor{ 188 fullIssuePatternProcessor, 189 comparePatternProcessor, 190 fullSha1PatternProcessor, 191 linkProcessor, 192 mentionProcessor, 193 issueIndexPatternProcessor, 194 commitCrossReferencePatternProcessor, 195 sha1CurrentPatternProcessor, 196 emailAddressProcessor, 197 emojiProcessor, 198 emojiShortCodeProcessor, 199 } 200 201 // RenderCommitMessage will use the same logic as PostProcess, but will disable 202 // the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is 203 // set, which changes every text node into a link to the passed default link. 204 func RenderCommitMessage( 205 ctx *RenderContext, 206 content string, 207 ) (string, error) { 208 procs := commitMessageProcessors 209 if ctx.DefaultLink != "" { 210 // we don't have to fear data races, because being 211 // commitMessageProcessors of fixed len and cap, every time we append 212 // something to it the slice is realloc+copied, so append always 213 // generates the slice ex-novo. 
214 procs = append(procs, genDefaultLinkProcessor(ctx.DefaultLink)) 215 } 216 return renderProcessString(ctx, procs, content) 217 } 218 219 var commitMessageSubjectProcessors = []processor{ 220 fullIssuePatternProcessor, 221 comparePatternProcessor, 222 fullSha1PatternProcessor, 223 linkProcessor, 224 mentionProcessor, 225 issueIndexPatternProcessor, 226 commitCrossReferencePatternProcessor, 227 sha1CurrentPatternProcessor, 228 emojiShortCodeProcessor, 229 emojiProcessor, 230 } 231 232 var emojiProcessors = []processor{ 233 emojiShortCodeProcessor, 234 emojiProcessor, 235 } 236 237 // RenderCommitMessageSubject will use the same logic as PostProcess and 238 // RenderCommitMessage, but will disable the shortLinkProcessor and 239 // emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set, 240 // which changes every text node into a link to the passed default link. 241 func RenderCommitMessageSubject( 242 ctx *RenderContext, 243 content string, 244 ) (string, error) { 245 procs := commitMessageSubjectProcessors 246 if ctx.DefaultLink != "" { 247 // we don't have to fear data races, because being 248 // commitMessageSubjectProcessors of fixed len and cap, every time we 249 // append something to it the slice is realloc+copied, so append always 250 // generates the slice ex-novo. 
251 procs = append(procs, genDefaultLinkProcessor(ctx.DefaultLink)) 252 } 253 return renderProcessString(ctx, procs, content) 254 } 255 256 // RenderIssueTitle to process title on individual issue/pull page 257 func RenderIssueTitle( 258 ctx *RenderContext, 259 title string, 260 ) (string, error) { 261 return renderProcessString(ctx, []processor{ 262 issueIndexPatternProcessor, 263 commitCrossReferencePatternProcessor, 264 sha1CurrentPatternProcessor, 265 emojiShortCodeProcessor, 266 emojiProcessor, 267 }, title) 268 } 269 270 func renderProcessString(ctx *RenderContext, procs []processor, content string) (string, error) { 271 var buf strings.Builder 272 if err := postProcess(ctx, procs, strings.NewReader(content), &buf); err != nil { 273 return "", err 274 } 275 return buf.String(), nil 276 } 277 278 // RenderDescriptionHTML will use similar logic as PostProcess, but will 279 // use a single special linkProcessor. 280 func RenderDescriptionHTML( 281 ctx *RenderContext, 282 content string, 283 ) (string, error) { 284 return renderProcessString(ctx, []processor{ 285 descriptionLinkProcessor, 286 emojiShortCodeProcessor, 287 emojiProcessor, 288 }, content) 289 } 290 291 // RenderEmoji for when we want to just process emoji and shortcodes 292 // in various places it isn't already run through the normal markdown processor 293 func RenderEmoji( 294 ctx *RenderContext, 295 content string, 296 ) (string, error) { 297 return renderProcessString(ctx, emojiProcessors, content) 298 } 299 300 var ( 301 tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`) 302 nulCleaner = strings.NewReplacer("\000", "") 303 ) 304 305 func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output io.Writer) error { 306 defer ctx.Cancel() 307 // FIXME: don't read all content to memory 308 rawHTML, err := io.ReadAll(input) 309 if err != nil { 310 return err 311 } 312 313 // parse the HTML 314 node, err := 
html.Parse(io.MultiReader( 315 // prepend "<html><body>" 316 strings.NewReader("<html><body>"), 317 // Strip out nuls - they're always invalid 318 bytes.NewReader(tagCleaner.ReplaceAll([]byte(nulCleaner.Replace(string(rawHTML))), []byte("<$1"))), 319 // close the tags 320 strings.NewReader("</body></html>"), 321 )) 322 if err != nil { 323 return &postProcessError{"invalid HTML", err} 324 } 325 326 if node.Type == html.DocumentNode { 327 node = node.FirstChild 328 } 329 330 visitNode(ctx, procs, procs, node) 331 332 newNodes := make([]*html.Node, 0, 5) 333 334 if node.Data == "html" { 335 node = node.FirstChild 336 for node != nil && node.Data != "body" { 337 node = node.NextSibling 338 } 339 } 340 if node != nil { 341 if node.Data == "body" { 342 child := node.FirstChild 343 for child != nil { 344 newNodes = append(newNodes, child) 345 child = child.NextSibling 346 } 347 } else { 348 newNodes = append(newNodes, node) 349 } 350 } 351 352 // Render everything to buf. 353 for _, node := range newNodes { 354 if err := html.Render(output, node); err != nil { 355 return &postProcessError{"error rendering processed HTML", err} 356 } 357 } 358 return nil 359 } 360 361 func visitNode(ctx *RenderContext, procs, textProcs []processor, node *html.Node) { 362 // Add user-content- to IDs and "#" links if they don't already have them 363 for idx, attr := range node.Attr { 364 val := strings.TrimPrefix(attr.Val, "#") 365 notHasPrefix := !(strings.HasPrefix(val, "user-content-") || blackfridayExtRegex.MatchString(val)) 366 367 if attr.Key == "id" && notHasPrefix { 368 node.Attr[idx].Val = "user-content-" + attr.Val 369 } 370 371 if attr.Key == "href" && strings.HasPrefix(attr.Val, "#") && notHasPrefix { 372 node.Attr[idx].Val = "#user-content-" + val 373 } 374 375 if attr.Key == "class" && attr.Val == "emoji" { 376 textProcs = nil 377 } 378 } 379 380 // We ignore code and pre. 
381 switch node.Type { 382 case html.TextNode: 383 textNode(ctx, textProcs, node) 384 case html.ElementNode: 385 if node.Data == "img" { 386 for i, attr := range node.Attr { 387 if attr.Key != "src" { 388 continue 389 } 390 if len(attr.Val) > 0 && !isLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") { 391 prefix := ctx.URLPrefix 392 if ctx.IsWiki { 393 prefix = util.URLJoin(prefix, "wiki", "raw") 394 } 395 prefix = strings.Replace(prefix, "/src/", "/media/", 1) 396 397 attr.Val = util.URLJoin(prefix, attr.Val) 398 } 399 attr.Val = camoHandleLink(attr.Val) 400 node.Attr[i] = attr 401 } 402 } else if node.Data == "a" { 403 // Restrict text in links to emojis 404 textProcs = emojiProcessors 405 } else if node.Data == "code" || node.Data == "pre" { 406 return 407 } else if node.Data == "i" { 408 for _, attr := range node.Attr { 409 if attr.Key != "class" { 410 continue 411 } 412 classes := strings.Split(attr.Val, " ") 413 for i, class := range classes { 414 if class == "icon" { 415 classes[0], classes[i] = classes[i], classes[0] 416 attr.Val = strings.Join(classes, " ") 417 418 // Remove all children of icons 419 child := node.FirstChild 420 for child != nil { 421 node.RemoveChild(child) 422 child = node.FirstChild 423 } 424 break 425 } 426 } 427 } 428 } 429 for n := node.FirstChild; n != nil; n = n.NextSibling { 430 visitNode(ctx, procs, textProcs, n) 431 } 432 } 433 // ignore everything else 434 } 435 436 // textNode runs the passed node through various processors, in order to handle 437 // all kinds of special links handled by the post-processing. 
438 func textNode(ctx *RenderContext, procs []processor, node *html.Node) { 439 for _, processor := range procs { 440 processor(ctx, node) 441 } 442 } 443 444 // createKeyword() renders a highlighted version of an action keyword 445 func createKeyword(content string) *html.Node { 446 span := &html.Node{ 447 Type: html.ElementNode, 448 Data: atom.Span.String(), 449 Attr: []html.Attribute{}, 450 } 451 span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass}) 452 453 text := &html.Node{ 454 Type: html.TextNode, 455 Data: content, 456 } 457 span.AppendChild(text) 458 459 return span 460 } 461 462 func createEmoji(content, class, name string) *html.Node { 463 span := &html.Node{ 464 Type: html.ElementNode, 465 Data: atom.Span.String(), 466 Attr: []html.Attribute{}, 467 } 468 if class != "" { 469 span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class}) 470 } 471 if name != "" { 472 span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name}) 473 } 474 475 text := &html.Node{ 476 Type: html.TextNode, 477 Data: content, 478 } 479 480 span.AppendChild(text) 481 return span 482 } 483 484 func createCustomEmoji(alias string) *html.Node { 485 span := &html.Node{ 486 Type: html.ElementNode, 487 Data: atom.Span.String(), 488 Attr: []html.Attribute{}, 489 } 490 span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"}) 491 span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias}) 492 493 img := &html.Node{ 494 Type: html.ElementNode, 495 DataAtom: atom.Img, 496 Data: "img", 497 Attr: []html.Attribute{}, 498 } 499 img.Attr = append(img.Attr, html.Attribute{Key: "alt", Val: ":" + alias + ":"}) 500 img.Attr = append(img.Attr, html.Attribute{Key: "src", Val: setting.StaticURLPrefix + "/assets/img/emoji/" + alias + ".png"}) 501 502 span.AppendChild(img) 503 return span 504 } 505 506 func createLink(href, content, class string) *html.Node { 507 a := &html.Node{ 508 Type: html.ElementNode, 509 Data: 
atom.A.String(), 510 Attr: []html.Attribute{{Key: "href", Val: href}}, 511 } 512 513 if class != "" { 514 a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class}) 515 } 516 517 text := &html.Node{ 518 Type: html.TextNode, 519 Data: content, 520 } 521 522 a.AppendChild(text) 523 return a 524 } 525 526 func createCodeLink(href, content, class string) *html.Node { 527 a := &html.Node{ 528 Type: html.ElementNode, 529 Data: atom.A.String(), 530 Attr: []html.Attribute{{Key: "href", Val: href}}, 531 } 532 533 if class != "" { 534 a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class}) 535 } 536 537 text := &html.Node{ 538 Type: html.TextNode, 539 Data: content, 540 } 541 542 code := &html.Node{ 543 Type: html.ElementNode, 544 Data: atom.Code.String(), 545 Attr: []html.Attribute{{Key: "class", Val: "nohighlight"}}, 546 } 547 548 code.AppendChild(text) 549 a.AppendChild(code) 550 return a 551 } 552 553 // replaceContent takes text node, and in its content it replaces a section of 554 // it with the specified newNode. 555 func replaceContent(node *html.Node, i, j int, newNode *html.Node) { 556 replaceContentList(node, i, j, []*html.Node{newNode}) 557 } 558 559 // replaceContentList takes text node, and in its content it replaces a section of 560 // it with the specified newNodes. An example to visualize how this can work can 561 // be found here: https://play.golang.org/p/5zP8NnHZ03s 562 func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) { 563 // get the data before and after the match 564 before := node.Data[:i] 565 after := node.Data[j:] 566 567 // Replace in the current node the text, so that it is only what it is 568 // supposed to have. 569 node.Data = before 570 571 // Get the current next sibling, before which we place the replaced data, 572 // and after that we place the new text node. 
573 nextSibling := node.NextSibling 574 for _, n := range newNodes { 575 node.Parent.InsertBefore(n, nextSibling) 576 } 577 if after != "" { 578 node.Parent.InsertBefore(&html.Node{ 579 Type: html.TextNode, 580 Data: after, 581 }, nextSibling) 582 } 583 } 584 585 func mentionProcessor(ctx *RenderContext, node *html.Node) { 586 start := 0 587 next := node.NextSibling 588 for node != nil && node != next && start < len(node.Data) { 589 // We replace only the first mention; other mentions will be addressed later 590 found, loc := references.FindFirstMentionBytes([]byte(node.Data[start:])) 591 if !found { 592 return 593 } 594 loc.Start += start 595 loc.End += start 596 mention := node.Data[loc.Start:loc.End] 597 var teams string 598 teams, ok := ctx.Metas["teams"] 599 // FIXME: util.URLJoin may not be necessary here: 600 // - setting.AppURL is defined to have a terminal '/' so unless mention[1:] 601 // is an AppSubURL link we can probably fallback to concatenation. 602 // team mention should follow @orgName/teamName style 603 if ok && strings.Contains(mention, "/") { 604 mentionOrgAndTeam := strings.Split(mention, "/") 605 if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") { 606 replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention")) 607 node = node.NextSibling.NextSibling 608 start = 0 609 continue 610 } 611 start = loc.End 612 continue 613 } 614 mentionedUsername := mention[1:] 615 616 if processorHelper.IsUsernameMentionable != nil && processorHelper.IsUsernameMentionable(ctx.Ctx, mentionedUsername) { 617 replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mentionedUsername), mention, "mention")) 618 node = node.NextSibling.NextSibling 619 } else { 620 node = node.NextSibling 621 } 622 start = 0 623 } 624 } 625 626 func shortLinkProcessor(ctx *RenderContext, node 
*html.Node) { 627 shortLinkProcessorFull(ctx, node, false) 628 } 629 630 func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) { 631 next := node.NextSibling 632 for node != nil && node != next { 633 m := shortLinkPattern.FindStringSubmatchIndex(node.Data) 634 if m == nil { 635 return 636 } 637 638 content := node.Data[m[2]:m[3]] 639 tail := node.Data[m[4]:m[5]] 640 props := make(map[string]string) 641 642 // MediaWiki uses [[link|text]], while GitHub uses [[text|link]] 643 // It makes page handling terrible, but we prefer GitHub syntax 644 // And fall back to MediaWiki only when it is obvious from the look 645 // Of text and link contents 646 sl := strings.Split(content, "|") 647 for _, v := range sl { 648 if equalPos := strings.IndexByte(v, '='); equalPos == -1 { 649 // There is no equal in this argument; this is a mandatory arg 650 if props["name"] == "" { 651 if isLinkStr(v) { 652 // If we clearly see it is a link, we save it so 653 654 // But first we need to ensure, that if both mandatory args provided 655 // look like links, we stick to GitHub syntax 656 if props["link"] != "" { 657 props["name"] = props["link"] 658 } 659 660 props["link"] = strings.TrimSpace(v) 661 } else { 662 props["name"] = v 663 } 664 } else { 665 props["link"] = strings.TrimSpace(v) 666 } 667 } else { 668 // There is an equal; optional argument. 669 670 sep := strings.IndexByte(v, '=') 671 key, val := v[:sep], html.UnescapeString(v[sep+1:]) 672 673 // When parsing HTML, x/net/html will change all quotes which are 674 // not used for syntax into UTF-8 quotes. So checking val[0] won't 675 // be enough, since that only checks a single byte. 
676 if len(val) > 1 { 677 if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) || 678 (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) { 679 const lenQuote = len("‘") 680 val = val[lenQuote : len(val)-lenQuote] 681 } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) || 682 (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) { 683 val = val[1 : len(val)-1] 684 } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") { 685 const lenQuote = len("‘") 686 val = val[1 : len(val)-lenQuote] 687 } 688 } 689 props[key] = val 690 } 691 } 692 693 var name, link string 694 if props["link"] != "" { 695 link = props["link"] 696 } else if props["name"] != "" { 697 link = props["name"] 698 } 699 if props["title"] != "" { 700 name = props["title"] 701 } else if props["name"] != "" { 702 name = props["name"] 703 } else { 704 name = link 705 } 706 707 name += tail 708 image := false 709 switch ext := filepath.Ext(link); ext { 710 // fast path: empty string, ignore 711 case "": 712 // leave image as false 713 case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg": 714 image = true 715 } 716 717 childNode := &html.Node{} 718 linkNode := &html.Node{ 719 FirstChild: childNode, 720 LastChild: childNode, 721 Type: html.ElementNode, 722 Data: "a", 723 DataAtom: atom.A, 724 } 725 childNode.Parent = linkNode 726 absoluteLink := isLinkStr(link) 727 if !absoluteLink { 728 if image { 729 link = strings.ReplaceAll(link, " ", "+") 730 } else { 731 link = strings.ReplaceAll(link, " ", "-") 732 } 733 if !strings.Contains(link, "/") { 734 link = url.PathEscape(link) 735 } 736 } 737 urlPrefix := ctx.URLPrefix 738 if image { 739 if !absoluteLink { 740 if IsSameDomain(urlPrefix) { 741 urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1) 742 } 743 if ctx.IsWiki { 744 link = util.URLJoin("wiki", "raw", link) 745 } 746 link = util.URLJoin(urlPrefix, link) 747 } 748 title := props["title"] 749 if title == 
"" { 750 title = props["alt"] 751 } 752 if title == "" { 753 title = path.Base(name) 754 } 755 alt := props["alt"] 756 if alt == "" { 757 alt = name 758 } 759 760 // make the childNode an image - if we can, we also place the alt 761 childNode.Type = html.ElementNode 762 childNode.Data = "img" 763 childNode.DataAtom = atom.Img 764 childNode.Attr = []html.Attribute{ 765 {Key: "src", Val: link}, 766 {Key: "title", Val: title}, 767 {Key: "alt", Val: alt}, 768 } 769 if alt == "" { 770 childNode.Attr = childNode.Attr[:2] 771 } 772 } else { 773 if !absoluteLink { 774 if ctx.IsWiki { 775 link = util.URLJoin("wiki", link) 776 } 777 link = util.URLJoin(urlPrefix, link) 778 } 779 childNode.Type = html.TextNode 780 childNode.Data = name 781 } 782 if noLink { 783 linkNode = childNode 784 } else { 785 linkNode.Attr = []html.Attribute{{Key: "href", Val: link}} 786 } 787 replaceContent(node, m[0], m[1], linkNode) 788 node = node.NextSibling.NextSibling 789 } 790 } 791 792 func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) { 793 if ctx.Metas == nil { 794 return 795 } 796 797 next := node.NextSibling 798 for node != nil && node != next { 799 m := getIssueFullPattern().FindStringSubmatchIndex(node.Data) 800 if m == nil { 801 return 802 } 803 link := node.Data[m[0]:m[1]] 804 id := "#" + node.Data[m[2]:m[3]] 805 806 // extract repo and org name from matched link like 807 // http://localhost:3000/gituser/myrepo/issues/1 808 linkParts := strings.Split(link, "/") 809 matchOrg := linkParts[len(linkParts)-4] 810 matchRepo := linkParts[len(linkParts)-3] 811 812 if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] { 813 // TODO if m[4]:m[5] is not nil, then link is to a comment, 814 // and we should indicate that in the text somehow 815 replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue")) 816 } else { 817 orgRepoID := matchOrg + "/" + matchRepo + id 818 replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue")) 819 } 820 node = 
node.NextSibling.NextSibling 821 } 822 } 823 824 func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) { 825 if ctx.Metas == nil { 826 return 827 } 828 var ( 829 found bool 830 ref *references.RenderizableReference 831 ) 832 833 next := node.NextSibling 834 835 for node != nil && node != next { 836 _, hasExtTrackFormat := ctx.Metas["format"] 837 838 // Repos with external issue trackers might still need to reference local PRs 839 // We need to concern with the first one that shows up in the text, whichever it is 840 isNumericStyle := ctx.Metas["style"] == "" || ctx.Metas["style"] == IssueNameStyleNumeric 841 foundNumeric, refNumeric := references.FindRenderizableReferenceNumeric(node.Data, hasExtTrackFormat && !isNumericStyle) 842 843 switch ctx.Metas["style"] { 844 case "", IssueNameStyleNumeric: 845 found, ref = foundNumeric, refNumeric 846 case IssueNameStyleAlphanumeric: 847 found, ref = references.FindRenderizableReferenceAlphanumeric(node.Data) 848 case IssueNameStyleRegexp: 849 pattern, err := regexplru.GetCompiled(ctx.Metas["regexp"]) 850 if err != nil { 851 return 852 } 853 found, ref = references.FindRenderizableReferenceRegexp(node.Data, pattern) 854 } 855 856 // Repos with external issue trackers might still need to reference local PRs 857 // We need to concern with the first one that shows up in the text, whichever it is 858 if hasExtTrackFormat && !isNumericStyle && refNumeric != nil { 859 // If numeric (PR) was found, and it was BEFORE the non-numeric pattern, use that 860 // Allow a free-pass when non-numeric pattern wasn't found. 
861 if found && (ref == nil || refNumeric.RefLocation.Start < ref.RefLocation.Start) { 862 found = foundNumeric 863 ref = refNumeric 864 } 865 } 866 if !found { 867 return 868 } 869 870 var link *html.Node 871 reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End] 872 if hasExtTrackFormat && !ref.IsPull { 873 ctx.Metas["index"] = ref.Issue 874 875 res, err := vars.Expand(ctx.Metas["format"], ctx.Metas) 876 if err != nil { 877 // here we could just log the error and continue the rendering 878 log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err) 879 } 880 881 link = createLink(res, reftext, "ref-issue ref-external-issue") 882 } else { 883 // Path determines the type of link that will be rendered. It's unknown at this point whether 884 // the linked item is actually a PR or an issue. Luckily it's of no real consequence because 885 // Gitea will redirect on click as appropriate. 886 path := "issues" 887 if ref.IsPull { 888 path = "pulls" 889 } 890 if ref.Owner == "" { 891 link = createLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], path, ref.Issue), reftext, "ref-issue") 892 } else { 893 link = createLink(util.URLJoin(setting.AppURL, ref.Owner, ref.Name, path, ref.Issue), reftext, "ref-issue") 894 } 895 } 896 897 if ref.Action == references.XRefActionNone { 898 replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) 899 node = node.NextSibling.NextSibling 900 continue 901 } 902 903 // Decorate action keywords if actionable 904 var keyword *html.Node 905 if references.IsXrefActionable(ref, hasExtTrackFormat) { 906 keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) 907 } else { 908 keyword = &html.Node{ 909 Type: html.TextNode, 910 Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End], 911 } 912 } 913 spaces := &html.Node{ 914 Type: html.TextNode, 915 Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start], 916 } 917 replaceContentList(node, 
ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link}) 918 node = node.NextSibling.NextSibling.NextSibling.NextSibling 919 } 920 } 921 922 func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) { 923 next := node.NextSibling 924 925 for node != nil && node != next { 926 found, ref := references.FindRenderizableCommitCrossReference(node.Data) 927 if !found { 928 return 929 } 930 931 reftext := ref.Owner + "/" + ref.Name + "@" + base.ShortSha(ref.CommitSha) 932 link := createLink(util.URLJoin(setting.AppSubURL, ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit") 933 934 replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) 935 node = node.NextSibling.NextSibling 936 } 937 } 938 939 // fullSha1PatternProcessor renders SHA containing URLs 940 func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) { 941 if ctx.Metas == nil { 942 return 943 } 944 945 next := node.NextSibling 946 for node != nil && node != next { 947 m := anySHA1Pattern.FindStringSubmatchIndex(node.Data) 948 if m == nil { 949 return 950 } 951 952 urlFull := node.Data[m[0]:m[1]] 953 text := base.ShortSha(node.Data[m[2]:m[3]]) 954 955 // 3rd capture group matches a optional path 956 subpath := "" 957 if m[5] > 0 { 958 subpath = node.Data[m[4]:m[5]] 959 } 960 961 // 4th capture group matches a optional url hash 962 hash := "" 963 if m[7] > 0 { 964 hash = node.Data[m[6]:m[7]][1:] 965 } 966 967 start := m[0] 968 end := m[1] 969 970 // If url ends in '.', it's very likely that it is not part of the 971 // actual url but used to finish a sentence. 
972 if strings.HasSuffix(urlFull, ".") { 973 end-- 974 urlFull = urlFull[:len(urlFull)-1] 975 if hash != "" { 976 hash = hash[:len(hash)-1] 977 } else if subpath != "" { 978 subpath = subpath[:len(subpath)-1] 979 } 980 } 981 982 if subpath != "" { 983 text += subpath 984 } 985 986 if hash != "" { 987 text += " (" + hash + ")" 988 } 989 replaceContent(node, start, end, createCodeLink(urlFull, text, "commit")) 990 node = node.NextSibling.NextSibling 991 } 992 } 993 994 func comparePatternProcessor(ctx *RenderContext, node *html.Node) { 995 if ctx.Metas == nil { 996 return 997 } 998 999 next := node.NextSibling 1000 for node != nil && node != next { 1001 m := comparePattern.FindStringSubmatchIndex(node.Data) 1002 if m == nil { 1003 return 1004 } 1005 1006 // Ensure that every group (m[0]...m[7]) has a match 1007 for i := 0; i < 8; i++ { 1008 if m[i] == -1 { 1009 return 1010 } 1011 } 1012 1013 urlFull := node.Data[m[0]:m[1]] 1014 text1 := base.ShortSha(node.Data[m[2]:m[3]]) 1015 textDots := base.ShortSha(node.Data[m[4]:m[5]]) 1016 text2 := base.ShortSha(node.Data[m[6]:m[7]]) 1017 1018 hash := "" 1019 if m[9] > 0 { 1020 hash = node.Data[m[8]:m[9]][1:] 1021 } 1022 1023 start := m[0] 1024 end := m[1] 1025 1026 // If url ends in '.', it's very likely that it is not part of the 1027 // actual url but used to finish a sentence. 
1028 if strings.HasSuffix(urlFull, ".") { 1029 end-- 1030 urlFull = urlFull[:len(urlFull)-1] 1031 if hash != "" { 1032 hash = hash[:len(hash)-1] 1033 } else if text2 != "" { 1034 text2 = text2[:len(text2)-1] 1035 } 1036 } 1037 1038 text := text1 + textDots + text2 1039 if hash != "" { 1040 text += " (" + hash + ")" 1041 } 1042 replaceContent(node, start, end, createCodeLink(urlFull, text, "compare")) 1043 node = node.NextSibling.NextSibling 1044 } 1045 } 1046 1047 // emojiShortCodeProcessor for rendering text like :smile: into emoji 1048 func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { 1049 start := 0 1050 next := node.NextSibling 1051 for node != nil && node != next && start < len(node.Data) { 1052 m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:]) 1053 if m == nil { 1054 return 1055 } 1056 m[0] += start 1057 m[1] += start 1058 1059 start = m[1] 1060 1061 alias := node.Data[m[0]:m[1]] 1062 alias = strings.ReplaceAll(alias, ":", "") 1063 converted := emoji.FromAlias(alias) 1064 if converted == nil { 1065 // check if this is a custom reaction 1066 if _, exist := setting.UI.CustomEmojisMap[alias]; exist { 1067 replaceContent(node, m[0], m[1], createCustomEmoji(alias)) 1068 node = node.NextSibling.NextSibling 1069 start = 0 1070 continue 1071 } 1072 continue 1073 } 1074 1075 replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description)) 1076 node = node.NextSibling.NextSibling 1077 start = 0 1078 } 1079 } 1080 1081 // emoji processor to match emoji and add emoji class 1082 func emojiProcessor(ctx *RenderContext, node *html.Node) { 1083 start := 0 1084 next := node.NextSibling 1085 for node != nil && node != next && start < len(node.Data) { 1086 m := emoji.FindEmojiSubmatchIndex(node.Data[start:]) 1087 if m == nil { 1088 return 1089 } 1090 m[0] += start 1091 m[1] += start 1092 1093 codepoint := node.Data[m[0]:m[1]] 1094 start = m[1] 1095 val := emoji.FromCode(codepoint) 1096 if val != nil { 1097 
replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description)) 1098 node = node.NextSibling.NextSibling 1099 start = 0 1100 } 1101 } 1102 } 1103 1104 // sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that 1105 // are assumed to be in the same repository. 1106 func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) { 1107 if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" { 1108 return 1109 } 1110 1111 start := 0 1112 next := node.NextSibling 1113 if ctx.ShaExistCache == nil { 1114 ctx.ShaExistCache = make(map[string]bool) 1115 } 1116 for node != nil && node != next && start < len(node.Data) { 1117 m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:]) 1118 if m == nil { 1119 return 1120 } 1121 m[2] += start 1122 m[3] += start 1123 1124 hash := node.Data[m[2]:m[3]] 1125 // The regex does not lie, it matches the hash pattern. 1126 // However, a regex cannot know if a hash actually exists or not. 1127 // We could assume that a SHA1 hash should probably contain alphas AND numerics 1128 // but that is not always the case. 1129 // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash 1130 // as used by git and github for linking and thus we have to do similar. 1131 // Because of this, we check to make sure that a matched hash is actually 1132 // a commit in the repository before making it a link. 
1133 1134 // check cache first 1135 exist, inCache := ctx.ShaExistCache[hash] 1136 if !inCache { 1137 if ctx.GitRepo == nil { 1138 var err error 1139 ctx.GitRepo, err = git.OpenRepository(ctx.Ctx, ctx.Metas["repoPath"]) 1140 if err != nil { 1141 log.Error("unable to open repository: %s Error: %v", ctx.Metas["repoPath"], err) 1142 return 1143 } 1144 ctx.AddCancel(func() { 1145 ctx.GitRepo.Close() 1146 ctx.GitRepo = nil 1147 }) 1148 } 1149 1150 exist = ctx.GitRepo.IsObjectExist(hash) 1151 ctx.ShaExistCache[hash] = exist 1152 } 1153 1154 if !exist { 1155 start = m[3] 1156 continue 1157 } 1158 1159 link := util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash) 1160 replaceContent(node, m[2], m[3], createCodeLink(link, base.ShortSha(hash), "commit")) 1161 start = 0 1162 node = node.NextSibling.NextSibling 1163 } 1164 } 1165 1166 // emailAddressProcessor replaces raw email addresses with a mailto: link. 1167 func emailAddressProcessor(ctx *RenderContext, node *html.Node) { 1168 next := node.NextSibling 1169 for node != nil && node != next { 1170 m := emailRegex.FindStringSubmatchIndex(node.Data) 1171 if m == nil { 1172 return 1173 } 1174 1175 mail := node.Data[m[2]:m[3]] 1176 replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto")) 1177 node = node.NextSibling.NextSibling 1178 } 1179 } 1180 1181 // linkProcessor creates links for any HTTP or HTTPS URL not captured by 1182 // markdown. 
1183 func linkProcessor(ctx *RenderContext, node *html.Node) { 1184 next := node.NextSibling 1185 for node != nil && node != next { 1186 m := common.LinkRegex.FindStringIndex(node.Data) 1187 if m == nil { 1188 return 1189 } 1190 1191 uri := node.Data[m[0]:m[1]] 1192 replaceContent(node, m[0], m[1], createLink(uri, uri, "link")) 1193 node = node.NextSibling.NextSibling 1194 } 1195 } 1196 1197 func genDefaultLinkProcessor(defaultLink string) processor { 1198 return func(ctx *RenderContext, node *html.Node) { 1199 ch := &html.Node{ 1200 Parent: node, 1201 Type: html.TextNode, 1202 Data: node.Data, 1203 } 1204 1205 node.Type = html.ElementNode 1206 node.Data = "a" 1207 node.DataAtom = atom.A 1208 node.Attr = []html.Attribute{ 1209 {Key: "href", Val: defaultLink}, 1210 {Key: "class", Val: "default-link muted"}, 1211 } 1212 node.FirstChild, node.LastChild = ch, ch 1213 } 1214 } 1215 1216 // descriptionLinkProcessor creates links for DescriptionHTML 1217 func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) { 1218 next := node.NextSibling 1219 for node != nil && node != next { 1220 m := common.LinkRegex.FindStringIndex(node.Data) 1221 if m == nil { 1222 return 1223 } 1224 1225 uri := node.Data[m[0]:m[1]] 1226 replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri)) 1227 node = node.NextSibling.NextSibling 1228 } 1229 } 1230 1231 func createDescriptionLink(href, content string) *html.Node { 1232 textNode := &html.Node{ 1233 Type: html.TextNode, 1234 Data: content, 1235 } 1236 linkNode := &html.Node{ 1237 FirstChild: textNode, 1238 LastChild: textNode, 1239 Type: html.ElementNode, 1240 Data: "a", 1241 DataAtom: atom.A, 1242 Attr: []html.Attribute{ 1243 {Key: "href", Val: href}, 1244 {Key: "target", Val: "_blank"}, 1245 {Key: "rel", Val: "noopener noreferrer"}, 1246 }, 1247 } 1248 textNode.Parent = linkNode 1249 return linkNode 1250 }