code.gitea.io/gitea@v1.19.3/modules/references/references.go (about) 1 // Copyright 2019 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package references 5 6 import ( 7 "bytes" 8 "net/url" 9 "regexp" 10 "strconv" 11 "strings" 12 "sync" 13 14 "code.gitea.io/gitea/modules/log" 15 "code.gitea.io/gitea/modules/markup/mdstripper" 16 "code.gitea.io/gitea/modules/setting" 17 18 "github.com/yuin/goldmark/util" 19 ) 20 21 var ( 22 // validNamePattern performs only the most basic validation for user or repository names 23 // Repository name should contain only alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters. 24 validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`) 25 26 // NOTE: All below regex matching do not perform any extra validation. 27 // Thus a link is produced even if the linked entity does not exist. 28 // While fast, this is also incorrect and lead to false positives. 29 // TODO: fix invalid linking issue 30 31 // mentionPattern matches all mentions in the form of "@user" or "@org/team" 32 mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_]+\/?[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+\/?[0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`) 33 // issueNumericPattern matches string that references to a numeric issue, e.g. #1287 34 issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\')([#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) 35 // issueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234 36 issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`) 37 // crossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository 38 // e.g. 
org/repo#12345 39 crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) 40 // crossReferenceCommitPattern matches a string that references a commit in a different repository 41 // e.g. go-gitea/gitea@d8a994ef, go-gitea/gitea@d8a994ef243349f321568f9e36d5c3f444b99cae (7-40 characters) 42 crossReferenceCommitPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+)/([0-9a-zA-Z-_\.]+)@([0-9a-f]{7,40})(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) 43 // spaceTrimmedPattern let's find the trailing space 44 spaceTrimmedPattern = regexp.MustCompile(`(?:.*[0-9a-zA-Z-_])\s`) 45 // timeLogPattern matches string for time tracking 46 timeLogPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@([0-9]+([\.,][0-9]+)?(w|d|m|h))+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) 47 48 issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp 49 issueKeywordsOnce sync.Once 50 51 giteaHostInit sync.Once 52 giteaHost string 53 giteaIssuePullPattern *regexp.Regexp 54 55 actionStrings = []string{ 56 "none", 57 "closes", 58 "reopens", 59 "neutered", 60 } 61 ) 62 63 // XRefAction represents the kind of effect a cross reference has once is resolved 64 type XRefAction int64 65 66 const ( 67 // XRefActionNone means the cross-reference is simply a comment 68 XRefActionNone XRefAction = iota // 0 69 // XRefActionCloses means the cross-reference should close an issue if it is resolved 70 XRefActionCloses // 1 71 // XRefActionReopens means the cross-reference should reopen an issue if it is resolved 72 XRefActionReopens // 2 73 // XRefActionNeutered means the cross-reference will no longer affect the source 74 XRefActionNeutered // 3 75 ) 76 77 func (a XRefAction) String() string { 78 return actionStrings[a] 79 } 80 81 // IssueReference contains an unverified cross-reference to a local issue or pull request 82 type IssueReference struct { 83 Index int64 84 Owner string 85 Name string 86 Action XRefAction 87 TimeLog 
string 88 } 89 90 // RenderizableReference contains an unverified cross-reference to with rendering information 91 // The IsPull member means that a `!num` reference was used instead of `#num`. 92 // This kind of reference is used to make pulls available when an external issue tracker 93 // is used. Otherwise, `#` and `!` are completely interchangeable. 94 type RenderizableReference struct { 95 Issue string 96 Owner string 97 Name string 98 CommitSha string 99 IsPull bool 100 RefLocation *RefSpan 101 Action XRefAction 102 ActionLocation *RefSpan 103 } 104 105 type rawReference struct { 106 index int64 107 owner string 108 name string 109 isPull bool 110 action XRefAction 111 issue string 112 refLocation *RefSpan 113 actionLocation *RefSpan 114 timeLog string 115 } 116 117 func rawToIssueReferenceList(reflist []*rawReference) []IssueReference { 118 refarr := make([]IssueReference, len(reflist)) 119 for i, r := range reflist { 120 refarr[i] = IssueReference{ 121 Index: r.index, 122 Owner: r.owner, 123 Name: r.name, 124 Action: r.action, 125 TimeLog: r.timeLog, 126 } 127 } 128 return refarr 129 } 130 131 // RefSpan is the position where the reference was found within the parsed text 132 type RefSpan struct { 133 Start int 134 End int 135 } 136 137 func makeKeywordsPat(words []string) *regexp.Regexp { 138 acceptedWords := parseKeywords(words) 139 if len(acceptedWords) == 0 { 140 // Never match 141 return nil 142 } 143 return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? 
$`) 144 } 145 146 func parseKeywords(words []string) []string { 147 acceptedWords := make([]string, 0, 5) 148 wordPat := regexp.MustCompile(`^[\pL]+$`) 149 for _, word := range words { 150 word = strings.ToLower(strings.TrimSpace(word)) 151 // Accept Unicode letter class runes (a-z, á, à, ä, ) 152 if wordPat.MatchString(word) { 153 acceptedWords = append(acceptedWords, word) 154 } else { 155 log.Info("Invalid keyword: %s", word) 156 } 157 } 158 return acceptedWords 159 } 160 161 func newKeywords() { 162 issueKeywordsOnce.Do(func() { 163 // Delay initialization until after the settings module is initialized 164 doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords) 165 }) 166 } 167 168 func doNewKeywords(close, reopen []string) { 169 issueCloseKeywordsPat = makeKeywordsPat(close) 170 issueReopenKeywordsPat = makeKeywordsPat(reopen) 171 } 172 173 // getGiteaHostName returns a normalized string with the local host name, with no scheme or port information 174 func getGiteaHostName() string { 175 giteaHostInit.Do(func() { 176 if uapp, err := url.Parse(setting.AppURL); err == nil { 177 giteaHost = strings.ToLower(uapp.Host) 178 giteaIssuePullPattern = regexp.MustCompile( 179 `(\s|^|\(|\[)` + 180 regexp.QuoteMeta(strings.TrimSpace(setting.AppURL)) + 181 `([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` + 182 `((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) 183 } else { 184 giteaHost = "" 185 giteaIssuePullPattern = nil 186 } 187 }) 188 return giteaHost 189 } 190 191 // getGiteaIssuePullPattern 192 func getGiteaIssuePullPattern() *regexp.Regexp { 193 getGiteaHostName() 194 return giteaIssuePullPattern 195 } 196 197 // FindAllMentionsMarkdown matches mention patterns in given content and 198 // returns a list of found unvalidated user names **not including** the @ prefix. 
199 func FindAllMentionsMarkdown(content string) []string { 200 bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content)) 201 locations := FindAllMentionsBytes(bcontent) 202 mentions := make([]string, len(locations)) 203 for i, val := range locations { 204 mentions[i] = string(bcontent[val.Start+1 : val.End]) 205 } 206 return mentions 207 } 208 209 // FindAllMentionsBytes matches mention patterns in given content 210 // and returns a list of locations for the unvalidated user names, including the @ prefix. 211 func FindAllMentionsBytes(content []byte) []RefSpan { 212 // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and 213 // trailing spaces (\s@mention,\s), so if we get two consecutive references, the space 214 // from the second reference will be "eaten" by the first one: 215 // ...\s@mention1\s@mention2\s... --> ...`\s@mention1\s`, (not) `@mention2,\s...` 216 ret := make([]RefSpan, 0, 5) 217 pos := 0 218 for { 219 match := mentionPattern.FindSubmatchIndex(content[pos:]) 220 if match == nil { 221 break 222 } 223 ret = append(ret, RefSpan{Start: match[2] + pos, End: match[3] + pos}) 224 notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos]) 225 if notrail == nil { 226 pos = match[3] + pos 227 } else { 228 pos = match[3] + pos + notrail[1] - notrail[3] 229 } 230 } 231 return ret 232 } 233 234 // FindFirstMentionBytes matches the first mention in then given content 235 // and returns the location of the unvalidated user name, including the @ prefix. 236 func FindFirstMentionBytes(content []byte) (bool, RefSpan) { 237 mention := mentionPattern.FindSubmatchIndex(content) 238 if mention == nil { 239 return false, RefSpan{} 240 } 241 return true, RefSpan{Start: mention[2], End: mention[3]} 242 } 243 244 // FindAllIssueReferencesMarkdown strips content from markdown markup 245 // and returns a list of unvalidated references found in it. 
func FindAllIssueReferencesMarkdown(content string) []IssueReference {
	return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))
}

// findAllIssueReferencesMarkdown runs content through mdstripper.StripMarkdownBytes
// and scans both results (the stripped bytes and the returned links) for references.
func findAllIssueReferencesMarkdown(content string) []*rawReference {
	bcontent, links := mdstripper.StripMarkdownBytes([]byte(content))
	return findAllIssueReferencesBytes(bcontent, links)
}

// convertFullHTMLReferencesToShortRefs rewrites, in place, every full issue/pull
// URL matched by re into its short `owner/repo#num` / `owner/repo!num` form and
// shrinks *contentBytes accordingly. re must expose the capture groups produced
// by getGiteaIssuePullPattern: preamble, owner/repo, issues|pulls, number.
func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) {
	// We will iterate through the content, rewrite and simplify full references.
	//
	// We want to transform something like:
	//
	// this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo
	// https://ourgitea.com/git/owner/repo/pulls/123456789
	//
	// Into something like:
	//
	// this is a owner/repo#123456789, foo
	// owner/repo!123456789

	pos := 0
	for {
		// re looks for something like: (\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)
		match := re.FindSubmatchIndex((*contentBytes)[pos:])
		if match == nil {
			break
		}
		// match is a bunch of indices into the content from pos onwards so
		// to simplify things let's just add pos to all of the indices in match
		for i := range match {
			match[i] += pos
		}

		// match[0]-match[1] is whole string
		// match[2]-match[3] is preamble

		// move the position to the end of the preamble
		pos = match[3]

		// match[4]-match[5] is owner/repo
		// now copy the owner/repo to end of the preamble
		// (source and destination may overlap; the built-in copy handles that)
		endPos := pos + match[5] - match[4]
		copy((*contentBytes)[pos:endPos], (*contentBytes)[match[4]:match[5]])

		// move the current position to the end of the newly copied owner/repo
		pos = endPos

		// Now set the issue/pull marker:
		//
		// match[6]-match[7] == 'issues'
		// The write position is always before match[6], so the issues/pulls
		// text is still intact when it is read below.
		(*contentBytes)[pos] = '#'
		if string((*contentBytes)[match[6]:match[7]]) == "pulls" {
			(*contentBytes)[pos] = '!'
		}
		pos++

		// Then add the issue/pull number
		//
		// match[8]-match[9] is the number
		endPos = pos + match[9] - match[8]
		copy((*contentBytes)[pos:endPos], (*contentBytes)[match[8]:match[9]])

		// Now copy what's left at the end of the string to the new end position
		copy((*contentBytes)[endPos:], (*contentBytes)[match[9]:])
		// now we reset the length

		// our new section has length endPos - match[3]
		// our old section has length match[9] - match[3]
		*contentBytes = (*contentBytes)[:len(*contentBytes)-match[9]+endPos]
		// Resume right after the rewritten number so the trailing separator can
		// serve as the preamble of a following reference.
		pos = endPos
	}
}

// FindAllIssueReferences returns a list of unvalidated references found in a string.
// Full issue/pull URLs of this instance are first rewritten to their short form;
// no markdown stripping is performed and no separate link list is scanned.
func FindAllIssueReferences(content string) []IssueReference {
	// Need to convert fully qualified html references to local system to #/! short codes
	contentBytes := []byte(content)
	if re := getGiteaIssuePullPattern(); re != nil {
		convertFullHTMLReferencesToShortRefs(re, &contentBytes)
	} else {
		log.Debug("No GiteaIssuePullPattern pattern")
	}
	return rawToIssueReferenceList(findAllIssueReferencesBytes(contentBytes, []string{}))
}

// FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
334 func FindRenderizableReferenceNumeric(content string, prOnly bool) (bool, *RenderizableReference) { 335 match := issueNumericPattern.FindStringSubmatchIndex(content) 336 if match == nil { 337 if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil { 338 return false, nil 339 } 340 } 341 r := getCrossReference(util.StringToReadOnlyBytes(content), match[2], match[3], false, prOnly) 342 if r == nil { 343 return false, nil 344 } 345 346 return true, &RenderizableReference{ 347 Issue: r.issue, 348 Owner: r.owner, 349 Name: r.name, 350 IsPull: r.isPull, 351 RefLocation: r.refLocation, 352 Action: r.action, 353 ActionLocation: r.actionLocation, 354 } 355 } 356 357 // FindRenderizableCommitCrossReference returns the first unvalidated commit cross reference found in a string. 358 func FindRenderizableCommitCrossReference(content string) (bool, *RenderizableReference) { 359 m := crossReferenceCommitPattern.FindStringSubmatchIndex(content) 360 if len(m) < 8 { 361 return false, nil 362 } 363 364 return true, &RenderizableReference{ 365 Owner: content[m[2]:m[3]], 366 Name: content[m[4]:m[5]], 367 CommitSha: content[m[6]:m[7]], 368 RefLocation: &RefSpan{Start: m[2], End: m[7]}, 369 } 370 } 371 372 // FindRenderizableReferenceRegexp returns the first regexp unvalidated references found in a string. 373 func FindRenderizableReferenceRegexp(content string, pattern *regexp.Regexp) (bool, *RenderizableReference) { 374 match := pattern.FindStringSubmatchIndex(content) 375 if len(match) < 4 { 376 return false, nil 377 } 378 379 action, location := findActionKeywords([]byte(content), match[2]) 380 381 return true, &RenderizableReference{ 382 Issue: content[match[2]:match[3]], 383 RefLocation: &RefSpan{Start: match[0], End: match[1]}, 384 Action: action, 385 ActionLocation: location, 386 IsPull: false, 387 } 388 } 389 390 // FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string. 
391 func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) { 392 match := issueAlphanumericPattern.FindStringSubmatchIndex(content) 393 if match == nil { 394 return false, nil 395 } 396 397 action, location := findActionKeywords([]byte(content), match[2]) 398 399 return true, &RenderizableReference{ 400 Issue: content[match[2]:match[3]], 401 RefLocation: &RefSpan{Start: match[2], End: match[3]}, 402 Action: action, 403 ActionLocation: location, 404 IsPull: false, 405 } 406 } 407 408 // FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice. 409 func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference { 410 ret := make([]*rawReference, 0, 10) 411 pos := 0 412 413 // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and 414 // trailing spaces (\s#ref,\s), so if we get two consecutive references, the space 415 // from the second reference will be "eaten" by the first one: 416 // ...\s#ref1\s#ref2\s... 
--> ...`\s#ref1\s`, (not) `#ref2,\s...` 417 for { 418 match := issueNumericPattern.FindSubmatchIndex(content[pos:]) 419 if match == nil { 420 break 421 } 422 if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil { 423 ret = append(ret, ref) 424 } 425 notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos]) 426 if notrail == nil { 427 pos = match[3] + pos 428 } else { 429 pos = match[3] + pos + notrail[1] - notrail[3] 430 } 431 } 432 433 pos = 0 434 435 for { 436 match := crossReferenceIssueNumericPattern.FindSubmatchIndex(content[pos:]) 437 if match == nil { 438 break 439 } 440 if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil { 441 ret = append(ret, ref) 442 } 443 notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos]) 444 if notrail == nil { 445 pos = match[3] + pos 446 } else { 447 pos = match[3] + pos + notrail[1] - notrail[3] 448 } 449 } 450 451 localhost := getGiteaHostName() 452 for _, link := range links { 453 if u, err := url.Parse(link); err == nil { 454 // Note: we're not attempting to match the URL scheme (http/https) 455 host := strings.ToLower(u.Host) 456 if host != "" && host != localhost { 457 continue 458 } 459 parts := strings.Split(u.EscapedPath(), "/") 460 // /user/repo/issues/3 461 if len(parts) != 5 || parts[0] != "" { 462 continue 463 } 464 var sep string 465 if parts[3] == "issues" { 466 sep = "#" 467 } else if parts[3] == "pulls" { 468 sep = "!" 
469 } else { 470 continue 471 } 472 // Note: closing/reopening keywords not supported with URLs 473 bytes := []byte(parts[1] + "/" + parts[2] + sep + parts[4]) 474 if ref := getCrossReference(bytes, 0, len(bytes), true, false); ref != nil { 475 ref.refLocation = nil 476 ret = append(ret, ref) 477 } 478 } 479 } 480 481 if len(ret) == 0 { 482 return ret 483 } 484 485 pos = 0 486 487 for { 488 match := timeLogPattern.FindSubmatchIndex(content[pos:]) 489 if match == nil { 490 break 491 } 492 493 timeLogEntry := string(content[match[2]+pos+1 : match[3]+pos]) 494 495 var f *rawReference 496 for _, ref := range ret { 497 if ref.refLocation != nil && ref.refLocation.End < match[2]+pos && (f == nil || f.refLocation.End < ref.refLocation.End) { 498 f = ref 499 } 500 } 501 502 pos = match[1] + pos 503 504 if f == nil { 505 f = ret[0] 506 } 507 508 if len(f.timeLog) == 0 { 509 f.timeLog = timeLogEntry 510 } 511 } 512 513 return ret 514 } 515 516 func getCrossReference(content []byte, start, end int, fromLink, prOnly bool) *rawReference { 517 sep := bytes.IndexAny(content[start:end], "#!") 518 if sep < 0 { 519 return nil 520 } 521 isPull := content[start+sep] == '!' 
522 if prOnly && !isPull { 523 return nil 524 } 525 repo := string(content[start : start+sep]) 526 issue := string(content[start+sep+1 : end]) 527 index, err := strconv.ParseInt(issue, 10, 64) 528 if err != nil { 529 return nil 530 } 531 if repo == "" { 532 if fromLink { 533 // Markdown links must specify owner/repo 534 return nil 535 } 536 action, location := findActionKeywords(content, start) 537 return &rawReference{ 538 index: index, 539 action: action, 540 issue: issue, 541 isPull: isPull, 542 refLocation: &RefSpan{Start: start, End: end}, 543 actionLocation: location, 544 } 545 } 546 parts := strings.Split(strings.ToLower(repo), "/") 547 if len(parts) != 2 { 548 return nil 549 } 550 owner, name := parts[0], parts[1] 551 if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) { 552 return nil 553 } 554 action, location := findActionKeywords(content, start) 555 return &rawReference{ 556 index: index, 557 owner: owner, 558 name: name, 559 action: action, 560 issue: issue, 561 isPull: isPull, 562 refLocation: &RefSpan{Start: start, End: end}, 563 actionLocation: location, 564 } 565 } 566 567 func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) { 568 newKeywords() 569 var m []int 570 if issueCloseKeywordsPat != nil { 571 m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start]) 572 if m != nil { 573 return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]} 574 } 575 } 576 if issueReopenKeywordsPat != nil { 577 m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start]) 578 if m != nil { 579 return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]} 580 } 581 } 582 return XRefActionNone, nil 583 } 584 585 // IsXrefActionable returns true if the xref action is actionable (i.e. 
produces a result when resolved) 586 func IsXrefActionable(ref *RenderizableReference, extTracker bool) bool { 587 if extTracker { 588 // External issues cannot be automatically closed 589 return false 590 } 591 return ref.Action == XRefActionCloses || ref.Action == XRefActionReopens 592 }