github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/references/references.go (about) 1 // Copyright 2023 The GitBundle Inc. All rights reserved. 2 // Copyright 2017 The Gitea Authors. All rights reserved. 3 // Use of this source code is governed by a MIT-style 4 // license that can be found in the LICENSE file. 5 6 package references 7 8 import ( 9 "bytes" 10 "net/url" 11 "regexp" 12 "strconv" 13 "strings" 14 "sync" 15 16 "github.com/gitbundle/modules/log" 17 "github.com/gitbundle/modules/markup/mdstripper" 18 "github.com/gitbundle/modules/setting" 19 20 "github.com/yuin/goldmark/util" 21 ) 22 23 var ( 24 // validNamePattern performs only the most basic validation for user or repository names 25 // Repository name should contain only alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters. 26 validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`) 27 28 // NOTE: All below regex matching do not perform any extra validation. 29 // Thus a link is produced even if the linked entity does not exist. 30 // While fast, this is also incorrect and lead to false positives. 31 // TODO: fix invalid linking issue 32 33 // mentionPattern matches all mentions in the form of "@user" or "@org/team" 34 mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_]+\/?[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+\/?[0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`) 35 // issueNumericPattern matches string that references to a numeric issue, e.g. #1287 36 issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\')([#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) 37 // issueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234 38 issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`) 39 // crossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository 40 // e.g. 
gogits/gogs#12345 41 crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) 42 // spaceTrimmedPattern let's us find the trailing space 43 spaceTrimmedPattern = regexp.MustCompile(`(?:.*[0-9a-zA-Z-_])\s`) 44 // timeLogPattern matches string for time tracking 45 timeLogPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@([0-9]+([\.,][0-9]+)?(w|d|m|h))+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) 46 47 issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp 48 issueKeywordsOnce sync.Once 49 50 giteaHostInit sync.Once 51 giteaHost string 52 giteaIssuePullPattern *regexp.Regexp 53 54 actionStrings = []string{ 55 "none", 56 "closes", 57 "reopens", 58 "neutered", 59 } 60 ) 61 62 // XRefAction represents the kind of effect a cross reference has once is resolved 63 type XRefAction int64 64 65 const ( 66 // XRefActionNone means the cross-reference is simply a comment 67 XRefActionNone XRefAction = iota // 0 68 // XRefActionCloses means the cross-reference should close an issue if it is resolved 69 XRefActionCloses // 1 70 // XRefActionReopens means the cross-reference should reopen an issue if it is resolved 71 XRefActionReopens // 2 72 // XRefActionNeutered means the cross-reference will no longer affect the source 73 XRefActionNeutered // 3 74 ) 75 76 func (a XRefAction) String() string { 77 return actionStrings[a] 78 } 79 80 // IssueReference contains an unverified cross-reference to a local issue or pull request 81 type IssueReference struct { 82 Index int64 83 Owner string 84 Name string 85 Action XRefAction 86 TimeLog string 87 } 88 89 // RenderizableReference contains an unverified cross-reference to with rendering information 90 // The IsPull member means that a `!num` reference was used instead of `#num`. 91 // This kind of reference is used to make pulls available when an external issue tracker 92 // is used. Otherwise, `#` and `!` are completely interchangeable. 
93 type RenderizableReference struct { 94 Issue string 95 Owner string 96 Name string 97 IsPull bool 98 RefLocation *RefSpan 99 Action XRefAction 100 ActionLocation *RefSpan 101 } 102 103 type rawReference struct { 104 index int64 105 owner string 106 name string 107 isPull bool 108 action XRefAction 109 issue string 110 refLocation *RefSpan 111 actionLocation *RefSpan 112 timeLog string 113 } 114 115 func rawToIssueReferenceList(reflist []*rawReference) []IssueReference { 116 refarr := make([]IssueReference, len(reflist)) 117 for i, r := range reflist { 118 refarr[i] = IssueReference{ 119 Index: r.index, 120 Owner: r.owner, 121 Name: r.name, 122 Action: r.action, 123 TimeLog: r.timeLog, 124 } 125 } 126 return refarr 127 } 128 129 // RefSpan is the position where the reference was found within the parsed text 130 type RefSpan struct { 131 Start int 132 End int 133 } 134 135 func makeKeywordsPat(words []string) *regexp.Regexp { 136 acceptedWords := parseKeywords(words) 137 if len(acceptedWords) == 0 { 138 // Never match 139 return nil 140 } 141 return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? 
$`) 142 } 143 144 func parseKeywords(words []string) []string { 145 acceptedWords := make([]string, 0, 5) 146 wordPat := regexp.MustCompile(`^[\pL]+$`) 147 for _, word := range words { 148 word = strings.ToLower(strings.TrimSpace(word)) 149 // Accept Unicode letter class runes (a-z, á, à, ä, ) 150 if wordPat.MatchString(word) { 151 acceptedWords = append(acceptedWords, word) 152 } else { 153 log.Info("Invalid keyword: %s", word) 154 } 155 } 156 return acceptedWords 157 } 158 159 func newKeywords() { 160 issueKeywordsOnce.Do(func() { 161 // Delay initialization until after the settings module is initialized 162 doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords) 163 }) 164 } 165 166 func doNewKeywords(close, reopen []string) { 167 issueCloseKeywordsPat = makeKeywordsPat(close) 168 issueReopenKeywordsPat = makeKeywordsPat(reopen) 169 } 170 171 // getGiteaHostName returns a normalized string with the local host name, with no scheme or port information 172 func getGiteaHostName() string { 173 giteaHostInit.Do(func() { 174 if uapp, err := url.Parse(setting.AppURL); err == nil { 175 giteaHost = strings.ToLower(uapp.Host) 176 giteaIssuePullPattern = regexp.MustCompile( 177 `(\s|^|\(|\[)` + 178 regexp.QuoteMeta(strings.TrimSpace(setting.AppURL)) + 179 `([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` + 180 `((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) 181 } else { 182 giteaHost = "" 183 giteaIssuePullPattern = nil 184 } 185 }) 186 return giteaHost 187 } 188 189 // getGiteaIssuePullPattern 190 func getGiteaIssuePullPattern() *regexp.Regexp { 191 getGiteaHostName() 192 return giteaIssuePullPattern 193 } 194 195 // FindAllMentionsMarkdown matches mention patterns in given content and 196 // returns a list of found unvalidated user names **not including** the @ prefix. 
197 func FindAllMentionsMarkdown(content string) []string { 198 bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content)) 199 locations := FindAllMentionsBytes(bcontent) 200 mentions := make([]string, len(locations)) 201 for i, val := range locations { 202 mentions[i] = string(bcontent[val.Start+1 : val.End]) 203 } 204 return mentions 205 } 206 207 // FindAllMentionsBytes matches mention patterns in given content 208 // and returns a list of locations for the unvalidated user names, including the @ prefix. 209 func FindAllMentionsBytes(content []byte) []RefSpan { 210 // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and 211 // trailing spaces (\s@mention,\s), so if we get two consecutive references, the space 212 // from the second reference will be "eaten" by the first one: 213 // ...\s@mention1\s@mention2\s... --> ...`\s@mention1\s`, (not) `@mention2,\s...` 214 ret := make([]RefSpan, 0, 5) 215 pos := 0 216 for { 217 match := mentionPattern.FindSubmatchIndex(content[pos:]) 218 if match == nil { 219 break 220 } 221 ret = append(ret, RefSpan{Start: match[2] + pos, End: match[3] + pos}) 222 notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos]) 223 if notrail == nil { 224 pos = match[3] + pos 225 } else { 226 pos = match[3] + pos + notrail[1] - notrail[3] 227 } 228 } 229 return ret 230 } 231 232 // FindFirstMentionBytes matches the first mention in then given content 233 // and returns the location of the unvalidated user name, including the @ prefix. 234 func FindFirstMentionBytes(content []byte) (bool, RefSpan) { 235 mention := mentionPattern.FindSubmatchIndex(content) 236 if mention == nil { 237 return false, RefSpan{} 238 } 239 return true, RefSpan{Start: mention[2], End: mention[3]} 240 } 241 242 // FindAllIssueReferencesMarkdown strips content from markdown markup 243 // and returns a list of unvalidated references found in it. 
func FindAllIssueReferencesMarkdown(content string) []IssueReference {
	return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))
}

// findAllIssueReferencesMarkdown strips the markdown markup from content and
// scans both the remaining text and the links extracted by the stripper for
// issue/pull references.
func findAllIssueReferencesMarkdown(content string) []*rawReference {
	bcontent, links := mdstripper.StripMarkdownBytes([]byte(content))
	return findAllIssueReferencesBytes(bcontent, links)
}

// convertFullHTMLReferencesToShortRefs rewrites, in place, every full URL
// matched by re into its short form (owner/repo#num or owner/repo!num) and
// shrinks *contentBytes accordingly.
//
// re is expected to expose four capture groups in this order: preamble,
// owner/repo, "issues" or "pulls", and the number.
func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) {
	// We will iterate through the content, rewrite and simplify full references.
	//
	// We want to transform something like:
	//
	// this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo
	// https://ourgitea.com/git/owner/repo/pulls/123456789
	//
	// Into something like:
	//
	// this is a owner/repo#123456789, foo
	// owner/repo!123456789

	pos := 0
	for {
		// re looks for something like: (\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)
		match := re.FindSubmatchIndex((*contentBytes)[pos:])
		if match == nil {
			break
		}
		// match is a bunch of indices into the content from pos onwards so
		// to simplify things let's just add pos to all of the indices in match
		for i := range match {
			match[i] += pos
		}

		// match[0]-match[1] is whole string
		// match[2]-match[3] is preamble

		// move the position to the end of the preamble
		pos = match[3]

		// match[4]-match[5] is owner/repo
		// now copy the owner/repo to end of the preamble
		endPos := pos + match[5] - match[4]
		copy((*contentBytes)[pos:endPos], (*contentBytes)[match[4]:match[5]])

		// move the current position to the end of the newly copied owner/repo
		pos = endPos

		// Now set the issue/pull marker:
		//
		// match[6]-match[7] == 'issues'
		// (the "pulls" check below still reads the untouched original text,
		// because everything written so far lies before match[6])
		(*contentBytes)[pos] = '#'
		if string((*contentBytes)[match[6]:match[7]]) == "pulls" {
			(*contentBytes)[pos] = '!'
		}
		pos++

		// Then add the issue/pull number
		//
		// match[8]-match[9] is the number
		endPos = pos + match[9] - match[8]
		copy((*contentBytes)[pos:endPos], (*contentBytes)[match[8]:match[9]])

		// Now copy what's left at the end of the string to the new end position
		copy((*contentBytes)[endPos:], (*contentBytes)[match[9]:])
		// now we reset the length

		// our new section has length endPos - match[3]
		// our old section has length match[9] - match[3]
		*contentBytes = (*contentBytes)[:len(*contentBytes)-match[9]+endPos]
		// continue scanning right after the rewritten reference; the trailing
		// delimiter of this match is left in place for the next iteration
		pos = endPos
	}
}

// FindAllIssueReferences returns a list of unvalidated references found in a string.
func FindAllIssueReferences(content string) []IssueReference {
	// Need to convert fully qualified html references to local system to #/! short codes
	contentBytes := []byte(content)
	if re := getGiteaIssuePullPattern(); re != nil {
		convertFullHTMLReferencesToShortRefs(re, &contentBytes)
	} else {
		log.Debug("No GiteaIssuePullPattern pattern")
	}
	// No separately extracted links here: the text is scanned as-is.
	return rawToIssueReferenceList(findAllIssueReferencesBytes(contentBytes, []string{}))
}

// FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
332 func FindRenderizableReferenceNumeric(content string, prOnly bool) (bool, *RenderizableReference) { 333 match := issueNumericPattern.FindStringSubmatchIndex(content) 334 if match == nil { 335 if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil { 336 return false, nil 337 } 338 } 339 r := getCrossReference(util.StringToReadOnlyBytes(content), match[2], match[3], false, prOnly) 340 if r == nil { 341 return false, nil 342 } 343 344 return true, &RenderizableReference{ 345 Issue: r.issue, 346 Owner: r.owner, 347 Name: r.name, 348 IsPull: r.isPull, 349 RefLocation: r.refLocation, 350 Action: r.action, 351 ActionLocation: r.actionLocation, 352 } 353 } 354 355 // FindRenderizableReferenceRegexp returns the first regexp unvalidated references found in a string. 356 func FindRenderizableReferenceRegexp(content string, pattern *regexp.Regexp) (bool, *RenderizableReference) { 357 match := pattern.FindStringSubmatchIndex(content) 358 if len(match) < 4 { 359 return false, nil 360 } 361 362 action, location := findActionKeywords([]byte(content), match[2]) 363 364 return true, &RenderizableReference{ 365 Issue: content[match[2]:match[3]], 366 RefLocation: &RefSpan{Start: match[0], End: match[1]}, 367 Action: action, 368 ActionLocation: location, 369 IsPull: false, 370 } 371 } 372 373 // FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string. 
374 func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) { 375 match := issueAlphanumericPattern.FindStringSubmatchIndex(content) 376 if match == nil { 377 return false, nil 378 } 379 380 action, location := findActionKeywords([]byte(content), match[2]) 381 382 return true, &RenderizableReference{ 383 Issue: string(content[match[2]:match[3]]), 384 RefLocation: &RefSpan{Start: match[2], End: match[3]}, 385 Action: action, 386 ActionLocation: location, 387 IsPull: false, 388 } 389 } 390 391 // FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice. 392 func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference { 393 ret := make([]*rawReference, 0, 10) 394 pos := 0 395 396 // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and 397 // trailing spaces (\s#ref,\s), so if we get two consecutive references, the space 398 // from the second reference will be "eaten" by the first one: 399 // ...\s#ref1\s#ref2\s... 
--> ...`\s#ref1\s`, (not) `#ref2,\s...` 400 for { 401 match := issueNumericPattern.FindSubmatchIndex(content[pos:]) 402 if match == nil { 403 break 404 } 405 if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil { 406 ret = append(ret, ref) 407 } 408 notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos]) 409 if notrail == nil { 410 pos = match[3] + pos 411 } else { 412 pos = match[3] + pos + notrail[1] - notrail[3] 413 } 414 } 415 416 pos = 0 417 418 for { 419 match := crossReferenceIssueNumericPattern.FindSubmatchIndex(content[pos:]) 420 if match == nil { 421 break 422 } 423 if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil { 424 ret = append(ret, ref) 425 } 426 notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos]) 427 if notrail == nil { 428 pos = match[3] + pos 429 } else { 430 pos = match[3] + pos + notrail[1] - notrail[3] 431 } 432 } 433 434 localhost := getGiteaHostName() 435 for _, link := range links { 436 if u, err := url.Parse(link); err == nil { 437 // Note: we're not attempting to match the URL scheme (http/https) 438 host := strings.ToLower(u.Host) 439 if host != "" && host != localhost { 440 continue 441 } 442 parts := strings.Split(u.EscapedPath(), "/") 443 // /user/repo/issues/3 444 if len(parts) != 5 || parts[0] != "" { 445 continue 446 } 447 var sep string 448 if parts[3] == "issues" { 449 sep = "#" 450 } else if parts[3] == "pulls" { 451 sep = "!" 
452 } else { 453 continue 454 } 455 // Note: closing/reopening keywords not supported with URLs 456 bytes := []byte(parts[1] + "/" + parts[2] + sep + parts[4]) 457 if ref := getCrossReference(bytes, 0, len(bytes), true, false); ref != nil { 458 ref.refLocation = nil 459 ret = append(ret, ref) 460 } 461 } 462 } 463 464 if len(ret) == 0 { 465 return ret 466 } 467 468 pos = 0 469 470 for { 471 match := timeLogPattern.FindSubmatchIndex(content[pos:]) 472 if match == nil { 473 break 474 } 475 476 timeLogEntry := string(content[match[2]+pos+1 : match[3]+pos]) 477 478 var f *rawReference 479 for _, ref := range ret { 480 if ref.refLocation != nil && ref.refLocation.End < match[2]+pos && (f == nil || f.refLocation.End < ref.refLocation.End) { 481 f = ref 482 } 483 } 484 485 pos = match[1] + pos 486 487 if f == nil { 488 f = ret[0] 489 } 490 491 if len(f.timeLog) == 0 { 492 f.timeLog = timeLogEntry 493 } 494 } 495 496 return ret 497 } 498 499 func getCrossReference(content []byte, start, end int, fromLink, prOnly bool) *rawReference { 500 sep := bytes.IndexAny(content[start:end], "#!") 501 if sep < 0 { 502 return nil 503 } 504 isPull := content[start+sep] == '!' 
505 if prOnly && !isPull { 506 return nil 507 } 508 repo := string(content[start : start+sep]) 509 issue := string(content[start+sep+1 : end]) 510 index, err := strconv.ParseInt(string(issue), 10, 64) 511 if err != nil { 512 return nil 513 } 514 if repo == "" { 515 if fromLink { 516 // Markdown links must specify owner/repo 517 return nil 518 } 519 action, location := findActionKeywords(content, start) 520 return &rawReference{ 521 index: index, 522 action: action, 523 issue: issue, 524 isPull: isPull, 525 refLocation: &RefSpan{Start: start, End: end}, 526 actionLocation: location, 527 } 528 } 529 parts := strings.Split(strings.ToLower(repo), "/") 530 if len(parts) != 2 { 531 return nil 532 } 533 owner, name := parts[0], parts[1] 534 if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) { 535 return nil 536 } 537 action, location := findActionKeywords(content, start) 538 return &rawReference{ 539 index: index, 540 owner: owner, 541 name: name, 542 action: action, 543 issue: issue, 544 isPull: isPull, 545 refLocation: &RefSpan{Start: start, End: end}, 546 actionLocation: location, 547 } 548 } 549 550 func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) { 551 newKeywords() 552 var m []int 553 if issueCloseKeywordsPat != nil { 554 m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start]) 555 if m != nil { 556 return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]} 557 } 558 } 559 if issueReopenKeywordsPat != nil { 560 m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start]) 561 if m != nil { 562 return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]} 563 } 564 } 565 return XRefActionNone, nil 566 } 567 568 // IsXrefActionable returns true if the xref action is actionable (i.e. 
produces a result when resolved) 569 func IsXrefActionable(ref *RenderizableReference, extTracker bool) bool { 570 if extTracker { 571 // External issues cannot be automatically closed 572 return false 573 } 574 return ref.Action == XRefActionCloses || ref.Action == XRefActionReopens 575 }