github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/references/references.go (about)

     1  // Copyright 2023 The GitBundle Inc. All rights reserved.
     2  // Copyright 2017 The Gitea Authors. All rights reserved.
     3  // Use of this source code is governed by a MIT-style
     4  // license that can be found in the LICENSE file.
     5  
     6  package references
     7  
     8  import (
     9  	"bytes"
    10  	"net/url"
    11  	"regexp"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  
    16  	"github.com/gitbundle/modules/log"
    17  	"github.com/gitbundle/modules/markup/mdstripper"
    18  	"github.com/gitbundle/modules/setting"
    19  
    20  	"github.com/yuin/goldmark/util"
    21  )
    22  
var (
	// validNamePattern performs only the most basic validation for user or repository names.
	// A name may contain only lowercase alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters.
	validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`)

	// NOTE: None of the regular expressions below perform any extra validation.
	// Thus a link is produced even if the linked entity does not exist.
	// While fast, this is also incorrect and leads to false positives.
	// TODO: fix invalid linking issue

	// mentionPattern matches all mentions in the form of "@user" or "@org/team".
	// Its first capturing group is the mention itself, including the '@' prefix.
	mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_]+\/?[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+\/?[0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`)
	// issueNumericPattern matches a string that references a numeric issue or pull, e.g. #1287 or !1287
	issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\')([#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
	// issueAlphanumericPattern matches a string that references an alphanumeric issue, e.g. ABC-1234
	issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`)
	// crossReferenceIssueNumericPattern matches a string that references a numeric issue in a different repository
	// e.g. gogits/gogs#12345
	crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
	// spaceTrimmedPattern lets us find the trailing space.
	// NOTE: it has no capturing group, so a match yields only the whole-match indices.
	spaceTrimmedPattern = regexp.MustCompile(`(?:.*[0-9a-zA-Z-_])\s`)
	// timeLogPattern matches a string used for time tracking, e.g. @1h, @1.5d or @1d2h
	timeLogPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@([0-9]+([\.,][0-9]+)?(w|d|m|h))+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)

	// issueCloseKeywordsPat and issueReopenKeywordsPat are assigned by doNewKeywords
	// (run once through newKeywords, guarded by issueKeywordsOnce).
	// Either of them is nil when no valid keyword was supplied.
	issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp
	issueKeywordsOnce                             sync.Once

	// giteaHost and giteaIssuePullPattern are assigned once, inside getGiteaHostName,
	// guarded by giteaHostInit.
	giteaHostInit         sync.Once
	giteaHost             string
	giteaIssuePullPattern *regexp.Regexp

	// actionStrings holds the textual form of every XRefAction, indexed by the action's numeric value.
	actionStrings = []string{
		"none",
		"closes",
		"reopens",
		"neutered",
	}
)
    61  
// XRefAction represents the kind of effect a cross reference has once it is resolved.
// Its numeric value is also used as the index into actionStrings by String.
type XRefAction int64

const (
	// XRefActionNone means the cross-reference is simply a comment
	XRefActionNone XRefAction = iota // 0
	// XRefActionCloses means the cross-reference should close an issue if it is resolved
	XRefActionCloses // 1
	// XRefActionReopens means the cross-reference should reopen an issue if it is resolved
	XRefActionReopens // 2
	// XRefActionNeutered means the cross-reference will no longer affect the source
	XRefActionNeutered // 3
)
    75  
    76  func (a XRefAction) String() string {
    77  	return actionStrings[a]
    78  }
    79  
// IssueReference contains an unverified cross-reference to a local issue or pull request.
//
// Index is the issue/pull number parsed from the reference. Owner and Name are the
// lowercased owner and repository names and are left empty when the reference did
// not name a repository (e.g. "#123"). Action is the close/reopen keyword effect
// found in front of the reference, if any. TimeLog is the time tracking entry
// associated with the reference, without its '@' prefix, or empty when there is none.
type IssueReference struct {
	Index   int64
	Owner   string
	Name    string
	Action  XRefAction
	TimeLog string
}
    88  
// RenderizableReference contains an unverified cross-reference together with rendering information.
// The IsPull member means that a `!num` reference was used instead of `#num`.
// This kind of reference is used to make pulls available when an external issue tracker
// is used. Otherwise, `#` and `!` are completely interchangeable.
//
// Issue is the referenced issue identifier as text. Owner and Name are only filled in
// for cross-repository references. RefLocation is the span of the reference within the
// parsed text. Action and ActionLocation describe the close/reopen keyword found in
// front of the reference; ActionLocation is nil when no keyword was found.
type RenderizableReference struct {
	Issue          string
	Owner          string
	Name           string
	IsPull         bool
	RefLocation    *RefSpan
	Action         XRefAction
	ActionLocation *RefSpan
}
   102  
// rawReference is the package-internal result of parsing a single reference.
// It carries every piece of information collected by getCrossReference and
// findAllIssueReferencesBytes; the exported IssueReference and
// RenderizableReference types are each built from a subset of these fields.
//
// index is the parsed number and issue its textual form. owner and name are empty
// for references that do not name a repository. refLocation is nil for references
// that were extracted from links rather than found in the text itself.
type rawReference struct {
	index          int64
	owner          string
	name           string
	isPull         bool
	action         XRefAction
	issue          string
	refLocation    *RefSpan
	actionLocation *RefSpan
	timeLog        string
}
   114  
   115  func rawToIssueReferenceList(reflist []*rawReference) []IssueReference {
   116  	refarr := make([]IssueReference, len(reflist))
   117  	for i, r := range reflist {
   118  		refarr[i] = IssueReference{
   119  			Index:   r.index,
   120  			Owner:   r.owner,
   121  			Name:    r.name,
   122  			Action:  r.action,
   123  			TimeLog: r.timeLog,
   124  		}
   125  	}
   126  	return refarr
   127  }
   128  
// RefSpan is the position where the reference was found within the parsed text.
// Start and End are byte offsets into that text, used as the slice bounds
// content[Start:End] (End is therefore exclusive).
type RefSpan struct {
	Start int
	End   int
}
   134  
   135  func makeKeywordsPat(words []string) *regexp.Regexp {
   136  	acceptedWords := parseKeywords(words)
   137  	if len(acceptedWords) == 0 {
   138  		// Never match
   139  		return nil
   140  	}
   141  	return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? $`)
   142  }
   143  
   144  func parseKeywords(words []string) []string {
   145  	acceptedWords := make([]string, 0, 5)
   146  	wordPat := regexp.MustCompile(`^[\pL]+$`)
   147  	for _, word := range words {
   148  		word = strings.ToLower(strings.TrimSpace(word))
   149  		// Accept Unicode letter class runes (a-z, á, à, ä, )
   150  		if wordPat.MatchString(word) {
   151  			acceptedWords = append(acceptedWords, word)
   152  		} else {
   153  			log.Info("Invalid keyword: %s", word)
   154  		}
   155  	}
   156  	return acceptedWords
   157  }
   158  
   159  func newKeywords() {
   160  	issueKeywordsOnce.Do(func() {
   161  		// Delay initialization until after the settings module is initialized
   162  		doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords)
   163  	})
   164  }
   165  
   166  func doNewKeywords(close, reopen []string) {
   167  	issueCloseKeywordsPat = makeKeywordsPat(close)
   168  	issueReopenKeywordsPat = makeKeywordsPat(reopen)
   169  }
   170  
   171  // getGiteaHostName returns a normalized string with the local host name, with no scheme or port information
   172  func getGiteaHostName() string {
   173  	giteaHostInit.Do(func() {
   174  		if uapp, err := url.Parse(setting.AppURL); err == nil {
   175  			giteaHost = strings.ToLower(uapp.Host)
   176  			giteaIssuePullPattern = regexp.MustCompile(
   177  				`(\s|^|\(|\[)` +
   178  					regexp.QuoteMeta(strings.TrimSpace(setting.AppURL)) +
   179  					`([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` +
   180  					`((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
   181  		} else {
   182  			giteaHost = ""
   183  			giteaIssuePullPattern = nil
   184  		}
   185  	})
   186  	return giteaHost
   187  }
   188  
   189  // getGiteaIssuePullPattern
   190  func getGiteaIssuePullPattern() *regexp.Regexp {
   191  	getGiteaHostName()
   192  	return giteaIssuePullPattern
   193  }
   194  
   195  // FindAllMentionsMarkdown matches mention patterns in given content and
   196  // returns a list of found unvalidated user names **not including** the @ prefix.
   197  func FindAllMentionsMarkdown(content string) []string {
   198  	bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content))
   199  	locations := FindAllMentionsBytes(bcontent)
   200  	mentions := make([]string, len(locations))
   201  	for i, val := range locations {
   202  		mentions[i] = string(bcontent[val.Start+1 : val.End])
   203  	}
   204  	return mentions
   205  }
   206  
   207  // FindAllMentionsBytes matches mention patterns in given content
   208  // and returns a list of locations for the unvalidated user names, including the @ prefix.
   209  func FindAllMentionsBytes(content []byte) []RefSpan {
   210  	// Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
   211  	// trailing spaces (\s@mention,\s), so if we get two consecutive references, the space
   212  	// from the second reference will be "eaten" by the first one:
   213  	// ...\s@mention1\s@mention2\s...	--> ...`\s@mention1\s`, (not) `@mention2,\s...`
   214  	ret := make([]RefSpan, 0, 5)
   215  	pos := 0
   216  	for {
   217  		match := mentionPattern.FindSubmatchIndex(content[pos:])
   218  		if match == nil {
   219  			break
   220  		}
   221  		ret = append(ret, RefSpan{Start: match[2] + pos, End: match[3] + pos})
   222  		notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
   223  		if notrail == nil {
   224  			pos = match[3] + pos
   225  		} else {
   226  			pos = match[3] + pos + notrail[1] - notrail[3]
   227  		}
   228  	}
   229  	return ret
   230  }
   231  
   232  // FindFirstMentionBytes matches the first mention in then given content
   233  // and returns the location of the unvalidated user name, including the @ prefix.
   234  func FindFirstMentionBytes(content []byte) (bool, RefSpan) {
   235  	mention := mentionPattern.FindSubmatchIndex(content)
   236  	if mention == nil {
   237  		return false, RefSpan{}
   238  	}
   239  	return true, RefSpan{Start: mention[2], End: mention[3]}
   240  }
   241  
   242  // FindAllIssueReferencesMarkdown strips content from markdown markup
   243  // and returns a list of unvalidated references found in it.
   244  func FindAllIssueReferencesMarkdown(content string) []IssueReference {
   245  	return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))
   246  }
   247  
   248  func findAllIssueReferencesMarkdown(content string) []*rawReference {
   249  	bcontent, links := mdstripper.StripMarkdownBytes([]byte(content))
   250  	return findAllIssueReferencesBytes(bcontent, links)
   251  }
   252  
   253  func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) {
   254  	// We will iterate through the content, rewrite and simplify full references.
   255  	//
   256  	// We want to transform something like:
   257  	//
   258  	// this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo
   259  	// https://ourgitea.com/git/owner/repo/pulls/123456789
   260  	//
   261  	// Into something like:
   262  	//
   263  	// this is a #123456789, foo
   264  	// !123456789
   265  
   266  	pos := 0
   267  	for {
   268  		// re looks for something like: (\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)
   269  		match := re.FindSubmatchIndex((*contentBytes)[pos:])
   270  		if match == nil {
   271  			break
   272  		}
   273  		// match is a bunch of indices into the content from pos onwards so
   274  		// to simplify things let's just add pos to all of the indices in match
   275  		for i := range match {
   276  			match[i] += pos
   277  		}
   278  
   279  		// match[0]-match[1] is whole string
   280  		// match[2]-match[3] is preamble
   281  
   282  		// move the position to the end of the preamble
   283  		pos = match[3]
   284  
   285  		// match[4]-match[5] is owner/repo
   286  		// now copy the owner/repo to end of the preamble
   287  		endPos := pos + match[5] - match[4]
   288  		copy((*contentBytes)[pos:endPos], (*contentBytes)[match[4]:match[5]])
   289  
   290  		// move the current position to the end of the newly copied owner/repo
   291  		pos = endPos
   292  
   293  		// Now set the issue/pull marker:
   294  		//
   295  		// match[6]-match[7] == 'issues'
   296  		(*contentBytes)[pos] = '#'
   297  		if string((*contentBytes)[match[6]:match[7]]) == "pulls" {
   298  			(*contentBytes)[pos] = '!'
   299  		}
   300  		pos++
   301  
   302  		// Then add the issue/pull number
   303  		//
   304  		// match[8]-match[9] is the number
   305  		endPos = pos + match[9] - match[8]
   306  		copy((*contentBytes)[pos:endPos], (*contentBytes)[match[8]:match[9]])
   307  
   308  		// Now copy what's left at the end of the string to the new end position
   309  		copy((*contentBytes)[endPos:], (*contentBytes)[match[9]:])
   310  		// now we reset the length
   311  
   312  		// our new section has length endPos - match[3]
   313  		// our old section has length match[9] - match[3]
   314  		*contentBytes = (*contentBytes)[:len(*contentBytes)-match[9]+endPos]
   315  		pos = endPos
   316  	}
   317  }
   318  
   319  // FindAllIssueReferences returns a list of unvalidated references found in a string.
   320  func FindAllIssueReferences(content string) []IssueReference {
   321  	// Need to convert fully qualified html references to local system to #/! short codes
   322  	contentBytes := []byte(content)
   323  	if re := getGiteaIssuePullPattern(); re != nil {
   324  		convertFullHTMLReferencesToShortRefs(re, &contentBytes)
   325  	} else {
   326  		log.Debug("No GiteaIssuePullPattern pattern")
   327  	}
   328  	return rawToIssueReferenceList(findAllIssueReferencesBytes(contentBytes, []string{}))
   329  }
   330  
   331  // FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
   332  func FindRenderizableReferenceNumeric(content string, prOnly bool) (bool, *RenderizableReference) {
   333  	match := issueNumericPattern.FindStringSubmatchIndex(content)
   334  	if match == nil {
   335  		if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil {
   336  			return false, nil
   337  		}
   338  	}
   339  	r := getCrossReference(util.StringToReadOnlyBytes(content), match[2], match[3], false, prOnly)
   340  	if r == nil {
   341  		return false, nil
   342  	}
   343  
   344  	return true, &RenderizableReference{
   345  		Issue:          r.issue,
   346  		Owner:          r.owner,
   347  		Name:           r.name,
   348  		IsPull:         r.isPull,
   349  		RefLocation:    r.refLocation,
   350  		Action:         r.action,
   351  		ActionLocation: r.actionLocation,
   352  	}
   353  }
   354  
   355  // FindRenderizableReferenceRegexp returns the first regexp unvalidated references found in a string.
   356  func FindRenderizableReferenceRegexp(content string, pattern *regexp.Regexp) (bool, *RenderizableReference) {
   357  	match := pattern.FindStringSubmatchIndex(content)
   358  	if len(match) < 4 {
   359  		return false, nil
   360  	}
   361  
   362  	action, location := findActionKeywords([]byte(content), match[2])
   363  
   364  	return true, &RenderizableReference{
   365  		Issue:          content[match[2]:match[3]],
   366  		RefLocation:    &RefSpan{Start: match[0], End: match[1]},
   367  		Action:         action,
   368  		ActionLocation: location,
   369  		IsPull:         false,
   370  	}
   371  }
   372  
   373  // FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string.
   374  func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) {
   375  	match := issueAlphanumericPattern.FindStringSubmatchIndex(content)
   376  	if match == nil {
   377  		return false, nil
   378  	}
   379  
   380  	action, location := findActionKeywords([]byte(content), match[2])
   381  
   382  	return true, &RenderizableReference{
   383  		Issue:          string(content[match[2]:match[3]]),
   384  		RefLocation:    &RefSpan{Start: match[2], End: match[3]},
   385  		Action:         action,
   386  		ActionLocation: location,
   387  		IsPull:         false,
   388  	}
   389  }
   390  
   391  // FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice.
   392  func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference {
   393  	ret := make([]*rawReference, 0, 10)
   394  	pos := 0
   395  
   396  	// Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
   397  	// trailing spaces (\s#ref,\s), so if we get two consecutive references, the space
   398  	// from the second reference will be "eaten" by the first one:
   399  	// ...\s#ref1\s#ref2\s...	--> ...`\s#ref1\s`, (not) `#ref2,\s...`
   400  	for {
   401  		match := issueNumericPattern.FindSubmatchIndex(content[pos:])
   402  		if match == nil {
   403  			break
   404  		}
   405  		if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
   406  			ret = append(ret, ref)
   407  		}
   408  		notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
   409  		if notrail == nil {
   410  			pos = match[3] + pos
   411  		} else {
   412  			pos = match[3] + pos + notrail[1] - notrail[3]
   413  		}
   414  	}
   415  
   416  	pos = 0
   417  
   418  	for {
   419  		match := crossReferenceIssueNumericPattern.FindSubmatchIndex(content[pos:])
   420  		if match == nil {
   421  			break
   422  		}
   423  		if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
   424  			ret = append(ret, ref)
   425  		}
   426  		notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
   427  		if notrail == nil {
   428  			pos = match[3] + pos
   429  		} else {
   430  			pos = match[3] + pos + notrail[1] - notrail[3]
   431  		}
   432  	}
   433  
   434  	localhost := getGiteaHostName()
   435  	for _, link := range links {
   436  		if u, err := url.Parse(link); err == nil {
   437  			// Note: we're not attempting to match the URL scheme (http/https)
   438  			host := strings.ToLower(u.Host)
   439  			if host != "" && host != localhost {
   440  				continue
   441  			}
   442  			parts := strings.Split(u.EscapedPath(), "/")
   443  			// /user/repo/issues/3
   444  			if len(parts) != 5 || parts[0] != "" {
   445  				continue
   446  			}
   447  			var sep string
   448  			if parts[3] == "issues" {
   449  				sep = "#"
   450  			} else if parts[3] == "pulls" {
   451  				sep = "!"
   452  			} else {
   453  				continue
   454  			}
   455  			// Note: closing/reopening keywords not supported with URLs
   456  			bytes := []byte(parts[1] + "/" + parts[2] + sep + parts[4])
   457  			if ref := getCrossReference(bytes, 0, len(bytes), true, false); ref != nil {
   458  				ref.refLocation = nil
   459  				ret = append(ret, ref)
   460  			}
   461  		}
   462  	}
   463  
   464  	if len(ret) == 0 {
   465  		return ret
   466  	}
   467  
   468  	pos = 0
   469  
   470  	for {
   471  		match := timeLogPattern.FindSubmatchIndex(content[pos:])
   472  		if match == nil {
   473  			break
   474  		}
   475  
   476  		timeLogEntry := string(content[match[2]+pos+1 : match[3]+pos])
   477  
   478  		var f *rawReference
   479  		for _, ref := range ret {
   480  			if ref.refLocation != nil && ref.refLocation.End < match[2]+pos && (f == nil || f.refLocation.End < ref.refLocation.End) {
   481  				f = ref
   482  			}
   483  		}
   484  
   485  		pos = match[1] + pos
   486  
   487  		if f == nil {
   488  			f = ret[0]
   489  		}
   490  
   491  		if len(f.timeLog) == 0 {
   492  			f.timeLog = timeLogEntry
   493  		}
   494  	}
   495  
   496  	return ret
   497  }
   498  
   499  func getCrossReference(content []byte, start, end int, fromLink, prOnly bool) *rawReference {
   500  	sep := bytes.IndexAny(content[start:end], "#!")
   501  	if sep < 0 {
   502  		return nil
   503  	}
   504  	isPull := content[start+sep] == '!'
   505  	if prOnly && !isPull {
   506  		return nil
   507  	}
   508  	repo := string(content[start : start+sep])
   509  	issue := string(content[start+sep+1 : end])
   510  	index, err := strconv.ParseInt(string(issue), 10, 64)
   511  	if err != nil {
   512  		return nil
   513  	}
   514  	if repo == "" {
   515  		if fromLink {
   516  			// Markdown links must specify owner/repo
   517  			return nil
   518  		}
   519  		action, location := findActionKeywords(content, start)
   520  		return &rawReference{
   521  			index:          index,
   522  			action:         action,
   523  			issue:          issue,
   524  			isPull:         isPull,
   525  			refLocation:    &RefSpan{Start: start, End: end},
   526  			actionLocation: location,
   527  		}
   528  	}
   529  	parts := strings.Split(strings.ToLower(repo), "/")
   530  	if len(parts) != 2 {
   531  		return nil
   532  	}
   533  	owner, name := parts[0], parts[1]
   534  	if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) {
   535  		return nil
   536  	}
   537  	action, location := findActionKeywords(content, start)
   538  	return &rawReference{
   539  		index:          index,
   540  		owner:          owner,
   541  		name:           name,
   542  		action:         action,
   543  		issue:          issue,
   544  		isPull:         isPull,
   545  		refLocation:    &RefSpan{Start: start, End: end},
   546  		actionLocation: location,
   547  	}
   548  }
   549  
   550  func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) {
   551  	newKeywords()
   552  	var m []int
   553  	if issueCloseKeywordsPat != nil {
   554  		m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start])
   555  		if m != nil {
   556  			return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]}
   557  		}
   558  	}
   559  	if issueReopenKeywordsPat != nil {
   560  		m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start])
   561  		if m != nil {
   562  			return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]}
   563  		}
   564  	}
   565  	return XRefActionNone, nil
   566  }
   567  
   568  // IsXrefActionable returns true if the xref action is actionable (i.e. produces a result when resolved)
   569  func IsXrefActionable(ref *RenderizableReference, extTracker bool) bool {
   570  	if extTracker {
   571  		// External issues cannot be automatically closed
   572  		return false
   573  	}
   574  	return ref.Action == XRefActionCloses || ref.Action == XRefActionReopens
   575  }