code.gitea.io/gitea@v1.22.3/services/gitdiff/gitdiff.go (about)

     1  // Copyright 2014 The Gogs Authors. All rights reserved.
     2  // Copyright 2019 The Gitea Authors. All rights reserved.
     3  // SPDX-License-Identifier: MIT
     4  
     5  package gitdiff
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"context"
    11  	"fmt"
    12  	"html"
    13  	"html/template"
    14  	"io"
    15  	"net/url"
    16  	"sort"
    17  	"strings"
    18  	"time"
    19  
    20  	"code.gitea.io/gitea/models/db"
    21  	git_model "code.gitea.io/gitea/models/git"
    22  	issues_model "code.gitea.io/gitea/models/issues"
    23  	pull_model "code.gitea.io/gitea/models/pull"
    24  	user_model "code.gitea.io/gitea/models/user"
    25  	"code.gitea.io/gitea/modules/analyze"
    26  	"code.gitea.io/gitea/modules/charset"
    27  	"code.gitea.io/gitea/modules/git"
    28  	"code.gitea.io/gitea/modules/highlight"
    29  	"code.gitea.io/gitea/modules/lfs"
    30  	"code.gitea.io/gitea/modules/log"
    31  	"code.gitea.io/gitea/modules/optional"
    32  	"code.gitea.io/gitea/modules/setting"
    33  	"code.gitea.io/gitea/modules/translation"
    34  
    35  	"github.com/sergi/go-diff/diffmatchpatch"
    36  	stdcharset "golang.org/x/net/html/charset"
    37  	"golang.org/x/text/encoding"
    38  	"golang.org/x/text/transform"
    39  )
    40  
// DiffLineType represents the type of DiffLine.
type DiffLineType uint8

// DiffLineType possible values.
//
// The values start at 1 (iota + 1), so the zero value of DiffLineType is not
// one of the named types. GetHTMLDiffLineType maps DiffLineAdd to "add",
// DiffLineDel to "del", DiffLineSection to "tag" and everything else
// (including DiffLinePlain) to "same".
const (
	DiffLinePlain DiffLineType = iota + 1
	DiffLineAdd
	DiffLineDel
	DiffLineSection
)
    51  
// DiffFileType represents the type of DiffFile.
type DiffFileType uint8

// DiffFileType possible values.
//
// The values start at 1 (iota + 1). ParsePatch assigns DiffFileAdd on a
// "new file" header, DiffFileDel on a "deleted" header, DiffFileRename on
// "rename from"/"rename to" (and on "similarity index 100%") and DiffFileCopy
// on "copy from"/"copy to".
const (
	DiffFileAdd DiffFileType = iota + 1
	DiffFileChange
	DiffFileDel
	DiffFileRename
	DiffFileCopy
)
    63  
// DiffLineExpandDirection represents the DiffLineSection expand direction
type DiffLineExpandDirection uint8

// DiffLineExpandDirection possible values.
//
// The values start at 1 (iota + 1). See DiffLine.GetExpandDirection for the
// conditions under which each value is reported.
const (
	DiffLineExpandNone DiffLineExpandDirection = iota + 1
	DiffLineExpandSingle
	DiffLineExpandUpDown
	DiffLineExpandUp
	DiffLineExpandDown
)
    75  
// DiffLine represents a line difference in a DiffSection.
//
// LeftIdx and RightIdx are the line indexes on the left and right side of the
// diff; GetLine treats a zero index as "no line on that side" when looking for
// added/deleted lines. Content holds the raw diff line; for lines produced by
// the patch parser its first byte is the marker (' ', '+', '-' or '@') and
// several methods slice it off with Content[1:]. Comments is filled by
// Diff.LoadComments. SectionInfo is set for DiffLineSection lines (hunk
// headers and the synthetic tail section).
// NOTE(review): the meaning of Match is not visible in this part of the file.
type DiffLine struct {
	LeftIdx     int
	RightIdx    int
	Match       int
	Type        DiffLineType
	Content     string
	Comments    []*issues_model.Comment
	SectionInfo *DiffLineSectionInfo
}
    86  
// DiffLineSectionInfo represents diff line section meta data
//
// Path is the file path the section belongs to. LeftIdx/RightIdx and
// LeftHunkSize/RightHunkSize are the values git.ParseDiffHunkString extracts
// from the hunk header (see getDiffLineSectionInfo). LastLeftIdx/LastRightIdx
// are the left/right line indexes reached before this section started; the
// distance between them and LeftIdx/RightIdx is what GetExpandDirection uses
// to decide whether hidden lines can be expanded.
type DiffLineSectionInfo struct {
	Path          string
	LastLeftIdx   int
	LastRightIdx  int
	LeftIdx       int
	RightIdx      int
	LeftHunkSize  int
	RightHunkSize int
}
    97  
// BlobExcerptChunkSize represents the maximum number of lines of an excerpt.
// GetExpandDirection reports DiffLineExpandUpDown when the gap on the right
// side before a hunk is larger than this value.
const BlobExcerptChunkSize = 20
   100  
   101  // GetType returns the type of DiffLine.
   102  func (d *DiffLine) GetType() int {
   103  	return int(d.Type)
   104  }
   105  
   106  // GetHTMLDiffLineType returns the diff line type name for HTML
   107  func (d *DiffLine) GetHTMLDiffLineType() string {
   108  	switch d.Type {
   109  	case DiffLineAdd:
   110  		return "add"
   111  	case DiffLineDel:
   112  		return "del"
   113  	case DiffLineSection:
   114  		return "tag"
   115  	}
   116  	return "same"
   117  }
   118  
   119  // CanComment returns whether a line can get commented
   120  func (d *DiffLine) CanComment() bool {
   121  	return len(d.Comments) == 0 && d.Type != DiffLineSection
   122  }
   123  
   124  // GetCommentSide returns the comment side of the first comment, if not set returns empty string
   125  func (d *DiffLine) GetCommentSide() string {
   126  	if len(d.Comments) == 0 {
   127  		return ""
   128  	}
   129  	return d.Comments[0].DiffSide()
   130  }
   131  
   132  // GetLineTypeMarker returns the line type marker
   133  func (d *DiffLine) GetLineTypeMarker() string {
   134  	if strings.IndexByte(" +-", d.Content[0]) > -1 {
   135  		return d.Content[0:1]
   136  	}
   137  	return ""
   138  }
   139  
   140  // GetBlobExcerptQuery builds query string to get blob excerpt
   141  func (d *DiffLine) GetBlobExcerptQuery() string {
   142  	query := fmt.Sprintf(
   143  		"last_left=%d&last_right=%d&"+
   144  			"left=%d&right=%d&"+
   145  			"left_hunk_size=%d&right_hunk_size=%d&"+
   146  			"path=%s",
   147  		d.SectionInfo.LastLeftIdx, d.SectionInfo.LastRightIdx,
   148  		d.SectionInfo.LeftIdx, d.SectionInfo.RightIdx,
   149  		d.SectionInfo.LeftHunkSize, d.SectionInfo.RightHunkSize,
   150  		url.QueryEscape(d.SectionInfo.Path))
   151  	return query
   152  }
   153  
   154  // GetExpandDirection gets DiffLineExpandDirection
   155  func (d *DiffLine) GetExpandDirection() DiffLineExpandDirection {
   156  	if d.Type != DiffLineSection || d.SectionInfo == nil || d.SectionInfo.LeftIdx-d.SectionInfo.LastLeftIdx <= 1 || d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx <= 1 {
   157  		return DiffLineExpandNone
   158  	}
   159  	if d.SectionInfo.LastLeftIdx <= 0 && d.SectionInfo.LastRightIdx <= 0 {
   160  		return DiffLineExpandUp
   161  	} else if d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx > BlobExcerptChunkSize && d.SectionInfo.RightHunkSize > 0 {
   162  		return DiffLineExpandUpDown
   163  	} else if d.SectionInfo.LeftHunkSize <= 0 && d.SectionInfo.RightHunkSize <= 0 {
   164  		return DiffLineExpandDown
   165  	}
   166  	return DiffLineExpandSingle
   167  }
   168  
   169  func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int) *DiffLineSectionInfo {
   170  	leftLine, leftHunk, rightLine, righHunk := git.ParseDiffHunkString(line)
   171  
   172  	return &DiffLineSectionInfo{
   173  		Path:          treePath,
   174  		LastLeftIdx:   lastLeftIdx,
   175  		LastRightIdx:  lastRightIdx,
   176  		LeftIdx:       leftLine,
   177  		RightIdx:      rightLine,
   178  		LeftHunkSize:  leftHunk,
   179  		RightHunkSize: righHunk,
   180  	}
   181  }
   182  
   183  // escape a line's content or return <br> needed for copy/paste purposes
   184  func getLineContent(content string, locale translation.Locale) DiffInline {
   185  	if len(content) > 0 {
   186  		return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content)), locale)
   187  	}
   188  	return DiffInline{EscapeStatus: &charset.EscapeStatus{}, Content: "<br>"}
   189  }
   190  
// DiffSection represents a section of a DiffFile.
//
// file points back to the owning DiffFile (it is nil for the synthetic tail
// section built by GetTailSection) and is used to look up the highlighting
// language. FileName is the file's name and Lines holds the section's lines,
// starting with the section header line for sections produced by the parser.
// NOTE(review): Name is not assigned anywhere in the visible part of the file.
type DiffSection struct {
	file     *DiffFile
	FileName string
	Name     string
	Lines    []*DiffLine
}
   198  
// Markup fragments diffToHTML uses to wrap the inserted or deleted parts of
// an inline diff: an opening span for added code, an opening span for removed
// code, and the closing tag shared by both.
var (
	addedCodePrefix   = []byte(`<span class="added-code">`)
	removedCodePrefix = []byte(`<span class="removed-code">`)
	codeTagSuffix     = []byte(`</span>`)
)
   204  
   205  func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string {
   206  	buf := bytes.NewBuffer(nil)
   207  	// restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary
   208  	for _, tag := range lineWrapperTags {
   209  		buf.WriteString(tag)
   210  	}
   211  	for _, diff := range diffs {
   212  		switch {
   213  		case diff.Type == diffmatchpatch.DiffEqual:
   214  			buf.WriteString(diff.Text)
   215  		case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
   216  			buf.Write(addedCodePrefix)
   217  			buf.WriteString(diff.Text)
   218  			buf.Write(codeTagSuffix)
   219  		case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
   220  			buf.Write(removedCodePrefix)
   221  			buf.WriteString(diff.Text)
   222  			buf.Write(codeTagSuffix)
   223  		}
   224  	}
   225  	for range lineWrapperTags {
   226  		buf.WriteString("</span>")
   227  	}
   228  	return buf.String()
   229  }
   230  
   231  // GetLine gets a specific line by type (add or del) and file line number
   232  func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine {
   233  	var (
   234  		difference    = 0
   235  		addCount      = 0
   236  		delCount      = 0
   237  		matchDiffLine *DiffLine
   238  	)
   239  
   240  LOOP:
   241  	for _, diffLine := range diffSection.Lines {
   242  		switch diffLine.Type {
   243  		case DiffLineAdd:
   244  			addCount++
   245  		case DiffLineDel:
   246  			delCount++
   247  		default:
   248  			if matchDiffLine != nil {
   249  				break LOOP
   250  			}
   251  			difference = diffLine.RightIdx - diffLine.LeftIdx
   252  			addCount = 0
   253  			delCount = 0
   254  		}
   255  
   256  		switch lineType {
   257  		case DiffLineDel:
   258  			if diffLine.RightIdx == 0 && diffLine.LeftIdx == idx-difference {
   259  				matchDiffLine = diffLine
   260  			}
   261  		case DiffLineAdd:
   262  			if diffLine.LeftIdx == 0 && diffLine.RightIdx == idx+difference {
   263  				matchDiffLine = diffLine
   264  			}
   265  		}
   266  	}
   267  
   268  	if addCount == delCount {
   269  		return matchDiffLine
   270  	}
   271  	return nil
   272  }
   273  
// diffMatchPatch is a package-level diffmatchpatch instance; init adjusts its
// DiffEditCost.
var diffMatchPatch = diffmatchpatch.New()

// init sets DiffEditCost on the shared diffMatchPatch instance to 100.
// NOTE(review): presumably this tunes the library's diff cleanup heuristics —
// confirm the exact effect against the go-diff documentation.
func init() {
	diffMatchPatch.DiffEditCost = 100
}
   279  
// DiffInline is a struct that has a content and escape status.
// Content is the HTML to render for a line and EscapeStatus is the status
// reported by charset.EscapeControlHTML for that content (an empty status is
// used for the <br> placeholder produced by getLineContent).
type DiffInline struct {
	EscapeStatus *charset.EscapeStatus
	Content      template.HTML
}
   285  
   286  // DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped
   287  func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline {
   288  	status, content := charset.EscapeControlHTML(s, locale)
   289  	return DiffInline{EscapeStatus: status, Content: content}
   290  }
   291  
   292  // DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped
   293  func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline {
   294  	highlighted, _ := highlight.Code(fileName, language, code)
   295  	status, content := charset.EscapeControlHTML(highlighted, locale)
   296  	return DiffInline{EscapeStatus: status, Content: content}
   297  }
   298  
   299  // GetComputedInlineDiffFor computes inline diff for the given line.
   300  func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline {
   301  	if setting.Git.DisableDiffHighlight {
   302  		return getLineContent(diffLine.Content[1:], locale)
   303  	}
   304  
   305  	var (
   306  		compareDiffLine *DiffLine
   307  		diff1           string
   308  		diff2           string
   309  	)
   310  
   311  	language := ""
   312  	if diffSection.file != nil {
   313  		language = diffSection.file.Language
   314  	}
   315  
   316  	// try to find equivalent diff line. ignore, otherwise
   317  	switch diffLine.Type {
   318  	case DiffLineSection:
   319  		return getLineContent(diffLine.Content[1:], locale)
   320  	case DiffLineAdd:
   321  		compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx)
   322  		if compareDiffLine == nil {
   323  			return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
   324  		}
   325  		diff1 = compareDiffLine.Content
   326  		diff2 = diffLine.Content
   327  	case DiffLineDel:
   328  		compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx)
   329  		if compareDiffLine == nil {
   330  			return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
   331  		}
   332  		diff1 = diffLine.Content
   333  		diff2 = compareDiffLine.Content
   334  	default:
   335  		if strings.IndexByte(" +-", diffLine.Content[0]) > -1 {
   336  			return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
   337  		}
   338  		return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content, locale)
   339  	}
   340  
   341  	hcd := newHighlightCodeDiff()
   342  	diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:])
   343  	// it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back
   344  	// if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)"
   345  	diffHTML := diffToHTML(nil, diffRecord, diffLine.Type)
   346  	return DiffInlineWithUnicodeEscape(template.HTML(diffHTML), locale)
   347  }
   348  
// DiffFile represents a file diff.
//
// Fields populated by ParsePatch/parseHunks, as far as visible in this file:
//   - Name/OldName: the new and old path; while IsAmbiguous is set they are
//     taken from the "rename"/"copy" headers or the "---"/"+++" lines, after
//     which IsAmbiguous is cleared.
//   - NameHash: git.HashFilePathForWebUI(Name).
//   - Addition/Deletion: per-file counts that ParsePatch sums into the Diff
//     totals.
//   - Type, IsCreated, IsDeleted, IsRenamed, IsBin: derived from the extended
//     header lines ("new file", "deleted", "rename ...", "copy ...", "Binary").
//   - IsSubmodule: set when a header line ends in mode 160000.
//   - Mode/OldMode: from the "new mode"/"new file mode" and "old mode" lines.
//   - Sections: one DiffSection per hunk.
//   - IsIncomplete/IsIncompleteLineTooLong: set when hunks were cut off
//     because of the line count or line length limits.
//
// Language is used as the highlighting language for the file's lines, and
// ShouldBeHidden reports true when IsGenerated or IsViewed is set.
// NOTE(review): Index, IsLFSFile, IsProtected, IsVendored and
// HasChangedSinceLastReview are not assigned in the visible part of the file.
type DiffFile struct {
	Name                      string
	NameHash                  string
	OldName                   string
	Index                     int
	Addition, Deletion        int
	Type                      DiffFileType
	IsCreated                 bool
	IsDeleted                 bool
	IsBin                     bool
	IsLFSFile                 bool
	IsRenamed                 bool
	IsAmbiguous               bool
	IsSubmodule               bool
	Sections                  []*DiffSection
	IsIncomplete              bool
	IsIncompleteLineTooLong   bool
	IsProtected               bool
	IsGenerated               bool
	IsVendored                bool
	IsViewed                  bool // User specific
	HasChangedSinceLastReview bool // User specific
	Language                  string
	Mode                      string
	OldMode                   string
}
   376  
   377  // GetType returns type of diff file.
   378  func (diffFile *DiffFile) GetType() int {
   379  	return int(diffFile.Type)
   380  }
   381  
   382  // GetTailSection creates a fake DiffLineSection if the last section is not the end of the file
   383  func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection {
   384  	if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile {
   385  		return nil
   386  	}
   387  	leftCommit, err := gitRepo.GetCommit(leftCommitID)
   388  	if err != nil {
   389  		return nil
   390  	}
   391  	rightCommit, err := gitRepo.GetCommit(rightCommitID)
   392  	if err != nil {
   393  		return nil
   394  	}
   395  	lastSection := diffFile.Sections[len(diffFile.Sections)-1]
   396  	lastLine := lastSection.Lines[len(lastSection.Lines)-1]
   397  	leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name)
   398  	rightLineCount := getCommitFileLineCount(rightCommit, diffFile.Name)
   399  	if leftLineCount <= lastLine.LeftIdx || rightLineCount <= lastLine.RightIdx {
   400  		return nil
   401  	}
   402  	tailDiffLine := &DiffLine{
   403  		Type:    DiffLineSection,
   404  		Content: " ",
   405  		SectionInfo: &DiffLineSectionInfo{
   406  			Path:         diffFile.Name,
   407  			LastLeftIdx:  lastLine.LeftIdx,
   408  			LastRightIdx: lastLine.RightIdx,
   409  			LeftIdx:      leftLineCount,
   410  			RightIdx:     rightLineCount,
   411  		},
   412  	}
   413  	tailSection := &DiffSection{FileName: diffFile.Name, Lines: []*DiffLine{tailDiffLine}}
   414  	return tailSection
   415  }
   416  
   417  // GetDiffFileName returns the name of the diff file, or its old name in case it was deleted
   418  func (diffFile *DiffFile) GetDiffFileName() string {
   419  	if diffFile.Name == "" {
   420  		return diffFile.OldName
   421  	}
   422  	return diffFile.Name
   423  }
   424  
   425  func (diffFile *DiffFile) ShouldBeHidden() bool {
   426  	return diffFile.IsGenerated || diffFile.IsViewed
   427  }
   428  
   429  func (diffFile *DiffFile) ModeTranslationKey(mode string) string {
   430  	switch mode {
   431  	case "040000":
   432  		return "git.filemode.directory"
   433  	case "100644":
   434  		return "git.filemode.normal_file"
   435  	case "100755":
   436  		return "git.filemode.executable_file"
   437  	case "120000":
   438  		return "git.filemode.symbolic_link"
   439  	case "160000":
   440  		return "git.filemode.submodule"
   441  	default:
   442  		return mode
   443  	}
   444  }
   445  
   446  func getCommitFileLineCount(commit *git.Commit, filePath string) int {
   447  	blob, err := commit.GetBlobByPath(filePath)
   448  	if err != nil {
   449  		return 0
   450  	}
   451  	lineCount, err := blob.GetBlobLineCount()
   452  	if err != nil {
   453  		return 0
   454  	}
   455  	return lineCount
   456  }
   457  
// Diff represents a difference between two git trees.
//
// Files holds the parsed files and NumFiles their count. TotalAddition and
// TotalDeletion are the sums of the per-file Addition/Deletion counts. When
// ParsePatch stops because the file limit was reached it sets IsIncomplete
// and stores the name of the first file that was not parsed in End.
// NOTE(review): Start and NumViewedFiles are not assigned in the visible part
// of the file.
type Diff struct {
	Start, End                   string
	NumFiles                     int
	TotalAddition, TotalDeletion int
	Files                        []*DiffFile
	IsIncomplete                 bool
	NumViewedFiles               int // user-specific
}
   467  
   468  // LoadComments loads comments into each line
   469  func (diff *Diff) LoadComments(ctx context.Context, issue *issues_model.Issue, currentUser *user_model.User, showOutdatedComments bool) error {
   470  	allComments, err := issues_model.FetchCodeComments(ctx, issue, currentUser, showOutdatedComments)
   471  	if err != nil {
   472  		return err
   473  	}
   474  	for _, file := range diff.Files {
   475  		if lineCommits, ok := allComments[file.Name]; ok {
   476  			for _, section := range file.Sections {
   477  				for _, line := range section.Lines {
   478  					if comments, ok := lineCommits[int64(line.LeftIdx*-1)]; ok {
   479  						line.Comments = append(line.Comments, comments...)
   480  					}
   481  					if comments, ok := lineCommits[int64(line.RightIdx)]; ok {
   482  						line.Comments = append(line.Comments, comments...)
   483  					}
   484  					sort.SliceStable(line.Comments, func(i, j int) bool {
   485  						return line.Comments[i].CreatedUnix < line.Comments[j].CreatedUnix
   486  					})
   487  				}
   488  			}
   489  		}
   490  	}
   491  	return nil
   492  }
   493  
// cmdDiffHead is the prefix of the line that starts each file's section in a
// git patch; ParsePatch and skipToNextDiffHead use it to find file boundaries.
const cmdDiffHead = "diff --git "
   495  
// ParsePatch builds a Diff object from a io.Reader and some parameters.
//
// The reader must deliver git patch text in which every file starts with a
// "diff --git " line; any other first line of a file is reported as an error.
//
//   - maxLines and maxLineCharacters are passed on to parseHunks, which limits
//     the hunks of each file accordingly. maxLineCharacters also determines
//     the read buffer size (at least 4096 bytes).
//   - maxFiles limits the number of parsed files when it is greater than -1.
//     Once that many files have been collected, the name of the next file is
//     stored in diff.End, diff.IsIncomplete is set and the rest of the reader
//     is discarded.
//   - skipToFile, when not empty, makes the parser skip all files until it
//     reaches the one whose name equals skipToFile.
//
// After parsing, the content of each file is checked per line type for a
// non-UTF-8 encoding and converted where a decoder is available. The returned
// Diff is non-nil even when an error is returned.
func ParsePatch(ctx context.Context, maxLines, maxLineCharacters, maxFiles int, reader io.Reader, skipToFile string) (*Diff, error) {
	log.Debug("ParsePatch(%d, %d, %d, ..., %s)", maxLines, maxLineCharacters, maxFiles, skipToFile)
	var curFile *DiffFile

	skipping := skipToFile != ""

	diff := &Diff{Files: make([]*DiffFile, 0)}

	sb := strings.Builder{}

	// OK let's set a reasonable buffer size.
	// This should be at least the size of maxLineCharacters or 4096 whichever is larger.
	readerSize := maxLineCharacters
	if readerSize < 4096 {
		readerSize = 4096
	}

	input := bufio.NewReaderSize(reader, readerSize)
	line, err := input.ReadString('\n')
	if err != nil {
		// An io.EOF on the very first read is treated as an empty patch.
		if err == io.EOF {
			return diff, nil
		}
		return diff, err
	}

	// prepareValue strips the header prefix p from s and trims surrounding whitespace.
	prepareValue := func(s, p string) string {
		return strings.TrimSpace(strings.TrimPrefix(s, p))
	}

	// Each iteration of parsingLoop handles one file; on entry, line holds that file's "diff --git " line.
parsingLoop:
	for {
		// 1. A patch file always begins with `diff --git ` + `a/path b/path` (possibly quoted)
		// if it does not we have bad input!
		if !strings.HasPrefix(line, cmdDiffHead) {
			return diff, fmt.Errorf("invalid first file line: %s", line)
		}

		// File limit reached: remember where we stopped and drain the rest of the input.
		if maxFiles > -1 && len(diff.Files) >= maxFiles {
			lastFile := createDiffFile(diff, line)
			diff.End = lastFile.Name
			diff.IsIncomplete = true
			_, err := io.Copy(io.Discard, reader)
			if err != nil {
				// By the definition of io.Copy this never returns io.EOF
				return diff, fmt.Errorf("error during io.Copy: %w", err)
			}
			break parsingLoop
		}

		curFile = createDiffFile(diff, line)
		if skipping {
			if curFile.Name != skipToFile {
				line, err = skipToNextDiffHead(input)
				if err != nil {
					if err == io.EOF {
						return diff, nil
					}
					return diff, err
				}
				continue
			}
			skipping = false
		}

		diff.Files = append(diff.Files, curFile)

		// 2. It is followed by one or more extended header lines:
		//
		//     old mode <mode>
		//     new mode <mode>
		//     deleted file mode <mode>
		//     new file mode <mode>
		//     copy from <path>
		//     copy to <path>
		//     rename from <path>
		//     rename to <path>
		//     similarity index <number>
		//     dissimilarity index <number>
		//     index <hash>..<hash> <mode>
		//
		// * <mode> 6-digit octal numbers including the file type and file permission bits.
		// * <path> does not include the a/ and b/ prefixes
		// * <number> percentage of unchanged lines for similarity, percentage of changed
		//   lines dissimilarity as integer rounded down with terminal %. 100% => equal files.
		// * The index line includes the blob object names before and after the change.
		//   The <mode> is included if the file mode does not change; otherwise, separate
		//   lines indicate the old and the new mode.
		// 3. Following this header the "standard unified" diff format header may be encountered: (but not for every case...)
		//
		//     --- a/<path>
		//     +++ b/<path>
		//
		// With multiple hunks
		//
		//     @@ <hunk descriptor> @@
		//     +added line
		//     -removed line
		//      unchanged line
		//
		// 4. Binary files get:
		//
		//     Binary files a/<path> and b/<path> differ
		//
		// but one of a/<path> and b/<path> could be /dev/null.
	curFileLoop:
		for {
			line, err = input.ReadString('\n')
			if err != nil {
				if err != io.EOF {
					return diff, err
				}
				break parsingLoop
			}

			switch {
			case strings.HasPrefix(line, cmdDiffHead):
				// The next file starts here; line is handed to the next parsingLoop iteration.
				break curFileLoop
			case strings.HasPrefix(line, "old mode ") ||
				strings.HasPrefix(line, "new mode "):

				if strings.HasPrefix(line, "old mode ") {
					curFile.OldMode = prepareValue(line, "old mode ")
				}
				if strings.HasPrefix(line, "new mode ") {
					curFile.Mode = prepareValue(line, "new mode ")
				}

				if strings.HasSuffix(line, " 160000\n") {
					curFile.IsSubmodule = true
				}
			case strings.HasPrefix(line, "rename from "):
				curFile.IsRenamed = true
				curFile.Type = DiffFileRename
				if curFile.IsAmbiguous {
					curFile.OldName = prepareValue(line, "rename from ")
				}
			case strings.HasPrefix(line, "rename to "):
				curFile.IsRenamed = true
				curFile.Type = DiffFileRename
				if curFile.IsAmbiguous {
					curFile.Name = prepareValue(line, "rename to ")
					curFile.IsAmbiguous = false
				}
			case strings.HasPrefix(line, "copy from "):
				curFile.IsRenamed = true
				curFile.Type = DiffFileCopy
				if curFile.IsAmbiguous {
					curFile.OldName = prepareValue(line, "copy from ")
				}
			case strings.HasPrefix(line, "copy to "):
				curFile.IsRenamed = true
				curFile.Type = DiffFileCopy
				if curFile.IsAmbiguous {
					curFile.Name = prepareValue(line, "copy to ")
					curFile.IsAmbiguous = false
				}
			case strings.HasPrefix(line, "new file"):
				curFile.Type = DiffFileAdd
				curFile.IsCreated = true
				if strings.HasPrefix(line, "new file mode ") {
					curFile.Mode = prepareValue(line, "new file mode ")
				}
				if strings.HasSuffix(line, " 160000\n") {
					curFile.IsSubmodule = true
				}
			case strings.HasPrefix(line, "deleted"):
				curFile.Type = DiffFileDel
				curFile.IsDeleted = true
				if strings.HasSuffix(line, " 160000\n") {
					curFile.IsSubmodule = true
				}
			case strings.HasPrefix(line, "index"):
				if strings.HasSuffix(line, " 160000\n") {
					curFile.IsSubmodule = true
				}
			case strings.HasPrefix(line, "similarity index 100%"):
				curFile.Type = DiffFileRename
			case strings.HasPrefix(line, "Binary"):
				curFile.IsBin = true
			case strings.HasPrefix(line, "--- "):
				// Handle ambiguous filenames
				if curFile.IsAmbiguous {
					// The shortest string that can end up here is:
					// "--- a\t\n" without the quotes.
					// That line has a len() of 7 but does not contain an old name,
					// so the line needs a length of at least 8 to carry one.
					// Without this check the slicing below could panic with an out-of-bounds index.
					if len(line) > 7 && line[4] == 'a' {
						curFile.OldName = line[6 : len(line)-1]
						if line[len(line)-2] == '\t' {
							curFile.OldName = curFile.OldName[:len(curFile.OldName)-1]
						}
					} else {
						curFile.OldName = ""
					}
				}
				// Otherwise do nothing with this line
			case strings.HasPrefix(line, "+++ "):
				// Handle ambiguous filenames
				if curFile.IsAmbiguous {
					if len(line) > 6 && line[4] == 'b' {
						curFile.Name = line[6 : len(line)-1]
						if line[len(line)-2] == '\t' {
							curFile.Name = curFile.Name[:len(curFile.Name)-1]
						}
						if curFile.OldName == "" {
							curFile.OldName = curFile.Name
						}
					} else {
						curFile.Name = curFile.OldName
					}
					curFile.IsAmbiguous = false
				}
				// Otherwise do nothing with this line, but now switch to parsing hunks
				lineBytes, isFragment, err := parseHunks(ctx, curFile, maxLines, maxLineCharacters, input)
				// The counts are added even when parseHunks stopped with an error.
				diff.TotalAddition += curFile.Addition
				diff.TotalDeletion += curFile.Deletion
				if err != nil {
					if err != io.EOF {
						return diff, err
					}
					break parsingLoop
				}
				// parseHunks returns the first line after the hunks; if it did not fit
				// into the buffer, read the remaining fragments to rebuild the full line.
				sb.Reset()
				_, _ = sb.Write(lineBytes)
				for isFragment {
					lineBytes, isFragment, err = input.ReadLine()
					if err != nil {
						// Now by the definition of ReadLine this cannot be io.EOF
						return diff, fmt.Errorf("unable to ReadLine: %w", err)
					}
					_, _ = sb.Write(lineBytes)
				}
				line = sb.String()
				sb.Reset()

				break curFileLoop
			}
		}
	}

	// TODO: There are numerous issues with this:
	// - we might want to consider detecting encoding while parsing but...
	// - we're likely to fail to get the correct encoding here anyway as we won't have enough information
	//
	// For every file, the content of all lines is collected per line type
	// (plain, add, del) and an encoding is detected for each collection.
	diffLineTypeBuffers := make(map[DiffLineType]*bytes.Buffer, 3)
	diffLineTypeDecoders := make(map[DiffLineType]*encoding.Decoder, 3)
	diffLineTypeBuffers[DiffLinePlain] = new(bytes.Buffer)
	diffLineTypeBuffers[DiffLineAdd] = new(bytes.Buffer)
	diffLineTypeBuffers[DiffLineDel] = new(bytes.Buffer)
	for _, f := range diff.Files {
		f.NameHash = git.HashFilePathForWebUI(f.Name)

		for _, buffer := range diffLineTypeBuffers {
			buffer.Reset()
		}
		for _, sec := range f.Sections {
			for _, l := range sec.Lines {
				if l.Type == DiffLineSection {
					continue
				}
				// Content[1:] drops the leading marker byte of the line.
				diffLineTypeBuffers[l.Type].WriteString(l.Content[1:])
				diffLineTypeBuffers[l.Type].WriteString("\n")
			}
		}
		for lineType, buffer := range diffLineTypeBuffers {
			diffLineTypeDecoders[lineType] = nil
			if buffer.Len() == 0 {
				continue
			}
			charsetLabel, err := charset.DetectEncoding(buffer.Bytes())
			if charsetLabel != "UTF-8" && err == nil {
				// NOTE: this local variable shadows the imported encoding package within this block.
				encoding, _ := stdcharset.Lookup(charsetLabel)
				if encoding != nil {
					diffLineTypeDecoders[lineType] = encoding.NewDecoder()
				}
			}
		}
		for _, sec := range f.Sections {
			for _, l := range sec.Lines {
				decoder := diffLineTypeDecoders[l.Type]
				if decoder != nil {
					// On a transform error the line content is left unchanged.
					if c, _, err := transform.String(decoder, l.Content[1:]); err == nil {
						l.Content = l.Content[0:1] + c
					}
				}
			}
		}
	}

	diff.NumFiles = len(diff.Files)
	return diff, nil
}
   790  
   791  func skipToNextDiffHead(input *bufio.Reader) (line string, err error) {
   792  	// need to skip until the next cmdDiffHead
   793  	var isFragment, wasFragment bool
   794  	var lineBytes []byte
   795  	for {
   796  		lineBytes, isFragment, err = input.ReadLine()
   797  		if err != nil {
   798  			return "", err
   799  		}
   800  		if wasFragment {
   801  			wasFragment = isFragment
   802  			continue
   803  		}
   804  		if bytes.HasPrefix(lineBytes, []byte(cmdDiffHead)) {
   805  			break
   806  		}
   807  		wasFragment = isFragment
   808  	}
   809  	line = string(lineBytes)
   810  	if isFragment {
   811  		var tail string
   812  		tail, err = input.ReadString('\n')
   813  		if err != nil {
   814  			return "", err
   815  		}
   816  		line += tail
   817  	}
   818  	return line, err
   819  }
   820  
   821  func parseHunks(ctx context.Context, curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio.Reader) (lineBytes []byte, isFragment bool, err error) {
   822  	sb := strings.Builder{}
   823  
   824  	var (
   825  		curSection        *DiffSection
   826  		curFileLinesCount int
   827  		curFileLFSPrefix  bool
   828  	)
   829  
   830  	lastLeftIdx := -1
   831  	leftLine, rightLine := 1, 1
   832  
   833  	for {
   834  		for isFragment {
   835  			curFile.IsIncomplete = true
   836  			curFile.IsIncompleteLineTooLong = true
   837  			_, isFragment, err = input.ReadLine()
   838  			if err != nil {
   839  				// Now by the definition of ReadLine this cannot be io.EOF
   840  				return nil, false, fmt.Errorf("unable to ReadLine: %w", err)
   841  			}
   842  		}
   843  		sb.Reset()
   844  		lineBytes, isFragment, err = input.ReadLine()
   845  		if err != nil {
   846  			if err == io.EOF {
   847  				return lineBytes, isFragment, err
   848  			}
   849  			err = fmt.Errorf("unable to ReadLine: %w", err)
   850  			return nil, false, err
   851  		}
   852  		if lineBytes[0] == 'd' {
   853  			// End of hunks
   854  			return lineBytes, isFragment, err
   855  		}
   856  
   857  		switch lineBytes[0] {
   858  		case '@':
   859  			if maxLines > -1 && curFileLinesCount >= maxLines {
   860  				curFile.IsIncomplete = true
   861  				continue
   862  			}
   863  
   864  			_, _ = sb.Write(lineBytes)
   865  			for isFragment {
   866  				// This is very odd indeed - we're in a section header and the line is too long
   867  				// This really shouldn't happen...
   868  				lineBytes, isFragment, err = input.ReadLine()
   869  				if err != nil {
   870  					// Now by the definition of ReadLine this cannot be io.EOF
   871  					return nil, false, fmt.Errorf("unable to ReadLine: %w", err)
   872  				}
   873  				_, _ = sb.Write(lineBytes)
   874  			}
   875  			line := sb.String()
   876  
   877  			// Create a new section to represent this hunk
   878  			curSection = &DiffSection{file: curFile}
   879  			lastLeftIdx = -1
   880  			curFile.Sections = append(curFile.Sections, curSection)
   881  
   882  			lineSectionInfo := getDiffLineSectionInfo(curFile.Name, line, leftLine-1, rightLine-1)
   883  			diffLine := &DiffLine{
   884  				Type:        DiffLineSection,
   885  				Content:     line,
   886  				SectionInfo: lineSectionInfo,
   887  			}
   888  			curSection.Lines = append(curSection.Lines, diffLine)
   889  			curSection.FileName = curFile.Name
   890  			// update line number.
   891  			leftLine = lineSectionInfo.LeftIdx
   892  			rightLine = lineSectionInfo.RightIdx
   893  			continue
   894  		case '\\':
   895  			if maxLines > -1 && curFileLinesCount >= maxLines {
   896  				curFile.IsIncomplete = true
   897  				continue
   898  			}
   899  			// This is used only to indicate that the current file does not have a terminal newline
   900  			if !bytes.Equal(lineBytes, []byte("\\ No newline at end of file")) {
   901  				return nil, false, fmt.Errorf("unexpected line in hunk: %s", string(lineBytes))
   902  			}
   903  			// Technically this should be the end the file!
   904  			// FIXME: we should be putting a marker at the end of the file if there is no terminal new line
   905  			continue
   906  		case '+':
   907  			curFileLinesCount++
   908  			curFile.Addition++
   909  			if maxLines > -1 && curFileLinesCount >= maxLines {
   910  				curFile.IsIncomplete = true
   911  				continue
   912  			}
   913  			diffLine := &DiffLine{Type: DiffLineAdd, RightIdx: rightLine, Match: -1}
   914  			rightLine++
   915  			if curSection == nil {
   916  				// Create a new section to represent this hunk
   917  				curSection = &DiffSection{file: curFile}
   918  				curFile.Sections = append(curFile.Sections, curSection)
   919  				lastLeftIdx = -1
   920  			}
   921  			if lastLeftIdx > -1 {
   922  				diffLine.Match = lastLeftIdx
   923  				curSection.Lines[lastLeftIdx].Match = len(curSection.Lines)
   924  				lastLeftIdx++
   925  				if lastLeftIdx >= len(curSection.Lines) || curSection.Lines[lastLeftIdx].Type != DiffLineDel {
   926  					lastLeftIdx = -1
   927  				}
   928  			}
   929  			curSection.Lines = append(curSection.Lines, diffLine)
   930  		case '-':
   931  			curFileLinesCount++
   932  			curFile.Deletion++
   933  			if maxLines > -1 && curFileLinesCount >= maxLines {
   934  				curFile.IsIncomplete = true
   935  				continue
   936  			}
   937  			diffLine := &DiffLine{Type: DiffLineDel, LeftIdx: leftLine, Match: -1}
   938  			if leftLine > 0 {
   939  				leftLine++
   940  			}
   941  			if curSection == nil {
   942  				// Create a new section to represent this hunk
   943  				curSection = &DiffSection{file: curFile}
   944  				curFile.Sections = append(curFile.Sections, curSection)
   945  				lastLeftIdx = -1
   946  			}
   947  			if len(curSection.Lines) == 0 || curSection.Lines[len(curSection.Lines)-1].Type != DiffLineDel {
   948  				lastLeftIdx = len(curSection.Lines)
   949  			}
   950  			curSection.Lines = append(curSection.Lines, diffLine)
   951  		case ' ':
   952  			curFileLinesCount++
   953  			if maxLines > -1 && curFileLinesCount >= maxLines {
   954  				curFile.IsIncomplete = true
   955  				continue
   956  			}
   957  			diffLine := &DiffLine{Type: DiffLinePlain, LeftIdx: leftLine, RightIdx: rightLine}
   958  			leftLine++
   959  			rightLine++
   960  			lastLeftIdx = -1
   961  			if curSection == nil {
   962  				// Create a new section to represent this hunk
   963  				curSection = &DiffSection{file: curFile}
   964  				curFile.Sections = append(curFile.Sections, curSection)
   965  			}
   966  			curSection.Lines = append(curSection.Lines, diffLine)
   967  		default:
   968  			// This is unexpected
   969  			return nil, false, fmt.Errorf("unexpected line in hunk: %s", string(lineBytes))
   970  		}
   971  
   972  		line := string(lineBytes)
   973  		if isFragment {
   974  			curFile.IsIncomplete = true
   975  			curFile.IsIncompleteLineTooLong = true
   976  			for isFragment {
   977  				lineBytes, isFragment, err = input.ReadLine()
   978  				if err != nil {
   979  					// Now by the definition of ReadLine this cannot be io.EOF
   980  					return lineBytes, isFragment, fmt.Errorf("unable to ReadLine: %w", err)
   981  				}
   982  			}
   983  		}
   984  		if len(line) > maxLineCharacters {
   985  			curFile.IsIncomplete = true
   986  			curFile.IsIncompleteLineTooLong = true
   987  			line = line[:maxLineCharacters]
   988  		}
   989  		curSection.Lines[len(curSection.Lines)-1].Content = line
   990  
   991  		// handle LFS
   992  		if line[1:] == lfs.MetaFileIdentifier {
   993  			curFileLFSPrefix = true
   994  		} else if curFileLFSPrefix && strings.HasPrefix(line[1:], lfs.MetaFileOidPrefix) {
   995  			oid := strings.TrimPrefix(line[1:], lfs.MetaFileOidPrefix)
   996  			if len(oid) == 64 {
   997  				m := &git_model.LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}}
   998  				count, err := db.CountByBean(ctx, m)
   999  
  1000  				if err == nil && count > 0 {
  1001  					curFile.IsBin = true
  1002  					curFile.IsLFSFile = true
  1003  					curSection.Lines = nil
  1004  					lastLeftIdx = -1
  1005  				}
  1006  			}
  1007  		}
  1008  	}
  1009  }
  1010  
  1011  func createDiffFile(diff *Diff, line string) *DiffFile {
  1012  	// The a/ and b/ filenames are the same unless rename/copy is involved.
  1013  	// Especially, even for a creation or a deletion, /dev/null is not used
  1014  	// in place of the a/ or b/ filenames.
  1015  	//
  1016  	// When rename/copy is involved, file1 and file2 show the name of the
  1017  	// source file of the rename/copy and the name of the file that rename/copy
  1018  	// produces, respectively.
  1019  	//
  1020  	// Path names are quoted if necessary.
  1021  	//
  1022  	// This means that you should always be able to determine the file name even when there
  1023  	// there is potential ambiguity...
  1024  	//
  1025  	// but we can be simpler with our heuristics by just forcing git to prefix things nicely
  1026  	curFile := &DiffFile{
  1027  		Index:    len(diff.Files) + 1,
  1028  		Type:     DiffFileChange,
  1029  		Sections: make([]*DiffSection, 0, 10),
  1030  	}
  1031  
  1032  	rd := strings.NewReader(line[len(cmdDiffHead):] + " ")
  1033  	curFile.Type = DiffFileChange
  1034  	var oldNameAmbiguity, newNameAmbiguity bool
  1035  
  1036  	curFile.OldName, oldNameAmbiguity = readFileName(rd)
  1037  	curFile.Name, newNameAmbiguity = readFileName(rd)
  1038  	if oldNameAmbiguity && newNameAmbiguity {
  1039  		curFile.IsAmbiguous = true
  1040  		// OK we should bet that the oldName and the newName are the same if they can be made to be same
  1041  		// So we need to start again ...
  1042  		if (len(line)-len(cmdDiffHead)-1)%2 == 0 {
  1043  			// diff --git a/b b/b b/b b/b b/b b/b
  1044  			//
  1045  			midpoint := (len(line) + len(cmdDiffHead) - 1) / 2
  1046  			newPart, oldPart := line[len(cmdDiffHead):midpoint], line[midpoint+1:]
  1047  			if len(newPart) > 2 && len(oldPart) > 2 && newPart[2:] == oldPart[2:] {
  1048  				curFile.OldName = oldPart[2:]
  1049  				curFile.Name = oldPart[2:]
  1050  			}
  1051  		}
  1052  	}
  1053  
  1054  	curFile.IsRenamed = curFile.Name != curFile.OldName
  1055  	return curFile
  1056  }
  1057  
  1058  func readFileName(rd *strings.Reader) (string, bool) {
  1059  	ambiguity := false
  1060  	var name string
  1061  	char, _ := rd.ReadByte()
  1062  	_ = rd.UnreadByte()
  1063  	if char == '"' {
  1064  		fmt.Fscanf(rd, "%q ", &name)
  1065  		if len(name) == 0 {
  1066  			log.Error("Reader has no file name: reader=%+v", rd)
  1067  			return "", true
  1068  		}
  1069  
  1070  		if name[0] == '\\' {
  1071  			name = name[1:]
  1072  		}
  1073  	} else {
  1074  		// This technique is potentially ambiguous it may not be possible to uniquely identify the filenames from the diff line alone
  1075  		ambiguity = true
  1076  		fmt.Fscanf(rd, "%s ", &name)
  1077  		char, _ := rd.ReadByte()
  1078  		_ = rd.UnreadByte()
  1079  		for !(char == 0 || char == '"' || char == 'b') {
  1080  			var suffix string
  1081  			fmt.Fscanf(rd, "%s ", &suffix)
  1082  			name += " " + suffix
  1083  			char, _ = rd.ReadByte()
  1084  			_ = rd.UnreadByte()
  1085  		}
  1086  	}
  1087  	if len(name) < 2 {
  1088  		log.Error("Unable to determine name from reader: reader=%+v", rd)
  1089  		return "", true
  1090  	}
  1091  	return name[2:], ambiguity
  1092  }
  1093  
// DiffOptions represents the options for a DiffRange
type DiffOptions struct {
	// BeforeCommitID is the commit to diff from. GetDiff replaces an empty value
	// with the first parent of AfterCommitID when that commit has parents.
	BeforeCommitID string
	// AfterCommitID is the commit to diff to.
	AfterCommitID string
	// SkipTo is handed to git as --skip-to (git >= 2.31) or to ParsePatch otherwise.
	SkipTo string
	// MaxLines, MaxLineCharacters and MaxFiles are passed through to ParsePatch.
	MaxLines          int
	MaxLineCharacters int
	MaxFiles          int
	// WhitespaceBehavior holds extra arguments appended to the git diff command.
	WhitespaceBehavior git.TrustedCmdArgs
	// DirectComparison selects ".." instead of "..." between the two commits
	// when the diff statistics are calculated.
	DirectComparison bool
}
  1105  
  1106  // GetDiff builds a Diff between two commits of a repository.
  1107  // Passing the empty string as beforeCommitID returns a diff from the parent commit.
  1108  // The whitespaceBehavior is either an empty string or a git flag
  1109  func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
  1110  	repoPath := gitRepo.Path
  1111  
  1112  	commit, err := gitRepo.GetCommit(opts.AfterCommitID)
  1113  	if err != nil {
  1114  		return nil, err
  1115  	}
  1116  
  1117  	cmdDiff := git.NewCommand(gitRepo.Ctx)
  1118  	objectFormat, err := gitRepo.GetObjectFormat()
  1119  	if err != nil {
  1120  		return nil, err
  1121  	}
  1122  
  1123  	if (len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String()) && commit.ParentCount() == 0 {
  1124  		cmdDiff.AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M").
  1125  			AddArguments(opts.WhitespaceBehavior...).
  1126  			AddDynamicArguments(objectFormat.EmptyTree().String()).
  1127  			AddDynamicArguments(opts.AfterCommitID)
  1128  	} else {
  1129  		actualBeforeCommitID := opts.BeforeCommitID
  1130  		if len(actualBeforeCommitID) == 0 {
  1131  			parentCommit, _ := commit.Parent(0)
  1132  			actualBeforeCommitID = parentCommit.ID.String()
  1133  		}
  1134  
  1135  		cmdDiff.AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M").
  1136  			AddArguments(opts.WhitespaceBehavior...).
  1137  			AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID)
  1138  		opts.BeforeCommitID = actualBeforeCommitID
  1139  	}
  1140  
  1141  	// In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
  1142  	// so if we are using at least this version of git we don't have to tell ParsePatch to do
  1143  	// the skipping for us
  1144  	parsePatchSkipToFile := opts.SkipTo
  1145  	if opts.SkipTo != "" && git.DefaultFeatures().CheckVersionAtLeast("2.31") {
  1146  		cmdDiff.AddOptionFormat("--skip-to=%s", opts.SkipTo)
  1147  		parsePatchSkipToFile = ""
  1148  	}
  1149  
  1150  	cmdDiff.AddDashesAndList(files...)
  1151  
  1152  	reader, writer := io.Pipe()
  1153  	defer func() {
  1154  		_ = reader.Close()
  1155  		_ = writer.Close()
  1156  	}()
  1157  
  1158  	go func() {
  1159  		stderr := &bytes.Buffer{}
  1160  		cmdDiff.SetDescription(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath))
  1161  		if err := cmdDiff.Run(&git.RunOpts{
  1162  			Timeout: time.Duration(setting.Git.Timeout.Default) * time.Second,
  1163  			Dir:     repoPath,
  1164  			Stdout:  writer,
  1165  			Stderr:  stderr,
  1166  		}); err != nil {
  1167  			log.Error("error during GetDiff(git diff dir: %s): %v, stderr: %s", repoPath, err, stderr.String())
  1168  		}
  1169  
  1170  		_ = writer.Close()
  1171  	}()
  1172  
  1173  	diff, err := ParsePatch(ctx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
  1174  	if err != nil {
  1175  		return nil, fmt.Errorf("unable to ParsePatch: %w", err)
  1176  	}
  1177  	diff.Start = opts.SkipTo
  1178  
  1179  	checker, deferable := gitRepo.CheckAttributeReader(opts.AfterCommitID)
  1180  	defer deferable()
  1181  
  1182  	for _, diffFile := range diff.Files {
  1183  		isVendored := optional.None[bool]()
  1184  		isGenerated := optional.None[bool]()
  1185  		if checker != nil {
  1186  			attrs, err := checker.CheckPath(diffFile.Name)
  1187  			if err == nil {
  1188  				isVendored = git.AttributeToBool(attrs, git.AttributeLinguistVendored)
  1189  				isGenerated = git.AttributeToBool(attrs, git.AttributeLinguistGenerated)
  1190  
  1191  				language := git.TryReadLanguageAttribute(attrs)
  1192  				if language.Has() {
  1193  					diffFile.Language = language.Value()
  1194  				}
  1195  			}
  1196  		}
  1197  
  1198  		if !isVendored.Has() {
  1199  			isVendored = optional.Some(analyze.IsVendor(diffFile.Name))
  1200  		}
  1201  		diffFile.IsVendored = isVendored.Value()
  1202  
  1203  		if !isGenerated.Has() {
  1204  			isGenerated = optional.Some(analyze.IsGenerated(diffFile.Name))
  1205  		}
  1206  		diffFile.IsGenerated = isGenerated.Value()
  1207  
  1208  		tailSection := diffFile.GetTailSection(gitRepo, opts.BeforeCommitID, opts.AfterCommitID)
  1209  		if tailSection != nil {
  1210  			diffFile.Sections = append(diffFile.Sections, tailSection)
  1211  		}
  1212  	}
  1213  
  1214  	separator := "..."
  1215  	if opts.DirectComparison {
  1216  		separator = ".."
  1217  	}
  1218  
  1219  	diffPaths := []string{opts.BeforeCommitID + separator + opts.AfterCommitID}
  1220  	if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String() {
  1221  		diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID}
  1222  	}
  1223  	diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
  1224  	if err != nil && strings.Contains(err.Error(), "no merge base") {
  1225  		// git >= 2.28 now returns an error if base and head have become unrelated.
  1226  		// previously it would return the results of git diff --shortstat base head so let's try that...
  1227  		diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
  1228  		diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
  1229  	}
  1230  	if err != nil {
  1231  		return nil, err
  1232  	}
  1233  
  1234  	return diff, nil
  1235  }
  1236  
  1237  type PullDiffStats struct {
  1238  	TotalAddition, TotalDeletion int
  1239  }
  1240  
  1241  // GetPullDiffStats
  1242  func GetPullDiffStats(gitRepo *git.Repository, opts *DiffOptions) (*PullDiffStats, error) {
  1243  	repoPath := gitRepo.Path
  1244  
  1245  	diff := &PullDiffStats{}
  1246  
  1247  	separator := "..."
  1248  	if opts.DirectComparison {
  1249  		separator = ".."
  1250  	}
  1251  
  1252  	objectFormat, err := gitRepo.GetObjectFormat()
  1253  	if err != nil {
  1254  		return nil, err
  1255  	}
  1256  
  1257  	diffPaths := []string{opts.BeforeCommitID + separator + opts.AfterCommitID}
  1258  	if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String() {
  1259  		diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID}
  1260  	}
  1261  
  1262  	_, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
  1263  	if err != nil && strings.Contains(err.Error(), "no merge base") {
  1264  		// git >= 2.28 now returns an error if base and head have become unrelated.
  1265  		// previously it would return the results of git diff --shortstat base head so let's try that...
  1266  		diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
  1267  		_, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
  1268  	}
  1269  	if err != nil {
  1270  		return nil, err
  1271  	}
  1272  
  1273  	return diff, nil
  1274  }
  1275  
  1276  // SyncAndGetUserSpecificDiff is like GetDiff, except that user specific data such as which files the given user has already viewed on the given PR will also be set
  1277  // Additionally, the database asynchronously is updated if files have changed since the last review
  1278  func SyncAndGetUserSpecificDiff(ctx context.Context, userID int64, pull *issues_model.PullRequest, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
  1279  	diff, err := GetDiff(ctx, gitRepo, opts, files...)
  1280  	if err != nil {
  1281  		return nil, err
  1282  	}
  1283  	review, err := pull_model.GetNewestReviewState(ctx, userID, pull.ID)
  1284  	if err != nil || review == nil || review.UpdatedFiles == nil {
  1285  		return diff, err
  1286  	}
  1287  
  1288  	latestCommit := opts.AfterCommitID
  1289  	if latestCommit == "" {
  1290  		latestCommit = pull.HeadBranch // opts.AfterCommitID is preferred because it handles PRs from forks correctly and the branch name doesn't
  1291  	}
  1292  
  1293  	changedFiles, err := gitRepo.GetFilesChangedBetween(review.CommitSHA, latestCommit)
  1294  	// There are way too many possible errors.
  1295  	// Examples are various git errors such as the commit the review was based on was gc'ed and hence doesn't exist anymore as well as unrecoverable errors where we should serve a 500 response
  1296  	// Due to the current architecture and physical limitation of needing to compare explicit error messages, we can only choose one approach without the code getting ugly
  1297  	// For SOME of the errors such as the gc'ed commit, it would be best to mark all files as changed
  1298  	// But as that does not work for all potential errors, we simply mark all files as unchanged and drop the error which always works, even if not as good as possible
  1299  	if err != nil {
  1300  		log.Error("Could not get changed files between %s and %s for pull request %d in repo with path %s. Assuming no changes. Error: %w", review.CommitSHA, latestCommit, pull.Index, gitRepo.Path, err)
  1301  	}
  1302  
  1303  	filesChangedSinceLastDiff := make(map[string]pull_model.ViewedState)
  1304  outer:
  1305  	for _, diffFile := range diff.Files {
  1306  		fileViewedState := review.UpdatedFiles[diffFile.GetDiffFileName()]
  1307  
  1308  		// Check whether it was previously detected that the file has changed since the last review
  1309  		if fileViewedState == pull_model.HasChanged {
  1310  			diffFile.HasChangedSinceLastReview = true
  1311  			continue
  1312  		}
  1313  
  1314  		filename := diffFile.GetDiffFileName()
  1315  
  1316  		// Check explicitly whether the file has changed since the last review
  1317  		for _, changedFile := range changedFiles {
  1318  			diffFile.HasChangedSinceLastReview = filename == changedFile
  1319  			if diffFile.HasChangedSinceLastReview {
  1320  				filesChangedSinceLastDiff[filename] = pull_model.HasChanged
  1321  				continue outer // We don't want to check if the file is viewed here as that would fold the file, which is in this case unwanted
  1322  			}
  1323  		}
  1324  		// Check whether the file has already been viewed
  1325  		if fileViewedState == pull_model.Viewed {
  1326  			diffFile.IsViewed = true
  1327  			diff.NumViewedFiles++
  1328  		}
  1329  	}
  1330  
  1331  	// Explicitly store files that have changed in the database, if any is present at all.
  1332  	// This has the benefit that the "Has Changed" attribute will be present as long as the user does not explicitly mark this file as viewed, so it will even survive a page reload after marking another file as viewed.
  1333  	// On the other hand, this means that even if a commit reverting an unseen change is committed, the file will still be seen as changed.
  1334  	if len(filesChangedSinceLastDiff) > 0 {
  1335  		err := pull_model.UpdateReviewState(ctx, review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff)
  1336  		if err != nil {
  1337  			log.Warn("Could not update review for user %d, pull %d, commit %s and the changed files %v: %v", review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff, err)
  1338  			return nil, err
  1339  		}
  1340  	}
  1341  
  1342  	return diff, nil
  1343  }
  1344  
  1345  // CommentAsDiff returns c.Patch as *Diff
  1346  func CommentAsDiff(ctx context.Context, c *issues_model.Comment) (*Diff, error) {
  1347  	diff, err := ParsePatch(ctx, setting.Git.MaxGitDiffLines,
  1348  		setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch), "")
  1349  	if err != nil {
  1350  		log.Error("Unable to parse patch: %v", err)
  1351  		return nil, err
  1352  	}
  1353  	if len(diff.Files) == 0 {
  1354  		return nil, fmt.Errorf("no file found for comment ID: %d", c.ID)
  1355  	}
  1356  	secs := diff.Files[0].Sections
  1357  	if len(secs) == 0 {
  1358  		return nil, fmt.Errorf("no sections found for comment ID: %d", c.ID)
  1359  	}
  1360  	return diff, nil
  1361  }
  1362  
  1363  // CommentMustAsDiff executes AsDiff and logs the error instead of returning
  1364  func CommentMustAsDiff(ctx context.Context, c *issues_model.Comment) *Diff {
  1365  	if c == nil {
  1366  		return nil
  1367  	}
  1368  	defer func() {
  1369  		if err := recover(); err != nil {
  1370  			log.Error("PANIC whilst retrieving diff for comment[%d] Error: %v\nStack: %s", c.ID, err, log.Stack(2))
  1371  		}
  1372  	}()
  1373  	diff, err := CommentAsDiff(ctx, c)
  1374  	if err != nil {
  1375  		log.Warn("CommentMustAsDiff: %v", err)
  1376  	}
  1377  	return diff
  1378  }
  1379  
  1380  // GetWhitespaceFlag returns git diff flag for treating whitespaces
  1381  func GetWhitespaceFlag(whitespaceBehavior string) git.TrustedCmdArgs {
  1382  	whitespaceFlags := map[string]git.TrustedCmdArgs{
  1383  		"ignore-all":    {"-w"},
  1384  		"ignore-change": {"-b"},
  1385  		"ignore-eol":    {"--ignore-space-at-eol"},
  1386  		"show-all":      nil,
  1387  	}
  1388  
  1389  	if flag, ok := whitespaceFlags[whitespaceBehavior]; ok {
  1390  		return flag
  1391  	}
  1392  	log.Warn("unknown whitespace behavior: %q, default to 'show-all'", whitespaceBehavior)
  1393  	return nil
  1394  }