code.gitea.io/gitea@v1.19.3/modules/git/repo_compare.go (about)

     1  // Copyright 2015 The Gogs Authors. All rights reserved.
     2  // Copyright 2019 The Gitea Authors. All rights reserved.
     3  // SPDX-License-Identifier: MIT
     4  
     5  package git
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"context"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  	"os"
    15  	"path/filepath"
    16  	"regexp"
    17  	"strconv"
    18  	"strings"
    19  	"time"
    20  
    21  	logger "code.gitea.io/gitea/modules/log"
    22  )
    23  
// CompareInfo represents needed information for comparing references.
// The field notes below describe how GetCompareInfo populates it.
type CompareInfo struct {
	// MergeBase is the output of `git merge-base` for base and head. When no
	// merge base can be computed it falls back to the resolved base reference.
	MergeBase    string
	// BaseCommitID is the full commit ID of the base reference, or the
	// reference name itself when it cannot be resolved.
	BaseCommitID string
	// HeadCommitID is the full commit ID of the head reference, or the
	// reference name itself when it cannot be resolved.
	HeadCommitID string
	// Commits lists the commits parsed from `git log` between base and head.
	// It is empty when only file information was requested or when no merge
	// base exists.
	Commits      []*Commit
	// NumFiles is the number of changed files as counted by
	// GetDiffNumChangedFiles.
	NumFiles     int
}
    32  
    33  // GetMergeBase checks and returns merge base of two branches and the reference used as base.
    34  func (repo *Repository) GetMergeBase(tmpRemote, base, head string) (string, string, error) {
    35  	if tmpRemote == "" {
    36  		tmpRemote = "origin"
    37  	}
    38  
    39  	if tmpRemote != "origin" {
    40  		tmpBaseName := RemotePrefix + tmpRemote + "/tmp_" + base
    41  		// Fetch commit into a temporary branch in order to be able to handle commits and tags
    42  		_, _, err := NewCommand(repo.Ctx, "fetch", "--no-tags").AddDynamicArguments(tmpRemote).AddDashesAndList(base + ":" + tmpBaseName).RunStdString(&RunOpts{Dir: repo.Path})
    43  		if err == nil {
    44  			base = tmpBaseName
    45  		}
    46  	}
    47  
    48  	stdout, _, err := NewCommand(repo.Ctx, "merge-base").AddDashesAndList(base, head).RunStdString(&RunOpts{Dir: repo.Path})
    49  	return strings.TrimSpace(stdout), base, err
    50  }
    51  
    52  // GetCompareInfo generates and returns compare information between base and head branches of repositories.
    53  func (repo *Repository) GetCompareInfo(basePath, baseBranch, headBranch string, directComparison, fileOnly bool) (_ *CompareInfo, err error) {
    54  	var (
    55  		remoteBranch string
    56  		tmpRemote    string
    57  	)
    58  
    59  	// We don't need a temporary remote for same repository.
    60  	if repo.Path != basePath {
    61  		// Add a temporary remote
    62  		tmpRemote = strconv.FormatInt(time.Now().UnixNano(), 10)
    63  		if err = repo.AddRemote(tmpRemote, basePath, false); err != nil {
    64  			return nil, fmt.Errorf("AddRemote: %w", err)
    65  		}
    66  		defer func() {
    67  			if err := repo.RemoveRemote(tmpRemote); err != nil {
    68  				logger.Error("GetPullRequestInfo: RemoveRemote: %v", err)
    69  			}
    70  		}()
    71  	}
    72  
    73  	compareInfo := new(CompareInfo)
    74  
    75  	compareInfo.HeadCommitID, err = GetFullCommitID(repo.Ctx, repo.Path, headBranch)
    76  	if err != nil {
    77  		compareInfo.HeadCommitID = headBranch
    78  	}
    79  
    80  	compareInfo.MergeBase, remoteBranch, err = repo.GetMergeBase(tmpRemote, baseBranch, headBranch)
    81  	if err == nil {
    82  		compareInfo.BaseCommitID, err = GetFullCommitID(repo.Ctx, repo.Path, remoteBranch)
    83  		if err != nil {
    84  			compareInfo.BaseCommitID = remoteBranch
    85  		}
    86  		separator := "..."
    87  		baseCommitID := compareInfo.MergeBase
    88  		if directComparison {
    89  			separator = ".."
    90  			baseCommitID = compareInfo.BaseCommitID
    91  		}
    92  
    93  		// We have a common base - therefore we know that ... should work
    94  		if !fileOnly {
    95  			// avoid: ambiguous argument 'refs/a...refs/b': unknown revision or path not in the working tree. Use '--': 'git <command> [<revision>...] -- [<file>...]'
    96  			var logs []byte
    97  			logs, _, err = NewCommand(repo.Ctx, "log").AddArguments(prettyLogFormat).
    98  				AddDynamicArguments(baseCommitID + separator + headBranch).AddArguments("--").
    99  				RunStdBytes(&RunOpts{Dir: repo.Path})
   100  			if err != nil {
   101  				return nil, err
   102  			}
   103  			compareInfo.Commits, err = repo.parsePrettyFormatLogToList(logs)
   104  			if err != nil {
   105  				return nil, fmt.Errorf("parsePrettyFormatLogToList: %w", err)
   106  			}
   107  		} else {
   108  			compareInfo.Commits = []*Commit{}
   109  		}
   110  	} else {
   111  		compareInfo.Commits = []*Commit{}
   112  		compareInfo.MergeBase, err = GetFullCommitID(repo.Ctx, repo.Path, remoteBranch)
   113  		if err != nil {
   114  			compareInfo.MergeBase = remoteBranch
   115  		}
   116  		compareInfo.BaseCommitID = compareInfo.MergeBase
   117  	}
   118  
   119  	// Count number of changed files.
   120  	// This probably should be removed as we need to use shortstat elsewhere
   121  	// Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
   122  	compareInfo.NumFiles, err = repo.GetDiffNumChangedFiles(remoteBranch, headBranch, directComparison)
   123  	if err != nil {
   124  		return nil, err
   125  	}
   126  	return compareInfo, nil
   127  }
   128  
   129  type lineCountWriter struct {
   130  	numLines int
   131  }
   132  
   133  // Write counts the number of newlines in the provided bytestream
   134  func (l *lineCountWriter) Write(p []byte) (n int, err error) {
   135  	n = len(p)
   136  	l.numLines += bytes.Count(p, []byte{'\000'})
   137  	return n, err
   138  }
   139  
   140  // GetDiffNumChangedFiles counts the number of changed files
   141  // This is substantially quicker than shortstat but...
   142  func (repo *Repository) GetDiffNumChangedFiles(base, head string, directComparison bool) (int, error) {
   143  	// Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
   144  	w := &lineCountWriter{}
   145  	stderr := new(bytes.Buffer)
   146  
   147  	separator := "..."
   148  	if directComparison {
   149  		separator = ".."
   150  	}
   151  
   152  	// avoid: ambiguous argument 'refs/a...refs/b': unknown revision or path not in the working tree. Use '--': 'git <command> [<revision>...] -- [<file>...]'
   153  	if err := NewCommand(repo.Ctx, "diff", "-z", "--name-only").AddDynamicArguments(base + separator + head).AddArguments("--").
   154  		Run(&RunOpts{
   155  			Dir:    repo.Path,
   156  			Stdout: w,
   157  			Stderr: stderr,
   158  		}); err != nil {
   159  		if strings.Contains(stderr.String(), "no merge base") {
   160  			// git >= 2.28 now returns an error if base and head have become unrelated.
   161  			// previously it would return the results of git diff -z --name-only base head so let's try that...
   162  			w = &lineCountWriter{}
   163  			stderr.Reset()
   164  			if err = NewCommand(repo.Ctx, "diff", "-z", "--name-only").AddDynamicArguments(base, head).AddArguments("--").Run(&RunOpts{
   165  				Dir:    repo.Path,
   166  				Stdout: w,
   167  				Stderr: stderr,
   168  			}); err == nil {
   169  				return w.numLines, nil
   170  			}
   171  		}
   172  		return 0, fmt.Errorf("%w: Stderr: %s", err, stderr)
   173  	}
   174  	return w.numLines, nil
   175  }
   176  
   177  // GetDiffShortStat counts number of changed files, number of additions and deletions
   178  func (repo *Repository) GetDiffShortStat(base, head string) (numFiles, totalAdditions, totalDeletions int, err error) {
   179  	numFiles, totalAdditions, totalDeletions, err = GetDiffShortStat(repo.Ctx, repo.Path, nil, base+"..."+head)
   180  	if err != nil && strings.Contains(err.Error(), "no merge base") {
   181  		return GetDiffShortStat(repo.Ctx, repo.Path, nil, base, head)
   182  	}
   183  	return numFiles, totalAdditions, totalDeletions, err
   184  }
   185  
   186  // GetDiffShortStat counts number of changed files, number of additions and deletions
   187  func GetDiffShortStat(ctx context.Context, repoPath string, trustedArgs TrustedCmdArgs, dynamicArgs ...string) (numFiles, totalAdditions, totalDeletions int, err error) {
   188  	// Now if we call:
   189  	// $ git diff --shortstat 1ebb35b98889ff77299f24d82da426b434b0cca0...788b8b1440462d477f45b0088875
   190  	// we get:
   191  	// " 9902 files changed, 2034198 insertions(+), 298800 deletions(-)\n"
   192  	cmd := NewCommand(ctx, "diff", "--shortstat").AddArguments(trustedArgs...).AddDynamicArguments(dynamicArgs...)
   193  	stdout, _, err := cmd.RunStdString(&RunOpts{Dir: repoPath})
   194  	if err != nil {
   195  		return 0, 0, 0, err
   196  	}
   197  
   198  	return parseDiffStat(stdout)
   199  }
   200  
   201  var shortStatFormat = regexp.MustCompile(
   202  	`\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`)
   203  
   204  var patchCommits = regexp.MustCompile(`^From\s(\w+)\s`)
   205  
   206  func parseDiffStat(stdout string) (numFiles, totalAdditions, totalDeletions int, err error) {
   207  	if len(stdout) == 0 || stdout == "\n" {
   208  		return 0, 0, 0, nil
   209  	}
   210  	groups := shortStatFormat.FindStringSubmatch(stdout)
   211  	if len(groups) != 4 {
   212  		return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s groups: %s", stdout, groups)
   213  	}
   214  
   215  	numFiles, err = strconv.Atoi(groups[1])
   216  	if err != nil {
   217  		return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumFiles %w", stdout, err)
   218  	}
   219  
   220  	if len(groups[2]) != 0 {
   221  		totalAdditions, err = strconv.Atoi(groups[2])
   222  		if err != nil {
   223  			return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumAdditions %w", stdout, err)
   224  		}
   225  	}
   226  
   227  	if len(groups[3]) != 0 {
   228  		totalDeletions, err = strconv.Atoi(groups[3])
   229  		if err != nil {
   230  			return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumDeletions %w", stdout, err)
   231  		}
   232  	}
   233  	return numFiles, totalAdditions, totalDeletions, err
   234  }
   235  
   236  // GetDiffOrPatch generates either diff or formatted patch data between given revisions
   237  func (repo *Repository) GetDiffOrPatch(base, head string, w io.Writer, patch, binary bool) error {
   238  	if patch {
   239  		return repo.GetPatch(base, head, w)
   240  	}
   241  	if binary {
   242  		return repo.GetDiffBinary(base, head, w)
   243  	}
   244  	return repo.GetDiff(base, head, w)
   245  }
   246  
   247  // GetDiff generates and returns patch data between given revisions, optimized for human readability
   248  func (repo *Repository) GetDiff(base, head string, w io.Writer) error {
   249  	return NewCommand(repo.Ctx, "diff", "-p").AddDynamicArguments(base, head).Run(&RunOpts{
   250  		Dir:    repo.Path,
   251  		Stdout: w,
   252  	})
   253  }
   254  
   255  // GetDiffBinary generates and returns patch data between given revisions, including binary diffs.
   256  func (repo *Repository) GetDiffBinary(base, head string, w io.Writer) error {
   257  	return NewCommand(repo.Ctx, "diff", "-p", "--binary", "--histogram").AddDynamicArguments(base, head).Run(&RunOpts{
   258  		Dir:    repo.Path,
   259  		Stdout: w,
   260  	})
   261  }
   262  
   263  // GetPatch generates and returns format-patch data between given revisions, able to be used with `git apply`
   264  func (repo *Repository) GetPatch(base, head string, w io.Writer) error {
   265  	stderr := new(bytes.Buffer)
   266  	err := NewCommand(repo.Ctx, "format-patch", "--binary", "--stdout").AddDynamicArguments(base + "..." + head).
   267  		Run(&RunOpts{
   268  			Dir:    repo.Path,
   269  			Stdout: w,
   270  			Stderr: stderr,
   271  		})
   272  	if err != nil && bytes.Contains(stderr.Bytes(), []byte("no merge base")) {
   273  		return NewCommand(repo.Ctx, "format-patch", "--binary", "--stdout").AddDynamicArguments(base, head).
   274  			Run(&RunOpts{
   275  				Dir:    repo.Path,
   276  				Stdout: w,
   277  			})
   278  	}
   279  	return err
   280  }
   281  
   282  // GetFilesChangedBetween returns a list of all files that have been changed between the given commits
   283  func (repo *Repository) GetFilesChangedBetween(base, head string) ([]string, error) {
   284  	stdout, _, err := NewCommand(repo.Ctx, "diff", "--name-only", "-z").AddDynamicArguments(base + ".." + head).RunStdString(&RunOpts{Dir: repo.Path})
   285  	if err != nil {
   286  		return nil, err
   287  	}
   288  	split := strings.Split(stdout, "\000")
   289  
   290  	// Because Git will always emit filenames with a terminal NUL ignore the last entry in the split - which will always be empty.
   291  	if len(split) > 0 {
   292  		split = split[:len(split)-1]
   293  	}
   294  
   295  	return split, err
   296  }
   297  
   298  // GetDiffFromMergeBase generates and return patch data from merge base to head
   299  func (repo *Repository) GetDiffFromMergeBase(base, head string, w io.Writer) error {
   300  	stderr := new(bytes.Buffer)
   301  	err := NewCommand(repo.Ctx, "diff", "-p", "--binary").AddDynamicArguments(base + "..." + head).
   302  		Run(&RunOpts{
   303  			Dir:    repo.Path,
   304  			Stdout: w,
   305  			Stderr: stderr,
   306  		})
   307  	if err != nil && bytes.Contains(stderr.Bytes(), []byte("no merge base")) {
   308  		return repo.GetDiffBinary(base, head, w)
   309  	}
   310  	return err
   311  }
   312  
   313  // ReadPatchCommit will check if a diff patch exists and return stats
   314  func (repo *Repository) ReadPatchCommit(prID int64) (commitSHA string, err error) {
   315  	// Migrated repositories download patches to "pulls" location
   316  	patchFile := fmt.Sprintf("pulls/%d.patch", prID)
   317  	loadPatch, err := os.Open(filepath.Join(repo.Path, patchFile))
   318  	if err != nil {
   319  		return "", err
   320  	}
   321  	defer loadPatch.Close()
   322  	// Read only the first line of the patch - usually it contains the first commit made in patch
   323  	scanner := bufio.NewScanner(loadPatch)
   324  	scanner.Scan()
   325  	// Parse the Patch stats, sometimes Migration returns a 404 for the patch file
   326  	commitSHAGroups := patchCommits.FindStringSubmatch(scanner.Text())
   327  	if len(commitSHAGroups) != 0 {
   328  		commitSHA = commitSHAGroups[1]
   329  	} else {
   330  		return "", errors.New("patch file doesn't contain valid commit ID")
   331  	}
   332  	return commitSHA, nil
   333  }