github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/git/repo_compare.go (about)

     1  // Copyright 2023 The GitBundle Inc. All rights reserved.
     2  // Copyright 2017 The Gitea Authors. All rights reserved.
     3  // Use of this source code is governed by a MIT-style
     4  // license that can be found in the LICENSE file.
     5  
     6  // Copyright 2015 The Gogs Authors. All rights reserved.
     7  
     8  package git
     9  
    10  import (
    11  	"bufio"
    12  	"bytes"
    13  	"context"
    14  	"errors"
    15  	"fmt"
    16  	"io"
    17  	"os"
    18  	"path/filepath"
    19  	"regexp"
    20  	"strconv"
    21  	"strings"
    22  	"time"
    23  
    24  	logger "github.com/gitbundle/modules/log"
    25  )
    26  
    27  // CompareInfo represents needed information for comparing references.
    28  type CompareInfo struct {
    29  	MergeBase    string
    30  	BaseCommitID string
    31  	HeadCommitID string
    32  	Commits      []*Commit
    33  	NumFiles     int
    34  }
    35  
    36  // GetMergeBase checks and returns merge base of two branches and the reference used as base.
    37  func (repo *Repository) GetMergeBase(tmpRemote, base, head string) (string, string, error) {
    38  	if tmpRemote == "" {
    39  		tmpRemote = "origin"
    40  	}
    41  
    42  	if tmpRemote != "origin" {
    43  		tmpBaseName := RemotePrefix + tmpRemote + "/tmp_" + base
    44  		// Fetch commit into a temporary branch in order to be able to handle commits and tags
    45  		_, _, err := NewCommand(repo.Ctx, "fetch", tmpRemote, base+":"+tmpBaseName).RunStdString(&RunOpts{Dir: repo.Path})
    46  		if err == nil {
    47  			base = tmpBaseName
    48  		}
    49  	}
    50  
    51  	stdout, _, err := NewCommand(repo.Ctx, "merge-base", "--", base, head).RunStdString(&RunOpts{Dir: repo.Path})
    52  	return strings.TrimSpace(stdout), base, err
    53  }
    54  
    55  // GetCompareInfo generates and returns compare information between base and head branches of repositories.
    56  func (repo *Repository) GetCompareInfo(basePath, baseBranch, headBranch string, directComparison, fileOnly bool) (_ *CompareInfo, err error) {
    57  	var (
    58  		remoteBranch string
    59  		tmpRemote    string
    60  	)
    61  
    62  	// We don't need a temporary remote for same repository.
    63  	if repo.Path != basePath {
    64  		// Add a temporary remote
    65  		tmpRemote = strconv.FormatInt(time.Now().UnixNano(), 10)
    66  		if err = repo.AddRemote(tmpRemote, basePath, false); err != nil {
    67  			return nil, fmt.Errorf("AddRemote: %v", err)
    68  		}
    69  		defer func() {
    70  			if err := repo.RemoveRemote(tmpRemote); err != nil {
    71  				logger.Error("GetPullRequestInfo: RemoveRemote: %v", err)
    72  			}
    73  		}()
    74  	}
    75  
    76  	compareInfo := new(CompareInfo)
    77  
    78  	compareInfo.HeadCommitID, err = GetFullCommitID(repo.Ctx, repo.Path, headBranch)
    79  	if err != nil {
    80  		compareInfo.HeadCommitID = headBranch
    81  	}
    82  
    83  	compareInfo.MergeBase, remoteBranch, err = repo.GetMergeBase(tmpRemote, baseBranch, headBranch)
    84  	if err == nil {
    85  		compareInfo.BaseCommitID, err = GetFullCommitID(repo.Ctx, repo.Path, remoteBranch)
    86  		if err != nil {
    87  			compareInfo.BaseCommitID = remoteBranch
    88  		}
    89  		separator := "..."
    90  		baseCommitID := compareInfo.MergeBase
    91  		if directComparison {
    92  			separator = ".."
    93  			baseCommitID = compareInfo.BaseCommitID
    94  		}
    95  
    96  		// We have a common base - therefore we know that ... should work
    97  		if !fileOnly {
    98  			var logs []byte
    99  			logs, _, err = NewCommand(repo.Ctx, "log", baseCommitID+separator+headBranch, prettyLogFormat).RunStdBytes(&RunOpts{Dir: repo.Path})
   100  			if err != nil {
   101  				return nil, err
   102  			}
   103  			compareInfo.Commits, err = repo.parsePrettyFormatLogToList(logs)
   104  			if err != nil {
   105  				return nil, fmt.Errorf("parsePrettyFormatLogToList: %v", err)
   106  			}
   107  		} else {
   108  			compareInfo.Commits = []*Commit{}
   109  		}
   110  	} else {
   111  		compareInfo.Commits = []*Commit{}
   112  		compareInfo.MergeBase, err = GetFullCommitID(repo.Ctx, repo.Path, remoteBranch)
   113  		if err != nil {
   114  			compareInfo.MergeBase = remoteBranch
   115  		}
   116  		compareInfo.BaseCommitID = compareInfo.MergeBase
   117  	}
   118  
   119  	// Count number of changed files.
   120  	// This probably should be removed as we need to use shortstat elsewhere
   121  	// Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
   122  	compareInfo.NumFiles, err = repo.GetDiffNumChangedFiles(remoteBranch, headBranch, directComparison)
   123  	if err != nil {
   124  		return nil, err
   125  	}
   126  	return compareInfo, nil
   127  }
   128  
   129  type lineCountWriter struct {
   130  	numLines int
   131  }
   132  
   133  // Write counts the number of newlines in the provided bytestream
   134  func (l *lineCountWriter) Write(p []byte) (n int, err error) {
   135  	n = len(p)
   136  	l.numLines += bytes.Count(p, []byte{'\000'})
   137  	return
   138  }
   139  
   140  // GetDiffNumChangedFiles counts the number of changed files
   141  // This is substantially quicker than shortstat but...
   142  func (repo *Repository) GetDiffNumChangedFiles(base, head string, directComparison bool) (int, error) {
   143  	// Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
   144  	w := &lineCountWriter{}
   145  	stderr := new(bytes.Buffer)
   146  
   147  	separator := "..."
   148  	if directComparison {
   149  		separator = ".."
   150  	}
   151  
   152  	if err := NewCommand(repo.Ctx, "diff", "-z", "--name-only", base+separator+head).
   153  		Run(&RunOpts{
   154  			Dir:    repo.Path,
   155  			Stdout: w,
   156  			Stderr: stderr,
   157  		}); err != nil {
   158  		if strings.Contains(stderr.String(), "no merge base") {
   159  			// git >= 2.28 now returns an error if base and head have become unrelated.
   160  			// previously it would return the results of git diff -z --name-only base head so let's try that...
   161  			w = &lineCountWriter{}
   162  			stderr.Reset()
   163  			if err = NewCommand(repo.Ctx, "diff", "-z", "--name-only", base, head).Run(&RunOpts{
   164  				Dir:    repo.Path,
   165  				Stdout: w,
   166  				Stderr: stderr,
   167  			}); err == nil {
   168  				return w.numLines, nil
   169  			}
   170  		}
   171  		return 0, fmt.Errorf("%v: Stderr: %s", err, stderr)
   172  	}
   173  	return w.numLines, nil
   174  }
   175  
   176  // GetDiffShortStat counts number of changed files, number of additions and deletions
   177  func (repo *Repository) GetDiffShortStat(base, head string) (numFiles, totalAdditions, totalDeletions int, err error) {
   178  	numFiles, totalAdditions, totalDeletions, err = GetDiffShortStat(repo.Ctx, repo.Path, base+"..."+head)
   179  	if err != nil && strings.Contains(err.Error(), "no merge base") {
   180  		return GetDiffShortStat(repo.Ctx, repo.Path, base, head)
   181  	}
   182  	return
   183  }
   184  
   185  // GetDiffShortStat counts number of changed files, number of additions and deletions
   186  func GetDiffShortStat(ctx context.Context, repoPath string, args ...string) (numFiles, totalAdditions, totalDeletions int, err error) {
   187  	// Now if we call:
   188  	// $ git diff --shortstat 1ebb35b98889ff77299f24d82da426b434b0cca0...788b8b1440462d477f45b0088875
   189  	// we get:
   190  	// " 9902 files changed, 2034198 insertions(+), 298800 deletions(-)\n"
   191  	args = append([]string{
   192  		"diff",
   193  		"--shortstat",
   194  	}, args...)
   195  
   196  	stdout, _, err := NewCommand(ctx, args...).RunStdString(&RunOpts{Dir: repoPath})
   197  	if err != nil {
   198  		return 0, 0, 0, err
   199  	}
   200  
   201  	return parseDiffStat(stdout)
   202  }
   203  
   204  var shortStatFormat = regexp.MustCompile(
   205  	`\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`)
   206  
   207  var patchCommits = regexp.MustCompile(`^From\s(\w+)\s`)
   208  
   209  func parseDiffStat(stdout string) (numFiles, totalAdditions, totalDeletions int, err error) {
   210  	if len(stdout) == 0 || stdout == "\n" {
   211  		return 0, 0, 0, nil
   212  	}
   213  	groups := shortStatFormat.FindStringSubmatch(stdout)
   214  	if len(groups) != 4 {
   215  		return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s groups: %s", stdout, groups)
   216  	}
   217  
   218  	numFiles, err = strconv.Atoi(groups[1])
   219  	if err != nil {
   220  		return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumFiles %v", stdout, err)
   221  	}
   222  
   223  	if len(groups[2]) != 0 {
   224  		totalAdditions, err = strconv.Atoi(groups[2])
   225  		if err != nil {
   226  			return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumAdditions %v", stdout, err)
   227  		}
   228  	}
   229  
   230  	if len(groups[3]) != 0 {
   231  		totalDeletions, err = strconv.Atoi(groups[3])
   232  		if err != nil {
   233  			return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumDeletions %v", stdout, err)
   234  		}
   235  	}
   236  	return
   237  }
   238  
   239  // GetDiffOrPatch generates either diff or formatted patch data between given revisions
   240  func (repo *Repository) GetDiffOrPatch(base, head string, w io.Writer, patch, binary bool) error {
   241  	if patch {
   242  		return repo.GetPatch(base, head, w)
   243  	}
   244  	if binary {
   245  		return repo.GetDiffBinary(base, head, w)
   246  	}
   247  	return repo.GetDiff(base, head, w)
   248  }
   249  
   250  // GetDiff generates and returns patch data between given revisions, optimized for human readability
   251  func (repo *Repository) GetDiff(base, head string, w io.Writer) error {
   252  	return NewCommand(repo.Ctx, "diff", "-p", base, head).Run(&RunOpts{
   253  		Dir:    repo.Path,
   254  		Stdout: w,
   255  	})
   256  }
   257  
   258  // GetDiffBinary generates and returns patch data between given revisions, including binary diffs.
   259  func (repo *Repository) GetDiffBinary(base, head string, w io.Writer) error {
   260  	return NewCommand(repo.Ctx, "diff", "-p", "--binary", "--histogram", base, head).Run(&RunOpts{
   261  		Dir:    repo.Path,
   262  		Stdout: w,
   263  	})
   264  }
   265  
   266  // GetPatch generates and returns format-patch data between given revisions, able to be used with `git apply`
   267  func (repo *Repository) GetPatch(base, head string, w io.Writer) error {
   268  	stderr := new(bytes.Buffer)
   269  	err := NewCommand(repo.Ctx, "format-patch", "--binary", "--stdout", base+"..."+head).
   270  		Run(&RunOpts{
   271  			Dir:    repo.Path,
   272  			Stdout: w,
   273  			Stderr: stderr,
   274  		})
   275  	if err != nil && bytes.Contains(stderr.Bytes(), []byte("no merge base")) {
   276  		return NewCommand(repo.Ctx, "format-patch", "--binary", "--stdout", base, head).
   277  			Run(&RunOpts{
   278  				Dir:    repo.Path,
   279  				Stdout: w,
   280  			})
   281  	}
   282  	return err
   283  }
   284  
   285  // GetFilesChangedBetween returns a list of all files that have been changed between the given commits
   286  func (repo *Repository) GetFilesChangedBetween(base, head string) ([]string, error) {
   287  	stdout, _, err := NewCommand(repo.Ctx, "diff", "--name-only", base+".."+head).RunStdString(&RunOpts{Dir: repo.Path})
   288  	if err != nil {
   289  		return nil, err
   290  	}
   291  	return strings.Split(stdout, "\n"), err
   292  }
   293  
   294  // GetDiffFromMergeBase generates and return patch data from merge base to head
   295  func (repo *Repository) GetDiffFromMergeBase(base, head string, w io.Writer) error {
   296  	stderr := new(bytes.Buffer)
   297  	err := NewCommand(repo.Ctx, "diff", "-p", "--binary", base+"..."+head).
   298  		Run(&RunOpts{
   299  			Dir:    repo.Path,
   300  			Stdout: w,
   301  			Stderr: stderr,
   302  		})
   303  	if err != nil && bytes.Contains(stderr.Bytes(), []byte("no merge base")) {
   304  		return repo.GetDiffBinary(base, head, w)
   305  	}
   306  	return err
   307  }
   308  
   309  // ReadPatchCommit will check if a diff patch exists and return stats
   310  func (repo *Repository) ReadPatchCommit(prID int64) (commitSHA string, err error) {
   311  	// Migrated repositories download patches to "pulls" location
   312  	patchFile := fmt.Sprintf("pulls/%d.patch", prID)
   313  	loadPatch, err := os.Open(filepath.Join(repo.Path, patchFile))
   314  	if err != nil {
   315  		return "", err
   316  	}
   317  	defer loadPatch.Close()
   318  	// Read only the first line of the patch - usually it contains the first commit made in patch
   319  	scanner := bufio.NewScanner(loadPatch)
   320  	scanner.Scan()
   321  	// Parse the Patch stats, sometimes Migration returns a 404 for the patch file
   322  	commitSHAGroups := patchCommits.FindStringSubmatch(scanner.Text())
   323  	if len(commitSHAGroups) != 0 {
   324  		commitSHA = commitSHAGroups[1]
   325  	} else {
   326  		return "", errors.New("patch file doesn't contain valid commit ID")
   327  	}
   328  	return commitSHA, nil
   329  }