github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/git/repo_compare.go (about) 1 // Copyright 2023 The GitBundle Inc. All rights reserved. 2 // Copyright 2017 The Gitea Authors. All rights reserved. 3 // Use of this source code is governed by a MIT-style 4 // license that can be found in the LICENSE file. 5 6 // Copyright 2015 The Gogs Authors. All rights reserved. 7 8 package git 9 10 import ( 11 "bufio" 12 "bytes" 13 "context" 14 "errors" 15 "fmt" 16 "io" 17 "os" 18 "path/filepath" 19 "regexp" 20 "strconv" 21 "strings" 22 "time" 23 24 logger "github.com/gitbundle/modules/log" 25 ) 26 27 // CompareInfo represents needed information for comparing references. 28 type CompareInfo struct { 29 MergeBase string 30 BaseCommitID string 31 HeadCommitID string 32 Commits []*Commit 33 NumFiles int 34 } 35 36 // GetMergeBase checks and returns merge base of two branches and the reference used as base. 37 func (repo *Repository) GetMergeBase(tmpRemote, base, head string) (string, string, error) { 38 if tmpRemote == "" { 39 tmpRemote = "origin" 40 } 41 42 if tmpRemote != "origin" { 43 tmpBaseName := RemotePrefix + tmpRemote + "/tmp_" + base 44 // Fetch commit into a temporary branch in order to be able to handle commits and tags 45 _, _, err := NewCommand(repo.Ctx, "fetch", tmpRemote, base+":"+tmpBaseName).RunStdString(&RunOpts{Dir: repo.Path}) 46 if err == nil { 47 base = tmpBaseName 48 } 49 } 50 51 stdout, _, err := NewCommand(repo.Ctx, "merge-base", "--", base, head).RunStdString(&RunOpts{Dir: repo.Path}) 52 return strings.TrimSpace(stdout), base, err 53 } 54 55 // GetCompareInfo generates and returns compare information between base and head branches of repositories. 56 func (repo *Repository) GetCompareInfo(basePath, baseBranch, headBranch string, directComparison, fileOnly bool) (_ *CompareInfo, err error) { 57 var ( 58 remoteBranch string 59 tmpRemote string 60 ) 61 62 // We don't need a temporary remote for same repository. 63 if repo.Path != basePath { 64 // Add a temporary remote 65 tmpRemote = strconv.FormatInt(time.Now().UnixNano(), 10) 66 if err = repo.AddRemote(tmpRemote, basePath, false); err != nil { 67 return nil, fmt.Errorf("AddRemote: %v", err) 68 } 69 defer func() { 70 if err := repo.RemoveRemote(tmpRemote); err != nil { 71 logger.Error("GetPullRequestInfo: RemoveRemote: %v", err) 72 } 73 }() 74 } 75 76 compareInfo := new(CompareInfo) 77 78 compareInfo.HeadCommitID, err = GetFullCommitID(repo.Ctx, repo.Path, headBranch) 79 if err != nil { 80 compareInfo.HeadCommitID = headBranch 81 } 82 83 compareInfo.MergeBase, remoteBranch, err = repo.GetMergeBase(tmpRemote, baseBranch, headBranch) 84 if err == nil { 85 compareInfo.BaseCommitID, err = GetFullCommitID(repo.Ctx, repo.Path, remoteBranch) 86 if err != nil { 87 compareInfo.BaseCommitID = remoteBranch 88 } 89 separator := "..." 90 baseCommitID := compareInfo.MergeBase 91 if directComparison { 92 separator = ".." 93 baseCommitID = compareInfo.BaseCommitID 94 } 95 96 // We have a common base - therefore we know that ... should work 97 if !fileOnly { 98 var logs []byte 99 logs, _, err = NewCommand(repo.Ctx, "log", baseCommitID+separator+headBranch, prettyLogFormat).RunStdBytes(&RunOpts{Dir: repo.Path}) 100 if err != nil { 101 return nil, err 102 } 103 compareInfo.Commits, err = repo.parsePrettyFormatLogToList(logs) 104 if err != nil { 105 return nil, fmt.Errorf("parsePrettyFormatLogToList: %v", err) 106 } 107 } else { 108 compareInfo.Commits = []*Commit{} 109 } 110 } else { 111 compareInfo.Commits = []*Commit{} 112 compareInfo.MergeBase, err = GetFullCommitID(repo.Ctx, repo.Path, remoteBranch) 113 if err != nil { 114 compareInfo.MergeBase = remoteBranch 115 } 116 compareInfo.BaseCommitID = compareInfo.MergeBase 117 } 118 119 // Count number of changed files. 120 // This probably should be removed as we need to use shortstat elsewhere 121 // Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly 122 compareInfo.NumFiles, err = repo.GetDiffNumChangedFiles(remoteBranch, headBranch, directComparison) 123 if err != nil { 124 return nil, err 125 } 126 return compareInfo, nil 127 } 128 129 type lineCountWriter struct { 130 numLines int 131 } 132 133 // Write counts the number of newlines in the provided bytestream 134 func (l *lineCountWriter) Write(p []byte) (n int, err error) { 135 n = len(p) 136 l.numLines += bytes.Count(p, []byte{'\000'}) 137 return 138 } 139 140 // GetDiffNumChangedFiles counts the number of changed files 141 // This is substantially quicker than shortstat but... 142 func (repo *Repository) GetDiffNumChangedFiles(base, head string, directComparison bool) (int, error) { 143 // Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly 144 w := &lineCountWriter{} 145 stderr := new(bytes.Buffer) 146 147 separator := "..." 148 if directComparison { 149 separator = ".." 150 } 151 152 if err := NewCommand(repo.Ctx, "diff", "-z", "--name-only", base+separator+head). 153 Run(&RunOpts{ 154 Dir: repo.Path, 155 Stdout: w, 156 Stderr: stderr, 157 }); err != nil { 158 if strings.Contains(stderr.String(), "no merge base") { 159 // git >= 2.28 now returns an error if base and head have become unrelated. 160 // previously it would return the results of git diff -z --name-only base head so let's try that... 161 w = &lineCountWriter{} 162 stderr.Reset() 163 if err = NewCommand(repo.Ctx, "diff", "-z", "--name-only", base, head).Run(&RunOpts{ 164 Dir: repo.Path, 165 Stdout: w, 166 Stderr: stderr, 167 }); err == nil { 168 return w.numLines, nil 169 } 170 } 171 return 0, fmt.Errorf("%v: Stderr: %s", err, stderr) 172 } 173 return w.numLines, nil 174 } 175 176 // GetDiffShortStat counts number of changed files, number of additions and deletions 177 func (repo *Repository) GetDiffShortStat(base, head string) (numFiles, totalAdditions, totalDeletions int, err error) { 178 numFiles, totalAdditions, totalDeletions, err = GetDiffShortStat(repo.Ctx, repo.Path, base+"..."+head) 179 if err != nil && strings.Contains(err.Error(), "no merge base") { 180 return GetDiffShortStat(repo.Ctx, repo.Path, base, head) 181 } 182 return 183 } 184 185 // GetDiffShortStat counts number of changed files, number of additions and deletions 186 func GetDiffShortStat(ctx context.Context, repoPath string, args ...string) (numFiles, totalAdditions, totalDeletions int, err error) { 187 // Now if we call: 188 // $ git diff --shortstat 1ebb35b98889ff77299f24d82da426b434b0cca0...788b8b1440462d477f45b0088875 189 // we get: 190 // " 9902 files changed, 2034198 insertions(+), 298800 deletions(-)\n" 191 args = append([]string{ 192 "diff", 193 "--shortstat", 194 }, args...) 195 196 stdout, _, err := NewCommand(ctx, args...).RunStdString(&RunOpts{Dir: repoPath}) 197 if err != nil { 198 return 0, 0, 0, err 199 } 200 201 return parseDiffStat(stdout) 202 } 203 204 var shortStatFormat = regexp.MustCompile( 205 `\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`) 206 207 var patchCommits = regexp.MustCompile(`^From\s(\w+)\s`) 208 209 func parseDiffStat(stdout string) (numFiles, totalAdditions, totalDeletions int, err error) { 210 if len(stdout) == 0 || stdout == "\n" { 211 return 0, 0, 0, nil 212 } 213 groups := shortStatFormat.FindStringSubmatch(stdout) 214 if len(groups) != 4 { 215 return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s groups: %s", stdout, groups) 216 } 217 218 numFiles, err = strconv.Atoi(groups[1]) 219 if err != nil { 220 return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumFiles %v", stdout, err) 221 } 222 223 if len(groups[2]) != 0 { 224 totalAdditions, err = strconv.Atoi(groups[2]) 225 if err != nil { 226 return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumAdditions %v", stdout, err) 227 } 228 } 229 230 if len(groups[3]) != 0 { 231 totalDeletions, err = strconv.Atoi(groups[3]) 232 if err != nil { 233 return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumDeletions %v", stdout, err) 234 } 235 } 236 return 237 } 238 239 // GetDiffOrPatch generates either diff or formatted patch data between given revisions 240 func (repo *Repository) GetDiffOrPatch(base, head string, w io.Writer, patch, binary bool) error { 241 if patch { 242 return repo.GetPatch(base, head, w) 243 } 244 if binary { 245 return repo.GetDiffBinary(base, head, w) 246 } 247 return repo.GetDiff(base, head, w) 248 } 249 250 // GetDiff generates and returns patch data between given revisions, optimized for human readability 251 func (repo *Repository) GetDiff(base, head string, w io.Writer) error { 252 return NewCommand(repo.Ctx, "diff", "-p", base, head).Run(&RunOpts{ 253 Dir: repo.Path, 254 Stdout: w, 255 }) 256 } 257 258 // GetDiffBinary generates and returns patch data between given revisions, including binary diffs. 259 func (repo *Repository) GetDiffBinary(base, head string, w io.Writer) error { 260 return NewCommand(repo.Ctx, "diff", "-p", "--binary", "--histogram", base, head).Run(&RunOpts{ 261 Dir: repo.Path, 262 Stdout: w, 263 }) 264 } 265 266 // GetPatch generates and returns format-patch data between given revisions, able to be used with `git apply` 267 func (repo *Repository) GetPatch(base, head string, w io.Writer) error { 268 stderr := new(bytes.Buffer) 269 err := NewCommand(repo.Ctx, "format-patch", "--binary", "--stdout", base+"..."+head). 270 Run(&RunOpts{ 271 Dir: repo.Path, 272 Stdout: w, 273 Stderr: stderr, 274 }) 275 if err != nil && bytes.Contains(stderr.Bytes(), []byte("no merge base")) { 276 return NewCommand(repo.Ctx, "format-patch", "--binary", "--stdout", base, head). 277 Run(&RunOpts{ 278 Dir: repo.Path, 279 Stdout: w, 280 }) 281 } 282 return err 283 } 284 285 // GetFilesChangedBetween returns a list of all files that have been changed between the given commits 286 func (repo *Repository) GetFilesChangedBetween(base, head string) ([]string, error) { 287 stdout, _, err := NewCommand(repo.Ctx, "diff", "--name-only", base+".."+head).RunStdString(&RunOpts{Dir: repo.Path}) 288 if err != nil { 289 return nil, err 290 } 291 return strings.Split(stdout, "\n"), err 292 } 293 294 // GetDiffFromMergeBase generates and return patch data from merge base to head 295 func (repo *Repository) GetDiffFromMergeBase(base, head string, w io.Writer) error { 296 stderr := new(bytes.Buffer) 297 err := NewCommand(repo.Ctx, "diff", "-p", "--binary", base+"..."+head). 298 Run(&RunOpts{ 299 Dir: repo.Path, 300 Stdout: w, 301 Stderr: stderr, 302 }) 303 if err != nil && bytes.Contains(stderr.Bytes(), []byte("no merge base")) { 304 return repo.GetDiffBinary(base, head, w) 305 } 306 return err 307 } 308 309 // ReadPatchCommit will check if a diff patch exists and return stats 310 func (repo *Repository) ReadPatchCommit(prID int64) (commitSHA string, err error) { 311 // Migrated repositories download patches to "pulls" location 312 patchFile := fmt.Sprintf("pulls/%d.patch", prID) 313 loadPatch, err := os.Open(filepath.Join(repo.Path, patchFile)) 314 if err != nil { 315 return "", err 316 } 317 defer loadPatch.Close() 318 // Read only the first line of the patch - usually it contains the first commit made in patch 319 scanner := bufio.NewScanner(loadPatch) 320 scanner.Scan() 321 // Parse the Patch stats, sometimes Migration returns a 404 for the patch file 322 commitSHAGroups := patchCommits.FindStringSubmatch(scanner.Text()) 323 if len(commitSHAGroups) != 0 { 324 commitSHA = commitSHAGroups[1] 325 } else { 326 return "", errors.New("patch file doesn't contain valid commit ID") 327 } 328 return commitSHA, nil 329 }