github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/git/commit_info_gogit.go (about) 1 // Copyright 2023 The GitBundle Inc. All rights reserved. 2 // Copyright 2017 The Gitea Authors. All rights reserved. 3 // Use of this source code is governed by a MIT-style 4 // license that can be found in the LICENSE file. 5 6 //go:build gogit 7 8 package git 9 10 import ( 11 "context" 12 "path" 13 14 "github.com/emirpasic/gods/trees/binaryheap" 15 "github.com/go-git/go-git/v5/plumbing" 16 "github.com/go-git/go-git/v5/plumbing/object" 17 cgobject "github.com/go-git/go-git/v5/plumbing/object/commitgraph" 18 ) 19 20 // GetCommitsInfo gets information of all commits that are corresponding to these entries 21 func (tes Entries) GetCommitsInfo(ctx context.Context, commit *Commit, treePath string, cache *LastCommitCache) ([]CommitInfo, *Commit, error) { 22 entryPaths := make([]string, len(tes)+1) 23 // Get the commit for the treePath itself 24 entryPaths[0] = "" 25 for i, entry := range tes { 26 entryPaths[i+1] = entry.Name() 27 } 28 29 commitNodeIndex, commitGraphFile := commit.repo.CommitNodeIndex() 30 if commitGraphFile != nil { 31 defer commitGraphFile.Close() 32 } 33 34 c, err := commitNodeIndex.Get(commit.ID) 35 if err != nil { 36 return nil, nil, err 37 } 38 39 var revs map[string]*object.Commit 40 if cache != nil { 41 var unHitPaths []string 42 revs, unHitPaths, err = getLastCommitForPathsByCache(commit.ID.String(), treePath, entryPaths, cache) 43 if err != nil { 44 return nil, nil, err 45 } 46 if len(unHitPaths) > 0 { 47 revs2, err := GetLastCommitForPaths(ctx, cache, c, treePath, unHitPaths) 48 if err != nil { 49 return nil, nil, err 50 } 51 52 for k, v := range revs2 { 53 revs[k] = v 54 } 55 } 56 } else { 57 revs, err = GetLastCommitForPaths(ctx, nil, c, treePath, entryPaths) 58 } 59 if err != nil { 60 return nil, nil, err 61 } 62 63 commit.repo.gogitStorage.Close() 64 65 commitsInfo := make([]CommitInfo, len(tes)) 66 for i, entry := range tes { 67 commitsInfo[i] = CommitInfo{ 68 Entry: entry, 69 } 70 71 // Check if we have found a commit for this entry in time 72 if rev, ok := revs[entry.Name()]; ok { 73 entryCommit := convertCommit(rev) 74 commitsInfo[i].Commit = entryCommit 75 } 76 77 // If the entry if a submodule add a submodule file for this 78 if entry.IsSubModule() { 79 subModuleURL := "" 80 var fullPath string 81 if len(treePath) > 0 { 82 fullPath = treePath + "/" + entry.Name() 83 } else { 84 fullPath = entry.Name() 85 } 86 if subModule, err := commit.GetSubModule(fullPath); err != nil { 87 return nil, nil, err 88 } else if subModule != nil { 89 subModuleURL = subModule.URL 90 } 91 subModuleFile := NewSubModuleFile(commitsInfo[i].Commit, subModuleURL, entry.ID.String()) 92 commitsInfo[i].SubModuleFile = subModuleFile 93 } 94 } 95 96 // Retrieve the commit for the treePath itself (see above). We basically 97 // get it for free during the tree traversal and it's used for listing 98 // pages to display information about newest commit for a given path. 99 var treeCommit *Commit 100 if treePath == "" { 101 treeCommit = commit 102 } else if rev, ok := revs[""]; ok { 103 treeCommit = convertCommit(rev) 104 treeCommit.repo = commit.repo 105 } 106 return commitsInfo, treeCommit, nil 107 } 108 109 type commitAndPaths struct { 110 commit cgobject.CommitNode 111 // Paths that are still on the branch represented by commit 112 paths []string 113 // Set of hashes for the paths 114 hashes map[string]plumbing.Hash 115 } 116 117 func getCommitTree(c cgobject.CommitNode, treePath string) (*object.Tree, error) { 118 tree, err := c.Tree() 119 if err != nil { 120 return nil, err 121 } 122 123 // Optimize deep traversals by focusing only on the specific tree 124 if treePath != "" { 125 tree, err = tree.Tree(treePath) 126 if err != nil { 127 return nil, err 128 } 129 } 130 131 return tree, nil 132 } 133 134 func getFileHashes(c cgobject.CommitNode, treePath string, paths []string) (map[string]plumbing.Hash, error) { 135 tree, err := getCommitTree(c, treePath) 136 if err == object.ErrDirectoryNotFound { 137 // The whole tree didn't exist, so return empty map 138 return make(map[string]plumbing.Hash), nil 139 } 140 if err != nil { 141 return nil, err 142 } 143 144 hashes := make(map[string]plumbing.Hash) 145 for _, path := range paths { 146 if path != "" { 147 entry, err := tree.FindEntry(path) 148 if err == nil { 149 hashes[path] = entry.Hash 150 } 151 } else { 152 hashes[path] = tree.Hash 153 } 154 } 155 156 return hashes, nil 157 } 158 159 func getLastCommitForPathsByCache(commitID, treePath string, paths []string, cache *LastCommitCache) (map[string]*object.Commit, []string, error) { 160 var unHitEntryPaths []string 161 results := make(map[string]*object.Commit) 162 for _, p := range paths { 163 lastCommit, err := cache.Get(commitID, path.Join(treePath, p)) 164 if err != nil { 165 return nil, nil, err 166 } 167 if lastCommit != nil { 168 results[p] = lastCommit.(*object.Commit) 169 continue 170 } 171 172 unHitEntryPaths = append(unHitEntryPaths, p) 173 } 174 175 return results, unHitEntryPaths, nil 176 } 177 178 // GetLastCommitForPaths returns last commit information 179 func GetLastCommitForPaths(ctx context.Context, cache *LastCommitCache, c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) { 180 refSha := c.ID().String() 181 182 // We do a tree traversal with nodes sorted by commit time 183 heap := binaryheap.NewWith(func(a, b interface{}) int { 184 if a.(*commitAndPaths).commit.CommitTime().Before(b.(*commitAndPaths).commit.CommitTime()) { 185 return 1 186 } 187 return -1 188 }) 189 190 resultNodes := make(map[string]cgobject.CommitNode) 191 initialHashes, err := getFileHashes(c, treePath, paths) 192 if err != nil { 193 return nil, err 194 } 195 196 // Start search from the root commit and with full set of paths 197 heap.Push(&commitAndPaths{c, paths, initialHashes}) 198 heaploop: 199 for { 200 select { 201 case <-ctx.Done(): 202 if ctx.Err() == context.DeadlineExceeded { 203 break heaploop 204 } 205 return nil, ctx.Err() 206 default: 207 } 208 cIn, ok := heap.Pop() 209 if !ok { 210 break 211 } 212 current := cIn.(*commitAndPaths) 213 214 // Load the parent commits for the one we are currently examining 215 numParents := current.commit.NumParents() 216 var parents []cgobject.CommitNode 217 for i := 0; i < numParents; i++ { 218 parent, err := current.commit.ParentNode(i) 219 if err != nil { 220 break 221 } 222 parents = append(parents, parent) 223 } 224 225 // Examine the current commit and set of interesting paths 226 pathUnchanged := make([]bool, len(current.paths)) 227 parentHashes := make([]map[string]plumbing.Hash, len(parents)) 228 for j, parent := range parents { 229 parentHashes[j], err = getFileHashes(parent, treePath, current.paths) 230 if err != nil { 231 break 232 } 233 234 for i, path := range current.paths { 235 if parentHashes[j][path] == current.hashes[path] { 236 pathUnchanged[i] = true 237 } 238 } 239 } 240 241 var remainingPaths []string 242 for i, pth := range current.paths { 243 // The results could already contain some newer change for the same path, 244 // so don't override that and bail out on the file early. 245 if resultNodes[pth] == nil { 246 if pathUnchanged[i] { 247 // The path existed with the same hash in at least one parent so it could 248 // not have been changed in this commit directly. 249 remainingPaths = append(remainingPaths, pth) 250 } else { 251 // There are few possible cases how can we get here: 252 // - The path didn't exist in any parent, so it must have been created by 253 // this commit. 254 // - The path did exist in the parent commit, but the hash of the file has 255 // changed. 256 // - We are looking at a merge commit and the hash of the file doesn't 257 // match any of the hashes being merged. This is more common for directories, 258 // but it can also happen if a file is changed through conflict resolution. 259 resultNodes[pth] = current.commit 260 if err := cache.Put(refSha, path.Join(treePath, pth), current.commit.ID().String()); err != nil { 261 return nil, err 262 } 263 } 264 } 265 } 266 267 if len(remainingPaths) > 0 { 268 // Add the parent nodes along with remaining paths to the heap for further 269 // processing. 270 for j, parent := range parents { 271 // Combine remainingPath with paths available on the parent branch 272 // and make union of them 273 remainingPathsForParent := make([]string, 0, len(remainingPaths)) 274 newRemainingPaths := make([]string, 0, len(remainingPaths)) 275 for _, path := range remainingPaths { 276 if parentHashes[j][path] == current.hashes[path] { 277 remainingPathsForParent = append(remainingPathsForParent, path) 278 } else { 279 newRemainingPaths = append(newRemainingPaths, path) 280 } 281 } 282 283 if remainingPathsForParent != nil { 284 heap.Push(&commitAndPaths{parent, remainingPathsForParent, parentHashes[j]}) 285 } 286 287 if len(newRemainingPaths) == 0 { 288 break 289 } else { 290 remainingPaths = newRemainingPaths 291 } 292 } 293 } 294 } 295 296 // Post-processing 297 result := make(map[string]*object.Commit) 298 for path, commitNode := range resultNodes { 299 var err error 300 result[path], err = commitNode.Commit() 301 if err != nil { 302 return nil, err 303 } 304 } 305 306 return result, nil 307 }