github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/git/commit_info_gogit.go (about)

     1  // Copyright 2023 The GitBundle Inc. All rights reserved.
     2  // Copyright 2017 The Gitea Authors. All rights reserved.
     3  // Use of this source code is governed by a MIT-style
     4  // license that can be found in the LICENSE file.
     5  
     6  //go:build gogit
     7  
     8  package git
     9  
    10  import (
    11  	"context"
    12  	"path"
    13  
    14  	"github.com/emirpasic/gods/trees/binaryheap"
    15  	"github.com/go-git/go-git/v5/plumbing"
    16  	"github.com/go-git/go-git/v5/plumbing/object"
    17  	cgobject "github.com/go-git/go-git/v5/plumbing/object/commitgraph"
    18  )
    19  
    20  // GetCommitsInfo gets information of all commits that are corresponding to these entries
    21  func (tes Entries) GetCommitsInfo(ctx context.Context, commit *Commit, treePath string, cache *LastCommitCache) ([]CommitInfo, *Commit, error) {
    22  	entryPaths := make([]string, len(tes)+1)
    23  	// Get the commit for the treePath itself
    24  	entryPaths[0] = ""
    25  	for i, entry := range tes {
    26  		entryPaths[i+1] = entry.Name()
    27  	}
    28  
    29  	commitNodeIndex, commitGraphFile := commit.repo.CommitNodeIndex()
    30  	if commitGraphFile != nil {
    31  		defer commitGraphFile.Close()
    32  	}
    33  
    34  	c, err := commitNodeIndex.Get(commit.ID)
    35  	if err != nil {
    36  		return nil, nil, err
    37  	}
    38  
    39  	var revs map[string]*object.Commit
    40  	if cache != nil {
    41  		var unHitPaths []string
    42  		revs, unHitPaths, err = getLastCommitForPathsByCache(commit.ID.String(), treePath, entryPaths, cache)
    43  		if err != nil {
    44  			return nil, nil, err
    45  		}
    46  		if len(unHitPaths) > 0 {
    47  			revs2, err := GetLastCommitForPaths(ctx, cache, c, treePath, unHitPaths)
    48  			if err != nil {
    49  				return nil, nil, err
    50  			}
    51  
    52  			for k, v := range revs2 {
    53  				revs[k] = v
    54  			}
    55  		}
    56  	} else {
    57  		revs, err = GetLastCommitForPaths(ctx, nil, c, treePath, entryPaths)
    58  	}
    59  	if err != nil {
    60  		return nil, nil, err
    61  	}
    62  
    63  	commit.repo.gogitStorage.Close()
    64  
    65  	commitsInfo := make([]CommitInfo, len(tes))
    66  	for i, entry := range tes {
    67  		commitsInfo[i] = CommitInfo{
    68  			Entry: entry,
    69  		}
    70  
    71  		// Check if we have found a commit for this entry in time
    72  		if rev, ok := revs[entry.Name()]; ok {
    73  			entryCommit := convertCommit(rev)
    74  			commitsInfo[i].Commit = entryCommit
    75  		}
    76  
    77  		// If the entry if a submodule add a submodule file for this
    78  		if entry.IsSubModule() {
    79  			subModuleURL := ""
    80  			var fullPath string
    81  			if len(treePath) > 0 {
    82  				fullPath = treePath + "/" + entry.Name()
    83  			} else {
    84  				fullPath = entry.Name()
    85  			}
    86  			if subModule, err := commit.GetSubModule(fullPath); err != nil {
    87  				return nil, nil, err
    88  			} else if subModule != nil {
    89  				subModuleURL = subModule.URL
    90  			}
    91  			subModuleFile := NewSubModuleFile(commitsInfo[i].Commit, subModuleURL, entry.ID.String())
    92  			commitsInfo[i].SubModuleFile = subModuleFile
    93  		}
    94  	}
    95  
    96  	// Retrieve the commit for the treePath itself (see above). We basically
    97  	// get it for free during the tree traversal and it's used for listing
    98  	// pages to display information about newest commit for a given path.
    99  	var treeCommit *Commit
   100  	if treePath == "" {
   101  		treeCommit = commit
   102  	} else if rev, ok := revs[""]; ok {
   103  		treeCommit = convertCommit(rev)
   104  		treeCommit.repo = commit.repo
   105  	}
   106  	return commitsInfo, treeCommit, nil
   107  }
   108  
// commitAndPaths is one work item of the history traversal in
// GetLastCommitForPaths: a commit node together with the paths still being
// tracked along that branch.
type commitAndPaths struct {
	// Commit node to examine
	commit cgobject.CommitNode
	// Paths that are still on the branch represented by commit
	paths []string
	// Hashes of the tracked paths as found in commit's tree, keyed by path
	hashes map[string]plumbing.Hash
}
   116  
   117  func getCommitTree(c cgobject.CommitNode, treePath string) (*object.Tree, error) {
   118  	tree, err := c.Tree()
   119  	if err != nil {
   120  		return nil, err
   121  	}
   122  
   123  	// Optimize deep traversals by focusing only on the specific tree
   124  	if treePath != "" {
   125  		tree, err = tree.Tree(treePath)
   126  		if err != nil {
   127  			return nil, err
   128  		}
   129  	}
   130  
   131  	return tree, nil
   132  }
   133  
   134  func getFileHashes(c cgobject.CommitNode, treePath string, paths []string) (map[string]plumbing.Hash, error) {
   135  	tree, err := getCommitTree(c, treePath)
   136  	if err == object.ErrDirectoryNotFound {
   137  		// The whole tree didn't exist, so return empty map
   138  		return make(map[string]plumbing.Hash), nil
   139  	}
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  
   144  	hashes := make(map[string]plumbing.Hash)
   145  	for _, path := range paths {
   146  		if path != "" {
   147  			entry, err := tree.FindEntry(path)
   148  			if err == nil {
   149  				hashes[path] = entry.Hash
   150  			}
   151  		} else {
   152  			hashes[path] = tree.Hash
   153  		}
   154  	}
   155  
   156  	return hashes, nil
   157  }
   158  
   159  func getLastCommitForPathsByCache(commitID, treePath string, paths []string, cache *LastCommitCache) (map[string]*object.Commit, []string, error) {
   160  	var unHitEntryPaths []string
   161  	results := make(map[string]*object.Commit)
   162  	for _, p := range paths {
   163  		lastCommit, err := cache.Get(commitID, path.Join(treePath, p))
   164  		if err != nil {
   165  			return nil, nil, err
   166  		}
   167  		if lastCommit != nil {
   168  			results[p] = lastCommit.(*object.Commit)
   169  			continue
   170  		}
   171  
   172  		unHitEntryPaths = append(unHitEntryPaths, p)
   173  	}
   174  
   175  	return results, unHitEntryPaths, nil
   176  }
   177  
   178  // GetLastCommitForPaths returns last commit information
   179  func GetLastCommitForPaths(ctx context.Context, cache *LastCommitCache, c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) {
   180  	refSha := c.ID().String()
   181  
   182  	// We do a tree traversal with nodes sorted by commit time
   183  	heap := binaryheap.NewWith(func(a, b interface{}) int {
   184  		if a.(*commitAndPaths).commit.CommitTime().Before(b.(*commitAndPaths).commit.CommitTime()) {
   185  			return 1
   186  		}
   187  		return -1
   188  	})
   189  
   190  	resultNodes := make(map[string]cgobject.CommitNode)
   191  	initialHashes, err := getFileHashes(c, treePath, paths)
   192  	if err != nil {
   193  		return nil, err
   194  	}
   195  
   196  	// Start search from the root commit and with full set of paths
   197  	heap.Push(&commitAndPaths{c, paths, initialHashes})
   198  heaploop:
   199  	for {
   200  		select {
   201  		case <-ctx.Done():
   202  			if ctx.Err() == context.DeadlineExceeded {
   203  				break heaploop
   204  			}
   205  			return nil, ctx.Err()
   206  		default:
   207  		}
   208  		cIn, ok := heap.Pop()
   209  		if !ok {
   210  			break
   211  		}
   212  		current := cIn.(*commitAndPaths)
   213  
   214  		// Load the parent commits for the one we are currently examining
   215  		numParents := current.commit.NumParents()
   216  		var parents []cgobject.CommitNode
   217  		for i := 0; i < numParents; i++ {
   218  			parent, err := current.commit.ParentNode(i)
   219  			if err != nil {
   220  				break
   221  			}
   222  			parents = append(parents, parent)
   223  		}
   224  
   225  		// Examine the current commit and set of interesting paths
   226  		pathUnchanged := make([]bool, len(current.paths))
   227  		parentHashes := make([]map[string]plumbing.Hash, len(parents))
   228  		for j, parent := range parents {
   229  			parentHashes[j], err = getFileHashes(parent, treePath, current.paths)
   230  			if err != nil {
   231  				break
   232  			}
   233  
   234  			for i, path := range current.paths {
   235  				if parentHashes[j][path] == current.hashes[path] {
   236  					pathUnchanged[i] = true
   237  				}
   238  			}
   239  		}
   240  
   241  		var remainingPaths []string
   242  		for i, pth := range current.paths {
   243  			// The results could already contain some newer change for the same path,
   244  			// so don't override that and bail out on the file early.
   245  			if resultNodes[pth] == nil {
   246  				if pathUnchanged[i] {
   247  					// The path existed with the same hash in at least one parent so it could
   248  					// not have been changed in this commit directly.
   249  					remainingPaths = append(remainingPaths, pth)
   250  				} else {
   251  					// There are few possible cases how can we get here:
   252  					// - The path didn't exist in any parent, so it must have been created by
   253  					//   this commit.
   254  					// - The path did exist in the parent commit, but the hash of the file has
   255  					//   changed.
   256  					// - We are looking at a merge commit and the hash of the file doesn't
   257  					//   match any of the hashes being merged. This is more common for directories,
   258  					//   but it can also happen if a file is changed through conflict resolution.
   259  					resultNodes[pth] = current.commit
   260  					if err := cache.Put(refSha, path.Join(treePath, pth), current.commit.ID().String()); err != nil {
   261  						return nil, err
   262  					}
   263  				}
   264  			}
   265  		}
   266  
   267  		if len(remainingPaths) > 0 {
   268  			// Add the parent nodes along with remaining paths to the heap for further
   269  			// processing.
   270  			for j, parent := range parents {
   271  				// Combine remainingPath with paths available on the parent branch
   272  				// and make union of them
   273  				remainingPathsForParent := make([]string, 0, len(remainingPaths))
   274  				newRemainingPaths := make([]string, 0, len(remainingPaths))
   275  				for _, path := range remainingPaths {
   276  					if parentHashes[j][path] == current.hashes[path] {
   277  						remainingPathsForParent = append(remainingPathsForParent, path)
   278  					} else {
   279  						newRemainingPaths = append(newRemainingPaths, path)
   280  					}
   281  				}
   282  
   283  				if remainingPathsForParent != nil {
   284  					heap.Push(&commitAndPaths{parent, remainingPathsForParent, parentHashes[j]})
   285  				}
   286  
   287  				if len(newRemainingPaths) == 0 {
   288  					break
   289  				} else {
   290  					remainingPaths = newRemainingPaths
   291  				}
   292  			}
   293  		}
   294  	}
   295  
   296  	// Post-processing
   297  	result := make(map[string]*object.Commit)
   298  	for path, commitNode := range resultNodes {
   299  		var err error
   300  		result[path], err = commitNode.Commit()
   301  		if err != nil {
   302  			return nil, err
   303  		}
   304  	}
   305  
   306  	return result, nil
   307  }