github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/ref/commit_ordered_iterator.go (about) 1 package ref 2 3 import ( 4 "context" 5 6 "github.com/treeverse/lakefs/pkg/graveler" 7 "github.com/treeverse/lakefs/pkg/kv" 8 ) 9 10 type OrderedCommitIterator struct { 11 ctx context.Context 12 it *kv.PrimaryIterator 13 store kv.Store 14 err error 15 value *graveler.CommitRecord 16 repositoryPath string 17 onlyAncestryLeaves bool 18 firstParents map[string]bool 19 } 20 21 // NewOrderedCommitIterator returns an iterator over all commits in the given repository. 22 // Ordering is based on the Commit ID value. 23 // WithOnlyAncestryLeaves causes the iterator to return only commits which are not the first parent of any other commit. 24 // Consider a commit graph where all non-first-parent edges are removed. This graph is a tree, and ancestry leaves are its leaves. 25 func NewOrderedCommitIterator(ctx context.Context, store kv.Store, repo *graveler.RepositoryRecord, onlyAncestryLeaves bool) (*OrderedCommitIterator, error) { 26 repoPath := graveler.RepoPartition(repo) 27 it, err := kv.NewPrimaryIterator(ctx, store, (&graveler.CommitData{}).ProtoReflect().Type(), repoPath, 28 []byte(graveler.CommitPath("")), kv.IteratorOptionsFrom([]byte(""))) 29 if err != nil { 30 return nil, err 31 } 32 var parents map[string]bool 33 if onlyAncestryLeaves { 34 parents, err = getAllFirstParents(ctx, store, repo) 35 if err != nil { 36 it.Close() 37 return nil, err 38 } 39 } 40 return &OrderedCommitIterator{ 41 ctx: ctx, 42 it: it, 43 store: store, 44 repositoryPath: repoPath, 45 onlyAncestryLeaves: onlyAncestryLeaves, 46 firstParents: parents, 47 }, nil 48 } 49 50 func (i *OrderedCommitIterator) Next() bool { 51 if i.Err() != nil || i.it == nil { 52 return false 53 } 54 for i.it.Next() { 55 e := i.it.Entry() 56 if e == nil { 57 i.err = graveler.ErrInvalid 58 return false 59 } 60 commit, ok := e.Value.(*graveler.CommitData) 61 if commit == nil || !ok { 62 i.err = graveler.ErrReadingFromStore 63 return false 64 } 65 if !i.onlyAncestryLeaves || !i.firstParents[commit.Id] { 66 i.value = CommitDataToCommitRecord(commit) 67 return true 68 } 69 } 70 i.value = nil 71 return false 72 } 73 74 func (i *OrderedCommitIterator) SeekGE(id graveler.CommitID) { 75 if i.err != nil { 76 return 77 } 78 i.it.Close() 79 i.value = nil 80 i.it, i.err = kv.NewPrimaryIterator(i.ctx, i.store, (&graveler.CommitData{}).ProtoReflect().Type(), i.repositoryPath, 81 []byte(graveler.CommitPath("")), kv.IteratorOptionsFrom([]byte(graveler.CommitPath(id)))) 82 } 83 84 func (i *OrderedCommitIterator) Value() *graveler.CommitRecord { 85 if i.Err() != nil { 86 return nil 87 } 88 return i.value 89 } 90 91 func (i *OrderedCommitIterator) Err() error { 92 if i.err != nil { 93 return i.err 94 } 95 if i.it != nil { 96 return i.it.Err() 97 } 98 return nil 99 } 100 101 func (i *OrderedCommitIterator) Close() { 102 if i.it != nil { 103 i.it.Close() 104 i.it = nil 105 } 106 } 107 108 // getAllFirstParents returns a set of all commits that are the first parent of some other commit for a given repository. 109 func getAllFirstParents(ctx context.Context, store kv.Store, repo *graveler.RepositoryRecord) (map[string]bool, error) { 110 it, err := kv.NewPrimaryIterator(ctx, store, (&graveler.CommitData{}).ProtoReflect().Type(), 111 graveler.RepoPartition(repo), 112 []byte(graveler.CommitPath("")), kv.IteratorOptionsFrom([]byte(""))) 113 if err != nil { 114 return nil, err 115 } 116 defer it.Close() 117 firstParents := make(map[string]bool) 118 for it.Next() { 119 entry := it.Entry() 120 commit := entry.Value.(*graveler.CommitData) 121 if len(commit.Parents) > 0 { 122 parentNo := 0 123 if graveler.CommitVersion(commit.Version) < graveler.CommitVersionParentSwitch && len(commit.Parents) > 1 { 124 parentNo = 1 125 } 126 parent := commit.Parents[parentNo] 127 firstParents[parent] = true 128 } 129 } 130 return firstParents, nil 131 }