github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/ref/commit_iterator.go (about) 1 package ref 2 3 import ( 4 "container/heap" 5 "context" 6 "time" 7 8 "github.com/treeverse/lakefs/pkg/graveler" 9 ) 10 11 type CommitIterator struct { 12 manager graveler.RefManager 13 ctx context.Context 14 repository *graveler.RepositoryRecord 15 start graveler.CommitID 16 firstParent bool 17 value *graveler.CommitRecord 18 queue commitsPriorityQueue 19 visit map[graveler.CommitID]struct{} 20 state commitIteratorState 21 since *time.Time 22 err error 23 } 24 25 type commitIteratorState int 26 27 const ( 28 commitIteratorStateInit commitIteratorState = iota 29 commitIteratorStateQuery 30 commitIteratorStateDone 31 ) 32 33 type commitsPriorityQueue []*graveler.CommitRecord 34 35 func (c *commitsPriorityQueue) Len() int { 36 return len(*c) 37 } 38 39 func (c *commitsPriorityQueue) Less(i, j int) bool { 40 pq := *c 41 if pq[i].Commit.CreationDate.Equal(pq[j].Commit.CreationDate) { 42 return pq[i].CommitID > pq[j].CommitID 43 } 44 return pq[i].Commit.CreationDate.After(pq[j].Commit.CreationDate) 45 } 46 47 func (c *commitsPriorityQueue) Swap(i, j int) { 48 pq := *c 49 pq[i], pq[j] = pq[j], pq[i] 50 } 51 52 func (c *commitsPriorityQueue) Push(x interface{}) { 53 rec := x.(*graveler.CommitRecord) 54 *c = append(*c, rec) 55 } 56 57 func (c *commitsPriorityQueue) Pop() interface{} { 58 cc := *c 59 n := len(cc) - 1 60 item := cc[n] 61 *c = cc[:n] 62 return item 63 } 64 65 type CommitIteratorConfig struct { 66 repository *graveler.RepositoryRecord 67 start graveler.CommitID 68 firstParent bool 69 manager graveler.RefManager 70 since *time.Time 71 } 72 73 // NewCommitIterator returns an iterator over all commits in the given repository. 74 // Ordering is based on the Commit Creation Date. 75 func NewCommitIterator(ctx context.Context, config *CommitIteratorConfig) *CommitIterator { 76 return &CommitIterator{ 77 ctx: ctx, 78 repository: config.repository, 79 start: config.start, 80 queue: make(commitsPriorityQueue, 0), 81 visit: make(map[graveler.CommitID]struct{}), 82 manager: config.manager, 83 firstParent: config.firstParent, 84 since: config.since, 85 } 86 } 87 88 func (ci *CommitIterator) getCommitRecord(commitID graveler.CommitID) (*graveler.CommitRecord, error) { 89 commit, err := ci.manager.GetCommit(ci.ctx, ci.repository, commitID) 90 if err != nil { 91 return nil, err 92 } 93 return &graveler.CommitRecord{ 94 CommitID: commitID, 95 Commit: commit, 96 }, nil 97 } 98 99 func (ci *CommitIterator) Next() bool { 100 if ci.err != nil || ci.state == commitIteratorStateDone { 101 return false 102 } 103 104 if ci.state == commitIteratorStateInit { 105 // first time we look up the 'start' commit and push it into the queue 106 ci.state = commitIteratorStateQuery 107 rec, err := ci.getCommitRecord(ci.start) 108 if err != nil { 109 ci.value = nil 110 ci.err = err 111 return false 112 } 113 // skip commits that are older than since time 114 if ci.since == nil || !rec.Commit.CreationDate.Before(*ci.since) { 115 ci.queue.Push(rec) 116 } 117 } 118 119 if ci.queue.Len() == 0 { 120 ci.value = nil 121 ci.state = commitIteratorStateDone 122 return false 123 } 124 125 // as long as we have something in the queue we will 126 // set it as the current value and push the current commits parents to the queue 127 ci.value = heap.Pop(&ci.queue).(*graveler.CommitRecord) 128 parents := ci.value.Parents 129 if ci.firstParent && len(parents) > 1 { 130 parents = parents[:1] 131 } 132 for _, p := range parents { 133 // skip commits we already visited 134 if _, visited := ci.visit[p]; visited { 135 continue 136 } 137 138 rec, err := ci.getCommitRecord(p) 139 if err != nil { 140 ci.value = nil 141 ci.err = err 142 return false 143 } 144 ci.visit[rec.CommitID] = struct{}{} 145 146 // skip commits that are older than since time 147 if ci.since != nil && rec.Commit.CreationDate.Before(*ci.since) { 148 continue 149 } 150 151 heap.Push(&ci.queue, rec) 152 } 153 return true 154 } 155 156 // SeekGE skip under the point of 'id' commit ID based on a new 157 // 158 // The list of commit 159 func (ci *CommitIterator) SeekGE(id graveler.CommitID) { 160 ci.err = nil 161 ci.queue = make(commitsPriorityQueue, 0) 162 ci.visit = make(map[graveler.CommitID]struct{}) 163 ci.state = commitIteratorStateInit 164 165 // skip until we get into our commit 166 for ci.Next() { 167 if ci.Value().CommitID == id { 168 break 169 } 170 } 171 if ci.Err() != nil { 172 return 173 } 174 175 // step back - in order to have Next to read the value we just got, 176 // we push back the current value to our queue and set the current value to nil. 177 heap.Push(&ci.queue, ci.value) 178 ci.value = nil 179 } 180 181 func (ci *CommitIterator) Value() *graveler.CommitRecord { 182 return ci.value 183 } 184 185 func (ci *CommitIterator) Err() error { 186 return ci.err 187 } 188 189 func (ci *CommitIterator) Close() {}