github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/doltdb/commit_itr.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package doltdb 16 17 import ( 18 "context" 19 "io" 20 21 "github.com/dolthub/go-mysql-server/sql" 22 23 "github.com/dolthub/dolt/go/store/datas" 24 "github.com/dolthub/dolt/go/store/hash" 25 "github.com/dolthub/dolt/go/store/prolly/tree" 26 "github.com/dolthub/dolt/go/store/types" 27 ) 28 29 // CommitItr is an interface for iterating over a set of unique commits 30 type CommitItr interface { 31 // Next returns the hash of the next commit, and a pointer to that commit. Implementations of Next must handle 32 // making sure the list of commits returned are unique. When complete Next will return hash.Hash{}, nil, io.EOF 33 Next(ctx context.Context) (hash.Hash, *OptionalCommit, error) 34 35 // Reset the commit iterator back to the start 36 Reset(ctx context.Context) error 37 } 38 39 type commitItr struct { 40 ddb *DoltDB 41 rootCommits []*Commit 42 currentRoot int 43 44 added map[hash.Hash]bool 45 unprocessed []hash.Hash 46 curr *Commit 47 } 48 49 // CommitItrForAllBranches returns a CommitItr which will iterate over all commits in all branches in a DoltDB 50 func CommitItrForAllBranches(ctx context.Context, ddb *DoltDB) (CommitItr, error) { 51 branchRefs, err := ddb.GetBranches(ctx) 52 53 if err != nil { 54 return nil, err 55 } 56 57 rootCommits := make([]*Commit, 0, len(branchRefs)) 58 for _, ref := range branchRefs { 59 cm, err := ddb.ResolveCommitRef(ctx, ref) 60 61 if err != nil { 62 return nil, err 63 } 64 65 rootCommits = append(rootCommits, cm) 66 } 67 68 cmItr := CommitItrForRoots(ddb, rootCommits...) 69 return cmItr, nil 70 } 71 72 // CommitItrForRoots will return a CommitItr which will iterate over all ancestor commits of the provided rootCommits. 73 func CommitItrForRoots(ddb *DoltDB, rootCommits ...*Commit) CommitItr { 74 return &commitItr{ 75 ddb: ddb, 76 rootCommits: rootCommits, 77 added: make(map[hash.Hash]bool, 4096), 78 unprocessed: make([]hash.Hash, 0, 4096), 79 } 80 } 81 82 func (cmItr *commitItr) Reset(ctx context.Context) error { 83 cmItr.curr = nil 84 cmItr.currentRoot = 0 85 cmItr.added = make(map[hash.Hash]bool, 4096) 86 cmItr.unprocessed = cmItr.unprocessed[:0] 87 88 return nil 89 } 90 91 // Next returns the hash of the next commit, and a pointer to that commit. It handles making sure the list of commits 92 // returned are unique. When complete Next will return hash.Hash{}, nil, io.EOF 93 func (cmItr *commitItr) Next(ctx context.Context) (hash.Hash, *OptionalCommit, error) { 94 for cmItr.curr == nil { 95 if cmItr.currentRoot >= len(cmItr.rootCommits) { 96 return hash.Hash{}, nil, io.EOF 97 } 98 99 cm := cmItr.rootCommits[cmItr.currentRoot] 100 h, err := cm.HashOf() 101 102 if err != nil { 103 return hash.Hash{}, nil, err 104 } 105 106 if !cmItr.added[h] { 107 cmItr.added[h] = true 108 cmItr.curr = cm 109 return h, &OptionalCommit{cmItr.curr, h}, nil 110 } 111 112 cmItr.currentRoot++ 113 } 114 115 parents, err := cmItr.curr.ParentHashes(ctx) 116 117 if err != nil { 118 return hash.Hash{}, nil, err 119 } 120 121 for _, h := range parents { 122 if !cmItr.added[h] { 123 cmItr.added[h] = true 124 cmItr.unprocessed = append(cmItr.unprocessed, h) 125 } 126 } 127 128 numUnprocessed := len(cmItr.unprocessed) 129 130 if numUnprocessed == 0 { 131 cmItr.curr = nil 132 cmItr.currentRoot++ 133 return cmItr.Next(ctx) 134 } 135 136 next := cmItr.unprocessed[numUnprocessed-1] 137 cmItr.unprocessed = cmItr.unprocessed[:numUnprocessed-1] 138 cmItr.curr, err = HashToCommit(ctx, cmItr.ddb.ValueReadWriter(), cmItr.ddb.ns, next) 139 if err != nil && err != ErrGhostCommitEncountered { 140 return hash.Hash{}, nil, err 141 } 142 if err == ErrGhostCommitEncountered { 143 cmItr.curr = nil 144 } 145 146 return next, &OptionalCommit{cmItr.curr, next}, nil 147 } 148 149 func HashToCommit(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, h hash.Hash) (*Commit, error) { 150 dc, err := datas.LoadCommitAddr(ctx, vrw, h) 151 if err != nil { 152 return nil, err 153 } 154 155 if dc.IsGhost() { 156 return nil, ErrGhostCommitEncountered 157 } 158 159 return NewCommit(ctx, vrw, ns, dc) 160 } 161 162 // CommitFilter is a function that returns true if a commit should be filtered out, and false if it should be kept 163 type CommitFilter func(context.Context, hash.Hash, *OptionalCommit) (filterOut bool, err error) 164 165 // FilteringCommitItr is a CommitItr implementation that applies a filtering function to limit the commits returned 166 type FilteringCommitItr struct { 167 itr CommitItr 168 filter CommitFilter 169 } 170 171 // AllCommits is a CommitFilter that matches all commits 172 func AllCommits(_ context.Context, _ hash.Hash, _ *Commit) (filterOut bool, err error) { 173 return false, nil 174 } 175 176 func NewFilteringCommitItr(itr CommitItr, filter CommitFilter) FilteringCommitItr { 177 return FilteringCommitItr{itr, filter} 178 } 179 180 // Next returns the hash of the next commit, and a pointer to that commit. Implementations of Next must handle 181 // making sure the list of commits returned are unique. When complete Next will return hash.Hash{}, nil, io.EOF 182 func (itr FilteringCommitItr) Next(ctx context.Context) (hash.Hash, *OptionalCommit, error) { 183 // iteration will terminate on io.EOF or a commit that is !filteredOut 184 for { 185 h, cm, err := itr.itr.Next(ctx) 186 187 if err != nil { 188 return hash.Hash{}, nil, err 189 } 190 191 if filterOut, err := itr.filter(ctx, h, cm); err != nil { 192 return hash.Hash{}, nil, err 193 } else if !filterOut { 194 return h, cm, nil 195 } 196 } 197 } 198 199 // Reset the commit iterator back to the 200 func (itr FilteringCommitItr) Reset(ctx context.Context) error { 201 return itr.itr.Reset(ctx) 202 } 203 204 func NewCommitSliceIter(cm []*Commit, h []hash.Hash) *CommitSliceIter { 205 return &CommitSliceIter{cm: cm, h: h} 206 } 207 208 type CommitSliceIter struct { 209 h []hash.Hash 210 cm []*Commit 211 i int 212 } 213 214 var _ CommitItr = (*CommitSliceIter)(nil) 215 216 func (i *CommitSliceIter) Next(ctx context.Context) (hash.Hash, *OptionalCommit, error) { 217 if i.i >= len(i.h) { 218 return hash.Hash{}, nil, io.EOF 219 } 220 i.i++ 221 return i.h[i.i-1], &OptionalCommit{i.cm[i.i-1], i.h[i.i-1]}, nil 222 223 } 224 225 func (i *CommitSliceIter) Reset(ctx context.Context) error { 226 i.i = 0 227 return nil 228 } 229 230 func NewOneCommitIter(cm *Commit, h hash.Hash, meta *datas.CommitMeta) *OneCommitIter { 231 return &OneCommitIter{cm: &OptionalCommit{cm, h}, h: h} 232 } 233 234 type OneCommitIter struct { 235 h hash.Hash 236 cm *OptionalCommit 237 m *datas.CommitMeta 238 done bool 239 } 240 241 var _ CommitItr = (*OneCommitIter)(nil) 242 243 func (i *OneCommitIter) Next(_ context.Context) (hash.Hash, *OptionalCommit, error) { 244 if i.done { 245 return hash.Hash{}, nil, io.EOF 246 } 247 i.done = true 248 return i.h, i.cm, nil 249 250 } 251 252 func (i *OneCommitIter) Reset(_ context.Context) error { 253 i.done = false 254 return nil 255 } 256 257 func NewCommitPart(h hash.Hash, cm *Commit, m *datas.CommitMeta) *CommitPart { 258 return &CommitPart{h: h, cm: cm, m: m} 259 } 260 261 type CommitPart struct { 262 h hash.Hash 263 m *datas.CommitMeta 264 cm *Commit 265 } 266 267 var _ sql.Partition = (*CommitPart)(nil) 268 269 func (c *CommitPart) Hash() hash.Hash { 270 return c.h 271 } 272 273 func (c *CommitPart) Commit() *Commit { 274 return c.cm 275 } 276 277 func (c *CommitPart) Meta() *datas.CommitMeta { 278 return c.m 279 } 280 281 func (c *CommitPart) Key() []byte { 282 return c.h[:] 283 } 284 285 func NewCommitSlicePartitionIter(h []hash.Hash, cm []*Commit, m []*datas.CommitMeta) *CommitSlicePartitionIter { 286 return &CommitSlicePartitionIter{h: h, cm: cm, m: m} 287 } 288 289 type CommitSlicePartitionIter struct { 290 h []hash.Hash 291 m []*datas.CommitMeta 292 cm []*Commit 293 i int 294 } 295 296 var _ sql.PartitionIter = (*CommitSlicePartitionIter)(nil) 297 298 func (i *CommitSlicePartitionIter) Next(ctx *sql.Context) (sql.Partition, error) { 299 if i.i >= len(i.cm) { 300 return nil, io.EOF 301 } 302 i.i++ 303 return &CommitPart{h: i.h[i.i-1], m: i.m[i.i-1], cm: i.cm[i.i-1]}, nil 304 305 } 306 307 func (i *CommitSlicePartitionIter) Close(ctx *sql.Context) error { 308 return nil 309 }