github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/doltdb/commit_itr.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package doltdb
    16  
    17  import (
    18  	"context"
    19  	"io"
    20  
    21  	"github.com/dolthub/go-mysql-server/sql"
    22  
    23  	"github.com/dolthub/dolt/go/store/datas"
    24  	"github.com/dolthub/dolt/go/store/hash"
    25  	"github.com/dolthub/dolt/go/store/prolly/tree"
    26  	"github.com/dolthub/dolt/go/store/types"
    27  )
    28  
    29  // CommitItr is an interface for iterating over a set of unique commits
    30  type CommitItr interface {
    31  	// Next returns the hash of the next commit, and a pointer to that commit.  Implementations of Next must handle
    32  	// making sure the list of commits returned are unique.  When complete Next will return hash.Hash{}, nil, io.EOF
    33  	Next(ctx context.Context) (hash.Hash, *OptionalCommit, error)
    34  
    35  	// Reset the commit iterator back to the start
    36  	Reset(ctx context.Context) error
    37  }
    38  
    39  type commitItr struct {
    40  	ddb         *DoltDB
    41  	rootCommits []*Commit
    42  	currentRoot int
    43  
    44  	added       map[hash.Hash]bool
    45  	unprocessed []hash.Hash
    46  	curr        *Commit
    47  }
    48  
    49  // CommitItrForAllBranches returns a CommitItr which will iterate over all commits in all branches in a DoltDB
    50  func CommitItrForAllBranches(ctx context.Context, ddb *DoltDB) (CommitItr, error) {
    51  	branchRefs, err := ddb.GetBranches(ctx)
    52  
    53  	if err != nil {
    54  		return nil, err
    55  	}
    56  
    57  	rootCommits := make([]*Commit, 0, len(branchRefs))
    58  	for _, ref := range branchRefs {
    59  		cm, err := ddb.ResolveCommitRef(ctx, ref)
    60  
    61  		if err != nil {
    62  			return nil, err
    63  		}
    64  
    65  		rootCommits = append(rootCommits, cm)
    66  	}
    67  
    68  	cmItr := CommitItrForRoots(ddb, rootCommits...)
    69  	return cmItr, nil
    70  }
    71  
    72  // CommitItrForRoots will return a CommitItr which will iterate over all ancestor commits of the provided rootCommits.
    73  func CommitItrForRoots(ddb *DoltDB, rootCommits ...*Commit) CommitItr {
    74  	return &commitItr{
    75  		ddb:         ddb,
    76  		rootCommits: rootCommits,
    77  		added:       make(map[hash.Hash]bool, 4096),
    78  		unprocessed: make([]hash.Hash, 0, 4096),
    79  	}
    80  }
    81  
    82  func (cmItr *commitItr) Reset(ctx context.Context) error {
    83  	cmItr.curr = nil
    84  	cmItr.currentRoot = 0
    85  	cmItr.added = make(map[hash.Hash]bool, 4096)
    86  	cmItr.unprocessed = cmItr.unprocessed[:0]
    87  
    88  	return nil
    89  }
    90  
    91  // Next returns the hash of the next commit, and a pointer to that commit.  It handles making sure the list of commits
    92  // returned are unique.  When complete Next will return hash.Hash{}, nil, io.EOF
    93  func (cmItr *commitItr) Next(ctx context.Context) (hash.Hash, *OptionalCommit, error) {
    94  	for cmItr.curr == nil {
    95  		if cmItr.currentRoot >= len(cmItr.rootCommits) {
    96  			return hash.Hash{}, nil, io.EOF
    97  		}
    98  
    99  		cm := cmItr.rootCommits[cmItr.currentRoot]
   100  		h, err := cm.HashOf()
   101  
   102  		if err != nil {
   103  			return hash.Hash{}, nil, err
   104  		}
   105  
   106  		if !cmItr.added[h] {
   107  			cmItr.added[h] = true
   108  			cmItr.curr = cm
   109  			return h, &OptionalCommit{cmItr.curr, h}, nil
   110  		}
   111  
   112  		cmItr.currentRoot++
   113  	}
   114  
   115  	parents, err := cmItr.curr.ParentHashes(ctx)
   116  
   117  	if err != nil {
   118  		return hash.Hash{}, nil, err
   119  	}
   120  
   121  	for _, h := range parents {
   122  		if !cmItr.added[h] {
   123  			cmItr.added[h] = true
   124  			cmItr.unprocessed = append(cmItr.unprocessed, h)
   125  		}
   126  	}
   127  
   128  	numUnprocessed := len(cmItr.unprocessed)
   129  
   130  	if numUnprocessed == 0 {
   131  		cmItr.curr = nil
   132  		cmItr.currentRoot++
   133  		return cmItr.Next(ctx)
   134  	}
   135  
   136  	next := cmItr.unprocessed[numUnprocessed-1]
   137  	cmItr.unprocessed = cmItr.unprocessed[:numUnprocessed-1]
   138  	cmItr.curr, err = HashToCommit(ctx, cmItr.ddb.ValueReadWriter(), cmItr.ddb.ns, next)
   139  	if err != nil && err != ErrGhostCommitEncountered {
   140  		return hash.Hash{}, nil, err
   141  	}
   142  	if err == ErrGhostCommitEncountered {
   143  		cmItr.curr = nil
   144  	}
   145  
   146  	return next, &OptionalCommit{cmItr.curr, next}, nil
   147  }
   148  
   149  func HashToCommit(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, h hash.Hash) (*Commit, error) {
   150  	dc, err := datas.LoadCommitAddr(ctx, vrw, h)
   151  	if err != nil {
   152  		return nil, err
   153  	}
   154  
   155  	if dc.IsGhost() {
   156  		return nil, ErrGhostCommitEncountered
   157  	}
   158  
   159  	return NewCommit(ctx, vrw, ns, dc)
   160  }
   161  
   162  // CommitFilter is a function that returns true if a commit should be filtered out, and false if it should be kept
   163  type CommitFilter func(context.Context, hash.Hash, *OptionalCommit) (filterOut bool, err error)
   164  
   165  // FilteringCommitItr is a CommitItr implementation that applies a filtering function to limit the commits returned
   166  type FilteringCommitItr struct {
   167  	itr    CommitItr
   168  	filter CommitFilter
   169  }
   170  
   171  // AllCommits is a CommitFilter that matches all commits
   172  func AllCommits(_ context.Context, _ hash.Hash, _ *Commit) (filterOut bool, err error) {
   173  	return false, nil
   174  }
   175  
   176  func NewFilteringCommitItr(itr CommitItr, filter CommitFilter) FilteringCommitItr {
   177  	return FilteringCommitItr{itr, filter}
   178  }
   179  
   180  // Next returns the hash of the next commit, and a pointer to that commit.  Implementations of Next must handle
   181  // making sure the list of commits returned are unique.  When complete Next will return hash.Hash{}, nil, io.EOF
   182  func (itr FilteringCommitItr) Next(ctx context.Context) (hash.Hash, *OptionalCommit, error) {
   183  	// iteration will terminate on io.EOF or a commit that is !filteredOut
   184  	for {
   185  		h, cm, err := itr.itr.Next(ctx)
   186  
   187  		if err != nil {
   188  			return hash.Hash{}, nil, err
   189  		}
   190  
   191  		if filterOut, err := itr.filter(ctx, h, cm); err != nil {
   192  			return hash.Hash{}, nil, err
   193  		} else if !filterOut {
   194  			return h, cm, nil
   195  		}
   196  	}
   197  }
   198  
   199  // Reset the commit iterator back to the
   200  func (itr FilteringCommitItr) Reset(ctx context.Context) error {
   201  	return itr.itr.Reset(ctx)
   202  }
   203  
   204  func NewCommitSliceIter(cm []*Commit, h []hash.Hash) *CommitSliceIter {
   205  	return &CommitSliceIter{cm: cm, h: h}
   206  }
   207  
   208  type CommitSliceIter struct {
   209  	h  []hash.Hash
   210  	cm []*Commit
   211  	i  int
   212  }
   213  
   214  var _ CommitItr = (*CommitSliceIter)(nil)
   215  
   216  func (i *CommitSliceIter) Next(ctx context.Context) (hash.Hash, *OptionalCommit, error) {
   217  	if i.i >= len(i.h) {
   218  		return hash.Hash{}, nil, io.EOF
   219  	}
   220  	i.i++
   221  	return i.h[i.i-1], &OptionalCommit{i.cm[i.i-1], i.h[i.i-1]}, nil
   222  
   223  }
   224  
   225  func (i *CommitSliceIter) Reset(ctx context.Context) error {
   226  	i.i = 0
   227  	return nil
   228  }
   229  
   230  func NewOneCommitIter(cm *Commit, h hash.Hash, meta *datas.CommitMeta) *OneCommitIter {
   231  	return &OneCommitIter{cm: &OptionalCommit{cm, h}, h: h}
   232  }
   233  
   234  type OneCommitIter struct {
   235  	h    hash.Hash
   236  	cm   *OptionalCommit
   237  	m    *datas.CommitMeta
   238  	done bool
   239  }
   240  
   241  var _ CommitItr = (*OneCommitIter)(nil)
   242  
   243  func (i *OneCommitIter) Next(_ context.Context) (hash.Hash, *OptionalCommit, error) {
   244  	if i.done {
   245  		return hash.Hash{}, nil, io.EOF
   246  	}
   247  	i.done = true
   248  	return i.h, i.cm, nil
   249  
   250  }
   251  
   252  func (i *OneCommitIter) Reset(_ context.Context) error {
   253  	i.done = false
   254  	return nil
   255  }
   256  
   257  func NewCommitPart(h hash.Hash, cm *Commit, m *datas.CommitMeta) *CommitPart {
   258  	return &CommitPart{h: h, cm: cm, m: m}
   259  }
   260  
   261  type CommitPart struct {
   262  	h  hash.Hash
   263  	m  *datas.CommitMeta
   264  	cm *Commit
   265  }
   266  
   267  var _ sql.Partition = (*CommitPart)(nil)
   268  
   269  func (c *CommitPart) Hash() hash.Hash {
   270  	return c.h
   271  }
   272  
   273  func (c *CommitPart) Commit() *Commit {
   274  	return c.cm
   275  }
   276  
   277  func (c *CommitPart) Meta() *datas.CommitMeta {
   278  	return c.m
   279  }
   280  
   281  func (c *CommitPart) Key() []byte {
   282  	return c.h[:]
   283  }
   284  
   285  func NewCommitSlicePartitionIter(h []hash.Hash, cm []*Commit, m []*datas.CommitMeta) *CommitSlicePartitionIter {
   286  	return &CommitSlicePartitionIter{h: h, cm: cm, m: m}
   287  }
   288  
   289  type CommitSlicePartitionIter struct {
   290  	h  []hash.Hash
   291  	m  []*datas.CommitMeta
   292  	cm []*Commit
   293  	i  int
   294  }
   295  
   296  var _ sql.PartitionIter = (*CommitSlicePartitionIter)(nil)
   297  
   298  func (i *CommitSlicePartitionIter) Next(ctx *sql.Context) (sql.Partition, error) {
   299  	if i.i >= len(i.cm) {
   300  		return nil, io.EOF
   301  	}
   302  	i.i++
   303  	return &CommitPart{h: i.h[i.i-1], m: i.m[i.i-1], cm: i.cm[i.i-1]}, nil
   304  
   305  }
   306  
   307  func (i *CommitSlicePartitionIter) Close(ctx *sql.Context) error {
   308  	return nil
   309  }