github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/ref/commit_ordered_iterator.go (about)

     1  package ref
     2  
     3  import (
     4  	"context"
     5  
     6  	"github.com/treeverse/lakefs/pkg/graveler"
     7  	"github.com/treeverse/lakefs/pkg/kv"
     8  )
     9  
// OrderedCommitIterator iterates over the commits of a single repository as
// they are read from the KV store, optionally skipping commits that are the
// first parent of some other commit.
type OrderedCommitIterator struct {
	ctx                context.Context        // context reused when SeekGE re-creates the underlying iterator
	it                 *kv.PrimaryIterator    // underlying KV iterator over commit entries; set to nil by Close
	store              kv.Store               // KV store used to (re-)create it
	err                error                  // error recorded by Next or SeekGE; once set, Next returns false and SeekGE is a no-op
	value              *graveler.CommitRecord // commit produced by the last successful Next; nil after exhaustion or SeekGE
	repositoryPath     string                 // partition key of the repository, from graveler.RepoPartition
	onlyAncestryLeaves bool                   // when true, Next skips commits whose ID is in firstParents
	firstParents       map[string]bool        // IDs of commits that are the first parent of another commit; populated only when onlyAncestryLeaves is true
}
    20  
    21  // NewOrderedCommitIterator returns an iterator over all commits in the given repository.
    22  // Ordering is based on the Commit ID value.
    23  // WithOnlyAncestryLeaves causes the iterator to return only commits which are not the first parent of any other commit.
    24  // Consider a commit graph where all non-first-parent edges are removed. This graph is a tree, and ancestry leaves are its leaves.
    25  func NewOrderedCommitIterator(ctx context.Context, store kv.Store, repo *graveler.RepositoryRecord, onlyAncestryLeaves bool) (*OrderedCommitIterator, error) {
    26  	repoPath := graveler.RepoPartition(repo)
    27  	it, err := kv.NewPrimaryIterator(ctx, store, (&graveler.CommitData{}).ProtoReflect().Type(), repoPath,
    28  		[]byte(graveler.CommitPath("")), kv.IteratorOptionsFrom([]byte("")))
    29  	if err != nil {
    30  		return nil, err
    31  	}
    32  	var parents map[string]bool
    33  	if onlyAncestryLeaves {
    34  		parents, err = getAllFirstParents(ctx, store, repo)
    35  		if err != nil {
    36  			it.Close()
    37  			return nil, err
    38  		}
    39  	}
    40  	return &OrderedCommitIterator{
    41  		ctx:                ctx,
    42  		it:                 it,
    43  		store:              store,
    44  		repositoryPath:     repoPath,
    45  		onlyAncestryLeaves: onlyAncestryLeaves,
    46  		firstParents:       parents,
    47  	}, nil
    48  }
    49  
    50  func (i *OrderedCommitIterator) Next() bool {
    51  	if i.Err() != nil || i.it == nil {
    52  		return false
    53  	}
    54  	for i.it.Next() {
    55  		e := i.it.Entry()
    56  		if e == nil {
    57  			i.err = graveler.ErrInvalid
    58  			return false
    59  		}
    60  		commit, ok := e.Value.(*graveler.CommitData)
    61  		if commit == nil || !ok {
    62  			i.err = graveler.ErrReadingFromStore
    63  			return false
    64  		}
    65  		if !i.onlyAncestryLeaves || !i.firstParents[commit.Id] {
    66  			i.value = CommitDataToCommitRecord(commit)
    67  			return true
    68  		}
    69  	}
    70  	i.value = nil
    71  	return false
    72  }
    73  
    74  func (i *OrderedCommitIterator) SeekGE(id graveler.CommitID) {
    75  	if i.err != nil {
    76  		return
    77  	}
    78  	i.it.Close()
    79  	i.value = nil
    80  	i.it, i.err = kv.NewPrimaryIterator(i.ctx, i.store, (&graveler.CommitData{}).ProtoReflect().Type(), i.repositoryPath,
    81  		[]byte(graveler.CommitPath("")), kv.IteratorOptionsFrom([]byte(graveler.CommitPath(id))))
    82  }
    83  
    84  func (i *OrderedCommitIterator) Value() *graveler.CommitRecord {
    85  	if i.Err() != nil {
    86  		return nil
    87  	}
    88  	return i.value
    89  }
    90  
    91  func (i *OrderedCommitIterator) Err() error {
    92  	if i.err != nil {
    93  		return i.err
    94  	}
    95  	if i.it != nil {
    96  		return i.it.Err()
    97  	}
    98  	return nil
    99  }
   100  
   101  func (i *OrderedCommitIterator) Close() {
   102  	if i.it != nil {
   103  		i.it.Close()
   104  		i.it = nil
   105  	}
   106  }
   107  
   108  // getAllFirstParents returns a set of all commits that are the first parent of some other commit for a given repository.
   109  func getAllFirstParents(ctx context.Context, store kv.Store, repo *graveler.RepositoryRecord) (map[string]bool, error) {
   110  	it, err := kv.NewPrimaryIterator(ctx, store, (&graveler.CommitData{}).ProtoReflect().Type(),
   111  		graveler.RepoPartition(repo),
   112  		[]byte(graveler.CommitPath("")), kv.IteratorOptionsFrom([]byte("")))
   113  	if err != nil {
   114  		return nil, err
   115  	}
   116  	defer it.Close()
   117  	firstParents := make(map[string]bool)
   118  	for it.Next() {
   119  		entry := it.Entry()
   120  		commit := entry.Value.(*graveler.CommitData)
   121  		if len(commit.Parents) > 0 {
   122  			parentNo := 0
   123  			if graveler.CommitVersion(commit.Version) < graveler.CommitVersionParentSwitch && len(commit.Parents) > 1 {
   124  				parentNo = 1
   125  			}
   126  			parent := commit.Parents[parentNo]
   127  			firstParents[parent] = true
   128  		}
   129  	}
   130  	return firstParents, nil
   131  }