github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/ref/commit_iterator.go (about)

     1  package ref
     2  
     3  import (
     4  	"container/heap"
     5  	"context"
     6  	"time"
     7  
     8  	"github.com/treeverse/lakefs/pkg/graveler"
     9  )
    10  
// CommitIterator walks commits starting at a given commit and following
// parent links. Commits waiting to be returned are held in a priority queue
// whose ordering is defined by commitsPriorityQueue.Less.
type CommitIterator struct {
	// manager is used to load commits by ID.
	manager graveler.RefManager
	// ctx is passed to every manager.GetCommit call.
	ctx context.Context
	// repository is the repository the commits are loaded from.
	repository *graveler.RepositoryRecord
	// start is the commit the traversal begins from.
	start graveler.CommitID
	// firstParent, when true, makes Next follow only the first parent of each commit.
	firstParent bool
	// value is the record most recently produced by Next; it is nil when Next
	// returned false and after SeekGE.
	value *graveler.CommitRecord
	// queue holds commits that were loaded but not yet returned.
	queue commitsPriorityQueue
	// visit records the IDs of parent commits that were already loaded, so a
	// commit reachable through several children is loaded only once.
	visit map[graveler.CommitID]struct{}
	// state tracks whether the start commit still has to be loaded, the
	// traversal is in progress, or the iterator is exhausted.
	state commitIteratorState
	// since, when non-nil, excludes commits created before this time.
	since *time.Time
	// err is the error that stopped the iteration, if any; SeekGE clears it.
	err error
}
    24  
// commitIteratorState is the phase of a CommitIterator's traversal.
type commitIteratorState int

const (
	// commitIteratorStateInit: the start commit has not been loaded yet.
	commitIteratorStateInit commitIteratorState = iota
	// commitIteratorStateQuery: the start commit was looked up and the queue is being consumed.
	commitIteratorStateQuery
	// commitIteratorStateDone: the queue ran empty; Next returns false from now on.
	commitIteratorStateDone
)
    32  
// commitsPriorityQueue is a slice of commit records that provides the methods
// required by container/heap (Len, Less, Swap, Push, Pop).
type commitsPriorityQueue []*graveler.CommitRecord
    34  
    35  func (c *commitsPriorityQueue) Len() int {
    36  	return len(*c)
    37  }
    38  
    39  func (c *commitsPriorityQueue) Less(i, j int) bool {
    40  	pq := *c
    41  	if pq[i].Commit.CreationDate.Equal(pq[j].Commit.CreationDate) {
    42  		return pq[i].CommitID > pq[j].CommitID
    43  	}
    44  	return pq[i].Commit.CreationDate.After(pq[j].Commit.CreationDate)
    45  }
    46  
    47  func (c *commitsPriorityQueue) Swap(i, j int) {
    48  	pq := *c
    49  	pq[i], pq[j] = pq[j], pq[i]
    50  }
    51  
    52  func (c *commitsPriorityQueue) Push(x interface{}) {
    53  	rec := x.(*graveler.CommitRecord)
    54  	*c = append(*c, rec)
    55  }
    56  
    57  func (c *commitsPriorityQueue) Pop() interface{} {
    58  	cc := *c
    59  	n := len(cc) - 1
    60  	item := cc[n]
    61  	*c = cc[:n]
    62  	return item
    63  }
    64  
// CommitIteratorConfig carries the settings NewCommitIterator copies into a
// new CommitIterator.
type CommitIteratorConfig struct {
	// repository is the repository the commits are loaded from.
	repository *graveler.RepositoryRecord
	// start is the commit the traversal begins from.
	start graveler.CommitID
	// firstParent, when true, restricts the traversal to the first parent of each commit.
	firstParent bool
	// manager is used to load commits by ID.
	manager graveler.RefManager
	// since, when non-nil, excludes commits created before this time.
	since *time.Time
}
    72  
    73  // NewCommitIterator returns an iterator over all commits in the given repository.
    74  // Ordering is based on the Commit Creation Date.
    75  func NewCommitIterator(ctx context.Context, config *CommitIteratorConfig) *CommitIterator {
    76  	return &CommitIterator{
    77  		ctx:         ctx,
    78  		repository:  config.repository,
    79  		start:       config.start,
    80  		queue:       make(commitsPriorityQueue, 0),
    81  		visit:       make(map[graveler.CommitID]struct{}),
    82  		manager:     config.manager,
    83  		firstParent: config.firstParent,
    84  		since:       config.since,
    85  	}
    86  }
    87  
    88  func (ci *CommitIterator) getCommitRecord(commitID graveler.CommitID) (*graveler.CommitRecord, error) {
    89  	commit, err := ci.manager.GetCommit(ci.ctx, ci.repository, commitID)
    90  	if err != nil {
    91  		return nil, err
    92  	}
    93  	return &graveler.CommitRecord{
    94  		CommitID: commitID,
    95  		Commit:   commit,
    96  	}, nil
    97  }
    98  
    99  func (ci *CommitIterator) Next() bool {
   100  	if ci.err != nil || ci.state == commitIteratorStateDone {
   101  		return false
   102  	}
   103  
   104  	if ci.state == commitIteratorStateInit {
   105  		// first time we look up the 'start' commit and push it into the queue
   106  		ci.state = commitIteratorStateQuery
   107  		rec, err := ci.getCommitRecord(ci.start)
   108  		if err != nil {
   109  			ci.value = nil
   110  			ci.err = err
   111  			return false
   112  		}
   113  		// skip commits that are older than since time
   114  		if ci.since == nil || !rec.Commit.CreationDate.Before(*ci.since) {
   115  			ci.queue.Push(rec)
   116  		}
   117  	}
   118  
   119  	if ci.queue.Len() == 0 {
   120  		ci.value = nil
   121  		ci.state = commitIteratorStateDone
   122  		return false
   123  	}
   124  
   125  	// as long as we have something in the queue we will
   126  	// set it as the current value and push the current commits parents to the queue
   127  	ci.value = heap.Pop(&ci.queue).(*graveler.CommitRecord)
   128  	parents := ci.value.Parents
   129  	if ci.firstParent && len(parents) > 1 {
   130  		parents = parents[:1]
   131  	}
   132  	for _, p := range parents {
   133  		// skip commits we already visited
   134  		if _, visited := ci.visit[p]; visited {
   135  			continue
   136  		}
   137  
   138  		rec, err := ci.getCommitRecord(p)
   139  		if err != nil {
   140  			ci.value = nil
   141  			ci.err = err
   142  			return false
   143  		}
   144  		ci.visit[rec.CommitID] = struct{}{}
   145  
   146  		// skip commits that are older than since time
   147  		if ci.since != nil && rec.Commit.CreationDate.Before(*ci.since) {
   148  			continue
   149  		}
   150  
   151  		heap.Push(&ci.queue, rec)
   152  	}
   153  	return true
   154  }
   155  
   156  // SeekGE skip under the point of 'id' commit ID based on a new
   157  //
   158  //	The list of commit
   159  func (ci *CommitIterator) SeekGE(id graveler.CommitID) {
   160  	ci.err = nil
   161  	ci.queue = make(commitsPriorityQueue, 0)
   162  	ci.visit = make(map[graveler.CommitID]struct{})
   163  	ci.state = commitIteratorStateInit
   164  
   165  	// skip until we get into our commit
   166  	for ci.Next() {
   167  		if ci.Value().CommitID == id {
   168  			break
   169  		}
   170  	}
   171  	if ci.Err() != nil {
   172  		return
   173  	}
   174  
   175  	// step back - in order to have Next to read the value we just got,
   176  	// we push back the current value to our queue and set the current value to nil.
   177  	heap.Push(&ci.queue, ci.value)
   178  	ci.value = nil
   179  }
   180  
// Value returns the commit record produced by the last call to Next. It is
// nil when that call returned false, and after SeekGE until Next is called.
func (ci *CommitIterator) Value() *graveler.CommitRecord {
	return ci.value
}
   184  
// Err returns the error that stopped the iteration, or nil if there was none.
func (ci *CommitIterator) Err() error {
	return ci.err
}
   188  
// Close does nothing.
func (ci *CommitIterator) Close() {}