github.com/cilium/statedb@v0.3.2/iterator.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package statedb
     5  
     6  import (
     7  	"bytes"
     8  	"fmt"
     9  	"iter"
    10  	"slices"
    11  
    12  	"github.com/cilium/statedb/index"
    13  	"github.com/cilium/statedb/part"
    14  )
    15  
    16  // Collect creates a slice of objects out of the iterator.
    17  // The iterator is consumed in the process.
    18  func Collect[Obj any](seq iter.Seq2[Obj, Revision]) []Obj {
    19  	return slices.Collect(ToSeq(seq))
    20  }
    21  
    22  // Map a function over a sequence of objects returned by
    23  // a query.
    24  func Map[In, Out any](seq iter.Seq2[In, Revision], fn func(In) Out) iter.Seq2[Out, Revision] {
    25  	return func(yield func(Out, Revision) bool) {
    26  		for obj, rev := range seq {
    27  			if !yield(fn(obj), rev) {
    28  				break
    29  			}
    30  		}
    31  	}
    32  }
    33  
    34  func Filter[Obj any](seq iter.Seq2[Obj, Revision], keep func(Obj) bool) iter.Seq2[Obj, Revision] {
    35  	return func(yield func(Obj, Revision) bool) {
    36  		for obj, rev := range seq {
    37  			if keep(obj) {
    38  				if !yield(obj, rev) {
    39  					break
    40  				}
    41  			}
    42  		}
    43  	}
    44  }
    45  
    46  // ToSeq takes a Seq2 and produces a Seq with the first element of the pair.
    47  func ToSeq[A, B any](seq iter.Seq2[A, B]) iter.Seq[A] {
    48  	return func(yield func(A) bool) {
    49  		for x, _ := range seq {
    50  			if !yield(x) {
    51  				break
    52  			}
    53  		}
    54  	}
    55  }
    56  
    57  // partSeq returns a casted sequence of objects from a part Iterator.
    58  func partSeq[Obj any](iter *part.Iterator[object]) iter.Seq2[Obj, Revision] {
    59  	return func(yield func(Obj, Revision) bool) {
    60  		// Iterate over a clone of the original iterator to allow the sequence to be iterated
    61  		// from scratch multiple times.
    62  		it := iter.Clone()
    63  		for {
    64  			_, iobj, ok := it.Next()
    65  			if !ok {
    66  				break
    67  			}
    68  			if !yield(iobj.data.(Obj), iobj.revision) {
    69  				break
    70  			}
    71  		}
    72  	}
    73  }
    74  
    75  // nonUniqueSeq returns a sequence of objects for a non-unique index.
    76  // Non-unique indexes work by concatenating the secondary key with the
    77  // primary key and then prefix searching for the items:
    78  //
    79  //	<secondary>\0<primary><secondary length>
    80  //	^^^^^^^^^^^
    81  //
    82  // Since the primary key can be of any length and we're prefix searching,
    83  // we need to iterate over all objects matching the prefix and only emitting
    84  // those which have the correct secondary key length.
    85  // For example if we search for the key "aaaa", then we might have the following
    86  // matches (_ is just delimiting, not part of the key):
    87  //
    88  //	aaaa\0bbb4
    89  //	aaa\0abab3
    90  //	aaaa\0ccc4
    91  //
    92  // We yield "aaaa\0bbb4", skip "aaa\0abab3" and yield "aaaa\0ccc4".
    93  func nonUniqueSeq[Obj any](iter *part.Iterator[object], prefixSearch bool, searchKey []byte) iter.Seq2[Obj, Revision] {
    94  	return func(yield func(Obj, Revision) bool) {
    95  		// Clone the iterator to allow multiple iterations over the sequence.
    96  		it := iter.Clone()
    97  
    98  		var visited map[string]struct{}
    99  		if prefixSearch {
   100  			// When prefix searching, keep track of objects we've already seen as
   101  			// multiple keys in non-unique index may map to a single object.
   102  			// When just doing a List() on a non-unique index we will see each object
   103  			// only once and do not need to track this.
   104  			//
   105  			// This of course makes iterating over a non-unique index with a prefix
   106  			// (or lowerbound search) about 20x slower than normal!
   107  			visited = map[string]struct{}{}
   108  		}
   109  
   110  		for {
   111  			key, iobj, ok := it.Next()
   112  			if !ok {
   113  				break
   114  			}
   115  
   116  			secondary, primary := decodeNonUniqueKey(key)
   117  
   118  			switch {
   119  			case !prefixSearch && len(secondary) != len(searchKey):
   120  				// This a List(), thus secondary key must match length exactly.
   121  				continue
   122  			case prefixSearch && len(secondary) < len(searchKey):
   123  				// This is Prefix(), thus key must be equal or longer to search key.
   124  				continue
   125  			}
   126  
   127  			if prefixSearch {
   128  				// When doing a prefix search on a non-unique index we may see the
   129  				// same object multiple times since multiple keys may point it.
   130  				// Skip if we've already seen this object.
   131  				if _, found := visited[string(primary)]; found {
   132  					continue
   133  				}
   134  				visited[string(primary)] = struct{}{}
   135  			}
   136  
   137  			if !yield(iobj.data.(Obj), iobj.revision) {
   138  				break
   139  			}
   140  		}
   141  	}
   142  }
   143  
   144  func nonUniqueLowerBoundSeq[Obj any](iter *part.Iterator[object], searchKey []byte) iter.Seq2[Obj, Revision] {
   145  	return func(yield func(Obj, Revision) bool) {
   146  		// Clone the iterator to allow multiple uses.
   147  		iter = iter.Clone()
   148  
   149  		// Keep track of objects we've already seen as multiple keys in non-unique
   150  		// index may map to a single object.
   151  		visited := map[string]struct{}{}
   152  		for {
   153  			key, iobj, ok := iter.Next()
   154  			if !ok {
   155  				break
   156  			}
   157  			// With a non-unique index we have a composite key <secondary><primary><secondary len>.
   158  			// This means we need to check every key that it's larger or equal to the search key.
   159  			// Just seeking to the first one isn't enough as the secondary key length may vary.
   160  			secondary, primary := decodeNonUniqueKey(key)
   161  			if bytes.Compare(secondary, searchKey) >= 0 {
   162  				if _, found := visited[string(primary)]; found {
   163  					continue
   164  				}
   165  				visited[string(primary)] = struct{}{}
   166  
   167  				if !yield(iobj.data.(Obj), iobj.revision) {
   168  					return
   169  				}
   170  			}
   171  		}
   172  	}
   173  }
   174  
   175  // iterator adapts the "any" object iterator to a typed object.
   176  type iterator[Obj any] struct {
   177  	iter interface{ Next() ([]byte, object, bool) }
   178  }
   179  
   180  func (it *iterator[Obj]) Next() (obj Obj, revision uint64, ok bool) {
   181  	_, iobj, ok := it.iter.Next()
   182  	if ok {
   183  		obj = iobj.data.(Obj)
   184  		revision = iobj.revision
   185  	}
   186  	return
   187  }
   188  
   189  // Iterator for iterating a sequence objects.
   190  type Iterator[Obj any] interface {
   191  	// Next returns the next object and its revision if ok is true, otherwise
   192  	// zero values to mean that the iteration has finished.
   193  	Next() (obj Obj, rev Revision, ok bool)
   194  }
   195  
   196  func NewDualIterator[Obj any](left, right Iterator[Obj]) *DualIterator[Obj] {
   197  	return &DualIterator[Obj]{
   198  		left:  iterState[Obj]{iter: left},
   199  		right: iterState[Obj]{iter: right},
   200  	}
   201  }
   202  
   203  type iterState[Obj any] struct {
   204  	iter Iterator[Obj]
   205  	obj  Obj
   206  	rev  Revision
   207  	ok   bool
   208  }
   209  
   210  // DualIterator allows iterating over two iterators in revision order.
   211  // Meant to be used for combined iteration of LowerBound(ByRevision)
   212  // and Deleted().
   213  type DualIterator[Obj any] struct {
   214  	left  iterState[Obj]
   215  	right iterState[Obj]
   216  }
   217  
   218  func (it *DualIterator[Obj]) Next() (obj Obj, revision uint64, fromLeft, ok bool) {
   219  	// Advance the iterators
   220  	if !it.left.ok && it.left.iter != nil {
   221  		it.left.obj, it.left.rev, it.left.ok = it.left.iter.Next()
   222  		if !it.left.ok {
   223  			it.left.iter = nil
   224  		}
   225  	}
   226  	if !it.right.ok && it.right.iter != nil {
   227  		it.right.obj, it.right.rev, it.right.ok = it.right.iter.Next()
   228  		if !it.right.ok {
   229  			it.right.iter = nil
   230  		}
   231  	}
   232  
   233  	// Find the lowest revision object
   234  	switch {
   235  	case !it.left.ok && !it.right.ok:
   236  		ok = false
   237  		return
   238  	case it.left.ok && !it.right.ok:
   239  		it.left.ok = false
   240  		return it.left.obj, it.left.rev, true, true
   241  	case it.right.ok && !it.left.ok:
   242  		it.right.ok = false
   243  		return it.right.obj, it.right.rev, false, true
   244  	case it.left.rev <= it.right.rev:
   245  		it.left.ok = false
   246  		return it.left.obj, it.left.rev, true, true
   247  	case it.right.rev <= it.left.rev:
   248  		it.right.ok = false
   249  		return it.right.obj, it.right.rev, false, true
   250  	default:
   251  		panic(fmt.Sprintf("BUG: Unhandled case: %+v", it))
   252  	}
   253  }
   254  
   255  type changeIterator[Obj any] struct {
   256  	table          Table[Obj]
   257  	revision       Revision
   258  	deleteRevision Revision
   259  	dt             *deleteTracker[Obj]
   260  	iter           *DualIterator[Obj]
   261  	watch          <-chan struct{}
   262  }
   263  
   264  func (it *changeIterator[Obj]) refresh(txn ReadTxn) {
   265  	// Instead of indexReadTxn() we look up directly here so we don't
   266  	// refresh from mutated indexes in case [txn] is a WriteTxn. This
   267  	// is important as the WriteTxn may be aborted and thus revisions will
   268  	// reset back and watermarks bumped from here would be invalid.
   269  	itxn := txn.getTxn()
   270  	indexEntry := itxn.root[it.table.tablePos()].indexes[RevisionIndexPos]
   271  	indexTxn := indexReadTxn{indexEntry.tree, indexEntry.unique}
   272  	updateIter := &iterator[Obj]{indexTxn.LowerBound(index.Uint64(it.revision + 1))}
   273  	deleteIter := it.dt.deleted(itxn, it.deleteRevision+1)
   274  	it.iter = NewDualIterator(deleteIter, updateIter)
   275  
   276  	// It is enough to watch the revision index and not the graveyard since
   277  	// any object that is inserted into the graveyard will be deleted from
   278  	// the revision index.
   279  	it.watch = indexTxn.RootWatch()
   280  }
   281  
   282  func (it *changeIterator[Obj]) Next(txn ReadTxn) (seq iter.Seq2[Change[Obj], Revision], watch <-chan struct{}) {
   283  	if it.iter == nil {
   284  		// Iterator has been exhausted, check if we need to requery
   285  		// or whether we need to wait for changes first.
   286  		select {
   287  		case <-it.watch:
   288  			// Watch channel closed, so new changes await
   289  		default:
   290  			// Watch channel for the query not closed yet, so return it to allow
   291  			// caller to wait for the new changes.
   292  			watch = it.watch
   293  			seq = func(yield func(Change[Obj], Revision) bool) {}
   294  			return
   295  		}
   296  	}
   297  
   298  	// Refresh the iterator regardless if it was fully consumed or not to
   299  	// pull in new changes. We keep returning a closed channel until the
   300  	// iterator has been fully consumed. This does mean there's an extra
   301  	// Next() call to get a proper watch channel, but it does make this
   302  	// API much safer to use even when only partially consuming the
   303  	// sequence.
   304  	it.refresh(txn)
   305  	watch = closedWatchChannel
   306  	seq = func(yield func(Change[Obj], Revision) bool) {
   307  		if it.iter == nil {
   308  			return
   309  		}
   310  		for obj, rev, deleted, ok := it.iter.Next(); ok; obj, rev, deleted, ok = it.iter.Next() {
   311  			if deleted {
   312  				it.deleteRevision = rev
   313  				it.dt.mark(rev)
   314  			} else {
   315  				it.revision = rev
   316  			}
   317  			change := Change[Obj]{
   318  				Object:   obj,
   319  				Revision: rev,
   320  				Deleted:  deleted,
   321  			}
   322  			if !yield(change, rev) {
   323  				return
   324  			}
   325  		}
   326  		it.iter = nil
   327  	}
   328  	return
   329  }
   330  
   331  // changesAny is for implementing the /changes HTTP API where the concrete object
   332  // type is not known.
   333  func (it *changeIterator[Obj]) nextAny(txn ReadTxn) (iter.Seq2[Change[any], Revision], <-chan struct{}) {
   334  	seq, watch := it.Next(txn)
   335  
   336  	return func(yield func(Change[any], Revision) bool) {
   337  		for change, rev := range seq {
   338  			ok := yield(Change[any]{
   339  				Object:   change.Object,
   340  				Revision: change.Revision,
   341  				Deleted:  change.Deleted,
   342  			}, rev)
   343  			if !ok {
   344  				break
   345  			}
   346  		}
   347  	}, watch
   348  }
   349  
   350  func (it *changeIterator[Obj]) close() {
   351  	if it.dt != nil {
   352  		it.dt.close()
   353  	}
   354  	it.dt = nil
   355  }
   356  
   357  type anyChangeIterator interface {
   358  	nextAny(ReadTxn) (iter.Seq2[Change[any], Revision], <-chan struct{})
   359  }