github.com/cilium/statedb@v0.3.2/iterator.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package statedb 5 6 import ( 7 "bytes" 8 "fmt" 9 "iter" 10 "slices" 11 12 "github.com/cilium/statedb/index" 13 "github.com/cilium/statedb/part" 14 ) 15 16 // Collect creates a slice of objects out of the iterator. 17 // The iterator is consumed in the process. 18 func Collect[Obj any](seq iter.Seq2[Obj, Revision]) []Obj { 19 return slices.Collect(ToSeq(seq)) 20 } 21 22 // Map a function over a sequence of objects returned by 23 // a query. 24 func Map[In, Out any](seq iter.Seq2[In, Revision], fn func(In) Out) iter.Seq2[Out, Revision] { 25 return func(yield func(Out, Revision) bool) { 26 for obj, rev := range seq { 27 if !yield(fn(obj), rev) { 28 break 29 } 30 } 31 } 32 } 33 34 func Filter[Obj any](seq iter.Seq2[Obj, Revision], keep func(Obj) bool) iter.Seq2[Obj, Revision] { 35 return func(yield func(Obj, Revision) bool) { 36 for obj, rev := range seq { 37 if keep(obj) { 38 if !yield(obj, rev) { 39 break 40 } 41 } 42 } 43 } 44 } 45 46 // ToSeq takes a Seq2 and produces a Seq with the first element of the pair. 47 func ToSeq[A, B any](seq iter.Seq2[A, B]) iter.Seq[A] { 48 return func(yield func(A) bool) { 49 for x, _ := range seq { 50 if !yield(x) { 51 break 52 } 53 } 54 } 55 } 56 57 // partSeq returns a casted sequence of objects from a part Iterator. 58 func partSeq[Obj any](iter *part.Iterator[object]) iter.Seq2[Obj, Revision] { 59 return func(yield func(Obj, Revision) bool) { 60 // Iterate over a clone of the original iterator to allow the sequence to be iterated 61 // from scratch multiple times. 62 it := iter.Clone() 63 for { 64 _, iobj, ok := it.Next() 65 if !ok { 66 break 67 } 68 if !yield(iobj.data.(Obj), iobj.revision) { 69 break 70 } 71 } 72 } 73 } 74 75 // nonUniqueSeq returns a sequence of objects for a non-unique index. 76 // Non-unique indexes work by concatenating the secondary key with the 77 // primary key and then prefix searching for the items: 78 // 79 // <secondary>\0<primary><secondary length> 80 // ^^^^^^^^^^^ 81 // 82 // Since the primary key can be of any length and we're prefix searching, 83 // we need to iterate over all objects matching the prefix and only emitting 84 // those which have the correct secondary key length. 85 // For example if we search for the key "aaaa", then we might have the following 86 // matches (_ is just delimiting, not part of the key): 87 // 88 // aaaa\0bbb4 89 // aaa\0abab3 90 // aaaa\0ccc4 91 // 92 // We yield "aaaa\0bbb4", skip "aaa\0abab3" and yield "aaaa\0ccc4". 93 func nonUniqueSeq[Obj any](iter *part.Iterator[object], prefixSearch bool, searchKey []byte) iter.Seq2[Obj, Revision] { 94 return func(yield func(Obj, Revision) bool) { 95 // Clone the iterator to allow multiple iterations over the sequence. 96 it := iter.Clone() 97 98 var visited map[string]struct{} 99 if prefixSearch { 100 // When prefix searching, keep track of objects we've already seen as 101 // multiple keys in non-unique index may map to a single object. 102 // When just doing a List() on a non-unique index we will see each object 103 // only once and do not need to track this. 104 // 105 // This of course makes iterating over a non-unique index with a prefix 106 // (or lowerbound search) about 20x slower than normal! 107 visited = map[string]struct{}{} 108 } 109 110 for { 111 key, iobj, ok := it.Next() 112 if !ok { 113 break 114 } 115 116 secondary, primary := decodeNonUniqueKey(key) 117 118 switch { 119 case !prefixSearch && len(secondary) != len(searchKey): 120 // This a List(), thus secondary key must match length exactly. 121 continue 122 case prefixSearch && len(secondary) < len(searchKey): 123 // This is Prefix(), thus key must be equal or longer to search key. 124 continue 125 } 126 127 if prefixSearch { 128 // When doing a prefix search on a non-unique index we may see the 129 // same object multiple times since multiple keys may point it. 130 // Skip if we've already seen this object. 131 if _, found := visited[string(primary)]; found { 132 continue 133 } 134 visited[string(primary)] = struct{}{} 135 } 136 137 if !yield(iobj.data.(Obj), iobj.revision) { 138 break 139 } 140 } 141 } 142 } 143 144 func nonUniqueLowerBoundSeq[Obj any](iter *part.Iterator[object], searchKey []byte) iter.Seq2[Obj, Revision] { 145 return func(yield func(Obj, Revision) bool) { 146 // Clone the iterator to allow multiple uses. 147 iter = iter.Clone() 148 149 // Keep track of objects we've already seen as multiple keys in non-unique 150 // index may map to a single object. 151 visited := map[string]struct{}{} 152 for { 153 key, iobj, ok := iter.Next() 154 if !ok { 155 break 156 } 157 // With a non-unique index we have a composite key <secondary><primary><secondary len>. 158 // This means we need to check every key that it's larger or equal to the search key. 159 // Just seeking to the first one isn't enough as the secondary key length may vary. 160 secondary, primary := decodeNonUniqueKey(key) 161 if bytes.Compare(secondary, searchKey) >= 0 { 162 if _, found := visited[string(primary)]; found { 163 continue 164 } 165 visited[string(primary)] = struct{}{} 166 167 if !yield(iobj.data.(Obj), iobj.revision) { 168 return 169 } 170 } 171 } 172 } 173 } 174 175 // iterator adapts the "any" object iterator to a typed object. 176 type iterator[Obj any] struct { 177 iter interface{ Next() ([]byte, object, bool) } 178 } 179 180 func (it *iterator[Obj]) Next() (obj Obj, revision uint64, ok bool) { 181 _, iobj, ok := it.iter.Next() 182 if ok { 183 obj = iobj.data.(Obj) 184 revision = iobj.revision 185 } 186 return 187 } 188 189 // Iterator for iterating a sequence objects. 190 type Iterator[Obj any] interface { 191 // Next returns the next object and its revision if ok is true, otherwise 192 // zero values to mean that the iteration has finished. 193 Next() (obj Obj, rev Revision, ok bool) 194 } 195 196 func NewDualIterator[Obj any](left, right Iterator[Obj]) *DualIterator[Obj] { 197 return &DualIterator[Obj]{ 198 left: iterState[Obj]{iter: left}, 199 right: iterState[Obj]{iter: right}, 200 } 201 } 202 203 type iterState[Obj any] struct { 204 iter Iterator[Obj] 205 obj Obj 206 rev Revision 207 ok bool 208 } 209 210 // DualIterator allows iterating over two iterators in revision order. 211 // Meant to be used for combined iteration of LowerBound(ByRevision) 212 // and Deleted(). 213 type DualIterator[Obj any] struct { 214 left iterState[Obj] 215 right iterState[Obj] 216 } 217 218 func (it *DualIterator[Obj]) Next() (obj Obj, revision uint64, fromLeft, ok bool) { 219 // Advance the iterators 220 if !it.left.ok && it.left.iter != nil { 221 it.left.obj, it.left.rev, it.left.ok = it.left.iter.Next() 222 if !it.left.ok { 223 it.left.iter = nil 224 } 225 } 226 if !it.right.ok && it.right.iter != nil { 227 it.right.obj, it.right.rev, it.right.ok = it.right.iter.Next() 228 if !it.right.ok { 229 it.right.iter = nil 230 } 231 } 232 233 // Find the lowest revision object 234 switch { 235 case !it.left.ok && !it.right.ok: 236 ok = false 237 return 238 case it.left.ok && !it.right.ok: 239 it.left.ok = false 240 return it.left.obj, it.left.rev, true, true 241 case it.right.ok && !it.left.ok: 242 it.right.ok = false 243 return it.right.obj, it.right.rev, false, true 244 case it.left.rev <= it.right.rev: 245 it.left.ok = false 246 return it.left.obj, it.left.rev, true, true 247 case it.right.rev <= it.left.rev: 248 it.right.ok = false 249 return it.right.obj, it.right.rev, false, true 250 default: 251 panic(fmt.Sprintf("BUG: Unhandled case: %+v", it)) 252 } 253 } 254 255 type changeIterator[Obj any] struct { 256 table Table[Obj] 257 revision Revision 258 deleteRevision Revision 259 dt *deleteTracker[Obj] 260 iter *DualIterator[Obj] 261 watch <-chan struct{} 262 } 263 264 func (it *changeIterator[Obj]) refresh(txn ReadTxn) { 265 // Instead of indexReadTxn() we look up directly here so we don't 266 // refresh from mutated indexes in case [txn] is a WriteTxn. This 267 // is important as the WriteTxn may be aborted and thus revisions will 268 // reset back and watermarks bumped from here would be invalid. 269 itxn := txn.getTxn() 270 indexEntry := itxn.root[it.table.tablePos()].indexes[RevisionIndexPos] 271 indexTxn := indexReadTxn{indexEntry.tree, indexEntry.unique} 272 updateIter := &iterator[Obj]{indexTxn.LowerBound(index.Uint64(it.revision + 1))} 273 deleteIter := it.dt.deleted(itxn, it.deleteRevision+1) 274 it.iter = NewDualIterator(deleteIter, updateIter) 275 276 // It is enough to watch the revision index and not the graveyard since 277 // any object that is inserted into the graveyard will be deleted from 278 // the revision index. 279 it.watch = indexTxn.RootWatch() 280 } 281 282 func (it *changeIterator[Obj]) Next(txn ReadTxn) (seq iter.Seq2[Change[Obj], Revision], watch <-chan struct{}) { 283 if it.iter == nil { 284 // Iterator has been exhausted, check if we need to requery 285 // or whether we need to wait for changes first. 286 select { 287 case <-it.watch: 288 // Watch channel closed, so new changes await 289 default: 290 // Watch channel for the query not closed yet, so return it to allow 291 // caller to wait for the new changes. 292 watch = it.watch 293 seq = func(yield func(Change[Obj], Revision) bool) {} 294 return 295 } 296 } 297 298 // Refresh the iterator regardless if it was fully consumed or not to 299 // pull in new changes. We keep returning a closed channel until the 300 // iterator has been fully consumed. This does mean there's an extra 301 // Next() call to get a proper watch channel, but it does make this 302 // API much safer to use even when only partially consuming the 303 // sequence. 304 it.refresh(txn) 305 watch = closedWatchChannel 306 seq = func(yield func(Change[Obj], Revision) bool) { 307 if it.iter == nil { 308 return 309 } 310 for obj, rev, deleted, ok := it.iter.Next(); ok; obj, rev, deleted, ok = it.iter.Next() { 311 if deleted { 312 it.deleteRevision = rev 313 it.dt.mark(rev) 314 } else { 315 it.revision = rev 316 } 317 change := Change[Obj]{ 318 Object: obj, 319 Revision: rev, 320 Deleted: deleted, 321 } 322 if !yield(change, rev) { 323 return 324 } 325 } 326 it.iter = nil 327 } 328 return 329 } 330 331 // changesAny is for implementing the /changes HTTP API where the concrete object 332 // type is not known. 333 func (it *changeIterator[Obj]) nextAny(txn ReadTxn) (iter.Seq2[Change[any], Revision], <-chan struct{}) { 334 seq, watch := it.Next(txn) 335 336 return func(yield func(Change[any], Revision) bool) { 337 for change, rev := range seq { 338 ok := yield(Change[any]{ 339 Object: change.Object, 340 Revision: change.Revision, 341 Deleted: change.Deleted, 342 }, rev) 343 if !ok { 344 break 345 } 346 } 347 }, watch 348 } 349 350 func (it *changeIterator[Obj]) close() { 351 if it.dt != nil { 352 it.dt.close() 353 } 354 it.dt = nil 355 } 356 357 type anyChangeIterator interface { 358 nextAny(ReadTxn) (iter.Seq2[Change[any], Revision], <-chan struct{}) 359 }