github.com/cilium/statedb@v0.3.2/types.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package statedb 5 6 import ( 7 "errors" 8 "io" 9 "iter" 10 11 "github.com/cilium/statedb/index" 12 "github.com/cilium/statedb/internal" 13 "github.com/cilium/statedb/part" 14 ) 15 16 type ( 17 TableName = string 18 IndexName = string 19 Revision = uint64 20 ) 21 22 // Table provides methods for querying the contents of a table. 23 type Table[Obj any] interface { 24 // TableMeta for querying table metadata that is independent of 25 // 'Obj' type. 26 TableMeta 27 28 // PrimaryIndexer returns the primary indexer for the table. 29 // Useful for generic utilities that need access to the primary key. 30 PrimaryIndexer() Indexer[Obj] 31 32 // All returns a sequence of all objects in the table. 33 All(ReadTxn) iter.Seq2[Obj, Revision] 34 35 // AllWatch returns a sequence of all objects in the table and a watch 36 // channel that is closed when the table changes. 37 AllWatch(ReadTxn) (iter.Seq2[Obj, Revision], <-chan struct{}) 38 39 // List returns sequence of objects matching the given query. 40 List(ReadTxn, Query[Obj]) iter.Seq2[Obj, Revision] 41 42 // ListWatch returns an iterator for all objects matching the given query 43 // and a watch channel that is closed if the query results are 44 // invalidated by a write to the table. 45 ListWatch(ReadTxn, Query[Obj]) (iter.Seq2[Obj, Revision], <-chan struct{}) 46 47 // Get returns the first matching object for the query. 48 Get(ReadTxn, Query[Obj]) (obj Obj, rev Revision, found bool) 49 50 // GetWatch return the first matching object and a watch channel 51 // that is closed if the query is invalidated. 52 GetWatch(ReadTxn, Query[Obj]) (obj Obj, rev Revision, watch <-chan struct{}, found bool) 53 54 // LowerBound returns an iterator for objects that have a key 55 // greater or equal to the query. 56 LowerBound(ReadTxn, Query[Obj]) iter.Seq2[Obj, Revision] 57 58 // LowerBoundWatch returns an iterator for objects that have a key 59 // greater or equal to the query. The returned watch channel is closed 60 // when anything in the table changes as more fine-grained notifications 61 // are not possible with a lower bound search. 62 LowerBoundWatch(ReadTxn, Query[Obj]) (seq iter.Seq2[Obj, Revision], watch <-chan struct{}) 63 64 // Prefix searches the table by key prefix. 65 Prefix(ReadTxn, Query[Obj]) iter.Seq2[Obj, Revision] 66 67 // PrefixWatch searches the table by key prefix. Returns an iterator and a watch 68 // channel that closes when the query results have become stale. 69 PrefixWatch(ReadTxn, Query[Obj]) (seq iter.Seq2[Obj, Revision], watch <-chan struct{}) 70 71 // Changes returns an iterator for changes happening to the table. 72 // This uses the revision index to iterate over the objects in the order 73 // they have changed. Deleted objects are placed onto a temporary index 74 // (graveyard) where they live until all change iterators have observed 75 // the deletion. 76 // 77 // If an object is created and deleted before the observer has iterated 78 // over the creation then only the deletion is seen. 79 Changes(WriteTxn) (ChangeIterator[Obj], error) 80 } 81 82 // Change is either an update or a delete of an object. Used by Changes() and 83 // the Observable(). 84 // The 'Revision' is carried also in the Change object so that it is also accessible 85 // via Observable. 86 type Change[Obj any] struct { 87 Object Obj `json:"obj"` 88 Revision Revision `json:"rev"` 89 Deleted bool `json:"deleted,omitempty"` 90 } 91 92 type ChangeIterator[Obj any] interface { 93 // Next returns the sequence of unobserved changes up to the given ReadTxn (snapshot) and 94 // a watch channel. 95 // 96 // If changes are available Next returns a closed watch channel. Only once there are no further 97 // changes available will a proper watch channel be returned. 98 // 99 // Next can be called again without fully consuming the sequence to pull in new changes. 100 // 101 // The returned sequence is a single-use sequence and subsequent calls will return 102 // an empty sequence. 103 // 104 // If the transaction given to Next is a WriteTxn the modifications made in the 105 // transaction are not observed, that is, only committed changes can be observed. 106 Next(ReadTxn) (iter.Seq2[Change[Obj], Revision], <-chan struct{}) 107 } 108 109 // RWTable provides methods for modifying the table under a write transaction 110 // that targets this table. 111 type RWTable[Obj any] interface { 112 // RWTable[Obj] is a superset of Table[Obj]. Queries made with a 113 // write transaction return the fresh uncommitted modifications if any. 114 Table[Obj] 115 116 // RegisterInitializer registers an initializer to the table. Returns 117 // a function to mark the initializer done. Once all initializers are 118 // done, Table[*].Initialized() will return true. 119 // This should only be used before the application has started. 120 RegisterInitializer(txn WriteTxn, name string) func(WriteTxn) 121 122 // ToTable returns the Table[Obj] interface. Useful with cell.Provide 123 // to avoid the anonymous function: 124 // 125 // cell.ProvidePrivate(NewMyTable), // RWTable 126 // cell.Invoke(statedb.Register[statedb.RWTable[Foo]) 127 // 128 // // with anononymous function: 129 // cell.Provide(func(t statedb.RWTable[Foo]) statedb.Table[Foo] { return t }) 130 // 131 // // with ToTable: 132 // cell.Provide(statedb.RWTable[Foo].ToTable), 133 ToTable() Table[Obj] 134 135 // Insert an object into the table. Returns the object that was 136 // replaced if there was one. 137 // 138 // Possible errors: 139 // - ErrTableNotLockedForWriting: table was not locked for writing 140 // - ErrTransactionClosed: the write transaction already committed or aborted 141 // 142 // Each inserted or updated object will be assigned a new unique 143 // revision. 144 Insert(WriteTxn, Obj) (oldObj Obj, hadOld bool, err error) 145 146 // Modify an existing object or insert a new object into the table. If an old object 147 // exists the [merge] function is called with the old and new objects. 148 // 149 // Modify is semantically equal to Get + Insert, but avoids extra lookups making 150 // it significantly more efficient. 151 // 152 // Possible errors: 153 // - ErrTableNotLockedForWriting: table was not locked for writing 154 // - ErrTransactionClosed: the write transaction already committed or aborted 155 Modify(txn WriteTxn, new Obj, merge func(old, new Obj) Obj) (oldObj Obj, hadOld bool, err error) 156 157 // CompareAndSwap compares the existing object's revision against the 158 // given revision and if equal it replaces the object. 159 // 160 // Possible errors: 161 // - ErrRevisionNotEqual: the object has mismatching revision 162 // - ErrObjectNotFound: object not found from the table 163 // - ErrTableNotLockedForWriting: table was not locked for writing 164 // - ErrTransactionClosed: the write transaction already committed or aborted 165 CompareAndSwap(WriteTxn, Revision, Obj) (oldObj Obj, hadOld bool, err error) 166 167 // Delete an object from the table. Returns the object that was 168 // deleted if there was one. 169 // 170 // If the table is being tracked for deletions via EventIterator() 171 // the deleted object is inserted into a graveyard index and garbage 172 // collected when all delete trackers have consumed it. Each deleted 173 // object in the graveyard has unique revision allowing interleaved 174 // iteration of updates and deletions. 175 // 176 // Possible errors: 177 // - ErrTableNotLockedForWriting: table was not locked for writing 178 // - ErrTransactionClosed: the write transaction already committed or aborted 179 Delete(WriteTxn, Obj) (oldObj Obj, hadOld bool, err error) 180 181 // DeleteAll removes all objects in the table. Semantically the same as 182 // All() + Delete(). See Delete() for more information. 183 // 184 // Possible errors: 185 // - ErrTableNotLockedForWriting: table was not locked for writing 186 // - ErrTransactionClosed: the write transaction already committed or aborted 187 DeleteAll(WriteTxn) error 188 189 // CompareAndDelete compares the existing object's revision against the 190 // given revision and if equal it deletes the object. If object is not 191 // found 'hadOld' will be false and 'err' nil. 192 // 193 // Possible errors: 194 // - ErrRevisionNotEqual: the object has mismatching revision 195 // - ErrTableNotLockedForWriting: table was not locked for writing 196 // - ErrTransactionClosed: the write transaction already committed or aborted 197 CompareAndDelete(WriteTxn, Revision, Obj) (oldObj Obj, hadOld bool, err error) 198 } 199 200 // TableMeta provides information about the table that is independent of 201 // the object type (the 'Obj' constraint). 202 type TableMeta interface { 203 // Name returns the name of the table 204 Name() TableName 205 206 // Indexes returns the names of the indexes 207 Indexes() []string 208 209 // NumObjects returns the number of objects stored in the table. 210 NumObjects(ReadTxn) int 211 212 // Initialized returns true if in this ReadTxn (snapshot of the database) 213 // the registered initializers have all been completed. The returned 214 // watch channel will be closed when the table becomes initialized. 215 Initialized(ReadTxn) (bool, <-chan struct{}) 216 217 // PendingInitializers returns the set of pending initializers that 218 // have not yet completed. 219 PendingInitializers(ReadTxn) []string 220 221 // Revision of the table. Constant for a read transaction, but 222 // increments in a write transaction on each Insert and Delete. 223 Revision(ReadTxn) Revision 224 225 // Internal unexported methods used only internally. 226 tableInternal 227 } 228 229 type tableInternal interface { 230 tableEntry() tableEntry 231 tablePos() int 232 setTablePos(int) 233 indexPos(string) int 234 tableKey() []byte // The radix key for the table in the root tree 235 getIndexer(name string) *anyIndexer 236 primary() anyIndexer // The untyped primary indexer for the table 237 secondary() map[string]anyIndexer // Secondary indexers (if any) 238 sortableMutex() internal.SortableMutex // The sortable mutex for locking the table for writing 239 anyChanges(txn WriteTxn) (anyChangeIterator, error) 240 proto() any // Returns the zero value of 'Obj', e.g. the prototype 241 unmarshalYAML(data []byte) (any, error) // Unmarshal the data into 'Obj' 242 numDeletedObjects(txn ReadTxn) int // Number of objects in graveyard 243 acquired(*txn) 244 getAcquiredInfo() string 245 } 246 247 type ReadTxn interface { 248 getTxn() *txn 249 250 // WriteJSON writes the contents of the database as JSON. 251 WriteJSON(w io.Writer, tables ...string) error 252 } 253 254 type WriteTxn interface { 255 // WriteTxn is always also a ReadTxn 256 ReadTxn 257 258 // Abort the current transaction. All changes are disgarded. 259 // It is safe to call Abort() after calling Commit(), e.g. 260 // the following pattern is strongly encouraged to make sure 261 // write transactions are always completed: 262 // 263 // txn := db.WriteTxn(...) 264 // defer txn.Abort() 265 // ... 266 // txn.Commit() 267 Abort() 268 269 // Commit the changes in the current transaction to the target tables. 270 // This is a no-op if Abort() or Commit() has already been called. 271 // Returns a ReadTxn for reading the database at the time of commit. 272 Commit() ReadTxn 273 } 274 275 type Query[Obj any] struct { 276 index IndexName 277 key index.Key 278 } 279 280 // ByRevision constructs a revision query. Applicable to any table. 281 func ByRevision[Obj any](rev uint64) Query[Obj] { 282 return Query[Obj]{ 283 index: RevisionIndex, 284 key: index.Uint64(rev), 285 } 286 } 287 288 // Index implements the indexing of objects (FromObjects) and querying of objects from the index (FromKey) 289 type Index[Obj any, Key any] struct { 290 // Name of the index 291 Name string 292 293 // FromObject extracts key(s) from the object. The key set 294 // can contain 0, 1 or more keys. 295 FromObject func(obj Obj) index.KeySet 296 297 // FromKey converts the index key into a raw key. 298 // With this we can perform Query() against this index with 299 // the [Key] type. 300 FromKey func(key Key) index.Key 301 302 // FromString is an optional conversion from string to a raw key. 303 // If implemented allows script commands to query with this index. 304 FromString func(key string) (index.Key, error) 305 306 // Unique marks the index as unique. Primary index must always be 307 // unique. A secondary index may be non-unique in which case a single 308 // key may map to multiple objects. 309 Unique bool 310 } 311 312 var _ Indexer[struct{}] = &Index[struct{}, bool]{} 313 314 // The nolint:unused below are needed due to linter not seeing 315 // the use-sites due to generics. 316 317 //nolint:unused 318 func (i Index[Key, Obj]) indexName() string { 319 return i.Name 320 } 321 322 //nolint:unused 323 func (i Index[Obj, Key]) fromObject(obj Obj) index.KeySet { 324 return i.FromObject(obj) 325 } 326 327 var errFromStringNil = errors.New("FromString not defined") 328 329 //nolint:unused 330 func (i Index[Obj, Key]) fromString(s string) (index.Key, error) { 331 if i.FromString == nil { 332 return index.Key{}, errFromStringNil 333 } 334 k, err := i.FromString(s) 335 return k, err 336 } 337 338 //nolint:unused 339 func (i Index[Obj, Key]) isUnique() bool { 340 return i.Unique 341 } 342 343 // Query constructs a query against this index from a key. 344 func (i Index[Obj, Key]) Query(key Key) Query[Obj] { 345 return Query[Obj]{ 346 index: i.Name, 347 key: i.FromKey(key), 348 } 349 } 350 351 func (i Index[Obj, Key]) QueryFromObject(obj Obj) Query[Obj] { 352 return Query[Obj]{ 353 index: i.Name, 354 key: i.FromObject(obj).First(), 355 } 356 } 357 358 func (i Index[Obj, Key]) ObjectToKey(obj Obj) index.Key { 359 return i.FromObject(obj).First() 360 } 361 362 // Indexer is the "FromObject" subset of Index[Obj, Key] 363 // without the 'Key' constraint. 364 type Indexer[Obj any] interface { 365 indexName() string 366 isUnique() bool 367 fromObject(Obj) index.KeySet 368 fromString(string) (index.Key, error) 369 370 ObjectToKey(Obj) index.Key 371 QueryFromObject(Obj) Query[Obj] 372 } 373 374 // TableWritable is a constraint for objects that implement tabular 375 // pretty-printing. Used in "cilium-dbg statedb" sub-commands. 376 type TableWritable interface { 377 // TableHeader returns the header columns that are independent of the 378 // object. 379 TableHeader() []string 380 381 // TableRow returns the row columns for this object. 382 TableRow() []string 383 } 384 385 // 386 // Internal types and constants. 387 // 388 389 const ( 390 PrimaryIndexPos = 0 391 392 reservedIndexPrefix = "__" 393 RevisionIndex = "__revision__" 394 RevisionIndexPos = 1 395 GraveyardIndex = "__graveyard__" 396 GraveyardIndexPos = 2 397 GraveyardRevisionIndex = "__graveyard_revision__" 398 GraveyardRevisionIndexPos = 3 399 400 SecondaryIndexStartPos = 4 401 ) 402 403 // object is the format in which data is stored in the tables. 404 type object struct { 405 revision uint64 406 data any 407 } 408 409 // anyIndexer is an untyped indexer. The user-defined 'Index[Obj,Key]' 410 // is converted to this form. 411 type anyIndexer struct { 412 // name is the indexer name. 413 name string 414 415 // fromObject returns the key (or keys for multi-index) to index the 416 // object with. 417 fromObject func(object) index.KeySet 418 419 // fromString converts string into a key. Optional. 420 fromString func(string) (index.Key, error) 421 422 // unique if true will index the object solely on the 423 // values returned by fromObject. If false the primary 424 // key of the object will be appended to the key. 425 unique bool 426 427 // pos is the position of the index in [tableEntry.indexes] 428 pos int 429 } 430 431 type anyDeleteTracker interface { 432 setRevision(uint64) 433 getRevision() uint64 434 close() 435 } 436 437 type indexEntry struct { 438 tree *part.Tree[object] 439 txn *part.Txn[object] 440 unique bool 441 } 442 443 type tableEntry struct { 444 meta TableMeta 445 indexes []indexEntry 446 deleteTrackers *part.Tree[anyDeleteTracker] 447 revision uint64 448 pendingInitializers []string 449 initialized bool 450 initWatchChan chan struct{} 451 } 452 453 func (t *tableEntry) numObjects() int { 454 indexEntry := t.indexes[t.meta.indexPos(RevisionIndex)] 455 if indexEntry.txn != nil { 456 return indexEntry.txn.Len() 457 } 458 return indexEntry.tree.Len() 459 } 460 461 func (t *tableEntry) numDeletedObjects() int { 462 indexEntry := t.indexes[t.meta.indexPos(GraveyardIndex)] 463 if indexEntry.txn != nil { 464 return indexEntry.txn.Len() 465 } 466 return indexEntry.tree.Len() 467 }