github.com/cilium/statedb@v0.3.2/types.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package statedb
     5  
     6  import (
     7  	"errors"
     8  	"io"
     9  	"iter"
    10  
    11  	"github.com/cilium/statedb/index"
    12  	"github.com/cilium/statedb/internal"
    13  	"github.com/cilium/statedb/part"
    14  )
    15  
    16  type (
    17  	TableName = string
    18  	IndexName = string
    19  	Revision  = uint64
    20  )
    21  
    22  // Table provides methods for querying the contents of a table.
    23  type Table[Obj any] interface {
    24  	// TableMeta for querying table metadata that is independent of
    25  	// 'Obj' type.
    26  	TableMeta
    27  
    28  	// PrimaryIndexer returns the primary indexer for the table.
    29  	// Useful for generic utilities that need access to the primary key.
    30  	PrimaryIndexer() Indexer[Obj]
    31  
    32  	// All returns a sequence of all objects in the table.
    33  	All(ReadTxn) iter.Seq2[Obj, Revision]
    34  
    35  	// AllWatch returns a sequence of all objects in the table and a watch
    36  	// channel that is closed when the table changes.
    37  	AllWatch(ReadTxn) (iter.Seq2[Obj, Revision], <-chan struct{})
    38  
    39  	// List returns sequence of objects matching the given query.
    40  	List(ReadTxn, Query[Obj]) iter.Seq2[Obj, Revision]
    41  
    42  	// ListWatch returns an iterator for all objects matching the given query
    43  	// and a watch channel that is closed if the query results are
    44  	// invalidated by a write to the table.
    45  	ListWatch(ReadTxn, Query[Obj]) (iter.Seq2[Obj, Revision], <-chan struct{})
    46  
    47  	// Get returns the first matching object for the query.
    48  	Get(ReadTxn, Query[Obj]) (obj Obj, rev Revision, found bool)
    49  
    50  	// GetWatch return the first matching object and a watch channel
    51  	// that is closed if the query is invalidated.
    52  	GetWatch(ReadTxn, Query[Obj]) (obj Obj, rev Revision, watch <-chan struct{}, found bool)
    53  
    54  	// LowerBound returns an iterator for objects that have a key
    55  	// greater or equal to the query.
    56  	LowerBound(ReadTxn, Query[Obj]) iter.Seq2[Obj, Revision]
    57  
    58  	// LowerBoundWatch returns an iterator for objects that have a key
    59  	// greater or equal to the query. The returned watch channel is closed
    60  	// when anything in the table changes as more fine-grained notifications
    61  	// are not possible with a lower bound search.
    62  	LowerBoundWatch(ReadTxn, Query[Obj]) (seq iter.Seq2[Obj, Revision], watch <-chan struct{})
    63  
    64  	// Prefix searches the table by key prefix.
    65  	Prefix(ReadTxn, Query[Obj]) iter.Seq2[Obj, Revision]
    66  
    67  	// PrefixWatch searches the table by key prefix. Returns an iterator and a watch
    68  	// channel that closes when the query results have become stale.
    69  	PrefixWatch(ReadTxn, Query[Obj]) (seq iter.Seq2[Obj, Revision], watch <-chan struct{})
    70  
    71  	// Changes returns an iterator for changes happening to the table.
    72  	// This uses the revision index to iterate over the objects in the order
    73  	// they have changed. Deleted objects are placed onto a temporary index
    74  	// (graveyard) where they live until all change iterators have observed
    75  	// the deletion.
    76  	//
    77  	// If an object is created and deleted before the observer has iterated
    78  	// over the creation then only the deletion is seen.
    79  	Changes(WriteTxn) (ChangeIterator[Obj], error)
    80  }
    81  
    82  // Change is either an update or a delete of an object. Used by Changes() and
    83  // the Observable().
    84  // The 'Revision' is carried also in the Change object so that it is also accessible
    85  // via Observable.
    86  type Change[Obj any] struct {
    87  	Object   Obj      `json:"obj"`
    88  	Revision Revision `json:"rev"`
    89  	Deleted  bool     `json:"deleted,omitempty"`
    90  }
    91  
    92  type ChangeIterator[Obj any] interface {
    93  	// Next returns the sequence of unobserved changes up to the given ReadTxn (snapshot) and
    94  	// a watch channel.
    95  	//
    96  	// If changes are available Next returns a closed watch channel. Only once there are no further
    97  	// changes available will a proper watch channel be returned.
    98  	//
    99  	// Next can be called again without fully consuming the sequence to pull in new changes.
   100  	//
   101  	// The returned sequence is a single-use sequence and subsequent calls will return
   102  	// an empty sequence.
   103  	//
   104  	// If the transaction given to Next is a WriteTxn the modifications made in the
   105  	// transaction are not observed, that is, only committed changes can be observed.
   106  	Next(ReadTxn) (iter.Seq2[Change[Obj], Revision], <-chan struct{})
   107  }
   108  
   109  // RWTable provides methods for modifying the table under a write transaction
   110  // that targets this table.
   111  type RWTable[Obj any] interface {
   112  	// RWTable[Obj] is a superset of Table[Obj]. Queries made with a
   113  	// write transaction return the fresh uncommitted modifications if any.
   114  	Table[Obj]
   115  
   116  	// RegisterInitializer registers an initializer to the table. Returns
   117  	// a function to mark the initializer done. Once all initializers are
   118  	// done, Table[*].Initialized() will return true.
   119  	// This should only be used before the application has started.
   120  	RegisterInitializer(txn WriteTxn, name string) func(WriteTxn)
   121  
   122  	// ToTable returns the Table[Obj] interface. Useful with cell.Provide
   123  	// to avoid the anonymous function:
   124  	//
   125  	//   cell.ProvidePrivate(NewMyTable), // RWTable
   126  	//   cell.Invoke(statedb.Register[statedb.RWTable[Foo])
   127  	//
   128  	//   // with anononymous function:
   129  	//   cell.Provide(func(t statedb.RWTable[Foo]) statedb.Table[Foo] { return t })
   130  	//
   131  	//   // with ToTable:
   132  	//   cell.Provide(statedb.RWTable[Foo].ToTable),
   133  	ToTable() Table[Obj]
   134  
   135  	// Insert an object into the table. Returns the object that was
   136  	// replaced if there was one.
   137  	//
   138  	// Possible errors:
   139  	// - ErrTableNotLockedForWriting: table was not locked for writing
   140  	// - ErrTransactionClosed: the write transaction already committed or aborted
   141  	//
   142  	// Each inserted or updated object will be assigned a new unique
   143  	// revision.
   144  	Insert(WriteTxn, Obj) (oldObj Obj, hadOld bool, err error)
   145  
   146  	// Modify an existing object or insert a new object into the table. If an old object
   147  	// exists the [merge] function is called with the old and new objects.
   148  	//
   149  	// Modify is semantically equal to Get + Insert, but avoids extra lookups making
   150  	// it significantly more efficient.
   151  	//
   152  	// Possible errors:
   153  	// - ErrTableNotLockedForWriting: table was not locked for writing
   154  	// - ErrTransactionClosed: the write transaction already committed or aborted
   155  	Modify(txn WriteTxn, new Obj, merge func(old, new Obj) Obj) (oldObj Obj, hadOld bool, err error)
   156  
   157  	// CompareAndSwap compares the existing object's revision against the
   158  	// given revision and if equal it replaces the object.
   159  	//
   160  	// Possible errors:
   161  	// - ErrRevisionNotEqual: the object has mismatching revision
   162  	// - ErrObjectNotFound: object not found from the table
   163  	// - ErrTableNotLockedForWriting: table was not locked for writing
   164  	// - ErrTransactionClosed: the write transaction already committed or aborted
   165  	CompareAndSwap(WriteTxn, Revision, Obj) (oldObj Obj, hadOld bool, err error)
   166  
   167  	// Delete an object from the table. Returns the object that was
   168  	// deleted if there was one.
   169  	//
   170  	// If the table is being tracked for deletions via EventIterator()
   171  	// the deleted object is inserted into a graveyard index and garbage
   172  	// collected when all delete trackers have consumed it. Each deleted
   173  	// object in the graveyard has unique revision allowing interleaved
   174  	// iteration of updates and deletions.
   175  	//
   176  	// Possible errors:
   177  	// - ErrTableNotLockedForWriting: table was not locked for writing
   178  	// - ErrTransactionClosed: the write transaction already committed or aborted
   179  	Delete(WriteTxn, Obj) (oldObj Obj, hadOld bool, err error)
   180  
   181  	// DeleteAll removes all objects in the table. Semantically the same as
   182  	// All() + Delete(). See Delete() for more information.
   183  	//
   184  	// Possible errors:
   185  	// - ErrTableNotLockedForWriting: table was not locked for writing
   186  	// - ErrTransactionClosed: the write transaction already committed or aborted
   187  	DeleteAll(WriteTxn) error
   188  
   189  	// CompareAndDelete compares the existing object's revision against the
   190  	// given revision and if equal it deletes the object. If object is not
   191  	// found 'hadOld' will be false and 'err' nil.
   192  	//
   193  	// Possible errors:
   194  	// - ErrRevisionNotEqual: the object has mismatching revision
   195  	// - ErrTableNotLockedForWriting: table was not locked for writing
   196  	// - ErrTransactionClosed: the write transaction already committed or aborted
   197  	CompareAndDelete(WriteTxn, Revision, Obj) (oldObj Obj, hadOld bool, err error)
   198  }
   199  
   200  // TableMeta provides information about the table that is independent of
   201  // the object type (the 'Obj' constraint).
   202  type TableMeta interface {
   203  	// Name returns the name of the table
   204  	Name() TableName
   205  
   206  	// Indexes returns the names of the indexes
   207  	Indexes() []string
   208  
   209  	// NumObjects returns the number of objects stored in the table.
   210  	NumObjects(ReadTxn) int
   211  
   212  	// Initialized returns true if in this ReadTxn (snapshot of the database)
   213  	// the registered initializers have all been completed. The returned
   214  	// watch channel will be closed when the table becomes initialized.
   215  	Initialized(ReadTxn) (bool, <-chan struct{})
   216  
   217  	// PendingInitializers returns the set of pending initializers that
   218  	// have not yet completed.
   219  	PendingInitializers(ReadTxn) []string
   220  
   221  	// Revision of the table. Constant for a read transaction, but
   222  	// increments in a write transaction on each Insert and Delete.
   223  	Revision(ReadTxn) Revision
   224  
   225  	// Internal unexported methods used only internally.
   226  	tableInternal
   227  }
   228  
   229  type tableInternal interface {
   230  	tableEntry() tableEntry
   231  	tablePos() int
   232  	setTablePos(int)
   233  	indexPos(string) int
   234  	tableKey() []byte // The radix key for the table in the root tree
   235  	getIndexer(name string) *anyIndexer
   236  	primary() anyIndexer                   // The untyped primary indexer for the table
   237  	secondary() map[string]anyIndexer      // Secondary indexers (if any)
   238  	sortableMutex() internal.SortableMutex // The sortable mutex for locking the table for writing
   239  	anyChanges(txn WriteTxn) (anyChangeIterator, error)
   240  	proto() any                             // Returns the zero value of 'Obj', e.g. the prototype
   241  	unmarshalYAML(data []byte) (any, error) // Unmarshal the data into 'Obj'
   242  	numDeletedObjects(txn ReadTxn) int      // Number of objects in graveyard
   243  	acquired(*txn)
   244  	getAcquiredInfo() string
   245  }
   246  
   247  type ReadTxn interface {
   248  	getTxn() *txn
   249  
   250  	// WriteJSON writes the contents of the database as JSON.
   251  	WriteJSON(w io.Writer, tables ...string) error
   252  }
   253  
   254  type WriteTxn interface {
   255  	// WriteTxn is always also a ReadTxn
   256  	ReadTxn
   257  
   258  	// Abort the current transaction. All changes are disgarded.
   259  	// It is safe to call Abort() after calling Commit(), e.g.
   260  	// the following pattern is strongly encouraged to make sure
   261  	// write transactions are always completed:
   262  	//
   263  	//  txn := db.WriteTxn(...)
   264  	//  defer txn.Abort()
   265  	//  ...
   266  	//  txn.Commit()
   267  	Abort()
   268  
   269  	// Commit the changes in the current transaction to the target tables.
   270  	// This is a no-op if Abort() or Commit() has already been called.
   271  	// Returns a ReadTxn for reading the database at the time of commit.
   272  	Commit() ReadTxn
   273  }
   274  
   275  type Query[Obj any] struct {
   276  	index IndexName
   277  	key   index.Key
   278  }
   279  
   280  // ByRevision constructs a revision query. Applicable to any table.
   281  func ByRevision[Obj any](rev uint64) Query[Obj] {
   282  	return Query[Obj]{
   283  		index: RevisionIndex,
   284  		key:   index.Uint64(rev),
   285  	}
   286  }
   287  
   288  // Index implements the indexing of objects (FromObjects) and querying of objects from the index (FromKey)
   289  type Index[Obj any, Key any] struct {
   290  	// Name of the index
   291  	Name string
   292  
   293  	// FromObject extracts key(s) from the object. The key set
   294  	// can contain 0, 1 or more keys.
   295  	FromObject func(obj Obj) index.KeySet
   296  
   297  	// FromKey converts the index key into a raw key.
   298  	// With this we can perform Query() against this index with
   299  	// the [Key] type.
   300  	FromKey func(key Key) index.Key
   301  
   302  	// FromString is an optional conversion from string to a raw key.
   303  	// If implemented allows script commands to query with this index.
   304  	FromString func(key string) (index.Key, error)
   305  
   306  	// Unique marks the index as unique. Primary index must always be
   307  	// unique. A secondary index may be non-unique in which case a single
   308  	// key may map to multiple objects.
   309  	Unique bool
   310  }
   311  
   312  var _ Indexer[struct{}] = &Index[struct{}, bool]{}
   313  
   314  // The nolint:unused below are needed due to linter not seeing
   315  // the use-sites due to generics.
   316  
   317  //nolint:unused
   318  func (i Index[Key, Obj]) indexName() string {
   319  	return i.Name
   320  }
   321  
   322  //nolint:unused
   323  func (i Index[Obj, Key]) fromObject(obj Obj) index.KeySet {
   324  	return i.FromObject(obj)
   325  }
   326  
   327  var errFromStringNil = errors.New("FromString not defined")
   328  
   329  //nolint:unused
   330  func (i Index[Obj, Key]) fromString(s string) (index.Key, error) {
   331  	if i.FromString == nil {
   332  		return index.Key{}, errFromStringNil
   333  	}
   334  	k, err := i.FromString(s)
   335  	return k, err
   336  }
   337  
   338  //nolint:unused
   339  func (i Index[Obj, Key]) isUnique() bool {
   340  	return i.Unique
   341  }
   342  
   343  // Query constructs a query against this index from a key.
   344  func (i Index[Obj, Key]) Query(key Key) Query[Obj] {
   345  	return Query[Obj]{
   346  		index: i.Name,
   347  		key:   i.FromKey(key),
   348  	}
   349  }
   350  
   351  func (i Index[Obj, Key]) QueryFromObject(obj Obj) Query[Obj] {
   352  	return Query[Obj]{
   353  		index: i.Name,
   354  		key:   i.FromObject(obj).First(),
   355  	}
   356  }
   357  
   358  func (i Index[Obj, Key]) ObjectToKey(obj Obj) index.Key {
   359  	return i.FromObject(obj).First()
   360  }
   361  
   362  // Indexer is the "FromObject" subset of Index[Obj, Key]
   363  // without the 'Key' constraint.
   364  type Indexer[Obj any] interface {
   365  	indexName() string
   366  	isUnique() bool
   367  	fromObject(Obj) index.KeySet
   368  	fromString(string) (index.Key, error)
   369  
   370  	ObjectToKey(Obj) index.Key
   371  	QueryFromObject(Obj) Query[Obj]
   372  }
   373  
   374  // TableWritable is a constraint for objects that implement tabular
   375  // pretty-printing. Used in "cilium-dbg statedb" sub-commands.
   376  type TableWritable interface {
   377  	// TableHeader returns the header columns that are independent of the
   378  	// object.
   379  	TableHeader() []string
   380  
   381  	// TableRow returns the row columns for this object.
   382  	TableRow() []string
   383  }
   384  
   385  //
   386  // Internal types and constants.
   387  //
   388  
   389  const (
   390  	PrimaryIndexPos = 0
   391  
   392  	reservedIndexPrefix       = "__"
   393  	RevisionIndex             = "__revision__"
   394  	RevisionIndexPos          = 1
   395  	GraveyardIndex            = "__graveyard__"
   396  	GraveyardIndexPos         = 2
   397  	GraveyardRevisionIndex    = "__graveyard_revision__"
   398  	GraveyardRevisionIndexPos = 3
   399  
   400  	SecondaryIndexStartPos = 4
   401  )
   402  
   403  // object is the format in which data is stored in the tables.
   404  type object struct {
   405  	revision uint64
   406  	data     any
   407  }
   408  
   409  // anyIndexer is an untyped indexer. The user-defined 'Index[Obj,Key]'
   410  // is converted to this form.
   411  type anyIndexer struct {
   412  	// name is the indexer name.
   413  	name string
   414  
   415  	// fromObject returns the key (or keys for multi-index) to index the
   416  	// object with.
   417  	fromObject func(object) index.KeySet
   418  
   419  	// fromString converts string into a key. Optional.
   420  	fromString func(string) (index.Key, error)
   421  
   422  	// unique if true will index the object solely on the
   423  	// values returned by fromObject. If false the primary
   424  	// key of the object will be appended to the key.
   425  	unique bool
   426  
   427  	// pos is the position of the index in [tableEntry.indexes]
   428  	pos int
   429  }
   430  
   431  type anyDeleteTracker interface {
   432  	setRevision(uint64)
   433  	getRevision() uint64
   434  	close()
   435  }
   436  
   437  type indexEntry struct {
   438  	tree   *part.Tree[object]
   439  	txn    *part.Txn[object]
   440  	unique bool
   441  }
   442  
   443  type tableEntry struct {
   444  	meta                TableMeta
   445  	indexes             []indexEntry
   446  	deleteTrackers      *part.Tree[anyDeleteTracker]
   447  	revision            uint64
   448  	pendingInitializers []string
   449  	initialized         bool
   450  	initWatchChan       chan struct{}
   451  }
   452  
   453  func (t *tableEntry) numObjects() int {
   454  	indexEntry := t.indexes[t.meta.indexPos(RevisionIndex)]
   455  	if indexEntry.txn != nil {
   456  		return indexEntry.txn.Len()
   457  	}
   458  	return indexEntry.tree.Len()
   459  }
   460  
   461  func (t *tableEntry) numDeletedObjects() int {
   462  	indexEntry := t.indexes[t.meta.indexPos(GraveyardIndex)]
   463  	if indexEntry.txn != nil {
   464  		return indexEntry.txn.Len()
   465  	}
   466  	return indexEntry.tree.Len()
   467  }