github.com/cilium/statedb@v0.3.2/part/txn.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package part
     5  
     6  import (
     7  	"bytes"
     8  )
     9  
// Txn is a transaction against a tree. It allows doing efficient
// modifications to a tree by caching and reusing cloned nodes.
type Txn[T any] struct {
	// Tree is the tree being modified. It is embedded, so the
	// transaction reads and updates its root, size and opts directly.
	Tree[T]

	// mutated is the set of nodes mutated in this transaction
	// that we can keep mutating without cloning them again.
	// It is cleared if the transaction is cloned, iterated
	// upon or committed.
	mutated nodeMutated[T]

	// watches contains the watch channels of nodes that were cloned,
	// promoted or deleted in this transaction. They are closed by
	// Commit or Notify, after which the map is set to nil.
	watches map[chan struct{}]struct{}

	// deleteParentsCache keeps the last allocated slice to avoid
	// reallocating it on every deletion.
	deleteParentsCache []deleteParent[T]
}
    30  
    31  // Len returns the number of objects in the tree.
    32  func (txn *Txn[T]) Len() int {
    33  	return txn.size
    34  }
    35  
    36  // Clone returns a clone of the transaction. The clone is unaffected
    37  // by any future changes done with the original transaction.
    38  func (txn *Txn[T]) Clone() *Txn[T] {
    39  	// Clear the mutated nodes so that the returned clone won't be changed by
    40  	// further modifications in this transaction.
    41  	txn.mutated.clear()
    42  	return &Txn[T]{
    43  		Tree:               txn.Tree,
    44  		watches:            map[chan struct{}]struct{}{},
    45  		deleteParentsCache: nil,
    46  	}
    47  }
    48  
    49  // Insert or update the tree with the given key and value.
    50  // Returns the old value if it exists.
    51  func (txn *Txn[T]) Insert(key []byte, value T) (old T, hadOld bool) {
    52  	old, hadOld, txn.root = txn.insert(txn.root, key, value)
    53  	if !hadOld {
    54  		txn.size++
    55  	}
    56  	return
    57  }
    58  
    59  // Modify a value in the tree. If the key does not exist the modify
    60  // function is called with the zero value for T. It is up to the
    61  // caller to not mutate the value in-place and to return a clone.
    62  // Returns the old value if it exists.
    63  func (txn *Txn[T]) Modify(key []byte, mod func(T) T) (old T, hadOld bool) {
    64  	old, hadOld, txn.root = txn.modify(txn.root, key, mod)
    65  	if !hadOld {
    66  		txn.size++
    67  	}
    68  	return
    69  }
    70  
    71  // Delete the given key from the tree.
    72  // Returns the old value if it exists.
    73  func (txn *Txn[T]) Delete(key []byte) (old T, hadOld bool) {
    74  	old, hadOld, txn.root = txn.delete(txn.root, key)
    75  	if hadOld {
    76  		txn.size--
    77  	}
    78  	return
    79  }
    80  
    81  // RootWatch returns a watch channel for the root of the tree.
    82  // Since this is the channel associated with the root, this closes
    83  // when there are any changes to the tree.
    84  func (txn *Txn[T]) RootWatch() <-chan struct{} {
    85  	return txn.root.watch
    86  }
    87  
    88  // Get fetches the value associated with the given key.
    89  // Returns the value, a watch channel (which is closed on
    90  // modification to the key) and boolean which is true if
    91  // value was found.
    92  func (txn *Txn[T]) Get(key []byte) (T, <-chan struct{}, bool) {
    93  	value, watch, ok := search(txn.root, key)
    94  	if txn.opts.rootOnlyWatch {
    95  		watch = txn.root.watch
    96  	}
    97  	return value, watch, ok
    98  }
    99  
   100  // Prefix returns an iterator for all objects that starts with the
   101  // given prefix, and a channel that closes when any objects matching
   102  // the given prefix are upserted or deleted.
   103  func (txn *Txn[T]) Prefix(key []byte) (*Iterator[T], <-chan struct{}) {
   104  	txn.mutated.clear()
   105  	iter, watch := prefixSearch(txn.root, key)
   106  	if txn.opts.rootOnlyWatch {
   107  		watch = txn.root.watch
   108  	}
   109  	return iter, watch
   110  }
   111  
   112  // LowerBound returns an iterator for all objects that have a
   113  // key equal or higher than the given 'key'.
   114  func (txn *Txn[T]) LowerBound(key []byte) *Iterator[T] {
   115  	txn.mutated.clear()
   116  	return lowerbound(txn.root, key)
   117  }
   118  
   119  // Iterator returns an iterator for all objects.
   120  func (txn *Txn[T]) Iterator() *Iterator[T] {
   121  	txn.mutated.clear()
   122  	return newIterator[T](txn.root)
   123  }
   124  
   125  // Commit the transaction and produce the new tree.
   126  func (txn *Txn[T]) Commit() *Tree[T] {
   127  	txn.mutated.clear()
   128  	for ch := range txn.watches {
   129  		close(ch)
   130  	}
   131  	txn.watches = nil
   132  	return &Tree[T]{txn.opts, txn.root, txn.size}
   133  }
   134  
   135  // CommitOnly the transaction, but do not close the
   136  // watch channels. Returns the new tree.
   137  // To close the watch channels call Notify().
   138  func (txn *Txn[T]) CommitOnly() *Tree[T] {
   139  	txn.mutated.clear()
   140  	return &Tree[T]{txn.opts, txn.root, txn.size}
   141  }
   142  
   143  // Notify closes the watch channels of nodes that were
   144  // mutated as part of this transaction.
   145  func (txn *Txn[T]) Notify() {
   146  	for ch := range txn.watches {
   147  		close(ch)
   148  	}
   149  	txn.watches = nil
   150  }
   151  
   152  // PrintTree to the standard output. For debugging.
   153  func (txn *Txn[T]) PrintTree() {
   154  	txn.root.printTree(0)
   155  }
   156  
   157  func (txn *Txn[T]) cloneNode(n *header[T]) *header[T] {
   158  	if txn.mutated.exists(n) {
   159  		return n
   160  	}
   161  	if n.watch != nil {
   162  		txn.watches[n.watch] = struct{}{}
   163  	}
   164  	n = n.clone(!txn.opts.rootOnlyWatch || n == txn.root)
   165  	txn.mutated.put(n)
   166  	return n
   167  }
   168  
   169  func (txn *Txn[T]) insert(root *header[T], key []byte, value T) (oldValue T, hadOld bool, newRoot *header[T]) {
   170  	return txn.modify(root, key, func(_ T) T { return value })
   171  }
   172  
   173  func (txn *Txn[T]) modify(root *header[T], key []byte, mod func(T) T) (oldValue T, hadOld bool, newRoot *header[T]) {
   174  	fullKey := key
   175  
   176  	this := root
   177  	thisp := &newRoot
   178  
   179  	// Try to insert the key into the tree. If we find a free slot into which to insert
   180  	// it, we do it and return. If an existing node exists where the key should go, then
   181  	// we stop. 'this' points to that node, and 'thisp' to its memory location. It has
   182  	// not been cloned.
   183  	for {
   184  		if this.isLeaf() {
   185  			// We've reached a leaf node, cannot go further.
   186  			break
   187  		}
   188  
   189  		if !bytes.HasPrefix(key, this.prefix) {
   190  			break
   191  		}
   192  
   193  		// Prefix matched. Consume it and go further.
   194  		key = key[len(this.prefix):]
   195  		if len(key) == 0 {
   196  			// Our key matches this node.
   197  			break
   198  		}
   199  
   200  		child, idx := this.findIndex(key[0])
   201  		if child == nil {
   202  			// We've found a free slot where to insert the key.
   203  			if this.size()+1 > this.cap() {
   204  				// Node too small, promote it to the next size.
   205  				if this.watch != nil {
   206  					txn.watches[this.watch] = struct{}{}
   207  				}
   208  				this = this.promote(!txn.opts.rootOnlyWatch || this == newRoot)
   209  				txn.mutated.put(this)
   210  			} else {
   211  				// Node is big enough, clone it so we can mutate it
   212  				this = txn.cloneNode(this)
   213  			}
   214  			var zero T
   215  			this.insert(idx, newLeaf(txn.opts, key, fullKey, mod(zero)).self())
   216  			*thisp = this
   217  			return
   218  		}
   219  
   220  		// Clone the parent so we can modify it
   221  		this = txn.cloneNode(this)
   222  		*thisp = this
   223  		// And recurse into the child
   224  		thisp = &this.children()[idx]
   225  		this = *thisp
   226  	}
   227  
   228  	// A node exists where we wanted to insert the key.
   229  	// 'this' points to it, and 'thisp' is its memory location. The parents
   230  	// have been cloned.
   231  	switch {
   232  	case this.isLeaf():
   233  		common := commonPrefix(key, this.prefix)
   234  		if len(common) == len(this.prefix) && len(common) == len(key) {
   235  			// Exact match, clone and update the value.
   236  			oldValue = this.getLeaf().value
   237  			hadOld = true
   238  			this = txn.cloneNode(this)
   239  			*thisp = this
   240  			this.getLeaf().value = mod(oldValue)
   241  		} else {
   242  			// Partially matching prefix.
   243  			newNode := &node4[T]{
   244  				header: header[T]{prefix: common},
   245  			}
   246  			newNode.setKind(nodeKind4)
   247  
   248  			// Make a shallow copy of the leaf. But keep its watch channel
   249  			// intact since we're only manipulating its prefix.
   250  			oldLeafCopy := *this.getLeaf()
   251  			oldLeaf := &oldLeafCopy
   252  			oldLeaf.prefix = oldLeaf.prefix[len(common):]
   253  			key = key[len(common):]
   254  			var zero T
   255  			newLeaf := newLeaf(txn.opts, key, fullKey, mod(zero))
   256  
   257  			// Insert the two leaves into the node we created. If one has
   258  			// a key that is a subset of the other, then we can insert them
   259  			// as a leaf of the node4, otherwise they become children.
   260  			switch {
   261  			case len(oldLeaf.prefix) == 0:
   262  				oldLeaf.prefix = common
   263  				newNode.setLeaf(oldLeaf)
   264  				newNode.children[0] = newLeaf.self()
   265  				newNode.keys[0] = newLeaf.prefix[0]
   266  				newNode.setSize(1)
   267  
   268  			case len(key) == 0:
   269  				newLeaf.prefix = common
   270  				newNode.setLeaf(newLeaf)
   271  				newNode.children[0] = oldLeaf.self()
   272  				newNode.keys[0] = oldLeaf.prefix[0]
   273  				newNode.setSize(1)
   274  
   275  			case oldLeaf.prefix[0] < key[0]:
   276  				newNode.children[0] = oldLeaf.self()
   277  				newNode.keys[0] = oldLeaf.prefix[0]
   278  				newNode.children[1] = newLeaf.self()
   279  				newNode.keys[1] = key[0]
   280  				newNode.setSize(2)
   281  
   282  			default:
   283  				newNode.children[0] = newLeaf.self()
   284  				newNode.keys[0] = key[0]
   285  				newNode.children[1] = oldLeaf.self()
   286  				newNode.keys[1] = oldLeaf.prefix[0]
   287  				newNode.setSize(2)
   288  			}
   289  			*thisp = newNode.self()
   290  		}
   291  	case len(key) == 0:
   292  		// Exact match, but not a leaf node
   293  		this = txn.cloneNode(this)
   294  		*thisp = this
   295  		if leaf := this.getLeaf(); leaf != nil {
   296  			// Replace the existing leaf
   297  			oldValue = leaf.value
   298  			hadOld = true
   299  			leaf = txn.cloneNode(leaf.self()).getLeaf()
   300  			leaf.value = mod(oldValue)
   301  			this.setLeaf(leaf)
   302  		} else {
   303  			// Set the leaf
   304  			var zero T
   305  			this.setLeaf(newLeaf(txn.opts, this.prefix, fullKey, mod(zero)))
   306  		}
   307  
   308  	default:
   309  		// Partially matching prefix, non-leaf node.
   310  		common := commonPrefix(key, this.prefix)
   311  
   312  		this = txn.cloneNode(this)
   313  		*thisp = this
   314  		this.prefix = this.prefix[len(common):]
   315  		key = key[len(common):]
   316  
   317  		var zero T
   318  		newLeaf := newLeaf(txn.opts, key, fullKey, mod(zero))
   319  		newNode := &node4[T]{
   320  			header: header[T]{prefix: common},
   321  		}
   322  		newNode.setKind(nodeKind4)
   323  
   324  		switch {
   325  		case len(key) == 0:
   326  			newLeaf.prefix = common
   327  			newNode.setLeaf(newLeaf)
   328  			newNode.children[0] = this
   329  			newNode.keys[0] = this.prefix[0]
   330  			newNode.setSize(1)
   331  
   332  		case this.prefix[0] < key[0]:
   333  			newNode.children[0] = this
   334  			newNode.keys[0] = this.prefix[0]
   335  			newNode.children[1] = newLeaf.self()
   336  			newNode.keys[1] = key[0]
   337  			newNode.setSize(2)
   338  		default:
   339  			newNode.children[0] = newLeaf.self()
   340  			newNode.keys[0] = key[0]
   341  			newNode.children[1] = this
   342  			newNode.keys[1] = this.prefix[0]
   343  			newNode.setSize(2)
   344  		}
   345  		*thisp = newNode.self()
   346  	}
   347  	return
   348  }
   349  
// deleteParent tracks a node on the path to the target node that is being
// deleted. The delete method records one entry per node on that path.
type deleteParent[T any] struct {
	node  *header[T] // the node on the path; replaced by its clone while the path is rebuilt
	index int        // the index of this node at its parent
}
   356  
   357  func (txn *Txn[T]) delete(root *header[T], key []byte) (oldValue T, hadOld bool, newRoot *header[T]) {
   358  	// Reuse the same slice in the transaction to hold the parents in order to avoid
   359  	// allocations. Pre-allocate 32 levels to cover most of the use-cases without
   360  	// reallocation.
   361  	if txn.deleteParentsCache == nil {
   362  		txn.deleteParentsCache = make([]deleteParent[T], 0, 32)
   363  	}
   364  	parents := txn.deleteParentsCache[:1] // Placeholder for root
   365  
   366  	newRoot = root
   367  	this := root
   368  
   369  	// Find the target node and record the path to it.
   370  	var leaf *leaf[T]
   371  	for {
   372  		if bytes.HasPrefix(key, this.prefix) {
   373  			key = key[len(this.prefix):]
   374  			if len(key) == 0 {
   375  				leaf = this.getLeaf()
   376  				if leaf == nil {
   377  					return
   378  				}
   379  				// Target node found!
   380  				break
   381  			}
   382  			var idx int
   383  			this, idx = this.findIndex(key[0])
   384  			if this == nil {
   385  				return
   386  			}
   387  			parents = append(parents, deleteParent[T]{this, idx})
   388  		} else {
   389  			// Reached a node with a different prefix, so node not found.
   390  			return
   391  		}
   392  	}
   393  
   394  	oldValue = leaf.value
   395  	hadOld = true
   396  
   397  	// Mark the watch channel of the target for closing if not mutated already.
   398  	if leaf.watch != nil {
   399  		txn.watches[leaf.watch] = struct{}{}
   400  	}
   401  
   402  	if this == root {
   403  		// Target is the root, clear it.
   404  		if root.isLeaf() || newRoot.size() == 0 {
   405  			// Replace leaf or empty root with a node4
   406  			newRoot = newNode4[T]()
   407  		} else {
   408  			newRoot = txn.cloneNode(root)
   409  			newRoot.setLeaf(nil)
   410  		}
   411  		return
   412  	}
   413  
   414  	// The target was found, rebuild the tree from the root upwards.
   415  	parents[0].node = root
   416  
   417  	for i := len(parents) - 1; i > 0; i-- {
   418  		parent := &parents[i-1]
   419  		target := &parents[i]
   420  
   421  		// Clone the parent to mutate it.
   422  		parent.node = txn.cloneNode(parent.node)
   423  		children := parent.node.children()
   424  
   425  		if target.node == this && target.node.size() > 0 {
   426  			// This is the node that we want to delete, but it has
   427  			// children. Clone and clear the leaf.
   428  			target.node = txn.cloneNode(target.node)
   429  			target.node.setLeaf(nil)
   430  			children[target.index] = target.node
   431  		} else if target.node.size() == 0 && (target.node == this || target.node.getLeaf() == nil) {
   432  			// The node is empty, remove it from the parent.
   433  			parent.node.remove(target.index)
   434  		} else {
   435  			// Update the target (as it may have been cloned)
   436  			children[target.index] = target.node
   437  		}
   438  
   439  		if parent.node.size() > 0 {
   440  			// Check if the node should be demoted.
   441  			// To avoid thrashing we don't demote at the boundary, but at a slightly
   442  			// smaller size.
   443  			// TODO: Can we avoid the initial clone of parent.node?
   444  			var newNode *header[T]
   445  			switch {
   446  			case parent.node.kind() == nodeKind256 && parent.node.size() <= 37:
   447  				newNode = (&node48[T]{header: *parent.node}).self()
   448  				newNode.setKind(nodeKind48)
   449  				n48 := newNode.node48()
   450  				n48.leaf = parent.node.getLeaf()
   451  				children := n48.children[:0]
   452  				for k, n := range parent.node.node256().children[:] {
   453  					if n != nil {
   454  						n48.index[k] = int8(len(children))
   455  						children = append(children, n)
   456  					}
   457  				}
   458  			case parent.node.kind() == nodeKind48 && parent.node.size() <= 12:
   459  				newNode = (&node16[T]{header: *parent.node}).self()
   460  				newNode.setKind(nodeKind16)
   461  				copy(newNode.children()[:], parent.node.children())
   462  				n16 := newNode.node16()
   463  				n16.leaf = parent.node.getLeaf()
   464  				size := n16.size()
   465  				for i := 0; i < size; i++ {
   466  					n16.keys[i] = n16.children[i].prefix[0]
   467  				}
   468  			case parent.node.kind() == nodeKind16 && parent.node.size() <= 3:
   469  				newNode = (&node4[T]{header: *parent.node}).self()
   470  				newNode.setKind(nodeKind4)
   471  				n16 := parent.node.node16()
   472  				size := n16.size()
   473  				n4 := newNode.node4()
   474  				n4.leaf = n16.leaf
   475  				copy(n4.children[:], n16.children[:size])
   476  				copy(n4.keys[:], n16.keys[:size])
   477  			}
   478  			if newNode != nil {
   479  				parent.node = newNode
   480  			}
   481  		}
   482  	}
   483  	newRoot = parents[0].node
   484  	return
   485  }