github.com/cilium/statedb@v0.3.2/part/txn.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package part 5 6 import ( 7 "bytes" 8 ) 9 10 // Txn is a transaction against a tree. It allows doing efficient 11 // modifications to a tree by caching and reusing cloned nodes. 12 type Txn[T any] struct { 13 // tree is the tree being modified 14 Tree[T] 15 16 // mutated is the set of nodes mutated in this transaction 17 // that we can keep mutating without cloning them again. 18 // It is cleared if the transaction is cloned or iterated 19 // upon. 20 mutated nodeMutated[T] 21 22 // watches contains the channels of cloned nodes that should be closed 23 // when transaction is committed. 24 watches map[chan struct{}]struct{} 25 26 // deleteParentsCache keeps the last allocated slice to avoid 27 // reallocating it on every deletion. 28 deleteParentsCache []deleteParent[T] 29 } 30 31 // Len returns the number of objects in the tree. 32 func (txn *Txn[T]) Len() int { 33 return txn.size 34 } 35 36 // Clone returns a clone of the transaction. The clone is unaffected 37 // by any future changes done with the original transaction. 38 func (txn *Txn[T]) Clone() *Txn[T] { 39 // Clear the mutated nodes so that the returned clone won't be changed by 40 // further modifications in this transaction. 41 txn.mutated.clear() 42 return &Txn[T]{ 43 Tree: txn.Tree, 44 watches: map[chan struct{}]struct{}{}, 45 deleteParentsCache: nil, 46 } 47 } 48 49 // Insert or update the tree with the given key and value. 50 // Returns the old value if it exists. 51 func (txn *Txn[T]) Insert(key []byte, value T) (old T, hadOld bool) { 52 old, hadOld, txn.root = txn.insert(txn.root, key, value) 53 if !hadOld { 54 txn.size++ 55 } 56 return 57 } 58 59 // Modify a value in the tree. If the key does not exist the modify 60 // function is called with the zero value for T. It is up to the 61 // caller to not mutate the value in-place and to return a clone. 62 // Returns the old value if it exists. 63 func (txn *Txn[T]) Modify(key []byte, mod func(T) T) (old T, hadOld bool) { 64 old, hadOld, txn.root = txn.modify(txn.root, key, mod) 65 if !hadOld { 66 txn.size++ 67 } 68 return 69 } 70 71 // Delete the given key from the tree. 72 // Returns the old value if it exists. 73 func (txn *Txn[T]) Delete(key []byte) (old T, hadOld bool) { 74 old, hadOld, txn.root = txn.delete(txn.root, key) 75 if hadOld { 76 txn.size-- 77 } 78 return 79 } 80 81 // RootWatch returns a watch channel for the root of the tree. 82 // Since this is the channel associated with the root, this closes 83 // when there are any changes to the tree. 84 func (txn *Txn[T]) RootWatch() <-chan struct{} { 85 return txn.root.watch 86 } 87 88 // Get fetches the value associated with the given key. 89 // Returns the value, a watch channel (which is closed on 90 // modification to the key) and boolean which is true if 91 // value was found. 92 func (txn *Txn[T]) Get(key []byte) (T, <-chan struct{}, bool) { 93 value, watch, ok := search(txn.root, key) 94 if txn.opts.rootOnlyWatch { 95 watch = txn.root.watch 96 } 97 return value, watch, ok 98 } 99 100 // Prefix returns an iterator for all objects that starts with the 101 // given prefix, and a channel that closes when any objects matching 102 // the given prefix are upserted or deleted. 103 func (txn *Txn[T]) Prefix(key []byte) (*Iterator[T], <-chan struct{}) { 104 txn.mutated.clear() 105 iter, watch := prefixSearch(txn.root, key) 106 if txn.opts.rootOnlyWatch { 107 watch = txn.root.watch 108 } 109 return iter, watch 110 } 111 112 // LowerBound returns an iterator for all objects that have a 113 // key equal or higher than the given 'key'. 114 func (txn *Txn[T]) LowerBound(key []byte) *Iterator[T] { 115 txn.mutated.clear() 116 return lowerbound(txn.root, key) 117 } 118 119 // Iterator returns an iterator for all objects. 120 func (txn *Txn[T]) Iterator() *Iterator[T] { 121 txn.mutated.clear() 122 return newIterator[T](txn.root) 123 } 124 125 // Commit the transaction and produce the new tree. 126 func (txn *Txn[T]) Commit() *Tree[T] { 127 txn.mutated.clear() 128 for ch := range txn.watches { 129 close(ch) 130 } 131 txn.watches = nil 132 return &Tree[T]{txn.opts, txn.root, txn.size} 133 } 134 135 // CommitOnly the transaction, but do not close the 136 // watch channels. Returns the new tree. 137 // To close the watch channels call Notify(). 138 func (txn *Txn[T]) CommitOnly() *Tree[T] { 139 txn.mutated.clear() 140 return &Tree[T]{txn.opts, txn.root, txn.size} 141 } 142 143 // Notify closes the watch channels of nodes that were 144 // mutated as part of this transaction. 145 func (txn *Txn[T]) Notify() { 146 for ch := range txn.watches { 147 close(ch) 148 } 149 txn.watches = nil 150 } 151 152 // PrintTree to the standard output. For debugging. 153 func (txn *Txn[T]) PrintTree() { 154 txn.root.printTree(0) 155 } 156 157 func (txn *Txn[T]) cloneNode(n *header[T]) *header[T] { 158 if txn.mutated.exists(n) { 159 return n 160 } 161 if n.watch != nil { 162 txn.watches[n.watch] = struct{}{} 163 } 164 n = n.clone(!txn.opts.rootOnlyWatch || n == txn.root) 165 txn.mutated.put(n) 166 return n 167 } 168 169 func (txn *Txn[T]) insert(root *header[T], key []byte, value T) (oldValue T, hadOld bool, newRoot *header[T]) { 170 return txn.modify(root, key, func(_ T) T { return value }) 171 } 172 173 func (txn *Txn[T]) modify(root *header[T], key []byte, mod func(T) T) (oldValue T, hadOld bool, newRoot *header[T]) { 174 fullKey := key 175 176 this := root 177 thisp := &newRoot 178 179 // Try to insert the key into the tree. If we find a free slot into which to insert 180 // it, we do it and return. If an existing node exists where the key should go, then 181 // we stop. 'this' points to that node, and 'thisp' to its memory location. It has 182 // not been cloned. 183 for { 184 if this.isLeaf() { 185 // We've reached a leaf node, cannot go further. 186 break 187 } 188 189 if !bytes.HasPrefix(key, this.prefix) { 190 break 191 } 192 193 // Prefix matched. Consume it and go further. 194 key = key[len(this.prefix):] 195 if len(key) == 0 { 196 // Our key matches this node. 197 break 198 } 199 200 child, idx := this.findIndex(key[0]) 201 if child == nil { 202 // We've found a free slot where to insert the key. 203 if this.size()+1 > this.cap() { 204 // Node too small, promote it to the next size. 205 if this.watch != nil { 206 txn.watches[this.watch] = struct{}{} 207 } 208 this = this.promote(!txn.opts.rootOnlyWatch || this == newRoot) 209 txn.mutated.put(this) 210 } else { 211 // Node is big enough, clone it so we can mutate it 212 this = txn.cloneNode(this) 213 } 214 var zero T 215 this.insert(idx, newLeaf(txn.opts, key, fullKey, mod(zero)).self()) 216 *thisp = this 217 return 218 } 219 220 // Clone the parent so we can modify it 221 this = txn.cloneNode(this) 222 *thisp = this 223 // And recurse into the child 224 thisp = &this.children()[idx] 225 this = *thisp 226 } 227 228 // A node exists where we wanted to insert the key. 229 // 'this' points to it, and 'thisp' is its memory location. The parents 230 // have been cloned. 231 switch { 232 case this.isLeaf(): 233 common := commonPrefix(key, this.prefix) 234 if len(common) == len(this.prefix) && len(common) == len(key) { 235 // Exact match, clone and update the value. 236 oldValue = this.getLeaf().value 237 hadOld = true 238 this = txn.cloneNode(this) 239 *thisp = this 240 this.getLeaf().value = mod(oldValue) 241 } else { 242 // Partially matching prefix. 243 newNode := &node4[T]{ 244 header: header[T]{prefix: common}, 245 } 246 newNode.setKind(nodeKind4) 247 248 // Make a shallow copy of the leaf. But keep its watch channel 249 // intact since we're only manipulating its prefix. 250 oldLeafCopy := *this.getLeaf() 251 oldLeaf := &oldLeafCopy 252 oldLeaf.prefix = oldLeaf.prefix[len(common):] 253 key = key[len(common):] 254 var zero T 255 newLeaf := newLeaf(txn.opts, key, fullKey, mod(zero)) 256 257 // Insert the two leaves into the node we created. If one has 258 // a key that is a subset of the other, then we can insert them 259 // as a leaf of the node4, otherwise they become children. 260 switch { 261 case len(oldLeaf.prefix) == 0: 262 oldLeaf.prefix = common 263 newNode.setLeaf(oldLeaf) 264 newNode.children[0] = newLeaf.self() 265 newNode.keys[0] = newLeaf.prefix[0] 266 newNode.setSize(1) 267 268 case len(key) == 0: 269 newLeaf.prefix = common 270 newNode.setLeaf(newLeaf) 271 newNode.children[0] = oldLeaf.self() 272 newNode.keys[0] = oldLeaf.prefix[0] 273 newNode.setSize(1) 274 275 case oldLeaf.prefix[0] < key[0]: 276 newNode.children[0] = oldLeaf.self() 277 newNode.keys[0] = oldLeaf.prefix[0] 278 newNode.children[1] = newLeaf.self() 279 newNode.keys[1] = key[0] 280 newNode.setSize(2) 281 282 default: 283 newNode.children[0] = newLeaf.self() 284 newNode.keys[0] = key[0] 285 newNode.children[1] = oldLeaf.self() 286 newNode.keys[1] = oldLeaf.prefix[0] 287 newNode.setSize(2) 288 } 289 *thisp = newNode.self() 290 } 291 case len(key) == 0: 292 // Exact match, but not a leaf node 293 this = txn.cloneNode(this) 294 *thisp = this 295 if leaf := this.getLeaf(); leaf != nil { 296 // Replace the existing leaf 297 oldValue = leaf.value 298 hadOld = true 299 leaf = txn.cloneNode(leaf.self()).getLeaf() 300 leaf.value = mod(oldValue) 301 this.setLeaf(leaf) 302 } else { 303 // Set the leaf 304 var zero T 305 this.setLeaf(newLeaf(txn.opts, this.prefix, fullKey, mod(zero))) 306 } 307 308 default: 309 // Partially matching prefix, non-leaf node. 310 common := commonPrefix(key, this.prefix) 311 312 this = txn.cloneNode(this) 313 *thisp = this 314 this.prefix = this.prefix[len(common):] 315 key = key[len(common):] 316 317 var zero T 318 newLeaf := newLeaf(txn.opts, key, fullKey, mod(zero)) 319 newNode := &node4[T]{ 320 header: header[T]{prefix: common}, 321 } 322 newNode.setKind(nodeKind4) 323 324 switch { 325 case len(key) == 0: 326 newLeaf.prefix = common 327 newNode.setLeaf(newLeaf) 328 newNode.children[0] = this 329 newNode.keys[0] = this.prefix[0] 330 newNode.setSize(1) 331 332 case this.prefix[0] < key[0]: 333 newNode.children[0] = this 334 newNode.keys[0] = this.prefix[0] 335 newNode.children[1] = newLeaf.self() 336 newNode.keys[1] = key[0] 337 newNode.setSize(2) 338 default: 339 newNode.children[0] = newLeaf.self() 340 newNode.keys[0] = key[0] 341 newNode.children[1] = this 342 newNode.keys[1] = this.prefix[0] 343 newNode.setSize(2) 344 } 345 *thisp = newNode.self() 346 } 347 return 348 } 349 350 // deleteParent tracks a node on the path to the target node that is being 351 // deleted. 352 type deleteParent[T any] struct { 353 node *header[T] 354 index int // the index of this node at its parent 355 } 356 357 func (txn *Txn[T]) delete(root *header[T], key []byte) (oldValue T, hadOld bool, newRoot *header[T]) { 358 // Reuse the same slice in the transaction to hold the parents in order to avoid 359 // allocations. Pre-allocate 32 levels to cover most of the use-cases without 360 // reallocation. 361 if txn.deleteParentsCache == nil { 362 txn.deleteParentsCache = make([]deleteParent[T], 0, 32) 363 } 364 parents := txn.deleteParentsCache[:1] // Placeholder for root 365 366 newRoot = root 367 this := root 368 369 // Find the target node and record the path to it. 370 var leaf *leaf[T] 371 for { 372 if bytes.HasPrefix(key, this.prefix) { 373 key = key[len(this.prefix):] 374 if len(key) == 0 { 375 leaf = this.getLeaf() 376 if leaf == nil { 377 return 378 } 379 // Target node found! 380 break 381 } 382 var idx int 383 this, idx = this.findIndex(key[0]) 384 if this == nil { 385 return 386 } 387 parents = append(parents, deleteParent[T]{this, idx}) 388 } else { 389 // Reached a node with a different prefix, so node not found. 390 return 391 } 392 } 393 394 oldValue = leaf.value 395 hadOld = true 396 397 // Mark the watch channel of the target for closing if not mutated already. 398 if leaf.watch != nil { 399 txn.watches[leaf.watch] = struct{}{} 400 } 401 402 if this == root { 403 // Target is the root, clear it. 404 if root.isLeaf() || newRoot.size() == 0 { 405 // Replace leaf or empty root with a node4 406 newRoot = newNode4[T]() 407 } else { 408 newRoot = txn.cloneNode(root) 409 newRoot.setLeaf(nil) 410 } 411 return 412 } 413 414 // The target was found, rebuild the tree from the root upwards. 415 parents[0].node = root 416 417 for i := len(parents) - 1; i > 0; i-- { 418 parent := &parents[i-1] 419 target := &parents[i] 420 421 // Clone the parent to mutate it. 422 parent.node = txn.cloneNode(parent.node) 423 children := parent.node.children() 424 425 if target.node == this && target.node.size() > 0 { 426 // This is the node that we want to delete, but it has 427 // children. Clone and clear the leaf. 428 target.node = txn.cloneNode(target.node) 429 target.node.setLeaf(nil) 430 children[target.index] = target.node 431 } else if target.node.size() == 0 && (target.node == this || target.node.getLeaf() == nil) { 432 // The node is empty, remove it from the parent. 433 parent.node.remove(target.index) 434 } else { 435 // Update the target (as it may have been cloned) 436 children[target.index] = target.node 437 } 438 439 if parent.node.size() > 0 { 440 // Check if the node should be demoted. 441 // To avoid thrashing we don't demote at the boundary, but at a slightly 442 // smaller size. 443 // TODO: Can we avoid the initial clone of parent.node? 444 var newNode *header[T] 445 switch { 446 case parent.node.kind() == nodeKind256 && parent.node.size() <= 37: 447 newNode = (&node48[T]{header: *parent.node}).self() 448 newNode.setKind(nodeKind48) 449 n48 := newNode.node48() 450 n48.leaf = parent.node.getLeaf() 451 children := n48.children[:0] 452 for k, n := range parent.node.node256().children[:] { 453 if n != nil { 454 n48.index[k] = int8(len(children)) 455 children = append(children, n) 456 } 457 } 458 case parent.node.kind() == nodeKind48 && parent.node.size() <= 12: 459 newNode = (&node16[T]{header: *parent.node}).self() 460 newNode.setKind(nodeKind16) 461 copy(newNode.children()[:], parent.node.children()) 462 n16 := newNode.node16() 463 n16.leaf = parent.node.getLeaf() 464 size := n16.size() 465 for i := 0; i < size; i++ { 466 n16.keys[i] = n16.children[i].prefix[0] 467 } 468 case parent.node.kind() == nodeKind16 && parent.node.size() <= 3: 469 newNode = (&node4[T]{header: *parent.node}).self() 470 newNode.setKind(nodeKind4) 471 n16 := parent.node.node16() 472 size := n16.size() 473 n4 := newNode.node4() 474 n4.leaf = n16.leaf 475 copy(n4.children[:], n16.children[:size]) 476 copy(n4.keys[:], n16.keys[:size]) 477 } 478 if newNode != nil { 479 parent.node = newNode 480 } 481 } 482 } 483 newRoot = parents[0].node 484 return 485 }