github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/chunker.go (about) 1 // Copyright 2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package tree 23 24 import ( 25 "context" 26 27 "github.com/dolthub/dolt/go/store/prolly/message" 28 ) 29 30 type Chunker interface { 31 AddPair(ctx context.Context, key, value Item) error 32 UpdatePair(ctx context.Context, key, value Item) error 33 DeletePair(ctx context.Context, key, value Item) error 34 Done(ctx context.Context) (Node, error) 35 } 36 37 type chunker[S message.Serializer] struct { 38 cur *cursor 39 parent *chunker[S] 40 level int 41 done bool 42 43 splitter nodeSplitter 44 builder *nodeBuilder[S] 45 serializer S 46 47 ns NodeStore 48 } 49 50 //var _ Chunker = &chunker[]{} 51 52 func NewEmptyChunker[S message.Serializer](ctx context.Context, ns NodeStore, serializer S) (Chunker, error) { 53 return newEmptyChunker(ctx, ns, serializer) 54 } 55 56 func newEmptyChunker[S message.Serializer](ctx context.Context, ns NodeStore, serializer S) (*chunker[S], error) { 57 return newChunker(ctx, nil, 0, ns, serializer) 58 } 59 60 func newChunker[S message.Serializer](ctx context.Context, cur *cursor, level int, ns NodeStore, serializer S) (*chunker[S], error) { 61 // |cur| will be nil if this is a new Node, implying this is a new tree, or the tree has grown in height relative 62 // to its original chunked form. 63 64 splitter := defaultSplitterFactory(uint8(level % 256)) 65 builder := newNodeBuilder(serializer, level) 66 67 sc := &chunker[S]{ 68 cur: cur, 69 parent: nil, 70 level: level, 71 splitter: splitter, 72 builder: builder, 73 serializer: serializer, 74 ns: ns, 75 } 76 77 if cur != nil { 78 if err := sc.processPrefix(ctx); err != nil { 79 return nil, err 80 } 81 } 82 83 return sc, nil 84 } 85 86 func (tc *chunker[S]) processPrefix(ctx context.Context) (err error) { 87 if tc.cur.parent != nil && tc.parent == nil { 88 if err := tc.createParentChunker(ctx); err != nil { 89 return err 90 } 91 } 92 93 idx := tc.cur.idx 94 tc.cur.skipToNodeStart() 95 96 for tc.cur.idx < idx { 97 var sz uint64 98 sz, err = tc.cur.currentSubtreeSize() 99 if err != nil { 100 return err 101 } 102 _, err = tc.append(ctx, 103 tc.cur.CurrentKey(), 104 tc.cur.currentValue(), 105 sz) 106 107 // todo(andy): seek to correct chunk 108 // currently when inserting tuples between chunks 109 // we seek to the end of the previous chunk rather 110 // than the beginning of the next chunk. This causes 111 // us to iterate over the entire previous chunk 112 //assertFalse(ok) 113 114 if err != nil { 115 return err 116 } 117 118 err = tc.cur.advance(ctx) 119 if err != nil { 120 return err 121 } 122 } 123 124 return nil 125 } 126 127 // AddPair adds a val.Tuple pair to the chunker. 128 func (tc *chunker[S]) AddPair(ctx context.Context, key, value Item) error { 129 _, err := tc.append(ctx, Item(key), Item(value), 1) 130 return err 131 } 132 133 // UpdatePair updates a val.Tuple pair in the chunker. 134 func (tc *chunker[S]) UpdatePair(ctx context.Context, key, value Item) error { 135 if err := tc.skip(ctx); err != nil { 136 return err 137 } 138 _, err := tc.append(ctx, Item(key), Item(value), 1) 139 return err 140 } 141 142 // DeletePair deletes a val.Tuple pair from the chunker. 143 func (tc *chunker[S]) DeletePair(ctx context.Context, _, _ Item) error { 144 return tc.skip(ctx) 145 } 146 147 // advanceTo progresses the chunker until its tracking cursor catches up with 148 // |next|, a cursor indicating next key where an edit will be applied. 149 // 150 // The method proceeds from the deepest chunker recursively into its 151 // linked list parents: 152 // 153 // (1) If the current cursor and all of its parents are aligned with |next|, 154 // we are done. 155 // 156 // (2) In lockstep, a) append to the chunker and b) increment the cursor until 157 // we either meet condition (1) and return, or we synchronize and progress to 158 // (3) or (4). Synchronizing means that the current tree being built has 159 // reached a chunk boundary that aligns with a chunk boundary in the old tree 160 // being mutated. Synchronization means chunks between this boundary and 161 // |next| at the current cursor level will be unchanged and can be skipped. 162 // 163 // (3) All parent cursors are (1) current or (2) synchronized, or there are no 164 // parents, and we are done. 165 // 166 // (4) The parent cursors are not aligned. Recurse into the parent. After 167 // parents are aligned, we need to reprocess the prefix of the current node in 168 // anticipation of impending edits that may edit the current chunk. Note that 169 // processPrefix is only necessary for the "fast forward" case where we 170 // synchronized the tree level before reaching |next|. 171 func (tc *chunker[S]) advanceTo(ctx context.Context, next *cursor) error { 172 cmp := tc.cur.compare(next) 173 if cmp == 0 { // step (1) 174 return nil 175 } else if cmp > 0 { 176 //todo(max): this appears to be a result of a seek() bug, where 177 // we navigate to the end of the previous chunk rather than the 178 // beginning of the next chunk. I think this is basically a one-off 179 // error. 180 for tc.cur.compare(next) > 0 { 181 if err := next.advance(ctx); err != nil { 182 return err 183 } 184 } 185 return nil 186 } 187 188 sz, err := tc.cur.currentSubtreeSize() 189 if err != nil { 190 return err 191 } 192 split, err := tc.append(ctx, tc.cur.CurrentKey(), tc.cur.currentValue(), sz) 193 if err != nil { 194 return err 195 } 196 197 for !(split && tc.cur.atNodeEnd()) { // step (2) 198 err = tc.cur.advance(ctx) 199 if err != nil { 200 return err 201 } 202 if cmp = tc.cur.compare(next); cmp >= 0 { 203 // we caught up before synchronizing 204 return nil 205 } 206 sz, err := tc.cur.currentSubtreeSize() 207 if err != nil { 208 return err 209 } 210 split, err = tc.append(ctx, tc.cur.CurrentKey(), tc.cur.currentValue(), sz) 211 if err != nil { 212 return err 213 } 214 } 215 216 if tc.cur.parent == nil || next.parent == nil { // step (3) 217 // end of tree 218 tc.cur.copy(next) 219 return nil 220 } 221 222 if tc.cur.parent.compare(next.parent) == 0 { // step (3) 223 // (rare) new tree synchronized with old tree at the 224 // same time as the cursor caught up to the next mutation point 225 tc.cur.copy(next) 226 return nil 227 } 228 229 // step(4) 230 231 // This optimization is logically equivalent to advancing 232 // current cursor. Because we just wrote a chunk, we are 233 // at a boundary and can simply increment the parent. 234 err = tc.cur.parent.advance(ctx) 235 if err != nil { 236 return err 237 } 238 tc.cur.invalidateAtEnd() 239 240 // no more pending chunks at this level, recurse 241 // into parent 242 err = tc.parent.advanceTo(ctx, next.parent) 243 if err != nil { 244 return err 245 } 246 247 // fast forward to the edit index at this level 248 tc.cur.copy(next) 249 250 // incoming edit can affect the entire chunk, process the prefix 251 err = tc.processPrefix(ctx) 252 if err != nil { 253 return err 254 } 255 return nil 256 } 257 258 func (tc *chunker[S]) skip(ctx context.Context) error { 259 err := tc.cur.advance(ctx) 260 return err 261 } 262 263 // Append adds a new key-value pair to the chunker, validating the new pair to ensure 264 // that chunks are well-formed. Key-value pairs are appended atomically a chunk boundary 265 // may be made before or after the pair, but not between them. Returns true if chunk boundary 266 // was split. 267 func (tc *chunker[S]) append(ctx context.Context, key, value Item, subtree uint64) (bool, error) { 268 // When adding new key-value pairs to an in-progress chunk, we must enforce 3 invariants 269 // (1) Key-value pairs are stored in the same Node. 270 // (2) The total Size of a Node's data cannot exceed |MaxVectorOffset|. 271 // (3) Internal Nodes (Level > 0) must contain at least 2 key-value pairs (4 node items). 272 // Infinite recursion can occur if internal nodes contain a single novelNode with a key 273 // large enough to trigger a chunk boundary. Forming a chunk boundary after a single 274 // key will lead to an identical novelNode in the nextMutation Level in the tree, triggering 275 // the same state infinitely. This problem can only occur at levels 2 and above, 276 // but we enforce this constraint for all internal nodes of the tree. 277 278 // constraint (3) 279 degenerate := !tc.isLeaf() && tc.builder.count() == 1 280 281 // constraint (2) 282 overflow := !tc.builder.hasCapacity(key, value) 283 284 if overflow && degenerate { 285 // Constraints (2) and (3) are in conflict 286 panic("impossible node") 287 } 288 289 if overflow { 290 // Enforce constraints (1) and (2): 291 // |key| and |value| won't fit in this chunk, force a 292 // boundary here and pass them to the nextMutation chunk. 293 err := tc.handleChunkBoundary(ctx) 294 if err != nil { 295 return false, err 296 } 297 } 298 299 tc.builder.addItems(key, value, subtree) 300 301 err := tc.splitter.Append(key, value) 302 if err != nil { 303 return false, err 304 } 305 306 // recompute with updated |tc.keys| 307 degenerate = !tc.isLeaf() && tc.builder.count() == 1 308 309 if tc.splitter.CrossedBoundary() && !degenerate { 310 err := tc.handleChunkBoundary(ctx) 311 if err != nil { 312 return false, err 313 } 314 return true, nil 315 } 316 317 return false, nil 318 } 319 320 func (tc *chunker[S]) appendToParent(ctx context.Context, novel novelNode) (bool, error) { 321 if tc.parent == nil { 322 if err := tc.createParentChunker(ctx); err != nil { 323 return false, err 324 } 325 } 326 327 return tc.parent.append(ctx, novel.lastKey, novel.addr[:], novel.treeCount) 328 } 329 330 func (tc *chunker[S]) handleChunkBoundary(ctx context.Context) error { 331 assertTrue(tc.builder.count() > 0, "in-progress chunk must be non-empty to create chunk boundary") 332 333 novel, err := writeNewNode(ctx, tc.ns, tc.builder) 334 if err != nil { 335 return err 336 } 337 338 if _, err = tc.appendToParent(ctx, novel); err != nil { 339 return err 340 } 341 342 tc.splitter.Reset() 343 344 return nil 345 } 346 347 func (tc *chunker[S]) createParentChunker(ctx context.Context) (err error) { 348 assertTrue(tc.parent == nil, "chunker parent must be nil") 349 350 var parent *cursor 351 if tc.cur != nil && tc.cur.parent != nil { 352 // todo(andy): does this comment make sense? cloning a pointer? 353 // Clone the parent cursor because otherwise calling cur.forward() will affect our parent - and vice versa - 354 // in surprising ways. Instead, Skip moves forward our parent's cursor if we forward across a boundary. 355 parent = tc.cur.parent 356 } 357 358 tc.parent, err = newChunker(ctx, parent, tc.level+1, tc.ns, tc.serializer) 359 if err != nil { 360 return err 361 } 362 363 return nil 364 } 365 366 // Done returns the root Node of the resulting tree. 367 // The logic here is subtle, but hopefully correct and understandable. See comments inline. 368 func (tc *chunker[S]) Done(ctx context.Context) (Node, error) { 369 assertTrue(!tc.done, "chunker must not be done") 370 tc.done = true 371 372 if tc.cur != nil { 373 if err := tc.finalizeCursor(ctx); err != nil { 374 return Node{}, err 375 } 376 } 377 378 // There is pending content above us, so we must push any remaining items from this Level up and allow some parent 379 // to find the root of the resulting tree. 380 if tc.parent != nil && tc.parent.anyPending() { 381 if tc.builder.count() > 0 { 382 // |tc.keys| are the last items at this Level of the tree, 383 // make a chunk out of them 384 if err := tc.handleChunkBoundary(ctx); err != nil { 385 return Node{}, err 386 } 387 } 388 389 return tc.parent.Done(ctx) 390 } 391 392 // At this point, we know |tc.keys| contains every item at this Level of the tree. 393 // To see this, consider that there are two ways items can enter |tc.keys|. 394 // (1) as the result of processPrefix() with the cursor on anything other than the first item in the Node 395 // (2) as a result of a child chunker hitting an explicit chunk boundary during either Append() or finalize(). 396 // 397 // The only way there can be no items in some parent chunker's |tc.keys| is if this chunker began with 398 // a cursor within its first existing chunk (and thus all parents processPrefix()'d with a cursor on their first item) and 399 // continued through all subsequent items without creating any explicit chunk boundaries (and thus never sent any 400 // items up to a parent as a result of chunking). Therefore, this chunker's |tc.keys| must contain all items 401 // within the current Node. 402 403 // This Level must represent *a* root of the tree, but it is possibly non-canonical. There are three possible cases: 404 // (1) This is "leaf" chunker and thus produced tree of depth 1 which contains exactly one chunk 405 // (never hit a boundary), or 406 // (2) This in an internal Node of the tree which contains multiple references to child nodes. In either case, 407 // this is the canonical root of the tree. 408 if tc.isLeaf() || tc.builder.count() > 1 { 409 novel, err := writeNewNode(ctx, tc.ns, tc.builder) 410 return novel.node, err 411 } 412 // (3) This is an internal Node of the tree with a single novelNode. This is a non-canonical root, and we must walk 413 // down until we find cases (1) or (2), above. 414 assertTrue(!tc.isLeaf(), "chunker must not be leaf chunker") 415 return getCanonicalRoot(ctx, tc.ns, tc.builder) 416 } 417 418 // If we are mutating an existing Node, appending subsequent items in the Node until we reach a pre-existing chunk 419 // boundary or the end of the Node. 420 func (tc *chunker[S]) finalizeCursor(ctx context.Context) (err error) { 421 for tc.cur.Valid() { 422 var sz uint64 423 sz, err = tc.cur.currentSubtreeSize() 424 if err != nil { 425 return 426 } 427 var ok bool 428 ok, err = tc.append(ctx, 429 tc.cur.CurrentKey(), 430 tc.cur.currentValue(), 431 sz) 432 if err != nil { 433 return err 434 } 435 if ok && tc.cur.atNodeEnd() { 436 break // boundary occurred at same place in old & new Node 437 } 438 439 err = tc.cur.advance(ctx) 440 if err != nil { 441 return err 442 } 443 } 444 445 if tc.cur.parent != nil { 446 err := tc.cur.parent.advance(ctx) 447 448 if err != nil { 449 return err 450 } 451 452 // Invalidate this cursor to mark it finalized. 453 tc.cur.nd = Node{} 454 } 455 456 return nil 457 } 458 459 // Returns true if this nodeSplitter or any of its parents have any pending items in their |currentPair| slice. 460 func (tc *chunker[S]) anyPending() bool { 461 if tc.builder.count() > 0 { 462 return true 463 } 464 465 if tc.parent != nil { 466 return tc.parent.anyPending() 467 } 468 469 return false 470 } 471 472 func (tc *chunker[S]) isLeaf() bool { 473 return tc.level == 0 474 } 475 476 func getCanonicalRoot[S message.Serializer](ctx context.Context, ns NodeStore, builder *nodeBuilder[S]) (Node, error) { 477 cnt := builder.count() 478 assertTrue(cnt == 1, "in-progress chunk must be non-canonical to call getCanonicalRoot") 479 480 nd, err := builder.build() 481 if err != nil { 482 return Node{}, err 483 } 484 mt := nd.getAddress(0) 485 486 for { 487 child, err := fetchChild(ctx, ns, mt) 488 if err != nil { 489 return Node{}, err 490 } 491 492 if child.IsLeaf() || child.count > 1 { 493 return child, nil 494 } 495 496 mt = child.getAddress(0) 497 } 498 }