github.com/MetalBlockchain/metalgo@v1.11.9/x/sync/manager.go

// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package sync

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"slices"
	"sync"

	"go.uber.org/zap"
	"golang.org/x/exp/maps"

	"github.com/MetalBlockchain/metalgo/ids"
	"github.com/MetalBlockchain/metalgo/utils/logging"
	"github.com/MetalBlockchain/metalgo/utils/maybe"
	"github.com/MetalBlockchain/metalgo/utils/set"
	"github.com/MetalBlockchain/metalgo/x/merkledb"

	pb "github.com/MetalBlockchain/metalgo/proto/pb/sync"
)

const (
	defaultRequestKeyLimit      = maxKeyValuesLimit
	defaultRequestByteSizeLimit = maxByteSizeLimit
)

var (
	ErrAlreadyStarted             = errors.New("cannot start a Manager that has already been started")
	ErrAlreadyClosed              = errors.New("Manager is closed")
	ErrNoClientProvided           = errors.New("client is a required field of the sync config")
	ErrNoDatabaseProvided         = errors.New("sync database is a required field of the sync config")
	ErrNoLogProvided              = errors.New("log is a required field of the sync config")
	ErrZeroWorkLimit              = errors.New("simultaneous work limit must be greater than 0")
	ErrFinishedWithUnexpectedRoot = errors.New("finished syncing with an unexpected root")
)

type priority byte

// Note that [highPriority] > [medPriority] > [lowPriority].
const (
	lowPriority priority = iota + 1
	medPriority
	highPriority
)

// Signifies that we should sync the range [start, end].
// nil [start] means there is no lower bound.
// nil [end] means there is no upper bound.
// [localRootID] is the ID of the root of this range in our database.
// If we have no local root for this range, [localRootID] is ids.Empty.
type workItem struct {
	start       maybe.Maybe[[]byte]
	end         maybe.Maybe[[]byte]
	priority    priority
	localRootID ids.ID
}

func newWorkItem(localRootID ids.ID, start maybe.Maybe[[]byte], end maybe.Maybe[[]byte], priority priority) *workItem {
	return &workItem{
		localRootID: localRootID,
		start:       start,
		end:         end,
		priority:    priority,
	}
}

type Manager struct {
	// Must be held when accessing [config.TargetRoot].
	syncTargetLock sync.RWMutex
	config         ManagerConfig

	workLock sync.Mutex
	// The number of work items currently being processed.
	// Namely, the number of goroutines executing [doWork].
	// [workLock] must be held when accessing [processingWorkItems].
	processingWorkItems int
	// [workLock] must be held while accessing [unprocessedWork].
	unprocessedWork *workHeap
	// Signalled when:
	// - An item is added to [unprocessedWork].
	// - An item is added to [processedWork].
	// - Close() is called.
	// [workLock] is its inner lock.
	unprocessedWorkCond sync.Cond
	// [workLock] must be held while accessing [processedWork].
	processedWork *workHeap

	// When this is closed:
	// - [cancelCtx] has been called, if Start was called.
	// - [unprocessedWork] and [processedWork] are closed.
	doneChan chan struct{}

	errLock sync.Mutex
	// If non-nil, there was a fatal error.
	// [errLock] must be held when accessing [fatalError].
	fatalError error

	// Cancels all currently processing work items.
	cancelCtx context.CancelFunc

	// Set to true when Start is called.
	syncing   bool
	closeOnce sync.Once
	tokenSize int
}

type ManagerConfig struct {
	DB                    DB
	Client                Client
	SimultaneousWorkLimit int
	Log                   logging.Logger
	TargetRoot            ids.ID
	BranchFactor          merkledb.BranchFactor
}

func NewManager(config ManagerConfig) (*Manager, error) {
	switch {
	case config.Client == nil:
		return nil, ErrNoClientProvided
	case config.DB == nil:
		return nil, ErrNoDatabaseProvided
	case config.Log == nil:
		return nil, ErrNoLogProvided
	case config.SimultaneousWorkLimit == 0:
		return nil, ErrZeroWorkLimit
	}
	if err := config.BranchFactor.Valid(); err != nil {
		return nil, err
	}

	m := &Manager{
		config:          config,
		doneChan:        make(chan struct{}),
		unprocessedWork: newWorkHeap(),
		processedWork:   newWorkHeap(),
		tokenSize:       merkledb.BranchFactorToTokenSize[config.BranchFactor],
	}
	m.unprocessedWorkCond.L = &m.workLock

	return m, nil
}

func (m *Manager) Start(ctx context.Context) error {
	m.workLock.Lock()
	defer m.workLock.Unlock()

	if m.syncing {
		return ErrAlreadyStarted
	}

	m.config.Log.Info("starting sync", zap.Stringer("target root", m.config.TargetRoot))

	// Add work item to fetch the entire key range.
	// Note that this will be the first work item to be processed.
	m.unprocessedWork.Insert(newWorkItem(ids.Empty, maybe.Nothing[[]byte](), maybe.Nothing[[]byte](), lowPriority))

	m.syncing = true
	ctx, m.cancelCtx = context.WithCancel(ctx)

	go m.sync(ctx)
	return nil
}
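
// runSyncExample is a minimal, illustrative sketch of how a caller might
// drive a Manager: build it, start it, and block until syncing finishes or
// fails. The db, client, and log values are assumed to be supplied by the
// caller; the branch factor and work limit below are arbitrary choices.
func runSyncExample(ctx context.Context, db DB, client Client, log logging.Logger, targetRoot ids.ID) error {
	m, err := NewManager(ManagerConfig{
		DB:                    db,
		Client:                client,
		Log:                   log,
		TargetRoot:            targetRoot,
		BranchFactor:          merkledb.BranchFactor16,
		SimultaneousWorkLimit: 8,
	})
	if err != nil {
		return err
	}
	defer m.Close()

	if err := m.Start(ctx); err != nil {
		return err
	}

	// Wait returns nil once the local root matches the target root,
	// the fatal sync error if one occurred, or ctx.Err() if [ctx] is canceled.
	return m.Wait(ctx)
}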

// sync awaits a signal on [m.unprocessedWorkCond], which indicates that there
// is work to do or that syncing has completed. If there is work, sync will
// dispatch a goroutine to do the work.
func (m *Manager) sync(ctx context.Context) {
	defer func() {
		// Invariant: [m.workLock] is held when this goroutine begins.
		m.close()
		m.workLock.Unlock()
	}()

	// Keep doing work until we're closed, done or [ctx] is canceled.
	m.workLock.Lock()
	for {
		// Invariant: [m.workLock] is held here.
		switch {
		case ctx.Err() != nil:
			return // [m.workLock] released by defer.
		case m.processingWorkItems >= m.config.SimultaneousWorkLimit:
			// We're already processing the maximum number of work items.
			// Wait until one of them finishes.
			m.unprocessedWorkCond.Wait()
		case m.unprocessedWork.Len() == 0:
			if m.processingWorkItems == 0 {
				// There's no work to do, and there are no work items being processed
				// which could cause work to be added, so we're done.
				return // [m.workLock] released by defer.
			}
			// There's no work to do.
			// Note that if [m].Close() is called, or [ctx] is canceled,
			// Close() will be called, which will signal [m.unprocessedWorkCond],
			// which will cause Wait() to return, and this goroutine to exit.
			m.unprocessedWorkCond.Wait()
		default:
			m.processingWorkItems++
			work := m.unprocessedWork.GetWork()
			go m.doWork(ctx, work)
		}
	}
}

// Close will stop the syncing process
func (m *Manager) Close() {
	m.workLock.Lock()
	defer m.workLock.Unlock()

	m.close()
}

// close is called when there is a fatal error or sync is complete.
// [workLock] must be held
func (m *Manager) close() {
	m.closeOnce.Do(func() {
		// Don't process any more work items.
		// Drop currently processing work items.
		if m.cancelCtx != nil {
			m.cancelCtx()
		}

		// ensure any goroutines waiting for work from the heaps get released
		m.unprocessedWork.Close()
		m.unprocessedWorkCond.Signal()
		m.processedWork.Close()

		// signal all code waiting on the sync to complete
		close(m.doneChan)
	})
}

// Processes [work] by fetching and applying a change or range proof.
// Assumes [m.workLock] is not held.
func (m *Manager) doWork(ctx context.Context, work *workItem) {
	defer func() {
		m.workLock.Lock()
		defer m.workLock.Unlock()

		m.processingWorkItems--
		m.unprocessedWorkCond.Signal()
	}()

	if work.localRootID == ids.Empty {
		// the keys in this range have not been downloaded, so get all key/values
		m.getAndApplyRangeProof(ctx, work)
	} else {
		// the keys in this range have already been downloaded, but the root changed, so get all changes
		m.getAndApplyChangeProof(ctx, work)
	}
}

// Fetch and apply the change proof given by [work].
// Assumes [m.workLock] is not held.
func (m *Manager) getAndApplyChangeProof(ctx context.Context, work *workItem) {
	targetRootID := m.getTargetRoot()

	if work.localRootID == targetRootID {
		// Start root is the same as the end root, so we're done.
		m.completeWorkItem(ctx, work, work.end, targetRootID, nil)
		return
	}

	if targetRootID == ids.Empty {
		// The trie is empty after this change.
		// Delete all the key-value pairs in the range.
		if err := m.config.DB.Clear(); err != nil {
			m.setError(err)
			return
		}
		work.start = maybe.Nothing[[]byte]()
		m.completeWorkItem(ctx, work, maybe.Nothing[[]byte](), targetRootID, nil)
		return
	}

	changeOrRangeProof, err := m.config.Client.GetChangeProof(
		ctx,
		&pb.SyncGetChangeProofRequest{
			StartRootHash: work.localRootID[:],
			EndRootHash:   targetRootID[:],
			StartKey: &pb.MaybeBytes{
				Value:     work.start.Value(),
				IsNothing: work.start.IsNothing(),
			},
			EndKey: &pb.MaybeBytes{
				Value:     work.end.Value(),
				IsNothing: work.end.IsNothing(),
			},
			KeyLimit:   defaultRequestKeyLimit,
			BytesLimit: defaultRequestByteSizeLimit,
		},
		m.config.DB,
	)
	if err != nil {
		m.setError(err)
		return
	}

	select {
	case <-m.doneChan:
		// If we're closed, don't apply the proof.
		return
	default:
	}

	if changeOrRangeProof.ChangeProof != nil {
		// The server had sufficient history to respond with a change proof.
		changeProof := changeOrRangeProof.ChangeProof
		largestHandledKey := work.end
		// if the proof wasn't empty, apply changes to the sync DB
		if len(changeProof.KeyChanges) > 0 {
			if err := m.config.DB.CommitChangeProof(ctx, changeProof); err != nil {
				m.setError(err)
				return
			}
			largestHandledKey = maybe.Some(changeProof.KeyChanges[len(changeProof.KeyChanges)-1].Key)
		}

		m.completeWorkItem(ctx, work, largestHandledKey, targetRootID, changeProof.EndProof)
		return
	}

	// The server responded with a range proof.
	rangeProof := changeOrRangeProof.RangeProof
	largestHandledKey := work.end
	if len(rangeProof.KeyValues) > 0 {
		// Add all the key-value pairs we got to the database.
		if err := m.config.DB.CommitRangeProof(ctx, work.start, work.end, rangeProof); err != nil {
			m.setError(err)
			return
		}
		largestHandledKey = maybe.Some(rangeProof.KeyValues[len(rangeProof.KeyValues)-1].Key)
	}

	m.completeWorkItem(ctx, work, largestHandledKey, targetRootID, rangeProof.EndProof)
}

// Fetch and apply the range proof given by [work].
// Assumes [m.workLock] is not held.
func (m *Manager) getAndApplyRangeProof(ctx context.Context, work *workItem) {
	targetRootID := m.getTargetRoot()

	if targetRootID == ids.Empty {
		if err := m.config.DB.Clear(); err != nil {
			m.setError(err)
			return
		}
		work.start = maybe.Nothing[[]byte]()
		m.completeWorkItem(ctx, work, maybe.Nothing[[]byte](), targetRootID, nil)
		return
	}

	proof, err := m.config.Client.GetRangeProof(ctx,
		&pb.SyncGetRangeProofRequest{
			RootHash: targetRootID[:],
			StartKey: &pb.MaybeBytes{
				Value:     work.start.Value(),
				IsNothing: work.start.IsNothing(),
			},
			EndKey: &pb.MaybeBytes{
				Value:     work.end.Value(),
				IsNothing: work.end.IsNothing(),
			},
			KeyLimit:   defaultRequestKeyLimit,
			BytesLimit: defaultRequestByteSizeLimit,
		},
	)
	if err != nil {
		m.setError(err)
		return
	}

	select {
	case <-m.doneChan:
		// If we're closed, don't apply the proof.
		return
	default:
	}

	largestHandledKey := work.end

	// Replace all the key-value pairs in the DB from start to end with values from the response.
	if err := m.config.DB.CommitRangeProof(ctx, work.start, work.end, proof); err != nil {
		m.setError(err)
		return
	}

	if len(proof.KeyValues) > 0 {
		largestHandledKey = maybe.Some(proof.KeyValues[len(proof.KeyValues)-1].Key)
	}

	m.completeWorkItem(ctx, work, largestHandledKey, targetRootID, proof.EndProof)
}

// findNextKey returns the start of the key range that should be fetched next
// given that we just received a range/change proof that proved a range of
// key-value pairs ending at [lastReceivedKey].
//
// [rangeEnd] is the end of the range that we want to fetch.
//
// Returns Nothing if there are no more keys to fetch in [lastReceivedKey, rangeEnd].
//
// [endProof] is the end proof of the last proof received.
//
// Invariant: [lastReceivedKey] < [rangeEnd].
// If [rangeEnd] is Nothing it's considered > [lastReceivedKey].
func (m *Manager) findNextKey(
	ctx context.Context,
	lastReceivedKey []byte,
	rangeEnd maybe.Maybe[[]byte],
	endProof []merkledb.ProofNode,
) (maybe.Maybe[[]byte], error) {
	if len(endProof) == 0 {
		// We try to find the next key to fetch by looking at the end proof.
		// If the end proof is empty, we have no information to use.
		// Start fetching from the next key after [lastReceivedKey].
		nextKey := lastReceivedKey
		nextKey = append(nextKey, 0)
		return maybe.Some(nextKey), nil
	}

	// We want the first key larger than the [lastReceivedKey].
	// This is done by taking two proofs for the same key
	// (one that was just received as part of a proof, and one from the local db)
	// and traversing them from the longest key to the shortest key.
	// For each node in these proofs, compare if the children of that node exist
	// or have the same ID in the other proof.
	proofKeyPath := merkledb.ToKey(lastReceivedKey)

	// If the received proof is an exclusion proof, the last node may be for a
	// key that is after the [lastReceivedKey].
	// If the last received node's key is after the [lastReceivedKey], it can
	// be removed to obtain a valid proof for a prefix of the [lastReceivedKey].
	if !proofKeyPath.HasPrefix(endProof[len(endProof)-1].Key) {
		endProof = endProof[:len(endProof)-1]
		// update the proofKeyPath to be for the prefix
		proofKeyPath = endProof[len(endProof)-1].Key
	}

	// get a proof for the same key as the received proof from the local db
	localProofOfKey, err := m.config.DB.GetProof(ctx, proofKeyPath.Bytes())
	if err != nil {
		return maybe.Nothing[[]byte](), err
	}
	localProofNodes := localProofOfKey.Path

	// The local proof may also be an exclusion proof with an extra node.
	// Remove this extra node if it exists to get a proof of the same key as the received proof
	if !proofKeyPath.HasPrefix(localProofNodes[len(localProofNodes)-1].Key) {
		localProofNodes = localProofNodes[:len(localProofNodes)-1]
	}

	nextKey := maybe.Nothing[[]byte]()

	// Add sentinel node back into the localProofNodes, if it is missing.
	// Required to ensure that a common node exists in both proofs
	if len(localProofNodes) > 0 && localProofNodes[0].Key.Length() != 0 {
		sentinel := merkledb.ProofNode{
			Children: map[byte]ids.ID{
				localProofNodes[0].Key.Token(0, m.tokenSize): ids.Empty,
			},
		}
		localProofNodes = append([]merkledb.ProofNode{sentinel}, localProofNodes...)
	}

	// Add sentinel node back into the endProof, if it is missing.
	// Required to ensure that a common node exists in both proofs
	if len(endProof) > 0 && endProof[0].Key.Length() != 0 {
		sentinel := merkledb.ProofNode{
			Children: map[byte]ids.ID{
				endProof[0].Key.Token(0, m.tokenSize): ids.Empty,
			},
		}
		endProof = append([]merkledb.ProofNode{sentinel}, endProof...)
	}

	localProofNodeIndex := len(localProofNodes) - 1
	receivedProofNodeIndex := len(endProof) - 1

	// traverse the two proofs from the deepest nodes up to the sentinel node until a difference is found
	for localProofNodeIndex >= 0 && receivedProofNodeIndex >= 0 && nextKey.IsNothing() {
		localProofNode := localProofNodes[localProofNodeIndex]
		receivedProofNode := endProof[receivedProofNodeIndex]

		// [deepestNode] is the proof node with the longest key (deepest in the trie) in the
		// two proofs that hasn't been handled yet.
		// [deepestNodeFromOtherProof] is the proof node from the other proof with
		// the same key/depth if it exists, nil otherwise.
		var deepestNode, deepestNodeFromOtherProof *merkledb.ProofNode

		// select the deepest proof node from the two proofs
		switch {
		case receivedProofNode.Key.Length() > localProofNode.Key.Length():
			// there was a branch node in the received proof that isn't in the local proof
			// see if the received proof node has children not present in the local proof
			deepestNode = &receivedProofNode

			// we have dealt with this received node, so move on to the next received node
			receivedProofNodeIndex--

		case localProofNode.Key.Length() > receivedProofNode.Key.Length():
			// there was a branch node in the local proof that isn't in the received proof
			// see if the local proof node has children not present in the received proof
			deepestNode = &localProofNode

			// we have dealt with this local node, so move on to the next local node
			localProofNodeIndex--

		default:
			// the two nodes are at the same depth
			// see if any of the children present in the local proof node are different
			// from the children in the received proof node
			deepestNode = &localProofNode
			deepestNodeFromOtherProof = &receivedProofNode

			// we have dealt with this local node and received node, so move on to the next nodes
			localProofNodeIndex--
			receivedProofNodeIndex--
		}

		// We only want to look at the children with keys greater than the proofKey.
		// The proof key has the deepest node's key as a prefix,
		// so only the next token of the proof key needs to be considered.

		// If the deepest node has the same key as [proofKeyPath],
		// then all of its children have keys greater than the proof key,
		// so we can start at the 0 token.
		startingChildToken := 0

		// If the deepest node has a key shorter than the key being proven,
		// we can look at the next token index of the proof key to determine which of that
		// node's children have keys larger than [proofKeyPath].
		// Any child with a token greater than the [proofKeyPath]'s token at that
		// index will have a larger key.
		if deepestNode.Key.Length() < proofKeyPath.Length() {
			startingChildToken = int(proofKeyPath.Token(deepestNode.Key.Length(), m.tokenSize)) + 1
		}

		// determine if there are any differences in the children for the deepest unhandled node of the two proofs
		if childIndex, hasDifference := findChildDifference(deepestNode, deepestNodeFromOtherProof, startingChildToken); hasDifference {
			nextKey = maybe.Some(deepestNode.Key.Extend(merkledb.ToToken(childIndex, m.tokenSize)).Bytes())
			break
		}
	}

	// If the nextKey is before or equal to the [lastReceivedKey]
	// then we couldn't find a better answer than the [lastReceivedKey].
	// Set the nextKey to [lastReceivedKey] + 0, which is the first key in
	// the open range (lastReceivedKey, rangeEnd).
	if nextKey.HasValue() && bytes.Compare(nextKey.Value(), lastReceivedKey) <= 0 {
		nextKeyVal := slices.Clone(lastReceivedKey)
		nextKeyVal = append(nextKeyVal, 0)
		nextKey = maybe.Some(nextKeyVal)
	}

	// If the [nextKey] is larger than the end of the range, return Nothing to signal that there is no next key in range
	if rangeEnd.HasValue() && bytes.Compare(nextKey.Value(), rangeEnd.Value()) >= 0 {
		return maybe.Nothing[[]byte](), nil
	}

	// the nextKey is within the open range (lastReceivedKey, rangeEnd), so return it
	return nextKey, nil
}
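
// Illustrative note on the fallback behavior above (a description of the
// existing logic, not additional logic): when the traversal cannot find a
// divergent child with a key greater than [lastReceivedKey], the next key
// defaults to [lastReceivedKey] with a 0x00 byte appended, the smallest key
// strictly greater than [lastReceivedKey]. For example, for
// lastReceivedKey = {0x01, 0xFF} the fallback key is {0x01, 0xFF, 0x00}.
// If the computed next key is at or beyond [rangeEnd], Nothing is returned
// and the caller treats the work item's range as complete.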

func (m *Manager) Error() error {
	m.errLock.Lock()
	defer m.errLock.Unlock()

	return m.fatalError
}

// Wait blocks until one of the following occurs:
// - sync is complete.
// - sync fatally errored.
// - [ctx] is canceled.
// If [ctx] is canceled, returns [ctx].Err().
func (m *Manager) Wait(ctx context.Context) error {
	select {
	case <-m.doneChan:
	case <-ctx.Done():
		return ctx.Err()
	}

	// There was a fatal error.
	if err := m.Error(); err != nil {
		return err
	}

	root, err := m.config.DB.GetMerkleRoot(ctx)
	if err != nil {
		return err
	}

	if targetRootID := m.getTargetRoot(); targetRootID != root {
		// This should never happen.
		return fmt.Errorf("%w: expected %s, got %s", ErrFinishedWithUnexpectedRoot, targetRootID, root)
	}

	m.config.Log.Info("completed", zap.Stringer("root", root))
	return nil
}

func (m *Manager) UpdateSyncTarget(syncTargetRoot ids.ID) error {
	m.syncTargetLock.Lock()
	defer m.syncTargetLock.Unlock()

	m.workLock.Lock()
	defer m.workLock.Unlock()

	select {
	case <-m.doneChan:
		return ErrAlreadyClosed
	default:
	}

	if m.config.TargetRoot == syncTargetRoot {
		// the target hasn't changed, so there is nothing to do
		return nil
	}

	m.config.Log.Debug("updated sync target", zap.Stringer("target", syncTargetRoot))
	m.config.TargetRoot = syncTargetRoot

	// move all completed ranges into the work heap with high priority
	shouldSignal := m.processedWork.Len() > 0
	for m.processedWork.Len() > 0 {
		// Note that [m.processedWork].Close() hasn't
		// been called because we have [m.workLock]
		// and we checked that [m.doneChan] isn't closed.
		currentItem := m.processedWork.GetWork()
		currentItem.priority = highPriority
		m.unprocessedWork.Insert(currentItem)
	}
	if shouldSignal {
		// Only signal once because we only have 1 goroutine
		// waiting on [m.unprocessedWorkCond].
		m.unprocessedWorkCond.Signal()
	}
	return nil
}

func (m *Manager) getTargetRoot() ids.ID {
	m.syncTargetLock.RLock()
	defer m.syncTargetLock.RUnlock()

	return m.config.TargetRoot
}

// Record that there was a fatal error and begin shutting down.
func (m *Manager) setError(err error) {
	m.errLock.Lock()
	defer m.errLock.Unlock()

	m.config.Log.Error("sync errored", zap.Error(err))
	m.fatalError = err
	// Call in goroutine because we might be holding [m.workLock]
	// which [m.Close] will try to acquire.
	go m.Close()
}

// Mark that we've fetched all the key-value pairs in the range
// [workItem.start, largestHandledKey] for the trie with root [rootID].
//
// If [workItem.start] is Nothing, then we've fetched all the key-value
// pairs up to and including [largestHandledKey].
//
// If [largestHandledKey] is Nothing, then we've fetched all the key-value
// pairs at and after [workItem.start].
//
// [proofOfLargestKey] is the end proof for the range/change proof
// that gave us the range up to and including [largestHandledKey].
//
// Assumes [m.workLock] is not held.
func (m *Manager) completeWorkItem(ctx context.Context, work *workItem, largestHandledKey maybe.Maybe[[]byte], rootID ids.ID, proofOfLargestKey []merkledb.ProofNode) {
	if !maybe.Equal(largestHandledKey, work.end, bytes.Equal) {
		// The largest handled key isn't equal to the end of the work item.
		// Find the start of the next key range to fetch.
		// Note that [largestHandledKey] can't be Nothing.
		// Proof: Suppose it is. That means that we got a range/change proof that proved up to the
		// greatest key-value pair in the database. That means we requested a proof with no upper
		// bound. That is, [workItem.end] is Nothing. But we're in this branch because
		// [largestHandledKey] and [workItem.end] aren't equal, so [workItem.end] isn't Nothing. Contradiction.
		nextStartKey, err := m.findNextKey(ctx, largestHandledKey.Value(), work.end, proofOfLargestKey)
		if err != nil {
			m.setError(err)
			return
		}

		// nextStartKey being Nothing indicates that the entire range has been completed
		if nextStartKey.IsNothing() {
			largestHandledKey = work.end
		} else {
			// the full range wasn't completed, so enqueue a new work item for the range [nextStartKey, workItem.end]
			m.enqueueWork(newWorkItem(work.localRootID, nextStartKey, work.end, work.priority))
			largestHandledKey = nextStartKey
		}
	}

	// Process [work] while holding [syncTargetLock] to ensure that the work item
	// is added to the right queue, even if a target update is triggered.
	m.syncTargetLock.RLock()
	defer m.syncTargetLock.RUnlock()

	stale := m.config.TargetRoot != rootID
	if stale {
		// the root has changed, so reinsert with high priority
		m.enqueueWork(newWorkItem(rootID, work.start, largestHandledKey, highPriority))
	} else {
		m.workLock.Lock()
		defer m.workLock.Unlock()

		m.processedWork.MergeInsert(newWorkItem(rootID, work.start, largestHandledKey, work.priority))
	}

	// completed the range [work.start, largestHandledKey]; log and record in the completed work heap
	m.config.Log.Debug("completed range",
		zap.Stringer("start", work.start),
		zap.Stringer("end", largestHandledKey),
		zap.Stringer("rootID", rootID),
		zap.Bool("stale", stale),
	)
}

// Queue the given key range to be fetched and applied.
// If there are sufficiently few unprocessed/processing work items,
// splits the range into two items and queues them both.
// Assumes [m.workLock] is not held.
func (m *Manager) enqueueWork(work *workItem) {
	m.workLock.Lock()
	defer func() {
		m.workLock.Unlock()
		m.unprocessedWorkCond.Signal()
	}()

	if m.processingWorkItems+m.unprocessedWork.Len() > 2*m.config.SimultaneousWorkLimit {
		// There are too many work items already, don't split the range
		m.unprocessedWork.Insert(work)
		return
	}

	// Split the remaining range into 2.
	// Find the middle point.
	mid := midPoint(work.start, work.end)

	if maybe.Equal(work.start, mid, bytes.Equal) || maybe.Equal(mid, work.end, bytes.Equal) {
		// The range is too small to split.
		// If we didn't have this check we would add work items
		// [start, start] and [start, end]. Since start <= end, this would
		// violate the invariant of [m.unprocessedWork] and [m.processedWork]
		// that there are no overlapping ranges.
		m.unprocessedWork.Insert(work)
		return
	}

	// first item gets higher priority than the second to encourage finished ranges to grow
	// rather than start a new range that is not contiguous with existing completed ranges
	first := newWorkItem(work.localRootID, work.start, mid, medPriority)
	second := newWorkItem(work.localRootID, mid, work.end, lowPriority)

	m.unprocessedWork.Insert(first)
	m.unprocessedWork.Insert(second)
}

// find the midpoint between two keys
// start is expected to be less than end
// Nothing/nil [start] is treated as all 0's
// Nothing/nil [end] is treated as all 255's
func midPoint(startMaybe, endMaybe maybe.Maybe[[]byte]) maybe.Maybe[[]byte] {
	start := startMaybe.Value()
	end := endMaybe.Value()
	length := len(start)
	if len(end) > length {
		length = len(end)
	}

	if length == 0 {
		if endMaybe.IsNothing() {
			return maybe.Some([]byte{127})
		} else if len(end) == 0 {
			return maybe.Nothing[[]byte]()
		}
	}

	// This check deals with cases where the end has a 255 (or is nothing, which is treated as all 255s)
	// and the start key ends in 255.
	// For example, midPoint([255], nothing) should be [255, 127], not [255].
	// The result needs the extra byte added on to the end to deal with the fact that
	// the naive midpoint between 255 and 255 would be 255.
	if (len(start) > 0 && start[len(start)-1] == 255) && (len(end) == 0 || end[len(end)-1] == 255) {
		length++
	}

	leftover := 0
	midpoint := make([]byte, length+1)
	for i := 0; i < length; i++ {
		startVal := 0
		if i < len(start) {
			startVal = int(start[i])
		}

		endVal := 0
		if endMaybe.IsNothing() {
			endVal = 255
		}
		if i < len(end) {
			endVal = int(end[i])
		}

		total := startVal + endVal + leftover
		leftover = 0
		// if total is odd, when we divide, we will lose the .5,
		// record that in the leftover for the next digits
		if total%2 == 1 {
			leftover = 256
		}

		// find the midpoint between the start and the end
		total /= 2

		// larger than byte can hold, so carry over to previous byte
		if total >= 256 {
			total -= 256
			index := i - 1
			for index > 0 && midpoint[index] == 255 {
				midpoint[index] = 0
				index--
			}
			midpoint[index]++
		}
		midpoint[i] = byte(total)
	}
	if leftover > 0 {
		midpoint[length] = 127
	} else {
		midpoint = midpoint[0:length]
	}
	return maybe.Some(midpoint)
}
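
// A few illustrative midPoint values (a sketch derived from the logic above,
// not an exhaustive specification):
//
//	midPoint(maybe.Nothing[[]byte](), maybe.Nothing[[]byte]())      == maybe.Some([]byte{127})
//	midPoint(maybe.Some([]byte{0x00}), maybe.Some([]byte{0x04}))    == maybe.Some([]byte{0x02})
//	midPoint(maybe.Some([]byte{0x00, 0x10}), maybe.Some([]byte{0x00, 0x20})) == maybe.Some([]byte{0x00, 0x18})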

// findChildDifference returns the first child index that is different between
// node 1 and node 2, if one exists, and a bool indicating whether any
// difference was found
func findChildDifference(node1, node2 *merkledb.ProofNode, startIndex int) (byte, bool) {
	// Children indices >= [startIndex] present in at least one of the nodes.
	childIndices := set.Set[byte]{}
	for _, node := range []*merkledb.ProofNode{node1, node2} {
		if node == nil {
			continue
		}
		for key := range node.Children {
			if int(key) >= startIndex {
				childIndices.Add(key)
			}
		}
	}

	sortedChildIndices := maps.Keys(childIndices)
	slices.Sort(sortedChildIndices)
	var (
		child1, child2 ids.ID
		ok1, ok2       bool
	)
	for _, childIndex := range sortedChildIndices {
		if node1 != nil {
			child1, ok1 = node1.Children[childIndex]
		}
		if node2 != nil {
			child2, ok2 = node2.Children[childIndex]
		}
		// if one node has a child and the other doesn't or the children ids don't match,
		// return the current child index as the first difference
		if (ok1 || ok2) && child1 != child2 {
			return childIndex, true
		}
	}
	// there were no differences found
	return 0, false
}
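
// exampleFindChildDifference is a minimal, illustrative sketch (the nodes and
// IDs below are made up) of how findChildDifference reports the first child
// index at which two proof nodes diverge.
func exampleFindChildDifference() {
	idA := ids.ID{1}
	idB := ids.ID{2}
	idC := ids.ID{3}

	node1 := &merkledb.ProofNode{Children: map[byte]ids.ID{0x1: idA, 0x5: idB}}
	node2 := &merkledb.ProofNode{Children: map[byte]ids.ID{0x1: idA, 0x5: idC}}

	// The children at index 0x1 match, so the first difference is at index 0x5,
	// where the child IDs differ.
	childIndex, hasDifference := findChildDifference(node1, node2, 0)
	fmt.Println(childIndex, hasDifference) // prints "5 true"
}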