github.com/MetalBlockchain/metalgo@v1.11.9/snow/networking/router/chain_router.go

// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package router

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"go.uber.org/zap"

	"github.com/MetalBlockchain/metalgo/ids"
	"github.com/MetalBlockchain/metalgo/message"
	"github.com/MetalBlockchain/metalgo/proto/pb/p2p"
	"github.com/MetalBlockchain/metalgo/snow/networking/benchlist"
	"github.com/MetalBlockchain/metalgo/snow/networking/handler"
	"github.com/MetalBlockchain/metalgo/snow/networking/timeout"
	"github.com/MetalBlockchain/metalgo/utils/constants"
	"github.com/MetalBlockchain/metalgo/utils/linked"
	"github.com/MetalBlockchain/metalgo/utils/logging"
	"github.com/MetalBlockchain/metalgo/utils/set"
	"github.com/MetalBlockchain/metalgo/utils/timer/mockable"
	"github.com/MetalBlockchain/metalgo/version"
)

var (
	errUnknownChain  = errors.New("received message for unknown chain")
	errUnallowedNode = errors.New("received message from non-allowed node")
	errClosing       = errors.New("router is closing")

	_ Router              = (*ChainRouter)(nil)
	_ benchlist.Benchable = (*ChainRouter)(nil)
)

type requestEntry struct {
	// When this request was registered
	time time.Time
	// The type of request that was made
	op message.Op
	// The engine type of the request that was made
	engineType p2p.EngineType
}

type peer struct {
	version *version.Application
	// The subnets that this peer is currently tracking
	trackedSubnets set.Set[ids.ID]
	// The subnets that this peer actually has a connection to.
	// This is a subset of trackedSubnets.
	connectedSubnets set.Set[ids.ID]
}

// ChainRouter routes incoming messages from the validator network
// to the consensus engines that the messages are intended for.
// Note that consensus engines are uniquely identified by the ID of the chain
// that they are working on.
// Invariant: P-chain must be registered before processing any messages
type ChainRouter struct {
	clock         mockable.Clock
	log           logging.Logger
	lock          sync.Mutex
	closing       bool
	chainHandlers map[ids.ID]handler.Handler

	// It is only safe to call [RegisterResponse] with the router lock held. Any
	// other calls to the timeout manager with the router lock held could cause
	// a deadlock because the timeout manager will call Benched and Unbenched.
	timeoutManager timeout.Manager

	closeTimeout time.Duration
	myNodeID     ids.NodeID
	peers        map[ids.NodeID]*peer
	// node ID --> chains that node is benched on
	// invariant: if a node is benched on any chain, it is treated as disconnected on all chains
	benched                map[ids.NodeID]set.Set[ids.ID]
	criticalChains         set.Set[ids.ID]
	sybilProtectionEnabled bool
	onFatal                func(exitCode int)
	metrics                *routerMetrics
	// Parameters for doing health checks
	healthConfig HealthConfig
	// aggregator of requests based on their time
	timedRequests *linked.Hashmap[ids.RequestID, requestEntry]
}

// Initialize the router.
//
// When this router receives an incoming message, it cancels the timeout in
// [timeouts] associated with the request that caused the incoming message, if
// applicable.
func (cr *ChainRouter) Initialize(
	nodeID ids.NodeID,
	log logging.Logger,
	timeoutManager timeout.Manager,
	closeTimeout time.Duration,
	criticalChains set.Set[ids.ID],
	sybilProtectionEnabled bool,
	trackedSubnets set.Set[ids.ID],
	onFatal func(exitCode int),
	healthConfig HealthConfig,
	reg prometheus.Registerer,
) error {
	cr.log = log
	cr.chainHandlers = make(map[ids.ID]handler.Handler)
	cr.timeoutManager = timeoutManager
	cr.closeTimeout = closeTimeout
	cr.benched = make(map[ids.NodeID]set.Set[ids.ID])
	cr.criticalChains = criticalChains
	cr.sybilProtectionEnabled = sybilProtectionEnabled
	cr.onFatal = onFatal
	cr.timedRequests = linked.NewHashmap[ids.RequestID, requestEntry]()
	cr.peers = make(map[ids.NodeID]*peer)
	cr.healthConfig = healthConfig

	// Mark myself as connected
	cr.myNodeID = nodeID
	myself := &peer{
		version: version.CurrentApp,
	}
	myself.trackedSubnets.Union(trackedSubnets)
	myself.trackedSubnets.Add(constants.PrimaryNetworkID)
	cr.peers[nodeID] = myself

	// Register metrics
	rMetrics, err := newRouterMetrics(reg)
	if err != nil {
		return err
	}
	cr.metrics = rMetrics
	return nil
}

// RegisterRequest marks that we should expect to receive a reply for a request
// issued by [requestingChainID] from the given node's [respondingChainID] and
// the reply should have the given requestID.
//
// The type of message we expect is [op].
//
// Every registered request must be cleared either by receiving a valid reply
// and passing it to the appropriate chain or by a timeout.
// This method registers a timeout that calls such methods if we don't get a
// reply in time.
func (cr *ChainRouter) RegisterRequest(
	ctx context.Context,
	nodeID ids.NodeID,
	requestingChainID ids.ID,
	respondingChainID ids.ID,
	requestID uint32,
	op message.Op,
	timeoutMsg message.InboundMessage,
	engineType p2p.EngineType,
) {
	cr.lock.Lock()
	if cr.closing {
		cr.log.Debug("dropping request",
			zap.Stringer("nodeID", nodeID),
			zap.Stringer("requestingChainID", requestingChainID),
			zap.Stringer("respondingChainID", respondingChainID),
			zap.Uint32("requestID", requestID),
			zap.Stringer("messageOp", op),
			zap.Error(errClosing),
		)
		cr.lock.Unlock()
		return
	}
	// When we receive a response message type (Chits, Put, Accepted, etc.)
	// we validate that we actually sent the corresponding request.
	// Give this request a unique ID so we can do that validation.
	//
	// For cross-chain messages, the responding chain is the source of the
	// response, which is sent back to the requesting chain, the destination.
	// That is why the two are flipped when generating the request ID.
	uniqueRequestID := ids.RequestID{
		NodeID:             nodeID,
		SourceChainID:      respondingChainID,
		DestinationChainID: requestingChainID,
		RequestID:          requestID,
		Op:                 byte(op),
	}
	// Add to the set of unfulfilled requests
	cr.timedRequests.Put(uniqueRequestID, requestEntry{
		time:       cr.clock.Time(),
		op:         op,
		engineType: engineType,
	})
	cr.metrics.outstandingRequests.Set(float64(cr.timedRequests.Len()))
	cr.lock.Unlock()

	// Determine whether we should include the latency of this request in our
	// measurements.
	// - Don't measure messages from ourself since these don't go over the
	//   network.
	// - Don't measure Puts because an adversary can cause us to issue a Get
	//   request to them and not respond, causing a timeout, skewing latency
	//   measurements.
	shouldMeasureLatency := nodeID != cr.myNodeID && op != message.PutOp

	// Register a timeout to fire if we don't get a reply in time.
	cr.timeoutManager.RegisterRequest(
		nodeID,
		respondingChainID,
		shouldMeasureLatency,
		uniqueRequestID,
		func() {
			cr.HandleInbound(ctx, timeoutMsg)
		},
	)
}
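
// HandleInbound routes a single inbound message to the destination chain's handler.
//
// Broadly, there are three paths through this method:
//   - Ops in message.UnrequestedOps are pushed directly to the destination chain.
//   - Ops in message.FailedToResponseOps clear the corresponding outstanding request, tell
//     the timeout manager that no response is coming, and deliver the failure to the chain.
//   - All other ops are treated as responses: the matching request is cleared, the measured
//     latency is reported to the timeout manager, and the response is pushed to the chain.
//
// Outstanding requests are keyed by (NodeID, SourceChainID, DestinationChainID, RequestID,
// Op). RegisterRequest stores a request with SourceChainID=respondingChainID and
// DestinationChainID=requestingChainID, which matches the chain IDs carried by the response,
// so clearRequest reconstructs the same key here.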
func (cr *ChainRouter) HandleInbound(ctx context.Context, msg message.InboundMessage) {
	nodeID := msg.NodeID()
	op := msg.Op()

	m := msg.Message()
	destinationChainID, err := message.GetChainID(m)
	if err != nil {
		cr.log.Debug("dropping message with invalid field",
			zap.Stringer("nodeID", nodeID),
			zap.Stringer("messageOp", op),
			zap.String("field", "ChainID"),
			zap.Error(err),
		)

		msg.OnFinishedHandling()
		return
	}

	sourceChainID, err := message.GetSourceChainID(m)
	if err != nil {
		cr.log.Debug("dropping message with invalid field",
			zap.Stringer("nodeID", nodeID),
			zap.Stringer("messageOp", op),
			zap.String("field", "SourceChainID"),
			zap.Error(err),
		)

		msg.OnFinishedHandling()
		return
	}

	requestID, ok := message.GetRequestID(m)
	if !ok {
		cr.log.Debug("dropping message with invalid field",
			zap.Stringer("nodeID", nodeID),
			zap.Stringer("messageOp", op),
			zap.String("field", "RequestID"),
		)

		msg.OnFinishedHandling()
		return
	}

	cr.lock.Lock()
	defer cr.lock.Unlock()

	if cr.closing {
		cr.log.Debug("dropping message",
			zap.Stringer("messageOp", op),
			zap.Stringer("nodeID", nodeID),
			zap.Stringer("chainID", destinationChainID),
			zap.Error(errClosing),
		)
		msg.OnFinishedHandling()
		return
	}

	// Get the chain, if it exists
	chain, exists := cr.chainHandlers[destinationChainID]
	if !exists {
		cr.log.Debug("dropping message",
			zap.Stringer("messageOp", op),
			zap.Stringer("nodeID", nodeID),
			zap.Stringer("chainID", destinationChainID),
			zap.Error(errUnknownChain),
		)
		msg.OnFinishedHandling()
		return
	}

	if !chain.ShouldHandle(nodeID) {
		cr.log.Debug("dropping message",
			zap.Stringer("messageOp", op),
			zap.Stringer("nodeID", nodeID),
			zap.Stringer("chainID", destinationChainID),
			zap.Error(errUnallowedNode),
		)
		msg.OnFinishedHandling()
		return
	}

	chainCtx := chain.Context()
	if message.UnrequestedOps.Contains(op) {
		if chainCtx.Executing.Get() {
			cr.log.Debug("dropping message and skipping queue",
				zap.String("reason", "the chain is currently executing"),
				zap.Stringer("messageOp", op),
			)
			cr.metrics.droppedRequests.Inc()
			msg.OnFinishedHandling()
			return
		}

		// Note: engineType is not guaranteed to be one of the explicitly named
		// enum values. If it was not specified it defaults to UNSPECIFIED.
		engineType, _ := message.GetEngineType(m)
		chain.Push(
			ctx,
			handler.Message{
				InboundMessage: msg,
				EngineType:     engineType,
			},
		)
		return
	}

	if expectedResponse, isFailed := message.FailedToResponseOps[op]; isFailed {
		// Create the request ID of the request we sent that this message is in
		// response to.
		uniqueRequestID, req := cr.clearRequest(expectedResponse, nodeID, sourceChainID, destinationChainID, requestID)
		if req == nil {
			// This was a duplicated response.
			msg.OnFinishedHandling()
			return
		}

		// Tell the timeout manager we are no longer expecting a response
		cr.timeoutManager.RemoveRequest(uniqueRequestID)

		// Pass the failure to the chain
		chain.Push(
			ctx,
			handler.Message{
				InboundMessage: msg,
				EngineType:     req.engineType,
			},
		)
		return
	}

	if chainCtx.Executing.Get() {
		cr.log.Debug("dropping message and skipping queue",
			zap.String("reason", "the chain is currently executing"),
			zap.Stringer("messageOp", op),
		)
		cr.metrics.droppedRequests.Inc()
		msg.OnFinishedHandling()
		return
	}

	uniqueRequestID, req := cr.clearRequest(op, nodeID, sourceChainID, destinationChainID, requestID)
	if req == nil {
		// We didn't request this message.
		msg.OnFinishedHandling()
		return
	}

	// Calculate how long it took [nodeID] to reply
	latency := cr.clock.Time().Sub(req.time)

	// Tell the timeout manager we got a response
	cr.timeoutManager.RegisterResponse(nodeID, destinationChainID, uniqueRequestID, req.op, latency)

	// Pass the response to the chain
	chain.Push(
		ctx,
		handler.Message{
			InboundMessage: msg,
			EngineType:     req.engineType,
		},
	)
}

// Shutdown shuts down this router
func (cr *ChainRouter) Shutdown(ctx context.Context) {
	cr.log.Info("shutting down chain router")
	cr.lock.Lock()
	prevChains := cr.chainHandlers
	cr.chainHandlers = map[ids.ID]handler.Handler{}
	cr.closing = true
	cr.lock.Unlock()

	for _, chain := range prevChains {
		chain.Stop(ctx)
	}

	ctx, cancel := context.WithTimeout(ctx, cr.closeTimeout)
	defer cancel()

	for _, chain := range prevChains {
		shutdownDuration, err := chain.AwaitStopped(ctx)

		chainLog := chain.Context().Log
		if err != nil {
			chainLog.Warn("timed out while shutting down",
				zap.Error(err),
			)
		} else {
			chainLog.Info("chain shutdown",
				zap.Duration("shutdownDuration", shutdownDuration),
			)
		}
	}
}

// AddChain registers the specified chain so that incoming
// messages can be routed to it
func (cr *ChainRouter) AddChain(ctx context.Context, chain handler.Handler) {
	cr.lock.Lock()
	defer cr.lock.Unlock()

	chainID := chain.Context().ChainID
	if cr.closing {
		cr.log.Debug("dropping add chain request",
			zap.Stringer("chainID", chainID),
			zap.Error(errClosing),
		)
		return
	}
	cr.log.Debug("registering chain with chain router",
		zap.Stringer("chainID", chainID),
	)
	chain.SetOnStopped(func() {
		cr.removeChain(ctx, chainID)
	})
	cr.chainHandlers[chainID] = chain

	// Notify connected validators
	subnetID := chain.Context().SubnetID
	for validatorID, peer := range cr.peers {
		// If this validator is benched on any chain, treat them as disconnected
		// on all chains
		_, benched := cr.benched[validatorID]
		if benched {
			continue
		}

		// If this peer isn't running this chain, then we shouldn't mark them as
		// connected
		if !peer.trackedSubnets.Contains(subnetID) && cr.sybilProtectionEnabled {
			continue
		}

		msg := message.InternalConnected(validatorID, peer.version)
		chain.Push(ctx,
			handler.Message{
				InboundMessage: msg,
				EngineType:     p2p.EngineType_ENGINE_TYPE_UNSPECIFIED,
			},
		)
	}

	// When we register the P-chain, we mark ourselves as connected on all of
	// the subnets that we have tracked.
	if chainID != constants.PlatformChainID {
		return
	}

	// If we have currently benched ourselves, we will mark ourselves as
	// connected when we unbench. So skip connecting now.
	// This is not "theoretically" possible, but keeping this here prevents us
	// from keeping an invariant that we never bench ourselves.
	if _, benched := cr.benched[cr.myNodeID]; benched {
		return
	}

	myself := cr.peers[cr.myNodeID]
	for subnetID := range myself.trackedSubnets {
		cr.connectedSubnet(myself, cr.myNodeID, subnetID)
	}
}

// Connected routes an incoming notification that a validator was just connected
func (cr *ChainRouter) Connected(nodeID ids.NodeID, nodeVersion *version.Application, subnetID ids.ID) {
	cr.lock.Lock()
	defer cr.lock.Unlock()

	if cr.closing {
		cr.log.Debug("dropping connected message",
			zap.Stringer("nodeID", nodeID),
			zap.Error(errClosing),
		)
		return
	}

	connectedPeer, exists := cr.peers[nodeID]
	if !exists {
		connectedPeer = &peer{
			version: nodeVersion,
		}
		cr.peers[nodeID] = connectedPeer
	}
	connectedPeer.trackedSubnets.Add(subnetID)

	// If this validator is benched on any chain, treat them as disconnected on all chains
	if _, benched := cr.benched[nodeID]; benched {
		return
	}

	msg := message.InternalConnected(nodeID, nodeVersion)

	// TODO: fire an event when validator state changes, i.e. when they leave
	// the validator set or disconnect. We cannot put a subnet-only validator
	// check here since Disconnected would not be handled properly.
	//
	// When sybil protection is disabled, we only want this clause to happen
	// once. Therefore, we only update the chains during the connection of the
	// primary network, which is guaranteed to happen for every peer.
	if cr.sybilProtectionEnabled || subnetID == constants.PrimaryNetworkID {
		for _, chain := range cr.chainHandlers {
			// If sybil protection is disabled, send a Connected message to
			// every chain when connecting to the primary network.
			if subnetID == chain.Context().SubnetID || !cr.sybilProtectionEnabled {
				chain.Push(
					context.TODO(),
					handler.Message{
						InboundMessage: msg,
						EngineType:     p2p.EngineType_ENGINE_TYPE_UNSPECIFIED,
					},
				)
			}
		}
	}

	cr.connectedSubnet(connectedPeer, nodeID, subnetID)
}

// Disconnected routes an incoming notification that a validator was just disconnected
func (cr *ChainRouter) Disconnected(nodeID ids.NodeID) {
	cr.lock.Lock()
	defer cr.lock.Unlock()

	if cr.closing {
		cr.log.Debug("dropping disconnected message",
			zap.Stringer("nodeID", nodeID),
			zap.Error(errClosing),
		)
		return
	}

	peer := cr.peers[nodeID]
	delete(cr.peers, nodeID)
	if _, benched := cr.benched[nodeID]; benched {
		return
	}

	msg := message.InternalDisconnected(nodeID)

	// TODO: fire an event when validator state changes, i.e. when they leave
	// the validator set or disconnect. We cannot put a subnet-only validator
	// check here since, if a validator connects and then leaves the validator
	// set, it would not be disconnected properly.
	for _, chain := range cr.chainHandlers {
		if peer.trackedSubnets.Contains(chain.Context().SubnetID) || !cr.sybilProtectionEnabled {
			chain.Push(
				context.TODO(),
				handler.Message{
					InboundMessage: msg,
					EngineType:     p2p.EngineType_ENGINE_TYPE_UNSPECIFIED,
				})
		}
	}
}
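
// Benched and Unbenched below implement the benchlist.Benchable interface asserted at the top
// of this file. While a node is benched on any chain it is treated as disconnected on all
// chains: Benched pushes an InternalDisconnected message to the chains in the peer's tracked
// subnets and clears its connectedSubnets, and Unbenched reverses this once the node's last
// bench is removed, re-sending InternalConnected and calling connectedSubnet for every subnet
// the peer tracks (including subnets with no chains).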

// Benched routes an incoming notification that a validator was benched
func (cr *ChainRouter) Benched(chainID ids.ID, nodeID ids.NodeID) {
	cr.lock.Lock()
	defer cr.lock.Unlock()

	if cr.closing {
		cr.log.Debug("dropping benched message",
			zap.Stringer("nodeID", nodeID),
			zap.Stringer("chainID", chainID),
			zap.Error(errClosing),
		)
		return
	}

	benchedChains, exists := cr.benched[nodeID]
	benchedChains.Add(chainID)
	cr.benched[nodeID] = benchedChains
	peer, hasPeer := cr.peers[nodeID]
	if exists || !hasPeer {
		// If the set already existed, then the node was previously benched.
		return
	}

	// This will disconnect the node from all subnets when issued to the
	// P-chain, even if there is no chain in the subnet.
	msg := message.InternalDisconnected(nodeID)

	for _, chain := range cr.chainHandlers {
		if peer.trackedSubnets.Contains(chain.Context().SubnetID) || !cr.sybilProtectionEnabled {
			chain.Push(
				context.TODO(),
				handler.Message{
					InboundMessage: msg,
					EngineType:     p2p.EngineType_ENGINE_TYPE_UNSPECIFIED,
				})
		}
	}

	peer.connectedSubnets.Clear()
}

// Unbenched routes an incoming notification that a validator was just unbenched
func (cr *ChainRouter) Unbenched(chainID ids.ID, nodeID ids.NodeID) {
	cr.lock.Lock()
	defer cr.lock.Unlock()

	if cr.closing {
		cr.log.Debug("dropping unbenched message",
			zap.Stringer("nodeID", nodeID),
			zap.Stringer("chainID", chainID),
			zap.Error(errClosing),
		)
		return
	}

	benchedChains := cr.benched[nodeID]
	benchedChains.Remove(chainID)
	if benchedChains.Len() != 0 {
		cr.benched[nodeID] = benchedChains
		return // This node is still benched
	}

	delete(cr.benched, nodeID)

	peer, found := cr.peers[nodeID]
	if !found {
		return
	}

	msg := message.InternalConnected(nodeID, peer.version)

	for _, chain := range cr.chainHandlers {
		if peer.trackedSubnets.Contains(chain.Context().SubnetID) || !cr.sybilProtectionEnabled {
			chain.Push(
				context.TODO(),
				handler.Message{
					InboundMessage: msg,
					EngineType:     p2p.EngineType_ENGINE_TYPE_UNSPECIFIED,
				})
		}
	}

	// This will unbench the node from all its subnets.
	// We handle this case separately because the node may have been benched on
	// a subnet that has no chains.
	for subnetID := range peer.trackedSubnets {
		cr.connectedSubnet(peer, nodeID, subnetID)
	}
}

// HealthCheck returns results of router health checks. Returns:
// 1) Information about health check results
// 2) An error if the health check reports unhealthy
func (cr *ChainRouter) HealthCheck(context.Context) (interface{}, error) {
	cr.lock.Lock()
	defer cr.lock.Unlock()

	numOutstandingReqs := cr.timedRequests.Len()
	isOutstandingReqs := numOutstandingReqs <= cr.healthConfig.MaxOutstandingRequests
	healthy := isOutstandingReqs
	details := map[string]interface{}{
		"outstandingRequests": numOutstandingReqs,
	}

	// check for long running requests
	now := cr.clock.Time()
	processingRequest := now
	if _, longestRunning, exists := cr.timedRequests.Oldest(); exists {
		processingRequest = longestRunning.time
	}
	timeReqRunning := now.Sub(processingRequest)
	isOutstanding := timeReqRunning <= cr.healthConfig.MaxOutstandingDuration
	healthy = healthy && isOutstanding
	details["longestRunningRequest"] = timeReqRunning.String()
	cr.metrics.longestRunningRequest.Set(float64(timeReqRunning))

	if !healthy {
		var errorReasons []string
		if !isOutstandingReqs {
			errorReasons = append(errorReasons, fmt.Sprintf("number of outstanding requests %d > %d", numOutstandingReqs, cr.healthConfig.MaxOutstandingRequests))
		}
		if !isOutstanding {
			errorReasons = append(errorReasons, fmt.Sprintf("time for outstanding requests %s > %s", timeReqRunning, cr.healthConfig.MaxOutstandingDuration))
		}
		// The router is not healthy
		return details, fmt.Errorf("the router is not healthy reason: %s", strings.Join(errorReasons, ", "))
	}
	return details, nil
}
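
// For illustration (the numbers below are made up; the thresholds come from HealthConfig):
// a healthy check returns a details map shaped like
//
//	{"outstandingRequests": 3, "longestRunningRequest": "1.5s"}
//
// and a nil error, while an unhealthy check returns the same details together with an error
// such as "the router is not healthy reason: number of outstanding requests 1025 > 1024".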

// removeChain removes the specified chain so that incoming
// messages can't be routed to it
func (cr *ChainRouter) removeChain(ctx context.Context, chainID ids.ID) {
	cr.lock.Lock()
	chain, exists := cr.chainHandlers[chainID]
	if !exists {
		cr.log.Debug("can't remove unknown chain",
			zap.Stringer("chainID", chainID),
		)
		cr.lock.Unlock()
		return
	}
	delete(cr.chainHandlers, chainID)
	cr.lock.Unlock()

	chain.Stop(ctx)

	ctx, cancel := context.WithTimeout(ctx, cr.closeTimeout)
	shutdownDuration, err := chain.AwaitStopped(ctx)
	cancel()

	chainLog := chain.Context().Log
	if err != nil {
		chainLog.Warn("timed out while shutting down",
			zap.Error(err),
		)
	} else {
		chainLog.Info("chain shutdown",
			zap.Duration("shutdownDuration", shutdownDuration),
		)
	}

	if cr.onFatal != nil && cr.criticalChains.Contains(chainID) {
		go cr.onFatal(1)
	}
}

func (cr *ChainRouter) clearRequest(
	op message.Op,
	nodeID ids.NodeID,
	sourceChainID ids.ID,
	destinationChainID ids.ID,
	requestID uint32,
) (ids.RequestID, *requestEntry) {
	// Create the request ID of the request we sent that this message is (allegedly) in response to.
	uniqueRequestID := ids.RequestID{
		NodeID:             nodeID,
		SourceChainID:      sourceChainID,
		DestinationChainID: destinationChainID,
		RequestID:          requestID,
		Op:                 byte(op),
	}
	// Mark that an outstanding request has been fulfilled
	request, exists := cr.timedRequests.Get(uniqueRequestID)
	if !exists {
		return uniqueRequestID, nil
	}

	cr.timedRequests.Delete(uniqueRequestID)
	cr.metrics.outstandingRequests.Set(float64(cr.timedRequests.Len()))
	return uniqueRequestID, &request
}

// connectedSubnet pushes an InternalConnectedSubnet message with [nodeID] and
// [subnetID] to the P-chain. This should be called when a node is either first
// connecting to [subnetID] or when a node that was already connected is
// unbenched on [subnetID]. This is a noop if [subnetID] is the Primary Network
// or if the peer is already marked as connected to the subnet.
// Invariant: should be called after *message.Connected is pushed to the P-chain
// Invariant: should be called after the P-chain was provided in [AddChain]
func (cr *ChainRouter) connectedSubnet(peer *peer, nodeID ids.NodeID, subnetID ids.ID) {
	// if connected to primary network, we can skip this
	// because Connected has its own internal message
	if subnetID == constants.PrimaryNetworkID {
		return
	}

	// peer already connected to this subnet
	if peer.connectedSubnets.Contains(subnetID) {
		return
	}

	msg := message.InternalConnectedSubnet(nodeID, subnetID)
	// We only push this message to the P-chain because it is the only chain
	// that cares about the connectivity of all subnets. Other chains learn
	// about the connectivity of their own subnet when they receive a
	// *message.Connected.
	platformChain, ok := cr.chainHandlers[constants.PlatformChainID]
	if !ok {
		cr.log.Error("trying to issue InternalConnectedSubnet message, but platform chain is not registered",
			zap.Stringer("nodeID", nodeID),
			zap.Stringer("subnetID", subnetID),
		)
		return
	}
	platformChain.Push(
		context.TODO(),
		handler.Message{
			InboundMessage: msg,
			EngineType:     p2p.EngineType_ENGINE_TYPE_UNSPECIFIED,
		},
	)

	peer.connectedSubnets.Add(subnetID)
}
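
// Usage sketch (illustrative only; the surrounding variables are assumed placeholders, not
// values taken from this repository):
//
//	router := &ChainRouter{}
//	if err := router.Initialize(
//		myNodeID,
//		logger,
//		timeoutManager,
//		10*time.Second, // closeTimeout
//		criticalChains,
//		true, // sybilProtectionEnabled
//		trackedSubnets,
//		onFatal,
//		HealthConfig{},
//		prometheus.NewRegistry(),
//	); err != nil {
//		// handle initialization error
//	}
//
//	// Per the ChainRouter invariant above, the P-chain handler must be registered
//	// before any messages are processed.
//	router.AddChain(ctx, platformChainHandler)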