github.com/MetalBlockchain/metalgo@v1.11.9/network/peer/peer.go (about) 1 // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. 2 // See the file LICENSE for licensing terms. 3 4 package peer 5 6 import ( 7 "bufio" 8 "context" 9 "errors" 10 "io" 11 "math" 12 "net" 13 "net/netip" 14 "sync" 15 "sync/atomic" 16 "time" 17 18 "go.uber.org/zap" 19 20 "github.com/MetalBlockchain/metalgo/ids" 21 "github.com/MetalBlockchain/metalgo/message" 22 "github.com/MetalBlockchain/metalgo/proto/pb/p2p" 23 "github.com/MetalBlockchain/metalgo/staking" 24 "github.com/MetalBlockchain/metalgo/utils" 25 "github.com/MetalBlockchain/metalgo/utils/bloom" 26 "github.com/MetalBlockchain/metalgo/utils/constants" 27 "github.com/MetalBlockchain/metalgo/utils/crypto/bls" 28 "github.com/MetalBlockchain/metalgo/utils/ips" 29 "github.com/MetalBlockchain/metalgo/utils/json" 30 "github.com/MetalBlockchain/metalgo/utils/set" 31 "github.com/MetalBlockchain/metalgo/utils/wrappers" 32 "github.com/MetalBlockchain/metalgo/version" 33 ) 34 35 const ( 36 // maxBloomSaltLen restricts the allowed size of the bloom salt to prevent 37 // excessively expensive bloom filter contains checks. 38 maxBloomSaltLen = 32 39 // maxNumTrackedSubnets limits how many subnets a peer can track to prevent 40 // excessive memory usage. 41 maxNumTrackedSubnets = 16 42 43 disconnectingLog = "disconnecting from peer" 44 failedToCreateMessageLog = "failed to create message" 45 failedToSetDeadlineLog = "failed to set connection deadline" 46 failedToGetUptimeLog = "failed to get peer uptime percentage" 47 malformedMessageLog = "malformed message" 48 ) 49 50 var ( 51 errClosed = errors.New("closed") 52 53 _ Peer = (*peer)(nil) 54 ) 55 56 // Peer encapsulates all of the functionality required to send and receive 57 // messages with a remote peer. 58 type Peer interface { 59 // ID returns the nodeID of the remote peer. 60 ID() ids.NodeID 61 62 // Cert returns the certificate that the remote peer is using to 63 // authenticate their messages. 64 Cert() *staking.Certificate 65 66 // LastSent returns the last time a message was sent to the peer. 67 LastSent() time.Time 68 69 // LastReceived returns the last time a message was received from the peer. 70 LastReceived() time.Time 71 72 // Ready returns true if the peer has finished the p2p handshake and is 73 // ready to send and receive messages. 74 Ready() bool 75 76 // AwaitReady will block until the peer has finished the p2p handshake. If 77 // the context is cancelled or the peer starts closing, then an error will 78 // be returned. 79 AwaitReady(ctx context.Context) error 80 81 // Info returns a description of the state of this peer. It should only be 82 // called after [Ready] returns true. 83 Info() Info 84 85 // IP returns the claimed IP and signature provided by this peer during the 86 // handshake. It should only be called after [Ready] returns true. 87 IP() *SignedIP 88 89 // Version returns the claimed node version this peer is running. It should 90 // only be called after [Ready] returns true. 91 Version() *version.Application 92 93 // TrackedSubnets returns the subnets this peer is running. It should only 94 // be called after [Ready] returns true. 95 TrackedSubnets() set.Set[ids.ID] 96 97 // ObservedUptime returns the local node's subnet uptime according to the 98 // peer. The value ranges from [0, 100]. It should only be called after 99 // [Ready] returns true. 100 ObservedUptime(subnetID ids.ID) (uint32, bool) 101 102 // Send attempts to send [msg] to the peer. The peer takes ownership of 103 // [msg] for reference counting. This returns false if the message is 104 // guaranteed not to be delivered to the peer. 105 Send(ctx context.Context, msg message.OutboundMessage) bool 106 107 // StartSendGetPeerList attempts to send a GetPeerList message to this peer 108 // on this peer's gossip routine. It is not guaranteed that a GetPeerList 109 // will be sent. 110 StartSendGetPeerList() 111 112 // StartClose will begin shutting down the peer. It will not block. 113 StartClose() 114 115 // Closed returns true once the peer has been fully shutdown. It is 116 // guaranteed that no more messages will be received by this peer once this 117 // returns true. 118 Closed() bool 119 120 // AwaitClosed will block until the peer has been fully shutdown. If the 121 // context is cancelled, then an error will be returned. 122 AwaitClosed(ctx context.Context) error 123 } 124 125 type peer struct { 126 *Config 127 128 // the connection object that is used to read/write messages from 129 conn net.Conn 130 131 // [cert] is this peer's certificate, specifically the leaf of the 132 // certificate chain they provided. 133 cert *staking.Certificate 134 135 // node ID of this peer. 136 id ids.NodeID 137 138 // queue of messages to send to this peer. 139 messageQueue MessageQueue 140 141 // ip is the claimed IP the peer gave us in the Handshake message. 142 ip *SignedIP 143 // version is the claimed version the peer is running that we received in 144 // the Handshake message. 145 version *version.Application 146 // trackedSubnets are the subnetIDs the peer sent us in the Handshake 147 // message. The primary network ID is always included. 148 trackedSubnets set.Set[ids.ID] 149 // options of ACPs provided in the Handshake message. 150 supportedACPs set.Set[uint32] 151 objectedACPs set.Set[uint32] 152 153 // txIDOfVerifiedBLSKey is the txID that added the BLS key that was most 154 // recently verified to have signed the IP. 155 // 156 // Invariant: Prior to the handshake being completed, this can only be 157 // accessed by the reader goroutine. After the handshake has been completed, 158 // this can only be accessed by the message sender goroutine. 159 txIDOfVerifiedBLSKey ids.ID 160 161 observedUptimesLock sync.RWMutex 162 // [observedUptimesLock] must be held while accessing [observedUptime] 163 // Subnet ID --> Our uptime for the given subnet as perceived by the peer 164 observedUptimes map[ids.ID]uint32 165 166 // True if this peer has sent us a valid Handshake message and 167 // is running a compatible version. 168 // Only modified on the connection's reader routine. 169 gotHandshake utils.Atomic[bool] 170 171 // True if the peer: 172 // * Has sent us a Handshake message 173 // * Has sent us a PeerList message 174 // * Is running a compatible version 175 // Only modified on the connection's reader routine. 176 finishedHandshake utils.Atomic[bool] 177 178 // onFinishHandshake is closed when the peer finishes the p2p handshake. 179 onFinishHandshake chan struct{} 180 181 // numExecuting is the number of goroutines this peer is currently using 182 numExecuting int64 183 startClosingOnce sync.Once 184 // onClosingCtx is canceled when the peer starts closing 185 onClosingCtx context.Context 186 // onClosingCtxCancel cancels onClosingCtx 187 onClosingCtxCancel func() 188 189 // onClosed is closed when the peer is closed 190 onClosed chan struct{} 191 192 // Unix time of the last message sent and received respectively 193 // Must only be accessed atomically 194 lastSent, lastReceived int64 195 196 // getPeerListChan signals that we should attempt to send a GetPeerList to 197 // this peer 198 getPeerListChan chan struct{} 199 } 200 201 // Start a new peer instance. 202 // 203 // Invariant: There must only be one peer running at a time with a reference to 204 // the same [config.InboundMsgThrottler]. 205 func Start( 206 config *Config, 207 conn net.Conn, 208 cert *staking.Certificate, 209 id ids.NodeID, 210 messageQueue MessageQueue, 211 ) Peer { 212 onClosingCtx, onClosingCtxCancel := context.WithCancel(context.Background()) 213 p := &peer{ 214 Config: config, 215 conn: conn, 216 cert: cert, 217 id: id, 218 messageQueue: messageQueue, 219 onFinishHandshake: make(chan struct{}), 220 numExecuting: 3, 221 onClosingCtx: onClosingCtx, 222 onClosingCtxCancel: onClosingCtxCancel, 223 onClosed: make(chan struct{}), 224 observedUptimes: make(map[ids.ID]uint32), 225 getPeerListChan: make(chan struct{}, 1), 226 } 227 228 go p.readMessages() 229 go p.writeMessages() 230 go p.sendNetworkMessages() 231 232 return p 233 } 234 235 func (p *peer) ID() ids.NodeID { 236 return p.id 237 } 238 239 func (p *peer) Cert() *staking.Certificate { 240 return p.cert 241 } 242 243 func (p *peer) LastSent() time.Time { 244 return time.Unix( 245 atomic.LoadInt64(&p.lastSent), 246 0, 247 ) 248 } 249 250 func (p *peer) LastReceived() time.Time { 251 return time.Unix( 252 atomic.LoadInt64(&p.lastReceived), 253 0, 254 ) 255 } 256 257 func (p *peer) Ready() bool { 258 return p.finishedHandshake.Get() 259 } 260 261 func (p *peer) AwaitReady(ctx context.Context) error { 262 select { 263 case <-p.onFinishHandshake: 264 return nil 265 case <-p.onClosed: 266 return errClosed 267 case <-ctx.Done(): 268 return ctx.Err() 269 } 270 } 271 272 func (p *peer) Info() Info { 273 uptimes := make(map[ids.ID]json.Uint32, p.MySubnets.Len()) 274 for subnetID := range p.MySubnets { 275 uptime, exist := p.ObservedUptime(subnetID) 276 if !exist { 277 continue 278 } 279 uptimes[subnetID] = json.Uint32(uptime) 280 } 281 282 primaryUptime, exist := p.ObservedUptime(constants.PrimaryNetworkID) 283 if !exist { 284 primaryUptime = 0 285 } 286 287 ip, _ := ips.ParseAddrPort(p.conn.RemoteAddr().String()) 288 return Info{ 289 IP: ip, 290 PublicIP: p.ip.AddrPort, 291 ID: p.id, 292 Version: p.version.String(), 293 LastSent: p.LastSent(), 294 LastReceived: p.LastReceived(), 295 ObservedUptime: json.Uint32(primaryUptime), 296 ObservedSubnetUptimes: uptimes, 297 TrackedSubnets: p.trackedSubnets, 298 SupportedACPs: p.supportedACPs, 299 ObjectedACPs: p.objectedACPs, 300 } 301 } 302 303 func (p *peer) IP() *SignedIP { 304 return p.ip 305 } 306 307 func (p *peer) Version() *version.Application { 308 return p.version 309 } 310 311 func (p *peer) TrackedSubnets() set.Set[ids.ID] { 312 return p.trackedSubnets 313 } 314 315 func (p *peer) ObservedUptime(subnetID ids.ID) (uint32, bool) { 316 p.observedUptimesLock.RLock() 317 defer p.observedUptimesLock.RUnlock() 318 319 uptime, exist := p.observedUptimes[subnetID] 320 return uptime, exist 321 } 322 323 func (p *peer) Send(ctx context.Context, msg message.OutboundMessage) bool { 324 return p.messageQueue.Push(ctx, msg) 325 } 326 327 func (p *peer) StartSendGetPeerList() { 328 select { 329 case p.getPeerListChan <- struct{}{}: 330 default: 331 } 332 } 333 334 func (p *peer) StartClose() { 335 p.startClosingOnce.Do(func() { 336 if err := p.conn.Close(); err != nil { 337 p.Log.Debug("failed to close connection", 338 zap.Stringer("nodeID", p.id), 339 zap.Error(err), 340 ) 341 } 342 343 p.messageQueue.Close() 344 p.onClosingCtxCancel() 345 }) 346 } 347 348 func (p *peer) Closed() bool { 349 select { 350 case _, ok := <-p.onClosed: 351 return !ok 352 default: 353 return false 354 } 355 } 356 357 func (p *peer) AwaitClosed(ctx context.Context) error { 358 select { 359 case <-p.onClosed: 360 return nil 361 case <-ctx.Done(): 362 return ctx.Err() 363 } 364 } 365 366 // close should be called at the end of each goroutine that has been spun up. 367 // When the last goroutine is exiting, the peer will be marked as closed. 368 func (p *peer) close() { 369 if atomic.AddInt64(&p.numExecuting, -1) != 0 { 370 return 371 } 372 373 p.Network.Disconnected(p.id) 374 close(p.onClosed) 375 } 376 377 // Read and handle messages from this peer. 378 // When this method returns, the connection is closed. 379 func (p *peer) readMessages() { 380 // Track this node with the inbound message throttler. 381 p.InboundMsgThrottler.AddNode(p.id) 382 defer func() { 383 p.InboundMsgThrottler.RemoveNode(p.id) 384 p.StartClose() 385 p.close() 386 }() 387 388 // Continuously read and handle messages from this peer. 389 reader := bufio.NewReaderSize(p.conn, p.Config.ReadBufferSize) 390 msgLenBytes := make([]byte, wrappers.IntLen) 391 for { 392 // Time out and close connection if we can't read the message length 393 if err := p.conn.SetReadDeadline(p.nextTimeout()); err != nil { 394 p.Log.Verbo(failedToSetDeadlineLog, 395 zap.Stringer("nodeID", p.id), 396 zap.String("direction", "read"), 397 zap.Error(err), 398 ) 399 return 400 } 401 402 // Read the message length 403 if _, err := io.ReadFull(reader, msgLenBytes); err != nil { 404 p.Log.Verbo("error reading message length", 405 zap.Stringer("nodeID", p.id), 406 zap.Error(err), 407 ) 408 return 409 } 410 411 // Parse the message length 412 msgLen, err := readMsgLen(msgLenBytes, constants.DefaultMaxMessageSize) 413 if err != nil { 414 p.Log.Verbo("error parsing message length", 415 zap.Stringer("nodeID", p.id), 416 zap.Error(err), 417 ) 418 return 419 } 420 421 // Wait until the throttler says we can proceed to read the message. 422 // 423 // Invariant: When done processing this message, onFinishedHandling() is 424 // called exactly once. If this is not honored, the message throttler 425 // will leak until no new messages can be read. You can look at message 426 // throttler metrics to verify that there is no leak. 427 // 428 // Invariant: There must only be one call to Acquire at any given time 429 // with the same nodeID. In this package, only this goroutine ever 430 // performs Acquire. Additionally, we ensure that this goroutine has 431 // exited before calling [Network.Disconnected] to guarantee that there 432 // can't be multiple instances of this goroutine running over different 433 // peer instances. 434 onFinishedHandling := p.InboundMsgThrottler.Acquire( 435 p.onClosingCtx, 436 uint64(msgLen), 437 p.id, 438 ) 439 440 // If the peer is shutting down, there's no need to read the message. 441 if err := p.onClosingCtx.Err(); err != nil { 442 onFinishedHandling() 443 return 444 } 445 446 // Time out and close connection if we can't read message 447 if err := p.conn.SetReadDeadline(p.nextTimeout()); err != nil { 448 p.Log.Verbo(failedToSetDeadlineLog, 449 zap.Stringer("nodeID", p.id), 450 zap.String("direction", "read"), 451 zap.Error(err), 452 ) 453 onFinishedHandling() 454 return 455 } 456 457 // Read the message 458 msgBytes := make([]byte, msgLen) 459 if _, err := io.ReadFull(reader, msgBytes); err != nil { 460 p.Log.Verbo("error reading message", 461 zap.Stringer("nodeID", p.id), 462 zap.Error(err), 463 ) 464 onFinishedHandling() 465 return 466 } 467 468 // Track the time it takes from now until the time the message is 469 // handled (in the event this message is handled at the network level) 470 // or the time the message is handed to the router (in the event this 471 // message is not handled at the network level.) 472 // [p.CPUTracker.StopProcessing] must be called when this loop iteration is 473 // finished. 474 p.ResourceTracker.StartProcessing(p.id, p.Clock.Time()) 475 476 p.Log.Verbo("parsing message", 477 zap.Stringer("nodeID", p.id), 478 zap.Binary("messageBytes", msgBytes), 479 ) 480 481 // Parse the message 482 msg, err := p.MessageCreator.Parse(msgBytes, p.id, onFinishedHandling) 483 if err != nil { 484 p.Log.Verbo("failed to parse message", 485 zap.Stringer("nodeID", p.id), 486 zap.Binary("messageBytes", msgBytes), 487 zap.Error(err), 488 ) 489 490 p.Metrics.NumFailedToParse.Inc() 491 492 // Couldn't parse the message. Read the next one. 493 onFinishedHandling() 494 p.ResourceTracker.StopProcessing(p.id, p.Clock.Time()) 495 continue 496 } 497 498 now := p.Clock.Time() 499 p.storeLastReceived(now) 500 p.Metrics.Received(msg, msgLen) 501 502 // Handle the message. Note that when we are done handling this message, 503 // we must call [msg.OnFinishedHandling()]. 504 p.handle(msg) 505 p.ResourceTracker.StopProcessing(p.id, p.Clock.Time()) 506 } 507 } 508 509 func (p *peer) writeMessages() { 510 defer func() { 511 p.StartClose() 512 p.close() 513 }() 514 515 writer := bufio.NewWriterSize(p.conn, p.Config.WriteBufferSize) 516 517 // Make sure that the Handshake is the first message sent 518 mySignedIP, err := p.IPSigner.GetSignedIP() 519 if err != nil { 520 p.Log.Error("failed to get signed IP", 521 zap.Stringer("nodeID", p.id), 522 zap.Error(err), 523 ) 524 return 525 } 526 if port := mySignedIP.AddrPort.Port(); port == 0 { 527 p.Log.Error("signed IP has invalid port", 528 zap.Stringer("nodeID", p.id), 529 zap.Uint16("port", port), 530 ) 531 return 532 } 533 534 myVersion := p.VersionCompatibility.Version() 535 knownPeersFilter, knownPeersSalt := p.Network.KnownPeers() 536 537 msg, err := p.MessageCreator.Handshake( 538 p.NetworkID, 539 p.Clock.Unix(), 540 mySignedIP.AddrPort, 541 myVersion.Name, 542 uint32(myVersion.Major), 543 uint32(myVersion.Minor), 544 uint32(myVersion.Patch), 545 mySignedIP.Timestamp, 546 mySignedIP.TLSSignature, 547 mySignedIP.BLSSignatureBytes, 548 p.MySubnets.List(), 549 p.SupportedACPs, 550 p.ObjectedACPs, 551 knownPeersFilter, 552 knownPeersSalt, 553 ) 554 if err != nil { 555 p.Log.Error(failedToCreateMessageLog, 556 zap.Stringer("nodeID", p.id), 557 zap.Stringer("messageOp", message.HandshakeOp), 558 zap.Error(err), 559 ) 560 return 561 } 562 563 p.writeMessage(writer, msg) 564 565 for { 566 msg, ok := p.messageQueue.PopNow() 567 if ok { 568 p.writeMessage(writer, msg) 569 continue 570 } 571 572 // Make sure the peer was fully sent all prior messages before 573 // blocking. 574 if err := writer.Flush(); err != nil { 575 p.Log.Verbo("failed to flush writer", 576 zap.Stringer("nodeID", p.id), 577 zap.Error(err), 578 ) 579 return 580 } 581 582 msg, ok = p.messageQueue.Pop() 583 if !ok { 584 // This peer is closing 585 return 586 } 587 588 p.writeMessage(writer, msg) 589 } 590 } 591 592 func (p *peer) writeMessage(writer io.Writer, msg message.OutboundMessage) { 593 msgBytes := msg.Bytes() 594 p.Log.Verbo("sending message", 595 zap.Stringer("nodeID", p.id), 596 zap.Binary("messageBytes", msgBytes), 597 ) 598 599 if err := p.conn.SetWriteDeadline(p.nextTimeout()); err != nil { 600 p.Log.Verbo(failedToSetDeadlineLog, 601 zap.Stringer("nodeID", p.id), 602 zap.String("direction", "write"), 603 zap.Error(err), 604 ) 605 return 606 } 607 608 msgLen := uint32(len(msgBytes)) 609 msgLenBytes, err := writeMsgLen(msgLen, constants.DefaultMaxMessageSize) 610 if err != nil { 611 p.Log.Verbo("error writing message length", 612 zap.Stringer("nodeID", p.id), 613 zap.Error(err), 614 ) 615 return 616 } 617 618 // Write the message 619 var buf net.Buffers = [][]byte{msgLenBytes[:], msgBytes} 620 if _, err := io.CopyN(writer, &buf, int64(wrappers.IntLen+msgLen)); err != nil { 621 p.Log.Verbo("error writing message", 622 zap.Stringer("nodeID", p.id), 623 zap.Error(err), 624 ) 625 return 626 } 627 628 now := p.Clock.Time() 629 p.storeLastSent(now) 630 p.Metrics.Sent(msg) 631 } 632 633 func (p *peer) sendNetworkMessages() { 634 sendPingsTicker := time.NewTicker(p.PingFrequency) 635 defer func() { 636 sendPingsTicker.Stop() 637 638 p.StartClose() 639 p.close() 640 }() 641 642 for { 643 select { 644 case <-p.getPeerListChan: 645 knownPeersFilter, knownPeersSalt := p.Config.Network.KnownPeers() 646 msg, err := p.Config.MessageCreator.GetPeerList(knownPeersFilter, knownPeersSalt) 647 if err != nil { 648 p.Log.Error(failedToCreateMessageLog, 649 zap.Stringer("nodeID", p.id), 650 zap.Stringer("messageOp", message.GetPeerListOp), 651 zap.Error(err), 652 ) 653 return 654 } 655 656 p.Send(p.onClosingCtx, msg) 657 case <-sendPingsTicker.C: 658 if !p.Network.AllowConnection(p.id) { 659 p.Log.Debug(disconnectingLog, 660 zap.String("reason", "connection is no longer desired"), 661 zap.Stringer("nodeID", p.id), 662 ) 663 return 664 } 665 666 // Only check if we should disconnect after the handshake is 667 // finished to avoid race conditions and accessing uninitialized 668 // values. 669 if p.finishedHandshake.Get() && p.shouldDisconnect() { 670 return 671 } 672 673 primaryUptime, subnetUptimes := p.getUptimes() 674 pingMessage, err := p.MessageCreator.Ping(primaryUptime, subnetUptimes) 675 if err != nil { 676 p.Log.Error(failedToCreateMessageLog, 677 zap.Stringer("nodeID", p.id), 678 zap.Stringer("messageOp", message.PingOp), 679 zap.Error(err), 680 ) 681 return 682 } 683 684 p.Send(p.onClosingCtx, pingMessage) 685 case <-p.onClosingCtx.Done(): 686 return 687 } 688 } 689 } 690 691 // shouldDisconnect is called both during receipt of the Handshake message and 692 // periodically when sending a Ping message (after finishing the handshake!). 693 // 694 // It is called during the Handshake to prevent marking a peer as connected and 695 // then immediately disconnecting from them. 696 // 697 // It is called when sending a Ping message to account for validator set 698 // changes. It's called when sending a Ping rather than in a validator set 699 // callback to avoid signature verification on the P-chain accept path. 700 func (p *peer) shouldDisconnect() bool { 701 if err := p.VersionCompatibility.Compatible(p.version); err != nil { 702 p.Log.Debug(disconnectingLog, 703 zap.String("reason", "version not compatible"), 704 zap.Stringer("nodeID", p.id), 705 zap.Stringer("peerVersion", p.version), 706 zap.Error(err), 707 ) 708 return true 709 } 710 711 // Enforce that all validators that have registered a BLS key are signing 712 // their IP with it after the activation of Durango. 713 vdr, ok := p.Validators.GetValidator(constants.PrimaryNetworkID, p.id) 714 if !ok || vdr.PublicKey == nil || vdr.TxID == p.txIDOfVerifiedBLSKey { 715 return false 716 } 717 718 validSignature := bls.VerifyProofOfPossession( 719 vdr.PublicKey, 720 p.ip.BLSSignature, 721 p.ip.UnsignedIP.bytes(), 722 ) 723 if !validSignature { 724 p.Log.Debug(disconnectingLog, 725 zap.String("reason", "invalid BLS signature"), 726 zap.Stringer("nodeID", p.id), 727 ) 728 return true 729 } 730 731 // Avoid unnecessary signature verifications by only verifying the signature 732 // once per validation period. 733 p.txIDOfVerifiedBLSKey = vdr.TxID 734 return false 735 } 736 737 func (p *peer) handle(msg message.InboundMessage) { 738 switch m := msg.Message().(type) { // Network-related message types 739 case *p2p.Ping: 740 p.handlePing(m) 741 msg.OnFinishedHandling() 742 return 743 case *p2p.Pong: 744 p.handlePong(m) 745 msg.OnFinishedHandling() 746 return 747 case *p2p.Handshake: 748 p.handleHandshake(m) 749 msg.OnFinishedHandling() 750 return 751 case *p2p.GetPeerList: 752 p.handleGetPeerList(m) 753 msg.OnFinishedHandling() 754 return 755 case *p2p.PeerList: 756 p.handlePeerList(m) 757 msg.OnFinishedHandling() 758 return 759 } 760 if !p.finishedHandshake.Get() { 761 p.Log.Debug("dropping message", 762 zap.Stringer("nodeID", p.id), 763 zap.Stringer("messageOp", msg.Op()), 764 zap.String("reason", "handshake isn't finished"), 765 ) 766 msg.OnFinishedHandling() 767 return 768 } 769 770 // Consensus and app-level messages 771 p.Router.HandleInbound(context.Background(), msg) 772 } 773 774 func (p *peer) handlePing(msg *p2p.Ping) { 775 if msg.Uptime > 100 { 776 p.Log.Debug(malformedMessageLog, 777 zap.Stringer("nodeID", p.id), 778 zap.Stringer("messageOp", message.PingOp), 779 zap.Stringer("subnetID", constants.PrimaryNetworkID), 780 zap.Uint32("uptime", msg.Uptime), 781 ) 782 p.StartClose() 783 return 784 } 785 p.observeUptime(constants.PrimaryNetworkID, msg.Uptime) 786 787 for _, subnetUptime := range msg.SubnetUptimes { 788 subnetID, err := ids.ToID(subnetUptime.SubnetId) 789 if err != nil { 790 p.Log.Debug(malformedMessageLog, 791 zap.Stringer("nodeID", p.id), 792 zap.Stringer("messageOp", message.PingOp), 793 zap.String("field", "subnetID"), 794 zap.Error(err), 795 ) 796 p.StartClose() 797 return 798 } 799 800 if !p.MySubnets.Contains(subnetID) { 801 p.Log.Debug(malformedMessageLog, 802 zap.Stringer("nodeID", p.id), 803 zap.Stringer("messageOp", message.PingOp), 804 zap.Stringer("subnetID", subnetID), 805 zap.String("reason", "not tracking subnet"), 806 ) 807 p.StartClose() 808 return 809 } 810 811 uptime := subnetUptime.Uptime 812 if uptime > 100 { 813 p.Log.Debug(malformedMessageLog, 814 zap.Stringer("nodeID", p.id), 815 zap.Stringer("messageOp", message.PingOp), 816 zap.Stringer("subnetID", subnetID), 817 zap.Uint32("uptime", uptime), 818 ) 819 p.StartClose() 820 return 821 } 822 p.observeUptime(subnetID, uptime) 823 } 824 825 pongMessage, err := p.MessageCreator.Pong() 826 if err != nil { 827 p.Log.Error(failedToCreateMessageLog, 828 zap.Stringer("nodeID", p.id), 829 zap.Stringer("messageOp", message.PongOp), 830 zap.Error(err), 831 ) 832 p.StartClose() 833 return 834 } 835 836 p.Send(p.onClosingCtx, pongMessage) 837 } 838 839 func (p *peer) getUptimes() (uint32, []*p2p.SubnetUptime) { 840 primaryUptime, err := p.UptimeCalculator.CalculateUptimePercent( 841 p.id, 842 constants.PrimaryNetworkID, 843 ) 844 if err != nil { 845 p.Log.Debug(failedToGetUptimeLog, 846 zap.Stringer("nodeID", p.id), 847 zap.Stringer("subnetID", constants.PrimaryNetworkID), 848 zap.Error(err), 849 ) 850 primaryUptime = 0 851 } 852 853 subnetUptimes := make([]*p2p.SubnetUptime, 0, p.MySubnets.Len()) 854 for subnetID := range p.MySubnets { 855 if !p.trackedSubnets.Contains(subnetID) { 856 continue 857 } 858 859 subnetUptime, err := p.UptimeCalculator.CalculateUptimePercent(p.id, subnetID) 860 if err != nil { 861 p.Log.Debug(failedToGetUptimeLog, 862 zap.Stringer("nodeID", p.id), 863 zap.Stringer("subnetID", subnetID), 864 zap.Error(err), 865 ) 866 continue 867 } 868 869 subnetID := subnetID 870 subnetUptimes = append(subnetUptimes, &p2p.SubnetUptime{ 871 SubnetId: subnetID[:], 872 Uptime: uint32(subnetUptime * 100), 873 }) 874 } 875 876 primaryUptimePercent := uint32(primaryUptime * 100) 877 return primaryUptimePercent, subnetUptimes 878 } 879 880 func (*peer) handlePong(*p2p.Pong) {} 881 882 // Record that the given peer perceives our uptime for the given [subnetID] 883 // to be [uptime]. 884 // Assumes [uptime] is in the range [0, 100] and [subnetID] is a valid ID of a 885 // subnet this peer tracks. 886 func (p *peer) observeUptime(subnetID ids.ID, uptime uint32) { 887 p.observedUptimesLock.Lock() 888 p.observedUptimes[subnetID] = uptime // [0, 100] percentage 889 p.observedUptimesLock.Unlock() 890 } 891 892 func (p *peer) handleHandshake(msg *p2p.Handshake) { 893 if p.gotHandshake.Get() { 894 p.Log.Debug(malformedMessageLog, 895 zap.Stringer("nodeID", p.id), 896 zap.Stringer("messageOp", message.HandshakeOp), 897 zap.String("reason", "already received handshake"), 898 ) 899 p.StartClose() 900 return 901 } 902 903 if msg.NetworkId != p.NetworkID { 904 p.Log.Debug(malformedMessageLog, 905 zap.Stringer("nodeID", p.id), 906 zap.Stringer("messageOp", message.HandshakeOp), 907 zap.String("field", "networkID"), 908 zap.Uint32("peerNetworkID", msg.NetworkId), 909 zap.Uint32("ourNetworkID", p.NetworkID), 910 ) 911 p.StartClose() 912 return 913 } 914 915 localTime := p.Clock.Time() 916 localUnixTime := uint64(localTime.Unix()) 917 clockDifference := math.Abs(float64(msg.MyTime) - float64(localUnixTime)) 918 919 p.Metrics.ClockSkewCount.Inc() 920 p.Metrics.ClockSkewSum.Add(clockDifference) 921 922 if clockDifference > p.MaxClockDifference.Seconds() { 923 log := p.Log.Debug 924 if _, ok := p.Beacons.GetValidator(constants.PrimaryNetworkID, p.id); ok { 925 log = p.Log.Warn 926 } 927 log(malformedMessageLog, 928 zap.Stringer("nodeID", p.id), 929 zap.Stringer("messageOp", message.HandshakeOp), 930 zap.String("field", "myTime"), 931 zap.Uint64("peerTime", msg.MyTime), 932 zap.Uint64("localTime", localUnixTime), 933 ) 934 p.StartClose() 935 return 936 } 937 938 p.version = &version.Application{ 939 Name: msg.Client.GetName(), 940 Major: int(msg.Client.GetMajor()), 941 Minor: int(msg.Client.GetMinor()), 942 Patch: int(msg.Client.GetPatch()), 943 } 944 945 if p.VersionCompatibility.Version().Before(p.version) { 946 log := p.Log.Debug 947 if _, ok := p.Beacons.GetValidator(constants.PrimaryNetworkID, p.id); ok { 948 log = p.Log.Info 949 } 950 log("peer attempting to connect with newer version. You may want to update your client", 951 zap.Stringer("nodeID", p.id), 952 zap.Stringer("peerVersion", p.version), 953 ) 954 } 955 956 // handle subnet IDs 957 if numTrackedSubnets := len(msg.TrackedSubnets); numTrackedSubnets > maxNumTrackedSubnets { 958 p.Log.Debug(malformedMessageLog, 959 zap.Stringer("nodeID", p.id), 960 zap.Stringer("messageOp", message.HandshakeOp), 961 zap.String("field", "trackedSubnets"), 962 zap.Int("numTrackedSubnets", numTrackedSubnets), 963 ) 964 p.StartClose() 965 return 966 } 967 968 p.trackedSubnets.Add(constants.PrimaryNetworkID) 969 for _, subnetIDBytes := range msg.TrackedSubnets { 970 subnetID, err := ids.ToID(subnetIDBytes) 971 if err != nil { 972 p.Log.Debug(malformedMessageLog, 973 zap.Stringer("nodeID", p.id), 974 zap.Stringer("messageOp", message.HandshakeOp), 975 zap.String("field", "trackedSubnets"), 976 zap.Error(err), 977 ) 978 p.StartClose() 979 return 980 } 981 p.trackedSubnets.Add(subnetID) 982 } 983 984 for _, acp := range msg.SupportedAcps { 985 if constants.CurrentACPs.Contains(acp) { 986 p.supportedACPs.Add(acp) 987 } 988 } 989 for _, acp := range msg.ObjectedAcps { 990 if constants.CurrentACPs.Contains(acp) { 991 p.objectedACPs.Add(acp) 992 } 993 } 994 995 if p.supportedACPs.Overlaps(p.objectedACPs) { 996 p.Log.Debug(malformedMessageLog, 997 zap.Stringer("nodeID", p.id), 998 zap.Stringer("messageOp", message.HandshakeOp), 999 zap.String("field", "acps"), 1000 zap.Reflect("supportedACPs", p.supportedACPs), 1001 zap.Reflect("objectedACPs", p.objectedACPs), 1002 ) 1003 p.StartClose() 1004 return 1005 } 1006 1007 var ( 1008 knownPeers = bloom.EmptyFilter 1009 salt []byte 1010 ) 1011 if msg.KnownPeers != nil { 1012 var err error 1013 knownPeers, err = bloom.Parse(msg.KnownPeers.Filter) 1014 if err != nil { 1015 p.Log.Debug(malformedMessageLog, 1016 zap.Stringer("nodeID", p.id), 1017 zap.Stringer("messageOp", message.HandshakeOp), 1018 zap.String("field", "knownPeers.filter"), 1019 zap.Error(err), 1020 ) 1021 p.StartClose() 1022 return 1023 } 1024 1025 salt = msg.KnownPeers.Salt 1026 if saltLen := len(salt); saltLen > maxBloomSaltLen { 1027 p.Log.Debug(malformedMessageLog, 1028 zap.Stringer("nodeID", p.id), 1029 zap.Stringer("messageOp", message.HandshakeOp), 1030 zap.String("field", "knownPeers.salt"), 1031 zap.Int("saltLen", saltLen), 1032 ) 1033 p.StartClose() 1034 return 1035 } 1036 } 1037 1038 addr, ok := ips.AddrFromSlice(msg.IpAddr) 1039 if !ok { 1040 p.Log.Debug(malformedMessageLog, 1041 zap.Stringer("nodeID", p.id), 1042 zap.Stringer("messageOp", message.HandshakeOp), 1043 zap.String("field", "ip"), 1044 zap.Int("ipLen", len(msg.IpAddr)), 1045 ) 1046 p.StartClose() 1047 return 1048 } 1049 1050 port := uint16(msg.IpPort) 1051 if msg.IpPort == 0 { 1052 p.Log.Debug(malformedMessageLog, 1053 zap.Stringer("nodeID", p.id), 1054 zap.Stringer("messageOp", message.HandshakeOp), 1055 zap.String("field", "port"), 1056 zap.Uint16("port", port), 1057 ) 1058 p.StartClose() 1059 return 1060 } 1061 1062 p.ip = &SignedIP{ 1063 UnsignedIP: UnsignedIP{ 1064 AddrPort: netip.AddrPortFrom( 1065 addr, 1066 port, 1067 ), 1068 Timestamp: msg.IpSigningTime, 1069 }, 1070 TLSSignature: msg.IpNodeIdSig, 1071 } 1072 maxTimestamp := localTime.Add(p.MaxClockDifference) 1073 if err := p.ip.Verify(p.cert, maxTimestamp); err != nil { 1074 log := p.Log.Debug 1075 if _, ok := p.Beacons.GetValidator(constants.PrimaryNetworkID, p.id); ok { 1076 log = p.Log.Warn 1077 } 1078 log(malformedMessageLog, 1079 zap.Stringer("nodeID", p.id), 1080 zap.Stringer("messageOp", message.HandshakeOp), 1081 zap.String("field", "tlsSignature"), 1082 zap.Uint64("peerTime", msg.MyTime), 1083 zap.Uint64("localTime", localUnixTime), 1084 zap.Error(err), 1085 ) 1086 1087 p.StartClose() 1088 return 1089 } 1090 1091 signature, err := bls.SignatureFromBytes(msg.IpBlsSig) 1092 if err != nil { 1093 p.Log.Debug(malformedMessageLog, 1094 zap.Stringer("nodeID", p.id), 1095 zap.Stringer("messageOp", message.HandshakeOp), 1096 zap.String("field", "blsSignature"), 1097 zap.Error(err), 1098 ) 1099 p.StartClose() 1100 return 1101 } 1102 1103 p.ip.BLSSignature = signature 1104 p.ip.BLSSignatureBytes = msg.IpBlsSig 1105 1106 // If the peer is running an incompatible version or has an invalid BLS 1107 // signature, disconnect from them prior to marking the handshake as 1108 // completed. 1109 if p.shouldDisconnect() { 1110 p.StartClose() 1111 return 1112 } 1113 1114 p.gotHandshake.Set(true) 1115 1116 peerIPs := p.Network.Peers(p.id, knownPeers, salt) 1117 1118 // We bypass throttling here to ensure that the handshake message is 1119 // acknowledged correctly. 1120 peerListMsg, err := p.Config.MessageCreator.PeerList(peerIPs, true /*=bypassThrottling*/) 1121 if err != nil { 1122 p.Log.Error(failedToCreateMessageLog, 1123 zap.Stringer("nodeID", p.id), 1124 zap.Stringer("messageOp", message.PeerListOp), 1125 zap.Error(err), 1126 ) 1127 p.StartClose() 1128 return 1129 } 1130 1131 if !p.Send(p.onClosingCtx, peerListMsg) { 1132 // Because throttling was marked to be bypassed with this message, 1133 // sending should only fail if the peer has started closing. 1134 p.Log.Debug("failed to send reliable message", 1135 zap.Stringer("nodeID", p.id), 1136 zap.Stringer("messageOp", message.PeerListOp), 1137 zap.Error(p.onClosingCtx.Err()), 1138 ) 1139 p.StartClose() 1140 } 1141 } 1142 1143 func (p *peer) handleGetPeerList(msg *p2p.GetPeerList) { 1144 if !p.finishedHandshake.Get() { 1145 p.Log.Debug(malformedMessageLog, 1146 zap.Stringer("nodeID", p.id), 1147 zap.Stringer("messageOp", message.GetPeerListOp), 1148 zap.String("reason", "not finished handshake"), 1149 ) 1150 return 1151 } 1152 1153 knownPeersMsg := msg.GetKnownPeers() 1154 filter, err := bloom.Parse(knownPeersMsg.GetFilter()) 1155 if err != nil { 1156 p.Log.Debug(malformedMessageLog, 1157 zap.Stringer("nodeID", p.id), 1158 zap.Stringer("messageOp", message.GetPeerListOp), 1159 zap.String("field", "knownPeers.filter"), 1160 zap.Error(err), 1161 ) 1162 p.StartClose() 1163 return 1164 } 1165 1166 salt := knownPeersMsg.GetSalt() 1167 if saltLen := len(salt); saltLen > maxBloomSaltLen { 1168 p.Log.Debug(malformedMessageLog, 1169 zap.Stringer("nodeID", p.id), 1170 zap.Stringer("messageOp", message.GetPeerListOp), 1171 zap.String("field", "knownPeers.salt"), 1172 zap.Int("saltLen", saltLen), 1173 ) 1174 p.StartClose() 1175 return 1176 } 1177 1178 peerIPs := p.Network.Peers(p.id, filter, salt) 1179 if len(peerIPs) == 0 { 1180 p.Log.Debug("skipping sending of empty peer list", 1181 zap.Stringer("nodeID", p.id), 1182 ) 1183 return 1184 } 1185 1186 // Bypass throttling is disabled here to follow the non-handshake message 1187 // sending pattern. 1188 peerListMsg, err := p.Config.MessageCreator.PeerList(peerIPs, false /*=bypassThrottling*/) 1189 if err != nil { 1190 p.Log.Error(failedToCreateMessageLog, 1191 zap.Stringer("nodeID", p.id), 1192 zap.Stringer("messageOp", message.PeerListOp), 1193 zap.Error(err), 1194 ) 1195 return 1196 } 1197 1198 p.Send(p.onClosingCtx, peerListMsg) 1199 } 1200 1201 func (p *peer) handlePeerList(msg *p2p.PeerList) { 1202 if !p.finishedHandshake.Get() { 1203 if !p.gotHandshake.Get() { 1204 return 1205 } 1206 1207 p.Network.Connected(p.id) 1208 p.finishedHandshake.Set(true) 1209 close(p.onFinishHandshake) 1210 } 1211 1212 discoveredIPs := make([]*ips.ClaimedIPPort, len(msg.ClaimedIpPorts)) // the peers this peer told us about 1213 for i, claimedIPPort := range msg.ClaimedIpPorts { 1214 tlsCert, err := staking.ParseCertificate(claimedIPPort.X509Certificate) 1215 if err != nil { 1216 p.Log.Debug(malformedMessageLog, 1217 zap.Stringer("nodeID", p.id), 1218 zap.Stringer("messageOp", message.PeerListOp), 1219 zap.String("field", "cert"), 1220 zap.Error(err), 1221 ) 1222 p.StartClose() 1223 return 1224 } 1225 1226 addr, ok := ips.AddrFromSlice(claimedIPPort.IpAddr) 1227 if !ok { 1228 p.Log.Debug(malformedMessageLog, 1229 zap.Stringer("nodeID", p.id), 1230 zap.Stringer("messageOp", message.PeerListOp), 1231 zap.String("field", "ip"), 1232 zap.Int("ipLen", len(claimedIPPort.IpAddr)), 1233 ) 1234 p.StartClose() 1235 return 1236 } 1237 1238 port := uint16(claimedIPPort.IpPort) 1239 if port == 0 { 1240 p.Log.Debug(malformedMessageLog, 1241 zap.Stringer("nodeID", p.id), 1242 zap.Stringer("messageOp", message.PeerListOp), 1243 zap.String("field", "port"), 1244 zap.Uint16("port", port), 1245 ) 1246 p.StartClose() 1247 return 1248 } 1249 1250 discoveredIPs[i] = ips.NewClaimedIPPort( 1251 tlsCert, 1252 netip.AddrPortFrom( 1253 addr, 1254 port, 1255 ), 1256 claimedIPPort.Timestamp, 1257 claimedIPPort.Signature, 1258 ) 1259 } 1260 1261 if err := p.Network.Track(discoveredIPs); err != nil { 1262 p.Log.Debug(malformedMessageLog, 1263 zap.Stringer("nodeID", p.id), 1264 zap.Stringer("messageOp", message.PeerListOp), 1265 zap.String("field", "claimedIP"), 1266 zap.Error(err), 1267 ) 1268 p.StartClose() 1269 } 1270 } 1271 1272 func (p *peer) nextTimeout() time.Time { 1273 return p.Clock.Time().Add(p.PongTimeout) 1274 } 1275 1276 func (p *peer) storeLastSent(time time.Time) { 1277 unixTime := time.Unix() 1278 atomic.StoreInt64(&p.Config.LastSent, unixTime) 1279 atomic.StoreInt64(&p.lastSent, unixTime) 1280 } 1281 1282 func (p *peer) storeLastReceived(time time.Time) { 1283 unixTime := time.Unix() 1284 atomic.StoreInt64(&p.Config.LastReceived, unixTime) 1285 atomic.StoreInt64(&p.lastReceived, unixTime) 1286 }