github.com/ava-labs/avalanchego@v1.11.11/network/peer/peer.go (about) 1 // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. 2 // See the file LICENSE for licensing terms. 3 4 package peer 5 6 import ( 7 "bufio" 8 "context" 9 "errors" 10 "io" 11 "math" 12 "net" 13 "net/netip" 14 "sync" 15 "sync/atomic" 16 "time" 17 18 "go.uber.org/zap" 19 20 "github.com/ava-labs/avalanchego/ids" 21 "github.com/ava-labs/avalanchego/message" 22 "github.com/ava-labs/avalanchego/proto/pb/p2p" 23 "github.com/ava-labs/avalanchego/staking" 24 "github.com/ava-labs/avalanchego/utils" 25 "github.com/ava-labs/avalanchego/utils/bloom" 26 "github.com/ava-labs/avalanchego/utils/constants" 27 "github.com/ava-labs/avalanchego/utils/crypto/bls" 28 "github.com/ava-labs/avalanchego/utils/ips" 29 "github.com/ava-labs/avalanchego/utils/json" 30 "github.com/ava-labs/avalanchego/utils/set" 31 "github.com/ava-labs/avalanchego/utils/wrappers" 32 "github.com/ava-labs/avalanchego/version" 33 ) 34 35 const ( 36 // maxBloomSaltLen restricts the allowed size of the bloom salt to prevent 37 // excessively expensive bloom filter contains checks. 38 maxBloomSaltLen = 32 39 // maxNumTrackedSubnets limits how many subnets a peer can track to prevent 40 // excessive memory usage. 41 maxNumTrackedSubnets = 16 42 43 disconnectingLog = "disconnecting from peer" 44 failedToCreateMessageLog = "failed to create message" 45 failedToSetDeadlineLog = "failed to set connection deadline" 46 failedToGetUptimeLog = "failed to get peer uptime percentage" 47 malformedMessageLog = "malformed message" 48 ) 49 50 var ( 51 errClosed = errors.New("closed") 52 53 _ Peer = (*peer)(nil) 54 ) 55 56 // Peer encapsulates all of the functionality required to send and receive 57 // messages with a remote peer. 58 type Peer interface { 59 // ID returns the nodeID of the remote peer. 60 ID() ids.NodeID 61 62 // Cert returns the certificate that the remote peer is using to 63 // authenticate their messages. 64 Cert() *staking.Certificate 65 66 // LastSent returns the last time a message was sent to the peer. 67 LastSent() time.Time 68 69 // LastReceived returns the last time a message was received from the peer. 70 LastReceived() time.Time 71 72 // Ready returns true if the peer has finished the p2p handshake and is 73 // ready to send and receive messages. 74 Ready() bool 75 76 // AwaitReady will block until the peer has finished the p2p handshake. If 77 // the context is cancelled or the peer starts closing, then an error will 78 // be returned. 79 AwaitReady(ctx context.Context) error 80 81 // Info returns a description of the state of this peer. It should only be 82 // called after [Ready] returns true. 83 Info() Info 84 85 // IP returns the claimed IP and signature provided by this peer during the 86 // handshake. It should only be called after [Ready] returns true. 87 IP() *SignedIP 88 89 // Version returns the claimed node version this peer is running. It should 90 // only be called after [Ready] returns true. 91 Version() *version.Application 92 93 // TrackedSubnets returns the subnets this peer is running. It should only 94 // be called after [Ready] returns true. 95 TrackedSubnets() set.Set[ids.ID] 96 97 // ObservedUptime returns the local node's subnet uptime according to the 98 // peer. The value ranges from [0, 100]. It should only be called after 99 // [Ready] returns true. 100 ObservedUptime(subnetID ids.ID) (uint32, bool) 101 102 // Send attempts to send [msg] to the peer. The peer takes ownership of 103 // [msg] for reference counting. This returns false if the message is 104 // guaranteed not to be delivered to the peer. 105 Send(ctx context.Context, msg message.OutboundMessage) bool 106 107 // StartSendGetPeerList attempts to send a GetPeerList message to this peer 108 // on this peer's gossip routine. It is not guaranteed that a GetPeerList 109 // will be sent. 110 StartSendGetPeerList() 111 112 // StartClose will begin shutting down the peer. It will not block. 113 StartClose() 114 115 // Closed returns true once the peer has been fully shutdown. It is 116 // guaranteed that no more messages will be received by this peer once this 117 // returns true. 118 Closed() bool 119 120 // AwaitClosed will block until the peer has been fully shutdown. If the 121 // context is cancelled, then an error will be returned. 122 AwaitClosed(ctx context.Context) error 123 } 124 125 type peer struct { 126 *Config 127 128 // the connection object that is used to read/write messages from 129 conn net.Conn 130 131 // [cert] is this peer's certificate, specifically the leaf of the 132 // certificate chain they provided. 133 cert *staking.Certificate 134 135 // node ID of this peer. 136 id ids.NodeID 137 138 // queue of messages to send to this peer. 139 messageQueue MessageQueue 140 141 // ip is the claimed IP the peer gave us in the Handshake message. 142 ip *SignedIP 143 // version is the claimed version the peer is running that we received in 144 // the Handshake message. 145 version *version.Application 146 // trackedSubnets are the subnetIDs the peer sent us in the Handshake 147 // message. The primary network ID is always included. 148 trackedSubnets set.Set[ids.ID] 149 // options of ACPs provided in the Handshake message. 150 supportedACPs set.Set[uint32] 151 objectedACPs set.Set[uint32] 152 153 // txIDOfVerifiedBLSKey is the txID that added the BLS key that was most 154 // recently verified to have signed the IP. 155 // 156 // Invariant: Prior to the handshake being completed, this can only be 157 // accessed by the reader goroutine. After the handshake has been completed, 158 // this can only be accessed by the message sender goroutine. 159 txIDOfVerifiedBLSKey ids.ID 160 161 observedUptimesLock sync.RWMutex 162 // [observedUptimesLock] must be held while accessing [observedUptime] 163 // Subnet ID --> Our uptime for the given subnet as perceived by the peer 164 observedUptimes map[ids.ID]uint32 165 166 // True if this peer has sent us a valid Handshake message and 167 // is running a compatible version. 168 // Only modified on the connection's reader routine. 169 gotHandshake utils.Atomic[bool] 170 171 // True if the peer: 172 // * Has sent us a Handshake message 173 // * Has sent us a PeerList message 174 // * Is running a compatible version 175 // Only modified on the connection's reader routine. 176 finishedHandshake utils.Atomic[bool] 177 178 // onFinishHandshake is closed when the peer finishes the p2p handshake. 179 onFinishHandshake chan struct{} 180 181 // numExecuting is the number of goroutines this peer is currently using 182 numExecuting int64 183 startClosingOnce sync.Once 184 // onClosingCtx is canceled when the peer starts closing 185 onClosingCtx context.Context 186 // onClosingCtxCancel cancels onClosingCtx 187 onClosingCtxCancel func() 188 189 // onClosed is closed when the peer is closed 190 onClosed chan struct{} 191 192 // Unix time of the last message sent and received respectively 193 // Must only be accessed atomically 194 lastSent, lastReceived int64 195 196 // getPeerListChan signals that we should attempt to send a GetPeerList to 197 // this peer 198 getPeerListChan chan struct{} 199 } 200 201 // Start a new peer instance. 202 // 203 // Invariant: There must only be one peer running at a time with a reference to 204 // the same [config.InboundMsgThrottler]. 205 func Start( 206 config *Config, 207 conn net.Conn, 208 cert *staking.Certificate, 209 id ids.NodeID, 210 messageQueue MessageQueue, 211 ) Peer { 212 onClosingCtx, onClosingCtxCancel := context.WithCancel(context.Background()) 213 p := &peer{ 214 Config: config, 215 conn: conn, 216 cert: cert, 217 id: id, 218 messageQueue: messageQueue, 219 onFinishHandshake: make(chan struct{}), 220 numExecuting: 3, 221 onClosingCtx: onClosingCtx, 222 onClosingCtxCancel: onClosingCtxCancel, 223 onClosed: make(chan struct{}), 224 observedUptimes: make(map[ids.ID]uint32), 225 getPeerListChan: make(chan struct{}, 1), 226 } 227 228 go p.readMessages() 229 go p.writeMessages() 230 go p.sendNetworkMessages() 231 232 return p 233 } 234 235 func (p *peer) ID() ids.NodeID { 236 return p.id 237 } 238 239 func (p *peer) Cert() *staking.Certificate { 240 return p.cert 241 } 242 243 func (p *peer) LastSent() time.Time { 244 return time.Unix( 245 atomic.LoadInt64(&p.lastSent), 246 0, 247 ) 248 } 249 250 func (p *peer) LastReceived() time.Time { 251 return time.Unix( 252 atomic.LoadInt64(&p.lastReceived), 253 0, 254 ) 255 } 256 257 func (p *peer) Ready() bool { 258 return p.finishedHandshake.Get() 259 } 260 261 func (p *peer) AwaitReady(ctx context.Context) error { 262 select { 263 case <-p.onFinishHandshake: 264 return nil 265 case <-p.onClosed: 266 return errClosed 267 case <-ctx.Done(): 268 return ctx.Err() 269 } 270 } 271 272 func (p *peer) Info() Info { 273 uptimes := make(map[ids.ID]json.Uint32, p.MySubnets.Len()) 274 for subnetID := range p.MySubnets { 275 uptime, exist := p.ObservedUptime(subnetID) 276 if !exist { 277 continue 278 } 279 uptimes[subnetID] = json.Uint32(uptime) 280 } 281 282 primaryUptime, exist := p.ObservedUptime(constants.PrimaryNetworkID) 283 if !exist { 284 primaryUptime = 0 285 } 286 287 ip, _ := ips.ParseAddrPort(p.conn.RemoteAddr().String()) 288 return Info{ 289 IP: ip, 290 PublicIP: p.ip.AddrPort, 291 ID: p.id, 292 Version: p.version.String(), 293 LastSent: p.LastSent(), 294 LastReceived: p.LastReceived(), 295 ObservedUptime: json.Uint32(primaryUptime), 296 ObservedSubnetUptimes: uptimes, 297 TrackedSubnets: p.trackedSubnets, 298 SupportedACPs: p.supportedACPs, 299 ObjectedACPs: p.objectedACPs, 300 } 301 } 302 303 func (p *peer) IP() *SignedIP { 304 return p.ip 305 } 306 307 func (p *peer) Version() *version.Application { 308 return p.version 309 } 310 311 func (p *peer) TrackedSubnets() set.Set[ids.ID] { 312 return p.trackedSubnets 313 } 314 315 func (p *peer) ObservedUptime(subnetID ids.ID) (uint32, bool) { 316 p.observedUptimesLock.RLock() 317 defer p.observedUptimesLock.RUnlock() 318 319 uptime, exist := p.observedUptimes[subnetID] 320 return uptime, exist 321 } 322 323 func (p *peer) Send(ctx context.Context, msg message.OutboundMessage) bool { 324 return p.messageQueue.Push(ctx, msg) 325 } 326 327 func (p *peer) StartSendGetPeerList() { 328 select { 329 case p.getPeerListChan <- struct{}{}: 330 default: 331 } 332 } 333 334 func (p *peer) StartClose() { 335 p.startClosingOnce.Do(func() { 336 if err := p.conn.Close(); err != nil { 337 p.Log.Debug("failed to close connection", 338 zap.Stringer("nodeID", p.id), 339 zap.Error(err), 340 ) 341 } 342 343 p.messageQueue.Close() 344 p.onClosingCtxCancel() 345 }) 346 } 347 348 func (p *peer) Closed() bool { 349 select { 350 case _, ok := <-p.onClosed: 351 return !ok 352 default: 353 return false 354 } 355 } 356 357 func (p *peer) AwaitClosed(ctx context.Context) error { 358 select { 359 case <-p.onClosed: 360 return nil 361 case <-ctx.Done(): 362 return ctx.Err() 363 } 364 } 365 366 // close should be called at the end of each goroutine that has been spun up. 367 // When the last goroutine is exiting, the peer will be marked as closed. 368 func (p *peer) close() { 369 if atomic.AddInt64(&p.numExecuting, -1) != 0 { 370 return 371 } 372 373 p.Network.Disconnected(p.id) 374 close(p.onClosed) 375 } 376 377 // Read and handle messages from this peer. 378 // When this method returns, the connection is closed. 379 func (p *peer) readMessages() { 380 // Track this node with the inbound message throttler. 381 p.InboundMsgThrottler.AddNode(p.id) 382 defer func() { 383 p.InboundMsgThrottler.RemoveNode(p.id) 384 p.StartClose() 385 p.close() 386 }() 387 388 // Continuously read and handle messages from this peer. 389 reader := bufio.NewReaderSize(p.conn, p.Config.ReadBufferSize) 390 msgLenBytes := make([]byte, wrappers.IntLen) 391 for { 392 // Time out and close connection if we can't read the message length 393 if err := p.conn.SetReadDeadline(p.nextTimeout()); err != nil { 394 p.Log.Verbo(failedToSetDeadlineLog, 395 zap.Stringer("nodeID", p.id), 396 zap.String("direction", "read"), 397 zap.Error(err), 398 ) 399 return 400 } 401 402 // Read the message length 403 if _, err := io.ReadFull(reader, msgLenBytes); err != nil { 404 p.Log.Verbo("error reading message length", 405 zap.Stringer("nodeID", p.id), 406 zap.Error(err), 407 ) 408 return 409 } 410 411 // Parse the message length 412 msgLen, err := readMsgLen(msgLenBytes, constants.DefaultMaxMessageSize) 413 if err != nil { 414 p.Log.Verbo("error parsing message length", 415 zap.Stringer("nodeID", p.id), 416 zap.Error(err), 417 ) 418 return 419 } 420 421 // Wait until the throttler says we can proceed to read the message. 422 // 423 // Invariant: When done processing this message, onFinishedHandling() is 424 // called exactly once. If this is not honored, the message throttler 425 // will leak until no new messages can be read. You can look at message 426 // throttler metrics to verify that there is no leak. 427 // 428 // Invariant: There must only be one call to Acquire at any given time 429 // with the same nodeID. In this package, only this goroutine ever 430 // performs Acquire. Additionally, we ensure that this goroutine has 431 // exited before calling [Network.Disconnected] to guarantee that there 432 // can't be multiple instances of this goroutine running over different 433 // peer instances. 434 onFinishedHandling := p.InboundMsgThrottler.Acquire( 435 p.onClosingCtx, 436 uint64(msgLen), 437 p.id, 438 ) 439 440 // If the peer is shutting down, there's no need to read the message. 441 if err := p.onClosingCtx.Err(); err != nil { 442 onFinishedHandling() 443 return 444 } 445 446 // Time out and close connection if we can't read message 447 if err := p.conn.SetReadDeadline(p.nextTimeout()); err != nil { 448 p.Log.Verbo(failedToSetDeadlineLog, 449 zap.Stringer("nodeID", p.id), 450 zap.String("direction", "read"), 451 zap.Error(err), 452 ) 453 onFinishedHandling() 454 return 455 } 456 457 // Read the message 458 msgBytes := make([]byte, msgLen) 459 if _, err := io.ReadFull(reader, msgBytes); err != nil { 460 p.Log.Verbo("error reading message", 461 zap.Stringer("nodeID", p.id), 462 zap.Error(err), 463 ) 464 onFinishedHandling() 465 return 466 } 467 468 // Track the time it takes from now until the time the message is 469 // handled (in the event this message is handled at the network level) 470 // or the time the message is handed to the router (in the event this 471 // message is not handled at the network level.) 472 // [p.CPUTracker.StopProcessing] must be called when this loop iteration is 473 // finished. 474 p.ResourceTracker.StartProcessing(p.id, p.Clock.Time()) 475 476 p.Log.Verbo("parsing message", 477 zap.Stringer("nodeID", p.id), 478 zap.Binary("messageBytes", msgBytes), 479 ) 480 481 // Parse the message 482 msg, err := p.MessageCreator.Parse(msgBytes, p.id, onFinishedHandling) 483 if err != nil { 484 p.Log.Verbo("failed to parse message", 485 zap.Stringer("nodeID", p.id), 486 zap.Binary("messageBytes", msgBytes), 487 zap.Error(err), 488 ) 489 490 p.Metrics.NumFailedToParse.Inc() 491 492 // Couldn't parse the message. Read the next one. 493 onFinishedHandling() 494 p.ResourceTracker.StopProcessing(p.id, p.Clock.Time()) 495 continue 496 } 497 498 now := p.Clock.Time() 499 p.storeLastReceived(now) 500 p.Metrics.Received(msg, msgLen) 501 502 // Handle the message. Note that when we are done handling this message, 503 // we must call [msg.OnFinishedHandling()]. 504 p.handle(msg) 505 p.ResourceTracker.StopProcessing(p.id, p.Clock.Time()) 506 } 507 } 508 509 func (p *peer) writeMessages() { 510 defer func() { 511 p.StartClose() 512 p.close() 513 }() 514 515 writer := bufio.NewWriterSize(p.conn, p.Config.WriteBufferSize) 516 517 // Make sure that the Handshake is the first message sent 518 mySignedIP, err := p.IPSigner.GetSignedIP() 519 if err != nil { 520 p.Log.Error("failed to get signed IP", 521 zap.Stringer("nodeID", p.id), 522 zap.Error(err), 523 ) 524 return 525 } 526 if port := mySignedIP.AddrPort.Port(); port == 0 { 527 p.Log.Error("signed IP has invalid port", 528 zap.Stringer("nodeID", p.id), 529 zap.Uint16("port", port), 530 ) 531 return 532 } 533 534 myVersion := p.VersionCompatibility.Version() 535 knownPeersFilter, knownPeersSalt := p.Network.KnownPeers() 536 537 _, areWeAPrimaryNetworkValidator := p.Validators.GetValidator(constants.PrimaryNetworkID, p.MyNodeID) 538 msg, err := p.MessageCreator.Handshake( 539 p.NetworkID, 540 p.Clock.Unix(), 541 mySignedIP.AddrPort, 542 myVersion.Name, 543 uint32(myVersion.Major), 544 uint32(myVersion.Minor), 545 uint32(myVersion.Patch), 546 mySignedIP.Timestamp, 547 mySignedIP.TLSSignature, 548 mySignedIP.BLSSignatureBytes, 549 p.MySubnets.List(), 550 p.SupportedACPs, 551 p.ObjectedACPs, 552 knownPeersFilter, 553 knownPeersSalt, 554 areWeAPrimaryNetworkValidator, 555 ) 556 if err != nil { 557 p.Log.Error(failedToCreateMessageLog, 558 zap.Stringer("nodeID", p.id), 559 zap.Stringer("messageOp", message.HandshakeOp), 560 zap.Error(err), 561 ) 562 return 563 } 564 565 p.writeMessage(writer, msg) 566 567 for { 568 msg, ok := p.messageQueue.PopNow() 569 if ok { 570 p.writeMessage(writer, msg) 571 continue 572 } 573 574 // Make sure the peer was fully sent all prior messages before 575 // blocking. 576 if err := writer.Flush(); err != nil { 577 p.Log.Verbo("failed to flush writer", 578 zap.Stringer("nodeID", p.id), 579 zap.Error(err), 580 ) 581 return 582 } 583 584 msg, ok = p.messageQueue.Pop() 585 if !ok { 586 // This peer is closing 587 return 588 } 589 590 p.writeMessage(writer, msg) 591 } 592 } 593 594 func (p *peer) writeMessage(writer io.Writer, msg message.OutboundMessage) { 595 msgBytes := msg.Bytes() 596 p.Log.Verbo("sending message", 597 zap.Stringer("op", msg.Op()), 598 zap.Stringer("nodeID", p.id), 599 zap.Binary("messageBytes", msgBytes), 600 ) 601 602 if err := p.conn.SetWriteDeadline(p.nextTimeout()); err != nil { 603 p.Log.Verbo(failedToSetDeadlineLog, 604 zap.Stringer("nodeID", p.id), 605 zap.String("direction", "write"), 606 zap.Error(err), 607 ) 608 return 609 } 610 611 msgLen := uint32(len(msgBytes)) 612 msgLenBytes, err := writeMsgLen(msgLen, constants.DefaultMaxMessageSize) 613 if err != nil { 614 p.Log.Verbo("error writing message length", 615 zap.Stringer("nodeID", p.id), 616 zap.Error(err), 617 ) 618 return 619 } 620 621 // Write the message 622 var buf net.Buffers = [][]byte{msgLenBytes[:], msgBytes} 623 if _, err := io.CopyN(writer, &buf, int64(wrappers.IntLen+msgLen)); err != nil { 624 p.Log.Verbo("error writing message", 625 zap.Stringer("nodeID", p.id), 626 zap.Error(err), 627 ) 628 return 629 } 630 631 now := p.Clock.Time() 632 p.storeLastSent(now) 633 p.Metrics.Sent(msg) 634 } 635 636 func (p *peer) sendNetworkMessages() { 637 sendPingsTicker := time.NewTicker(p.PingFrequency) 638 defer func() { 639 sendPingsTicker.Stop() 640 641 p.StartClose() 642 p.close() 643 }() 644 645 for { 646 select { 647 case <-p.getPeerListChan: 648 knownPeersFilter, knownPeersSalt := p.Config.Network.KnownPeers() 649 _, areWeAPrimaryNetworkValidator := p.Validators.GetValidator(constants.PrimaryNetworkID, p.MyNodeID) 650 msg, err := p.Config.MessageCreator.GetPeerList( 651 knownPeersFilter, 652 knownPeersSalt, 653 areWeAPrimaryNetworkValidator, 654 ) 655 if err != nil { 656 p.Log.Error(failedToCreateMessageLog, 657 zap.Stringer("nodeID", p.id), 658 zap.Stringer("messageOp", message.GetPeerListOp), 659 zap.Error(err), 660 ) 661 return 662 } 663 664 p.Send(p.onClosingCtx, msg) 665 case <-sendPingsTicker.C: 666 if !p.Network.AllowConnection(p.id) { 667 p.Log.Debug(disconnectingLog, 668 zap.String("reason", "connection is no longer desired"), 669 zap.Stringer("nodeID", p.id), 670 ) 671 return 672 } 673 674 // Only check if we should disconnect after the handshake is 675 // finished to avoid race conditions and accessing uninitialized 676 // values. 677 if p.finishedHandshake.Get() && p.shouldDisconnect() { 678 return 679 } 680 681 primaryUptime, subnetUptimes := p.getUptimes() 682 pingMessage, err := p.MessageCreator.Ping(primaryUptime, subnetUptimes) 683 if err != nil { 684 p.Log.Error(failedToCreateMessageLog, 685 zap.Stringer("nodeID", p.id), 686 zap.Stringer("messageOp", message.PingOp), 687 zap.Error(err), 688 ) 689 return 690 } 691 692 p.Send(p.onClosingCtx, pingMessage) 693 case <-p.onClosingCtx.Done(): 694 return 695 } 696 } 697 } 698 699 // shouldDisconnect is called both during receipt of the Handshake message and 700 // periodically when sending a Ping message (after finishing the handshake!). 701 // 702 // It is called during the Handshake to prevent marking a peer as connected and 703 // then immediately disconnecting from them. 704 // 705 // It is called when sending a Ping message to account for validator set 706 // changes. It's called when sending a Ping rather than in a validator set 707 // callback to avoid signature verification on the P-chain accept path. 708 func (p *peer) shouldDisconnect() bool { 709 if err := p.VersionCompatibility.Compatible(p.version); err != nil { 710 p.Log.Debug(disconnectingLog, 711 zap.String("reason", "version not compatible"), 712 zap.Stringer("nodeID", p.id), 713 zap.Stringer("peerVersion", p.version), 714 zap.Error(err), 715 ) 716 return true 717 } 718 719 // Enforce that all validators that have registered a BLS key are signing 720 // their IP with it after the activation of Durango. 721 vdr, ok := p.Validators.GetValidator(constants.PrimaryNetworkID, p.id) 722 if !ok || vdr.PublicKey == nil || vdr.TxID == p.txIDOfVerifiedBLSKey { 723 return false 724 } 725 726 validSignature := bls.VerifyProofOfPossession( 727 vdr.PublicKey, 728 p.ip.BLSSignature, 729 p.ip.UnsignedIP.bytes(), 730 ) 731 if !validSignature { 732 p.Log.Debug(disconnectingLog, 733 zap.String("reason", "invalid BLS signature"), 734 zap.Stringer("nodeID", p.id), 735 ) 736 return true 737 } 738 739 // Avoid unnecessary signature verifications by only verifying the signature 740 // once per validation period. 741 p.txIDOfVerifiedBLSKey = vdr.TxID 742 return false 743 } 744 745 func (p *peer) handle(msg message.InboundMessage) { 746 switch m := msg.Message().(type) { // Network-related message types 747 case *p2p.Ping: 748 p.handlePing(m) 749 msg.OnFinishedHandling() 750 return 751 case *p2p.Pong: 752 p.handlePong(m) 753 msg.OnFinishedHandling() 754 return 755 case *p2p.Handshake: 756 p.handleHandshake(m) 757 msg.OnFinishedHandling() 758 return 759 case *p2p.GetPeerList: 760 p.handleGetPeerList(m) 761 msg.OnFinishedHandling() 762 return 763 case *p2p.PeerList: 764 p.handlePeerList(m) 765 msg.OnFinishedHandling() 766 return 767 } 768 if !p.finishedHandshake.Get() { 769 p.Log.Debug("dropping message", 770 zap.Stringer("nodeID", p.id), 771 zap.Stringer("messageOp", msg.Op()), 772 zap.String("reason", "handshake isn't finished"), 773 ) 774 msg.OnFinishedHandling() 775 return 776 } 777 778 // Consensus and app-level messages 779 p.Router.HandleInbound(context.Background(), msg) 780 } 781 782 func (p *peer) handlePing(msg *p2p.Ping) { 783 if msg.Uptime > 100 { 784 p.Log.Debug(malformedMessageLog, 785 zap.Stringer("nodeID", p.id), 786 zap.Stringer("messageOp", message.PingOp), 787 zap.Stringer("subnetID", constants.PrimaryNetworkID), 788 zap.Uint32("uptime", msg.Uptime), 789 ) 790 p.StartClose() 791 return 792 } 793 p.observeUptime(constants.PrimaryNetworkID, msg.Uptime) 794 795 for _, subnetUptime := range msg.SubnetUptimes { 796 subnetID, err := ids.ToID(subnetUptime.SubnetId) 797 if err != nil { 798 p.Log.Debug(malformedMessageLog, 799 zap.Stringer("nodeID", p.id), 800 zap.Stringer("messageOp", message.PingOp), 801 zap.String("field", "subnetID"), 802 zap.Error(err), 803 ) 804 p.StartClose() 805 return 806 } 807 808 if !p.MySubnets.Contains(subnetID) { 809 p.Log.Debug(malformedMessageLog, 810 zap.Stringer("nodeID", p.id), 811 zap.Stringer("messageOp", message.PingOp), 812 zap.Stringer("subnetID", subnetID), 813 zap.String("reason", "not tracking subnet"), 814 ) 815 p.StartClose() 816 return 817 } 818 819 uptime := subnetUptime.Uptime 820 if uptime > 100 { 821 p.Log.Debug(malformedMessageLog, 822 zap.Stringer("nodeID", p.id), 823 zap.Stringer("messageOp", message.PingOp), 824 zap.Stringer("subnetID", subnetID), 825 zap.Uint32("uptime", uptime), 826 ) 827 p.StartClose() 828 return 829 } 830 p.observeUptime(subnetID, uptime) 831 } 832 833 pongMessage, err := p.MessageCreator.Pong() 834 if err != nil { 835 p.Log.Error(failedToCreateMessageLog, 836 zap.Stringer("nodeID", p.id), 837 zap.Stringer("messageOp", message.PongOp), 838 zap.Error(err), 839 ) 840 p.StartClose() 841 return 842 } 843 844 p.Send(p.onClosingCtx, pongMessage) 845 } 846 847 func (p *peer) getUptimes() (uint32, []*p2p.SubnetUptime) { 848 primaryUptime, err := p.UptimeCalculator.CalculateUptimePercent( 849 p.id, 850 constants.PrimaryNetworkID, 851 ) 852 if err != nil { 853 p.Log.Debug(failedToGetUptimeLog, 854 zap.Stringer("nodeID", p.id), 855 zap.Stringer("subnetID", constants.PrimaryNetworkID), 856 zap.Error(err), 857 ) 858 primaryUptime = 0 859 } 860 861 subnetUptimes := make([]*p2p.SubnetUptime, 0, p.MySubnets.Len()) 862 for subnetID := range p.MySubnets { 863 if !p.trackedSubnets.Contains(subnetID) { 864 continue 865 } 866 867 subnetUptime, err := p.UptimeCalculator.CalculateUptimePercent(p.id, subnetID) 868 if err != nil { 869 p.Log.Debug(failedToGetUptimeLog, 870 zap.Stringer("nodeID", p.id), 871 zap.Stringer("subnetID", subnetID), 872 zap.Error(err), 873 ) 874 continue 875 } 876 877 subnetID := subnetID 878 subnetUptimes = append(subnetUptimes, &p2p.SubnetUptime{ 879 SubnetId: subnetID[:], 880 Uptime: uint32(subnetUptime * 100), 881 }) 882 } 883 884 primaryUptimePercent := uint32(primaryUptime * 100) 885 return primaryUptimePercent, subnetUptimes 886 } 887 888 func (*peer) handlePong(*p2p.Pong) {} 889 890 // Record that the given peer perceives our uptime for the given [subnetID] 891 // to be [uptime]. 892 // Assumes [uptime] is in the range [0, 100] and [subnetID] is a valid ID of a 893 // subnet this peer tracks. 894 func (p *peer) observeUptime(subnetID ids.ID, uptime uint32) { 895 p.observedUptimesLock.Lock() 896 p.observedUptimes[subnetID] = uptime // [0, 100] percentage 897 p.observedUptimesLock.Unlock() 898 } 899 900 func (p *peer) handleHandshake(msg *p2p.Handshake) { 901 if p.gotHandshake.Get() { 902 p.Log.Debug(malformedMessageLog, 903 zap.Stringer("nodeID", p.id), 904 zap.Stringer("messageOp", message.HandshakeOp), 905 zap.String("reason", "already received handshake"), 906 ) 907 p.StartClose() 908 return 909 } 910 911 if msg.NetworkId != p.NetworkID { 912 p.Log.Debug(malformedMessageLog, 913 zap.Stringer("nodeID", p.id), 914 zap.Stringer("messageOp", message.HandshakeOp), 915 zap.String("field", "networkID"), 916 zap.Uint32("peerNetworkID", msg.NetworkId), 917 zap.Uint32("ourNetworkID", p.NetworkID), 918 ) 919 p.StartClose() 920 return 921 } 922 923 localTime := p.Clock.Time() 924 localUnixTime := uint64(localTime.Unix()) 925 clockDifference := math.Abs(float64(msg.MyTime) - float64(localUnixTime)) 926 927 p.Metrics.ClockSkewCount.Inc() 928 p.Metrics.ClockSkewSum.Add(clockDifference) 929 930 if clockDifference > p.MaxClockDifference.Seconds() { 931 log := p.Log.Debug 932 if _, ok := p.Beacons.GetValidator(constants.PrimaryNetworkID, p.id); ok { 933 log = p.Log.Warn 934 } 935 log(malformedMessageLog, 936 zap.Stringer("nodeID", p.id), 937 zap.Stringer("messageOp", message.HandshakeOp), 938 zap.String("field", "myTime"), 939 zap.Uint64("peerTime", msg.MyTime), 940 zap.Uint64("localTime", localUnixTime), 941 ) 942 p.StartClose() 943 return 944 } 945 946 p.version = &version.Application{ 947 Name: msg.Client.GetName(), 948 Major: int(msg.Client.GetMajor()), 949 Minor: int(msg.Client.GetMinor()), 950 Patch: int(msg.Client.GetPatch()), 951 } 952 953 if p.VersionCompatibility.Version().Before(p.version) { 954 log := p.Log.Debug 955 if _, ok := p.Beacons.GetValidator(constants.PrimaryNetworkID, p.id); ok { 956 log = p.Log.Info 957 } 958 log("peer attempting to connect with newer version. You may want to update your client", 959 zap.Stringer("nodeID", p.id), 960 zap.Stringer("peerVersion", p.version), 961 ) 962 } 963 964 // handle subnet IDs 965 if numTrackedSubnets := len(msg.TrackedSubnets); numTrackedSubnets > maxNumTrackedSubnets { 966 p.Log.Debug(malformedMessageLog, 967 zap.Stringer("nodeID", p.id), 968 zap.Stringer("messageOp", message.HandshakeOp), 969 zap.String("field", "trackedSubnets"), 970 zap.Int("numTrackedSubnets", numTrackedSubnets), 971 ) 972 p.StartClose() 973 return 974 } 975 976 p.trackedSubnets.Add(constants.PrimaryNetworkID) 977 for _, subnetIDBytes := range msg.TrackedSubnets { 978 subnetID, err := ids.ToID(subnetIDBytes) 979 if err != nil { 980 p.Log.Debug(malformedMessageLog, 981 zap.Stringer("nodeID", p.id), 982 zap.Stringer("messageOp", message.HandshakeOp), 983 zap.String("field", "trackedSubnets"), 984 zap.Error(err), 985 ) 986 p.StartClose() 987 return 988 } 989 p.trackedSubnets.Add(subnetID) 990 } 991 992 for _, acp := range msg.SupportedAcps { 993 if constants.CurrentACPs.Contains(acp) { 994 p.supportedACPs.Add(acp) 995 } 996 } 997 for _, acp := range msg.ObjectedAcps { 998 if constants.CurrentACPs.Contains(acp) { 999 p.objectedACPs.Add(acp) 1000 } 1001 } 1002 1003 if p.supportedACPs.Overlaps(p.objectedACPs) { 1004 p.Log.Debug(malformedMessageLog, 1005 zap.Stringer("nodeID", p.id), 1006 zap.Stringer("messageOp", message.HandshakeOp), 1007 zap.String("field", "acps"), 1008 zap.Reflect("supportedACPs", p.supportedACPs), 1009 zap.Reflect("objectedACPs", p.objectedACPs), 1010 ) 1011 p.StartClose() 1012 return 1013 } 1014 1015 var ( 1016 knownPeers = bloom.EmptyFilter 1017 salt []byte 1018 ) 1019 if msg.KnownPeers != nil { 1020 var err error 1021 knownPeers, err = bloom.Parse(msg.KnownPeers.Filter) 1022 if err != nil { 1023 p.Log.Debug(malformedMessageLog, 1024 zap.Stringer("nodeID", p.id), 1025 zap.Stringer("messageOp", message.HandshakeOp), 1026 zap.String("field", "knownPeers.filter"), 1027 zap.Error(err), 1028 ) 1029 p.StartClose() 1030 return 1031 } 1032 1033 salt = msg.KnownPeers.Salt 1034 if saltLen := len(salt); saltLen > maxBloomSaltLen { 1035 p.Log.Debug(malformedMessageLog, 1036 zap.Stringer("nodeID", p.id), 1037 zap.Stringer("messageOp", message.HandshakeOp), 1038 zap.String("field", "knownPeers.salt"), 1039 zap.Int("saltLen", saltLen), 1040 ) 1041 p.StartClose() 1042 return 1043 } 1044 } 1045 1046 addr, ok := ips.AddrFromSlice(msg.IpAddr) 1047 if !ok { 1048 p.Log.Debug(malformedMessageLog, 1049 zap.Stringer("nodeID", p.id), 1050 zap.Stringer("messageOp", message.HandshakeOp), 1051 zap.String("field", "ip"), 1052 zap.Int("ipLen", len(msg.IpAddr)), 1053 ) 1054 p.StartClose() 1055 return 1056 } 1057 1058 port := uint16(msg.IpPort) 1059 if msg.IpPort == 0 { 1060 p.Log.Debug(malformedMessageLog, 1061 zap.Stringer("nodeID", p.id), 1062 zap.Stringer("messageOp", message.HandshakeOp), 1063 zap.String("field", "port"), 1064 zap.Uint16("port", port), 1065 ) 1066 p.StartClose() 1067 return 1068 } 1069 1070 p.ip = &SignedIP{ 1071 UnsignedIP: UnsignedIP{ 1072 AddrPort: netip.AddrPortFrom( 1073 addr, 1074 port, 1075 ), 1076 Timestamp: msg.IpSigningTime, 1077 }, 1078 TLSSignature: msg.IpNodeIdSig, 1079 } 1080 maxTimestamp := localTime.Add(p.MaxClockDifference) 1081 if err := p.ip.Verify(p.cert, maxTimestamp); err != nil { 1082 log := p.Log.Debug 1083 if _, ok := p.Beacons.GetValidator(constants.PrimaryNetworkID, p.id); ok { 1084 log = p.Log.Warn 1085 } 1086 log(malformedMessageLog, 1087 zap.Stringer("nodeID", p.id), 1088 zap.Stringer("messageOp", message.HandshakeOp), 1089 zap.String("field", "tlsSignature"), 1090 zap.Uint64("peerTime", msg.MyTime), 1091 zap.Uint64("localTime", localUnixTime), 1092 zap.Error(err), 1093 ) 1094 1095 p.StartClose() 1096 return 1097 } 1098 1099 signature, err := bls.SignatureFromBytes(msg.IpBlsSig) 1100 if err != nil { 1101 p.Log.Debug(malformedMessageLog, 1102 zap.Stringer("nodeID", p.id), 1103 zap.Stringer("messageOp", message.HandshakeOp), 1104 zap.String("field", "blsSignature"), 1105 zap.Error(err), 1106 ) 1107 p.StartClose() 1108 return 1109 } 1110 1111 p.ip.BLSSignature = signature 1112 p.ip.BLSSignatureBytes = msg.IpBlsSig 1113 1114 // If the peer is running an incompatible version or has an invalid BLS 1115 // signature, disconnect from them prior to marking the handshake as 1116 // completed. 1117 if p.shouldDisconnect() { 1118 p.StartClose() 1119 return 1120 } 1121 1122 p.gotHandshake.Set(true) 1123 1124 peerIPs := p.Network.Peers(p.id, p.trackedSubnets, msg.AllSubnets, knownPeers, salt) 1125 1126 // We bypass throttling here to ensure that the handshake message is 1127 // acknowledged correctly. 1128 peerListMsg, err := p.Config.MessageCreator.PeerList(peerIPs, true /*=bypassThrottling*/) 1129 if err != nil { 1130 p.Log.Error(failedToCreateMessageLog, 1131 zap.Stringer("nodeID", p.id), 1132 zap.Stringer("messageOp", message.PeerListOp), 1133 zap.Error(err), 1134 ) 1135 p.StartClose() 1136 return 1137 } 1138 1139 if !p.Send(p.onClosingCtx, peerListMsg) { 1140 // Because throttling was marked to be bypassed with this message, 1141 // sending should only fail if the peer has started closing. 1142 p.Log.Debug("failed to send reliable message", 1143 zap.Stringer("nodeID", p.id), 1144 zap.Stringer("messageOp", message.PeerListOp), 1145 zap.Error(p.onClosingCtx.Err()), 1146 ) 1147 p.StartClose() 1148 } 1149 } 1150 1151 func (p *peer) handleGetPeerList(msg *p2p.GetPeerList) { 1152 if !p.finishedHandshake.Get() { 1153 p.Log.Debug(malformedMessageLog, 1154 zap.Stringer("nodeID", p.id), 1155 zap.Stringer("messageOp", message.GetPeerListOp), 1156 zap.String("reason", "not finished handshake"), 1157 ) 1158 return 1159 } 1160 1161 knownPeersMsg := msg.GetKnownPeers() 1162 filter, err := bloom.Parse(knownPeersMsg.GetFilter()) 1163 if err != nil { 1164 p.Log.Debug(malformedMessageLog, 1165 zap.Stringer("nodeID", p.id), 1166 zap.Stringer("messageOp", message.GetPeerListOp), 1167 zap.String("field", "knownPeers.filter"), 1168 zap.Error(err), 1169 ) 1170 p.StartClose() 1171 return 1172 } 1173 1174 salt := knownPeersMsg.GetSalt() 1175 if saltLen := len(salt); saltLen > maxBloomSaltLen { 1176 p.Log.Debug(malformedMessageLog, 1177 zap.Stringer("nodeID", p.id), 1178 zap.Stringer("messageOp", message.GetPeerListOp), 1179 zap.String("field", "knownPeers.salt"), 1180 zap.Int("saltLen", saltLen), 1181 ) 1182 p.StartClose() 1183 return 1184 } 1185 1186 peerIPs := p.Network.Peers(p.id, p.trackedSubnets, msg.AllSubnets, filter, salt) 1187 if len(peerIPs) == 0 { 1188 p.Log.Debug("skipping sending of empty peer list", 1189 zap.Stringer("nodeID", p.id), 1190 ) 1191 return 1192 } 1193 1194 // Bypass throttling is disabled here to follow the non-handshake message 1195 // sending pattern. 1196 peerListMsg, err := p.Config.MessageCreator.PeerList(peerIPs, false /*=bypassThrottling*/) 1197 if err != nil { 1198 p.Log.Error(failedToCreateMessageLog, 1199 zap.Stringer("nodeID", p.id), 1200 zap.Stringer("messageOp", message.PeerListOp), 1201 zap.Error(err), 1202 ) 1203 return 1204 } 1205 1206 p.Send(p.onClosingCtx, peerListMsg) 1207 } 1208 1209 func (p *peer) handlePeerList(msg *p2p.PeerList) { 1210 if !p.finishedHandshake.Get() { 1211 if !p.gotHandshake.Get() { 1212 return 1213 } 1214 1215 p.Network.Connected(p.id) 1216 p.finishedHandshake.Set(true) 1217 close(p.onFinishHandshake) 1218 } 1219 1220 discoveredIPs := make([]*ips.ClaimedIPPort, len(msg.ClaimedIpPorts)) // the peers this peer told us about 1221 for i, claimedIPPort := range msg.ClaimedIpPorts { 1222 tlsCert, err := staking.ParseCertificate(claimedIPPort.X509Certificate) 1223 if err != nil { 1224 p.Log.Debug(malformedMessageLog, 1225 zap.Stringer("nodeID", p.id), 1226 zap.Stringer("messageOp", message.PeerListOp), 1227 zap.String("field", "cert"), 1228 zap.Error(err), 1229 ) 1230 p.StartClose() 1231 return 1232 } 1233 1234 addr, ok := ips.AddrFromSlice(claimedIPPort.IpAddr) 1235 if !ok { 1236 p.Log.Debug(malformedMessageLog, 1237 zap.Stringer("nodeID", p.id), 1238 zap.Stringer("messageOp", message.PeerListOp), 1239 zap.String("field", "ip"), 1240 zap.Int("ipLen", len(claimedIPPort.IpAddr)), 1241 ) 1242 p.StartClose() 1243 return 1244 } 1245 1246 port := uint16(claimedIPPort.IpPort) 1247 if port == 0 { 1248 p.Log.Debug(malformedMessageLog, 1249 zap.Stringer("nodeID", p.id), 1250 zap.Stringer("messageOp", message.PeerListOp), 1251 zap.String("field", "port"), 1252 zap.Uint16("port", port), 1253 ) 1254 p.StartClose() 1255 return 1256 } 1257 1258 discoveredIPs[i] = ips.NewClaimedIPPort( 1259 tlsCert, 1260 netip.AddrPortFrom( 1261 addr, 1262 port, 1263 ), 1264 claimedIPPort.Timestamp, 1265 claimedIPPort.Signature, 1266 ) 1267 } 1268 1269 if err := p.Network.Track(discoveredIPs); err != nil { 1270 p.Log.Debug(malformedMessageLog, 1271 zap.Stringer("nodeID", p.id), 1272 zap.Stringer("messageOp", message.PeerListOp), 1273 zap.String("field", "claimedIP"), 1274 zap.Error(err), 1275 ) 1276 p.StartClose() 1277 } 1278 } 1279 1280 func (p *peer) nextTimeout() time.Time { 1281 return p.Clock.Time().Add(p.PongTimeout) 1282 } 1283 1284 func (p *peer) storeLastSent(time time.Time) { 1285 unixTime := time.Unix() 1286 atomic.StoreInt64(&p.Config.LastSent, unixTime) 1287 atomic.StoreInt64(&p.lastSent, unixTime) 1288 } 1289 1290 func (p *peer) storeLastReceived(time time.Time) { 1291 unixTime := time.Unix() 1292 atomic.StoreInt64(&p.Config.LastReceived, unixTime) 1293 atomic.StoreInt64(&p.lastReceived, unixTime) 1294 }