// Copyright (c) 2022 IoTeX Foundation
// This source code is provided 'as is' and no warranties are given as to title or non-infringement, merchantability
// or fitness for purpose and, to the extent permitted by law, all liability for your use of the code is disclaimed.
// This source code is governed by Apache License 2.0 that can be found in the LICENSE file.

package p2p

import (
	"context"
	"encoding/hex"
	"fmt"
	"strconv"
	"strings"
	"time"

	"github.com/libp2p/go-libp2p-core/peer"
	"github.com/multiformats/go-multiaddr"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"go.uber.org/zap"
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/timestamppb"

	"github.com/iotexproject/go-p2p"
	"github.com/iotexproject/go-pkgs/hash"
	goproto "github.com/iotexproject/iotex-proto/golang"
	"github.com/iotexproject/iotex-proto/golang/iotexrpc"

	"github.com/iotexproject/iotex-core/pkg/lifecycle"
	"github.com/iotexproject/iotex-core/pkg/log"
	"github.com/iotexproject/iotex-core/pkg/routine"
	"github.com/iotexproject/iotex-core/pkg/tracer"
	"github.com/iotexproject/iotex-core/server/itx/nodestats"
)

const (
	// values for the "status" label of the message metrics below
	_successStr = "success"
	_failureStr = "failure"
)

var (
	// _p2pMsgCounter counts P2P messages, labeled by protocol (broadcast/unicast),
	// message type, direction (in/out), peer identity, and handling status.
	_p2pMsgCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "iotex_p2p_message_counter",
			Help: "P2P message stats",
		},
		[]string{"protocol", "message", "direction", "peer", "status"},
	)
	// _p2pMsgLatency tracks inbound message latency (sender timestamp to receipt),
	// observed in milliseconds over 200 linear 10ms buckets.
	_p2pMsgLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "iotex_p2p_message_latency",
			Help:    "message latency",
			Buckets: prometheus.LinearBuckets(0, 10, 200),
		},
		[]string{"protocol", "message", "status"},
	)
	// ErrAgentNotStarted is the error returned when p2p agent has not been started
	ErrAgentNotStarted = errors.New("p2p agent has not been started")
)

func init() {
	prometheus.MustRegister(_p2pMsgCounter)
	prometheus.MustRegister(_p2pMsgLatency)
}

const (
	// TODO: the topic could be fine tuned
	_broadcastTopic    = "broadcast"
	_unicastTopic      = "unicast"
	_numDialRetries    = 8
	_dialRetryInterval = 2 * time.Second
)

type (
	// HandleBroadcastInbound handles broadcast message when agent listens it from the network
	HandleBroadcastInbound func(context.Context, uint32, string, proto.Message)

	// HandleUnicastInboundAsync handles unicast message when agent listens it from the network
	HandleUnicastInboundAsync func(context.Context, uint32, peer.AddrInfo, proto.Message)

	// Config is the config of p2p
	Config struct {
		Host           string   `yaml:"host"`
		Port           int      `yaml:"port"`
		ExternalHost   string   `yaml:"externalHost"`
		ExternalPort   int      `yaml:"externalPort"`
		BootstrapNodes []string `yaml:"bootstrapNodes"`
		MasterKey      string   `yaml:"masterKey"` // master key will be PrivateKey if not set.
		// RelayType is the type of P2P network relay. By default, the value is empty, meaning disabled. Two relay types
		// are supported: active, nat.
		RelayType         string              `yaml:"relayType"`
		ReconnectInterval time.Duration       `yaml:"reconnectInterval"`
		RateLimit         p2p.RateLimitConfig `yaml:"rateLimit"`
		EnableRateLimit   bool                `yaml:"enableRateLimit"`
		PrivateNetworkPSK string              `yaml:"privateNetworkPSK"`
		MaxPeers          int                 `yaml:"maxPeers"`
		MaxMessageSize    int                 `yaml:"maxMessageSize"`
	}

	// Agent is the agent to help the blockchain node connect into the P2P networks and send/receive messages
	Agent interface {
		lifecycle.StartStopper
		nodestats.StatsReporter
		// BroadcastOutbound sends a broadcast message to the whole network
		BroadcastOutbound(ctx context.Context, msg proto.Message) (err error)
		// UnicastOutbound sends a unicast message to the given address
		UnicastOutbound(_ context.Context, peer peer.AddrInfo, msg proto.Message) (err error)
		// Info returns agents' peer info.
		Info() (peer.AddrInfo, error)
		// Self returns the self network address
		Self() ([]multiaddr.Multiaddr, error)
		// ConnectedPeers returns the connected peers' info
		ConnectedPeers() ([]peer.AddrInfo, error)
		// BlockPeer blocks the peer in p2p layer
		BlockPeer(string)
	}

	// dummyAgent is a no-op Agent implementation.
	dummyAgent struct{}

	// agent is the production Agent implementation backed by a go-p2p host.
	agent struct {
		cfg                        Config
		chainID                    uint32
		topicSuffix                string // derived from the genesis hash; isolates chains with different genesis
		broadcastInboundHandler    HandleBroadcastInbound
		unicastInboundAsyncHandler HandleUnicastInboundAsync
		host                       *p2p.Host // nil until Start succeeds
		bootNodeAddr               []multiaddr.Multiaddr
		reconnectTimeout           time.Duration
		reconnectTask              *routine.RecurringTask
		qosMetrics                 *Qos
	}
)

// DefaultConfig is the default config of p2p
var DefaultConfig = Config{
	Host:              "0.0.0.0",
	Port:              4689,
	ExternalHost:      "",
	ExternalPort:      4689,
	BootstrapNodes:    []string{},
	MasterKey:         "",
	RateLimit:         p2p.DefaultRatelimitConfig,
	ReconnectInterval: 150 * time.Second,
	EnableRateLimit:   true,
	PrivateNetworkPSK: "",
	MaxPeers:          30,
	MaxMessageSize:    p2p.DefaultConfig.MaxMessageSize,
}
// NewDummyAgent creates a dummy p2p agent 151 func NewDummyAgent() Agent { 152 return &dummyAgent{} 153 } 154 155 func (*dummyAgent) Start(context.Context) error { 156 return nil 157 } 158 159 func (*dummyAgent) Stop(context.Context) error { 160 return nil 161 } 162 163 func (*dummyAgent) BroadcastOutbound(ctx context.Context, msg proto.Message) error { 164 return nil 165 } 166 167 func (*dummyAgent) UnicastOutbound(_ context.Context, peer peer.AddrInfo, msg proto.Message) error { 168 return nil 169 } 170 171 func (*dummyAgent) Info() (peer.AddrInfo, error) { 172 return peer.AddrInfo{}, nil 173 } 174 175 func (*dummyAgent) Self() ([]multiaddr.Multiaddr, error) { 176 return nil, nil 177 } 178 179 func (*dummyAgent) ConnectedPeers() ([]peer.AddrInfo, error) { 180 return nil, nil 181 } 182 183 func (*dummyAgent) BlockPeer(string) { 184 return 185 } 186 187 func (*dummyAgent) BuildReport() string { 188 return "" 189 } 190 191 // NewAgent instantiates a local P2P agent instance 192 func NewAgent(cfg Config, chainID uint32, genesisHash hash.Hash256, broadcastHandler HandleBroadcastInbound, unicastHandler HandleUnicastInboundAsync) Agent { 193 log.L().Info("p2p agent", log.Hex("topicSuffix", genesisHash[22:])) 194 return &agent{ 195 cfg: cfg, 196 chainID: chainID, 197 // Make sure the honest node only care the messages related the chain from the same genesis 198 topicSuffix: hex.EncodeToString(genesisHash[22:]), // last 10 bytes of genesis hash 199 broadcastInboundHandler: broadcastHandler, 200 unicastInboundAsyncHandler: unicastHandler, 201 reconnectTimeout: cfg.ReconnectInterval, 202 qosMetrics: NewQoS(time.Now(), 2*cfg.ReconnectInterval), 203 } 204 } 205 206 func (p *agent) Start(ctx context.Context) error { 207 ready := make(chan interface{}) 208 p2p.SetLogger(log.L()) 209 opts := []p2p.Option{ 210 p2p.HostName(p.cfg.Host), 211 p2p.Port(p.cfg.Port), 212 p2p.Gossip(), 213 p2p.SecureIO(), 214 p2p.MasterKey(p.cfg.MasterKey), 215 
p2p.PrivateNetworkPSK(p.cfg.PrivateNetworkPSK), 216 p2p.DHTProtocolID(p.chainID), 217 p2p.DHTGroupID(p.chainID), 218 p2p.WithMaxPeer(uint32(p.cfg.MaxPeers)), 219 p2p.WithMaxMessageSize(p.cfg.MaxMessageSize), 220 } 221 if p.cfg.EnableRateLimit { 222 opts = append(opts, p2p.WithRateLimit(p.cfg.RateLimit)) 223 } 224 if p.cfg.ExternalHost != "" { 225 opts = append(opts, p2p.ExternalHostName(p.cfg.ExternalHost)) 226 opts = append(opts, p2p.ExternalPort(p.cfg.ExternalPort)) 227 } 228 if p.cfg.RelayType != "" { 229 opts = append(opts, p2p.WithRelay(p.cfg.RelayType)) 230 } 231 host, err := p2p.NewHost(ctx, opts...) 232 if err != nil { 233 return errors.Wrap(err, "error when instantiating Agent host") 234 } 235 236 if err := host.AddBroadcastPubSub(ctx, _broadcastTopic+p.topicSuffix, func(ctx context.Context, data []byte) (err error) { 237 // Blocking handling the broadcast message until the agent is started 238 <-ready 239 var ( 240 peerID string 241 broadcast iotexrpc.BroadcastMsg 242 latency int64 243 ) 244 skip := false 245 defer func() { 246 // Skip accounting if the broadcast message is not handled 247 if skip { 248 return 249 } 250 status := _successStr 251 if err != nil { 252 status = _failureStr 253 } 254 _p2pMsgCounter.WithLabelValues("broadcast", strconv.Itoa(int(broadcast.MsgType)), "in", peerID, status).Inc() 255 _p2pMsgLatency.WithLabelValues("broadcast", strconv.Itoa(int(broadcast.MsgType)), status).Observe(float64(latency)) 256 }() 257 if err = proto.Unmarshal(data, &broadcast); err != nil { 258 err = errors.Wrap(err, "error when marshaling broadcast message") 259 return 260 } 261 // Skip the broadcast message if it's from the node itself 262 rawmsg, ok := p2p.GetBroadcastMsg(ctx) 263 if !ok { 264 err = errors.New("error when asserting broadcast msg context") 265 return 266 } 267 peerID = rawmsg.GetFrom().Pretty() 268 if p.host.HostIdentity() == peerID { 269 skip = true 270 return 271 } 272 if broadcast.ChainId != p.chainID { 273 err = errors.Errorf("chain 
ID mismatch, received %d, expecting %d", broadcast.ChainId, p.chainID) 274 return 275 } 276 277 t := broadcast.GetTimestamp().AsTime() 278 latency = time.Since(t).Nanoseconds() / time.Millisecond.Nanoseconds() 279 280 msg, err := goproto.TypifyRPCMsg(broadcast.MsgType, broadcast.MsgBody) 281 if err != nil { 282 err = errors.Wrap(err, "error when typifying broadcast message") 283 return 284 } 285 p.broadcastInboundHandler(ctx, broadcast.ChainId, peerID, msg) 286 p.qosMetrics.updateRecvBroadcast(time.Now()) 287 return 288 }); err != nil { 289 return errors.Wrap(err, "error when adding broadcast pubsub") 290 } 291 292 if err := host.AddUnicastPubSub(_unicastTopic+p.topicSuffix, func(ctx context.Context, peerInfo peer.AddrInfo, data []byte) (err error) { 293 // Blocking handling the unicast message until the agent is started 294 <-ready 295 var ( 296 unicast iotexrpc.UnicastMsg 297 peerID = peerInfo.ID.Pretty() 298 latency int64 299 ) 300 defer func() { 301 status := _successStr 302 if err != nil { 303 status = _failureStr 304 } 305 _p2pMsgCounter.WithLabelValues("unicast", strconv.Itoa(int(unicast.MsgType)), "in", peerID, status).Inc() 306 _p2pMsgLatency.WithLabelValues("unicast", strconv.Itoa(int(unicast.MsgType)), status).Observe(float64(latency)) 307 }() 308 if err = proto.Unmarshal(data, &unicast); err != nil { 309 err = errors.Wrap(err, "error when marshaling unicast message") 310 return 311 } 312 msg, err := goproto.TypifyRPCMsg(unicast.MsgType, unicast.MsgBody) 313 if err != nil { 314 err = errors.Wrap(err, "error when typifying unicast message") 315 return 316 } 317 if unicast.ChainId != p.chainID { 318 err = errors.Errorf("chain ID mismatch, received %d, expecting %d", unicast.ChainId, p.chainID) 319 return 320 } 321 322 t := unicast.GetTimestamp().AsTime() 323 latency = time.Since(t).Nanoseconds() / time.Millisecond.Nanoseconds() 324 325 p.unicastInboundAsyncHandler(ctx, unicast.ChainId, peerInfo, msg) 326 p.qosMetrics.updateRecvUnicast(peerID, time.Now()) 
327 return 328 }); err != nil { 329 return errors.Wrap(err, "error when adding unicast pubsub") 330 } 331 332 // create boot nodes list except itself 333 hostName := host.HostIdentity() 334 for _, bootstrapNode := range p.cfg.BootstrapNodes { 335 bootAddr := multiaddr.StringCast(bootstrapNode) 336 if !strings.Contains(bootAddr.String(), hostName) { 337 p.bootNodeAddr = append(p.bootNodeAddr, bootAddr) 338 } 339 } 340 if err := host.AddBootstrap(p.bootNodeAddr); err != nil { 341 return err 342 } 343 host.JoinOverlay() 344 p.host = host 345 346 // connect to bootstrap nodes 347 if err := p.connectBootNode(ctx); err != nil { 348 log.L().Error("fail to connect bootnode", zap.Error(err)) 349 return err 350 } 351 if err := p.host.AdvertiseAsync(); err != nil { 352 return err 353 } 354 if err := p.host.FindPeersAsync(); err != nil { 355 return err 356 } 357 358 close(ready) 359 360 // check network connectivity every 60 blocks, and reconnect in case of disconnection 361 p.reconnectTask = routine.NewRecurringTask(p.reconnect, p.reconnectTimeout) 362 return p.reconnectTask.Start(ctx) 363 } 364 365 func (p *agent) Stop(ctx context.Context) error { 366 if p.host == nil { 367 return ErrAgentNotStarted 368 } 369 log.L().Info("p2p is shutting down.", zap.Error(ctx.Err())) 370 if err := p.reconnectTask.Stop(ctx); err != nil { 371 return err 372 } 373 if err := p.host.Close(); err != nil { 374 return errors.Wrap(err, "error when closing Agent host") 375 } 376 return nil 377 } 378 379 func (p *agent) BroadcastOutbound(ctx context.Context, msg proto.Message) (err error) { 380 _, span := tracer.NewSpan(ctx, "Agent.BroadcastOutbound") 381 defer span.End() 382 383 host := p.host 384 if host == nil { 385 return ErrAgentNotStarted 386 } 387 var msgType iotexrpc.MessageType 388 var msgBody []byte 389 defer func() { 390 status := _successStr 391 if err != nil { 392 status = _failureStr 393 } 394 _p2pMsgCounter.WithLabelValues( 395 "broadcast", 396 strconv.Itoa(int(msgType)), 397 "out", 398 
host.HostIdentity(), 399 status, 400 ).Inc() 401 }() 402 msgType, msgBody, err = convertAppMsg(msg) 403 if err != nil { 404 return 405 } 406 broadcast := iotexrpc.BroadcastMsg{ 407 ChainId: p.chainID, 408 PeerId: host.HostIdentity(), 409 MsgType: msgType, 410 MsgBody: msgBody, 411 Timestamp: timestamppb.Now(), 412 } 413 data, err := proto.Marshal(&broadcast) 414 if err != nil { 415 err = errors.Wrap(err, "error when marshaling broadcast message") 416 return 417 } 418 t := time.Now() 419 if err = host.Broadcast(ctx, _broadcastTopic+p.topicSuffix, data); err != nil { 420 err = errors.Wrap(err, "error when sending broadcast message") 421 p.qosMetrics.updateSendBroadcast(t, false) 422 return 423 } 424 p.qosMetrics.updateSendBroadcast(t, true) 425 return 426 } 427 428 func (p *agent) UnicastOutbound(ctx context.Context, peer peer.AddrInfo, msg proto.Message) (err error) { 429 host := p.host 430 if host == nil { 431 return ErrAgentNotStarted 432 } 433 var ( 434 peerName = peer.ID.Pretty() 435 msgType iotexrpc.MessageType 436 msgBody []byte 437 ) 438 defer func() { 439 status := _successStr 440 if err != nil { 441 status = _failureStr 442 } 443 _p2pMsgCounter.WithLabelValues("unicast", strconv.Itoa(int(msgType)), "out", peer.ID.Pretty(), status).Inc() 444 }() 445 446 msgType, msgBody, err = convertAppMsg(msg) 447 if err != nil { 448 return 449 } 450 unicast := iotexrpc.UnicastMsg{ 451 ChainId: p.chainID, 452 PeerId: host.HostIdentity(), 453 MsgType: msgType, 454 MsgBody: msgBody, 455 Timestamp: timestamppb.Now(), 456 } 457 data, err := proto.Marshal(&unicast) 458 if err != nil { 459 err = errors.Wrap(err, "error when marshaling unicast message") 460 return 461 } 462 463 t := time.Now() 464 if err = host.Unicast(ctx, peer, _unicastTopic+p.topicSuffix, data); err != nil { 465 err = errors.Wrap(err, "error when sending unicast message") 466 p.qosMetrics.updateSendUnicast(peerName, t, false) 467 return 468 } 469 p.qosMetrics.updateSendUnicast(peerName, t, true) 470 return 471 
} 472 473 func (p *agent) Info() (peer.AddrInfo, error) { 474 if p.host == nil { 475 return peer.AddrInfo{}, ErrAgentNotStarted 476 } 477 return p.host.Info(), nil 478 } 479 480 func (p *agent) Self() ([]multiaddr.Multiaddr, error) { 481 if p.host == nil { 482 return nil, ErrAgentNotStarted 483 } 484 return p.host.Addresses(), nil 485 } 486 487 func (p *agent) ConnectedPeers() ([]peer.AddrInfo, error) { 488 if p.host == nil { 489 return nil, ErrAgentNotStarted 490 } 491 return p.host.ConnectedPeers(), nil 492 } 493 494 func (p *agent) BlockPeer(pidStr string) { 495 pid, err := peer.Decode(pidStr) 496 if err != nil { 497 return 498 } 499 p.host.BlockPeer(pid) 500 } 501 502 // BuildReport builds a report of p2p agent 503 func (p *agent) BuildReport() string { 504 neighbors, err := p.ConnectedPeers() 505 if err == nil { 506 return fmt.Sprintf("P2P ConnectedPeers: %d", len(neighbors)) 507 } 508 return "" 509 } 510 511 func (p *agent) connectBootNode(ctx context.Context) error { 512 if len(p.cfg.BootstrapNodes) == 0 { 513 return nil 514 } 515 var errNum, connNum, desiredConnNum int 516 conn := make(chan struct{}, len(p.cfg.BootstrapNodes)) 517 connErrChan := make(chan error, len(p.cfg.BootstrapNodes)) 518 519 // try to connect to all bootstrap node beside itself. 
520 for i := range p.bootNodeAddr { 521 bootAddr := p.bootNodeAddr[i] 522 go func() { 523 if err := exponentialRetry( 524 func() error { return p.host.ConnectWithMultiaddr(ctx, bootAddr) }, 525 _dialRetryInterval, 526 _numDialRetries, 527 ); err != nil { 528 err := errors.Wrap(err, fmt.Sprintf("error when connecting bootstrap node %s", bootAddr.String())) 529 connErrChan <- err 530 return 531 } 532 conn <- struct{}{} 533 log.L().Info("Connected bootstrap node.", zap.String("address", bootAddr.String())) 534 }() 535 } 536 537 // wait until half+1 bootnodes get connected 538 desiredConnNum = len(p.bootNodeAddr)/2 + 1 539 for { 540 select { 541 case err := <-connErrChan: 542 log.L().Info("Connection failed.", zap.Error(err)) 543 errNum++ 544 if errNum == len(p.bootNodeAddr) { 545 return errors.New("failed to connect to any bootstrap node") 546 } 547 case <-conn: 548 connNum++ 549 } 550 // can add more condition later 551 if connNum >= desiredConnNum { 552 break 553 } 554 } 555 return nil 556 } 557 558 func (p *agent) reconnect() { 559 if p.host == nil { 560 return 561 } 562 if len(p.host.ConnectedPeers()) == 0 || p.qosMetrics.lostConnection() { 563 log.L().Info("network lost, try re-connecting.") 564 p.host.ClearBlocklist() 565 if err := p.connectBootNode(context.Background()); err != nil { 566 log.L().Error("fail to connect bootnode", zap.Error(err)) 567 return 568 } 569 if err := p.host.AdvertiseAsync(); err != nil { 570 log.L().Error("fail to advertise", zap.Error(err)) 571 return 572 } 573 } 574 if err := p.host.FindPeersAsync(); err != nil { 575 log.L().Error("fail to find peer", zap.Error(err)) 576 } 577 } 578 579 func convertAppMsg(msg proto.Message) (iotexrpc.MessageType, []byte, error) { 580 msgType, err := goproto.GetTypeFromRPCMsg(msg) 581 if err != nil { 582 return 0, nil, errors.Wrap(err, "error when converting application message to proto") 583 } 584 msgBody, err := proto.Marshal(msg) 585 if err != nil { 586 return 0, nil, errors.Wrap(err, "error when 
marshaling application message") 587 } 588 return msgType, msgBody, nil 589 } 590 591 func exponentialRetry(f func() error, retryInterval time.Duration, numRetries int) (err error) { 592 for i := 0; i < numRetries; i++ { 593 if err = f(); err == nil { 594 return 595 } 596 log.L().Error("Error happens, will retry.", zap.Error(err)) 597 time.Sleep(retryInterval) 598 retryInterval *= 2 599 } 600 return 601 }