github.com/koko1123/flow-go-1@v0.29.6/network/p2p/network.go (about) 1 package p2p 2 3 import ( 4 "errors" 5 "fmt" 6 "sync" 7 "time" 8 9 "github.com/koko1123/flow-go-1/utils/logging" 10 11 "github.com/ipfs/go-datastore" 12 "github.com/libp2p/go-libp2p/core/peer" 13 "github.com/libp2p/go-libp2p/core/protocol" 14 "github.com/rs/zerolog" 15 16 "github.com/koko1123/flow-go-1/model/flow" 17 "github.com/koko1123/flow-go-1/model/flow/filter" 18 "github.com/koko1123/flow-go-1/module" 19 "github.com/koko1123/flow-go-1/module/component" 20 "github.com/koko1123/flow-go-1/module/irrecoverable" 21 "github.com/koko1123/flow-go-1/network" 22 netcache "github.com/koko1123/flow-go-1/network/cache" 23 "github.com/koko1123/flow-go-1/network/channels" 24 "github.com/koko1123/flow-go-1/network/p2p/conduit" 25 "github.com/koko1123/flow-go-1/network/queue" 26 _ "github.com/koko1123/flow-go-1/utils/binstat" 27 ) 28 29 const ( 30 // DefaultReceiveCacheSize represents size of receive cache that keeps hash of incoming messages 31 // for sake of deduplication. 32 DefaultReceiveCacheSize = 10e4 33 ) 34 35 // NotEjectedFilter is an identity filter that, when applied to the identity 36 // table at a given snapshot, returns all nodes that we should communicate with 37 // over the networking layer. 38 // 39 // NOTE: The protocol state includes nodes from the previous/next epoch that should 40 // be included in network communication. We omit any nodes that have been ejected. 41 var NotEjectedFilter = filter.Not(filter.Ejected) 42 43 type NetworkOptFunction func(*Network) 44 45 func WithConduitFactory(f network.ConduitFactory) NetworkOptFunction { 46 return func(n *Network) { 47 n.conduitFactory = f 48 } 49 } 50 51 // Network represents the overlay network of our peer-to-peer network, including 52 // the protocols for handshakes, authentication, gossiping and heartbeats. 53 type Network struct { 54 sync.RWMutex 55 *component.ComponentManager 56 identityProvider module.IdentityProvider 57 logger zerolog.Logger 58 codec network.Codec 59 me module.Local 60 mw network.Middleware 61 metrics module.NetworkCoreMetrics 62 receiveCache *netcache.ReceiveCache // used to deduplicate incoming messages 63 queue network.MessageQueue 64 subscriptionManager network.SubscriptionManager // used to keep track of subscribed channels 65 conduitFactory network.ConduitFactory 66 topology network.Topology 67 registerEngineRequests chan *registerEngineRequest 68 registerBlobServiceRequests chan *registerBlobServiceRequest 69 } 70 71 var _ network.Network = &Network{} 72 var _ network.Overlay = &Network{} 73 74 type registerEngineRequest struct { 75 channel channels.Channel 76 messageProcessor network.MessageProcessor 77 respChan chan *registerEngineResp 78 } 79 80 type registerEngineResp struct { 81 conduit network.Conduit 82 err error 83 } 84 85 type registerBlobServiceRequest struct { 86 channel channels.Channel 87 ds datastore.Batching 88 opts []network.BlobServiceOption 89 respChan chan *registerBlobServiceResp 90 } 91 92 type registerBlobServiceResp struct { 93 blobService network.BlobService 94 err error 95 } 96 97 var ErrNetworkShutdown = errors.New("network has already shutdown") 98 99 type NetworkParameters struct { 100 Logger zerolog.Logger 101 Codec network.Codec 102 Me module.Local 103 MiddlewareFactory func() (network.Middleware, error) 104 Topology network.Topology 105 SubscriptionManager network.SubscriptionManager 106 Metrics module.NetworkCoreMetrics 107 IdentityProvider module.IdentityProvider 108 ReceiveCache *netcache.ReceiveCache 109 Options []NetworkOptFunction 110 } 111 112 // NewNetwork creates a new naive overlay network, using the given middleware to 113 // communicate to direct peers, using the given codec for serialization, and 114 // using the given state & cache interfaces to track volatile information. 115 // csize determines the size of the cache dedicated to keep track of received messages 116 func NewNetwork(param *NetworkParameters) (*Network, error) { 117 118 mw, err := param.MiddlewareFactory() 119 if err != nil { 120 return nil, fmt.Errorf("could not create middleware: %w", err) 121 } 122 123 n := &Network{ 124 logger: param.Logger, 125 codec: param.Codec, 126 me: param.Me, 127 mw: mw, 128 receiveCache: param.ReceiveCache, 129 topology: param.Topology, 130 metrics: param.Metrics, 131 subscriptionManager: param.SubscriptionManager, 132 identityProvider: param.IdentityProvider, 133 conduitFactory: conduit.NewDefaultConduitFactory(), 134 registerEngineRequests: make(chan *registerEngineRequest), 135 registerBlobServiceRequests: make(chan *registerBlobServiceRequest), 136 } 137 138 for _, opt := range param.Options { 139 opt(n) 140 } 141 142 n.mw.SetOverlay(n) 143 144 if err := n.conduitFactory.RegisterAdapter(n); err != nil { 145 return nil, fmt.Errorf("could not register network adapter: %w", err) 146 } 147 148 n.ComponentManager = component.NewComponentManagerBuilder(). 149 AddWorker(n.runMiddleware). 150 AddWorker(n.processRegisterEngineRequests). 151 AddWorker(n.processRegisterBlobServiceRequests).Build() 152 153 return n, nil 154 } 155 156 func (n *Network) processRegisterEngineRequests(parent irrecoverable.SignalerContext, ready component.ReadyFunc) { 157 <-n.mw.Ready() 158 ready() 159 160 for { 161 select { 162 case req := <-n.registerEngineRequests: 163 conduit, err := n.handleRegisterEngineRequest(parent, req.channel, req.messageProcessor) 164 resp := ®isterEngineResp{ 165 conduit: conduit, 166 err: err, 167 } 168 169 select { 170 case <-parent.Done(): 171 return 172 case req.respChan <- resp: 173 } 174 case <-parent.Done(): 175 return 176 } 177 } 178 } 179 180 func (n *Network) processRegisterBlobServiceRequests(parent irrecoverable.SignalerContext, ready component.ReadyFunc) { 181 <-n.mw.Ready() 182 ready() 183 184 for { 185 select { 186 case req := <-n.registerBlobServiceRequests: 187 blobService, err := n.handleRegisterBlobServiceRequest(parent, req.channel, req.ds, req.opts) 188 resp := ®isterBlobServiceResp{ 189 blobService: blobService, 190 err: err, 191 } 192 193 select { 194 case <-parent.Done(): 195 return 196 case req.respChan <- resp: 197 } 198 case <-parent.Done(): 199 return 200 } 201 } 202 } 203 204 func (n *Network) runMiddleware(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 205 // setup the message queue 206 // create priority queue 207 n.queue = queue.NewMessageQueue(ctx, queue.GetEventPriority, n.metrics) 208 209 // create workers to read from the queue and call queueSubmitFunc 210 queue.CreateQueueWorkers(ctx, queue.DefaultNumWorkers, n.queue, n.queueSubmitFunc) 211 212 n.mw.Start(ctx) 213 <-n.mw.Ready() 214 215 ready() 216 217 <-n.mw.Done() 218 } 219 220 func (n *Network) handleRegisterEngineRequest(parent irrecoverable.SignalerContext, channel channels.Channel, engine network.MessageProcessor) (network.Conduit, error) { 221 if !channels.ChannelExists(channel) { 222 return nil, fmt.Errorf("unknown channel: %s, should be registered in topic map", channel) 223 } 224 225 err := n.subscriptionManager.Register(channel, engine) 226 if err != nil { 227 return nil, fmt.Errorf("failed to register engine for channel %s: %w", channel, err) 228 } 229 230 n.logger.Info(). 231 Str("channel_id", channel.String()). 232 Msg("channel successfully registered") 233 234 // create the conduit 235 newConduit, err := n.conduitFactory.NewConduit(parent, channel) 236 if err != nil { 237 return nil, fmt.Errorf("could not create conduit using factory: %w", err) 238 } 239 240 return newConduit, nil 241 } 242 243 func (n *Network) handleRegisterBlobServiceRequest(parent irrecoverable.SignalerContext, channel channels.Channel, ds datastore.Batching, opts []network.BlobServiceOption) (network.BlobService, error) { 244 bs := n.mw.NewBlobService(channel, ds, opts...) 245 246 // start the blob service using the network's context 247 bs.Start(parent) 248 249 return bs, nil 250 } 251 252 // Register will register the given engine with the given unique engine engineID, 253 // returning a conduit to directly submit messages to the message bus of the 254 // engine. 255 func (n *Network) Register(channel channels.Channel, messageProcessor network.MessageProcessor) (network.Conduit, error) { 256 respChan := make(chan *registerEngineResp) 257 258 select { 259 case <-n.ComponentManager.ShutdownSignal(): 260 return nil, ErrNetworkShutdown 261 case n.registerEngineRequests <- ®isterEngineRequest{ 262 channel: channel, 263 messageProcessor: messageProcessor, 264 respChan: respChan, 265 }: 266 select { 267 case <-n.ComponentManager.ShutdownSignal(): 268 return nil, ErrNetworkShutdown 269 case resp := <-respChan: 270 return resp.conduit, resp.err 271 } 272 } 273 } 274 275 func (n *Network) RegisterPingService(pingProtocol protocol.ID, provider network.PingInfoProvider) (network.PingService, error) { 276 select { 277 case <-n.ComponentManager.ShutdownSignal(): 278 return nil, ErrNetworkShutdown 279 default: 280 return n.mw.NewPingService(pingProtocol, provider), nil 281 } 282 } 283 284 // RegisterBlobService registers a BlobService on the given channel. 285 // The returned BlobService can be used to request blobs from the network. 286 func (n *Network) RegisterBlobService(channel channels.Channel, ds datastore.Batching, opts ...network.BlobServiceOption) (network.BlobService, error) { 287 respChan := make(chan *registerBlobServiceResp) 288 289 select { 290 case <-n.ComponentManager.ShutdownSignal(): 291 return nil, ErrNetworkShutdown 292 case n.registerBlobServiceRequests <- ®isterBlobServiceRequest{ 293 channel: channel, 294 ds: ds, 295 opts: opts, 296 respChan: respChan, 297 }: 298 select { 299 case <-n.ComponentManager.ShutdownSignal(): 300 return nil, ErrNetworkShutdown 301 case resp := <-respChan: 302 return resp.blobService, resp.err 303 } 304 } 305 } 306 307 // UnRegisterChannel unregisters the engine for the specified channel. The engine will no longer be able to send or 308 // receive messages from that channel. 309 func (n *Network) UnRegisterChannel(channel channels.Channel) error { 310 err := n.subscriptionManager.Unregister(channel) 311 if err != nil { 312 return fmt.Errorf("failed to unregister engine for channel %s: %w", channel, err) 313 } 314 return nil 315 } 316 317 func (n *Network) Identities() flow.IdentityList { 318 return n.identityProvider.Identities(NotEjectedFilter) 319 } 320 321 func (n *Network) Identity(pid peer.ID) (*flow.Identity, bool) { 322 return n.identityProvider.ByPeerID(pid) 323 } 324 325 func (n *Network) Receive(msg *network.IncomingMessageScope) error { 326 n.metrics.InboundMessageReceived(msg.Size(), msg.Channel().String(), msg.Protocol().String(), msg.PayloadType()) 327 328 err := n.processNetworkMessage(msg) 329 if err != nil { 330 return fmt.Errorf("could not process message: %w", err) 331 } 332 return nil 333 } 334 335 func (n *Network) processNetworkMessage(msg *network.IncomingMessageScope) error { 336 // checks the cache for deduplication and adds the message if not already present 337 if !n.receiveCache.Add(msg.EventID()) { 338 // drops duplicate message 339 n.logger.Debug(). 340 Hex("sender_id", logging.ID(msg.OriginId())). 341 Hex("event_id", msg.EventID()). 342 Str("channel", msg.Channel().String()). 343 Msg("dropping message due to duplication") 344 345 n.metrics.DuplicateInboundMessagesDropped(msg.Channel().String(), msg.Protocol().String(), msg.PayloadType()) 346 347 return nil 348 } 349 350 // create queue message 351 qm := queue.QMessage{ 352 Payload: msg.DecodedPayload(), 353 Size: msg.Size(), 354 Target: msg.Channel(), 355 SenderID: msg.OriginId(), 356 } 357 358 // insert the message in the queue 359 err := n.queue.Insert(qm) 360 if err != nil { 361 return fmt.Errorf("failed to insert message in queue: %w", err) 362 } 363 364 return nil 365 } 366 367 // UnicastOnChannel sends the message in a reliable way to the given recipient. 368 // It uses 1-1 direct messaging over the underlying network to deliver the message. 369 // It returns an error if unicasting fails. 370 func (n *Network) UnicastOnChannel(channel channels.Channel, payload interface{}, targetID flow.Identifier) error { 371 if targetID == n.me.NodeID() { 372 n.logger.Debug().Msg("network skips self unicasting") 373 return nil 374 } 375 376 msg, err := network.NewOutgoingScope( 377 flow.IdentifierList{targetID}, 378 channel, 379 payload, 380 n.codec.Encode, 381 network.ProtocolTypeUnicast) 382 if err != nil { 383 return fmt.Errorf("could not generate outgoing message scope for unicast: %w", err) 384 } 385 386 n.metrics.UnicastMessageSendingStarted(msg.Channel().String()) 387 defer n.metrics.UnicastMessageSendingCompleted(msg.Channel().String()) 388 err = n.mw.SendDirect(msg) 389 if err != nil { 390 return fmt.Errorf("failed to send message to %x: %w", targetID, err) 391 } 392 393 n.metrics.OutboundMessageSent(msg.Size(), msg.Channel().String(), network.ProtocolTypeUnicast.String(), msg.PayloadType()) 394 395 return nil 396 } 397 398 // PublishOnChannel sends the message in an unreliable way to the given recipients. 399 // In this context, unreliable means that the message is published over a libp2p pub-sub 400 // channel and can be read by any node subscribed to that channel. 401 // The selector could be used to optimize or restrict delivery. 402 func (n *Network) PublishOnChannel(channel channels.Channel, message interface{}, targetIDs ...flow.Identifier) error { 403 filteredIDs := flow.IdentifierList(targetIDs).Filter(n.removeSelfFilter()) 404 405 if len(filteredIDs) == 0 { 406 return network.EmptyTargetList 407 } 408 409 err := n.sendOnChannel(channel, message, filteredIDs) 410 411 if err != nil { 412 return fmt.Errorf("failed to publish on channel %s: %w", channel, err) 413 } 414 415 return nil 416 } 417 418 // MulticastOnChannel unreliably sends the specified event over the channel to randomly selected 'num' number of recipients 419 // selected from the specified targetIDs. 420 func (n *Network) MulticastOnChannel(channel channels.Channel, message interface{}, num uint, targetIDs ...flow.Identifier) error { 421 selectedIDs := flow.IdentifierList(targetIDs).Filter(n.removeSelfFilter()).Sample(num) 422 423 if len(selectedIDs) == 0 { 424 return network.EmptyTargetList 425 } 426 427 err := n.sendOnChannel(channel, message, selectedIDs) 428 429 // publishes the message to the selected targets 430 if err != nil { 431 return fmt.Errorf("failed to multicast on channel %s: %w", channel, err) 432 } 433 434 return nil 435 } 436 437 // removeSelfFilter removes the flow.Identifier of this node if present, from the list of nodes 438 func (n *Network) removeSelfFilter() flow.IdentifierFilter { 439 return func(id flow.Identifier) bool { 440 return id != n.me.NodeID() 441 } 442 } 443 444 // sendOnChannel sends the message on channel to targets. 445 func (n *Network) sendOnChannel(channel channels.Channel, message interface{}, targetIDs []flow.Identifier) error { 446 n.logger.Debug(). 447 Interface("message", message). 448 Str("channel", channel.String()). 449 Str("target_ids", fmt.Sprintf("%v", targetIDs)). 450 Msg("sending new message on channel") 451 452 // generate network message (encoding) based on list of recipients 453 msg, err := network.NewOutgoingScope(targetIDs, channel, message, n.codec.Encode, network.ProtocolTypePubSub) 454 if err != nil { 455 return fmt.Errorf("failed to generate outgoing message scope %s: %w", channel, err) 456 } 457 458 // publish the message through the channel, however, the message 459 // is only restricted to targetIDs (if they subscribed to channel). 460 err = n.mw.Publish(msg) 461 if err != nil { 462 return fmt.Errorf("failed to send message on channel %s: %w", channel, err) 463 } 464 465 n.metrics.OutboundMessageSent(msg.Size(), msg.Channel().String(), network.ProtocolTypePubSub.String(), msg.PayloadType()) 466 467 return nil 468 } 469 470 // queueSubmitFunc submits the message to the engine synchronously. It is the callback for the queue worker 471 // when it gets a message from the queue 472 func (n *Network) queueSubmitFunc(message interface{}) { 473 qm := message.(queue.QMessage) 474 475 logger := n.logger.With(). 476 Str("channel_id", qm.Target.String()). 477 Str("sender_id", qm.SenderID.String()). 478 Logger() 479 480 eng, err := n.subscriptionManager.GetEngine(qm.Target) 481 if err != nil { 482 // This means the message was received on a channel that the node has not registered an 483 // engine for. This may be because the message was received during startup and the node 484 // hasn't subscribed to the channel yet, or there is a bug. 485 logger.Err(err).Msg("failed to submit message") 486 return 487 } 488 489 logger.Debug().Msg("submitting message to engine") 490 491 n.metrics.MessageProcessingStarted(qm.Target.String()) 492 493 // submits the message to the engine synchronously and 494 // tracks its processing time. 495 startTimestamp := time.Now() 496 497 err = eng.Process(qm.Target, qm.SenderID, qm.Payload) 498 if err != nil { 499 logger.Err(err).Msg("failed to process message") 500 } 501 502 n.metrics.MessageProcessingFinished(qm.Target.String(), time.Since(startTimestamp)) 503 } 504 505 func (n *Network) Topology() flow.IdentityList { 506 return n.topology.Fanout(n.Identities()) 507 }