github.com/koko1123/flow-go-1@v0.29.6/network/p2p/network.go (about)

     1  package p2p
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/koko1123/flow-go-1/utils/logging"
    10  
    11  	"github.com/ipfs/go-datastore"
    12  	"github.com/libp2p/go-libp2p/core/peer"
    13  	"github.com/libp2p/go-libp2p/core/protocol"
    14  	"github.com/rs/zerolog"
    15  
    16  	"github.com/koko1123/flow-go-1/model/flow"
    17  	"github.com/koko1123/flow-go-1/model/flow/filter"
    18  	"github.com/koko1123/flow-go-1/module"
    19  	"github.com/koko1123/flow-go-1/module/component"
    20  	"github.com/koko1123/flow-go-1/module/irrecoverable"
    21  	"github.com/koko1123/flow-go-1/network"
    22  	netcache "github.com/koko1123/flow-go-1/network/cache"
    23  	"github.com/koko1123/flow-go-1/network/channels"
    24  	"github.com/koko1123/flow-go-1/network/p2p/conduit"
    25  	"github.com/koko1123/flow-go-1/network/queue"
    26  	_ "github.com/koko1123/flow-go-1/utils/binstat"
    27  )
    28  
    29  const (
    30  	// DefaultReceiveCacheSize represents size of receive cache that keeps hash of incoming messages
    31  	// for sake of deduplication.
    32  	DefaultReceiveCacheSize = 10e4
    33  )
    34  
// NotEjectedFilter is an identity filter that, when applied to the identity
// table at a given snapshot, returns all nodes that we should communicate with
// over the networking layer.
//
// NOTE: The protocol state includes nodes from the previous/next epoch that should
// be included in network communication. We omit any nodes that have been ejected.
//
// It is constructed as the negation of filter.Ejected.
var NotEjectedFilter = filter.Not(filter.Ejected)
    42  
// NetworkOptFunction is a functional option that mutates a Network while it is
// being constructed.
type NetworkOptFunction func(*Network)
    44  
    45  func WithConduitFactory(f network.ConduitFactory) NetworkOptFunction {
    46  	return func(n *Network) {
    47  		n.conduitFactory = f
    48  	}
    49  }
    50  
// Network represents the overlay network of our peer-to-peer network, including
// the protocols for handshakes, authentication, gossiping and heartbeats.
//
// Incoming messages are deduplicated through receiveCache and placed on queue,
// from which queue workers hand them to the engine registered for the target
// channel in subscriptionManager. Outgoing messages are encoded with codec and
// handed to the middleware mw.
//
// Engine and blob service registrations are not performed by the caller's
// goroutine: they are sent over registerEngineRequests and
// registerBlobServiceRequests and handled by dedicated component workers.
type Network struct {
	sync.RWMutex
	*component.ComponentManager
	identityProvider            module.IdentityProvider
	logger                      zerolog.Logger
	codec                       network.Codec
	me                          module.Local
	mw                          network.Middleware
	metrics                     module.NetworkCoreMetrics
	receiveCache                *netcache.ReceiveCache // used to deduplicate incoming messages
	queue                       network.MessageQueue
	subscriptionManager         network.SubscriptionManager // used to keep track of subscribed channels
	conduitFactory              network.ConduitFactory
	topology                    network.Topology
	registerEngineRequests      chan *registerEngineRequest
	registerBlobServiceRequests chan *registerBlobServiceRequest
}
    70  
    71  var _ network.Network = &Network{}
    72  var _ network.Overlay = &Network{}
    73  
// registerEngineRequest is the message Register sends to the engine
// registration worker. It names the channel and the message processor to
// register; the outcome is delivered back on respChan.
type registerEngineRequest struct {
	channel          channels.Channel
	messageProcessor network.MessageProcessor
	respChan         chan *registerEngineResp
}
    79  
// registerEngineResp is the outcome of an engine registration: the conduit
// created for the channel, or the error that prevented registration.
type registerEngineResp struct {
	conduit network.Conduit
	err     error
}
    84  
// registerBlobServiceRequest is the message RegisterBlobService sends to the
// blob service registration worker. It carries the channel, datastore and
// options for the new blob service; the outcome is delivered back on respChan.
type registerBlobServiceRequest struct {
	channel  channels.Channel
	ds       datastore.Batching
	opts     []network.BlobServiceOption
	respChan chan *registerBlobServiceResp
}
    91  
// registerBlobServiceResp is the outcome of a blob service registration: the
// created blob service, or the error that prevented registration.
type registerBlobServiceResp struct {
	blobService network.BlobService
	err         error
}
    96  
// ErrNetworkShutdown is returned by Register, RegisterPingService and
// RegisterBlobService when the network's shutdown signal has already fired.
var ErrNetworkShutdown = errors.New("network has already shutdown")
    98  
// NetworkParameters bundles the dependencies NewNetwork needs to build a
// Network.
//
// MiddlewareFactory is invoked by NewNetwork to create the middleware; an
// error from it aborts construction. Options are applied to the Network after
// the remaining fields have been copied into it, so an option can override
// defaults such as the conduit factory.
type NetworkParameters struct {
	Logger              zerolog.Logger
	Codec               network.Codec
	Me                  module.Local
	MiddlewareFactory   func() (network.Middleware, error)
	Topology            network.Topology
	SubscriptionManager network.SubscriptionManager
	Metrics             module.NetworkCoreMetrics
	IdentityProvider    module.IdentityProvider
	ReceiveCache        *netcache.ReceiveCache
	Options             []NetworkOptFunction
}
   111  
   112  // NewNetwork creates a new naive overlay network, using the given middleware to
   113  // communicate to direct peers, using the given codec for serialization, and
   114  // using the given state & cache interfaces to track volatile information.
   115  // csize determines the size of the cache dedicated to keep track of received messages
   116  func NewNetwork(param *NetworkParameters) (*Network, error) {
   117  
   118  	mw, err := param.MiddlewareFactory()
   119  	if err != nil {
   120  		return nil, fmt.Errorf("could not create middleware: %w", err)
   121  	}
   122  
   123  	n := &Network{
   124  		logger:                      param.Logger,
   125  		codec:                       param.Codec,
   126  		me:                          param.Me,
   127  		mw:                          mw,
   128  		receiveCache:                param.ReceiveCache,
   129  		topology:                    param.Topology,
   130  		metrics:                     param.Metrics,
   131  		subscriptionManager:         param.SubscriptionManager,
   132  		identityProvider:            param.IdentityProvider,
   133  		conduitFactory:              conduit.NewDefaultConduitFactory(),
   134  		registerEngineRequests:      make(chan *registerEngineRequest),
   135  		registerBlobServiceRequests: make(chan *registerBlobServiceRequest),
   136  	}
   137  
   138  	for _, opt := range param.Options {
   139  		opt(n)
   140  	}
   141  
   142  	n.mw.SetOverlay(n)
   143  
   144  	if err := n.conduitFactory.RegisterAdapter(n); err != nil {
   145  		return nil, fmt.Errorf("could not register network adapter: %w", err)
   146  	}
   147  
   148  	n.ComponentManager = component.NewComponentManagerBuilder().
   149  		AddWorker(n.runMiddleware).
   150  		AddWorker(n.processRegisterEngineRequests).
   151  		AddWorker(n.processRegisterBlobServiceRequests).Build()
   152  
   153  	return n, nil
   154  }
   155  
   156  func (n *Network) processRegisterEngineRequests(parent irrecoverable.SignalerContext, ready component.ReadyFunc) {
   157  	<-n.mw.Ready()
   158  	ready()
   159  
   160  	for {
   161  		select {
   162  		case req := <-n.registerEngineRequests:
   163  			conduit, err := n.handleRegisterEngineRequest(parent, req.channel, req.messageProcessor)
   164  			resp := &registerEngineResp{
   165  				conduit: conduit,
   166  				err:     err,
   167  			}
   168  
   169  			select {
   170  			case <-parent.Done():
   171  				return
   172  			case req.respChan <- resp:
   173  			}
   174  		case <-parent.Done():
   175  			return
   176  		}
   177  	}
   178  }
   179  
   180  func (n *Network) processRegisterBlobServiceRequests(parent irrecoverable.SignalerContext, ready component.ReadyFunc) {
   181  	<-n.mw.Ready()
   182  	ready()
   183  
   184  	for {
   185  		select {
   186  		case req := <-n.registerBlobServiceRequests:
   187  			blobService, err := n.handleRegisterBlobServiceRequest(parent, req.channel, req.ds, req.opts)
   188  			resp := &registerBlobServiceResp{
   189  				blobService: blobService,
   190  				err:         err,
   191  			}
   192  
   193  			select {
   194  			case <-parent.Done():
   195  				return
   196  			case req.respChan <- resp:
   197  			}
   198  		case <-parent.Done():
   199  			return
   200  		}
   201  	}
   202  }
   203  
   204  func (n *Network) runMiddleware(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   205  	// setup the message queue
   206  	// create priority queue
   207  	n.queue = queue.NewMessageQueue(ctx, queue.GetEventPriority, n.metrics)
   208  
   209  	// create workers to read from the queue and call queueSubmitFunc
   210  	queue.CreateQueueWorkers(ctx, queue.DefaultNumWorkers, n.queue, n.queueSubmitFunc)
   211  
   212  	n.mw.Start(ctx)
   213  	<-n.mw.Ready()
   214  
   215  	ready()
   216  
   217  	<-n.mw.Done()
   218  }
   219  
   220  func (n *Network) handleRegisterEngineRequest(parent irrecoverable.SignalerContext, channel channels.Channel, engine network.MessageProcessor) (network.Conduit, error) {
   221  	if !channels.ChannelExists(channel) {
   222  		return nil, fmt.Errorf("unknown channel: %s, should be registered in topic map", channel)
   223  	}
   224  
   225  	err := n.subscriptionManager.Register(channel, engine)
   226  	if err != nil {
   227  		return nil, fmt.Errorf("failed to register engine for channel %s: %w", channel, err)
   228  	}
   229  
   230  	n.logger.Info().
   231  		Str("channel_id", channel.String()).
   232  		Msg("channel successfully registered")
   233  
   234  	// create the conduit
   235  	newConduit, err := n.conduitFactory.NewConduit(parent, channel)
   236  	if err != nil {
   237  		return nil, fmt.Errorf("could not create conduit using factory: %w", err)
   238  	}
   239  
   240  	return newConduit, nil
   241  }
   242  
   243  func (n *Network) handleRegisterBlobServiceRequest(parent irrecoverable.SignalerContext, channel channels.Channel, ds datastore.Batching, opts []network.BlobServiceOption) (network.BlobService, error) {
   244  	bs := n.mw.NewBlobService(channel, ds, opts...)
   245  
   246  	// start the blob service using the network's context
   247  	bs.Start(parent)
   248  
   249  	return bs, nil
   250  }
   251  
   252  // Register will register the given engine with the given unique engine engineID,
   253  // returning a conduit to directly submit messages to the message bus of the
   254  // engine.
   255  func (n *Network) Register(channel channels.Channel, messageProcessor network.MessageProcessor) (network.Conduit, error) {
   256  	respChan := make(chan *registerEngineResp)
   257  
   258  	select {
   259  	case <-n.ComponentManager.ShutdownSignal():
   260  		return nil, ErrNetworkShutdown
   261  	case n.registerEngineRequests <- &registerEngineRequest{
   262  		channel:          channel,
   263  		messageProcessor: messageProcessor,
   264  		respChan:         respChan,
   265  	}:
   266  		select {
   267  		case <-n.ComponentManager.ShutdownSignal():
   268  			return nil, ErrNetworkShutdown
   269  		case resp := <-respChan:
   270  			return resp.conduit, resp.err
   271  		}
   272  	}
   273  }
   274  
   275  func (n *Network) RegisterPingService(pingProtocol protocol.ID, provider network.PingInfoProvider) (network.PingService, error) {
   276  	select {
   277  	case <-n.ComponentManager.ShutdownSignal():
   278  		return nil, ErrNetworkShutdown
   279  	default:
   280  		return n.mw.NewPingService(pingProtocol, provider), nil
   281  	}
   282  }
   283  
   284  // RegisterBlobService registers a BlobService on the given channel.
   285  // The returned BlobService can be used to request blobs from the network.
   286  func (n *Network) RegisterBlobService(channel channels.Channel, ds datastore.Batching, opts ...network.BlobServiceOption) (network.BlobService, error) {
   287  	respChan := make(chan *registerBlobServiceResp)
   288  
   289  	select {
   290  	case <-n.ComponentManager.ShutdownSignal():
   291  		return nil, ErrNetworkShutdown
   292  	case n.registerBlobServiceRequests <- &registerBlobServiceRequest{
   293  		channel:  channel,
   294  		ds:       ds,
   295  		opts:     opts,
   296  		respChan: respChan,
   297  	}:
   298  		select {
   299  		case <-n.ComponentManager.ShutdownSignal():
   300  			return nil, ErrNetworkShutdown
   301  		case resp := <-respChan:
   302  			return resp.blobService, resp.err
   303  		}
   304  	}
   305  }
   306  
   307  // UnRegisterChannel unregisters the engine for the specified channel. The engine will no longer be able to send or
   308  // receive messages from that channel.
   309  func (n *Network) UnRegisterChannel(channel channels.Channel) error {
   310  	err := n.subscriptionManager.Unregister(channel)
   311  	if err != nil {
   312  		return fmt.Errorf("failed to unregister engine for channel %s: %w", channel, err)
   313  	}
   314  	return nil
   315  }
   316  
   317  func (n *Network) Identities() flow.IdentityList {
   318  	return n.identityProvider.Identities(NotEjectedFilter)
   319  }
   320  
   321  func (n *Network) Identity(pid peer.ID) (*flow.Identity, bool) {
   322  	return n.identityProvider.ByPeerID(pid)
   323  }
   324  
   325  func (n *Network) Receive(msg *network.IncomingMessageScope) error {
   326  	n.metrics.InboundMessageReceived(msg.Size(), msg.Channel().String(), msg.Protocol().String(), msg.PayloadType())
   327  
   328  	err := n.processNetworkMessage(msg)
   329  	if err != nil {
   330  		return fmt.Errorf("could not process message: %w", err)
   331  	}
   332  	return nil
   333  }
   334  
   335  func (n *Network) processNetworkMessage(msg *network.IncomingMessageScope) error {
   336  	// checks the cache for deduplication and adds the message if not already present
   337  	if !n.receiveCache.Add(msg.EventID()) {
   338  		// drops duplicate message
   339  		n.logger.Debug().
   340  			Hex("sender_id", logging.ID(msg.OriginId())).
   341  			Hex("event_id", msg.EventID()).
   342  			Str("channel", msg.Channel().String()).
   343  			Msg("dropping message due to duplication")
   344  
   345  		n.metrics.DuplicateInboundMessagesDropped(msg.Channel().String(), msg.Protocol().String(), msg.PayloadType())
   346  
   347  		return nil
   348  	}
   349  
   350  	// create queue message
   351  	qm := queue.QMessage{
   352  		Payload:  msg.DecodedPayload(),
   353  		Size:     msg.Size(),
   354  		Target:   msg.Channel(),
   355  		SenderID: msg.OriginId(),
   356  	}
   357  
   358  	// insert the message in the queue
   359  	err := n.queue.Insert(qm)
   360  	if err != nil {
   361  		return fmt.Errorf("failed to insert message in queue: %w", err)
   362  	}
   363  
   364  	return nil
   365  }
   366  
   367  // UnicastOnChannel sends the message in a reliable way to the given recipient.
   368  // It uses 1-1 direct messaging over the underlying network to deliver the message.
   369  // It returns an error if unicasting fails.
   370  func (n *Network) UnicastOnChannel(channel channels.Channel, payload interface{}, targetID flow.Identifier) error {
   371  	if targetID == n.me.NodeID() {
   372  		n.logger.Debug().Msg("network skips self unicasting")
   373  		return nil
   374  	}
   375  
   376  	msg, err := network.NewOutgoingScope(
   377  		flow.IdentifierList{targetID},
   378  		channel,
   379  		payload,
   380  		n.codec.Encode,
   381  		network.ProtocolTypeUnicast)
   382  	if err != nil {
   383  		return fmt.Errorf("could not generate outgoing message scope for unicast: %w", err)
   384  	}
   385  
   386  	n.metrics.UnicastMessageSendingStarted(msg.Channel().String())
   387  	defer n.metrics.UnicastMessageSendingCompleted(msg.Channel().String())
   388  	err = n.mw.SendDirect(msg)
   389  	if err != nil {
   390  		return fmt.Errorf("failed to send message to %x: %w", targetID, err)
   391  	}
   392  
   393  	n.metrics.OutboundMessageSent(msg.Size(), msg.Channel().String(), network.ProtocolTypeUnicast.String(), msg.PayloadType())
   394  
   395  	return nil
   396  }
   397  
   398  // PublishOnChannel sends the message in an unreliable way to the given recipients.
   399  // In this context, unreliable means that the message is published over a libp2p pub-sub
   400  // channel and can be read by any node subscribed to that channel.
   401  // The selector could be used to optimize or restrict delivery.
   402  func (n *Network) PublishOnChannel(channel channels.Channel, message interface{}, targetIDs ...flow.Identifier) error {
   403  	filteredIDs := flow.IdentifierList(targetIDs).Filter(n.removeSelfFilter())
   404  
   405  	if len(filteredIDs) == 0 {
   406  		return network.EmptyTargetList
   407  	}
   408  
   409  	err := n.sendOnChannel(channel, message, filteredIDs)
   410  
   411  	if err != nil {
   412  		return fmt.Errorf("failed to publish on channel %s: %w", channel, err)
   413  	}
   414  
   415  	return nil
   416  }
   417  
   418  // MulticastOnChannel unreliably sends the specified event over the channel to randomly selected 'num' number of recipients
   419  // selected from the specified targetIDs.
   420  func (n *Network) MulticastOnChannel(channel channels.Channel, message interface{}, num uint, targetIDs ...flow.Identifier) error {
   421  	selectedIDs := flow.IdentifierList(targetIDs).Filter(n.removeSelfFilter()).Sample(num)
   422  
   423  	if len(selectedIDs) == 0 {
   424  		return network.EmptyTargetList
   425  	}
   426  
   427  	err := n.sendOnChannel(channel, message, selectedIDs)
   428  
   429  	// publishes the message to the selected targets
   430  	if err != nil {
   431  		return fmt.Errorf("failed to multicast on channel %s: %w", channel, err)
   432  	}
   433  
   434  	return nil
   435  }
   436  
   437  // removeSelfFilter removes the flow.Identifier of this node if present, from the list of nodes
   438  func (n *Network) removeSelfFilter() flow.IdentifierFilter {
   439  	return func(id flow.Identifier) bool {
   440  		return id != n.me.NodeID()
   441  	}
   442  }
   443  
   444  // sendOnChannel sends the message on channel to targets.
   445  func (n *Network) sendOnChannel(channel channels.Channel, message interface{}, targetIDs []flow.Identifier) error {
   446  	n.logger.Debug().
   447  		Interface("message", message).
   448  		Str("channel", channel.String()).
   449  		Str("target_ids", fmt.Sprintf("%v", targetIDs)).
   450  		Msg("sending new message on channel")
   451  
   452  	// generate network message (encoding) based on list of recipients
   453  	msg, err := network.NewOutgoingScope(targetIDs, channel, message, n.codec.Encode, network.ProtocolTypePubSub)
   454  	if err != nil {
   455  		return fmt.Errorf("failed to generate outgoing message scope %s: %w", channel, err)
   456  	}
   457  
   458  	// publish the message through the channel, however, the message
   459  	// is only restricted to targetIDs (if they subscribed to channel).
   460  	err = n.mw.Publish(msg)
   461  	if err != nil {
   462  		return fmt.Errorf("failed to send message on channel %s: %w", channel, err)
   463  	}
   464  
   465  	n.metrics.OutboundMessageSent(msg.Size(), msg.Channel().String(), network.ProtocolTypePubSub.String(), msg.PayloadType())
   466  
   467  	return nil
   468  }
   469  
   470  // queueSubmitFunc submits the message to the engine synchronously. It is the callback for the queue worker
   471  // when it gets a message from the queue
   472  func (n *Network) queueSubmitFunc(message interface{}) {
   473  	qm := message.(queue.QMessage)
   474  
   475  	logger := n.logger.With().
   476  		Str("channel_id", qm.Target.String()).
   477  		Str("sender_id", qm.SenderID.String()).
   478  		Logger()
   479  
   480  	eng, err := n.subscriptionManager.GetEngine(qm.Target)
   481  	if err != nil {
   482  		// This means the message was received on a channel that the node has not registered an
   483  		// engine for. This may be because the message was received during startup and the node
   484  		// hasn't subscribed to the channel yet, or there is a bug.
   485  		logger.Err(err).Msg("failed to submit message")
   486  		return
   487  	}
   488  
   489  	logger.Debug().Msg("submitting message to engine")
   490  
   491  	n.metrics.MessageProcessingStarted(qm.Target.String())
   492  
   493  	// submits the message to the engine synchronously and
   494  	// tracks its processing time.
   495  	startTimestamp := time.Now()
   496  
   497  	err = eng.Process(qm.Target, qm.SenderID, qm.Payload)
   498  	if err != nil {
   499  		logger.Err(err).Msg("failed to process message")
   500  	}
   501  
   502  	n.metrics.MessageProcessingFinished(qm.Target.String(), time.Since(startTimestamp))
   503  }
   504  
   505  func (n *Network) Topology() flow.IdentityList {
   506  	return n.topology.Fanout(n.Identities())
   507  }