github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/common/provider/engine.go

github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/common/provider/engine.go (about)

     1  package provider
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  
     7  	"github.com/rs/zerolog"
     8  	"github.com/vmihailenco/msgpack"
     9  
    10  	"github.com/onflow/flow-go/engine"
    11  	"github.com/onflow/flow-go/engine/common/provider/internal"
    12  	"github.com/onflow/flow-go/model/flow"
    13  	"github.com/onflow/flow-go/model/flow/filter"
    14  	"github.com/onflow/flow-go/model/messages"
    15  	"github.com/onflow/flow-go/module"
    16  	"github.com/onflow/flow-go/module/component"
    17  	"github.com/onflow/flow-go/module/irrecoverable"
    18  	"github.com/onflow/flow-go/module/metrics"
    19  	"github.com/onflow/flow-go/network"
    20  	"github.com/onflow/flow-go/network/channels"
    21  	"github.com/onflow/flow-go/state/protocol"
    22  	"github.com/onflow/flow-go/storage"
    23  	"github.com/onflow/flow-go/utils/logging"
    24  )
    25  
const (
	// DefaultRequestProviderWorkers is the default number of workers used to process entity requests.
	DefaultRequestProviderWorkers = uint(5)

	// DefaultEntityRequestCacheSize is the default max message queue size for the provider engine.
	// This equates to ~5GB of memory usage with a full queue (10M*500)
	// NOTE(review): "10M" presumably refers to a ~10MB upper bound per queued message — confirm.
	DefaultEntityRequestCacheSize = 500
)
    34  
// RetrieveFunc is a function provided to the provider engine upon construction.
// The engine calls it once per requested entity ID in order to look up the
// corresponding entity.
//
// Implementations MUST return an error matching `storage.ErrNotFound` (as checked
// with errors.Is) when the entity does not exist locally: the engine skips such
// entities and still answers the request. Any other error aborts handling of
// the whole request, so no response is sent for it.
type RetrieveFunc func(flow.Identifier) (flow.Entity, error)
    41  
// Engine is a generic provider engine, handling the fulfillment of entity
// requests on the flow network. It is the `reply` part of the request-reply
// pattern provided by the pair of generic exchange engines.
type Engine struct {
	component.Component
	// cm runs the shoveller worker and the entity request workers; it also backs the embedded Component.
	cm             *component.ComponentManager
	// log is the engine logger, tagged with engine=provider.
	log            zerolog.Logger
	// metrics records received, handled and sent messages for this engine's channel.
	metrics        module.EngineMetrics
	// state is queried (via Final()) to resolve the identity of a requester.
	state          protocol.State
	// con is the conduit obtained when registering on the channel; responses are unicast through it.
	con            network.Conduit
	// channel is the network channel this engine is registered on; also used as metrics label.
	channel        channels.Channel
	// requestHandler accepts only EntityRequest messages and stores them in requestQueue.
	requestHandler *engine.MessageHandler
	// requestQueue holds pending requests until the shoveller worker pulls them.
	requestQueue   engine.MessageStore
	// selector decides which requesters are served; it always excludes this node itself.
	selector       flow.IdentityFilter[flow.Identity]
	// retrieve looks up a requested entity by its ID.
	retrieve       RetrieveFunc
	// buffered channel for EntityRequest workers to pick and process.
	// Its capacity equals the number of request workers passed to New.
	requestChannel chan *internal.EntityRequest
}
    60  
// Compile-time check that *Engine satisfies network.MessageProcessor.
var _ network.MessageProcessor = (*Engine)(nil)
    62  
    63  // New creates a new provider engine, operating on the provided network channel, and accepting requests for entities
    64  // from a node within the set obtained by applying the provided selector filter. It uses the injected retrieve function
    65  // to manage the fullfilment of these requests.
    66  func New(
    67  	log zerolog.Logger,
    68  	metrics module.EngineMetrics,
    69  	net network.EngineRegistry,
    70  	me module.Local,
    71  	state protocol.State,
    72  	requestQueue engine.MessageStore,
    73  	requestWorkers uint,
    74  	channel channels.Channel,
    75  	selector flow.IdentityFilter[flow.Identity],
    76  	retrieve RetrieveFunc) (*Engine, error) {
    77  
    78  	// make sure we don't respond to request sent by self or unauthorized nodes
    79  	selector = filter.And(
    80  		selector,
    81  		filter.Not(filter.HasNodeID[flow.Identity](me.NodeID())),
    82  	)
    83  
    84  	handler := engine.NewMessageHandler(
    85  		log,
    86  		engine.NewNotifier(),
    87  		engine.Pattern{
    88  			// Match is called on every new message coming to this engine.
    89  			// Provider engine only expects EntityRequest.
    90  			// Other message types are discarded by Match.
    91  			Match: func(message *engine.Message) bool {
    92  				_, ok := message.Payload.(*messages.EntityRequest)
    93  				return ok
    94  			},
    95  			// Map is called on messages that are Match(ed) successfully, i.e.,
    96  			// EntityRequest.
    97  			Map: func(message *engine.Message) (*engine.Message, bool) {
    98  				request, ok := message.Payload.(*messages.EntityRequest)
    99  				if !ok {
   100  					// should never happen, unless there is a bug.
   101  					log.Warn().
   102  						Str("entity_ids", fmt.Sprintf("%v", request.EntityIDs)).
   103  						Hex("origin_id", logging.ID(message.OriginID)).
   104  						Msg("cannot match the payload to entity request")
   105  					return nil, false
   106  				}
   107  
   108  				message.Payload = *request // de-reference the pointer as HeroCache works with value.
   109  
   110  				return message, true
   111  			},
   112  			Store: requestQueue,
   113  		})
   114  
   115  	// initialize the propagation engine with its dependencies
   116  	e := &Engine{
   117  		log:            log.With().Str("engine", "provider").Logger(),
   118  		metrics:        metrics,
   119  		state:          state,
   120  		channel:        channel,
   121  		selector:       selector,
   122  		retrieve:       retrieve,
   123  		requestHandler: handler,
   124  		requestQueue:   requestQueue,
   125  		requestChannel: make(chan *internal.EntityRequest, requestWorkers),
   126  	}
   127  
   128  	// register the engine with the network layer and store the conduit
   129  	con, err := net.Register(channel, e)
   130  	if err != nil {
   131  		return nil, fmt.Errorf("could not register engine: %w", err)
   132  	}
   133  	e.con = con
   134  
   135  	cm := component.NewComponentManagerBuilder()
   136  	cm.AddWorker(e.processQueuedRequestsShovellerWorker)
   137  	for i := uint(0); i < requestWorkers; i++ {
   138  		cm.AddWorker(e.processEntityRequestWorker)
   139  	}
   140  
   141  	e.cm = cm.Build()
   142  	e.Component = e.cm
   143  
   144  	return e, nil
   145  }
   146  
   147  // Process processes the given message from the node with the given origin ID in
   148  // a blocking manner. It returns the potential processing error when done.
   149  func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   150  	select {
   151  	case <-e.cm.ShutdownSignal():
   152  		e.log.Warn().
   153  			Hex("origin_id", logging.ID(originID)).
   154  			Msgf("received message after shutdown")
   155  		return nil
   156  	default:
   157  	}
   158  
   159  	e.metrics.MessageReceived(e.channel.String(), metrics.MessageEntityRequest)
   160  
   161  	err := e.requestHandler.Process(originID, event)
   162  	if err != nil {
   163  		if engine.IsIncompatibleInputTypeError(err) {
   164  			e.log.Warn().
   165  				Hex("origin_id", logging.ID(originID)).
   166  				Str("channel", channel.String()).
   167  				Str("event", fmt.Sprintf("%+v", event)).
   168  				Bool(logging.KeySuspicious, true).
   169  				Msg("received unsupported message type")
   170  			return nil
   171  		}
   172  		return fmt.Errorf("unexpected error while processing engine event: %w", err)
   173  	}
   174  
   175  	return nil
   176  }
   177  
   178  // onEntityRequest processes an entity request message from a remote node.
   179  // Error returns:
   180  // * NetworkTransmissionError if there is a network error happens on transmitting the requested entities.
   181  // * InvalidInputError if the list of requested entities is invalid (empty).
   182  // * generic error in case of unexpected failure or implementation bug.
   183  func (e *Engine) onEntityRequest(request *internal.EntityRequest) error {
   184  	defer e.metrics.MessageHandled(e.channel.String(), metrics.MessageEntityRequest)
   185  
   186  	lg := e.log.With().
   187  		Str("origin_id", request.OriginId.String()).
   188  		Strs("entity_ids", flow.IdentifierList(request.EntityIds).Strings()).
   189  		Logger()
   190  
   191  	lg.Info().
   192  		Uint64("nonce", request.Nonce).
   193  		Msg("entity request received")
   194  
   195  	// TODO: add reputation system to punish nodes for malicious behaviour (spam / repeated requests)
   196  
   197  	// then, we try to get the current identity of the requester and check it against the filter
   198  	// for the handler to make sure the requester is authorized for this resource
   199  	requesters, err := e.state.Final().Identities(filter.And(
   200  		e.selector,
   201  		filter.HasNodeID[flow.Identity](request.OriginId)),
   202  	)
   203  	if err != nil {
   204  		return fmt.Errorf("could not get requesters: %w", err)
   205  	}
   206  	if len(requesters) == 0 {
   207  		return engine.NewInvalidInputErrorf("invalid requester origin (%x)", request.OriginId)
   208  	}
   209  
   210  	// try to retrieve each entity and skip missing ones
   211  	entities := make([]flow.Entity, 0, len(request.EntityIds))
   212  	entityIDs := make([]flow.Identifier, 0, len(request.EntityIds))
   213  	seen := make(map[flow.Identifier]struct{})
   214  	for _, entityID := range request.EntityIds {
   215  		// skip requesting duplicate entity IDs
   216  		if _, ok := seen[entityID]; ok {
   217  			lg.Warn().
   218  				Str("entity_id", entityID.String()).
   219  				Bool(logging.KeySuspicious, true).
   220  				Msg("duplicate entity ID in entity request")
   221  			continue
   222  		}
   223  
   224  		entity, err := e.retrieve(entityID)
   225  		if errors.Is(err, storage.ErrNotFound) {
   226  			lg.Debug().
   227  				Str("entity_id", entityID.String()).
   228  				Msg("entity not found")
   229  			continue
   230  		}
   231  		if err != nil {
   232  			return fmt.Errorf("could not retrieve entity (%x): %w", entityID, err)
   233  		}
   234  		entities = append(entities, entity)
   235  		entityIDs = append(entityIDs, entityID)
   236  		seen[entityID] = struct{}{}
   237  	}
   238  
   239  	// encode all of the entities
   240  	blobs := make([][]byte, 0, len(entities))
   241  	for _, entity := range entities {
   242  		blob, err := msgpack.Marshal(entity)
   243  		if err != nil {
   244  			return fmt.Errorf("could not encode entity (%x): %w", entity.ID(), err)
   245  		}
   246  		blobs = append(blobs, blob)
   247  	}
   248  
   249  	// NOTE: we do _NOT_ avoid sending empty responses, as this will allow
   250  	// the requester to know we don't have any of the requested entities, which
   251  	// allows him to retry them immediately, rather than waiting for the expiry
   252  	// of the retry interval
   253  
   254  	// send back the response
   255  	res := &messages.EntityResponse{
   256  		Nonce:     request.Nonce,
   257  		EntityIDs: entityIDs,
   258  		Blobs:     blobs,
   259  	}
   260  	err = e.con.Unicast(res, request.OriginId)
   261  	if err != nil {
   262  		return engine.NewNetworkTransmissionErrorf("could not send entity response: %w", err)
   263  	}
   264  
   265  	e.metrics.MessageSent(e.channel.String(), metrics.MessageEntityResponse)
   266  	e.log.Info().
   267  		Str("origin_id", request.OriginId.String()).
   268  		Strs("entity_ids", flow.IdentifierList(entityIDs).Strings()).
   269  		Uint64("nonce", request.Nonce). // to match with the the entity request received log
   270  		Msg("entity response sent")
   271  
   272  	return nil
   273  }
   274  
   275  func (e *Engine) processQueuedRequestsShovellerWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   276  	ready()
   277  
   278  	e.log.Debug().Msg("process entity request shoveller worker started")
   279  
   280  	for {
   281  		select {
   282  		case <-e.requestHandler.GetNotifier():
   283  			// there is at least a single request in the queue, so we try to process it.
   284  			e.processAvailableMessages(ctx)
   285  		case <-ctx.Done():
   286  			// close the internal channel, the workers will drain the channel before exiting
   287  			close(e.requestChannel)
   288  			e.log.Trace().Msg("processing entity request worker terminated")
   289  			return
   290  		}
   291  	}
   292  }
   293  
   294  func (e *Engine) processAvailableMessages(ctx irrecoverable.SignalerContext) {
   295  	for {
   296  		select {
   297  		case <-ctx.Done():
   298  			return
   299  		default:
   300  		}
   301  
   302  		msg, ok := e.requestQueue.Get()
   303  		if !ok {
   304  			// no more requests, return
   305  			return
   306  		}
   307  
   308  		requestEvent, ok := msg.Payload.(messages.EntityRequest)
   309  		if !ok {
   310  			// should never happen, as we only put EntityRequest in the queue,
   311  			// if it does happen, it means there is a bug in the queue implementation.
   312  			ctx.Throw(fmt.Errorf("invalid message type in entity request queue: %T", msg.Payload))
   313  		}
   314  
   315  		req := &internal.EntityRequest{
   316  			OriginId:  msg.OriginID,
   317  			EntityIds: requestEvent.EntityIDs,
   318  			Nonce:     requestEvent.Nonce,
   319  		}
   320  
   321  		lg := e.log.With().
   322  			Hex("origin_id", logging.ID(req.OriginId)).
   323  			Str("requested_entity_ids", fmt.Sprintf("%v", req.EntityIds)).Logger()
   324  
   325  		lg.Trace().Msg("processor is queuing entity request for processing")
   326  		e.requestChannel <- req
   327  		lg.Trace().Msg("processor queued up entity request for processing")
   328  	}
   329  }
   330  
   331  func (e *Engine) processEntityRequestWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   332  	ready()
   333  
   334  	for {
   335  		request, ok := <-e.requestChannel
   336  		if !ok {
   337  			e.log.Trace().Msg("processing entity request worker terminated")
   338  			return
   339  		}
   340  		lg := e.log.With().
   341  			Hex("origin_id", logging.ID(request.OriginId)).
   342  			Str("requested_entity_ids", fmt.Sprintf("%v", request.EntityIds)).Logger()
   343  		lg.Trace().Msg("worker picked up entity request for processing")
   344  		err := e.onEntityRequest(request)
   345  		if err != nil {
   346  			if engine.IsInvalidInputError(err) || engine.IsNetworkTransmissionError(err) {
   347  				lg.Error().Err(err).Msg("worker could not process entity request")
   348  			} else {
   349  				// this is an unexpected error, we crash the node.
   350  				ctx.Throw(err)
   351  			}
   352  		}
   353  		lg.Trace().Msg("worker finished entity request processing")
   354  	}
   355  }