github.com/koko1123/flow-go-1@v0.29.6/engine/common/provider/engine.go (about)

     1  package provider
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  
     7  	"github.com/rs/zerolog"
     8  	"github.com/vmihailenco/msgpack"
     9  
    10  	"github.com/koko1123/flow-go-1/engine"
    11  	"github.com/koko1123/flow-go-1/engine/common/provider/internal"
    12  	"github.com/koko1123/flow-go-1/model/flow"
    13  	"github.com/koko1123/flow-go-1/model/flow/filter"
    14  	"github.com/koko1123/flow-go-1/model/messages"
    15  	"github.com/koko1123/flow-go-1/module"
    16  	"github.com/koko1123/flow-go-1/module/component"
    17  	"github.com/koko1123/flow-go-1/module/irrecoverable"
    18  	"github.com/koko1123/flow-go-1/module/metrics"
    19  	"github.com/koko1123/flow-go-1/network"
    20  	"github.com/koko1123/flow-go-1/network/channels"
    21  	"github.com/koko1123/flow-go-1/state/protocol"
    22  	"github.com/koko1123/flow-go-1/storage"
    23  	"github.com/koko1123/flow-go-1/utils/logging"
    24  )
    25  
const (
	// DefaultRequestProviderWorkers is the default number of workers used to process entity requests.
	DefaultRequestProviderWorkers = uint(5)

	// DefaultEntityRequestCacheSize is the default max message queue size for the provider engine.
	// This equates to ~5GB of memory usage with a full queue (10M*500).
	// NOTE(review): the 10M figure is presumably a worst-case size per queued
	// message (i.e., 10MB * 500 messages) -- confirm against the network layer limits.
	DefaultEntityRequestCacheSize = 500
)
    34  
// RetrieveFunc is a function provided to the provider engine upon construction.
// It is used by the engine when receiving requests in order to retrieve the
// related entities, one call per requested entity ID.
//
// It is important that the retrieve function return a `storage.ErrNotFound`
// error if the entity does not exist locally: the engine skips entities that
// are reported as not found, whereas any other error aborts the handling of the
// request and no response is sent.
type RetrieveFunc func(flow.Identifier) (flow.Entity, error)
    41  
// Engine is a generic provider engine, handling the fulfillment of entity
// requests on the flow network. It is the `reply` part of the request-reply
// pattern provided by the pair of generic exchange engines.
//
// Incoming requests are stored in requestQueue by requestHandler, moved onto
// requestChannel by a single shoveller worker, and answered by a pool of
// request workers.
type Engine struct {
	component.Component
	// cm manages the shoveller worker and the entity request workers; it also
	// backs the embedded Component.
	cm             *component.ComponentManager
	// log is the engine logger, tagged with engine=provider.
	log            zerolog.Logger
	// metrics records received and handled requests as well as sent responses.
	metrics        module.EngineMetrics
	// state is queried (via Final().Identities) to check that a requester is authorized.
	state          protocol.State
	// con is the conduit obtained when registering on channel; responses are unicast over it.
	con            network.Conduit
	// channel is the network channel the engine is registered on; it is also used as metrics label.
	channel        channels.Channel
	// requestHandler matches incoming EntityRequest messages and stores them in requestQueue.
	requestHandler *engine.MessageHandler
	// requestQueue holds the entity requests that have not been handed to a worker yet.
	requestQueue   engine.MessageStore
	// selector determines which identities are allowed to request entities from this engine.
	selector       flow.IdentityFilter
	// retrieve looks up a single entity by its ID.
	retrieve       RetrieveFunc
	// buffered channel for EntityRequest workers to pick and process.
	requestChannel chan *internal.EntityRequest
}
    60  
// Compile-time check that *Engine implements network.MessageProcessor.
var _ network.MessageProcessor = (*Engine)(nil)
    62  
    63  // New creates a new provider engine, operating on the provided network channel, and accepting requests for entities
    64  // from a node within the set obtained by applying the provided selector filter. It uses the injected retrieve function
    65  // to manage the fullfilment of these requests.
    66  func New(
    67  	log zerolog.Logger,
    68  	metrics module.EngineMetrics,
    69  	net network.Network,
    70  	me module.Local,
    71  	state protocol.State,
    72  	requestQueue engine.MessageStore,
    73  	requestWorkers uint,
    74  	channel channels.Channel,
    75  	selector flow.IdentityFilter,
    76  	retrieve RetrieveFunc) (*Engine, error) {
    77  
    78  	// make sure we don't respond to request sent by self or unauthorized nodes
    79  	selector = filter.And(
    80  		selector,
    81  		filter.HasWeight(true),
    82  		filter.Not(filter.HasNodeID(me.NodeID())),
    83  	)
    84  
    85  	handler := engine.NewMessageHandler(
    86  		log,
    87  		engine.NewNotifier(),
    88  		engine.Pattern{
    89  			// Match is called on every new message coming to this engine.
    90  			// Provider engine only expects EntityRequest.
    91  			// Other message types are discarded by Match.
    92  			Match: func(message *engine.Message) bool {
    93  				_, ok := message.Payload.(*messages.EntityRequest)
    94  				return ok
    95  			},
    96  			// Map is called on messages that are Match(ed) successfully, i.e.,
    97  			// EntityRequest.
    98  			Map: func(message *engine.Message) (*engine.Message, bool) {
    99  				request, ok := message.Payload.(*messages.EntityRequest)
   100  				if !ok {
   101  					// should never happen, unless there is a bug.
   102  					log.Warn().
   103  						Str("entity_ids", fmt.Sprintf("%v", request.EntityIDs)).
   104  						Hex("origin_id", logging.ID(message.OriginID)).
   105  						Msg("cannot match the payload to entity request")
   106  					return nil, false
   107  				}
   108  
   109  				message.Payload = *request // de-reference the pointer as HeroCache works with value.
   110  
   111  				return message, true
   112  			},
   113  			Store: requestQueue,
   114  		})
   115  
   116  	// initialize the propagation engine with its dependencies
   117  	e := &Engine{
   118  		log:            log.With().Str("engine", "provider").Logger(),
   119  		metrics:        metrics,
   120  		state:          state,
   121  		channel:        channel,
   122  		selector:       selector,
   123  		retrieve:       retrieve,
   124  		requestHandler: handler,
   125  		requestQueue:   requestQueue,
   126  		requestChannel: make(chan *internal.EntityRequest, requestWorkers),
   127  	}
   128  
   129  	// register the engine with the network layer and store the conduit
   130  	con, err := net.Register(channel, e)
   131  	if err != nil {
   132  		return nil, fmt.Errorf("could not register engine: %w", err)
   133  	}
   134  	e.con = con
   135  
   136  	cm := component.NewComponentManagerBuilder()
   137  	cm.AddWorker(e.processQueuedRequestsShovellerWorker)
   138  	for i := uint(0); i < requestWorkers; i++ {
   139  		cm.AddWorker(e.processEntityRequestWorker)
   140  	}
   141  
   142  	e.cm = cm.Build()
   143  	e.Component = e.cm
   144  
   145  	return e, nil
   146  }
   147  
   148  // Process processes the given message from the node with the given origin ID in
   149  // a blocking manner. It returns the potential processing error when done.
   150  func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   151  	select {
   152  	case <-e.cm.ShutdownSignal():
   153  		e.log.Warn().
   154  			Hex("origin_id", logging.ID(originID)).
   155  			Msgf("received message after shutdown")
   156  		return nil
   157  	default:
   158  	}
   159  
   160  	e.metrics.MessageReceived(e.channel.String(), metrics.MessageEntityRequest)
   161  
   162  	err := e.requestHandler.Process(originID, event)
   163  	if err != nil {
   164  		if engine.IsIncompatibleInputTypeError(err) {
   165  			e.log.Warn().
   166  				Hex("origin_id", logging.ID(originID)).
   167  				Str("channel", channel.String()).
   168  				Str("event", fmt.Sprintf("%+v", event)).
   169  				Bool(logging.KeySuspicious, true).
   170  				Msg("received unsupported message type")
   171  			return nil
   172  		}
   173  		return fmt.Errorf("unexpected error while processing engine event: %w", err)
   174  	}
   175  
   176  	return nil
   177  }
   178  
   179  // onEntityRequest processes an entity request message from a remote node.
   180  // Error returns:
   181  // * NetworkTransmissionError if there is a network error happens on transmitting the requested entities.
   182  // * InvalidInputError if the list of requested entities is invalid (empty).
   183  // * generic error in case of unexpected failure or implementation bug.
   184  func (e *Engine) onEntityRequest(request *internal.EntityRequest) error {
   185  	defer e.metrics.MessageHandled(e.channel.String(), metrics.MessageEntityRequest)
   186  
   187  	lg := e.log.With().
   188  		Str("origin_id", request.OriginId.String()).
   189  		Strs("entity_ids", flow.IdentifierList(request.EntityIds).Strings()).
   190  		Logger()
   191  
   192  	lg.Info().
   193  		Uint64("nonce", request.Nonce).
   194  		Msg("entity request received")
   195  
   196  	// TODO: add reputation system to punish nodes for malicious behaviour (spam / repeated requests)
   197  
   198  	// then, we try to get the current identity of the requester and check it against the filter
   199  	// for the handler to make sure the requester is authorized for this resource
   200  	requesters, err := e.state.Final().Identities(filter.And(
   201  		e.selector,
   202  		filter.HasNodeID(request.OriginId)),
   203  	)
   204  	if err != nil {
   205  		return fmt.Errorf("could not get requesters: %w", err)
   206  	}
   207  	if len(requesters) == 0 {
   208  		return engine.NewInvalidInputErrorf("invalid requester origin (%x)", request.OriginId)
   209  	}
   210  
   211  	// try to retrieve each entity and skip missing ones
   212  	entities := make([]flow.Entity, 0, len(request.EntityIds))
   213  	entityIDs := make([]flow.Identifier, 0, len(request.EntityIds))
   214  	seen := make(map[flow.Identifier]struct{})
   215  	for _, entityID := range request.EntityIds {
   216  		// skip requesting duplicate entity IDs
   217  		if _, ok := seen[entityID]; ok {
   218  			lg.Warn().
   219  				Str("entity_id", entityID.String()).
   220  				Bool(logging.KeySuspicious, true).
   221  				Msg("duplicate entity ID in entity request")
   222  			continue
   223  		}
   224  
   225  		entity, err := e.retrieve(entityID)
   226  		if errors.Is(err, storage.ErrNotFound) {
   227  			lg.Debug().
   228  				Str("entity_id", entityID.String()).
   229  				Msg("entity not found")
   230  			continue
   231  		}
   232  		if err != nil {
   233  			return fmt.Errorf("could not retrieve entity (%x): %w", entityID, err)
   234  		}
   235  		entities = append(entities, entity)
   236  		entityIDs = append(entityIDs, entityID)
   237  		seen[entityID] = struct{}{}
   238  	}
   239  
   240  	// encode all of the entities
   241  	blobs := make([][]byte, 0, len(entities))
   242  	for _, entity := range entities {
   243  		blob, err := msgpack.Marshal(entity)
   244  		if err != nil {
   245  			return fmt.Errorf("could not encode entity (%x): %w", entity.ID(), err)
   246  		}
   247  		blobs = append(blobs, blob)
   248  	}
   249  
   250  	// NOTE: we do _NOT_ avoid sending empty responses, as this will allow
   251  	// the requester to know we don't have any of the requested entities, which
   252  	// allows him to retry them immediately, rather than waiting for the expiry
   253  	// of the retry interval
   254  
   255  	// send back the response
   256  	res := &messages.EntityResponse{
   257  		Nonce:     request.Nonce,
   258  		EntityIDs: entityIDs,
   259  		Blobs:     blobs,
   260  	}
   261  	err = e.con.Unicast(res, request.OriginId)
   262  	if err != nil {
   263  		return engine.NewNetworkTransmissionErrorf("could not send entity response: %w", err)
   264  	}
   265  
   266  	e.metrics.MessageSent(e.channel.String(), metrics.MessageEntityResponse)
   267  	e.log.Info().
   268  		Str("origin_id", request.OriginId.String()).
   269  		Strs("entity_ids", flow.IdentifierList(entityIDs).Strings()).
   270  		Uint64("nonce", request.Nonce). // to match with the the entity request received log
   271  		Msg("entity response sent")
   272  
   273  	return nil
   274  }
   275  
   276  func (e *Engine) processQueuedRequestsShovellerWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   277  	ready()
   278  
   279  	e.log.Debug().Msg("process entity request shoveller worker started")
   280  
   281  	for {
   282  		select {
   283  		case <-e.requestHandler.GetNotifier():
   284  			// there is at least a single request in the queue, so we try to process it.
   285  			e.processAvailableMessages(ctx)
   286  		case <-ctx.Done():
   287  			// close the internal channel, the workers will drain the channel before exiting
   288  			close(e.requestChannel)
   289  			e.log.Trace().Msg("processing entity request worker terminated")
   290  			return
   291  		}
   292  	}
   293  }
   294  
   295  func (e *Engine) processAvailableMessages(ctx irrecoverable.SignalerContext) {
   296  	for {
   297  		select {
   298  		case <-ctx.Done():
   299  			return
   300  		default:
   301  		}
   302  
   303  		msg, ok := e.requestQueue.Get()
   304  		if !ok {
   305  			// no more requests, return
   306  			return
   307  		}
   308  
   309  		requestEvent, ok := msg.Payload.(messages.EntityRequest)
   310  		if !ok {
   311  			// should never happen, as we only put EntityRequest in the queue,
   312  			// if it does happen, it means there is a bug in the queue implementation.
   313  			ctx.Throw(fmt.Errorf("invalid message type in entity request queue: %T", msg.Payload))
   314  		}
   315  
   316  		req := &internal.EntityRequest{
   317  			OriginId:  msg.OriginID,
   318  			EntityIds: requestEvent.EntityIDs,
   319  			Nonce:     requestEvent.Nonce,
   320  		}
   321  
   322  		lg := e.log.With().
   323  			Hex("origin_id", logging.ID(req.OriginId)).
   324  			Str("requested_entity_ids", fmt.Sprintf("%v", req.EntityIds)).Logger()
   325  
   326  		lg.Trace().Msg("processor is queuing entity request for processing")
   327  		e.requestChannel <- req
   328  		lg.Trace().Msg("processor queued up entity request for processing")
   329  	}
   330  }
   331  
   332  func (e *Engine) processEntityRequestWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   333  	ready()
   334  
   335  	for {
   336  		request, ok := <-e.requestChannel
   337  		if !ok {
   338  			e.log.Trace().Msg("processing entity request worker terminated")
   339  			return
   340  		}
   341  		lg := e.log.With().
   342  			Hex("origin_id", logging.ID(request.OriginId)).
   343  			Str("requested_entity_ids", fmt.Sprintf("%v", request.EntityIds)).Logger()
   344  		lg.Trace().Msg("worker picked up entity request for processing")
   345  		err := e.onEntityRequest(request)
   346  		if err != nil {
   347  			if engine.IsInvalidInputError(err) || engine.IsNetworkTransmissionError(err) {
   348  				lg.Error().Err(err).Msg("worker could not process entity request")
   349  			} else {
   350  				// this is an unexpected error, we crash the node.
   351  				ctx.Throw(err)
   352  			}
   353  		}
   354  		lg.Trace().Msg("worker finished entity request processing")
   355  	}
   356  }