github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/collection/ingest/engine.go (about)

     1  // Package ingest implements an engine for receiving transactions that need
     2  // to be packaged into a collection.
     3  package ingest
     4  
     5  import (
     6  	"context"
     7  	"errors"
     8  	"fmt"
     9  
    10  	"github.com/rs/zerolog"
    11  
    12  	"github.com/onflow/flow-go/access"
    13  	"github.com/onflow/flow-go/engine"
    14  	"github.com/onflow/flow-go/engine/common/fifoqueue"
    15  	"github.com/onflow/flow-go/model/flow"
    16  	"github.com/onflow/flow-go/module"
    17  	"github.com/onflow/flow-go/module/component"
    18  	"github.com/onflow/flow-go/module/irrecoverable"
    19  	"github.com/onflow/flow-go/module/mempool/epochs"
    20  	"github.com/onflow/flow-go/module/metrics"
    21  	"github.com/onflow/flow-go/network"
    22  	"github.com/onflow/flow-go/network/channels"
    23  	"github.com/onflow/flow-go/state/protocol"
    24  	"github.com/onflow/flow-go/utils/logging"
    25  )
    26  
    27  // Engine is the transaction ingestion engine, which ensures that new
    28  // transactions are delegated to the correct collection cluster, and prepared
    29  // to be included in a collection.
    30  type Engine struct {
    31  	*component.ComponentManager
    32  	log                  zerolog.Logger
    33  	engMetrics           module.EngineMetrics
    34  	colMetrics           module.CollectionMetrics
    35  	conduit              network.Conduit
    36  	me                   module.Local
    37  	state                protocol.State
    38  	pendingTransactions  engine.MessageStore
    39  	messageHandler       *engine.MessageHandler
    40  	pools                *epochs.TransactionPools
    41  	transactionValidator *access.TransactionValidator
    42  
    43  	config Config
    44  }
    45  
    46  // New creates a new collection ingest engine.
    47  func New(
    48  	log zerolog.Logger,
    49  	net network.EngineRegistry,
    50  	state protocol.State,
    51  	engMetrics module.EngineMetrics,
    52  	mempoolMetrics module.MempoolMetrics,
    53  	colMetrics module.CollectionMetrics,
    54  	me module.Local,
    55  	chain flow.Chain,
    56  	pools *epochs.TransactionPools,
    57  	config Config,
    58  	limiter *AddressRateLimiter,
    59  ) (*Engine, error) {
    60  
    61  	logger := log.With().Str("engine", "ingest").Logger()
    62  
    63  	transactionValidator := access.NewTransactionValidatorWithLimiter(
    64  		access.NewProtocolStateBlocks(state),
    65  		chain,
    66  		access.TransactionValidationOptions{
    67  			Expiry:                 flow.DefaultTransactionExpiry,
    68  			ExpiryBuffer:           config.ExpiryBuffer,
    69  			MaxGasLimit:            config.MaxGasLimit,
    70  			CheckScriptsParse:      config.CheckScriptsParse,
    71  			MaxTransactionByteSize: config.MaxTransactionByteSize,
    72  			MaxCollectionByteSize:  config.MaxCollectionByteSize,
    73  		},
    74  		limiter,
    75  	)
    76  
    77  	// FIFO queue for transactions
    78  	queue, err := fifoqueue.NewFifoQueue(
    79  		int(config.MaxMessageQueueSize),
    80  		fifoqueue.WithLengthObserver(func(len int) {
    81  			mempoolMetrics.MempoolEntries(metrics.ResourceTransactionIngestQueue, uint(len))
    82  		}),
    83  	)
    84  	if err != nil {
    85  		return nil, fmt.Errorf("could not create transaction message queue: %w", err)
    86  	}
    87  	pendingTransactions := &engine.FifoMessageStore{FifoQueue: queue}
    88  
    89  	// define how inbound messages are mapped to message queues
    90  	handler := engine.NewMessageHandler(
    91  		logger,
    92  		engine.NewNotifier(),
    93  		engine.Pattern{
    94  			Match: func(msg *engine.Message) bool {
    95  				_, ok := msg.Payload.(*flow.TransactionBody)
    96  				if ok {
    97  					engMetrics.MessageReceived(metrics.EngineCollectionIngest, metrics.MessageTransaction)
    98  				}
    99  				return ok
   100  			},
   101  			Store: pendingTransactions,
   102  		},
   103  	)
   104  
   105  	e := &Engine{
   106  		log:                  logger,
   107  		engMetrics:           engMetrics,
   108  		colMetrics:           colMetrics,
   109  		me:                   me,
   110  		state:                state,
   111  		pendingTransactions:  pendingTransactions,
   112  		messageHandler:       handler,
   113  		pools:                pools,
   114  		config:               config,
   115  		transactionValidator: transactionValidator,
   116  	}
   117  
   118  	e.ComponentManager = component.NewComponentManagerBuilder().
   119  		AddWorker(e.processQueuedTransactions).
   120  		Build()
   121  
   122  	conduit, err := net.Register(channels.PushTransactions, e)
   123  	if err != nil {
   124  		return nil, fmt.Errorf("could not register engine: %w", err)
   125  	}
   126  	e.conduit = conduit
   127  
   128  	return e, nil
   129  }
   130  
   131  // Process processes a transaction message from the network and enqueues the
   132  // message. Validation and ingestion is performed in the processQueuedTransactions
   133  // worker.
   134  func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   135  	select {
   136  	case <-e.ComponentManager.ShutdownSignal():
   137  		e.log.Warn().Msgf("received message from %x after shut down", originID)
   138  		return nil
   139  	default:
   140  	}
   141  
   142  	err := e.messageHandler.Process(originID, event)
   143  	if err != nil {
   144  		if engine.IsIncompatibleInputTypeError(err) {
   145  			e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel)
   146  			return nil
   147  		}
   148  		return fmt.Errorf("unexpected error while processing engine message: %w", err)
   149  	}
   150  	return nil
   151  }
   152  
   153  // ProcessTransaction processes a transaction message submitted from another
   154  // local component. The transaction is validated and ingested synchronously.
   155  // This is used by the GRPC API, for transactions from Access nodes.
   156  func (e *Engine) ProcessTransaction(tx *flow.TransactionBody) error {
   157  	// do not process transactions after the engine has shut down
   158  	select {
   159  	case <-e.ComponentManager.ShutdownSignal():
   160  		return component.ErrComponentShutdown
   161  	default:
   162  	}
   163  
   164  	return e.onTransaction(e.me.NodeID(), tx)
   165  }
   166  
   167  // processQueuedTransactions is the main message processing loop for transaction messages.
   168  func (e *Engine) processQueuedTransactions(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   169  	ready()
   170  
   171  	for {
   172  		select {
   173  		case <-ctx.Done():
   174  			return
   175  		case <-e.messageHandler.GetNotifier():
   176  			err := e.processAvailableMessages(ctx)
   177  			if err != nil {
   178  				// if an error reaches this point, it is unexpected
   179  				ctx.Throw(err)
   180  				return
   181  			}
   182  		}
   183  	}
   184  }
   185  
   186  // processAvailableMessages is called when the message queue is non-empty. It
   187  // will process transactions while the queue is non-empty, then return.
   188  //
   189  // All expected error conditions are handled within this function. Unexpected
   190  // errors which should cause the component to stop are passed up.
   191  func (e *Engine) processAvailableMessages(ctx context.Context) error {
   192  	for {
   193  		select {
   194  		case <-ctx.Done():
   195  			return nil
   196  		default:
   197  		}
   198  
   199  		msg, ok := e.pendingTransactions.Get()
   200  		if ok {
   201  			err := e.onTransaction(msg.OriginID, msg.Payload.(*flow.TransactionBody))
   202  			// log warnings for expected error conditions
   203  			if engine.IsUnverifiableInputError(err) {
   204  				e.log.Warn().Err(err).Msg("unable to process unverifiable transaction")
   205  			} else if engine.IsInvalidInputError(err) {
   206  				e.log.Warn().Err(err).Msg("discarding invalid transaction")
   207  			} else if err != nil {
   208  				// bubble up unexpected error
   209  				return fmt.Errorf("unexpected error handling transaction: %w", err)
   210  			}
   211  			continue
   212  		}
   213  
   214  		// when there is no more messages in the queue, back to the loop to wait
   215  		// for the next incoming message to arrive.
   216  		return nil
   217  	}
   218  }
   219  
   220  // onTransaction handles receipt of a new transaction. This can be submitted
   221  // from outside the system or routed from another collection node.
   222  //
   223  // Returns:
   224  //   - engine.UnverifiableInputError if the reference block is unknown or if the
   225  //     node is not a member of any cluster in the reference epoch.
   226  //   - engine.InvalidInputError if the transaction is invalid.
   227  //   - other error for any other unexpected error condition.
   228  func (e *Engine) onTransaction(originID flow.Identifier, tx *flow.TransactionBody) error {
   229  
   230  	defer e.engMetrics.MessageHandled(metrics.EngineCollectionIngest, metrics.MessageTransaction)
   231  
   232  	txID := tx.ID()
   233  	log := e.log.With().
   234  		Hex("origin_id", originID[:]).
   235  		Hex("tx_id", txID[:]).
   236  		Hex("ref_block_id", tx.ReferenceBlockID[:]).
   237  		Logger()
   238  
   239  	log.Info().Msg("transaction message received")
   240  
   241  	// get the state snapshot w.r.t. the reference block
   242  	refSnapshot := e.state.AtBlockID(tx.ReferenceBlockID)
   243  	// fail fast if this is an unknown reference
   244  	_, err := refSnapshot.Head()
   245  	if err != nil {
   246  		return engine.NewUnverifiableInputError("could not get reference block for transaction (%x): %w", txID, err)
   247  	}
   248  
   249  	// using the transaction's reference block, determine which cluster we're in.
   250  	// if we don't know the reference block, we will fail when attempting to query the epoch.
   251  	refEpoch := refSnapshot.Epochs().Current()
   252  
   253  	localCluster, err := e.getLocalCluster(refEpoch)
   254  	if err != nil {
   255  		return fmt.Errorf("could not get local cluster: %w", err)
   256  	}
   257  	clusters, err := refEpoch.Clustering()
   258  	if err != nil {
   259  		return fmt.Errorf("could not get clusters for reference epoch: %w", err)
   260  	}
   261  	txCluster, ok := clusters.ByTxID(txID)
   262  	if !ok {
   263  		return fmt.Errorf("could not get cluster responsible for tx: %x", txID)
   264  	}
   265  
   266  	localClusterFingerPrint := localCluster.ID()
   267  	txClusterFingerPrint := txCluster.ID()
   268  	log = log.With().
   269  		Hex("local_cluster", logging.ID(localClusterFingerPrint)).
   270  		Hex("tx_cluster", logging.ID(txClusterFingerPrint)).
   271  		Logger()
   272  
   273  	// validate and ingest the transaction, so it is eligible for inclusion in
   274  	// a future collection proposed by this node
   275  	err = e.ingestTransaction(log, refEpoch, tx, txID, localClusterFingerPrint, txClusterFingerPrint)
   276  	if err != nil {
   277  		return fmt.Errorf("could not ingest transaction: %w", err)
   278  	}
   279  
   280  	// if the message was submitted internally (ie. via the Access API)
   281  	// propagate it to members of the responsible cluster (either our cluster
   282  	// or a different cluster)
   283  	if originID == e.me.NodeID() {
   284  		e.propagateTransaction(log, tx, txCluster)
   285  	}
   286  
   287  	log.Info().Msg("transaction processed")
   288  	return nil
   289  }
   290  
   291  // getLocalCluster returns the cluster this node is a part of for the given reference epoch.
   292  // In cases where the node is not a part of any cluster, this function will differentiate
   293  // between expected and unexpected cases.
   294  //
   295  // Returns:
   296  //   - engine.UnverifiableInputError when this node is not in any cluster because it is not
   297  //     a member of the reference epoch. This is an expected condition and the transaction
   298  //     should be discarded.
   299  //   - other error for any other, unexpected error condition.
   300  func (e *Engine) getLocalCluster(refEpoch protocol.Epoch) (flow.IdentitySkeletonList, error) {
   301  	epochCounter, err := refEpoch.Counter()
   302  	if err != nil {
   303  		return nil, fmt.Errorf("could not get counter for reference epoch: %w", err)
   304  	}
   305  	clusters, err := refEpoch.Clustering()
   306  	if err != nil {
   307  		return nil, fmt.Errorf("could not get clusters for reference epoch: %w", err)
   308  	}
   309  
   310  	localCluster, _, ok := clusters.ByNodeID(e.me.NodeID())
   311  	if !ok {
   312  		// if we aren't assigned to a cluster, check that we are a member of
   313  		// the reference epoch
   314  		refIdentities, err := refEpoch.InitialIdentities()
   315  		if err != nil {
   316  			return nil, fmt.Errorf("could not get initial identities for reference epoch: %w", err)
   317  		}
   318  
   319  		if _, ok := refIdentities.ByNodeID(e.me.NodeID()); ok {
   320  			// CAUTION: we are a member of the epoch, but have no assigned cluster!
   321  			// This is an unexpected condition and indicates a protocol state invariant has been broken
   322  			return nil, fmt.Errorf("this node should have an assigned cluster in epoch (counter=%d), but has none", epochCounter)
   323  		}
   324  		return nil, engine.NewUnverifiableInputError("this node is not assigned a cluster in epoch (counter=%d)", epochCounter)
   325  	}
   326  
   327  	return localCluster, nil
   328  }
   329  
   330  // ingestTransaction validates and ingests the transaction, if it is routed to
   331  // our local cluster, is valid, and has not been seen previously.
   332  //
   333  // Returns:
   334  // * engine.InvalidInputError if the transaction is invalid.
   335  // * other error for any other unexpected error condition.
   336  func (e *Engine) ingestTransaction(
   337  	log zerolog.Logger,
   338  	refEpoch protocol.Epoch,
   339  	tx *flow.TransactionBody,
   340  	txID flow.Identifier,
   341  	localClusterFingerprint flow.Identifier,
   342  	txClusterFingerprint flow.Identifier,
   343  ) error {
   344  	epochCounter, err := refEpoch.Counter()
   345  	if err != nil {
   346  		return fmt.Errorf("could not get counter for reference epoch: %w", err)
   347  	}
   348  
   349  	// use the transaction pool for the epoch the reference block is part of
   350  	pool := e.pools.ForEpoch(epochCounter)
   351  
   352  	// short-circuit if we have already stored the transaction
   353  	if pool.Has(txID) {
   354  		log.Debug().Msg("received dupe transaction")
   355  		return nil
   356  	}
   357  
   358  	// check if the transaction is valid
   359  	err = e.transactionValidator.Validate(tx)
   360  	if err != nil {
   361  		return engine.NewInvalidInputErrorf("invalid transaction (%x): %w", txID, err)
   362  	}
   363  
   364  	// if our cluster is responsible for the transaction, add it to our local mempool
   365  	if localClusterFingerprint == txClusterFingerprint {
   366  		_ = pool.Add(tx)
   367  		e.colMetrics.TransactionIngested(txID)
   368  	}
   369  
   370  	return nil
   371  }
   372  
   373  // propagateTransaction propagates the transaction to a number of the responsible
   374  // cluster's members. Any unexpected networking errors are logged.
   375  func (e *Engine) propagateTransaction(log zerolog.Logger, tx *flow.TransactionBody, txCluster flow.IdentitySkeletonList) {
   376  	log.Debug().Msg("propagating transaction to cluster")
   377  
   378  	err := e.conduit.Multicast(tx, e.config.PropagationRedundancy+1, txCluster.NodeIDs()...)
   379  	if err != nil && !errors.Is(err, network.EmptyTargetList) {
   380  		// if multicast to a target cluster with at least one node failed, log an error and exit
   381  		e.log.Error().Err(err).Msg("could not route transaction to cluster")
   382  	}
   383  	if err == nil {
   384  		e.engMetrics.MessageSent(metrics.EngineCollectionIngest, metrics.MessageTransaction)
   385  	}
   386  }