github.com/onflow/flow-go@v0.33.17/engine/access/rpc/backend/backend.go (about)

     1  package backend
     2  
     3  import (
     4  	"context"
     5  	"crypto/md5" //nolint:gosec
     6  	"fmt"
     7  	"time"
     8  
     9  	lru "github.com/hashicorp/golang-lru/v2"
    10  	accessproto "github.com/onflow/flow/protobuf/go/flow/access"
    11  	"github.com/rs/zerolog"
    12  
    13  	"github.com/onflow/flow-go/access"
    14  	"github.com/onflow/flow-go/cmd/build"
    15  	"github.com/onflow/flow-go/engine/access/rpc/connection"
    16  	"github.com/onflow/flow-go/engine/common/rpc"
    17  	"github.com/onflow/flow-go/fvm/blueprints"
    18  	"github.com/onflow/flow-go/model/flow"
    19  	"github.com/onflow/flow-go/model/flow/filter"
    20  	"github.com/onflow/flow-go/module"
    21  	"github.com/onflow/flow-go/module/execution"
    22  	"github.com/onflow/flow-go/state/protocol"
    23  	"github.com/onflow/flow-go/storage"
    24  )
    25  
// minExecutionNodesCnt is the minimum number of execution nodes expected to have sent the execution receipt for a block.
// executionNodesForBlockID re-queries the receipts until at least this many executors are found or its attempts run out.
const minExecutionNodesCnt = 2

// maxAttemptsForExecutionReceipt is the maximum number of attempts to find execution receipts for a given block ID.
const maxAttemptsForExecutionReceipt = 3

// DefaultMaxHeightRange is the default maximum size of range requests.
const DefaultMaxHeightRange = 250

// DefaultSnapshotHistoryLimit is the number of blocks to look back in state
// when recursively searching for a valid snapshot.
const DefaultSnapshotHistoryLimit = 500

// DefaultLoggedScriptsCacheSize is the default size of the lookup cache used to dedupe logs of scripts sent to ENs.
// It limits the cache size to 16MB. The cache does not affect script execution; it only keeps logs tidy.
const DefaultLoggedScriptsCacheSize = 1_000_000

// DefaultConnectionPoolSize is the default size for the connection pool to collection and execution nodes.
const DefaultConnectionPoolSize = 250
    45  
// preferredENIdentifiers and fixedENIdentifiers are package-level lists of execution node IDs.
// New fills them from Params.PreferredExecutionNodeIDs and Params.FixedExecutionNodeIDs, and
// chooseExecutionNodes reads them when selecting which execution nodes to return.
// NOTE(review): being package-level, they are shared by every Backend created in the same
// process and are written without synchronization — confirm this is intended.
var (
	preferredENIdentifiers flow.IdentifierList
	fixedENIdentifiers     flow.IdentifierList
)
    50  
// Backend implements the Access API.
//
// It is composed of several sub-backends that implement part of the Access API.
//
// Script related calls are handled by backendScripts.
// Transaction related calls are handled by backendTransactions.
// Block Header related calls are handled by backendBlockHeaders.
// Block details related calls are handled by backendBlockDetails.
// Event related calls are handled by backendEvents.
// Account related calls are handled by backendAccounts.
// backendExecutionResults and backendNetwork are embedded in the same way.
//
// All remaining calls are handled by the base Backend in this file
// (Ping, GetNodeVersionInfo, GetCollectionByID and GetNetworkParameters).
//
// The non-embedded fields are populated by New from the matching Params fields;
// collections backs GetCollectionByID and chainID backs GetNetworkParameters.
type Backend struct {
	backendScripts
	backendTransactions
	backendEvents
	backendBlockHeaders
	backendBlockDetails
	backendAccounts
	backendExecutionResults
	backendNetwork

	state             protocol.State
	chainID           flow.ChainID
	collections       storage.Collections
	executionReceipts storage.ExecutionReceipts
	connFactory       connection.ConnectionFactory

	// cache the response to GetNodeVersionInfo since it doesn't change
	nodeInfo *access.NodeVersionInfo
}
    82  
// Params bundles the dependencies and configuration that New uses to build a Backend.
//
// Notes on fields whose handling is visible in New:
//   - RetryEnabled: when true, the retry helper created by New is activated.
//   - TxResultCacheSize / TxErrorMessagesCacheSize: a value of 0 leaves the
//     corresponding LRU cache nil (no caching).
//   - PreferredExecutionNodeIDs / FixedExecutionNodeIDs: node IDs as strings; New
//     converts them with flow.HexStringToIdentifier and fails if any entry is invalid.
//   - ChainID: also used to derive the system chunk transaction.
type Params struct {
	State                     protocol.State
	CollectionRPC             accessproto.AccessAPIClient
	HistoricalAccessNodes     []accessproto.AccessAPIClient
	Blocks                    storage.Blocks
	Headers                   storage.Headers
	Collections               storage.Collections
	Transactions              storage.Transactions
	ExecutionReceipts         storage.ExecutionReceipts
	ExecutionResults          storage.ExecutionResults
	ChainID                   flow.ChainID
	AccessMetrics             module.AccessMetrics
	ConnFactory               connection.ConnectionFactory
	RetryEnabled              bool
	MaxHeightRange            uint
	PreferredExecutionNodeIDs []string
	FixedExecutionNodeIDs     []string
	Log                       zerolog.Logger
	SnapshotHistoryLimit      int
	Communicator              Communicator
	TxResultCacheSize         uint
	TxErrorMessagesCacheSize  uint
	ScriptExecutor            execution.ScriptExecutor
	ScriptExecutionMode       IndexQueryMode
	EventQueryMode            IndexQueryMode
	EventsIndex               *EventsIndex
	TxResultQueryMode         IndexQueryMode
	TxResultsIndex            *TransactionResultsIndex
}
   112  
// Compile-time check that *Backend satisfies the TransactionErrorMessage interface.
var _ TransactionErrorMessage = (*Backend)(nil)
   114  
   115  // New creates backend instance
   116  func New(params Params) (*Backend, error) {
   117  	retry := newRetry(params.Log)
   118  	if params.RetryEnabled {
   119  		retry.Activate()
   120  	}
   121  
   122  	loggedScripts, err := lru.New[[md5.Size]byte, time.Time](DefaultLoggedScriptsCacheSize)
   123  	if err != nil {
   124  		return nil, fmt.Errorf("failed to initialize script logging cache: %w", err)
   125  	}
   126  
   127  	var txResCache *lru.Cache[flow.Identifier, *access.TransactionResult]
   128  	if params.TxResultCacheSize > 0 {
   129  		txResCache, err = lru.New[flow.Identifier, *access.TransactionResult](int(params.TxResultCacheSize))
   130  		if err != nil {
   131  			return nil, fmt.Errorf("failed to init cache for transaction results: %w", err)
   132  		}
   133  	}
   134  
   135  	// NOTE: The transaction error message cache is currently only used by the access node and not by the observer node.
   136  	//       To avoid introducing unnecessary command line arguments in the observer, one case could be that the error
   137  	//       message cache is nil for the observer node.
   138  	var txErrorMessagesCache *lru.Cache[flow.Identifier, string]
   139  
   140  	if params.TxErrorMessagesCacheSize > 0 {
   141  		txErrorMessagesCache, err = lru.New[flow.Identifier, string](int(params.TxErrorMessagesCacheSize))
   142  		if err != nil {
   143  			return nil, fmt.Errorf("failed to init cache for transaction error messages: %w", err)
   144  		}
   145  	}
   146  
   147  	// the system tx is hardcoded and never changes during runtime
   148  	systemTx, err := blueprints.SystemChunkTransaction(params.ChainID.Chain())
   149  	if err != nil {
   150  		return nil, fmt.Errorf("failed to create system chunk transaction: %w", err)
   151  	}
   152  	systemTxID := systemTx.ID()
   153  
   154  	// initialize node version info
   155  	nodeInfo, err := getNodeVersionInfo(params.State.Params())
   156  	if err != nil {
   157  		return nil, fmt.Errorf("failed to initialize node version info: %w", err)
   158  	}
   159  
   160  	b := &Backend{
   161  		state: params.State,
   162  		// create the sub-backends
   163  		backendScripts: backendScripts{
   164  			log:               params.Log,
   165  			headers:           params.Headers,
   166  			executionReceipts: params.ExecutionReceipts,
   167  			connFactory:       params.ConnFactory,
   168  			state:             params.State,
   169  			metrics:           params.AccessMetrics,
   170  			loggedScripts:     loggedScripts,
   171  			nodeCommunicator:  params.Communicator,
   172  			scriptExecutor:    params.ScriptExecutor,
   173  			scriptExecMode:    params.ScriptExecutionMode,
   174  		},
   175  		backendTransactions: backendTransactions{
   176  			TransactionsLocalDataProvider: TransactionsLocalDataProvider{
   177  				state:          params.State,
   178  				collections:    params.Collections,
   179  				blocks:         params.Blocks,
   180  				eventsIndex:    params.EventsIndex,
   181  				txResultsIndex: params.TxResultsIndex,
   182  				systemTxID:     systemTxID,
   183  			},
   184  			log:                  params.Log,
   185  			staticCollectionRPC:  params.CollectionRPC,
   186  			chainID:              params.ChainID,
   187  			transactions:         params.Transactions,
   188  			executionReceipts:    params.ExecutionReceipts,
   189  			transactionValidator: configureTransactionValidator(params.State, params.ChainID),
   190  			transactionMetrics:   params.AccessMetrics,
   191  			retry:                retry,
   192  			connFactory:          params.ConnFactory,
   193  			previousAccessNodes:  params.HistoricalAccessNodes,
   194  			nodeCommunicator:     params.Communicator,
   195  			txResultCache:        txResCache,
   196  			txErrorMessagesCache: txErrorMessagesCache,
   197  			txResultQueryMode:    params.TxResultQueryMode,
   198  			systemTx:             systemTx,
   199  			systemTxID:           systemTxID,
   200  		},
   201  		backendEvents: backendEvents{
   202  			log:               params.Log,
   203  			chain:             params.ChainID.Chain(),
   204  			state:             params.State,
   205  			headers:           params.Headers,
   206  			executionReceipts: params.ExecutionReceipts,
   207  			connFactory:       params.ConnFactory,
   208  			maxHeightRange:    params.MaxHeightRange,
   209  			nodeCommunicator:  params.Communicator,
   210  			queryMode:         params.EventQueryMode,
   211  			eventsIndex:       params.EventsIndex,
   212  		},
   213  		backendBlockHeaders: backendBlockHeaders{
   214  			headers: params.Headers,
   215  			state:   params.State,
   216  		},
   217  		backendBlockDetails: backendBlockDetails{
   218  			blocks: params.Blocks,
   219  			state:  params.State,
   220  		},
   221  		backendAccounts: backendAccounts{
   222  			log:               params.Log,
   223  			state:             params.State,
   224  			headers:           params.Headers,
   225  			executionReceipts: params.ExecutionReceipts,
   226  			connFactory:       params.ConnFactory,
   227  			nodeCommunicator:  params.Communicator,
   228  			scriptExecutor:    params.ScriptExecutor,
   229  			scriptExecMode:    params.ScriptExecutionMode,
   230  		},
   231  		backendExecutionResults: backendExecutionResults{
   232  			executionResults: params.ExecutionResults,
   233  		},
   234  		backendNetwork: backendNetwork{
   235  			state:                params.State,
   236  			chainID:              params.ChainID,
   237  			headers:              params.Headers,
   238  			snapshotHistoryLimit: params.SnapshotHistoryLimit,
   239  		},
   240  		collections:       params.Collections,
   241  		executionReceipts: params.ExecutionReceipts,
   242  		connFactory:       params.ConnFactory,
   243  		chainID:           params.ChainID,
   244  		nodeInfo:          nodeInfo,
   245  	}
   246  
   247  	b.backendTransactions.txErrorMessages = b
   248  
   249  	retry.SetBackend(b)
   250  
   251  	preferredENIdentifiers, err = identifierList(params.PreferredExecutionNodeIDs)
   252  	if err != nil {
   253  		return nil, fmt.Errorf("failed to convert node id string to Flow Identifier for preferred EN map: %w", err)
   254  	}
   255  
   256  	fixedENIdentifiers, err = identifierList(params.FixedExecutionNodeIDs)
   257  	if err != nil {
   258  		return nil, fmt.Errorf("failed to convert node id string to Flow Identifier for fixed EN map: %w", err)
   259  	}
   260  
   261  	return b, nil
   262  }
   263  
   264  func identifierList(ids []string) (flow.IdentifierList, error) {
   265  	idList := make(flow.IdentifierList, len(ids))
   266  	for i, idStr := range ids {
   267  		id, err := flow.HexStringToIdentifier(idStr)
   268  		if err != nil {
   269  			return nil, fmt.Errorf("failed to convert node id string %s to Flow Identifier: %w", id, err)
   270  		}
   271  		idList[i] = id
   272  	}
   273  	return idList, nil
   274  }
   275  
   276  func configureTransactionValidator(state protocol.State, chainID flow.ChainID) *access.TransactionValidator {
   277  	return access.NewTransactionValidator(
   278  		access.NewProtocolStateBlocks(state),
   279  		chainID.Chain(),
   280  		access.TransactionValidationOptions{
   281  			Expiry:                       flow.DefaultTransactionExpiry,
   282  			ExpiryBuffer:                 flow.DefaultTransactionExpiryBuffer,
   283  			AllowEmptyReferenceBlockID:   false,
   284  			AllowUnknownReferenceBlockID: false,
   285  			CheckScriptsParse:            false,
   286  			MaxGasLimit:                  flow.DefaultMaxTransactionGasLimit,
   287  			MaxTransactionByteSize:       flow.DefaultMaxTransactionByteSize,
   288  			MaxCollectionByteSize:        flow.DefaultMaxCollectionByteSize,
   289  		},
   290  	)
   291  }
   292  
   293  // Ping responds to requests when the server is up.
   294  func (b *Backend) Ping(ctx context.Context) error {
   295  	// staticCollectionRPC is only set if a collection node address was provided at startup
   296  	if b.staticCollectionRPC != nil {
   297  		_, err := b.staticCollectionRPC.Ping(ctx, &accessproto.PingRequest{})
   298  		if err != nil {
   299  			return fmt.Errorf("could not ping collection node: %w", err)
   300  		}
   301  	}
   302  
   303  	return nil
   304  }
   305  
// GetNodeVersionInfo returns node version information such as semver, commit, sporkID, protocolVersion, etc.
// The value returned is the one stored on the Backend in nodeInfo (set by New); the context is
// unused and the returned error is always nil.
func (b *Backend) GetNodeVersionInfo(_ context.Context) (*access.NodeVersionInfo, error) {
	return b.nodeInfo, nil
}
   310  
   311  // getNodeVersionInfo returns the NodeVersionInfo for the node.
   312  // Since these values are static while the node is running, it is safe to cache.
   313  func getNodeVersionInfo(stateParams protocol.Params) (*access.NodeVersionInfo, error) {
   314  	sporkID, err := stateParams.SporkID()
   315  	if err != nil {
   316  		return nil, fmt.Errorf("failed to read spork ID: %v", err)
   317  	}
   318  
   319  	protocolVersion, err := stateParams.ProtocolVersion()
   320  	if err != nil {
   321  		return nil, fmt.Errorf("failed to read protocol version: %v", err)
   322  	}
   323  
   324  	sporkRootBlockHeight, err := stateParams.SporkRootBlockHeight()
   325  	if err != nil {
   326  		return nil, fmt.Errorf("failed to read spork root block height: %w", err)
   327  	}
   328  
   329  	nodeRootBlockHeader, err := stateParams.SealedRoot()
   330  	if err != nil {
   331  		return nil, fmt.Errorf("failed to read node root block: %w", err)
   332  	}
   333  
   334  	nodeInfo := &access.NodeVersionInfo{
   335  		Semver:               build.Version(),
   336  		Commit:               build.Commit(),
   337  		SporkId:              sporkID,
   338  		ProtocolVersion:      uint64(protocolVersion),
   339  		SporkRootBlockHeight: sporkRootBlockHeight,
   340  		NodeRootBlockHeight:  nodeRootBlockHeader.Height,
   341  	}
   342  
   343  	return nodeInfo, nil
   344  }
   345  
   346  func (b *Backend) GetCollectionByID(_ context.Context, colID flow.Identifier) (*flow.LightCollection, error) {
   347  	// retrieve the collection from the collection storage
   348  	col, err := b.collections.LightByID(colID)
   349  	if err != nil {
   350  		// Collections are retrieved asynchronously as we finalize blocks, so
   351  		// it is possible for a client to request a finalized block from us
   352  		// containing some collection, then get a not found error when requesting
   353  		// that collection. These clients should retry.
   354  		err = rpc.ConvertStorageError(fmt.Errorf("please retry for collection in finalized block: %w", err))
   355  		return nil, err
   356  	}
   357  
   358  	return col, nil
   359  }
   360  
// GetNetworkParameters returns the network parameters known to this backend.
// Only the chain ID the Backend was created with is populated; the context is unused.
func (b *Backend) GetNetworkParameters(_ context.Context) access.NetworkParameters {
	return access.NetworkParameters{
		ChainID: b.chainID,
	}
}
   366  
   367  // executionNodesForBlockID returns upto maxNodesCnt number of randomly chosen execution node identities
   368  // which have executed the given block ID.
   369  // If no such execution node is found, an InsufficientExecutionReceipts error is returned.
   370  func executionNodesForBlockID(
   371  	ctx context.Context,
   372  	blockID flow.Identifier,
   373  	executionReceipts storage.ExecutionReceipts,
   374  	state protocol.State,
   375  	log zerolog.Logger,
   376  ) (flow.IdentityList, error) {
   377  
   378  	var executorIDs flow.IdentifierList
   379  
   380  	// check if the block ID is of the root block. If it is then don't look for execution receipts since they
   381  	// will not be present for the root block.
   382  	rootBlock, err := state.Params().FinalizedRoot()
   383  	if err != nil {
   384  		return nil, fmt.Errorf("failed to retreive execution IDs for block ID %v: %w", blockID, err)
   385  	}
   386  
   387  	if rootBlock.ID() == blockID {
   388  		executorIdentities, err := state.Final().Identities(filter.HasRole(flow.RoleExecution))
   389  		if err != nil {
   390  			return nil, fmt.Errorf("failed to retreive execution IDs for block ID %v: %w", blockID, err)
   391  		}
   392  		executorIDs = executorIdentities.NodeIDs()
   393  	} else {
   394  		// try to find atleast minExecutionNodesCnt execution node ids from the execution receipts for the given blockID
   395  		for attempt := 0; attempt < maxAttemptsForExecutionReceipt; attempt++ {
   396  			executorIDs, err = findAllExecutionNodes(blockID, executionReceipts, log)
   397  			if err != nil {
   398  				return nil, err
   399  			}
   400  
   401  			if len(executorIDs) >= minExecutionNodesCnt {
   402  				break
   403  			}
   404  
   405  			// log the attempt
   406  			log.Debug().Int("attempt", attempt).Int("max_attempt", maxAttemptsForExecutionReceipt).
   407  				Int("execution_receipts_found", len(executorIDs)).
   408  				Str("block_id", blockID.String()).
   409  				Msg("insufficient execution receipts")
   410  
   411  			// if one or less execution receipts may have been received then re-query
   412  			// in the hope that more might have been received by now
   413  
   414  			select {
   415  			case <-ctx.Done():
   416  				return nil, ctx.Err()
   417  			case <-time.After(100 * time.Millisecond << time.Duration(attempt)):
   418  				// retry after an exponential backoff
   419  			}
   420  		}
   421  
   422  		receiptCnt := len(executorIDs)
   423  		// if less than minExecutionNodesCnt execution receipts have been received so far, then return random ENs
   424  		if receiptCnt < minExecutionNodesCnt {
   425  			newExecutorIDs, err := state.AtBlockID(blockID).Identities(filter.HasRole(flow.RoleExecution))
   426  			if err != nil {
   427  				return nil, fmt.Errorf("failed to retreive execution IDs for block ID %v: %w", blockID, err)
   428  			}
   429  			executorIDs = newExecutorIDs.NodeIDs()
   430  		}
   431  	}
   432  
   433  	// choose from the preferred or fixed execution nodes
   434  	subsetENs, err := chooseExecutionNodes(state, executorIDs)
   435  	if err != nil {
   436  		return nil, fmt.Errorf("failed to retreive execution IDs for block ID %v: %w", blockID, err)
   437  	}
   438  
   439  	if len(subsetENs) == 0 {
   440  		return nil, fmt.Errorf("no matching execution node found for block ID %v", blockID)
   441  	}
   442  
   443  	return subsetENs, nil
   444  }
   445  
   446  // findAllExecutionNodes find all the execution nodes ids from the execution receipts that have been received for the
   447  // given blockID
   448  func findAllExecutionNodes(
   449  	blockID flow.Identifier,
   450  	executionReceipts storage.ExecutionReceipts,
   451  	log zerolog.Logger,
   452  ) (flow.IdentifierList, error) {
   453  	// lookup the receipt's storage with the block ID
   454  	allReceipts, err := executionReceipts.ByBlockID(blockID)
   455  	if err != nil {
   456  		return nil, fmt.Errorf("failed to retreive execution receipts for block ID %v: %w", blockID, err)
   457  	}
   458  
   459  	executionResultMetaList := make(flow.ExecutionReceiptMetaList, 0, len(allReceipts))
   460  	for _, r := range allReceipts {
   461  		executionResultMetaList = append(executionResultMetaList, r.Meta())
   462  	}
   463  	executionResultGroupedMetaList := executionResultMetaList.GroupByResultID()
   464  
   465  	// maximum number of matching receipts found so far for any execution result id
   466  	maxMatchedReceiptCnt := 0
   467  	// execution result id key for the highest number of matching receipts in the identicalReceipts map
   468  	var maxMatchedReceiptResultID flow.Identifier
   469  
   470  	// find the largest list of receipts which have the same result ID
   471  	for resultID, executionReceiptList := range executionResultGroupedMetaList {
   472  		currentMatchedReceiptCnt := executionReceiptList.Size()
   473  		if currentMatchedReceiptCnt > maxMatchedReceiptCnt {
   474  			maxMatchedReceiptCnt = currentMatchedReceiptCnt
   475  			maxMatchedReceiptResultID = resultID
   476  		}
   477  	}
   478  
   479  	// if there are more than one execution result for the same block ID, log as error
   480  	if executionResultGroupedMetaList.NumberGroups() > 1 {
   481  		identicalReceiptsStr := fmt.Sprintf("%v", flow.GetIDs(allReceipts))
   482  		log.Error().
   483  			Str("block_id", blockID.String()).
   484  			Str("execution_receipts", identicalReceiptsStr).
   485  			Msg("execution receipt mismatch")
   486  	}
   487  
   488  	// pick the largest list of matching receipts
   489  	matchingReceiptMetaList := executionResultGroupedMetaList.GetGroup(maxMatchedReceiptResultID)
   490  
   491  	metaReceiptGroupedByExecutorID := matchingReceiptMetaList.GroupByExecutorID()
   492  
   493  	// collect all unique execution node ids from the receipts
   494  	var executorIDs flow.IdentifierList
   495  	for executorID := range metaReceiptGroupedByExecutorID {
   496  		executorIDs = append(executorIDs, executorID)
   497  	}
   498  
   499  	return executorIDs, nil
   500  }
   501  
   502  // chooseExecutionNodes finds the subset of execution nodes defined in the identity table by first
   503  // choosing the preferred execution nodes which have executed the transaction. If no such preferred
   504  // execution nodes are found, then the fixed execution nodes defined in the identity table are returned
   505  // If neither preferred nor fixed nodes are defined, then all execution node matching the executor IDs are returned.
   506  // e.g. If execution nodes in identity table are {1,2,3,4}, preferred ENs are defined as {2,3,4}
   507  // and the executor IDs is {1,2,3}, then {2, 3} is returned as the chosen subset of ENs
   508  func chooseExecutionNodes(state protocol.State, executorIDs flow.IdentifierList) (flow.IdentityList, error) {
   509  
   510  	allENs, err := state.Final().Identities(filter.HasRole(flow.RoleExecution))
   511  	if err != nil {
   512  		return nil, fmt.Errorf("failed to retreive all execution IDs: %w", err)
   513  	}
   514  
   515  	// first try and choose from the preferred EN IDs
   516  	var chosenIDs flow.IdentityList
   517  	if len(preferredENIdentifiers) > 0 {
   518  		// find the preferred execution node IDs which have executed the transaction
   519  		chosenIDs = allENs.Filter(filter.And(filter.HasNodeID(preferredENIdentifiers...),
   520  			filter.HasNodeID(executorIDs...)))
   521  		if len(chosenIDs) > 0 {
   522  			return chosenIDs, nil
   523  		}
   524  	}
   525  
   526  	// if no preferred EN ID is found, then choose from the fixed EN IDs
   527  	if len(fixedENIdentifiers) > 0 {
   528  		// choose fixed ENs which have executed the transaction
   529  		chosenIDs = allENs.Filter(filter.And(filter.HasNodeID(fixedENIdentifiers...), filter.HasNodeID(executorIDs...)))
   530  		if len(chosenIDs) > 0 {
   531  			return chosenIDs, nil
   532  		}
   533  		// if no such ENs are found then just choose all fixed ENs
   534  		chosenIDs = allENs.Filter(filter.HasNodeID(fixedENIdentifiers...))
   535  		return chosenIDs, nil
   536  	}
   537  
   538  	// If no preferred or fixed ENs have been specified, then return all executor IDs i.e. no preference at all
   539  	return allENs.Filter(filter.HasNodeID(executorIDs...)), nil
   540  }