github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/access/rpc/backend/backend_scripts.go

github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/access/rpc/backend/backend_scripts.go (about)

     1  package backend
     2  
     3  import (
     4  	"context"
     5  	"crypto/md5" //nolint:gosec
     6  	"time"
     7  
     8  	lru "github.com/hashicorp/golang-lru/v2"
     9  	execproto "github.com/onflow/flow/protobuf/go/flow/execution"
    10  	"github.com/rs/zerolog"
    11  	"google.golang.org/grpc/codes"
    12  	"google.golang.org/grpc/status"
    13  
    14  	"github.com/onflow/flow-go/engine/access/rpc/connection"
    15  	"github.com/onflow/flow-go/engine/common/rpc"
    16  	fvmerrors "github.com/onflow/flow-go/fvm/errors"
    17  	"github.com/onflow/flow-go/model/flow"
    18  	"github.com/onflow/flow-go/module"
    19  	"github.com/onflow/flow-go/module/execution"
    20  	"github.com/onflow/flow-go/module/irrecoverable"
    21  	"github.com/onflow/flow-go/state/protocol"
    22  	"github.com/onflow/flow-go/storage"
    23  	"github.com/onflow/flow-go/utils/logging"
    24  )
    25  
    26  // uniqueScriptLoggingTimeWindow is the duration for checking the uniqueness of scripts sent for execution
    27  const uniqueScriptLoggingTimeWindow = 10 * time.Minute
    28  
    29  type backendScripts struct {
    30  	log               zerolog.Logger
    31  	headers           storage.Headers
    32  	executionReceipts storage.ExecutionReceipts
    33  	state             protocol.State
    34  	connFactory       connection.ConnectionFactory
    35  	metrics           module.BackendScriptsMetrics
    36  	loggedScripts     *lru.Cache[[md5.Size]byte, time.Time]
    37  	nodeCommunicator  Communicator
    38  	scriptExecutor    execution.ScriptExecutor
    39  	scriptExecMode    IndexQueryMode
    40  }
    41  
    42  // scriptExecutionRequest encapsulates the data needed to execute a script to make it easier
    43  // to pass around between the various methods involved in script execution
    44  type scriptExecutionRequest struct {
    45  	blockID            flow.Identifier
    46  	height             uint64
    47  	script             []byte
    48  	arguments          [][]byte
    49  	insecureScriptHash [md5.Size]byte
    50  }
    51  
    52  func newScriptExecutionRequest(blockID flow.Identifier, height uint64, script []byte, arguments [][]byte) *scriptExecutionRequest {
    53  	return &scriptExecutionRequest{
    54  		blockID:   blockID,
    55  		height:    height,
    56  		script:    script,
    57  		arguments: arguments,
    58  
    59  		// encode to MD5 as low compute/memory lookup key
    60  		// CAUTION: cryptographically insecure md5 is used here, but only to de-duplicate logs.
    61  		// *DO NOT* use this hash for any protocol-related or cryptographic functions.
    62  		insecureScriptHash: md5.Sum(script), //nolint:gosec
    63  	}
    64  }
    65  
    66  // ExecuteScriptAtLatestBlock executes provided script at the latest sealed block.
    67  func (b *backendScripts) ExecuteScriptAtLatestBlock(
    68  	ctx context.Context,
    69  	script []byte,
    70  	arguments [][]byte,
    71  ) ([]byte, error) {
    72  	latestHeader, err := b.state.Sealed().Head()
    73  	if err != nil {
    74  		// the latest sealed header MUST be available
    75  		err := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err)
    76  		irrecoverable.Throw(ctx, err)
    77  		return nil, err
    78  	}
    79  
    80  	return b.executeScript(ctx, newScriptExecutionRequest(latestHeader.ID(), latestHeader.Height, script, arguments))
    81  }
    82  
    83  // ExecuteScriptAtBlockID executes provided script at the provided block ID.
    84  func (b *backendScripts) ExecuteScriptAtBlockID(
    85  	ctx context.Context,
    86  	blockID flow.Identifier,
    87  	script []byte,
    88  	arguments [][]byte,
    89  ) ([]byte, error) {
    90  	header, err := b.headers.ByBlockID(blockID)
    91  	if err != nil {
    92  		return nil, rpc.ConvertStorageError(err)
    93  	}
    94  
    95  	return b.executeScript(ctx, newScriptExecutionRequest(blockID, header.Height, script, arguments))
    96  }
    97  
    98  // ExecuteScriptAtBlockHeight executes provided script at the provided block height.
    99  func (b *backendScripts) ExecuteScriptAtBlockHeight(
   100  	ctx context.Context,
   101  	blockHeight uint64,
   102  	script []byte,
   103  	arguments [][]byte,
   104  ) ([]byte, error) {
   105  	header, err := b.headers.ByHeight(blockHeight)
   106  	if err != nil {
   107  		return nil, rpc.ConvertStorageError(err)
   108  	}
   109  
   110  	return b.executeScript(ctx, newScriptExecutionRequest(header.ID(), blockHeight, script, arguments))
   111  }
   112  
   113  // executeScript executes the provided script using either the local execution state or the execution
   114  // nodes depending on the node's configuration and the availability of the data.
   115  func (b *backendScripts) executeScript(
   116  	ctx context.Context,
   117  	scriptRequest *scriptExecutionRequest,
   118  ) ([]byte, error) {
   119  	switch b.scriptExecMode {
   120  	case IndexQueryModeExecutionNodesOnly:
   121  		result, _, err := b.executeScriptOnAvailableExecutionNodes(ctx, scriptRequest)
   122  		return result, err
   123  
   124  	case IndexQueryModeLocalOnly:
   125  		result, _, err := b.executeScriptLocally(ctx, scriptRequest)
   126  		return result, err
   127  
   128  	case IndexQueryModeFailover:
   129  		localResult, localDuration, localErr := b.executeScriptLocally(ctx, scriptRequest)
   130  		if localErr == nil || isInvalidArgumentError(localErr) || status.Code(localErr) == codes.Canceled {
   131  			return localResult, localErr
   132  		}
   133  		// Note: scripts that timeout are retried on the execution nodes since ANs may have performance
   134  		// issues for some scripts.
   135  		execResult, execDuration, execErr := b.executeScriptOnAvailableExecutionNodes(ctx, scriptRequest)
   136  
   137  		resultComparer := newScriptResultComparison(b.log, b.metrics, scriptRequest)
   138  		_ = resultComparer.compare(
   139  			newScriptResult(execResult, execDuration, execErr),
   140  			newScriptResult(localResult, localDuration, localErr),
   141  		)
   142  
   143  		return execResult, execErr
   144  
   145  	case IndexQueryModeCompare:
   146  		execResult, execDuration, execErr := b.executeScriptOnAvailableExecutionNodes(ctx, scriptRequest)
   147  		// we can only compare the results if there were either no errors or a cadence error
   148  		// since we cannot distinguish the EN error as caused by the block being pruned or some other reason,
   149  		// which may produce a valid RN output but an error for the EN
   150  		if execErr != nil && !isInvalidArgumentError(execErr) {
   151  			return nil, execErr
   152  		}
   153  		localResult, localDuration, localErr := b.executeScriptLocally(ctx, scriptRequest)
   154  
   155  		resultComparer := newScriptResultComparison(b.log, b.metrics, scriptRequest)
   156  		_ = resultComparer.compare(
   157  			newScriptResult(execResult, execDuration, execErr),
   158  			newScriptResult(localResult, localDuration, localErr),
   159  		)
   160  
   161  		// always return EN results
   162  		return execResult, execErr
   163  
   164  	default:
   165  		return nil, status.Errorf(codes.Internal, "unknown script execution mode: %v", b.scriptExecMode)
   166  	}
   167  }
   168  
   169  // executeScriptLocally executes the provided script using the local execution state.
   170  func (b *backendScripts) executeScriptLocally(
   171  	ctx context.Context,
   172  	r *scriptExecutionRequest,
   173  ) ([]byte, time.Duration, error) {
   174  	execStartTime := time.Now()
   175  
   176  	result, err := b.scriptExecutor.ExecuteAtBlockHeight(ctx, r.script, r.arguments, r.height)
   177  
   178  	execEndTime := time.Now()
   179  	execDuration := execEndTime.Sub(execStartTime)
   180  
   181  	lg := b.log.With().
   182  		Str("script_executor_addr", "localhost").
   183  		Hex("block_id", logging.ID(r.blockID)).
   184  		Uint64("height", r.height).
   185  		Hex("script_hash", r.insecureScriptHash[:]).
   186  		Dur("execution_dur_ms", execDuration).
   187  		Logger()
   188  
   189  	if err != nil {
   190  		convertedErr := convertScriptExecutionError(err, r.height)
   191  
   192  		switch status.Code(convertedErr) {
   193  		case codes.InvalidArgument, codes.Canceled, codes.DeadlineExceeded:
   194  			lg.Debug().Err(err).
   195  				Str("script", string(r.script)).
   196  				Msg("script failed to execute locally")
   197  
   198  		default:
   199  			lg.Error().Err(err).Msg("script execution failed")
   200  			b.metrics.ScriptExecutionErrorLocal()
   201  		}
   202  
   203  		return nil, execDuration, convertedErr
   204  	}
   205  
   206  	if b.log.GetLevel() == zerolog.DebugLevel && b.shouldLogScript(execEndTime, r.insecureScriptHash) {
   207  		lg.Debug().
   208  			Str("script", string(r.script)).
   209  			Msg("Successfully executed script")
   210  		b.loggedScripts.Add(r.insecureScriptHash, execEndTime)
   211  	}
   212  
   213  	// log execution time
   214  	b.metrics.ScriptExecuted(execDuration, len(r.script))
   215  
   216  	return result, execDuration, nil
   217  }
   218  
   219  // executeScriptOnAvailableExecutionNodes executes the provided script using available execution nodes.
   220  func (b *backendScripts) executeScriptOnAvailableExecutionNodes(
   221  	ctx context.Context,
   222  	r *scriptExecutionRequest,
   223  ) ([]byte, time.Duration, error) {
   224  	// find few execution nodes which have executed the block earlier and provided an execution receipt for it
   225  	executors, err := executionNodesForBlockID(ctx, r.blockID, b.executionReceipts, b.state, b.log)
   226  	if err != nil {
   227  		return nil, 0, status.Errorf(codes.Internal, "failed to find script executors at blockId %v: %v", r.blockID.String(), err)
   228  	}
   229  
   230  	lg := b.log.With().
   231  		Hex("block_id", logging.ID(r.blockID)).
   232  		Hex("script_hash", r.insecureScriptHash[:]).
   233  		Logger()
   234  
   235  	var result []byte
   236  	var execDuration time.Duration
   237  	errToReturn := b.nodeCommunicator.CallAvailableNode(
   238  		executors,
   239  		func(node *flow.IdentitySkeleton) error {
   240  			execStartTime := time.Now()
   241  
   242  			result, err = b.tryExecuteScriptOnExecutionNode(ctx, node.Address, r)
   243  
   244  			executionTime := time.Now()
   245  			execDuration = executionTime.Sub(execStartTime)
   246  
   247  			if err != nil {
   248  				return err
   249  			}
   250  
   251  			if b.log.GetLevel() == zerolog.DebugLevel {
   252  				if b.shouldLogScript(executionTime, r.insecureScriptHash) {
   253  					lg.Debug().
   254  						Str("script_executor_addr", node.Address).
   255  						Str("script", string(r.script)).
   256  						Dur("execution_dur_ms", execDuration).
   257  						Msg("Successfully executed script")
   258  					b.loggedScripts.Add(r.insecureScriptHash, executionTime)
   259  				}
   260  			}
   261  
   262  			// log execution time
   263  			b.metrics.ScriptExecuted(time.Since(execStartTime), len(r.script))
   264  
   265  			return nil
   266  		},
   267  		func(node *flow.IdentitySkeleton, err error) bool {
   268  			if status.Code(err) == codes.InvalidArgument {
   269  				lg.Debug().Err(err).
   270  					Str("script_executor_addr", node.Address).
   271  					Str("script", string(r.script)).
   272  					Msg("script failed to execute on the execution node")
   273  				return true
   274  			}
   275  			return false
   276  		},
   277  	)
   278  
   279  	if errToReturn != nil {
   280  		if status.Code(errToReturn) != codes.InvalidArgument {
   281  			b.metrics.ScriptExecutionErrorOnExecutionNode()
   282  			b.log.Error().Err(errToReturn).Msg("script execution failed for execution node internal reasons")
   283  		}
   284  		return nil, execDuration, rpc.ConvertError(errToReturn, "failed to execute script on execution nodes", codes.Internal)
   285  	}
   286  
   287  	return result, execDuration, nil
   288  }
   289  
   290  // tryExecuteScriptOnExecutionNode attempts to execute the script on the given execution node.
   291  func (b *backendScripts) tryExecuteScriptOnExecutionNode(
   292  	ctx context.Context,
   293  	executorAddress string,
   294  	r *scriptExecutionRequest,
   295  ) ([]byte, error) {
   296  	execRPCClient, closer, err := b.connFactory.GetExecutionAPIClient(executorAddress)
   297  	if err != nil {
   298  		return nil, status.Errorf(codes.Internal, "failed to create client for execution node %s: %v",
   299  			executorAddress, err)
   300  	}
   301  	defer closer.Close()
   302  
   303  	execResp, err := execRPCClient.ExecuteScriptAtBlockID(ctx, &execproto.ExecuteScriptAtBlockIDRequest{
   304  		BlockId:   r.blockID[:],
   305  		Script:    r.script,
   306  		Arguments: r.arguments,
   307  	})
   308  	if err != nil {
   309  		return nil, status.Errorf(status.Code(err), "failed to execute the script on the execution node %s: %v", executorAddress, err)
   310  	}
   311  	return execResp.GetValue(), nil
   312  }
   313  
   314  // isInvalidArgumentError checks if the error is from an invalid argument
   315  func isInvalidArgumentError(scriptExecutionErr error) bool {
   316  	return status.Code(scriptExecutionErr) == codes.InvalidArgument
   317  }
   318  
   319  // shouldLogScript checks if the script hash is unique in the time window
   320  func (b *backendScripts) shouldLogScript(execTime time.Time, scriptHash [md5.Size]byte) bool {
   321  	timestamp, seen := b.loggedScripts.Get(scriptHash)
   322  	if seen {
   323  		return execTime.Sub(timestamp) >= uniqueScriptLoggingTimeWindow
   324  	}
   325  	return true
   326  }
   327  
   328  // convertScriptExecutionError converts the script execution error to a gRPC error
   329  func convertScriptExecutionError(err error, height uint64) error {
   330  	if err == nil {
   331  		return nil
   332  	}
   333  
   334  	var failure fvmerrors.CodedFailure
   335  	if fvmerrors.As(err, &failure) {
   336  		return rpc.ConvertError(err, "failed to execute script", codes.Internal)
   337  	}
   338  
   339  	// general FVM/ledger errors
   340  	var coded fvmerrors.CodedError
   341  	if fvmerrors.As(err, &coded) {
   342  		switch coded.Code() {
   343  		case fvmerrors.ErrCodeScriptExecutionCancelledError:
   344  			return status.Errorf(codes.Canceled, "script execution canceled: %v", err)
   345  
   346  		case fvmerrors.ErrCodeScriptExecutionTimedOutError:
   347  			return status.Errorf(codes.DeadlineExceeded, "script execution timed out: %v", err)
   348  
   349  		case fvmerrors.ErrCodeComputationLimitExceededError:
   350  			return status.Errorf(codes.ResourceExhausted, "script execution computation limit exceeded: %v", err)
   351  
   352  		case fvmerrors.ErrCodeMemoryLimitExceededError:
   353  			return status.Errorf(codes.ResourceExhausted, "script execution memory limit exceeded: %v", err)
   354  
   355  		default:
   356  			// runtime errors
   357  			return status.Errorf(codes.InvalidArgument, "failed to execute script: %v", err)
   358  		}
   359  	}
   360  
   361  	return rpc.ConvertIndexError(err, height, "failed to execute script")
   362  }