github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/access/rpc/backend/backend_scripts.go (about) 1 package backend 2 3 import ( 4 "context" 5 "crypto/md5" //nolint:gosec 6 "time" 7 8 lru "github.com/hashicorp/golang-lru/v2" 9 execproto "github.com/onflow/flow/protobuf/go/flow/execution" 10 "github.com/rs/zerolog" 11 "google.golang.org/grpc/codes" 12 "google.golang.org/grpc/status" 13 14 "github.com/onflow/flow-go/engine/access/rpc/connection" 15 "github.com/onflow/flow-go/engine/common/rpc" 16 fvmerrors "github.com/onflow/flow-go/fvm/errors" 17 "github.com/onflow/flow-go/model/flow" 18 "github.com/onflow/flow-go/module" 19 "github.com/onflow/flow-go/module/execution" 20 "github.com/onflow/flow-go/module/irrecoverable" 21 "github.com/onflow/flow-go/state/protocol" 22 "github.com/onflow/flow-go/storage" 23 "github.com/onflow/flow-go/utils/logging" 24 ) 25 26 // uniqueScriptLoggingTimeWindow is the duration for checking the uniqueness of scripts sent for execution 27 const uniqueScriptLoggingTimeWindow = 10 * time.Minute 28 29 type backendScripts struct { 30 log zerolog.Logger 31 headers storage.Headers 32 executionReceipts storage.ExecutionReceipts 33 state protocol.State 34 connFactory connection.ConnectionFactory 35 metrics module.BackendScriptsMetrics 36 loggedScripts *lru.Cache[[md5.Size]byte, time.Time] 37 nodeCommunicator Communicator 38 scriptExecutor execution.ScriptExecutor 39 scriptExecMode IndexQueryMode 40 } 41 42 // scriptExecutionRequest encapsulates the data needed to execute a script to make it easier 43 // to pass around between the various methods involved in script execution 44 type scriptExecutionRequest struct { 45 blockID flow.Identifier 46 height uint64 47 script []byte 48 arguments [][]byte 49 insecureScriptHash [md5.Size]byte 50 } 51 52 func newScriptExecutionRequest(blockID flow.Identifier, height uint64, script []byte, arguments [][]byte) *scriptExecutionRequest { 53 return &scriptExecutionRequest{ 54 blockID: blockID, 55 height: height, 56 script: script, 57 arguments: arguments, 58 59 // encode to MD5 as low compute/memory lookup key 60 // CAUTION: cryptographically insecure md5 is used here, but only to de-duplicate logs. 61 // *DO NOT* use this hash for any protocol-related or cryptographic functions. 62 insecureScriptHash: md5.Sum(script), //nolint:gosec 63 } 64 } 65 66 // ExecuteScriptAtLatestBlock executes provided script at the latest sealed block. 67 func (b *backendScripts) ExecuteScriptAtLatestBlock( 68 ctx context.Context, 69 script []byte, 70 arguments [][]byte, 71 ) ([]byte, error) { 72 latestHeader, err := b.state.Sealed().Head() 73 if err != nil { 74 // the latest sealed header MUST be available 75 err := irrecoverable.NewExceptionf("failed to lookup sealed header: %w", err) 76 irrecoverable.Throw(ctx, err) 77 return nil, err 78 } 79 80 return b.executeScript(ctx, newScriptExecutionRequest(latestHeader.ID(), latestHeader.Height, script, arguments)) 81 } 82 83 // ExecuteScriptAtBlockID executes provided script at the provided block ID. 84 func (b *backendScripts) ExecuteScriptAtBlockID( 85 ctx context.Context, 86 blockID flow.Identifier, 87 script []byte, 88 arguments [][]byte, 89 ) ([]byte, error) { 90 header, err := b.headers.ByBlockID(blockID) 91 if err != nil { 92 return nil, rpc.ConvertStorageError(err) 93 } 94 95 return b.executeScript(ctx, newScriptExecutionRequest(blockID, header.Height, script, arguments)) 96 } 97 98 // ExecuteScriptAtBlockHeight executes provided script at the provided block height. 99 func (b *backendScripts) ExecuteScriptAtBlockHeight( 100 ctx context.Context, 101 blockHeight uint64, 102 script []byte, 103 arguments [][]byte, 104 ) ([]byte, error) { 105 header, err := b.headers.ByHeight(blockHeight) 106 if err != nil { 107 return nil, rpc.ConvertStorageError(err) 108 } 109 110 return b.executeScript(ctx, newScriptExecutionRequest(header.ID(), blockHeight, script, arguments)) 111 } 112 113 // executeScript executes the provided script using either the local execution state or the execution 114 // nodes depending on the node's configuration and the availability of the data. 115 func (b *backendScripts) executeScript( 116 ctx context.Context, 117 scriptRequest *scriptExecutionRequest, 118 ) ([]byte, error) { 119 switch b.scriptExecMode { 120 case IndexQueryModeExecutionNodesOnly: 121 result, _, err := b.executeScriptOnAvailableExecutionNodes(ctx, scriptRequest) 122 return result, err 123 124 case IndexQueryModeLocalOnly: 125 result, _, err := b.executeScriptLocally(ctx, scriptRequest) 126 return result, err 127 128 case IndexQueryModeFailover: 129 localResult, localDuration, localErr := b.executeScriptLocally(ctx, scriptRequest) 130 if localErr == nil || isInvalidArgumentError(localErr) || status.Code(localErr) == codes.Canceled { 131 return localResult, localErr 132 } 133 // Note: scripts that timeout are retried on the execution nodes since ANs may have performance 134 // issues for some scripts. 135 execResult, execDuration, execErr := b.executeScriptOnAvailableExecutionNodes(ctx, scriptRequest) 136 137 resultComparer := newScriptResultComparison(b.log, b.metrics, scriptRequest) 138 _ = resultComparer.compare( 139 newScriptResult(execResult, execDuration, execErr), 140 newScriptResult(localResult, localDuration, localErr), 141 ) 142 143 return execResult, execErr 144 145 case IndexQueryModeCompare: 146 execResult, execDuration, execErr := b.executeScriptOnAvailableExecutionNodes(ctx, scriptRequest) 147 // we can only compare the results if there were either no errors or a cadence error 148 // since we cannot distinguish the EN error as caused by the block being pruned or some other reason, 149 // which may produce a valid RN output but an error for the EN 150 if execErr != nil && !isInvalidArgumentError(execErr) { 151 return nil, execErr 152 } 153 localResult, localDuration, localErr := b.executeScriptLocally(ctx, scriptRequest) 154 155 resultComparer := newScriptResultComparison(b.log, b.metrics, scriptRequest) 156 _ = resultComparer.compare( 157 newScriptResult(execResult, execDuration, execErr), 158 newScriptResult(localResult, localDuration, localErr), 159 ) 160 161 // always return EN results 162 return execResult, execErr 163 164 default: 165 return nil, status.Errorf(codes.Internal, "unknown script execution mode: %v", b.scriptExecMode) 166 } 167 } 168 169 // executeScriptLocally executes the provided script using the local execution state. 170 func (b *backendScripts) executeScriptLocally( 171 ctx context.Context, 172 r *scriptExecutionRequest, 173 ) ([]byte, time.Duration, error) { 174 execStartTime := time.Now() 175 176 result, err := b.scriptExecutor.ExecuteAtBlockHeight(ctx, r.script, r.arguments, r.height) 177 178 execEndTime := time.Now() 179 execDuration := execEndTime.Sub(execStartTime) 180 181 lg := b.log.With(). 182 Str("script_executor_addr", "localhost"). 183 Hex("block_id", logging.ID(r.blockID)). 184 Uint64("height", r.height). 185 Hex("script_hash", r.insecureScriptHash[:]). 186 Dur("execution_dur_ms", execDuration). 187 Logger() 188 189 if err != nil { 190 convertedErr := convertScriptExecutionError(err, r.height) 191 192 switch status.Code(convertedErr) { 193 case codes.InvalidArgument, codes.Canceled, codes.DeadlineExceeded: 194 lg.Debug().Err(err). 195 Str("script", string(r.script)). 196 Msg("script failed to execute locally") 197 198 default: 199 lg.Error().Err(err).Msg("script execution failed") 200 b.metrics.ScriptExecutionErrorLocal() 201 } 202 203 return nil, execDuration, convertedErr 204 } 205 206 if b.log.GetLevel() == zerolog.DebugLevel && b.shouldLogScript(execEndTime, r.insecureScriptHash) { 207 lg.Debug(). 208 Str("script", string(r.script)). 209 Msg("Successfully executed script") 210 b.loggedScripts.Add(r.insecureScriptHash, execEndTime) 211 } 212 213 // log execution time 214 b.metrics.ScriptExecuted(execDuration, len(r.script)) 215 216 return result, execDuration, nil 217 } 218 219 // executeScriptOnAvailableExecutionNodes executes the provided script using available execution nodes. 220 func (b *backendScripts) executeScriptOnAvailableExecutionNodes( 221 ctx context.Context, 222 r *scriptExecutionRequest, 223 ) ([]byte, time.Duration, error) { 224 // find few execution nodes which have executed the block earlier and provided an execution receipt for it 225 executors, err := executionNodesForBlockID(ctx, r.blockID, b.executionReceipts, b.state, b.log) 226 if err != nil { 227 return nil, 0, status.Errorf(codes.Internal, "failed to find script executors at blockId %v: %v", r.blockID.String(), err) 228 } 229 230 lg := b.log.With(). 231 Hex("block_id", logging.ID(r.blockID)). 232 Hex("script_hash", r.insecureScriptHash[:]). 233 Logger() 234 235 var result []byte 236 var execDuration time.Duration 237 errToReturn := b.nodeCommunicator.CallAvailableNode( 238 executors, 239 func(node *flow.IdentitySkeleton) error { 240 execStartTime := time.Now() 241 242 result, err = b.tryExecuteScriptOnExecutionNode(ctx, node.Address, r) 243 244 executionTime := time.Now() 245 execDuration = executionTime.Sub(execStartTime) 246 247 if err != nil { 248 return err 249 } 250 251 if b.log.GetLevel() == zerolog.DebugLevel { 252 if b.shouldLogScript(executionTime, r.insecureScriptHash) { 253 lg.Debug(). 254 Str("script_executor_addr", node.Address). 255 Str("script", string(r.script)). 256 Dur("execution_dur_ms", execDuration). 257 Msg("Successfully executed script") 258 b.loggedScripts.Add(r.insecureScriptHash, executionTime) 259 } 260 } 261 262 // log execution time 263 b.metrics.ScriptExecuted(time.Since(execStartTime), len(r.script)) 264 265 return nil 266 }, 267 func(node *flow.IdentitySkeleton, err error) bool { 268 if status.Code(err) == codes.InvalidArgument { 269 lg.Debug().Err(err). 270 Str("script_executor_addr", node.Address). 271 Str("script", string(r.script)). 272 Msg("script failed to execute on the execution node") 273 return true 274 } 275 return false 276 }, 277 ) 278 279 if errToReturn != nil { 280 if status.Code(errToReturn) != codes.InvalidArgument { 281 b.metrics.ScriptExecutionErrorOnExecutionNode() 282 b.log.Error().Err(errToReturn).Msg("script execution failed for execution node internal reasons") 283 } 284 return nil, execDuration, rpc.ConvertError(errToReturn, "failed to execute script on execution nodes", codes.Internal) 285 } 286 287 return result, execDuration, nil 288 } 289 290 // tryExecuteScriptOnExecutionNode attempts to execute the script on the given execution node. 291 func (b *backendScripts) tryExecuteScriptOnExecutionNode( 292 ctx context.Context, 293 executorAddress string, 294 r *scriptExecutionRequest, 295 ) ([]byte, error) { 296 execRPCClient, closer, err := b.connFactory.GetExecutionAPIClient(executorAddress) 297 if err != nil { 298 return nil, status.Errorf(codes.Internal, "failed to create client for execution node %s: %v", 299 executorAddress, err) 300 } 301 defer closer.Close() 302 303 execResp, err := execRPCClient.ExecuteScriptAtBlockID(ctx, &execproto.ExecuteScriptAtBlockIDRequest{ 304 BlockId: r.blockID[:], 305 Script: r.script, 306 Arguments: r.arguments, 307 }) 308 if err != nil { 309 return nil, status.Errorf(status.Code(err), "failed to execute the script on the execution node %s: %v", executorAddress, err) 310 } 311 return execResp.GetValue(), nil 312 } 313 314 // isInvalidArgumentError checks if the error is from an invalid argument 315 func isInvalidArgumentError(scriptExecutionErr error) bool { 316 return status.Code(scriptExecutionErr) == codes.InvalidArgument 317 } 318 319 // shouldLogScript checks if the script hash is unique in the time window 320 func (b *backendScripts) shouldLogScript(execTime time.Time, scriptHash [md5.Size]byte) bool { 321 timestamp, seen := b.loggedScripts.Get(scriptHash) 322 if seen { 323 return execTime.Sub(timestamp) >= uniqueScriptLoggingTimeWindow 324 } 325 return true 326 } 327 328 // convertScriptExecutionError converts the script execution error to a gRPC error 329 func convertScriptExecutionError(err error, height uint64) error { 330 if err == nil { 331 return nil 332 } 333 334 var failure fvmerrors.CodedFailure 335 if fvmerrors.As(err, &failure) { 336 return rpc.ConvertError(err, "failed to execute script", codes.Internal) 337 } 338 339 // general FVM/ledger errors 340 var coded fvmerrors.CodedError 341 if fvmerrors.As(err, &coded) { 342 switch coded.Code() { 343 case fvmerrors.ErrCodeScriptExecutionCancelledError: 344 return status.Errorf(codes.Canceled, "script execution canceled: %v", err) 345 346 case fvmerrors.ErrCodeScriptExecutionTimedOutError: 347 return status.Errorf(codes.DeadlineExceeded, "script execution timed out: %v", err) 348 349 case fvmerrors.ErrCodeComputationLimitExceededError: 350 return status.Errorf(codes.ResourceExhausted, "script execution computation limit exceeded: %v", err) 351 352 case fvmerrors.ErrCodeMemoryLimitExceededError: 353 return status.Errorf(codes.ResourceExhausted, "script execution memory limit exceeded: %v", err) 354 355 default: 356 // runtime errors 357 return status.Errorf(codes.InvalidArgument, "failed to execute script: %v", err) 358 } 359 } 360 361 return rpc.ConvertIndexError(err, height, "failed to execute script") 362 }