github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/executor/wasm/executor.go (about)

     1  package wasm
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"io/fs"
     8  	"os"
     9  	"path/filepath"
    10  	"sort"
    11  
    12  	"github.com/c2h5oh/datasize"
    13  	"github.com/filecoin-project/bacalhau/pkg/executor"
    14  	"golang.org/x/exp/maps"
    15  
    16  	"github.com/filecoin-project/bacalhau/pkg/job"
    17  	"github.com/filecoin-project/bacalhau/pkg/model"
    18  	"github.com/filecoin-project/bacalhau/pkg/storage"
    19  	"github.com/filecoin-project/bacalhau/pkg/storage/util"
    20  	"github.com/filecoin-project/bacalhau/pkg/system"
    21  	"github.com/filecoin-project/bacalhau/pkg/util/filefs"
    22  	"github.com/filecoin-project/bacalhau/pkg/util/mountfs"
    23  	"github.com/filecoin-project/bacalhau/pkg/util/touchfs"
    24  	"github.com/rs/zerolog/log"
    25  	"github.com/tetratelabs/wazero"
    26  	"github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1"
    27  	"github.com/tetratelabs/wazero/sys"
    28  )
    29  
// Executor runs jobs as WASM modules. It resolves the storage backends it
// needs (for modules, inputs and volume queries) through StorageProvider.
type Executor struct {
	// StorageProvider is used to look up the storage implementation for a
	// given storage source.
	StorageProvider storage.StorageProvider
}
    33  
    34  func NewExecutor(
    35  	_ context.Context,
    36  	storageProvider storage.StorageProvider,
    37  ) (*Executor, error) {
    38  	return &Executor{
    39  		StorageProvider: storageProvider,
    40  	}, nil
    41  }
    42  
    43  func (e *Executor) IsInstalled(context.Context) (bool, error) {
    44  	// WASM executor runs natively in Go and so is always available
    45  	return true, nil
    46  }
    47  
    48  func (e *Executor) HasStorageLocally(ctx context.Context, volume model.StorageSpec) (bool, error) {
    49  	ctx, span := system.NewSpan(ctx, system.GetTracer(), "pkg/executor/wasm.Executor.HasStorageLocally")
    50  	defer span.End()
    51  
    52  	s, err := e.StorageProvider.Get(ctx, volume.StorageSource)
    53  	if err != nil {
    54  		return false, err
    55  	}
    56  
    57  	return s.HasStorageLocally(ctx, volume)
    58  }
    59  
    60  func (e *Executor) GetVolumeSize(ctx context.Context, volume model.StorageSpec) (uint64, error) {
    61  	ctx, span := system.NewSpan(ctx, system.GetTracer(), "pkg/executor/wasm.Executor.GetVolumeSize")
    62  	defer span.End()
    63  
    64  	storageProvider, err := e.StorageProvider.Get(ctx, volume.StorageSource)
    65  	if err != nil {
    66  		return 0, err
    67  	}
    68  	return storageProvider.GetVolumeSize(ctx, volume)
    69  }
    70  
    71  // makeFsFromStorage sets up a virtual filesystem (represented by an fs.FS) that
    72  // will be the filesystem exposed to our WASM. The strategy for this is to:
    73  //
    74  //   - mount each input at the name specified by Path
    75  //   - make a directory in the job results directory for each output and mount that
    76  //     at the name specified by Name
    77  func (e *Executor) makeFsFromStorage(ctx context.Context, jobResultsDir string, inputs, outputs []model.StorageSpec) (fs.FS, error) {
    78  	var err error
    79  	rootFs := mountfs.New()
    80  
    81  	volumes, err := storage.ParallelPrepareStorage(ctx, e.StorageProvider, inputs)
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  
    86  	for input, volume := range volumes {
    87  		log.Ctx(ctx).Debug().Msgf("Using input '%s' at '%s'", input.Path, volume.Source)
    88  
    89  		var stat os.FileInfo
    90  		stat, err = os.Stat(volume.Source)
    91  		if err != nil {
    92  			return nil, err
    93  		}
    94  
    95  		var inputFs fs.FS
    96  		if stat.IsDir() {
    97  			inputFs = os.DirFS(volume.Source)
    98  		} else {
    99  			inputFs = filefs.New(volume.Source)
   100  		}
   101  
   102  		err = rootFs.Mount(input.Path, inputFs)
   103  		if err != nil {
   104  			return nil, err
   105  		}
   106  	}
   107  
   108  	for _, output := range outputs {
   109  		if output.Name == "" {
   110  			return nil, fmt.Errorf("output volume has no name: %+v", output)
   111  		}
   112  
   113  		if output.Path == "" {
   114  			return nil, fmt.Errorf("output volume has no path: %+v", output)
   115  		}
   116  
   117  		srcd := filepath.Join(jobResultsDir, output.Name)
   118  		log.Ctx(ctx).Debug().Msgf("Collecting output '%s' at '%s'", output.Name, srcd)
   119  
   120  		err = os.Mkdir(srcd, util.OS_ALL_R|util.OS_ALL_X|util.OS_USER_W)
   121  		if err != nil {
   122  			return nil, err
   123  		}
   124  
   125  		err = rootFs.Mount(output.Name, touchfs.New(srcd))
   126  		if err != nil {
   127  			return nil, err
   128  		}
   129  	}
   130  
   131  	return rootFs, nil
   132  }
   133  
   134  //nolint:funlen  // Will be made shorter when we do more module linking
   135  func (e *Executor) RunShard(
   136  	ctx context.Context,
   137  	shard model.JobShard,
   138  	jobResultsDir string,
   139  ) (*model.RunCommandResult, error) {
   140  	ctx, span := system.NewSpan(ctx, system.GetTracer(), "pkg/executor/wasm.Executor.RunShard")
   141  	defer span.End()
   142  
   143  	cache := wazero.NewCompilationCache()
   144  	engineConfig := wazero.NewRuntimeConfig().WithCompilationCache(cache)
   145  
   146  	// Apply memory limits to the runtime. We have to do this in multiples of
   147  	// the WASM page size of 64kb, so round up to the nearest page size if the
   148  	// limit is not specified as a multiple of that.
   149  	if shard.Job.Spec.Resources.Memory != "" {
   150  		memoryLimit, err := datasize.ParseString(shard.Job.Spec.Resources.Memory)
   151  		if err != nil {
   152  			return executor.FailResult(err)
   153  		}
   154  
   155  		const pageSize = 65536
   156  		pageLimit := memoryLimit.Bytes()/pageSize + system.Min(memoryLimit.Bytes()%pageSize, 1)
   157  		engineConfig = engineConfig.WithMemoryLimitPages(uint32(pageLimit))
   158  	}
   159  
   160  	engine := wazero.NewRuntimeWithConfig(ctx, engineConfig)
   161  
   162  	wasmSpec := shard.Job.Spec.Wasm
   163  	contextStorageSpec := shard.Job.Spec.Wasm.EntryModule
   164  	module, err := LoadRemoteModule(ctx, engine, e.StorageProvider, contextStorageSpec)
   165  	if err != nil {
   166  		return executor.FailResult(err)
   167  	}
   168  	defer module.Close(ctx)
   169  
   170  	shardStorageSpec, err := job.GetShardStorageSpec(ctx, shard, e.StorageProvider)
   171  	if err != nil {
   172  		return executor.FailResult(err)
   173  	}
   174  
   175  	fs, err := e.makeFsFromStorage(ctx, jobResultsDir, shardStorageSpec, shard.Job.Spec.Outputs)
   176  	if err != nil {
   177  		return executor.FailResult(err)
   178  	}
   179  
   180  	// Configure the modules. We will write STDOUT and STDERR to a buffer so
   181  	// that we can later include them in the job results. We don't want to
   182  	// execute any start functions automatically as we will do it manually
   183  	// later. Finally, add the filesystem which contains our input and output.
   184  	stdout := new(bytes.Buffer)
   185  	stderr := new(bytes.Buffer)
   186  
   187  	args := []string{module.Name()}
   188  	args = append(args, wasmSpec.Parameters...)
   189  
   190  	config := wazero.NewModuleConfig().
   191  		WithStartFunctions().
   192  		WithStdout(stdout).
   193  		WithStderr(stderr).
   194  		WithArgs(args...).
   195  		WithFS(fs)
   196  
   197  	keys := maps.Keys(wasmSpec.EnvironmentVariables)
   198  	sort.Strings(keys)
   199  	for _, key := range keys {
   200  		// Make sure we add the environment variables in a consistent order
   201  		config = config.WithEnv(key, wasmSpec.EnvironmentVariables[key])
   202  	}
   203  
   204  	entryPoint := wasmSpec.EntryPoint
   205  	importedModules := []wazero.CompiledModule{}
   206  
   207  	// Load and instantiate imported modules
   208  	for _, wasmSpec := range wasmSpec.ImportModules {
   209  		importedWasi, importErr := LoadRemoteModule(ctx, engine, e.StorageProvider, wasmSpec)
   210  		if importErr != nil {
   211  			return executor.FailResult(importErr)
   212  		}
   213  		importedModules = append(importedModules, importedWasi)
   214  
   215  		_, instantiateErr := engine.InstantiateModule(ctx, importedWasi, config)
   216  		if instantiateErr != nil {
   217  			return executor.FailResult(instantiateErr)
   218  		}
   219  	}
   220  
   221  	wasi, err := wasi_snapshot_preview1.NewBuilder(engine).Compile(ctx)
   222  	if err != nil {
   223  		return executor.FailResult(err)
   224  	}
   225  	defer wasi.Close(ctx)
   226  
   227  	_, err = engine.InstantiateModule(ctx, wasi, config)
   228  	if err != nil {
   229  		return executor.FailResult(err)
   230  	}
   231  
   232  	// Now instantiate the module and run the entry point.
   233  	instance, err := engine.InstantiateModule(ctx, module, config)
   234  	if err != nil {
   235  		return executor.FailResult(err)
   236  	}
   237  
   238  	// Check that all WASI modules conform to our requirements.
   239  	importedModules = append(importedModules, wasi)
   240  	err = ValidateModuleAgainstJob(module, shard.Job.Spec, importedModules...)
   241  	if err != nil {
   242  		return executor.FailResult(err)
   243  	}
   244  
   245  	// The function should exit which results in a sys.ExitError. So we capture
   246  	// the exit code for inclusion in the job output, and ignore the return code
   247  	// from the function (most WASI compilers will not give one). Some compilers
   248  	// though do not set an exit code, so we use a default of -1.
   249  	log.Ctx(ctx).Debug().Msgf("Running WASM %q from job %q", entryPoint, shard.Job.Metadata.ID)
   250  	entryFunc := instance.ExportedFunction(entryPoint)
   251  	exitCode := int(-1)
   252  	_, wasmErr := entryFunc.Call(ctx)
   253  	if wasmErr != nil {
   254  		errExit, ok := wasmErr.(*sys.ExitError)
   255  		if ok {
   256  			exitCode = int(errExit.ExitCode())
   257  			wasmErr = nil
   258  		}
   259  	}
   260  
   261  	return executor.WriteJobResults(jobResultsDir, stdout, stderr, exitCode, wasmErr)
   262  }
   263  
// Compile-time check that *Executor implements the executor.Executor
// interface; the build fails here if a required method is missing.
var _ executor.Executor = (*Executor)(nil)