github.com/apache/beam/sdks/v2@v2.48.2/go/examples/wasm/wasm.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  // wasm is a simple example that loads and executes a wasm file function.
    17  // greet.wasm, Cargo.toml and greet.rs were copied from the example provided by the wazero library:
    18  // https://github.com/tetratelabs/wazero/blob/v1.0.0-pre.3/examples/allocation/rust/greet.go
    19  //
    20  // New Concepts:
    21  // 1. Load a wasm file compiled from: cargo build --release --target wasm32-unknown-unknown
    22  // 2. Execute a wasm function within a DoFn
    23  package main
    24  
    25  import (
    26  	"context"
    27  	_ "embed"
    28  	"flag"
    29  	"fmt"
    30  
    31  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    32  	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/textio"
    33  	"github.com/apache/beam/sdks/v2/go/pkg/beam/log"
    34  	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
    35  	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
    36  	"github.com/tetratelabs/wazero"
    37  	"github.com/tetratelabs/wazero/api"
    38  )
    39  
    40  const (
    41  	wasmFunctionName           = "greeting"
    42  	wasmAllocateFunctionName   = "allocate"
    43  	wasmDeallocateFunctionName = "deallocate"
    44  )
    45  
// greetWasm holds the contents of greet.wasm, embedded by the go:embed
// directive below. Setup passes these bytes to the wazero runtime to
// instantiate the module.
//
//go:embed greet.wasm
var greetWasm []byte
    48  
    49  var (
    50  	output = flag.String("output", "", "Output file (required).")
    51  )
    52  
    53  func init() {
    54  	// register.DoFnXxY registers a struct DoFn so that it can be correctly
    55  	// serialized and does some optimization to avoid runtime reflection. Since
    56  	// embeddedWasmFn's ProcessElement func has 2 inputs (context.Context) and 2 outputs (string, error),
    57  	// we use register.DoFn2x2 and provide its input and output types as its constraints.
    58  	// Struct DoFns must be registered for a pipeline to run.
    59  	register.DoFn2x2[context.Context, string, string, error](&embeddedWasmFn{})
    60  }
    61  
    62  func preRun() error {
    63  	if *output == "" {
    64  		return fmt.Errorf("--output is required")
    65  	}
    66  	return nil
    67  }
    68  
    69  func main() {
    70  	flag.Parse()
    71  	beam.Init()
    72  	ctx := context.Background()
    73  	if err := preRun(); err != nil {
    74  		log.Fatal(ctx, err)
    75  	}
    76  	if err := run(ctx); err != nil {
    77  		log.Fatal(ctx, err)
    78  	}
    79  }
    80  
    81  func run(ctx context.Context) error {
    82  	p, s := beam.NewPipelineWithRoot()
    83  
    84  	in := beam.Create(s, "Ada", "Lovelace", "World", "Beam", "Senior López", "Random unicorn emoji 🦄")
    85  
    86  	out := beam.ParDo(s, &embeddedWasmFn{}, in)
    87  
    88  	textio.Write(s, *output, out)
    89  
    90  	if err := beamx.Run(ctx, p); err != nil {
    91  		return fmt.Errorf("failed to run pipeline: %v", err)
    92  	}
    93  	return nil
    94  }
    95  
    96  // Concept #2 wrap wasm function execution within a DoFn.
    97  // wasmFn wraps a DoFn to execute a Rust compiled wasm function
    98  type embeddedWasmFn struct {
    99  	r                              wazero.Runtime
   100  	mod                            api.Module
   101  	greeting, allocate, deallocate api.Function
   102  }
   103  
   104  // Setup loads and initializes the embedded wasm functions
   105  // Concept #1: Load a compiled wasm file []byte content and function.
   106  // This example is derived from
   107  // https://github.com/tetratelabs/wazero/blob/v1.0.0-pre.3/examples/allocation/rust/greet.go
   108  func (fn *embeddedWasmFn) Setup(ctx context.Context) error {
   109  	// Create a new WebAssembly Runtime.
   110  	// Typically, a defer r.Close() would be called subsequently after.  Yet, we need to keep this in memory
   111  	// throughout the DoFn lifecycle after which we invoke r.Close(); see Teardown below.
   112  	fn.r = wazero.NewRuntime(ctx)
   113  
   114  	// Instantiate a Go-defined module named "env" that exports a function to
   115  	// log to the console.
   116  	_, err := fn.r.NewHostModuleBuilder("env").
   117  		NewFunctionBuilder().WithFunc(logString).Export("log").
   118  		Instantiate(ctx)
   119  	if err != nil {
   120  		return fmt.Errorf("failed to instantiate host module: %w", err)
   121  	}
   122  
   123  	// Instantiate a WebAssembly module that imports the "log" function defined
   124  	// in "env" and exports "memory" and functions we'll use in this example.
   125  	fn.mod, err = fn.r.Instantiate(ctx, greetWasm)
   126  	if err != nil {
   127  		return fmt.Errorf("failed to instantiate wasm module: %v", err)
   128  	}
   129  
   130  	// Get references to WebAssembly functions we'll use in this example.
   131  	fn.greeting = fn.mod.ExportedFunction(wasmFunctionName)
   132  	fn.allocate = fn.mod.ExportedFunction(wasmAllocateFunctionName)
   133  	fn.deallocate = fn.mod.ExportedFunction(wasmDeallocateFunctionName)
   134  	return nil
   135  }
   136  
   137  // ProcessElement processes a string calling a wasm function written in Rust
   138  // This example is derived from
   139  // https://github.com/tetratelabs/wazero/blob/v1.0.0-pre.3/examples/allocation/rust/greet.go
   140  func (fn *embeddedWasmFn) ProcessElement(ctx context.Context, s string) (string, error) {
   141  
   142  	// We need to compute the size of s to use Rust's memory allocator.
   143  	size := uint64(len(s))
   144  
   145  	// Instead of an arbitrary memory offset, use Rust's allocator. Notice
   146  	// there is nothing string-specific in this allocation function. The same
   147  	// function could be used to pass binary serialized data to Wasm.
   148  	results, err := fn.allocate.Call(ctx, size)
   149  	if err != nil {
   150  		return "", fmt.Errorf("error calling allocate: %w", err)
   151  	}
   152  	ptr := results[0]
   153  
   154  	// This pointer was allocated by Rust, but owned by Go, So, we have to
   155  	// deallocate it when finished; defer means that this statement will be called when the function exits
   156  	defer fn.deallocate.Call(ctx, ptr, size)
   157  
   158  	// The pointer is a linear memory offset, which is where we write the value of the DoFn's input element s.
   159  	if !fn.mod.Memory().Write(uint32(ptr), []byte(s)) {
   160  		return "", fmt.Errorf("Memory.Write(%d, %d) out of range of memory size %d",
   161  			ptr, size, fn.mod.Memory().Size())
   162  	}
   163  
   164  	// Finally, we get the greeting message "Hello" concatenated to the DoFn's input element s.
   165  	// This shows how to read-back something allocated by Rust.
   166  	ptrSize, err := fn.greeting.Call(ctx, ptr, size)
   167  	resultPtr := uint32(ptrSize[0] >> 32)
   168  	resultSize := uint32(ptrSize[0])
   169  
   170  	// This pointer was allocated by Rust, but owned by Go, So, we have to
   171  	// deallocate it when finished; again defer flags Go to execute this statement when the function exits
   172  	defer fn.deallocate.Call(ctx, uint64(resultPtr), uint64(resultSize))
   173  
   174  	// The pointer is a linear memory offset, which is where we wrote the results of the string concatenation.
   175  	bytes, ok := fn.mod.Memory().Read(resultPtr, resultSize)
   176  	if !ok {
   177  		return "", fmt.Errorf("Memory.Read(%d, %d) out of range of memory size %d",
   178  			resultPtr, resultSize, fn.mod.Memory().Size())
   179  	}
   180  
   181  	// bytes contains our final result that we emit into the output PCollection
   182  	return string(bytes), nil
   183  }
   184  
   185  // Teardown the wazero.Runtime during the DoFn teardown lifecycle
   186  func (fn *embeddedWasmFn) Teardown(ctx context.Context) error {
   187  	// Typically we would proceed wazero.Runtime's Close method with Go's defer keyword, just after instantiation.
   188  	// However, we need to keep the property in memory until the end of the DoFn lifecycle
   189  	if err := fn.r.Close(ctx); err != nil {
   190  		return fmt.Errorf("failed to close runtime: %w", err)
   191  	}
   192  	return nil
   193  }
   194  
   195  // logString is an exported function to the wasm module that logs to console output.
   196  func logString(ctx context.Context, m api.Module, offset, byteCount uint32) {
   197  	buf, ok := m.Memory().Read(offset, byteCount)
   198  	if !ok {
   199  		log.Fatalf(ctx, "Memory.Read(%d, %d) out of range", offset, byteCount)
   200  	}
   201  	log.Info(ctx, string(buf))
   202  }