github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/func.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package bigslice
     6  
     7  import (
     8  	"fmt"
     9  	"reflect"
    10  	"runtime"
    11  	"strings"
    12  	"sync/atomic"
    13  
    14  	"github.com/grailbio/bigslice/typecheck"
    15  )
    16  
// typeOfSlice is the reflect.Type of Slice. Func compares the sole return
// type of a candidate function against it.
var typeOfSlice = reflect.TypeOf((*Slice)(nil)).Elem()
    18  
var (
	// funcs is the global registry of funcs. We rely on deterministic
	// registration order. (This is guaranteed by Go's variable
	// initialization for a single compiler, which is sufficient for our
	// use.) It would definitely be nice to have a nicer way of doing
	// this (without the overhead of users minting their own names).
	//
	// Func appends to this slice and records the element's position in
	// FuncValue.index.
	funcs []*FuncValue
	// funcsBusy is used to detect data races in registration. Func
	// increments it before touching funcs and decrements it afterwards,
	// panicking if it observes an unexpected value.
	funcsBusy int32
)
    29  
// A FuncValue represents a Bigslice function, as returned by Func.
type FuncValue struct {
	// fn is the reflected function value that was passed to Func; it is
	// what applyValue ultimately calls.
	fn reflect.Value
	// args holds the types of fn's input parameters, in order.
	args []reflect.Type
	// index is the position of this func in the funcs registry at the
	// time it was registered by Func.
	index int
	// exclusive is set on the copy returned by Exclusive and is passed
	// along to the invocations created from this func.
	exclusive bool

	// file and line are the location at which the function was defined.
	file string
	line int
}
    41  
    42  // Exclusive marks this func to require mutually exclusive machine
    43  // allocation.
    44  //
    45  // NOTE: This is an experimental API that may change.
    46  func (f *FuncValue) Exclusive() *FuncValue {
    47  	fv := new(FuncValue)
    48  	*fv = *f
    49  	fv.exclusive = true
    50  	return fv
    51  }
    52  
    53  // NumIn returns the number of input arguments to f.
    54  func (f *FuncValue) NumIn() int { return len(f.args) }
    55  
    56  // In returns the i'th argument type of function f.
    57  func (f *FuncValue) In(i int) reflect.Type { return f.args[i] }
    58  
    59  // Invocation creates an invocation representing the function f
    60  // applied to the provided arguments. Invocation panics with a type
    61  // error if the provided arguments do not match in type or arity.
    62  func (f *FuncValue) Invocation(location string, args ...interface{}) Invocation {
    63  	argTypes := make([]reflect.Type, len(args))
    64  	for i, arg := range args {
    65  		argTypes[i] = reflect.TypeOf(arg)
    66  	}
    67  	f.typecheck(argTypes...)
    68  	return newInvocation(location, uint64(f.index), f.exclusive, args...)
    69  }
    70  
    71  // Apply invokes the function f with the provided arguments,
    72  // returning the computed Slice. Apply panics with a type error if
    73  // argument type or arity do not match.
    74  func (f *FuncValue) Apply(args ...interface{}) Slice {
    75  	argv := make([]reflect.Value, len(args))
    76  	for i := range argv {
    77  		argv[i] = reflect.ValueOf(args[i])
    78  	}
    79  	return f.applyValue(argv)
    80  }
    81  
    82  func (f *FuncValue) applyValue(args []reflect.Value) Slice {
    83  	argTypes := make([]reflect.Type, len(args))
    84  	for i, arg := range args {
    85  		if !arg.IsValid() {
    86  			if !isNilAssignable(f.args[i]) {
    87  				// Untyped nil argument for type that cannot be nil.
    88  				typecheck.Panicf(2, "cannot use nil as type %s in argument to function", f.args[i])
    89  			}
    90  			argTypes[i] = f.args[i]
    91  			args[i] = reflect.Zero(f.args[i])
    92  			continue
    93  		}
    94  		argTypes[i] = arg.Type()
    95  	}
    96  	f.typecheck(argTypes...)
    97  	out := f.fn.Call(args)
    98  	return out[0].Interface().(Slice)
    99  }
   100  
   101  func isNilAssignable(typ reflect.Type) bool {
   102  	switch typ.Kind() {
   103  	case reflect.Chan:
   104  	case reflect.Func:
   105  	case reflect.Interface:
   106  	case reflect.Map:
   107  	case reflect.Ptr:
   108  	case reflect.Slice:
   109  	case reflect.UnsafePointer:
   110  	default:
   111  		return false
   112  	}
   113  	return true
   114  }
   115  
   116  func (f *FuncValue) typecheck(args ...reflect.Type) {
   117  	if len(args) != len(f.args) {
   118  		typecheck.Panicf(2, "wrong number of arguments: function takes %d arguments, got %d",
   119  			len(f.args), len(args))
   120  	}
   121  	for i := range args {
   122  		expect, have := f.args[i], args[i]
   123  		if have == nil {
   124  			if !isNilAssignable(expect) {
   125  				typecheck.Panicf(2, "wrong type for argument %d: %s cannot be nil", i, expect)
   126  			}
   127  			continue
   128  		}
   129  		switch expect.Kind() {
   130  		case reflect.Interface:
   131  			if !have.Implements(expect) {
   132  				typecheck.Panicf(2, "wrong type for argument %d: type %s does not implement interface %s", i, have, expect)
   133  			}
   134  		default:
   135  			if have != expect {
   136  				typecheck.Panicf(2, "wrong type for argument %d: expected %s, got %s", i, expect, have)
   137  			}
   138  		}
   139  	}
   140  }
   141  
   142  // Func creates a bigslice function from the provided function value. Bigslice
   143  // funcs must return a single Slice value.
   144  //
   145  // All calls to Func must happen before exec.Start is called (and occur in
   146  // deterministic order). This rule is easy to follow by making all Func calls
   147  // occur in global variable initialization, with exec.Start called from the
   148  // program's main function, e.g.:
   149  //
   150  //  var myFunc = bigslice.Func(...)
   151  //
   152  //  func main() {
   153  //   	sess, err := exec.Start()
   154  //   	...
   155  //  }
   156  //
   157  // Funcs provide bigslice with a means of dynamic abstraction: since Funcs can
   158  // be invoked remotely, dynamically created slices may be named across process
   159  // boundaries.
   160  func Func(fn interface{}) *FuncValue {
   161  	fv := reflect.ValueOf(fn)
   162  	ftype := fv.Type()
   163  	if ftype.Kind() != reflect.Func {
   164  		typecheck.Panicf(1, "bigslice.Func: argument to func is a %T, not a func", fn)
   165  	}
   166  	if ftype.NumOut() != 1 || ftype.Out(0) != typeOfSlice {
   167  		typecheck.Panicf(1, "bigslice.Func: func must return a single bigslice.Slice")
   168  	}
   169  	v := new(FuncValue)
   170  	v.fn = fv
   171  	for i := 0; i < ftype.NumIn(); i++ {
   172  		typ := ftype.In(i)
   173  		v.args = append(v.args, typ)
   174  	}
   175  	if atomic.AddInt32(&funcsBusy, 1) != 1 {
   176  		panic("bigslice.Func: data race")
   177  	}
   178  	v.index = len(funcs)
   179  	funcs = append(funcs, v)
   180  	if atomic.AddInt32(&funcsBusy, -1) != 0 {
   181  		panic("bigslice.Func: data race")
   182  	}
   183  	_, v.file, v.line, _ = runtime.Caller(1)
   184  	return v
   185  }
   186  
   187  // FuncByIndex returns the *FuncValue, created by Func, with the given index.
   188  // We use this to address funcs across process boundaries, as we serialize the
   189  // index for the receiver to look up in its address space. This function must
   190  // not be called concurrently with Func.
   191  func FuncByIndex(i uint64) *FuncValue {
   192  	return funcs[i]
   193  }
   194  
   195  // FuncLocations returns a slice of strings that describe the locations of
   196  // Func creation, in the same order as the Funcs registry. We use this to
   197  // verify that worker processes have the same Funcs. Note that this is not a
   198  // precisely correct verification, as it's possible to define multiple Funcs on
   199  // the same line. However, it's good enough for the scenarios we have
   200  // encountered or anticipate.
   201  func FuncLocations() []string {
   202  	locs := make([]string, len(funcs))
   203  	for i, f := range funcs {
   204  		locs[i] = fmt.Sprintf("%s:%d", f.file, f.line)
   205  	}
   206  	return locs
   207  }
   208  
// Invocation represents an invocation of a Bigslice func of the same
// binary. Invocations can be transmitted across process boundaries
// and thus may be invoked by remote executors.
//
// Each invocation carries an invocation index, which is a unique index
// for invocations within a process namespace. It can thus be used to
// represent a particular function invocation from a driver process.
//
// Invocations must be created by newInvocation.
type Invocation struct {
	// Index is the unique index of this invocation. It is always >= 1.
	Index uint64
	// Func is the index of the invoked func in the funcs registry.
	Func uint64
	// Args are the arguments with which the func is invoked.
	Args []interface{}
	// Exclusive carries the exclusive flag of the FuncValue from which
	// the invocation was created.
	Exclusive bool
	// Location is the location string supplied when the invocation was
	// created; String includes it in its output.
	Location string
}
   226  
   227  func (inv Invocation) String() string {
   228  	args := make([]string, len(inv.Args))
   229  	for i := range args {
   230  		args[i] = fmt.Sprint(inv.Args[i])
   231  	}
   232  	return fmt.Sprintf(
   233  		"%s func:%d invocation:%d args:(%s)",
   234  		inv.Location,
   235  		inv.Func,
   236  		inv.Index,
   237  		strings.Join(args, ", "),
   238  	)
   239  }
   240  
// invocationIndex is the most recently assigned invocation index.
// newInvocation increments it atomically, so the first index handed out
// is 1.
var invocationIndex uint64
   242  
   243  func newInvocation(location string, fn uint64, exclusive bool, args ...interface{}) Invocation {
   244  	return Invocation{
   245  		Index:     atomic.AddUint64(&invocationIndex, 1),
   246  		Func:      fn,
   247  		Args:      args,
   248  		Exclusive: exclusive,
   249  		Location:  location,
   250  	}
   251  }
   252  
   253  // Invoke performs the Func invocation represented by this Invocation instance,
   254  // returning the resulting slice. This method must not be called concurrently
   255  // with Func.
   256  func (i Invocation) Invoke() Slice {
   257  	return funcs[i.Func].Apply(i.Args...)
   258  }
   259  
   260  // FuncLocationsDiff returns a slice of strings that describes the differences
   261  // between lhs and rhs locations slices as returned by FuncLocations. The slice
   262  // is a unified diff between the slices, so if you print each element on a
   263  // line, you'll get interpretable output. For example:
   264  //
   265  //  for _, edit := FuncLocationsDiff([]string{"a", "b", "c"}, []string{"a", "c"}) {
   266  //      fmt.Println(edit)
   267  //  }
   268  //
   269  // will produce:
   270  //
   271  //  a
   272  //  - b
   273  //  c
   274  //
   275  // If the slices are identical, it returns nil.
   276  func FuncLocationsDiff(lhs, rhs []string) []string {
   277  	// This is a vanilla Levenshtein distance implementation.
   278  	const (
   279  		editNone = iota
   280  		editAdd
   281  		editDel
   282  	)
   283  	type cell struct {
   284  		edit int
   285  		cost int
   286  	}
   287  	cells := make([][]cell, len(lhs)+1)
   288  	for i := range cells {
   289  		cells[i] = make([]cell, len(rhs)+1)
   290  	}
   291  	for i := 1; i < len(lhs)+1; i++ {
   292  		cells[i][0].edit = editDel
   293  		cells[i][0].cost = i
   294  	}
   295  	for j := 1; j < len(rhs)+1; j++ {
   296  		cells[0][j].edit = editAdd
   297  		cells[0][j].cost = j
   298  	}
   299  	for i := 1; i < len(lhs)+1; i++ {
   300  		for j := 1; j < len(rhs)+1; j++ {
   301  			switch {
   302  			case lhs[i-1] == rhs[j-1]:
   303  				cells[i][j].cost = cells[i-1][j-1].cost
   304  			// No replacement, as we want to represent it as
   305  			// deletion-then-addition in our unified diff output anyway.
   306  			case cells[i-1][j].cost < cells[i][j-1].cost:
   307  				cells[i][j].edit = editDel
   308  				cells[i][j].cost = cells[i-1][j].cost + 1
   309  			default:
   310  				cells[i][j].edit = editAdd
   311  				cells[i][j].cost = cells[i][j-1].cost + 1
   312  			}
   313  		}
   314  	}
   315  	var (
   316  		d      []string
   317  		differ bool
   318  	)
   319  	for i, j := len(lhs), len(rhs); i > 0 || j > 0; {
   320  		switch cells[i][j].edit {
   321  		case editNone:
   322  			d = append(d, lhs[i-1])
   323  			i -= 1
   324  			j -= 1
   325  		case editAdd:
   326  			d = append(d, "+ "+rhs[j-1])
   327  			j -= 1
   328  			differ = true
   329  		case editDel:
   330  			d = append(d, "- "+lhs[i-1])
   331  			i -= 1
   332  			differ = true
   333  		}
   334  	}
   335  	if !differ {
   336  		return nil
   337  	}
   338  	for i := len(d)/2 - 1; i >= 0; i-- {
   339  		opp := len(d) - 1 - i
   340  		d[i], d[opp] = d[opp], d[i]
   341  	}
   342  	return d
   343  }