github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/func.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 package bigslice 6 7 import ( 8 "fmt" 9 "reflect" 10 "runtime" 11 "strings" 12 "sync/atomic" 13 14 "github.com/grailbio/bigslice/typecheck" 15 ) 16 17 var typeOfSlice = reflect.TypeOf((*Slice)(nil)).Elem() 18 19 var ( 20 // Funcs is the global registry of funcs. We rely on deterministic 21 // registration order. (This is guaranteed by Go's variable 22 // initialization for a single compiler, which is sufficient for our 23 // use.) It would definitely be nice to have a nicer way of doing 24 // this (without the overhead of users minting their own names). 25 funcs []*FuncValue 26 // FuncsBusy is used to detect data races in registration. 27 funcsBusy int32 28 ) 29 30 // A FuncValue represents a Bigslice function, as returned by Func. 31 type FuncValue struct { 32 fn reflect.Value 33 args []reflect.Type 34 index int 35 exclusive bool 36 37 // file and line are the location at which the function was defined. 38 file string 39 line int 40 } 41 42 // Exclusive marks this func to require mutually exclusive machine 43 // allocation. 44 // 45 // NOTE: This is an experimental API that may change. 46 func (f *FuncValue) Exclusive() *FuncValue { 47 fv := new(FuncValue) 48 *fv = *f 49 fv.exclusive = true 50 return fv 51 } 52 53 // NumIn returns the number of input arguments to f. 54 func (f *FuncValue) NumIn() int { return len(f.args) } 55 56 // In returns the i'th argument type of function f. 57 func (f *FuncValue) In(i int) reflect.Type { return f.args[i] } 58 59 // Invocation creates an invocation representing the function f 60 // applied to the provided arguments. Invocation panics with a type 61 // error if the provided arguments do not match in type or arity. 
62 func (f *FuncValue) Invocation(location string, args ...interface{}) Invocation { 63 argTypes := make([]reflect.Type, len(args)) 64 for i, arg := range args { 65 argTypes[i] = reflect.TypeOf(arg) 66 } 67 f.typecheck(argTypes...) 68 return newInvocation(location, uint64(f.index), f.exclusive, args...) 69 } 70 71 // Apply invokes the function f with the provided arguments, 72 // returning the computed Slice. Apply panics with a type error if 73 // argument type or arity do not match. 74 func (f *FuncValue) Apply(args ...interface{}) Slice { 75 argv := make([]reflect.Value, len(args)) 76 for i := range argv { 77 argv[i] = reflect.ValueOf(args[i]) 78 } 79 return f.applyValue(argv) 80 } 81 82 func (f *FuncValue) applyValue(args []reflect.Value) Slice { 83 argTypes := make([]reflect.Type, len(args)) 84 for i, arg := range args { 85 if !arg.IsValid() { 86 if !isNilAssignable(f.args[i]) { 87 // Untyped nil argument for type that cannot be nil. 88 typecheck.Panicf(2, "cannot use nil as type %s in argument to function", f.args[i]) 89 } 90 argTypes[i] = f.args[i] 91 args[i] = reflect.Zero(f.args[i]) 92 continue 93 } 94 argTypes[i] = arg.Type() 95 } 96 f.typecheck(argTypes...) 
97 out := f.fn.Call(args) 98 return out[0].Interface().(Slice) 99 } 100 101 func isNilAssignable(typ reflect.Type) bool { 102 switch typ.Kind() { 103 case reflect.Chan: 104 case reflect.Func: 105 case reflect.Interface: 106 case reflect.Map: 107 case reflect.Ptr: 108 case reflect.Slice: 109 case reflect.UnsafePointer: 110 default: 111 return false 112 } 113 return true 114 } 115 116 func (f *FuncValue) typecheck(args ...reflect.Type) { 117 if len(args) != len(f.args) { 118 typecheck.Panicf(2, "wrong number of arguments: function takes %d arguments, got %d", 119 len(f.args), len(args)) 120 } 121 for i := range args { 122 expect, have := f.args[i], args[i] 123 if have == nil { 124 if !isNilAssignable(expect) { 125 typecheck.Panicf(2, "wrong type for argument %d: %s cannot be nil", i, expect) 126 } 127 continue 128 } 129 switch expect.Kind() { 130 case reflect.Interface: 131 if !have.Implements(expect) { 132 typecheck.Panicf(2, "wrong type for argument %d: type %s does not implement interface %s", i, have, expect) 133 } 134 default: 135 if have != expect { 136 typecheck.Panicf(2, "wrong type for argument %d: expected %s, got %s", i, expect, have) 137 } 138 } 139 } 140 } 141 142 // Func creates a bigslice function from the provided function value. Bigslice 143 // funcs must return a single Slice value. 144 // 145 // All calls to Func must happen before exec.Start is called (and occur in 146 // deterministic order). This rule is easy to follow by making all Func calls 147 // occur in global variable initialization, with exec.Start called from the 148 // program's main function, e.g.: 149 // 150 // var myFunc = bigslice.Func(...) 151 // 152 // func main() { 153 // sess, err := exec.Start() 154 // ... 155 // } 156 // 157 // Funcs provide bigslice with a means of dynamic abstraction: since Funcs can 158 // be invoked remotely, dynamically created slices may be named across process 159 // boundaries. 
160 func Func(fn interface{}) *FuncValue { 161 fv := reflect.ValueOf(fn) 162 ftype := fv.Type() 163 if ftype.Kind() != reflect.Func { 164 typecheck.Panicf(1, "bigslice.Func: argument to func is a %T, not a func", fn) 165 } 166 if ftype.NumOut() != 1 || ftype.Out(0) != typeOfSlice { 167 typecheck.Panicf(1, "bigslice.Func: func must return a single bigslice.Slice") 168 } 169 v := new(FuncValue) 170 v.fn = fv 171 for i := 0; i < ftype.NumIn(); i++ { 172 typ := ftype.In(i) 173 v.args = append(v.args, typ) 174 } 175 if atomic.AddInt32(&funcsBusy, 1) != 1 { 176 panic("bigslice.Func: data race") 177 } 178 v.index = len(funcs) 179 funcs = append(funcs, v) 180 if atomic.AddInt32(&funcsBusy, -1) != 0 { 181 panic("bigslice.Func: data race") 182 } 183 _, v.file, v.line, _ = runtime.Caller(1) 184 return v 185 } 186 187 // FuncByIndex returns the *FuncValue, created by Func, with the given index. 188 // We use this to address funcs across process boundaries, as we serialize the 189 // index for the receiver to look up in its address space. This function must 190 // not be called concurrently with Func. 191 func FuncByIndex(i uint64) *FuncValue { 192 return funcs[i] 193 } 194 195 // FuncLocations returns a slice of strings that describe the locations of 196 // Func creation, in the same order as the Funcs registry. We use this to 197 // verify that worker processes have the same Funcs. Note that this is not a 198 // precisely correct verification, as it's possible to define multiple Funcs on 199 // the same line. However, it's good enough for the scenarios we have 200 // encountered or anticipate. 201 func FuncLocations() []string { 202 locs := make([]string, len(funcs)) 203 for i, f := range funcs { 204 locs[i] = fmt.Sprintf("%s:%d", f.file, f.line) 205 } 206 return locs 207 } 208 209 // Invocation represents an invocation of a Bigslice func of the same 210 // binary. Invocations can be transmitted across process boundaries 211 // and thus may be invoked by remote executors. 
212 // 213 // Each invocation carries an invocation index, which is a unique index 214 // for invocations within a process namespace. It can thus be used to 215 // represent a particular function invocation from a driver process. 216 // 217 // Invocations must be created by newInvocation. 218 type Invocation struct { 219 // Index is the unique index of this invocation. Is is always >= 1. 220 Index uint64 221 Func uint64 222 Args []interface{} 223 Exclusive bool 224 Location string 225 } 226 227 func (inv Invocation) String() string { 228 args := make([]string, len(inv.Args)) 229 for i := range args { 230 args[i] = fmt.Sprint(inv.Args[i]) 231 } 232 return fmt.Sprintf( 233 "%s func:%d invocation:%d args:(%s)", 234 inv.Location, 235 inv.Func, 236 inv.Index, 237 strings.Join(args, ", "), 238 ) 239 } 240 241 var invocationIndex uint64 242 243 func newInvocation(location string, fn uint64, exclusive bool, args ...interface{}) Invocation { 244 return Invocation{ 245 Index: atomic.AddUint64(&invocationIndex, 1), 246 Func: fn, 247 Args: args, 248 Exclusive: exclusive, 249 Location: location, 250 } 251 } 252 253 // Invoke performs the Func invocation represented by this Invocation instance, 254 // returning the resulting slice. This method must not be called concurrently 255 // with Func. 256 func (i Invocation) Invoke() Slice { 257 return funcs[i.Func].Apply(i.Args...) 258 } 259 260 // FuncLocationsDiff returns a slice of strings that describes the differences 261 // between lhs and rhs locations slices as returned by FuncLocations. The slice 262 // is a unified diff between the slices, so if you print each element on a 263 // line, you'll get interpretable output. For example: 264 // 265 // for _, edit := FuncLocationsDiff([]string{"a", "b", "c"}, []string{"a", "c"}) { 266 // fmt.Println(edit) 267 // } 268 // 269 // will produce: 270 // 271 // a 272 // - b 273 // c 274 // 275 // If the slices are identical, it returns nil. 
func FuncLocationsDiff(lhs, rhs []string) []string {
	// Edit-distance table restricted to insertions and deletions: a
	// replacement is never recorded because the unified diff shows it as a
	// deletion followed by an addition anyway.
	type editKind int
	const (
		keepLine editKind = iota
		addLine
		dropLine
	)
	type entry struct {
		kind editKind
		cost int
	}

	rows, cols := len(lhs)+1, len(rhs)+1
	table := make([][]entry, rows)
	for r := range table {
		table[r] = make([]entry, cols)
	}
	// First column: reaching an empty rhs means dropping every lhs line.
	for r := 1; r < rows; r++ {
		table[r][0] = entry{kind: dropLine, cost: r}
	}
	// First row: starting from an empty lhs means adding every rhs line.
	for c := 1; c < cols; c++ {
		table[0][c] = entry{kind: addLine, cost: c}
	}
	for r := 1; r < rows; r++ {
		for c := 1; c < cols; c++ {
			dropCost, addCost := table[r-1][c].cost, table[r][c-1].cost
			if lhs[r-1] == rhs[c-1] {
				table[r][c] = entry{kind: keepLine, cost: table[r-1][c-1].cost}
			} else if dropCost < addCost {
				table[r][c] = entry{kind: dropLine, cost: dropCost + 1}
			} else {
				// Ties go to an addition.
				table[r][c] = entry{kind: addLine, cost: addCost + 1}
			}
		}
	}

	// Walk back from the bottom-right corner, collecting edits in reverse.
	var (
		out     []string
		changed bool
	)
	r, c := len(lhs), len(rhs)
	for r > 0 || c > 0 {
		switch table[r][c].kind {
		case keepLine:
			out = append(out, lhs[r-1])
			r--
			c--
		case addLine:
			out = append(out, "+ "+rhs[c-1])
			c--
			changed = true
		case dropLine:
			out = append(out, "- "+lhs[r-1])
			r--
			changed = true
		}
	}
	if !changed {
		return nil
	}
	// Restore front-to-back order.
	for lo, hi := 0, len(out)-1; lo < hi; lo, hi = lo+1, hi-1 {
		out[lo], out[hi] = out[hi], out[lo]
	}
	return out
}