github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/transform/startf/transform.go (about) 1 package startf 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "io/ioutil" 8 "runtime/debug" 9 "strings" 10 11 golog "github.com/ipfs/go-log" 12 "github.com/qri-io/dataset" 13 "github.com/qri-io/dataset/preview" 14 "github.com/qri-io/qfs" 15 "github.com/qri-io/qri/dsref" 16 "github.com/qri-io/qri/event" 17 "github.com/qri-io/qri/repo" 18 stards "github.com/qri-io/qri/transform/startf/ds" 19 "github.com/qri-io/qri/version" 20 "github.com/qri-io/starlib" 21 "github.com/qri-io/starlib/dataframe" 22 "go.starlark.net/resolve" 23 "go.starlark.net/starlark" 24 "go.starlark.net/syntax" 25 ) 26 27 var ( 28 // Version is the version of qri that this transform was run with 29 Version = version.Version 30 // ErrNotDefined is for when a starlark value is not defined or does not exist 31 ErrNotDefined = fmt.Errorf("not defined") 32 // log for this package 33 log = golog.Logger("startf") 34 ) 35 36 // ExecOpts defines options for execution 37 type ExecOpts struct { 38 // loader for loading datasets 39 DatasetLoader dsref.Loader 40 // filesystem for loading scripts 41 Filesystem qfs.Filesystem 42 // supply a repo to make the 'qri' module available in starlark 43 Repo repo.Repo 44 // allow floating-point numbers 45 AllowFloat bool 46 // allow set data type 47 AllowSet bool 48 // allow lambda expressions 49 AllowLambda bool 50 // allow nested def statements 51 AllowNestedDef bool 52 // passed-in secrets (eg: API keys) 53 Secrets map[string]interface{} 54 // global values to pass for script execution 55 Globals starlark.StringDict 56 // provide a writer to record script "stderr" output to 57 ErrWriter io.Writer 58 // starlark module loader function 59 ModuleLoader ModuleLoader 60 // channel to send events on 61 EventsCh chan event.Event 62 // map containing components that have been changed 63 ChangeSet map[string]struct{} 64 // the size of the output area, for stringifying large objects 65 OutputWidth int 66 OutputHeight int 67 } 68 69 // AddDatasetLoader is required to enable the load_dataset starlark builtin 70 func AddDatasetLoader(loader dsref.Loader) func(o *ExecOpts) { 71 return func(o *ExecOpts) { 72 o.DatasetLoader = loader 73 } 74 } 75 76 // AddFilesystem adds a filesystem to the transformer 77 func AddFilesystem(fs qfs.Filesystem) func(o *ExecOpts) { 78 return func(o *ExecOpts) { 79 o.Filesystem = fs 80 } 81 } 82 83 // AddQriRepo adds a qri repo to execution options, providing scripted access 84 // to assets within the respoitory 85 func AddQriRepo(repo repo.Repo) func(o *ExecOpts) { 86 return func(o *ExecOpts) { 87 o.Repo = repo 88 } 89 } 90 91 // AddEventsChannel sets an event channel to send events on 92 func AddEventsChannel(eventsCh chan event.Event) func(o *ExecOpts) { 93 return func(o *ExecOpts) { 94 o.EventsCh = eventsCh 95 } 96 } 97 98 // SetSecrets assigns environment secret key-value pairs for script execution 99 func SetSecrets(secrets map[string]string) func(o *ExecOpts) { 100 return func(o *ExecOpts) { 101 if secrets != nil { 102 // convert to map[string]interface{}, which the lower-level startf supports 103 // until we're sure map[string]string is going to work in the majority of use cases 104 s := map[string]interface{}{} 105 for key, val := range secrets { 106 s[key] = val 107 } 108 o.Secrets = s 109 } 110 } 111 } 112 113 // SetErrWriter provides a writer to record the "stderr" diagnostic output of 114 // the transform script 115 func SetErrWriter(w io.Writer) func(o *ExecOpts) { 116 return func(o *ExecOpts) { 117 o.ErrWriter = w 118 } 119 } 120 121 // TrackChanges retains a map that tracks changes to dataset components 122 func TrackChanges(changes map[string]struct{}) func(o *ExecOpts) { 123 return func(o *ExecOpts) { 124 o.ChangeSet = changes 125 } 126 } 127 128 // SizeInfo sets the size of the area that will display output 129 func SizeInfo(outWidth, outHeight int) func(o *ExecOpts) { 130 return func(o *ExecOpts) { 131 o.OutputWidth = outWidth 132 o.OutputHeight = outHeight 133 } 134 } 135 136 // DefaultExecOpts applies default options to an ExecOpts pointer 137 func DefaultExecOpts(o *ExecOpts) { 138 o.AllowFloat = true 139 o.AllowSet = true 140 o.AllowLambda = true 141 o.Globals = starlark.StringDict{} 142 o.ErrWriter = ioutil.Discard 143 o.ModuleLoader = DefaultModuleLoader 144 } 145 146 // StepRunner is able to run individual transform steps 147 type StepRunner struct { 148 config map[string]interface{} 149 secrets map[string]interface{} 150 fs qfs.Filesystem 151 dsLoader dsref.Loader 152 stards *stards.BoundDataset 153 globals starlark.StringDict 154 eventsCh chan event.Event 155 writer io.Writer 156 thread *starlark.Thread 157 changeSet map[string]struct{} 158 commitCalled bool 159 } 160 161 // NewStepRunner returns a new StepRunner for the given dataset 162 func NewStepRunner(target *dataset.Dataset, opts ...func(o *ExecOpts)) *StepRunner { 163 o := &ExecOpts{} 164 DefaultExecOpts(o) 165 for _, opt := range opts { 166 opt(o) 167 } 168 169 // hoist execution settings to resolve package settings 170 resolve.AllowFloat = o.AllowFloat 171 resolve.AllowSet = o.AllowSet 172 resolve.AllowLambda = o.AllowLambda 173 resolve.AllowNestedDef = o.AllowNestedDef 174 resolve.LoadBindsGlobally = true 175 resolve.AllowGlobalReassign = true 176 177 // add error func to starlark environment 178 starlark.Universe["error"] = starlark.NewBuiltin("error", Error) 179 for key, val := range o.Globals { 180 starlark.Universe[key] = val 181 } 182 183 thread := &starlark.Thread{ 184 Load: o.ModuleLoader, 185 Print: func(thread *starlark.Thread, msg string) { 186 if o.EventsCh != nil { 187 o.EventsCh <- event.Event{ 188 Type: event.ETTransformPrint, 189 Payload: event.TransformMessage{ 190 Msg: msg, 191 }, 192 } 193 } 194 o.ErrWriter.Write([]byte(msg + "\n")) 195 }, 196 } 197 198 // Store the OutputConfig on the starlark thread. This allows functions 199 // such as the DataFrame constructor to get this configuration 200 outconf := dataframe.SetOutputSize(thread, o.OutputWidth, o.OutputHeight) 201 202 r := &StepRunner{ 203 config: target.Transform.Config, 204 secrets: o.Secrets, 205 fs: o.Filesystem, 206 dsLoader: o.DatasetLoader, 207 eventsCh: o.EventsCh, 208 writer: o.ErrWriter, 209 thread: thread, 210 globals: starlark.StringDict{}, 211 changeSet: o.ChangeSet, 212 } 213 r.stards = stards.NewBoundDataset(target, outconf, r.onCommit) 214 215 return r 216 } 217 218 // RunStep runs the single transform step using the dataset 219 func (r *StepRunner) RunStep(ctx context.Context, ds *dataset.Dataset, st *dataset.TransformStep) (err error) { 220 r.globals["load_dataset"] = starlark.NewBuiltin("load_dataset", r.loadDatasetFunc(ctx, ds)) 221 r.globals["dataset"] = r.stards 222 r.globals["config"] = config(r.config) 223 r.globals["secrets"] = secrets(r.secrets) 224 225 script, ok := st.Script.(string) 226 if !ok { 227 return fmt.Errorf("starlark step Script must be a string. got %T", st.Script) 228 } 229 230 // Recover from errors. 231 defer func() { 232 if r := recover(); r != nil { 233 // Need to assign to the named return value from 234 // a recovery 235 err = fmt.Errorf("running transform: %w", r) 236 log.Errorf("%s, stacktrace: %s", err, debug.Stack()) 237 } 238 }() 239 240 // Parse, resolve, and compile a Starlark source file. 241 file, mod, err := starlark.SourceProgram(fmt.Sprintf("%s.star", st.Name), strings.NewReader(script), r.globals.Has) 242 if err != nil { 243 return err 244 } 245 246 r.printFinalStatement(file) 247 248 globals, err := mod.Init(r.thread, r.globals) 249 if err != nil { 250 if evalErr, ok := err.(*starlark.EvalError); ok { 251 return fmt.Errorf(evalErr.Backtrace()) 252 } 253 return err 254 } 255 for key, val := range globals { 256 r.globals[key] = val 257 } 258 259 return 260 } 261 262 // TODO(b5): this needs to be finished 263 func (r *StepRunner) printFinalStatement(f *syntax.File) { 264 if len(f.Stmts) == 0 { 265 return 266 } 267 268 _, stepEnd := f.Span() 269 lastStmt := f.Stmts[len(f.Stmts)-1] 270 _, end := lastStmt.Span() 271 272 // only print if statment is on the last line 273 if end.Line == stepEnd.Line { 274 // r.eventsCh <- event.Event{ 275 // Type: event.ETTransformPrint, 276 // Payload: event.TransformMessage{ 277 // Msg: fmt.Sprintf("%T %#v\n", lastStmt, lastStmt), 278 // }, 279 // } 280 } 281 } 282 283 // CommitCalled returns true if the commit function has been called 284 func (r *StepRunner) CommitCalled() bool { 285 return r.commitCalled 286 } 287 288 // globalFunc checks if a global function is defined 289 func (r *StepRunner) globalFunc(name string) (fn *starlark.Function, err error) { 290 x, ok := r.globals[name] 291 if !ok { 292 return fn, ErrNotDefined 293 } 294 if x.Type() != "function" { 295 return fn, fmt.Errorf("%q is not a function", name) 296 } 297 return x.(*starlark.Function), nil 298 } 299 300 // loadDatasetFunc returns an implementation of the starlark load_dataset 301 // function 302 func (r *StepRunner) loadDatasetFunc(ctx context.Context, target *dataset.Dataset) func(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { 303 return func(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { 304 var refstr starlark.String 305 if err := starlark.UnpackArgs("load_dataset", args, kwargs, "ref", &refstr); err != nil { 306 return starlark.None, err 307 } 308 309 if r.dsLoader == nil { 310 return nil, fmt.Errorf("load_datset function is not enabled") 311 } 312 313 ds, err := r.dsLoader.LoadDataset(ctx, refstr.GoString()) 314 if err != nil { 315 return starlark.None, err 316 } 317 318 if target.Transform.Resources == nil { 319 target.Transform.Resources = map[string]*dataset.TransformResource{} 320 } 321 322 target.Transform.Resources[ds.Path] = &dataset.TransformResource{ 323 // TODO(b5) - this should be a method on dataset.Dataset 324 // we should add an ID field to dataset, set that to the InitID, and 325 // add fields to dataset.TransformResource that effectively make it the 326 // same data structure as dsref.Ref 327 Path: fmt.Sprintf("%s/%s@%s", ds.Peername, ds.Name, ds.Path), 328 } 329 330 outconf, _ := thread.Local("OutputConfig").(*dataframe.OutputConfig) 331 return stards.NewDataset(ds, outconf), nil 332 } 333 } 334 335 // func (r *StepRunner) print(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { 336 // var ( 337 // str string 338 // message starlark.Value 339 // ) 340 341 // if err := starlark.UnpackArgs("print", args, kwargs, "message", &message); err != nil { 342 // return starlark.None, err 343 // } 344 345 // if stringer, ok := message.(starlark.GoString) 346 // if r.eventsCh != nil { 347 // r.eventsCh <- event.Event{ 348 // Type: event.ETTransformPrint, 349 // Payload: event.TransformMessage{ 350 // Msg: message.GoString(), 351 // }, 352 // } 353 // } 354 // r.writer.Write([]byte(message.GoString() + "\n")) 355 // return starlark.None, nil 356 // } 357 358 func (r *StepRunner) onCommit(ds *stards.Dataset) error { 359 // Which components were changed 360 if r.changeSet != nil { 361 changes := ds.Changes() 362 for comp := range changes { 363 r.changeSet[comp] = changes[comp] 364 } 365 } 366 367 ctx := context.TODO() 368 if err := ds.AssignComponentsFromDataframe(ctx, r.changeSet, r.fs, r.dsLoader); err != nil { 369 return err 370 } 371 372 if r.eventsCh != nil { 373 pview, err := preview.Create(context.TODO(), ds.Dataset()) 374 if err != nil { 375 return err 376 } 377 r.eventsCh <- event.Event{Type: event.ETTransformDatasetPreview, Payload: pview} 378 } 379 r.commitCalled = true 380 return nil 381 } 382 383 // ModuleLoader is a function that can load starlark modules 384 type ModuleLoader func(thread *starlark.Thread, module string) (starlark.StringDict, error) 385 386 // DefaultModuleLoader loads starlib modules 387 var DefaultModuleLoader = func(thread *starlark.Thread, module string) (dict starlark.StringDict, err error) { 388 return starlib.Loader(thread, module) 389 } 390 391 // Error halts program execution with an error 392 func Error(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) { 393 var msg starlark.Value 394 if err := starlark.UnpackPositionalArgs("error", args, kwargs, 1, &msg); err != nil { 395 return nil, err 396 } 397 398 return nil, fmt.Errorf("transform error: %s", msg) 399 }