github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/transform/startf/transform.go (about)

     1  package startf
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"io/ioutil"
     8  	"runtime/debug"
     9  	"strings"
    10  
    11  	golog "github.com/ipfs/go-log"
    12  	"github.com/qri-io/dataset"
    13  	"github.com/qri-io/dataset/preview"
    14  	"github.com/qri-io/qfs"
    15  	"github.com/qri-io/qri/dsref"
    16  	"github.com/qri-io/qri/event"
    17  	"github.com/qri-io/qri/repo"
    18  	stards "github.com/qri-io/qri/transform/startf/ds"
    19  	"github.com/qri-io/qri/version"
    20  	"github.com/qri-io/starlib"
    21  	"github.com/qri-io/starlib/dataframe"
    22  	"go.starlark.net/resolve"
    23  	"go.starlark.net/starlark"
    24  	"go.starlark.net/syntax"
    25  )
    26  
    27  var (
    28  	// Version is the version of qri that this transform was run with
    29  	Version = version.Version
    30  	// ErrNotDefined is for when a starlark value is not defined or does not exist
    31  	ErrNotDefined = fmt.Errorf("not defined")
    32  	// log for this package
    33  	log = golog.Logger("startf")
    34  )
    35  
    36  // ExecOpts defines options for execution
    37  type ExecOpts struct {
    38  	// loader for loading datasets
    39  	DatasetLoader dsref.Loader
    40  	// filesystem for loading scripts
    41  	Filesystem qfs.Filesystem
    42  	// supply a repo to make the 'qri' module available in starlark
    43  	Repo repo.Repo
    44  	// allow floating-point numbers
    45  	AllowFloat bool
    46  	// allow set data type
    47  	AllowSet bool
    48  	// allow lambda expressions
    49  	AllowLambda bool
    50  	// allow nested def statements
    51  	AllowNestedDef bool
    52  	// passed-in secrets (eg: API keys)
    53  	Secrets map[string]interface{}
    54  	// global values to pass for script execution
    55  	Globals starlark.StringDict
    56  	// provide a writer to record script "stderr" output to
    57  	ErrWriter io.Writer
    58  	// starlark module loader function
    59  	ModuleLoader ModuleLoader
    60  	// channel to send events on
    61  	EventsCh chan event.Event
    62  	// map containing components that have been changed
    63  	ChangeSet map[string]struct{}
    64  	// the size of the output area, for stringifying large objects
    65  	OutputWidth  int
    66  	OutputHeight int
    67  }
    68  
    69  // AddDatasetLoader is required to enable the load_dataset starlark builtin
    70  func AddDatasetLoader(loader dsref.Loader) func(o *ExecOpts) {
    71  	return func(o *ExecOpts) {
    72  		o.DatasetLoader = loader
    73  	}
    74  }
    75  
    76  // AddFilesystem adds a filesystem to the transformer
    77  func AddFilesystem(fs qfs.Filesystem) func(o *ExecOpts) {
    78  	return func(o *ExecOpts) {
    79  		o.Filesystem = fs
    80  	}
    81  }
    82  
    83  // AddQriRepo adds a qri repo to execution options, providing scripted access
    84  // to assets within the respoitory
    85  func AddQriRepo(repo repo.Repo) func(o *ExecOpts) {
    86  	return func(o *ExecOpts) {
    87  		o.Repo = repo
    88  	}
    89  }
    90  
    91  // AddEventsChannel sets an event channel to send events on
    92  func AddEventsChannel(eventsCh chan event.Event) func(o *ExecOpts) {
    93  	return func(o *ExecOpts) {
    94  		o.EventsCh = eventsCh
    95  	}
    96  }
    97  
    98  // SetSecrets assigns environment secret key-value pairs for script execution
    99  func SetSecrets(secrets map[string]string) func(o *ExecOpts) {
   100  	return func(o *ExecOpts) {
   101  		if secrets != nil {
   102  			// convert to map[string]interface{}, which the lower-level startf supports
   103  			// until we're sure map[string]string is going to work in the majority of use cases
   104  			s := map[string]interface{}{}
   105  			for key, val := range secrets {
   106  				s[key] = val
   107  			}
   108  			o.Secrets = s
   109  		}
   110  	}
   111  }
   112  
   113  // SetErrWriter provides a writer to record the "stderr" diagnostic output of
   114  // the transform script
   115  func SetErrWriter(w io.Writer) func(o *ExecOpts) {
   116  	return func(o *ExecOpts) {
   117  		o.ErrWriter = w
   118  	}
   119  }
   120  
   121  // TrackChanges retains a map that tracks changes to dataset components
   122  func TrackChanges(changes map[string]struct{}) func(o *ExecOpts) {
   123  	return func(o *ExecOpts) {
   124  		o.ChangeSet = changes
   125  	}
   126  }
   127  
   128  // SizeInfo sets the size of the area that will display output
   129  func SizeInfo(outWidth, outHeight int) func(o *ExecOpts) {
   130  	return func(o *ExecOpts) {
   131  		o.OutputWidth = outWidth
   132  		o.OutputHeight = outHeight
   133  	}
   134  }
   135  
   136  // DefaultExecOpts applies default options to an ExecOpts pointer
   137  func DefaultExecOpts(o *ExecOpts) {
   138  	o.AllowFloat = true
   139  	o.AllowSet = true
   140  	o.AllowLambda = true
   141  	o.Globals = starlark.StringDict{}
   142  	o.ErrWriter = ioutil.Discard
   143  	o.ModuleLoader = DefaultModuleLoader
   144  }
   145  
   146  // StepRunner is able to run individual transform steps
   147  type StepRunner struct {
   148  	config       map[string]interface{}
   149  	secrets      map[string]interface{}
   150  	fs           qfs.Filesystem
   151  	dsLoader     dsref.Loader
   152  	stards       *stards.BoundDataset
   153  	globals      starlark.StringDict
   154  	eventsCh     chan event.Event
   155  	writer       io.Writer
   156  	thread       *starlark.Thread
   157  	changeSet    map[string]struct{}
   158  	commitCalled bool
   159  }
   160  
   161  // NewStepRunner returns a new StepRunner for the given dataset
   162  func NewStepRunner(target *dataset.Dataset, opts ...func(o *ExecOpts)) *StepRunner {
   163  	o := &ExecOpts{}
   164  	DefaultExecOpts(o)
   165  	for _, opt := range opts {
   166  		opt(o)
   167  	}
   168  
   169  	// hoist execution settings to resolve package settings
   170  	resolve.AllowFloat = o.AllowFloat
   171  	resolve.AllowSet = o.AllowSet
   172  	resolve.AllowLambda = o.AllowLambda
   173  	resolve.AllowNestedDef = o.AllowNestedDef
   174  	resolve.LoadBindsGlobally = true
   175  	resolve.AllowGlobalReassign = true
   176  
   177  	// add error func to starlark environment
   178  	starlark.Universe["error"] = starlark.NewBuiltin("error", Error)
   179  	for key, val := range o.Globals {
   180  		starlark.Universe[key] = val
   181  	}
   182  
   183  	thread := &starlark.Thread{
   184  		Load: o.ModuleLoader,
   185  		Print: func(thread *starlark.Thread, msg string) {
   186  			if o.EventsCh != nil {
   187  				o.EventsCh <- event.Event{
   188  					Type: event.ETTransformPrint,
   189  					Payload: event.TransformMessage{
   190  						Msg: msg,
   191  					},
   192  				}
   193  			}
   194  			o.ErrWriter.Write([]byte(msg + "\n"))
   195  		},
   196  	}
   197  
   198  	// Store the OutputConfig on the starlark thread. This allows functions
   199  	// such as the DataFrame constructor to get this configuration
   200  	outconf := dataframe.SetOutputSize(thread, o.OutputWidth, o.OutputHeight)
   201  
   202  	r := &StepRunner{
   203  		config:    target.Transform.Config,
   204  		secrets:   o.Secrets,
   205  		fs:        o.Filesystem,
   206  		dsLoader:  o.DatasetLoader,
   207  		eventsCh:  o.EventsCh,
   208  		writer:    o.ErrWriter,
   209  		thread:    thread,
   210  		globals:   starlark.StringDict{},
   211  		changeSet: o.ChangeSet,
   212  	}
   213  	r.stards = stards.NewBoundDataset(target, outconf, r.onCommit)
   214  
   215  	return r
   216  }
   217  
   218  // RunStep runs the single transform step using the dataset
   219  func (r *StepRunner) RunStep(ctx context.Context, ds *dataset.Dataset, st *dataset.TransformStep) (err error) {
   220  	r.globals["load_dataset"] = starlark.NewBuiltin("load_dataset", r.loadDatasetFunc(ctx, ds))
   221  	r.globals["dataset"] = r.stards
   222  	r.globals["config"] = config(r.config)
   223  	r.globals["secrets"] = secrets(r.secrets)
   224  
   225  	script, ok := st.Script.(string)
   226  	if !ok {
   227  		return fmt.Errorf("starlark step Script must be a string. got %T", st.Script)
   228  	}
   229  
   230  	// Recover from errors.
   231  	defer func() {
   232  		if r := recover(); r != nil {
   233  			// Need to assign to the named return value from
   234  			// a recovery
   235  			err = fmt.Errorf("running transform: %w", r)
   236  			log.Errorf("%s, stacktrace: %s", err, debug.Stack())
   237  		}
   238  	}()
   239  
   240  	// Parse, resolve, and compile a Starlark source file.
   241  	file, mod, err := starlark.SourceProgram(fmt.Sprintf("%s.star", st.Name), strings.NewReader(script), r.globals.Has)
   242  	if err != nil {
   243  		return err
   244  	}
   245  
   246  	r.printFinalStatement(file)
   247  
   248  	globals, err := mod.Init(r.thread, r.globals)
   249  	if err != nil {
   250  		if evalErr, ok := err.(*starlark.EvalError); ok {
   251  			return fmt.Errorf(evalErr.Backtrace())
   252  		}
   253  		return err
   254  	}
   255  	for key, val := range globals {
   256  		r.globals[key] = val
   257  	}
   258  
   259  	return
   260  }
   261  
   262  // TODO(b5): this needs to be finished
   263  func (r *StepRunner) printFinalStatement(f *syntax.File) {
   264  	if len(f.Stmts) == 0 {
   265  		return
   266  	}
   267  
   268  	_, stepEnd := f.Span()
   269  	lastStmt := f.Stmts[len(f.Stmts)-1]
   270  	_, end := lastStmt.Span()
   271  
   272  	// only print if statment is on the last line
   273  	if end.Line == stepEnd.Line {
   274  		// r.eventsCh <- event.Event{
   275  		// 	Type: event.ETTransformPrint,
   276  		// 	Payload: event.TransformMessage{
   277  		// 		Msg: fmt.Sprintf("%T %#v\n", lastStmt, lastStmt),
   278  		// 	},
   279  		// }
   280  	}
   281  }
   282  
   283  // CommitCalled returns true if the commit function has been called
   284  func (r *StepRunner) CommitCalled() bool {
   285  	return r.commitCalled
   286  }
   287  
   288  // globalFunc checks if a global function is defined
   289  func (r *StepRunner) globalFunc(name string) (fn *starlark.Function, err error) {
   290  	x, ok := r.globals[name]
   291  	if !ok {
   292  		return fn, ErrNotDefined
   293  	}
   294  	if x.Type() != "function" {
   295  		return fn, fmt.Errorf("%q is not a function", name)
   296  	}
   297  	return x.(*starlark.Function), nil
   298  }
   299  
   300  // loadDatasetFunc returns an implementation of the starlark load_dataset
   301  // function
   302  func (r *StepRunner) loadDatasetFunc(ctx context.Context, target *dataset.Dataset) func(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
   303  	return func(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
   304  		var refstr starlark.String
   305  		if err := starlark.UnpackArgs("load_dataset", args, kwargs, "ref", &refstr); err != nil {
   306  			return starlark.None, err
   307  		}
   308  
   309  		if r.dsLoader == nil {
   310  			return nil, fmt.Errorf("load_datset function is not enabled")
   311  		}
   312  
   313  		ds, err := r.dsLoader.LoadDataset(ctx, refstr.GoString())
   314  		if err != nil {
   315  			return starlark.None, err
   316  		}
   317  
   318  		if target.Transform.Resources == nil {
   319  			target.Transform.Resources = map[string]*dataset.TransformResource{}
   320  		}
   321  
   322  		target.Transform.Resources[ds.Path] = &dataset.TransformResource{
   323  			// TODO(b5) - this should be a method on dataset.Dataset
   324  			// we should add an ID field to dataset, set that to the InitID, and
   325  			// add fields to dataset.TransformResource that effectively make it the
   326  			// same data structure as dsref.Ref
   327  			Path: fmt.Sprintf("%s/%s@%s", ds.Peername, ds.Name, ds.Path),
   328  		}
   329  
   330  		outconf, _ := thread.Local("OutputConfig").(*dataframe.OutputConfig)
   331  		return stards.NewDataset(ds, outconf), nil
   332  	}
   333  }
   334  
   335  // func (r *StepRunner) print(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
   336  // 	var (
   337  // 		str string
   338  // 		message starlark.Value
   339  // 	)
   340  
   341  // 	if err := starlark.UnpackArgs("print", args, kwargs, "message", &message); err != nil {
   342  // 		return starlark.None, err
   343  // 	}
   344  
   345  // 	if stringer, ok := message.(starlark.GoString)
   346  // 	if r.eventsCh != nil {
   347  // 		r.eventsCh <- event.Event{
   348  // 			Type: event.ETTransformPrint,
   349  // 			Payload: event.TransformMessage{
   350  // 				Msg: message.GoString(),
   351  // 			},
   352  // 		}
   353  // 	}
   354  // 	r.writer.Write([]byte(message.GoString() + "\n"))
   355  // 	return starlark.None, nil
   356  // }
   357  
   358  func (r *StepRunner) onCommit(ds *stards.Dataset) error {
   359  	// Which components were changed
   360  	if r.changeSet != nil {
   361  		changes := ds.Changes()
   362  		for comp := range changes {
   363  			r.changeSet[comp] = changes[comp]
   364  		}
   365  	}
   366  
   367  	ctx := context.TODO()
   368  	if err := ds.AssignComponentsFromDataframe(ctx, r.changeSet, r.fs, r.dsLoader); err != nil {
   369  		return err
   370  	}
   371  
   372  	if r.eventsCh != nil {
   373  		pview, err := preview.Create(context.TODO(), ds.Dataset())
   374  		if err != nil {
   375  			return err
   376  		}
   377  		r.eventsCh <- event.Event{Type: event.ETTransformDatasetPreview, Payload: pview}
   378  	}
   379  	r.commitCalled = true
   380  	return nil
   381  }
   382  
   383  // ModuleLoader is a function that can load starlark modules
   384  type ModuleLoader func(thread *starlark.Thread, module string) (starlark.StringDict, error)
   385  
   386  // DefaultModuleLoader loads starlib modules
   387  var DefaultModuleLoader = func(thread *starlark.Thread, module string) (dict starlark.StringDict, err error) {
   388  	return starlib.Loader(thread, module)
   389  }
   390  
   391  // Error halts program execution with an error
   392  func Error(thread *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
   393  	var msg starlark.Value
   394  	if err := starlark.UnpackPositionalArgs("error", args, kwargs, 1, &msg); err != nil {
   395  		return nil, err
   396  	}
   397  
   398  	return nil, fmt.Errorf("transform error: %s", msg)
   399  }