github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/transform/transform_test.go (about)

     1  package transform
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"io/ioutil"
     7  	"testing"
     8  
     9  	"github.com/google/go-cmp/cmp"
    10  	"github.com/google/go-cmp/cmp/cmpopts"
    11  	"github.com/qri-io/dataset"
    12  	"github.com/qri-io/qfs"
    13  	"github.com/qri-io/qri/dsref"
    14  	"github.com/qri-io/qri/event"
    15  )
    16  
    17  func TestApply(t *testing.T) {
    18  	cases := []struct {
    19  		name   string
    20  		tf     *dataset.Transform
    21  		expect []event.Event
    22  	}{
    23  
    24  		{"three_step_success",
    25  			&dataset.Transform{
    26  				Steps: []*dataset.TransformStep{
    27  					{Syntax: "starlark", Script: `print("oh, hello!")`},
    28  					{Syntax: "starlark", Script: "ds = dataset.latest()"},
    29  					{Syntax: "starlark", Script: "ds.body = [[1,2,3]]\ndataset.commit(ds)"},
    30  				},
    31  			},
    32  			[]event.Event{
    33  				{Type: event.ETTransformStart, Payload: event.TransformLifecycle{RunID: "three_step_success", StepCount: 3, Mode: "apply"}},
    34  				{Type: event.ETTransformStepStart, Payload: event.TransformStepLifecycle{Mode: "apply"}},
    35  				{Type: event.ETTransformPrint, Payload: event.TransformMessage{Msg: "oh, hello!"}},
    36  				{Type: event.ETTransformStepStop, Payload: event.TransformStepLifecycle{Status: StatusSucceeded, Mode: "apply"}},
    37  				{Type: event.ETTransformStepStart, Payload: event.TransformStepLifecycle{Mode: "apply"}},
    38  				{Type: event.ETTransformStepStop, Payload: event.TransformStepLifecycle{Status: StatusSucceeded, Mode: "apply"}},
    39  				{Type: event.ETTransformStepStart, Payload: event.TransformStepLifecycle{Mode: "apply"}},
    40  				{Type: event.ETTransformDatasetPreview, Payload: threeStepDatasetPreview},
    41  				{Type: event.ETTransformStepStop, Payload: event.TransformStepLifecycle{Status: StatusSucceeded, Mode: "apply"}},
    42  				{Type: event.ETTransformStop, Payload: event.TransformLifecycle{RunID: "three_step_success", Status: StatusSucceeded, Mode: "apply"}},
    43  			},
    44  		},
    45  
    46  		{"one_step_error",
    47  			&dataset.Transform{
    48  				Steps: []*dataset.TransformStep{
    49  					{Syntax: "starlark", Script: `error("dang, it broke.")`},
    50  				},
    51  			},
    52  			[]event.Event{
    53  				{Type: event.ETTransformStart, Payload: event.TransformLifecycle{RunID: "one_step_error", StepCount: 1, Mode: "apply"}},
    54  				{Type: event.ETTransformStepStart, Payload: event.TransformStepLifecycle{Mode: "apply"}},
    55  				{Type: event.ETTransformError, Payload: event.TransformMessage{Lvl: event.TransformMsgLvlError, Msg: "Traceback (most recent call last):\n  .star:1:6: in <toplevel>\nError in error: transform error: \"dang, it broke.\"", Mode: "apply"}},
    56  				{Type: event.ETTransformStepStop, Payload: event.TransformStepLifecycle{Status: StatusFailed, Mode: "apply"}},
    57  				{Type: event.ETTransformStop, Payload: event.TransformLifecycle{RunID: "one_step_error", Status: StatusFailed, Mode: "apply"}},
    58  			},
    59  		},
    60  
    61  		{"two_commit_calls_error",
    62  			&dataset.Transform{
    63  				Steps: []*dataset.TransformStep{
    64  					{Syntax: "starlark", Script: "ds = dataset.latest()\ndataset.commit(ds)\ndataset.commit(ds)"},
    65  				},
    66  			},
    67  			[]event.Event{
    68  				{Type: event.ETTransformStart, Payload: event.TransformLifecycle{RunID: "two_commit_calls_error", StepCount: 1, Mode: "apply"}},
    69  				{Type: event.ETTransformStepStart, Payload: event.TransformStepLifecycle{Mode: "apply"}},
    70  				{Type: event.ETTransformDatasetPreview, Payload: &dataset.Dataset{
    71  					Commit: &dataset.Commit{
    72  						Message: "created dataset",
    73  						Title:   "created dataset",
    74  					},
    75  					Transform: &dataset.Transform{
    76  						Steps: []*dataset.TransformStep{
    77  							{Syntax: "starlark", Script: "ds = dataset.latest()\ndataset.commit(ds)\ndataset.commit(ds)"},
    78  						},
    79  					}}},
    80  				{Type: event.ETTransformError, Payload: event.TransformMessage{Lvl: event.TransformMsgLvlError, Msg: "Traceback (most recent call last):\n  .star:3:15: in <toplevel>\nError in commit: commit can only be called once in a transform script", Mode: "apply"}},
    81  				{Type: event.ETTransformStepStop, Payload: event.TransformStepLifecycle{Status: StatusFailed, Mode: "apply"}},
    82  				{Type: event.ETTransformStop, Payload: event.TransformLifecycle{RunID: "two_commit_calls_error", Status: StatusFailed, Mode: "apply"}},
    83  			},
    84  		},
    85  
    86  		{"no_commit_calls_warning",
    87  			&dataset.Transform{
    88  				Steps: []*dataset.TransformStep{
    89  					{Syntax: "starlark", Script: "ds = dataset.latest()"},
    90  				},
    91  			},
    92  			[]event.Event{
    93  				{Type: event.ETTransformStart, Payload: event.TransformLifecycle{RunID: "no_commit_calls_warning", StepCount: 1, Mode: "apply"}},
    94  				{Type: event.ETTransformStepStart, Payload: event.TransformStepLifecycle{Mode: "apply"}},
    95  				{Type: event.ETTransformStepStop, Payload: event.TransformStepLifecycle{Status: StatusSucceeded, Mode: "apply"}},
    96  				{Type: event.ETTransformPrint, Payload: event.TransformMessage{Lvl: "warn", Msg: "this script did not call dataset.commit, no changes will be saved"}},
    97  				{Type: event.ETTransformStop, Payload: event.TransformLifecycle{RunID: "no_commit_calls_warning", Status: StatusSucceeded, Mode: "apply"}},
    98  			},
    99  		},
   100  	}
   101  
   102  	for _, c := range cases {
   103  		t.Run(c.name, func(t *testing.T) {
   104  			log := applyNoHistoryTransform(t, "", c.tf, c.name, "apply")
   105  			compareEventLogs(t, c.expect, log)
   106  		})
   107  	}
   108  }
   109  
   110  func TestCommit(t *testing.T) {
   111  	cases := []struct {
   112  		name   string
   113  		initID string
   114  		runID  string
   115  		tf     *dataset.Transform
   116  		expect []event.Event
   117  	}{
   118  
   119  		{"three_step_success",
   120  			"three_step_success_init_id",
   121  			"three_step_success_run_id",
   122  			&dataset.Transform{
   123  				Steps: []*dataset.TransformStep{
   124  					{Syntax: "starlark", Script: `print("oh, hello!")`},
   125  					{Syntax: "starlark", Script: "ds = dataset.latest()"},
   126  					{Syntax: "starlark", Script: "ds.body = [[1,2,3]]\ndataset.commit(ds)"},
   127  				},
   128  			},
   129  			[]event.Event{
   130  				{Type: event.ETTransformStart, Payload: event.TransformLifecycle{InitID: "three_step_success_init_id", RunID: "three_step_success_run_id", StepCount: 3, Mode: "commit"}},
   131  				{Type: event.ETTransformStepStart, Payload: event.TransformStepLifecycle{Mode: "commit"}},
   132  				{Type: event.ETTransformPrint, Payload: event.TransformMessage{Msg: "oh, hello!"}},
   133  				{Type: event.ETTransformStepStop, Payload: event.TransformStepLifecycle{Status: StatusSucceeded, Mode: "commit"}},
   134  				{Type: event.ETTransformStepStart, Payload: event.TransformStepLifecycle{Mode: "commit"}},
   135  				{Type: event.ETTransformStepStop, Payload: event.TransformStepLifecycle{Status: StatusSucceeded, Mode: "commit"}},
   136  				{Type: event.ETTransformStepStart, Payload: event.TransformStepLifecycle{Mode: "commit"}},
   137  				{Type: event.ETTransformDatasetPreview, Payload: threeStepDatasetPreview},
   138  				{Type: event.ETTransformStepStop, Payload: event.TransformStepLifecycle{Status: StatusSucceeded, Mode: "commit"}},
   139  				{Type: event.ETTransformStop, Payload: event.TransformLifecycle{InitID: "three_step_success_init_id", RunID: "three_step_success_run_id", Status: StatusSucceeded, Mode: "commit"}},
   140  			},
   141  		},
   142  
   143  		{"one_step_error",
   144  			"one_step_error_init_id",
   145  			"one_step_error_run_id",
   146  			&dataset.Transform{
   147  				Steps: []*dataset.TransformStep{
   148  					{Syntax: "starlark", Category: "setup", Script: `error("dang, it broke.")`},
   149  				},
   150  			},
   151  			[]event.Event{
   152  				{Type: event.ETTransformStart, Payload: event.TransformLifecycle{InitID: "one_step_error_init_id", RunID: "one_step_error_run_id", StepCount: 1, Mode: "commit"}},
   153  				{Type: event.ETTransformStepStart, Payload: event.TransformStepLifecycle{Category: "setup", Mode: "commit"}},
   154  				{Type: event.ETTransformError, Payload: event.TransformMessage{Lvl: event.TransformMsgLvlError, Msg: "Traceback (most recent call last):\n  .star:1:6: in <toplevel>\nError in error: transform error: \"dang, it broke.\"", Mode: "commit"}},
   155  				{Type: event.ETTransformStepStop, Payload: event.TransformStepLifecycle{Category: "setup", Status: StatusFailed, Mode: "commit"}},
   156  				{Type: event.ETTransformStop, Payload: event.TransformLifecycle{InitID: "one_step_error_init_id", RunID: "one_step_error_run_id", Status: StatusFailed, Mode: "commit"}},
   157  			},
   158  		},
   159  	}
   160  
   161  	for _, c := range cases {
   162  		t.Run(c.name, func(t *testing.T) {
   163  			log := applyNoHistoryTransform(t, c.initID, c.tf, c.runID, "commit")
   164  			compareEventLogs(t, c.expect, log)
   165  		})
   166  	}
   167  }
   168  
   169  // run a transform script & capture the event log. transform runs against an
   170  // empty dataset history
   171  func applyNoHistoryTransform(t *testing.T, initID string, tf *dataset.Transform, runID, runMode string) []event.Event {
   172  	ctx, cancel := context.WithCancel(context.Background())
   173  	defer cancel()
   174  
   175  	loader := &noHistoryLoader{}
   176  	target := &dataset.Dataset{Transform: tf}
   177  
   178  	bus := event.NewBus(ctx)
   179  	log := []event.Event{}
   180  	doneCh := make(chan struct{})
   181  	bus.SubscribeID(func(ctx context.Context, e event.Event) error {
   182  		log = append(log, e)
   183  		switch e.Type {
   184  		case event.ETTransformStop:
   185  			doneCh <- struct{}{}
   186  		}
   187  		return nil
   188  	}, runID)
   189  
   190  	fs := qfs.NewMemFS()
   191  	transformer := NewTransformer(ctx, fs, loader, bus, SizeInfo{})
   192  	if runMode == "apply" {
   193  		if err := transformer.Apply(ctx, target, runID, false, nil); err != nil {
   194  			t.Fatal(err)
   195  		}
   196  	} else {
   197  		if err := transformer.Commit(ctx, initID, target, runID, false, nil); err != nil {
   198  			t.Fatal(err)
   199  		}
   200  	}
   201  
   202  	<-doneCh
   203  	return log
   204  }
   205  
   206  type noHistoryLoader struct{}
   207  
   208  // LoadDataset fails and returns that the reference has no history
   209  func (l *noHistoryLoader) LoadDataset(ctx context.Context, ref string) (*dataset.Dataset, error) {
   210  	return nil, dsref.ErrNoHistory
   211  }
   212  
   213  // compareEventLogs asserts two event log slices are roughly equal,
   214  // ignoring Timestamp & SessionID fields
   215  func compareEventLogs(t *testing.T, expect, log []event.Event) {
   216  	t.Helper()
   217  	ignorePaths := func(p cmp.Path) bool {
   218  		switch p.Last().String() {
   219  		case ".Timestamp", ".SessionID":
   220  			return true
   221  		default:
   222  			return false
   223  		}
   224  	}
   225  	ignoreUnexported := cmpopts.IgnoreUnexported(
   226  		dataset.Dataset{},
   227  		dataset.Transform{},
   228  	)
   229  	if diff := cmp.Diff(expect, log, cmp.FilterPath(ignorePaths, cmp.Ignore()), ignoreUnexported); diff != "" {
   230  		t.Errorf("result mismatch (-want +got):\n%s", diff)
   231  	}
   232  }
   233  
   234  var threeStepDatasetPreview = &dataset.Dataset{
   235  	Body: json.RawMessage(`[[1,2,3]]`),
   236  	Commit: &dataset.Commit{
   237  		Message: "created dataset",
   238  		Title:   "created dataset",
   239  	},
   240  	Structure: &dataset.Structure{
   241  		Format:       "csv",
   242  		FormatConfig: map[string]interface{}{"lazyQuotes": true},
   243  		Schema: map[string]interface{}{
   244  			"items": map[string]interface{}{
   245  				"items": []interface{}{
   246  					map[string]interface{}{"title": "field_1", "type": "integer"},
   247  					map[string]interface{}{"title": "field_2", "type": "integer"},
   248  					map[string]interface{}{"title": "field_3", "type": "integer"},
   249  				},
   250  				"type": "array",
   251  			},
   252  			"type": "array",
   253  		},
   254  		Length:  6,
   255  		Entries: 1,
   256  	},
   257  	Transform: &dataset.Transform{
   258  		Steps: []*dataset.TransformStep{
   259  			{Syntax: "starlark", Script: `print("oh, hello!")`},
   260  			{Syntax: "starlark", Script: "ds = dataset.latest()"},
   261  			{Syntax: "starlark", Script: "ds.body = [[1,2,3]]\ndataset.commit(ds)"},
   262  		},
   263  	},
   264  }
   265  
   266  func scriptFile(t *testing.T, path string) qfs.File {
   267  	data, err := ioutil.ReadFile(path)
   268  	if err != nil {
   269  		t.Fatal(err)
   270  	}
   271  	return qfs.NewMemfileBytes(path, data)
   272  }
   273  
   274  func TestApplyAssignsColumnsAndBody(t *testing.T) {
   275  	ctx := context.Background()
   276  
   277  	loader := &noHistoryLoader{}
   278  	bus := event.NewBus(ctx)
   279  	fs := qfs.NewMemFS()
   280  	transformer := NewTransformer(ctx, fs, loader, bus, SizeInfo{})
   281  
   282  	ds := &dataset.Dataset{Transform: &dataset.Transform{}}
   283  	ds.Transform.SetScriptFile(scriptFile(t, "startf/testdata/csv_with_header.star"))
   284  	err := transformer.Apply(ctx, ds, "myRunID", true, nil)
   285  	if err != nil {
   286  		t.Fatal(err)
   287  	}
   288  
   289  	// Schema created from the csv header row
   290  	actualBytes, err := json.Marshal(ds.Structure.Schema)
   291  	if err != nil {
   292  		t.Fatal(err)
   293  	}
   294  	actual := string(actualBytes)
   295  	expect := `{"items":{"items":[{"title":"name","type":"string"},{"title":"sound","type":"string"}],"type":"array"},"type":"array"}`
   296  
   297  	if diff := cmp.Diff(expect, actual); diff != "" {
   298  		t.Errorf("result mismatch (-want +got):\n%s", diff)
   299  	}
   300  
   301  	// Body contains just the rows without header
   302  	actualBytes, err = ioutil.ReadAll(ds.BodyFile())
   303  	if err != nil {
   304  		t.Fatal(err)
   305  	}
   306  	actual = string(actualBytes)
   307  	expect = "cat,meow\ndog,bark\n"
   308  
   309  	if diff := cmp.Diff(expect, actual); diff != "" {
   310  		t.Errorf("result mismatch (-want +got):\n%s", diff)
   311  	}
   312  
   313  }