github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/base/dsfs/write_test.go

package dsfs

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"path/filepath"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/qri-io/dataset"
	"github.com/qri-io/dataset/dsio"
	"github.com/qri-io/dataset/dstest"
	"github.com/qri-io/dataset/generate"
	"github.com/qri-io/dataset/tabular"
	"github.com/qri-io/dataset/validate"
	"github.com/qri-io/qfs"
	testkeys "github.com/qri-io/qri/auth/key/test"
	"github.com/qri-io/qri/base/toqtype"
	"github.com/qri-io/qri/event"
)

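// TestCreateDataset exercises CreateDataset end-to-end against an in-memory
// filesystem: table-driven failure cases, golden-file comparisons for
// successful saves, and a few edge cases as named subtests.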
func TestCreateDataset(t *testing.T) {
	ctx := context.Background()
	fs := qfs.NewMemFS()
	prev := Timestamp
	// shameless call to Timestamp to get the coverage points
	Timestamp()
	defer func() { Timestamp = prev }()
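	// pin the timestamp so saved commit hashes are deterministic; the
	// hard-coded result paths below depend on it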
	Timestamp = func() time.Time { return time.Date(2001, 01, 01, 01, 01, 01, 01, time.UTC) }

	// These tests are using hard-coded ids that require this exact peer's private key.
	privKey := testkeys.GetKeyData(10).PrivKey

	bad := []struct {
		casePath   string
		resultPath string
		prev       *dataset.Dataset
		err        string
	}{
		{"invalid_reference",
			"", nil, "loading dataset commit: loading commit file: path not found"},
		{"invalid",
			"", nil, "commit is required"},
		{"strict_fail",
			"", nil, "processing body data: dataset body did not validate against schema in strict-mode. found at least 16 errors"},

		// // should error when previous dataset won't dereference.
		// {"craigslist",
		// 	"", &dataset.Dataset{Structure: dataset.NewStructureRef("/bad/path")}, 21, "error loading dataset structure: error loading structure file: cafs: path not found"},
		// // should error when previous dataset isn't valid. Aka, when it isn't empty, but missing
		// // either structure or commit. Commit is checked for first.
		// {"craigslist",
		// 	"", &dataset.Dataset{Meta: &dataset.Meta{Title: "previous"}, Structure: nil}, 21, "commit is required"},
	}

	for _, c := range bad {
		t.Run(fmt.Sprintf("bad_%s", c.casePath), func(t *testing.T) {
			tc, err := dstest.NewTestCaseFromDir("testdata/" + c.casePath)
			if err != nil {
				t.Fatalf("creating test case: %s", err)
			}

			_, err = CreateDataset(ctx, fs, fs, event.NilBus, tc.Input, c.prev, privKey, SaveSwitches{ShouldRender: true})
			if err == nil {
				t.Fatalf("CreateDataset expected error. got nil")
			}
			if err.Error() != c.err {
				t.Errorf("error string mismatch.\nwant: %q\ngot:  %q", c.err, err)
			}
		})
	}

	good := []struct {
		casePath   string
		resultPath string
		prev       *dataset.Dataset
		repoFiles  int // expected total count of files in repo after test execution
	}{
		{"cities",
			"/mem/QmcDaRWnD4e58HsM9rsT3SY5vfhK9hAqmFVppc71JnBEpi", nil, 8},
		{"all_fields",
			"/mem/QmQ2yM2pCQbYcWxdP4R1yeVKBkkMR8ZjKr3x8RzJfrXQmu", nil, 18},
		{"cities_no_commit_title",
			"/mem/QmVFBZpQ9k5w8jF9A1jTRfQ2YW5y4haSNjmqj5H9c23DqW", nil, 21},
		{"craigslist",
			"/mem/QmXhRb415KTb3zxGDwk3iehZ8S8BFzsEM3YiPgkPQr6VKf", nil, 27},
	}

	for _, c := range good {
		t.Run(fmt.Sprintf("good_%s", c.casePath), func(t *testing.T) {
			tc, err := dstest.NewTestCaseFromDir("testdata/" + c.casePath)
			if err != nil {
				t.Fatalf("creating test case: %s", err)
			}

			path, err := CreateDataset(ctx, fs, fs, event.NilBus, tc.Input, c.prev, privKey, SaveSwitches{ShouldRender: true})
			if err != nil {
				t.Fatalf("CreateDataset: %s", err)
			}

			ds, err := LoadDataset(ctx, fs, path)
			if err != nil {
				t.Fatalf("loading dataset: %s", err.Error())
			}
			ds.Path = ""

			if tc.Expect != nil {
				if diff := dstest.CompareDatasets(tc.Expect, ds); diff != "" {
					t.Errorf("dataset comparison error (-want +got): %s", diff)
					dstest.UpdateGoldenFileIfEnvVarSet(fmt.Sprintf("testdata/%s/expect.dataset.json", c.casePath), ds)
				}
			}

			if c.resultPath != path {
				t.Errorf("result path mismatch: expected: %q, got: %q", c.resultPath, path)
			}
			if c.repoFiles != len(fs.Files) {
				t.Errorf("invalid number of mapstore entries. want %d, got %d", c.repoFiles, len(fs.Files))
			}
		})
	}

	t.Run("no_priv_key", func(t *testing.T) {
		_, err := CreateDataset(ctx, fs, fs, event.NilBus, nil, nil, nil, SaveSwitches{ShouldRender: true})
		if err == nil {
			t.Fatal("expected call without private key to error")
		}
		pkReqErrMsg := "private key is required to create a dataset"
		if err.Error() != pkReqErrMsg {
			t.Fatalf("error mismatch.\nwant: %q\ngot:  %q", pkReqErrMsg, err.Error())
		}
	})

	t.Run("no_body", func(t *testing.T) {
		dsData, err := ioutil.ReadFile("testdata/cities/input.dataset.json")
		if err != nil {
			t.Fatalf("case nil body and previous body files, error reading dataset file: %s", err.Error())
		}
		ds := &dataset.Dataset{}
		if err := ds.UnmarshalJSON(dsData); err != nil {
			t.Fatalf("case nil body and previous body files, error unmarshaling dataset file: %s", err.Error())
		}

		// expectedErr := "bodyfile or previous bodyfile needed"
		// _, err = CreateDataset(ctx, fs, fs, event.NilBus, ds, nil, privKey, SaveSwitches{ShouldRender: true})
		// if err.Error() != expectedErr {
		// 	t.Errorf("case nil body and previous body files, error mismatch: expected '%s', got '%s'", expectedErr, err.Error())
		// }
	})

	t.Run("no_changes", func(t *testing.T) {
		expectedErr := "saving failed: no changes"
		dsPrev, err := LoadDataset(ctx, fs, good[2].resultPath)
		if err != nil {
			t.Fatal(err)
		}

		ds := &dataset.Dataset{
			Name:      "cities",
			Commit:    &dataset.Commit{},
			Structure: dsPrev.Structure,
			Meta:      dsPrev.Meta,
		}
		ds.PreviousPath = good[2].resultPath

		bodyBytes, err := ioutil.ReadFile("testdata/cities/body.csv")
		if err != nil {
			t.Fatalf("reading body file: %s", err.Error())
		}
		ds.SetBodyFile(qfs.NewMemfileBytes("body.csv", bodyBytes))

		path, err := CreateDataset(ctx, fs, fs, event.NilBus, ds, dsPrev, privKey, SaveSwitches{ShouldRender: true})
		if err != nil && err.Error() != expectedErr {
			t.Fatalf("error mismatch: expected %q, got %q", expectedErr, err.Error())
		} else if err == nil {
			ds, err := LoadDataset(ctx, fs, path)
			if err != nil {
				t.Fatalf("loading dataset: %s", err.Error())
			}

			t.Fatalf("CreateDataset expected error, got nil. commit: %v", ds.Commit)
		}

		if len(fs.Files) != 27 {
			t.Errorf("invalid number of entries. want %d, got %d", 27, len(fs.Files))
			str, err := fs.Print()
			if err != nil {
				panic(err)
			}
			t.Log(str)
		}
	})

	// case: previous dataset isn't valid
}

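// TestDatasetSaveCustomTimestamp confirms that a caller-supplied commit
// timestamp survives a save/load round-trip, normalized to UTC.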
func TestDatasetSaveCustomTimestamp(t *testing.T) {
	ctx := context.Background()
	fs := qfs.NewMemFS()
	privKey := testkeys.GetKeyData(10).PrivKey

	// use a custom timestamp in local zone. should be converted to UTC for saving
	ts := time.Date(2100, 1, 2, 3, 4, 5, 6, time.Local)

	ds := &dataset.Dataset{
		Commit: &dataset.Commit{
			Timestamp: ts,
		},
		Structure: &dataset.Structure{Format: "json", Schema: dataset.BaseSchemaArray},
	}
	ds.SetBodyFile(qfs.NewMemfileBytes("/body.json", []byte(`[]`)))

	path, err := CreateDataset(ctx, fs, fs, event.NilBus, ds, nil, privKey, SaveSwitches{})
	if err != nil {
		t.Fatal(err)
	}

	got, err := LoadDataset(ctx, fs, path)
	if err != nil {
		t.Fatal(err)
	}

	if !ts.In(time.UTC).Equal(got.Commit.Timestamp) {
		t.Errorf("result timestamp mismatch.\nwant: %q\ngot:  %q", ts.In(time.UTC), got.Commit.Timestamp)
	}
}

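// TestDatasetSaveEvents verifies that a single CreateDataset call publishes
// the expected save lifecycle events on the bus: one started, three
// progress, and one completed.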
func TestDatasetSaveEvents(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	fs := qfs.NewMemFS()
	privKey := testkeys.GetKeyData(10).PrivKey
	bus := event.NewBus(ctx)

	fired := map[event.Type]int{}
	bus.SubscribeTypes(func(ctx context.Context, e event.Event) error {
		fired[e.Type]++
		return nil
	},
		event.ETDatasetSaveStarted,
		event.ETDatasetSaveProgress,
		event.ETDatasetSaveCompleted,
	)

	ds := &dataset.Dataset{
		Commit: &dataset.Commit{
			Timestamp: time.Date(2100, 1, 2, 3, 4, 5, 6, time.Local),
		},
		Structure: &dataset.Structure{Format: "json", Schema: dataset.BaseSchemaArray},
	}
	ds.SetBodyFile(qfs.NewMemfileBytes("/body.json", []byte(`[]`)))

	if _, err := CreateDataset(ctx, fs, fs, bus, ds, nil, privKey, SaveSwitches{}); err != nil {
		t.Fatal(err)
	}

	expect := map[event.Type]int{
		event.ETDatasetSaveStarted:   1,
		event.ETDatasetSaveProgress:  3,
		event.ETDatasetSaveCompleted: 1,
	}

	if diff := cmp.Diff(expect, fired); diff != "" {
		t.Errorf("fired event count mismatch. (-want +got):%s\n", diff)
	}
}

// Test that if the body is too large, the commit message just assumes the body changed
func TestCreateDatasetBodyTooLarge(t *testing.T) {
	ctx := context.Background()
	fs := qfs.NewMemFS()

	prevTs := Timestamp
	defer func() { Timestamp = prevTs }()
	Timestamp = func() time.Time { return time.Date(2001, 01, 01, 01, 01, 01, 01, time.UTC) }

	// Set the limit for the body to be 100 bytes
	prevBodySizeLimit := BodySizeSmallEnoughToDiff
	defer func() { BodySizeSmallEnoughToDiff = prevBodySizeLimit }()
	BodySizeSmallEnoughToDiff = 100

	privKey := testkeys.GetKeyData(10).PrivKey

	// Need a previous commit, otherwise we just get the "created dataset" message
	prevDs := dataset.Dataset{
		Commit: &dataset.Commit{},
		Structure: &dataset.Structure{
			Format: "csv",
			Schema: tabular.BaseTabularSchema,
		},
	}

	testBodyPath, err := filepath.Abs("testdata/movies/body.csv")
	if err != nil {
		t.Fatalf("resolving body path: %s", err)
	}
	testBodyBytes, err := ioutil.ReadFile(testBodyPath)
	if err != nil {
		t.Fatalf("reading body file: %s", err)
	}

	// Create a new version and add the body
	nextDs := dataset.Dataset{
		Commit: &dataset.Commit{},
		Structure: &dataset.Structure{
			Format: "csv",
			Schema: tabular.BaseTabularSchema,
		},
	}
	nextDs.SetBodyFile(qfs.NewMemfileBytes(testBodyPath, testBodyBytes))

	path, err := CreateDataset(ctx, fs, fs, event.NilBus, &nextDs, &prevDs, privKey, SaveSwitches{ShouldRender: true})
	if err != nil {
		t.Fatalf("CreateDataset: %s", err)
	}

	// Load the created dataset to inspect the commit message
	got, err := LoadDataset(ctx, fs, path)
	if err != nil {
		t.Fatalf("LoadDataset: %s", err)
	}

	expect := dstest.LoadGoldenFile(t, "testdata/movies/expect.dataset.json")
	if diff := dstest.CompareDatasets(expect, got); diff != "" {
		t.Errorf("result mismatch (-want +got):%s\n", diff)
		dstest.UpdateGoldenFileIfEnvVarSet("testdata/movies/expect.dataset.json", got)
	}
}

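// TestWriteDataset checks that WriteDataset rejects an empty dataset, then
// (for the currently commented-out cases, pending the TODO below) verifies
// that stored meta and structure components are written as path references
// which LoadDataset dereferences back into a complete dataset.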
func TestWriteDataset(t *testing.T) {
	ctx := context.Background()
	fs := qfs.NewMemFS()
	prev := Timestamp
	defer func() { Timestamp = prev }()
	Timestamp = func() time.Time { return time.Date(2001, 01, 01, 01, 01, 01, 01, time.UTC) }

	// These tests are using hard-coded ids that require this exact peer's private key.
	pk := testkeys.GetKeyData(10).PrivKey

	if _, err := WriteDataset(ctx, fs, fs, nil, &dataset.Dataset{}, event.NilBus, pk, SaveSwitches{Pin: true}); err == nil || err.Error() != "cannot save empty dataset" {
		t.Errorf("didn't reject empty dataset: %s", err)
	}

	cases := []struct {
		casePath  string
		repoFiles int // expected total count of files in repo after test execution
		err       string
	}{
		// TODO (b5) - these are *very* close, need to be fixed
		// {"cities", 6, ""},      // dataset, commit, structure, meta, viz, body
		// {"all_fields", 14, ""}, // dataset, commit, structure, meta, viz, viz_script, transform, transform_script, SAME BODY as cities -> gets de-duped
	}

	for i, c := range cases {
		tc, err := dstest.NewTestCaseFromDir("testdata/" + c.casePath)
		if err != nil {
			t.Errorf("%s: error creating test case: %s", c.casePath, err)
			continue
		}

		ds := tc.Input

		got, err := WriteDataset(ctx, fs, fs, nil, ds, event.NilBus, pk, SaveSwitches{Pin: true})
		if !((err == nil && c.err == "") || (err != nil && err.Error() == c.err)) {
			t.Errorf("case %d error mismatch. expected: %q, got: %q", i, c.err, err)
			continue
		}

		// expected total count of files in repo after test execution
		if len(fs.Files) != c.repoFiles {
			t.Errorf("case %d: invalid number of entries. want %d, got %d", i, c.repoFiles, len(fs.Files))
			str, err := fs.Print()
			if err != nil {
				panic(err)
			}
			t.Log(str)
			continue
		}

		got = PackageFilepath(fs, got, PackageFileDataset)

		f, err := fs.Get(ctx, got)
		if err != nil {
			t.Errorf("error getting dataset file: %s", err.Error())
			continue
		}

		ref := &dataset.Dataset{}
		if err := json.NewDecoder(f).Decode(ref); err != nil {
			t.Errorf("error decoding dataset json: %s", err.Error())
			continue
		}

		if ref.Transform != nil {
			if ref.Transform.IsEmpty() {
				t.Errorf("expected stored dataset.Transform to be populated")
			}
			ds.Transform.Assign(dataset.NewTransformRef(ref.Transform.Path))
		}
		if ref.Meta != nil {
			if !ref.Meta.IsEmpty() {
				t.Errorf("expected stored dataset.Meta to be a reference")
			}
			ds.Meta.Assign(dataset.NewMetaRef(ref.Meta.Path))
		}
		if ref.Structure != nil {
			if !ref.Structure.IsEmpty() {
				t.Errorf("expected stored dataset.Structure to be a reference")
			}
			ds.Structure.Assign(dataset.NewStructureRef(ref.Structure.Path))
		}
		if ref.Viz != nil {
			if ref.Viz.IsEmpty() {
				t.Errorf("expected stored dataset.Viz to be populated")
			}
			ds.Viz.Assign(dataset.NewVizRef(ref.Viz.Path))
		}
		ds.BodyPath = ref.BodyPath

		ds.Assign(dataset.NewDatasetRef(got))
		result, err := LoadDataset(ctx, fs, got)
		if err != nil {
			t.Errorf("case %d unexpected error loading dataset: %s", i, err)
			continue
		}

		if diff := dstest.CompareDatasets(ds, result); diff != "" {
			t.Errorf("case %d comparison mismatch: (-want +got):\n%s", i, diff)

			d1, _ := ds.MarshalJSON()
			t.Log(string(d1))

			d, _ := result.MarshalJSON()
			t.Log(string(d))
			continue
		}
	}
}

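// TestGenerateCommitMessage exercises generateCommitDescriptions, which
// summarizes the differences between two dataset versions as a short title
// and a long message. A title-only meta change, for example, yields the
// short message "meta updated title" and the long message
// "meta:\n\tupdated title".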
func TestGenerateCommitMessage(t *testing.T) {
	badCases := []struct {
		description string
		prev, ds    *dataset.Dataset
		force       bool
		errMsg      string
	}{
		{
			"no changes from one dataset version to next",
			&dataset.Dataset{Meta: &dataset.Meta{Title: "same dataset"}},
			&dataset.Dataset{Meta: &dataset.Meta{Title: "same dataset"}},
			false,
			"no changes",
		},
	}

	ctx := context.Background()
	fs := qfs.NewMemFS()

	for _, c := range badCases {
		t.Run(c.description, func(t *testing.T) {
			_, _, err := generateCommitDescriptions(ctx, fs, c.ds, c.prev, BodySame, c.force)
			if err == nil {
				t.Errorf("error expected, did not get one")
			} else if c.errMsg != err.Error() {
				t.Errorf("error mismatch\nexpect: %s\ngot: %s", c.errMsg, err.Error())
			}
		})
	}

	goodCases := []struct {
		description string
		prev, ds    *dataset.Dataset
		force       bool
		expectShort string
		expectLong  string
	}{
		{
			"empty previous and non-empty dataset",
			&dataset.Dataset{},
			&dataset.Dataset{Meta: &dataset.Meta{Title: "new dataset"}},
			false,
			"created dataset",
			"created dataset",
		},
		{
			"title changes from previous",
			&dataset.Dataset{Meta: &dataset.Meta{Title: "new dataset"}},
			&dataset.Dataset{Meta: &dataset.Meta{Title: "changes to dataset"}},
			false,
			"meta updated title",
			"meta:\n\tupdated title",
		},
		{
			"same dataset but force is true",
			&dataset.Dataset{Meta: &dataset.Meta{Title: "same dataset"}},
			&dataset.Dataset{Meta: &dataset.Meta{Title: "same dataset"}},
			true,
			"forced update",
			"forced update",
		},
		{
			"structure sets the headerRow config option",
			&dataset.Dataset{Structure: &dataset.Structure{
				FormatConfig: map[string]interface{}{
					"headerRow": false,
				},
			}},
			&dataset.Dataset{Structure: &dataset.Structure{
				FormatConfig: map[string]interface{}{
					"headerRow": true,
				},
			}},
			false,
			"structure updated formatConfig.headerRow",
			"structure:\n\tupdated formatConfig.headerRow",
		},
		{
			"readme modified",
			&dataset.Dataset{Readme: &dataset.Readme{
				Format: "md",
				Text:   "# hello\n\ncontent\n\n",
			}},
			&dataset.Dataset{Readme: &dataset.Readme{
				Format: "md",
				Text:   "# hello\n\ncontent\n\nanother line\n\n",
			}},
			false,
			// TODO(dustmop): Should mention the line added.
			"readme updated text",
			"readme:\n\tupdated text",
		},
		{
			"body with a small number of changes",
			&dataset.Dataset{
				Structure: &dataset.Structure{Format: "json"},
				Body: toqtype.MustParseJSONAsArray(`[
  { "fruit": "apple", "color": "red" },
  { "fruit": "banana", "color": "yellow" },
  { "fruit": "cherry", "color": "red" }
]`),
			},
			&dataset.Dataset{
				Structure: &dataset.Structure{Format: "json"},
				Body: toqtype.MustParseJSONAsArray(`[
  { "fruit": "apple", "color": "red" },
  { "fruit": "blueberry", "color": "blue" },
  { "fruit": "cherry", "color": "red" },
  { "fruit": "durian", "color": "green" }
]`),
			},
			false,
			"body updated row 1 and added row 3",
			"body:\n\tupdated row 1\n\tadded row 3",
		},
		{
			"body with lots of changes",
			&dataset.Dataset{
				Structure: &dataset.Structure{Format: "csv"},
				Body: toqtype.MustParseCsvAsArray(`one,two,3
four,five,6
seven,eight,9
ten,eleven,12
thirteen,fourteen,15
sixteen,seventeen,18
nineteen,twenty,21
twenty-two,twenty-three,24
twenty-five,twenty-six,27
twenty-eight,twenty-nine,30`),
			},
			&dataset.Dataset{
				Structure: &dataset.Structure{Format: "csv"},
				Body: toqtype.MustParseCsvAsArray(`one,two,3
four,five,6
seven,eight,cat
dog,eleven,12
thirteen,eel,15
sixteen,seventeen,100
frog,twenty,21
twenty-two,twenty-three,24
twenty-five,giraffe,200
hen,twenty-nine,30`),
			},
			false,
			"body changed by 19%",
			"body:\n\tchanged by 19%",
		},
		{
			"meta and structure and readme changes",
			&dataset.Dataset{
				Meta: &dataset.Meta{Title: "new dataset"},
				Structure: &dataset.Structure{
					FormatConfig: map[string]interface{}{
						"headerRow": false,
					},
				},
				Readme: &dataset.Readme{
					Format: "md",
					Text:   "# hello\n\ncontent\n\n",
				},
			},
			&dataset.Dataset{
				Meta: &dataset.Meta{Title: "changes to dataset"},
				Structure: &dataset.Structure{
					FormatConfig: map[string]interface{}{
						"headerRow": true,
					},
				},
				Readme: &dataset.Readme{
					Format: "md",
					Text:   "# hello\n\ncontent\n\nanother line\n\n",
				},
			},
			false,
			"updated meta, structure, and readme",
			"meta:\n\tupdated title\nstructure:\n\tupdated formatConfig.headerRow\nreadme:\n\tupdated text",
		},
		{
			"meta removed but everything else is the same",
			&dataset.Dataset{
				Meta: &dataset.Meta{Title: "new dataset"},
				Structure: &dataset.Structure{
					FormatConfig: map[string]interface{}{
						"headerRow": false,
					},
				},
				Readme: &dataset.Readme{
					Format: "md",
					Text:   "# hello\n\ncontent\n\n",
				},
			},
			&dataset.Dataset{
				Structure: &dataset.Structure{
					FormatConfig: map[string]interface{}{
						"headerRow": false,
					},
				},
				Readme: &dataset.Readme{
					Format: "md",
					Text:   "# hello\n\ncontent\n\n",
				},
			},
			false,
			"meta removed",
			"meta removed",
		},
		{
			"meta has multiple parts changed",
			&dataset.Dataset{
				Meta: &dataset.Meta{
					Title:       "new dataset",
					Description: "TODO: Add description",
				},
			},
			&dataset.Dataset{
				Meta: &dataset.Meta{
					Title:       "changes to dataset",
					HomeURL:     "http://example.com",
					Description: "this is a great description",
				},
			},
			false,
			"meta updated 3 fields",
			"meta:\n\tupdated description\n\tadded homeURL\n\tupdated title",
		},
		{
			"meta and body changed",
			&dataset.Dataset{
				Meta: &dataset.Meta{
					Title:       "new dataset",
					Description: "TODO: Add description",
				},
				Structure: &dataset.Structure{Format: "csv"},
				Body: toqtype.MustParseCsvAsArray(`one,two,3
four,five,6
seven,eight,9
ten,eleven,12
thirteen,fourteen,15
sixteen,seventeen,18
nineteen,twenty,21
twenty-two,twenty-three,24
twenty-five,twenty-six,27
twenty-eight,twenty-nine,30`),
			},
			&dataset.Dataset{
				Meta: &dataset.Meta{
					Title:       "changes to dataset",
					HomeURL:     "http://example.com",
					Description: "this is a great description",
				},
				Structure: &dataset.Structure{Format: "csv"},
				Body: toqtype.MustParseCsvAsArray(`one,two,3
four,five,6
something,eight,cat
dog,eleven,12
thirteen,eel,15
sixteen,60,100
frog,twenty,21
twenty-two,twenty-three,24
twenty-five,giraffe,200
hen,twenty-nine,30`),
			},
			false,
			"updated meta and body",
			"meta:\n\tupdated description\n\tadded homeURL\n\tupdated title\nbody:\n\tchanged by 24%",
		},
		{
			"meta changed but body stays the same",
			&dataset.Dataset{
				Meta: &dataset.Meta{
					Title: "new dataset",
				},
				Structure: &dataset.Structure{Format: "csv"},
				Body: toqtype.MustParseCsvAsArray(`one,two,3
four,five,6
seven,eight,9
ten,eleven,12
thirteen,fourteen,15
sixteen,seventeen,18`),
			},
			&dataset.Dataset{
				Meta: &dataset.Meta{
					Title: "dataset of a bunch of numbers",
				},
				Structure: &dataset.Structure{Format: "csv"},
				Body: toqtype.MustParseCsvAsArray(`one,two,3
four,five,6
seven,eight,9
ten,eleven,12
thirteen,fourteen,15
sixteen,seventeen,18`),
			},
			false,
			"meta updated title",
			"meta:\n\tupdated title",
		},
	}

	for _, c := range goodCases {
		t.Run(c.description, func(t *testing.T) {
			bodyAct := BodyDefault
			if compareBody(c.prev.Body, c.ds.Body) {
				bodyAct = BodySame
			}
			shortTitle, longMessage, err := generateCommitDescriptions(ctx, fs, c.ds, c.prev, bodyAct, c.force)
			if err != nil {
				t.Errorf("error: %s", err.Error())
				return
			}
			if c.expectShort != shortTitle {
				t.Errorf("short message mismatch\nexpect: %s\ngot: %s", c.expectShort, shortTitle)
			}
			if c.expectLong != longMessage {
				t.Errorf("long message mismatch\nexpect: %s\ngot: %s", c.expectLong, longMessage)
			}
		})
	}
}

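// compareBody reports whether two body values are equal by comparing their
// JSON serializations, panicking on values that can't be marshaled.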
func compareBody(left, right interface{}) bool {
	leftData, err := json.Marshal(left)
	if err != nil {
		panic(err)
	}
	rightData, err := json.Marshal(right)
	if err != nil {
		panic(err)
	}
	return string(leftData) == string(rightData)
}

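// TestGetDepth checks getDepth against JSON documents of known nesting
// depth: scalars are depth 0 and each level of object or array nesting adds
// one, e.g. {"a": ["b"]} has depth 2.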
func TestGetDepth(t *testing.T) {
	good := []struct {
		val      string
		expected int
	}{
		{`"foo"`, 0},
		{`1000`, 0},
		{`true`, 0},
		{`{"foo": "bar"}`, 1},
		{`{"foo": "bar","bar": "baz"}`, 1},
		{`{
			"foo":"bar",
			"bar": "baz",
			"baz": {
				"foo": "bar",
				"bar": "baz"
			}
		}`, 2},
		{`{
			"foo": "bar",
			"bar": "baz",
			"baz": {
				"foo": "bar",
				"bar": [
					"foo",
					"bar",
					"baz"
				]
			}
		}`, 3},
		{`{
			"foo": "bar",
			"bar": "baz",
			"baz": [
				"foo",
				"bar",
				"baz"
			]
		}`, 2},
		{`["foo","bar","baz"]`, 1},
		{`["a","b",[1, 2, 3]]`, 2},
		{`[
			"foo",
			"bar",
			{"baz": {
				"foo": "bar",
				"bar": "baz",
				"baz": "foo"
				}
			}
		]`, 3},
		{`{
			"foo": "bar",
			"foo1": {
				"foo2": 2,
				"foo3": false
			},
			"foo4": "bar",
			"foo5": {
				"foo6": 100
			}
		}`, 2},
		{`{
			"foo":  "bar",
			"foo1": "bar",
			"foo2": {
				"foo3": 100,
				"foo4": 100
			},
			"foo5": {
				"foo6": 100,
				"foo7": 100,
				"foo8": 100,
				"foo9": 100
			},
			"foo10": {
				"foo11": 100,
				"foo12": 100
			}
		}`, 2},
	}

	var val interface{}

	for i, c := range good {
		if err := json.Unmarshal([]byte(c.val), &val); err != nil {
			t.Fatal(err)
		}
		depth := getDepth(val)
		if c.expected != depth {
			t.Errorf("case %d, depth mismatch, expected %d, got %d", i, c.expected, depth)
		}
	}
}

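// GenerateDataset builds a synthetic four-column tabular dataset with
// sampleSize generated rows in the given body format, returning the body
// length in bytes alongside the dataset. Abbreviated usage, mirroring
// BenchmarkCreateDatasetCSV below:
//
//	_, ds := GenerateDataset(b, 10000, "csv")
//	_, err := CreateDataset(ctx, fs, fs, event.NilBus, ds, nil, privKey, SaveSwitches{})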
func GenerateDataset(b *testing.B, sampleSize int, format string) (int, *dataset.Dataset) {
	ds := &dataset.Dataset{
		Commit: &dataset.Commit{
			Timestamp: time.Date(2017, 1, 1, 1, 0, 0, 0, time.UTC),
			Title:     "initial commit",
		},
		Meta: &dataset.Meta{
			Title: "performance benchmark data",
		},
		Structure: &dataset.Structure{
			Format: format,
			FormatConfig: map[string]interface{}{
				"headerRow":  true,
				"lazyQuotes": true,
			},
			Schema: map[string]interface{}{
				"type": "array",
				"items": map[string]interface{}{
					"type": "array",
					"items": []interface{}{
						map[string]interface{}{"title": "uuid", "type": "string"},
						map[string]interface{}{"title": "ingest", "type": "string"},
						map[string]interface{}{"title": "occurred", "type": "string"},
						map[string]interface{}{"title": "raw_data", "type": "string"},
					},
				},
			},
		},
	}

	gen, err := generate.NewTabularGenerator(ds.Structure)
	if err != nil {
		b.Fatalf("error creating generator: %s", err.Error())
	}
	defer gen.Close()

	bodyBuffer := &bytes.Buffer{}
	w, err := dsio.NewEntryWriter(ds.Structure, bodyBuffer)
	if err != nil {
		b.Fatalf("creating entry writer: %s", err.Error())
	}

	for i := 0; i < sampleSize; i++ {
		ent, err := gen.ReadEntry()
		if err != nil {
			b.Fatalf("reading generator entry: %s", err.Error())
		}
		if err := w.WriteEntry(ent); err != nil {
			b.Fatalf("writing entry: %s", err)
		}
	}
	if err := w.Close(); err != nil {
		b.Fatalf("closing writer: %s", err)
	}

	fileName := fmt.Sprintf("body.%s", ds.Structure.Format)
	ds.SetBodyFile(qfs.NewMemfileReader(fileName, bodyBuffer))

	return bodyBuffer.Len(), ds
}

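// BenchmarkCreateDatasetCSV measures end-to-end dataset creation over
// generated CSV bodies of increasing size. Body generation runs with the
// timer stopped so only CreateDataset is measured.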
func BenchmarkCreateDatasetCSV(b *testing.B) {
	ctx := context.Background()
	prev := Timestamp
	defer func() { Timestamp = prev }()
	Timestamp = func() time.Time { return time.Date(2001, 01, 01, 01, 01, 01, 01, time.UTC) }

	// These benchmarks are using hard-coded ids that require this exact peer's private key.
	privKey := testkeys.GetKeyData(10).PrivKey

	// ~1 MB, ~12 MB, ~25 MB, ~50 MB, ~500 MB
	for _, sampleSize := range []int{10000, 100000, 250000, 500000, 1000000} {
		fs := qfs.NewMemFS()

		b.Run(fmt.Sprintf("sample size %v", sampleSize), func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				b.StopTimer()

				_, dataset := GenerateDataset(b, sampleSize, "csv")

				b.StartTimer()
				_, err := CreateDataset(ctx, fs, fs, event.NilBus, dataset, nil, privKey, SaveSwitches{ShouldRender: true})
				if err != nil {
					b.Errorf("error creating dataset: %s", err.Error())
				}
			}
			b.StopTimer()
		})
	}
}

// validateDataset is a stripped copy of base/dsfs/setErrCount
func validateDataset(ds *dataset.Dataset, data qfs.File) error {
	defer data.Close()

	er, err := dsio.NewEntryReader(ds.Structure, data)
	if err != nil {
		return err
	}

	_, err = validate.EntryReader(er)

	return err
}

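// BenchmarkValidateCSV measures schema validation alone over generated CSV
// bodies, using the stripped-down validateDataset helper above.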
func BenchmarkValidateCSV(b *testing.B) {
	// ~1 MB, ~12 MB, ~25 MB, ~50 MB, ~500 MB, ~1GB
	for _, sampleSize := range []int{10000, 100000, 250000, 500000, 1000000, 10000000} {
		b.Run(fmt.Sprintf("sample size %v", sampleSize), func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				b.StopTimer()
				_, dataset := GenerateDataset(b, sampleSize, "csv")

				b.StartTimer()
				err := validateDataset(dataset, dataset.BodyFile())
				if err != nil {
					b.Errorf("error validating dataset: %s", err.Error())
				}
			}
			b.StopTimer()
		})
	}
}

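// BenchmarkValidateJSON mirrors BenchmarkValidateCSV with JSON-formatted
// bodies; sample sizes are row counts, not bytes.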
func BenchmarkValidateJSON(b *testing.B) {
	// ~1 MB, ~12 MB, ~25 MB, ~50 MB, ~500 MB, ~1GB
	for _, sampleSize := range []int{10000, 100000, 250000, 500000, 1000000, 10000000} {
		b.Run(fmt.Sprintf("sample size %v", sampleSize), func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				b.StopTimer()
				_, dataset := GenerateDataset(b, sampleSize, "json")

				b.StartTimer()
				err := validateDataset(dataset, dataset.BodyFile())
				if err != nil {
					b.Errorf("error validating dataset: %s", err.Error())
				}
			}
			b.StopTimer()
		})
	}
}