// github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/base/dsfs/write_test.go

package dsfs

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"path/filepath"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/qri-io/dataset"
	"github.com/qri-io/dataset/dsio"
	"github.com/qri-io/dataset/dstest"
	"github.com/qri-io/dataset/generate"
	"github.com/qri-io/dataset/tabular"
	"github.com/qri-io/dataset/validate"
	"github.com/qri-io/qfs"
	testkeys "github.com/qri-io/qri/auth/key/test"
	"github.com/qri-io/qri/base/toqtype"
	"github.com/qri-io/qri/event"
)

func TestCreateDataset(t *testing.T) {
	ctx := context.Background()
	fs := qfs.NewMemFS()
	prev := Timestamp
	// shameless call to Timestamp to get the coverage points
	Timestamp()
	defer func() { Timestamp = prev }()
	Timestamp = func() time.Time { return time.Date(2001, 01, 01, 01, 01, 01, 01, time.UTC) }

	// These tests are using hard-coded ids that require this exact peer's private key.
	privKey := testkeys.GetKeyData(10).PrivKey

	bad := []struct {
		casePath   string
		resultPath string
		prev       *dataset.Dataset
		err        string
	}{
		{"invalid_reference",
			"", nil, "loading dataset commit: loading commit file: path not found"},
		{"invalid",
			"", nil, "commit is required"},
		{"strict_fail",
			"", nil, "processing body data: dataset body did not validate against schema in strict-mode. found at least 16 errors"},

		// // should error when previous dataset won't dereference.
		// {"craigslist",
		// 	"", &dataset.Dataset{Structure: dataset.NewStructureRef("/bad/path")}, 21, "error loading dataset structure: error loading structure file: cafs: path not found"},
		// // should error when previous dataset isn't valid. Aka, when it isn't empty, but missing
		// // either structure or commit. Commit is checked for first.
		// {"craigslist",
		// 	"", &dataset.Dataset{Meta: &dataset.Meta{Title: "previous"}, Structure: nil}, 21, "commit is required"},
	}

	for _, c := range bad {
		t.Run(fmt.Sprintf("bad_%s", c.casePath), func(t *testing.T) {
			tc, err := dstest.NewTestCaseFromDir("testdata/" + c.casePath)
			if err != nil {
				t.Fatalf("creating test case: %s", err)
			}

			_, err = CreateDataset(ctx, fs, fs, event.NilBus, tc.Input, c.prev, privKey, SaveSwitches{ShouldRender: true})
			if err == nil {
				t.Fatalf("CreateDataset expected error, got nil")
			}
			if err.Error() != c.err {
				t.Errorf("error string mismatch.\nwant: %q\ngot: %q", c.err, err)
			}
		})
	}

	good := []struct {
		casePath   string
		resultPath string
		prev       *dataset.Dataset
		repoFiles  int // expected total count of files in repo after test execution
	}{
		{"cities",
			"/mem/QmcDaRWnD4e58HsM9rsT3SY5vfhK9hAqmFVppc71JnBEpi", nil, 8},
		{"all_fields",
			"/mem/QmQ2yM2pCQbYcWxdP4R1yeVKBkkMR8ZjKr3x8RzJfrXQmu", nil, 18},
		{"cities_no_commit_title",
			"/mem/QmVFBZpQ9k5w8jF9A1jTRfQ2YW5y4haSNjmqj5H9c23DqW", nil, 21},
		{"craigslist",
			"/mem/QmXhRb415KTb3zxGDwk3iehZ8S8BFzsEM3YiPgkPQr6VKf", nil, 27},
	}

	for _, c := range good {
		t.Run(fmt.Sprintf("good_%s", c.casePath), func(t *testing.T) {
			tc, err := dstest.NewTestCaseFromDir("testdata/" + c.casePath)
			if err != nil {
				t.Fatalf("creating test case: %s", err)
			}

			path, err := CreateDataset(ctx, fs, fs, event.NilBus, tc.Input, c.prev, privKey, SaveSwitches{ShouldRender: true})
			if err != nil {
				t.Fatalf("CreateDataset: %s", err)
			}

			ds, err := LoadDataset(ctx, fs, path)
			if err != nil {
				t.Fatalf("loading dataset: %s", err.Error())
			}
			ds.Path = ""

			if tc.Expect != nil {
				if diff := dstest.CompareDatasets(tc.Expect, ds); diff != "" {
					t.Errorf("dataset comparison error (-want +got): %s", diff)
					dstest.UpdateGoldenFileIfEnvVarSet(fmt.Sprintf("testdata/%s/expect.dataset.json", c.casePath), ds)
				}
			}

			if c.resultPath != path {
				t.Errorf("result path mismatch: expected: %q, got: %q", c.resultPath, path)
			}
			if c.repoFiles != len(fs.Files) {
				t.Errorf("invalid number of mapstore entries. want %d, got %d", c.repoFiles, len(fs.Files))
				return
			}
		})
	}

	t.Run("no_priv_key", func(t *testing.T) {
		_, err := CreateDataset(ctx, fs, fs, event.NilBus, nil, nil, nil, SaveSwitches{ShouldRender: true})
		if err == nil {
			t.Fatal("expected call without private key to error")
		}
		pkReqErrMsg := "private key is required to create a dataset"
		if err.Error() != pkReqErrMsg {
			t.Fatalf("error mismatch.\nwant: %q\ngot: %q", pkReqErrMsg, err.Error())
		}
	})

	t.Run("no_body", func(t *testing.T) {
		dsData, err := ioutil.ReadFile("testdata/cities/input.dataset.json")
		if err != nil {
			t.Errorf("case nil body and previous body files, error reading dataset file: %s", err.Error())
		}
		ds := &dataset.Dataset{}
		if err := ds.UnmarshalJSON(dsData); err != nil {
			t.Errorf("case nil body and previous body files, error unmarshaling dataset file: %s", err.Error())
		}

		// expectedErr := "bodyfile or previous bodyfile needed"
		// _, err = CreateDataset(ctx, fs, fs, event.NilBus, ds, nil, privKey, SaveSwitches{ShouldRender: true})
		// if err.Error() != expectedErr {
		// 	t.Errorf("case nil body and previous body files, error mismatch: expected '%s', got '%s'", expectedErr, err.Error())
		// }
	})

	t.Run("no_changes", func(t *testing.T) {
		expectedErr := "saving failed: no changes"
		dsPrev, err := LoadDataset(ctx, fs, good[2].resultPath)
		if err != nil {
			t.Fatal(err)
		}

		ds := &dataset.Dataset{
			Name:      "cities",
			Commit:    &dataset.Commit{},
			Structure: dsPrev.Structure,
			Meta:      dsPrev.Meta,
		}
		ds.PreviousPath = good[2].resultPath

		bodyBytes, err := ioutil.ReadFile("testdata/cities/body.csv")
		if err != nil {
			t.Fatalf("reading body file: %s", err.Error())
		}
		ds.SetBodyFile(qfs.NewMemfileBytes("body.csv", bodyBytes))

		path, err := CreateDataset(ctx, fs, fs, event.NilBus, ds, dsPrev, privKey, SaveSwitches{ShouldRender: true})
		if err != nil && err.Error() != expectedErr {
			t.Fatalf("mismatch: expected %q, got %q", expectedErr, err.Error())
		} else if err == nil {
			ds, err := LoadDataset(ctx, fs, path)
			if err != nil {
				t.Fatalf("loading dataset: %s", err.Error())
			}

			t.Fatalf("CreateDataset expected error, got nil. commit: %v", ds.Commit)
		}

		if len(fs.Files) != 27 {
			t.Errorf("invalid number of entries. want %d got %d", 27, len(fs.Files))
			_, err := fs.Print()
			if err != nil {
				panic(err)
			}
		}
	})

	// case: previous dataset isn't valid
}
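// Tests in this file repeatedly pin the package-level Timestamp hook so that
// content-addressed paths stay deterministic. A minimal sketch of that
// save/override/restore pattern as a reusable helper; withFrozenTimestamp is
// illustrative only and not part of the package API:
func withFrozenTimestamp(t *testing.T, frozen time.Time, fn func()) {
	t.Helper()
	prev := Timestamp
	defer func() { Timestamp = prev }()
	Timestamp = func() time.Time { return frozen }
	fn()
}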
func TestDatasetSaveCustomTimestamp(t *testing.T) {
	ctx := context.Background()
	fs := qfs.NewMemFS()
	privKey := testkeys.GetKeyData(10).PrivKey

	// use a custom timestamp in local zone. should be converted to UTC for saving
	ts := time.Date(2100, 1, 2, 3, 4, 5, 6, time.Local)

	ds := &dataset.Dataset{
		Commit: &dataset.Commit{
			Timestamp: ts,
		},
		Structure: &dataset.Structure{Format: "json", Schema: dataset.BaseSchemaArray},
	}
	ds.SetBodyFile(qfs.NewMemfileBytes("/body.json", []byte(`[]`)))

	path, err := CreateDataset(ctx, fs, fs, event.NilBus, ds, nil, privKey, SaveSwitches{})
	if err != nil {
		t.Fatal(err)
	}

	got, err := LoadDataset(ctx, fs, path)
	if err != nil {
		t.Fatal(err)
	}

	if !ts.In(time.UTC).Equal(got.Commit.Timestamp) {
		t.Errorf("result timestamp mismatch.\nwant: %q\ngot: %q", ts.In(time.UTC), got.Commit.Timestamp)
	}
}

func TestDatasetSaveEvents(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	fs := qfs.NewMemFS()
	privKey := testkeys.GetKeyData(10).PrivKey
	bus := event.NewBus(ctx)

	fired := map[event.Type]int{}
	bus.SubscribeTypes(func(ctx context.Context, e event.Event) error {
		fired[e.Type]++
		return nil
	},
		event.ETDatasetSaveStarted,
		event.ETDatasetSaveProgress,
		event.ETDatasetSaveCompleted,
	)

	ds := &dataset.Dataset{
		Commit: &dataset.Commit{
			Timestamp: time.Date(2100, 1, 2, 3, 4, 5, 6, time.Local),
		},
		Structure: &dataset.Structure{Format: "json", Schema: dataset.BaseSchemaArray},
	}
	ds.SetBodyFile(qfs.NewMemfileBytes("/body.json", []byte(`[]`)))

	if _, err := CreateDataset(ctx, fs, fs, bus, ds, nil, privKey, SaveSwitches{}); err != nil {
		t.Fatal(err)
	}

	expect := map[event.Type]int{
		event.ETDatasetSaveStarted:   1,
		event.ETDatasetSaveProgress:  3,
		event.ETDatasetSaveCompleted: 1,
	}

	if diff := cmp.Diff(expect, fired); diff != "" {
		t.Errorf("fired event count mismatch. (-want +got):%s\n", diff)
	}
}
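// TestDatasetSaveEvents above counts fired event types by subscribing an
// inline handler. A compact sketch of that counting pattern, assuming
// event.NewBus returns the package's Bus interface (event.Bus); the helper
// name is illustrative. The counter map is unsynchronized, matching the
// single-goroutine use above:
func countFiredEvents(bus event.Bus, types ...event.Type) map[event.Type]int {
	fired := map[event.Type]int{}
	bus.SubscribeTypes(func(_ context.Context, e event.Event) error {
		fired[e.Type]++
		return nil
	}, types...)
	return fired
}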
// Test that if the body is too large, the commit message just assumes the body changed
func TestCreateDatasetBodyTooLarge(t *testing.T) {
	ctx := context.Background()
	fs := qfs.NewMemFS()

	prevTs := Timestamp
	defer func() { Timestamp = prevTs }()
	Timestamp = func() time.Time { return time.Date(2001, 01, 01, 01, 01, 01, 01, time.UTC) }

	// Set the limit for the body to be 100 bytes
	prevBodySizeLimit := BodySizeSmallEnoughToDiff
	defer func() { BodySizeSmallEnoughToDiff = prevBodySizeLimit }()
	BodySizeSmallEnoughToDiff = 100

	privKey := testkeys.GetKeyData(10).PrivKey

	// Need a previous commit, otherwise we just get the "created dataset" message
	prevDs := dataset.Dataset{
		Commit: &dataset.Commit{},
		Structure: &dataset.Structure{
			Format: "csv",
			Schema: tabular.BaseTabularSchema,
		},
	}

	testBodyPath, _ := filepath.Abs("testdata/movies/body.csv")
	testBodyBytes, _ := ioutil.ReadFile(testBodyPath)

	// Create a new version and add the body
	nextDs := dataset.Dataset{
		Commit: &dataset.Commit{},
		Structure: &dataset.Structure{
			Format: "csv",
			Schema: tabular.BaseTabularSchema,
		},
	}
	nextDs.SetBodyFile(qfs.NewMemfileBytes(testBodyPath, testBodyBytes))

	path, err := CreateDataset(ctx, fs, fs, event.NilBus, &nextDs, &prevDs, privKey, SaveSwitches{ShouldRender: true})
	if err != nil {
		t.Fatalf("CreateDataset: %s", err)
	}

	// Load the created dataset to inspect the commit message
	got, err := LoadDataset(ctx, fs, path)
	if err != nil {
		t.Fatalf("LoadDataset: %s", err)
	}

	expect := dstest.LoadGoldenFile(t, "testdata/movies/expect.dataset.json")
	if diff := dstest.CompareDatasets(expect, got); diff != "" {
		t.Errorf("result mismatch (-want +got):%s\n", diff)
		dstest.UpdateGoldenFileIfEnvVarSet("testdata/movies/expect.dataset.json", got)
	}
}
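// Several tests in this file hand-build the smallest dataset CreateDataset
// accepts: a commit, a structure with a base schema, and an in-memory body
// file. A sketch of that construction as a helper; minimalJSONDataset is
// illustrative only and not used by the tests:
func minimalJSONDataset() *dataset.Dataset {
	ds := &dataset.Dataset{
		Commit:    &dataset.Commit{},
		Structure: &dataset.Structure{Format: "json", Schema: dataset.BaseSchemaArray},
	}
	ds.SetBodyFile(qfs.NewMemfileBytes("/body.json", []byte(`[]`)))
	return ds
}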
func TestWriteDataset(t *testing.T) {
	ctx := context.Background()
	fs := qfs.NewMemFS()
	prev := Timestamp
	defer func() { Timestamp = prev }()
	Timestamp = func() time.Time { return time.Date(2001, 01, 01, 01, 01, 01, 01, time.UTC) }

	// These tests are using hard-coded ids that require this exact peer's private key.
	pk := testkeys.GetKeyData(10).PrivKey

	if _, err := WriteDataset(ctx, fs, fs, nil, &dataset.Dataset{}, event.NilBus, pk, SaveSwitches{Pin: true}); err == nil || err.Error() != "cannot save empty dataset" {
		t.Errorf("didn't reject empty dataset: %s", err)
	}

	cases := []struct {
		casePath  string
		repoFiles int // expected total count of files in repo after test execution
		err       string
	}{
		// TODO (b5) - these are *very* close, need to be fixed
		// {"cities", 6, ""},      // dataset, commit, structure, meta, viz, body
		// {"all_fields", 14, ""}, // dataset, commit, structure, meta, viz, viz_script, transform, transform_script, SAME BODY as cities -> gets de-duped
	}

	for i, c := range cases {
		tc, err := dstest.NewTestCaseFromDir("testdata/" + c.casePath)
		if err != nil {
			t.Errorf("%s: error creating test case: %s", c.casePath, err)
			continue
		}

		ds := tc.Input

		got, err := WriteDataset(ctx, fs, fs, nil, ds, event.NilBus, pk, SaveSwitches{Pin: true})
		if !(err == nil && c.err == "" || err != nil && err.Error() == c.err) {
			t.Errorf("case %d error mismatch. expected: '%s', got: '%s'", i, c.err, err)
			continue
		}

		// total count expected of files in repo after test execution
		if len(fs.Files) != c.repoFiles {
			t.Errorf("case %d: invalid number of entries. want %d, got %d", i, c.repoFiles, len(fs.Files))
			str, err := fs.Print()
			if err != nil {
				panic(err)
			}
			t.Log(str)
			continue
		}

		got = PackageFilepath(fs, got, PackageFileDataset)

		f, err := fs.Get(ctx, got)
		if err != nil {
			t.Errorf("error getting dataset file: %s", err.Error())
			continue
		}

		ref := &dataset.Dataset{}
		if err := json.NewDecoder(f).Decode(ref); err != nil {
			t.Errorf("error decoding dataset json: %s", err.Error())
			continue
		}

		if ref.Transform != nil {
			if ref.Transform.IsEmpty() {
				t.Errorf("expected stored dataset.Transform to be populated")
			}
			ds.Transform.Assign(dataset.NewTransformRef(ref.Transform.Path))
		}
		if ref.Meta != nil {
			if !ref.Meta.IsEmpty() {
				t.Errorf("expected stored dataset.Meta to be a reference")
			}
			// Abstract transforms aren't loaded
			ds.Meta.Assign(dataset.NewMetaRef(ref.Meta.Path))
		}
		if ref.Structure != nil {
			if !ref.Structure.IsEmpty() {
				t.Errorf("expected stored dataset.Structure to be a reference")
			}
			ds.Structure.Assign(dataset.NewStructureRef(ref.Structure.Path))
		}
		if ref.Viz != nil {
			if ref.Viz.IsEmpty() {
				t.Errorf("expected stored dataset.Viz to be populated")
			}
			ds.Viz.Assign(dataset.NewVizRef(ref.Viz.Path))
		}
		ds.BodyPath = ref.BodyPath

		ds.Assign(dataset.NewDatasetRef(got))
		result, err := LoadDataset(ctx, fs, got)
		if err != nil {
			t.Errorf("case %d unexpected error loading dataset: %s", i, err)
			continue
		}

		if diff := dstest.CompareDatasets(ds, result); diff != "" {
			t.Errorf("case %d comparison mismatch: (-want +got):\n%s", i, diff)

			d1, _ := ds.MarshalJSON()
			t.Log(string(d1))

			d, _ := result.MarshalJSON()
			t.Log(string(d))
			continue
		}
	}
}
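// The cases below assert two generated strings per change set: a short commit
// title such as "meta updated title", and a long message in which each changed
// component contributes a "component:" line followed by tab-indented detail
// lines, e.g. "meta:\n\tupdated title".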
func TestGenerateCommitMessage(t *testing.T) {
	badCases := []struct {
		description string
		prev, ds    *dataset.Dataset
		force       bool
		errMsg      string
	}{
		{
			"no changes from one dataset version to next",
			&dataset.Dataset{Meta: &dataset.Meta{Title: "same dataset"}},
			&dataset.Dataset{Meta: &dataset.Meta{Title: "same dataset"}},
			false,
			"no changes",
		},
	}

	ctx := context.Background()
	fs := qfs.NewMemFS()

	for _, c := range badCases {
		t.Run(c.description, func(t *testing.T) {
			_, _, err := generateCommitDescriptions(ctx, fs, c.ds, c.prev, BodySame, c.force)
			if err == nil {
				t.Errorf("error expected, did not get one")
			} else if c.errMsg != err.Error() {
				t.Errorf("error mismatch\nexpect: %s\ngot: %s", c.errMsg, err.Error())
			}
		})
	}

	goodCases := []struct {
		description string
		prev, ds    *dataset.Dataset
		force       bool
		expectShort string
		expectLong  string
	}{
		{
			"empty previous and non-empty dataset",
			&dataset.Dataset{},
			&dataset.Dataset{Meta: &dataset.Meta{Title: "new dataset"}},
			false,
			"created dataset",
			"created dataset",
		},
		{
			"title changes from previous",
			&dataset.Dataset{Meta: &dataset.Meta{Title: "new dataset"}},
			&dataset.Dataset{Meta: &dataset.Meta{Title: "changes to dataset"}},
			false,
			"meta updated title",
			"meta:\n\tupdated title",
		},
		{
			"same dataset but force is true",
			&dataset.Dataset{Meta: &dataset.Meta{Title: "same dataset"}},
			&dataset.Dataset{Meta: &dataset.Meta{Title: "same dataset"}},
			true,
			"forced update",
			"forced update",
		},
		{
			"structure sets the headerRow config option",
			&dataset.Dataset{Structure: &dataset.Structure{
				FormatConfig: map[string]interface{}{
					"headerRow": false,
				},
			}},
			&dataset.Dataset{Structure: &dataset.Structure{
				FormatConfig: map[string]interface{}{
					"headerRow": true,
				},
			}},
			false,
			"structure updated formatConfig.headerRow",
			"structure:\n\tupdated formatConfig.headerRow",
		},
		{
			"readme modified",
			&dataset.Dataset{Readme: &dataset.Readme{
				Format: "md",
				Text:   "# hello\n\ncontent\n\n",
			}},
			&dataset.Dataset{Readme: &dataset.Readme{
				Format: "md",
				Text:   "# hello\n\ncontent\n\nanother line\n\n",
			}},
			false,
			// TODO(dustmop): Should mention the line added.
			"readme updated text",
			"readme:\n\tupdated text",
		},
		{
			"body with a small number of changes",
			&dataset.Dataset{
				Structure: &dataset.Structure{Format: "json"},
				Body: toqtype.MustParseJSONAsArray(`[
					{ "fruit": "apple", "color": "red" },
					{ "fruit": "banana", "color": "yellow" },
					{ "fruit": "cherry", "color": "red" }
				]`),
			},
			&dataset.Dataset{
				Structure: &dataset.Structure{Format: "json"},
				Body: toqtype.MustParseJSONAsArray(`[
					{ "fruit": "apple", "color": "red" },
					{ "fruit": "blueberry", "color": "blue" },
					{ "fruit": "cherry", "color": "red" },
					{ "fruit": "durian", "color": "green" }
				]`),
			},
			false,
			"body updated row 1 and added row 3",
			"body:\n\tupdated row 1\n\tadded row 3",
		},
		{
			"body with lots of changes",
			&dataset.Dataset{
				Structure: &dataset.Structure{Format: "csv"},
				Body: toqtype.MustParseCsvAsArray(`one,two,3
four,five,6
seven,eight,9
ten,eleven,12
thirteen,fourteen,15
sixteen,seventeen,18
nineteen,twenty,21
twenty-two,twenty-three,24
twenty-five,twenty-six,27
twenty-eight,twenty-nine,30`),
			},
			&dataset.Dataset{
				Structure: &dataset.Structure{Format: "csv"},
				Body: toqtype.MustParseCsvAsArray(`one,two,3
four,five,6
seven,eight,cat
dog,eleven,12
thirteen,eel,15
sixteen,seventeen,100
frog,twenty,21
twenty-two,twenty-three,24
twenty-five,giraffe,200
hen,twenty-nine,30`),
			},
			false,
			"body changed by 19%",
			"body:\n\tchanged by 19%",
		},
		{
			"meta and structure and readme changes",
			&dataset.Dataset{
				Meta: &dataset.Meta{Title: "new dataset"},
				Structure: &dataset.Structure{
					FormatConfig: map[string]interface{}{
						"headerRow": false,
					},
				},
				Readme: &dataset.Readme{
					Format: "md",
					Text:   "# hello\n\ncontent\n\n",
				},
			},
			&dataset.Dataset{
				Meta: &dataset.Meta{Title: "changes to dataset"},
				Structure: &dataset.Structure{
					FormatConfig: map[string]interface{}{
						"headerRow": true,
					},
				},
				Readme: &dataset.Readme{
					Format: "md",
					Text:   "# hello\n\ncontent\n\nanother line\n\n",
				},
			},
			false,
			"updated meta, structure, and readme",
			"meta:\n\tupdated title\nstructure:\n\tupdated formatConfig.headerRow\nreadme:\n\tupdated text",
		},
		{
			"meta removed but everything else is the same",
			&dataset.Dataset{
				Meta: &dataset.Meta{Title: "new dataset"},
				Structure: &dataset.Structure{
					FormatConfig: map[string]interface{}{
						"headerRow": false,
					},
				},
				Readme: &dataset.Readme{
					Format: "md",
					Text:   "# hello\n\ncontent\n\n",
				},
			},
			&dataset.Dataset{
				Structure: &dataset.Structure{
					FormatConfig: map[string]interface{}{
						"headerRow": false,
					},
				},
				Readme: &dataset.Readme{
					Format: "md",
					Text:   "# hello\n\ncontent\n\n",
				},
			},
			false,
			"meta removed",
			"meta removed",
		},
		{
			"meta has multiple parts changed",
			&dataset.Dataset{
				Meta: &dataset.Meta{
					Title:       "new dataset",
					Description: "TODO: Add description",
				},
			},
			&dataset.Dataset{
				Meta: &dataset.Meta{
					Title:       "changes to dataset",
					HomeURL:     "http://example.com",
					Description: "this is a great description",
				},
			},
			false,
			"meta updated 3 fields",
			"meta:\n\tupdated description\n\tadded homeURL\n\tupdated title",
		},
		{
			"meta and body changed",
			&dataset.Dataset{
				Meta: &dataset.Meta{
					Title:       "new dataset",
					Description: "TODO: Add description",
				},
				Structure: &dataset.Structure{Format: "csv"},
				Body: toqtype.MustParseCsvAsArray(`one,two,3
four,five,6
seven,eight,9
ten,eleven,12
thirteen,fourteen,15
sixteen,seventeen,18
nineteen,twenty,21
twenty-two,twenty-three,24
twenty-five,twenty-six,27
twenty-eight,twenty-nine,30`),
			},
			&dataset.Dataset{
				Meta: &dataset.Meta{
					Title:       "changes to dataset",
					HomeURL:     "http://example.com",
					Description: "this is a great description",
				},
				Structure: &dataset.Structure{Format: "csv"},
				Body: toqtype.MustParseCsvAsArray(`one,two,3
four,five,6
something,eight,cat
dog,eleven,12
thirteen,eel,15
sixteen,60,100
frog,twenty,21
twenty-two,twenty-three,24
twenty-five,giraffe,200
hen,twenty-nine,30`),
			},
			false,
			"updated meta and body",
			"meta:\n\tupdated description\n\tadded homeURL\n\tupdated title\nbody:\n\tchanged by 24%",
		},
		{
			"meta changed but body stays the same",
			&dataset.Dataset{
				Meta: &dataset.Meta{
					Title: "new dataset",
				},
				Structure: &dataset.Structure{Format: "csv"},
				Body: toqtype.MustParseCsvAsArray(`one,two,3
four,five,6
seven,eight,9
ten,eleven,12
thirteen,fourteen,15
sixteen,seventeen,18`),
			},
			&dataset.Dataset{
				Meta: &dataset.Meta{
					Title: "dataset of a bunch of numbers",
				},
				Structure: &dataset.Structure{Format: "csv"},
				Body: toqtype.MustParseCsvAsArray(`one,two,3
four,five,6
seven,eight,9
ten,eleven,12
thirteen,fourteen,15
sixteen,seventeen,18`),
			},
			false,
			"meta updated title",
			"meta:\n\tupdated title",
		},
	}

	for _, c := range goodCases {
		t.Run(c.description, func(t *testing.T) {
			bodyAct := BodyDefault
			if compareBody(c.prev.Body, c.ds.Body) {
				bodyAct = BodySame
			}
			shortTitle, longMessage, err := generateCommitDescriptions(ctx, fs, c.ds, c.prev, bodyAct, c.force)
			if err != nil {
				t.Errorf("error: %s", err.Error())
				return
			}
			if c.expectShort != shortTitle {
				t.Errorf("short message mismatch\nexpect: %s\ngot: %s", c.expectShort, shortTitle)
			}
			if c.expectLong != longMessage {
				t.Errorf("long message mismatch\nexpect: %s\ngot: %s", c.expectLong, longMessage)
			}
		})
	}
}

// compareBody reports whether two decoded body values are equal by comparing
// their JSON encodings. Marshal errors panic: inputs come from test literals,
// so a failure indicates a broken test case.
func compareBody(left, right interface{}) bool {
	leftData, err := json.Marshal(left)
	if err != nil {
		panic(err)
	}
	rightData, err := json.Marshal(right)
	if err != nil {
		panic(err)
	}
	return string(leftData) == string(rightData)
}
func TestGetDepth(t *testing.T) {
	good := []struct {
		val      string
		expected int
	}{
		{`"foo"`, 0},
		{`1000`, 0},
		{`true`, 0},
		{`{"foo": "bar"}`, 1},
		{`{"foo": "bar","bar": "baz"}`, 1},
		{`{
			"foo": "bar",
			"bar": "baz",
			"baz": {
				"foo": "bar",
				"bar": "baz"
			}
		}`, 2},
		{`{
			"foo": "bar",
			"bar": "baz",
			"baz": {
				"foo": "bar",
				"bar": [
					"foo",
					"bar",
					"baz"
				]
			}
		}`, 3},
		{`{
			"foo": "bar",
			"bar": "baz",
			"baz": [
				"foo",
				"bar",
				"baz"
			]
		}`, 2},
		{`["foo","bar","baz"]`, 1},
		{`["a","b",[1, 2, 3]]`, 2},
		{`[
			"foo",
			"bar",
			{"baz": {
				"foo": "bar",
				"bar": "baz",
				"baz": "foo"
				}
			}
		]`, 3},
		{`{
			"foo": "bar",
			"foo1": {
				"foo2": 2,
				"foo3": false
			},
			"foo4": "bar",
			"foo5": {
				"foo6": 100
			}
		}`, 2},
		{`{
			"foo": "bar",
			"foo1": "bar",
			"foo2": {
				"foo3": 100,
				"foo4": 100
			},
			"foo5": {
				"foo6": 100,
				"foo7": 100,
				"foo8": 100,
				"foo9": 100
			},
			"foo10": {
				"foo11": 100,
				"foo12": 100
			}
		}`, 2},
	}

	var val interface{}

	for i, c := range good {
		if err := json.Unmarshal([]byte(c.val), &val); err != nil {
			t.Fatal(err)
		}
		depth := getDepth(val)
		if c.expected != depth {
			t.Errorf("case %d, depth mismatch, expected %d, got %d", i, c.expected, depth)
		}
	}
}
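// GenerateDataset builds an in-memory benchmark dataset with sampleSize
// generated rows in the given format ("csv" or "json"), returning the body
// size in bytes alongside the dataset.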
func GenerateDataset(b *testing.B, sampleSize int, format string) (int, *dataset.Dataset) {
	ds := &dataset.Dataset{
		Commit: &dataset.Commit{
			Timestamp: time.Date(2017, 1, 1, 1, 0, 0, 0, time.UTC),
			Title:     "initial commit",
		},
		Meta: &dataset.Meta{
			Title: "performance benchmark data",
		},
		Structure: &dataset.Structure{
			Format: format,
			FormatConfig: map[string]interface{}{
				"headerRow":  true,
				"lazyQuotes": true,
			},
			Schema: map[string]interface{}{
				"type": "array",
				"items": map[string]interface{}{
					"type": "array",
					"items": []interface{}{
						map[string]interface{}{"title": "uuid", "type": "string"},
						map[string]interface{}{"title": "ingest", "type": "string"},
						map[string]interface{}{"title": "occurred", "type": "string"},
						map[string]interface{}{"title": "raw_data", "type": "string"},
					},
				},
			},
		},
	}

	gen, err := generate.NewTabularGenerator(ds.Structure)
	if err != nil {
		b.Errorf("error creating generator: %s", err.Error())
	}
	defer gen.Close()

	bodyBuffer := &bytes.Buffer{}
	w, err := dsio.NewEntryWriter(ds.Structure, bodyBuffer)
	if err != nil {
		b.Fatalf("creating entry writer: %s", err.Error())
	}

	for i := 0; i < sampleSize; i++ {
		ent, err := gen.ReadEntry()
		if err != nil {
			b.Fatalf("reading generator entry: %s", err.Error())
		}
		w.WriteEntry(ent)
	}
	if err := w.Close(); err != nil {
		b.Fatalf("closing writer: %s", err)
	}

	fileName := fmt.Sprintf("body.%s", ds.Structure.Format)
	ds.SetBodyFile(qfs.NewMemfileReader(fileName, bodyBuffer))

	return bodyBuffer.Len(), ds
}
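// The benchmarks below exclude test-data generation from measurement:
// GenerateDataset runs between b.StopTimer and b.StartTimer, so only the call
// under test contributes to the reported time.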
func BenchmarkCreateDatasetCSV(b *testing.B) {
	// ~1 MB, ~12 MB, ~25 MB, ~50 MB, ~500 MB, ~1GB
	for _, sampleSize := range []int{10000, 100000, 250000, 500000, 1000000} {
		ctx := context.Background()
		fs := qfs.NewMemFS()
		prev := Timestamp

		defer func() { Timestamp = prev }()
		Timestamp = func() time.Time { return time.Date(2001, 01, 01, 01, 01, 01, 01, time.UTC) }

		// These tests are using hard-coded ids that require this exact peer's private key.
		privKey := testkeys.GetKeyData(10).PrivKey

		b.Run(fmt.Sprintf("sample size %v", sampleSize), func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				b.StopTimer()

				_, ds := GenerateDataset(b, sampleSize, "csv")

				b.StartTimer()
				_, err := CreateDataset(ctx, fs, fs, event.NilBus, ds, nil, privKey, SaveSwitches{ShouldRender: true})
				if err != nil {
					b.Errorf("error creating dataset: %s", err.Error())
				}
			}
			b.StopTimer()
		})
	}
}

// validateDataset is a stripped copy of base/dsfs/setErrCount
func validateDataset(ds *dataset.Dataset, data qfs.File) error {
	defer data.Close()

	er, err := dsio.NewEntryReader(ds.Structure, data)
	if err != nil {
		return err
	}

	_, err = validate.EntryReader(er)

	return err
}

func BenchmarkValidateCSV(b *testing.B) {
	// ~1 MB, ~12 MB, ~25 MB, ~50 MB, ~500 MB, ~1GB
	for _, sampleSize := range []int{10000, 100000, 250000, 500000, 1000000, 10000000} {
		b.Run(fmt.Sprintf("sample size %v", sampleSize), func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				b.StopTimer()
				_, ds := GenerateDataset(b, sampleSize, "csv")

				b.StartTimer()
				err := validateDataset(ds, ds.BodyFile())
				if err != nil {
					b.Errorf("error validating dataset: %s", err.Error())
				}
			}
			b.StopTimer()
		})
	}
}

func BenchmarkValidateJSON(b *testing.B) {
	// ~1 MB, ~12 MB, ~25 MB, ~50 MB, ~500 MB, ~1GB
	for _, sampleSize := range []int{10000, 100000, 250000, 500000, 1000000, 10000000} {
		b.Run(fmt.Sprintf("sample size %v", sampleSize), func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				b.StopTimer()
				_, ds := GenerateDataset(b, sampleSize, "json")

				b.StartTimer()
				err := validateDataset(ds, ds.BodyFile())
				if err != nil {
					b.Errorf("error validating dataset: %s", err.Error())
				}
			}
			b.StopTimer()
		})
	}
}
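// These benchmarks can be run in isolation with stock `go test` flags, for
// example (the package path assumes this file's location in the repo):
//
//	go test ./base/dsfs -run '^$' -bench 'BenchmarkValidate(CSV|JSON)' -benchtime 1x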