github.com/fraugster/parquet-go@v0.12.0/floor/writer_test.go (about) 1 package floor 2 3 import ( 4 "fmt" 5 "os" 6 "testing" 7 "time" 8 9 "github.com/davecgh/go-spew/spew" 10 goparquet "github.com/fraugster/parquet-go" 11 "github.com/fraugster/parquet-go/floor/interfaces" 12 "github.com/fraugster/parquet-go/parquet" 13 "github.com/fraugster/parquet-go/parquetschema" 14 "github.com/stretchr/testify/require" 15 ) 16 17 func TestDecodeStruct(t *testing.T) { 18 testData := []struct { 19 Input interface{} 20 ExpectedOutput map[string]interface{} 21 ExpectErr bool 22 Schema string 23 }{ 24 { 25 Input: struct{ Foo int16 }{Foo: 42}, 26 ExpectedOutput: map[string]interface{}{"foo": int32(42)}, 27 ExpectErr: false, 28 Schema: `message test { required int32 foo; }`, 29 }, 30 { 31 Input: struct{ Foo int }{Foo: 43}, 32 ExpectedOutput: map[string]interface{}{"foo": int32(43)}, 33 ExpectErr: false, 34 Schema: `message test { required int32 foo; }`, 35 }, 36 { 37 Input: struct{ Foo int8 }{Foo: 44}, 38 ExpectedOutput: map[string]interface{}{"foo": int32(44)}, 39 ExpectErr: false, 40 Schema: `message test { required int32 foo; }`, 41 }, 42 { 43 Input: struct{ Foo int32 }{Foo: 100000}, 44 ExpectedOutput: map[string]interface{}{"foo": int32(100000)}, 45 ExpectErr: false, 46 Schema: `message test { required int32 foo; }`, 47 }, 48 { 49 Input: struct{ Foo uint64 }{Foo: 1125899906842624}, 50 ExpectedOutput: map[string]interface{}{"foo": int64(1125899906842624)}, 51 ExpectErr: false, 52 Schema: `message test { required int64 foo; }`, 53 }, 54 { 55 Input: struct{ Foo uint }{Foo: 200000}, 56 ExpectedOutput: map[string]interface{}{"foo": int32(200000)}, 57 ExpectErr: false, 58 Schema: `message test { required int32 foo; }`, 59 }, 60 { 61 Input: struct{ Foo float32 }{Foo: 42.5}, 62 ExpectedOutput: map[string]interface{}{"foo": float32(42.5)}, 63 ExpectErr: false, 64 Schema: `message test { required float foo; }`, 65 }, 66 { 67 Input: struct{ Foo float64 }{Foo: 23.5}, 68 ExpectedOutput: map[string]interface{}{"foo": float64(23.5)}, 69 ExpectErr: false, 70 Schema: `message test { required double foo; }`, 71 }, 72 { 73 Input: struct{ Foo byte }{Foo: 1}, 74 ExpectedOutput: map[string]interface{}{"foo": int32(1)}, 75 ExpectErr: false, 76 Schema: `message test { required int32 foo; }`, 77 }, 78 { 79 Input: struct{ Foo string }{Foo: "bar"}, 80 ExpectedOutput: map[string]interface{}{"foo": []byte("bar")}, 81 ExpectErr: false, 82 Schema: `message test { required binary foo (STRING); }`, 83 }, 84 { 85 Input: struct{ Foo *string }{Foo: new(string)}, 86 ExpectedOutput: map[string]interface{}{"foo": []byte("")}, 87 ExpectErr: false, 88 Schema: `message test { optional binary foo (STRING); }`, 89 }, 90 { 91 Input: struct{ Foo *string }{}, 92 ExpectedOutput: map[string]interface{}{}, 93 ExpectErr: false, 94 Schema: `message test { optional binary foo (STRING); }`, 95 }, 96 { 97 Input: int(23), 98 ExpectedOutput: nil, 99 ExpectErr: true, 100 Schema: `message test { }`, 101 }, 102 { 103 Input: struct { 104 Foo struct { 105 Bar int64 106 } 107 Quux *bool 108 Baz uint32 109 Blub bool 110 }{}, 111 ExpectedOutput: map[string]interface{}{"foo": map[string]interface{}{"bar": int64(0)}, "baz": int64(0), "blub": false}, 112 ExpectErr: false, 113 Schema: `message test { required group foo { required int64 bar; } required int64 baz; optional boolean quux; required boolean blub; }`, 114 }, 115 { 116 Input: struct { 117 Foo []bool 118 }{ 119 Foo: []bool{false, true, false}, 120 }, 121 ExpectedOutput: map[string]interface{}{ 122 "foo": map[string]interface{}{ 123 "list": []map[string]interface{}{ 124 {"element": false}, 125 {"element": true}, 126 {"element": false}, 127 }, 128 }, 129 }, 130 ExpectErr: false, 131 Schema: `message test { 132 required group foo (LIST) { 133 repeated group list { 134 required boolean element; 135 } 136 } 137 }`, 138 }, 139 { 140 Input: struct { 141 Foo [5]uint16 142 }{ 143 Foo: [5]uint16{1, 1, 2, 3, 5}, 144 }, 145 ExpectedOutput: map[string]interface{}{ 146 "foo": map[string]interface{}{ 147 "list": []map[string]interface{}{ 148 {"element": int32(1)}, 149 {"element": int32(1)}, 150 {"element": int32(2)}, 151 {"element": int32(3)}, 152 {"element": int32(5)}, 153 }, 154 }, 155 }, 156 ExpectErr: false, 157 Schema: `message test { 158 required group foo (LIST) { 159 repeated group list { 160 required int32 element; 161 } 162 } 163 }`, 164 }, 165 { 166 Input: struct { 167 Foo map[string]int64 168 }{ 169 Foo: map[string]int64{ 170 "hello": int64(23), 171 }, 172 }, 173 ExpectedOutput: map[string]interface{}{ 174 "foo": map[string]interface{}{ 175 "key_value": []map[string]interface{}{ 176 {"key": []byte("hello"), "value": int64(23)}, 177 }, 178 }, 179 }, 180 ExpectErr: false, 181 Schema: `message test { 182 required group foo (MAP) { 183 repeated group key_value { 184 required binary key (STRING); 185 required int64 value; 186 } 187 } 188 }`, 189 }, 190 { 191 Input: struct { 192 C chan int 193 }{}, 194 ExpectedOutput: map[string]interface{}{}, 195 ExpectErr: false, 196 Schema: `message foo { }`, 197 }, 198 { 199 Input: struct { 200 Foo struct { 201 C chan int 202 Bar int 203 } 204 }{}, 205 ExpectedOutput: map[string]interface{}{"foo": map[string]interface{}{"bar": int64(0)}}, 206 ExpectErr: false, 207 Schema: `message foo { required group foo { optional int64 bar; } }`, 208 }, 209 { 210 Input: struct { 211 Foo []chan int 212 }{Foo: []chan int{make(chan int)}}, 213 ExpectedOutput: nil, 214 ExpectErr: true, 215 Schema: `message foo { required group foo (LIST) { repeated group list { required int32 element; } } }`, 216 }, 217 { 218 Input: &struct { 219 Bla int 220 }{Bla: 616}, 221 ExpectedOutput: map[string]interface{}{"bla": int32(616)}, 222 ExpectErr: false, 223 Schema: `message test { required int32 bla; }`, 224 }, 225 { 226 Input: (*struct { 227 Bla int 228 })(nil), 229 ExpectedOutput: nil, 230 ExpectErr: true, 231 Schema: `message test { required int32 bla; }`, 232 }, 233 { 234 Input: struct { 235 Date time.Time 236 }{ 237 Date: time.Date(1970, 1, 10, 0, 0, 0, 0, time.UTC), 238 }, 239 ExpectedOutput: map[string]interface{}{"date": int32(9)}, 240 ExpectErr: false, 241 Schema: `message test { required int32 date (DATE); }`, 242 }, 243 { 244 Input: struct { 245 Date time.Time 246 }{ 247 Date: time.Date(1970, 1, 12, 23, 59, 59, 0, time.UTC), 248 }, 249 ExpectedOutput: map[string]interface{}{"date": int32(11)}, 250 ExpectErr: false, 251 Schema: `message test { required int32 date (DATE); }`, 252 }, 253 { 254 Input: struct { 255 TS time.Time 256 }{ 257 TS: time.Date(1970, 1, 1, 0, 0, 23, 0, time.UTC), 258 }, 259 ExpectedOutput: map[string]interface{}{"ts": int64(23000)}, 260 ExpectErr: false, 261 Schema: `message test { required int64 ts (TIMESTAMP(MILLIS, false)); }`, 262 }, 263 { 264 Input: struct { 265 TS time.Time 266 }{ 267 TS: time.Date(1970, 1, 1, 0, 0, 24, 0, time.UTC), 268 }, 269 ExpectedOutput: map[string]interface{}{"ts": int64(24000000)}, 270 ExpectErr: false, 271 Schema: `message test { required int64 ts (TIMESTAMP(MICROS, false)); }`, 272 }, 273 { 274 Input: struct { 275 TS time.Time 276 }{ 277 TS: time.Date(1970, 1, 1, 0, 0, 25, 2000, time.UTC), 278 }, 279 ExpectedOutput: map[string]interface{}{"ts": int64(25000002000)}, 280 ExpectErr: false, 281 Schema: `message test { required int64 ts (TIMESTAMP(NANOS, false)); }`, 282 }, 283 { 284 Input: struct { 285 Lunch Time 286 }{ 287 Lunch: MustTime(NewTime(12, 30, 0, 0)), 288 }, 289 ExpectedOutput: map[string]interface{}{"lunch": int32(45000000)}, 290 ExpectErr: false, 291 Schema: `message test { required int32 lunch (TIME(MILLIS, false)); }`, 292 }, 293 { 294 Input: struct { 295 BeddyByes Time 296 }{ 297 BeddyByes: MustTime(NewTime(20, 15, 30, 0)), 298 }, 299 ExpectedOutput: map[string]interface{}{"beddybyes": int64(72930000000)}, 300 ExpectErr: false, 301 Schema: `message test { required int64 beddybyes (TIME(MICROS, false)); }`, 302 }, 303 { 304 Input: struct { 305 WakeyWakey Time 306 }{ 307 WakeyWakey: MustTime(NewTime(7, 5, 59, 0)), 308 }, 309 ExpectedOutput: map[string]interface{}{"wakeywakey": int64(25559000000000)}, 310 ExpectErr: false, 311 Schema: `message test { required int64 wakeywakey (TIME(NANOS, false)); }`, 312 }, 313 { 314 Input: struct { 315 Foo string 316 Times []interface{} 317 }{ 318 Foo: "bar", 319 Times: []interface{}{"2021-10-29T20:06:47.960577000Z", 1635542684, 1635542811912, 1635542811912010, 1635542854925031000}, 320 }, 321 ExpectedOutput: map[string]interface{}{ 322 "foo": []byte("bar"), 323 "times": map[string]interface{}{ 324 "list": []map[string]interface{}{ 325 {"element": goparquet.TimeToInt96(time.Date(2021, 10, 29, 20, 06, 47, 960577000, time.UTC))}, 326 {"element": goparquet.TimeToInt96(time.Date(2021, 10, 29, 21, 24, 44, 0, time.UTC))}, 327 {"element": goparquet.TimeToInt96(time.Date(2021, 10, 29, 21, 26, 51, 912000000, time.UTC))}, 328 {"element": goparquet.TimeToInt96(time.Date(2021, 10, 29, 21, 26, 51, 912010000, time.UTC))}, 329 {"element": goparquet.TimeToInt96(time.Date(2021, 10, 29, 21, 27, 34, 925031000, time.UTC))}, 330 }, 331 }, 332 }, 333 ExpectErr: false, 334 Schema: `message test { 335 optional binary foo (STRING); 336 optional group times (LIST) { 337 repeated group list { 338 required int96 element; 339 } 340 } 341 }`, 342 }, 343 { 344 Input: map[string]interface{}{"foo": "bar"}, 345 ExpectedOutput: map[string]interface{}{"foo": []byte("bar")}, 346 ExpectErr: false, 347 Schema: `message test { optional binary foo (STRING); }`, 348 }, 349 { 350 Input: map[string]interface{}{"foo": "bar", "data": map[string]interface{}{"foo": "bar"}}, 351 ExpectedOutput: map[string]interface{}{ 352 "foo": []byte("bar"), 353 "data": map[string]interface{}{ 354 "key_value": []map[string]interface{}{ 355 {"key": []byte("foo"), "value": []byte("bar")}, 356 }, 357 }}, 358 ExpectErr: false, 359 Schema: `message test { 360 optional binary foo (STRING); 361 required group data (MAP) { 362 repeated group key_value { 363 required binary key (STRING); 364 optional binary value (STRING); 365 } 366 } 367 }`, 368 }, 369 } 370 371 for idx, tt := range testData { 372 t.Run(fmt.Sprintf("test_%d", idx), func(t *testing.T) { 373 sd, err := parquetschema.ParseSchemaDefinition(tt.Schema) 374 require.NoError(t, err, "%d. parsing schema failed", idx) 375 obj := interfaces.NewMarshallObject(nil) 376 m := &reflectMarshaller{obj: tt.Input, schemaDef: sd} 377 err = m.MarshalParquet(obj) 378 if tt.ExpectErr { 379 require.Error(t, err, "%d. expected error, but found none", idx) 380 } else { 381 require.NoError(t, err, "%d. expected no error, but found one", idx) 382 require.Equal(t, tt.ExpectedOutput, obj.GetData(), "%d. output mismatch; schema = %s", idx, tt.Schema) 383 } 384 }) 385 } 386 } 387 388 func TestWriteFile(t *testing.T) { 389 _ = os.Mkdir("files", 0755) 390 391 sd, err := parquetschema.ParseSchemaDefinition( 392 `message test_msg { 393 required int64 foo; 394 optional binary bar (STRING); 395 optional group baz (LIST) { 396 repeated group list { 397 required int32 element; 398 } 399 } 400 optional int64 ts (TIMESTAMP(NANOS, false)); 401 optional int64 time (TIME(NANOS, false)); 402 }`) 403 require.NoError(t, err, "parsing schema definition failed") 404 405 t.Logf("schema definition: %s", spew.Sdump(sd)) 406 407 hlWriter, err := NewFileWriter( 408 "files/test.parquet", 409 goparquet.WithCompressionCodec(parquet.CompressionCodec_SNAPPY), 410 goparquet.WithCreator("floor-unittest"), 411 goparquet.WithSchemaDefinition(sd), 412 ) 413 require.NoError(t, err, "creating new file writer failed") 414 415 data := []struct { 416 Foo int64 417 Bar *string 418 Baz []int32 419 Time *Time 420 }{ 421 {23, strPtr("hello!"), []int32{23}, nil}, 422 {42, strPtr("world!"), []int32{1, 1, 2, 3, 5}, nil}, 423 {500, nil, nil, nil}, 424 {750, strPtr("empty"), nil, nil}, 425 {1000, strPtr("bye!"), []int32{2, 3, 5, 7, 11}, timePtr(MustTime(NewTime(16, 20, 0, 0)))}, 426 } 427 428 for idx, d := range data { 429 require.NoError(t, hlWriter.Write(d), "%d. Write failed", idx) 430 } 431 432 require.NoError(t, hlWriter.Close()) 433 434 rf, err := os.Open("files/test.parquet") 435 require.NoError(t, err) 436 437 reader, err := goparquet.NewFileReader(rf) 438 require.NoError(t, err) 439 440 n, err := reader.RowGroupNumRows() 441 require.NoError(t, err) 442 require.Equal(t, int64(len(data)), n) 443 444 expectedData := []map[string]interface{}{ 445 { 446 "foo": int64(23), 447 "bar": []byte("hello!"), 448 "baz": map[string]interface{}{ 449 "list": []map[string]interface{}{ 450 {"element": int32(23)}, 451 }, 452 }, 453 }, 454 { 455 "foo": int64(42), 456 "bar": []byte("world!"), 457 "baz": map[string]interface{}{ 458 "list": []map[string]interface{}{ 459 {"element": int32(1)}, 460 {"element": int32(1)}, 461 {"element": int32(2)}, 462 {"element": int32(3)}, 463 {"element": int32(5)}, 464 }, 465 }, 466 }, 467 { 468 "foo": int64(500), 469 }, 470 { 471 "foo": int64(750), 472 "bar": []byte("empty"), 473 }, 474 { 475 "foo": int64(1000), 476 "bar": []byte("bye!"), 477 "baz": map[string]interface{}{ 478 "list": []map[string]interface{}{ 479 {"element": int32(2)}, 480 {"element": int32(3)}, 481 {"element": int32(5)}, 482 {"element": int32(7)}, 483 {"element": int32(11)}, 484 }, 485 }, 486 "time": int64(58800000000000), 487 }, 488 } 489 490 n, err = reader.RowGroupNumRows() 491 require.NoError(t, err) 492 493 for i := int64(0); i < n; i++ { 494 data, err := reader.NextRow() 495 require.NoError(t, err, "%d. reading record failed") 496 require.Equal(t, expectedData[i], data, "%d. data in parquet file differs from what's expected", i) 497 } 498 } 499 500 func timePtr(t Time) *Time { 501 return &t 502 } 503 504 func strPtr(s string) *string { 505 return &s 506 } 507 508 func TestWriteReadByteArrays(t *testing.T) { 509 _ = os.Mkdir("files", 0755) 510 511 sd, err := parquetschema.ParseSchemaDefinition( 512 `message test_msg { 513 required fixed_len_byte_array(4) foo; 514 optional fixed_len_byte_array(4) bar; 515 required binary baz; 516 optional binary quux; 517 }`) 518 require.NoError(t, err, "parsing schema definition failed") 519 520 t.Logf("schema definition: %s", spew.Sdump(sd)) 521 522 hlWriter, err := NewFileWriter( 523 "files/bytearrays.parquet", 524 goparquet.WithCompressionCodec(parquet.CompressionCodec_SNAPPY), 525 goparquet.WithCreator("floor-unittest"), 526 goparquet.WithSchemaDefinition(sd), 527 ) 528 require.NoError(t, err, "creating new file writer failed") 529 530 type testData struct { 531 Foo [4]byte 532 Bar []byte 533 Baz []byte 534 Quux []byte 535 } 536 537 data := []testData{ 538 {Foo: [4]byte{0, 1, 2, 3}, Bar: []byte{4, 5, 6, 7}, Baz: []byte{99}, Quux: []byte{100, 101}}, 539 {Foo: [4]byte{8, 9, 10, 11}, Baz: []byte("hello world!")}, 540 {Foo: [4]byte{12, 13, 14, 15}, Bar: []byte{16, 17, 18, 19}, Baz: []byte{155, 156, 157, 158, 159, 160}, Quux: []byte{180, 181, 182, 183}}, 541 } 542 543 for idx, record := range data { 544 require.NoError(t, hlWriter.Write(record), "%d. writing record failed", idx) 545 } 546 require.NoError(t, hlWriter.Close()) 547 548 hlReader, err := NewFileReader("files/bytearrays.parquet") 549 require.NoError(t, err, "creating new file reader failed") 550 551 var readData []testData 552 553 for hlReader.Next() { 554 var record testData 555 require.NoError(t, hlReader.Scan(&record)) 556 readData = append(readData, record) 557 } 558 559 require.Equal(t, data, readData, "data written and read back doesn't match") 560 } 561 562 func TestWriteFileWithMarshallerThenReadWithUnmarshaller(t *testing.T) { 563 _ = os.Mkdir("files", 0755) 564 565 sd, err := parquetschema.ParseSchemaDefinition( 566 `message test_msg { 567 required binary foo (STRING); 568 required int64 bar; 569 required group baz (LIST) { 570 repeated group list { 571 required group element { 572 required int64 quux; 573 } 574 } 575 } 576 }`) 577 require.NoError(t, err, "parsing schema definition failed") 578 579 t.Logf("schema definition: %s", spew.Sdump(sd)) 580 581 hlWriter, err := NewFileWriter( 582 "files/marshaller.parquet", 583 goparquet.WithCompressionCodec(parquet.CompressionCodec_SNAPPY), 584 goparquet.WithCreator("floor-unittest"), 585 goparquet.WithSchemaDefinition(sd), 586 ) 587 require.NoError(t, err, "creating new file writer failed") 588 589 testData := &marshTestRecord{foo: "hello world!", bar: 1234567, baz: []marshTestGroup{{quux: 23}, {quux: 42}}} 590 require.NoError(t, hlWriter.Write(testData), "writing object using marshaller failed") 591 592 require.NoError(t, hlWriter.Close()) 593 594 hlReader, err := NewFileReader("files/marshaller.parquet") 595 require.NoError(t, err, "opening file failed") 596 597 require.True(t, hlReader.Next()) 598 599 readData := &marshTestRecord{} 600 require.NoError(t, hlReader.Scan(readData)) 601 602 require.Equal(t, testData, readData, "written and read data don't match") 603 require.NoError(t, hlReader.Close()) 604 } 605 606 func BenchmarkWriteFile(b *testing.B) { 607 _ = os.Mkdir("files", 0755) 608 609 sd, err := parquetschema.ParseSchemaDefinition( 610 `message test_msg { 611 required int64 foo; 612 optional binary bar (STRING); 613 optional group baz (LIST) { 614 repeated group list { 615 required int32 element; 616 } 617 } 618 optional int64 ts (TIMESTAMP(NANOS, false)); 619 optional int64 time (TIME(NANOS, false)); 620 }`) 621 require.NoError(b, err, "parsing schema definition failed") 622 623 hlWriter, err := NewFileWriter( 624 "files/test.parquet", 625 goparquet.WithCompressionCodec(parquet.CompressionCodec_SNAPPY), 626 goparquet.WithCreator("floor-unittest"), 627 goparquet.WithSchemaDefinition(sd), 628 ) 629 require.NoError(b, err, "creating new file writer failed") 630 defer func() { 631 require.NoError(b, hlWriter.Close()) 632 }() 633 634 data := struct { 635 Foo int64 636 Bar *string 637 Baz []int32 638 Time *Time 639 }{ 640 42, strPtr("world!"), []int32{1, 1, 2, 3, 5}, nil, 641 } 642 643 b.ResetTimer() 644 for i := 0; i < b.N; i++ { 645 _ = hlWriter.Write(data) 646 } 647 } 648 649 type marshTestRecord struct { 650 foo string 651 bar int64 652 baz []marshTestGroup 653 } 654 655 type marshTestGroup struct { 656 quux int64 657 } 658 659 func (r *marshTestRecord) MarshalParquet(obj interfaces.MarshalObject) error { 660 obj.AddField("foo").SetByteArray([]byte(r.foo)) 661 obj.AddField("bar").SetInt64(r.bar) 662 list := obj.AddField("baz").List() 663 for _, b := range r.baz { 664 grp := list.Add().Group() 665 grp.AddField("quux").SetInt64(b.quux) 666 } 667 668 return nil 669 } 670 671 func (r *marshTestRecord) UnmarshalParquet(obj interfaces.UnmarshalObject) error { 672 foo := obj.GetField("foo") 673 if err := foo.Error(); err != nil { 674 return err 675 } 676 677 fooValue, err := foo.ByteArray() 678 if err != nil { 679 return err 680 } 681 682 r.foo = string(fooValue) 683 684 bar := obj.GetField("bar") 685 if err = bar.Error(); err != nil { 686 return err 687 } 688 689 barValue, err := bar.Int64() 690 if err != nil { 691 return err 692 } 693 694 r.bar = barValue 695 696 bazList, err := obj.GetField("baz").List() 697 if err != nil { 698 return err 699 } 700 701 for bazList.Next() { 702 v, err := bazList.Value() 703 if err != nil { 704 return err 705 } 706 707 grp, err := v.Group() 708 if err != nil { 709 return err 710 } 711 712 quux, err := grp.GetField("quux").Int64() 713 if err != nil { 714 return err 715 } 716 717 r.baz = append(r.baz, marshTestGroup{quux: quux}) 718 } 719 720 return nil 721 } 722 723 type testMsg struct { 724 ID int64 725 Foobar []string 726 } 727 728 func (m *testMsg) MarshalParquet(obj interfaces.MarshalObject) error { 729 obj.AddField("id").SetInt64(m.ID) 730 list := obj.AddField("foobar").List() 731 for _, elem := range m.Foobar { 732 list.Add().SetByteArray([]byte(elem)) 733 } 734 return nil 735 } 736 737 func (m *testMsg) UnmarshalParquet(obj interfaces.UnmarshalObject) error { 738 id, err := obj.GetField("id").Int64() 739 if err != nil { 740 return err 741 } 742 m.ID = id 743 list, err := obj.GetField("foobar").List() 744 if err == interfaces.ErrFieldNotPresent { 745 return nil 746 } 747 if err != nil { 748 return err 749 } 750 751 for list.Next() { 752 v, err := list.Value() 753 if err != nil { 754 return err 755 } 756 vv, err := v.ByteArray() 757 if err != nil { 758 return err 759 } 760 m.Foobar = append(m.Foobar, string(vv)) 761 } 762 763 return nil 764 } 765 766 func TestWriteEmptyList(t *testing.T) { 767 _ = os.Mkdir("files", 0755) 768 769 sd, err := parquetschema.ParseSchemaDefinition( 770 `message test_msg { 771 required int64 id; 772 optional group foobar (LIST) { 773 repeated group list { 774 required binary element (STRING); 775 } 776 } 777 }`) 778 require.NoError(t, err, "parsing schema definition failed") 779 780 t.Logf("schema definition: %s", spew.Sdump(sd)) 781 782 hlWriter, err := NewFileWriter( 783 "files/emptylist.parquet", 784 goparquet.WithCompressionCodec(parquet.CompressionCodec_SNAPPY), 785 goparquet.WithCreator("floor-unittest"), 786 goparquet.WithSchemaDefinition(sd), 787 ) 788 require.NoError(t, err, "creating new file writer failed") 789 790 testData1 := &testMsg{ID: 23, Foobar: nil} 791 require.NoError(t, hlWriter.Write(testData1), "writing object using marshaller failed") 792 793 testData2 := &testMsg{ID: 42, Foobar: []string{"so", "long", "and", "thanks", "for", "all", "the", "fish"}} 794 require.NoError(t, hlWriter.Write(testData2), "writing object using marshaller failed") 795 796 require.NoError(t, hlWriter.Write(testData1), "writing object using marshaller failed") 797 require.NoError(t, hlWriter.Write(testData2), "writing object using marshaller failed") 798 799 require.NoError(t, hlWriter.Close()) 800 801 hlReader, err := NewFileReader("files/emptylist.parquet") 802 require.NoError(t, err, "opening file failed") 803 804 require.True(t, hlReader.Next()) 805 806 readData1 := &testMsg{} 807 require.NoError(t, hlReader.Scan(readData1)) 808 require.Equal(t, testData1, readData1, "written and read data don't match") 809 810 readData2 := &testMsg{} 811 require.NoError(t, hlReader.Scan(readData2)) 812 require.Equal(t, testData1, readData2, "written and read data don't match") 813 814 readData3 := &testMsg{} 815 require.NoError(t, hlReader.Scan(readData3)) 816 require.Equal(t, testData1, readData3, "written and read data don't match") 817 818 readData4 := &testMsg{} 819 require.NoError(t, hlReader.Scan(readData4)) 820 require.Equal(t, testData1, readData4, "written and read data don't match") 821 822 require.NoError(t, hlReader.Close()) 823 }