github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/convert_test.go (about) 1 package parquet_test 2 3 import ( 4 "reflect" 5 "testing" 6 "time" 7 8 "github.com/parquet-go/parquet-go" 9 "github.com/parquet-go/parquet-go/deprecated" 10 ) 11 12 type AddressBook1 struct { 13 Owner string `parquet:"owner,zstd"` 14 OwnerPhoneNumbers []string `parquet:"ownerPhoneNumbers,gzip"` 15 } 16 17 type AddressBook2 struct { 18 Owner string `parquet:"owner,zstd"` 19 OwnerPhoneNumbers []string `parquet:"ownerPhoneNumbers,gzip"` 20 Contacts []Contact `parquet:"contacts"` 21 Extra string `parquet:"extra"` 22 } 23 24 type AddressBook3 struct { 25 Owner string `parquet:"owner,zstd"` 26 Contacts []Contact2 `parquet:"contacts"` 27 } 28 29 type Contact2 struct { 30 Name string `parquet:"name"` 31 PhoneNumbers []string `parquet:"phoneNumbers,zstd"` 32 Addresses []string `parquet:"addresses,zstd"` 33 } 34 35 type AddressBook4 struct { 36 Owner string `parquet:"owner,zstd"` 37 Contacts []Contact2 `parquet:"contacts"` 38 Extra string `parquet:"extra"` 39 } 40 41 type SimpleNumber struct { 42 Number *int64 `parquet:"number,optional"` 43 } 44 45 type SimpleContact struct { 46 Numbers []SimpleNumber `parquet:"numbers"` 47 } 48 49 type SimpleAddressBook struct { 50 Name string 51 Contact SimpleContact 52 } 53 54 type SimpleAddressBook2 struct { 55 Name string 56 Contact SimpleContact 57 Extra string 58 } 59 60 type ListOfIDs struct { 61 IDs []uint64 62 } 63 64 var conversionTests = [...]struct { 65 scenario string 66 from interface{} 67 to interface{} 68 }{ 69 { 70 scenario: "convert between rows which have the same schema", 71 from: AddressBook{ 72 Owner: "Julien Le Dem", 73 OwnerPhoneNumbers: []string{ 74 "555 123 4567", 75 "555 666 1337", 76 }, 77 Contacts: []Contact{ 78 { 79 Name: "Dmitriy Ryaboy", 80 PhoneNumber: "555 987 6543", 81 }, 82 { 83 Name: "Chris Aniszczyk", 84 }, 85 }, 86 }, 87 to: AddressBook{ 88 Owner: "Julien Le Dem", 89 OwnerPhoneNumbers: []string{ 90 "555 123 4567", 91 "555 666 1337", 92 }, 93 Contacts: []Contact{ 94 { 95 Name: "Dmitriy Ryaboy", 96 PhoneNumber: "555 987 6543", 97 }, 98 { 99 Name: "Chris Aniszczyk", 100 }, 101 }, 102 }, 103 }, 104 105 { 106 scenario: "missing column", 107 from: struct{ FirstName, LastName string }{FirstName: "Luke", LastName: "Skywalker"}, 108 to: struct{ LastName string }{LastName: "Skywalker"}, 109 }, 110 111 { 112 scenario: "missing optional column", 113 from: struct { 114 FirstName *string 115 LastName string 116 }{FirstName: newString("Luke"), LastName: "Skywalker"}, 117 to: struct{ LastName string }{LastName: "Skywalker"}, 118 }, 119 120 { 121 scenario: "missing repeated column", 122 from: struct { 123 ID uint64 124 Names []string 125 }{ID: 42, Names: []string{"me", "myself", "I"}}, 126 to: struct{ ID uint64 }{ID: 42}, 127 }, 128 129 { 130 scenario: "extra column", 131 from: struct{ LastName string }{LastName: "Skywalker"}, 132 to: struct{ FirstName, LastName string }{LastName: "Skywalker"}, 133 }, 134 135 { 136 scenario: "extra optional column", 137 from: struct{ ID uint64 }{ID: 2}, 138 to: struct { 139 ID uint64 140 Details *struct{ FirstName, LastName string } 141 }{ID: 2, Details: nil}, 142 }, 143 144 { 145 scenario: "extra repeated column", 146 from: struct{ ID uint64 }{ID: 1}, 147 to: struct { 148 ID uint64 149 Names []string 150 }{ID: 1, Names: []string{}}, 151 }, 152 153 { 154 scenario: "extra required column from repeated", 155 from: struct{ ListOfIDs ListOfIDs }{ 156 ListOfIDs: ListOfIDs{IDs: []uint64{0, 1, 2}}, 157 }, 158 to: struct { 159 MainID uint64 160 ListOfIDs ListOfIDs 161 }{ 162 ListOfIDs: ListOfIDs{IDs: []uint64{0, 1, 2}}, 163 }, 164 }, 165 166 { 167 scenario: "extra fields in repeated group", 168 from: struct{ Books []AddressBook1 }{ 169 Books: []AddressBook1{ 170 { 171 Owner: "me", 172 OwnerPhoneNumbers: []string{"123-456-7890", "321-654-0987"}, 173 }, 174 { 175 Owner: "you", 176 OwnerPhoneNumbers: []string{"000-000-0000"}, 177 }, 178 }, 179 }, 180 to: struct{ Books []AddressBook2 }{ 181 Books: []AddressBook2{ 182 { 183 Owner: "me", 184 OwnerPhoneNumbers: []string{"123-456-7890", "321-654-0987"}, 185 Contacts: []Contact{}, 186 }, 187 { 188 Owner: "you", 189 OwnerPhoneNumbers: []string{"000-000-0000"}, 190 Contacts: []Contact{}, 191 }, 192 }, 193 }, 194 }, 195 196 { 197 scenario: "extra column on complex struct", 198 from: AddressBook{ 199 Owner: "Julien Le Dem", 200 OwnerPhoneNumbers: []string{}, 201 Contacts: []Contact{ 202 { 203 Name: "Dmitriy Ryaboy", 204 PhoneNumber: "555 987 6543", 205 }, 206 { 207 Name: "Chris Aniszczyk", 208 }, 209 }, 210 }, 211 to: AddressBook2{ 212 Owner: "Julien Le Dem", 213 OwnerPhoneNumbers: []string{}, 214 Contacts: []Contact{ 215 { 216 Name: "Dmitriy Ryaboy", 217 PhoneNumber: "555 987 6543", 218 }, 219 { 220 Name: "Chris Aniszczyk", 221 }, 222 }, 223 }, 224 }, 225 226 { 227 scenario: "required to optional leaf", 228 from: struct{ Name string }{Name: "Luke"}, 229 to: struct{ Name *string }{Name: newString("Luke")}, 230 }, 231 232 { 233 scenario: "required to repeated leaf", 234 from: struct{ Name string }{Name: "Luke"}, 235 to: struct{ Name []string }{Name: []string{"Luke"}}, 236 }, 237 238 { 239 scenario: "optional to required leaf", 240 from: struct{ Name *string }{Name: newString("Luke")}, 241 to: struct{ Name string }{Name: "Luke"}, 242 }, 243 244 { 245 scenario: "optional to repeated leaf", 246 from: struct{ Name *string }{Name: newString("Luke")}, 247 to: struct{ Name []string }{Name: []string{"Luke"}}, 248 }, 249 250 { 251 scenario: "optional to repeated leaf (null)", 252 from: struct{ Name *string }{Name: nil}, 253 to: struct{ Name []string }{Name: []string{}}, 254 }, 255 256 { 257 scenario: "repeated to required leaf", 258 from: struct{ Name []string }{Name: []string{"Luke", "Han", "Leia"}}, 259 to: struct{ Name string }{Name: "Luke"}, 260 }, 261 262 { 263 scenario: "repeated to optional leaf", 264 from: struct{ Name []string }{Name: []string{"Luke", "Han", "Leia"}}, 265 to: struct{ Name *string }{Name: newString("Luke")}, 266 }, 267 268 { 269 scenario: "required to optional group", 270 from: struct{ Book AddressBook }{ 271 Book: AddressBook{ 272 Owner: "Julien Le Dem", 273 OwnerPhoneNumbers: []string{ 274 "555 123 4567", 275 "555 666 1337", 276 }, 277 Contacts: []Contact{ 278 { 279 Name: "Dmitriy Ryaboy", 280 PhoneNumber: "555 987 6543", 281 }, 282 { 283 Name: "Chris Aniszczyk", 284 }, 285 }, 286 }, 287 }, 288 to: struct{ Book *AddressBook }{ 289 Book: &AddressBook{ 290 Owner: "Julien Le Dem", 291 OwnerPhoneNumbers: []string{ 292 "555 123 4567", 293 "555 666 1337", 294 }, 295 Contacts: []Contact{ 296 { 297 Name: "Dmitriy Ryaboy", 298 PhoneNumber: "555 987 6543", 299 }, 300 { 301 Name: "Chris Aniszczyk", 302 }, 303 }, 304 }, 305 }, 306 }, 307 308 { 309 scenario: "required to optional group (empty)", 310 from: struct{ Book AddressBook }{ 311 Book: AddressBook{}, 312 }, 313 to: struct{ Book *AddressBook }{ 314 Book: &AddressBook{ 315 OwnerPhoneNumbers: []string{}, 316 Contacts: []Contact{}, 317 }, 318 }, 319 }, 320 321 { 322 scenario: "optional to required group (null)", 323 from: struct{ Book *AddressBook }{ 324 Book: nil, 325 }, 326 to: struct{ Book AddressBook }{ 327 Book: AddressBook{ 328 OwnerPhoneNumbers: []string{}, 329 Contacts: []Contact{}, 330 }, 331 }, 332 }, 333 334 { 335 scenario: "optional to repeated group (null)", 336 from: struct{ Book *AddressBook }{Book: nil}, 337 to: struct{ Book []AddressBook }{Book: []AddressBook{}}, 338 }, 339 340 { 341 scenario: "optional to repeated optional group (null)", 342 from: struct{ Book *AddressBook }{Book: nil}, 343 to: struct{ Book []*AddressBook }{Book: []*AddressBook{}}, 344 }, 345 346 { 347 scenario: "handle nested repeated elements during conversion", 348 from: AddressBook3{ 349 Owner: "Julien Le Dem", 350 Contacts: []Contact2{ 351 { 352 Name: "Dmitriy Ryaboy", 353 PhoneNumbers: []string{ 354 "555 987 6543", 355 "555 123 4567", 356 }, 357 Addresses: []string{}, 358 }, 359 { 360 Name: "Chris Aniszczyk", 361 PhoneNumbers: []string{ 362 "555 345 8129", 363 }, 364 Addresses: []string{ 365 "42 Wallaby Way Sydney", 366 "1 White House Way", 367 }, 368 }, 369 { 370 Name: "Bob Ross", 371 PhoneNumbers: []string{ 372 "555 198 3628", 373 }, 374 Addresses: []string{ 375 "::1", 376 }, 377 }, 378 }, 379 }, 380 to: AddressBook4{ 381 Owner: "Julien Le Dem", 382 Contacts: []Contact2{ 383 { 384 Name: "Dmitriy Ryaboy", 385 PhoneNumbers: []string{ 386 "555 987 6543", 387 "555 123 4567", 388 }, 389 Addresses: []string{}, 390 }, 391 { 392 Name: "Chris Aniszczyk", 393 PhoneNumbers: []string{ 394 "555 345 8129", 395 }, 396 Addresses: []string{ 397 "42 Wallaby Way Sydney", 398 "1 White House Way", 399 }, 400 }, 401 { 402 Name: "Bob Ross", 403 PhoneNumbers: []string{ 404 "555 198 3628", 405 }, 406 Addresses: []string{ 407 "::1", 408 }, 409 }, 410 }, 411 Extra: "", 412 }, 413 }, 414 415 { 416 scenario: "handle nested repeated elements during conversion", 417 from: SimpleAddressBook{ 418 Name: "New Contact", 419 Contact: SimpleContact{ 420 Numbers: []SimpleNumber{ 421 { 422 Number: nil, 423 }, 424 { 425 Number: newInt64(1329), 426 }, 427 }, 428 }, 429 }, 430 to: SimpleAddressBook2{ 431 Name: "New Contact", 432 Contact: SimpleContact{ 433 Numbers: []SimpleNumber{ 434 { 435 Number: nil, 436 }, 437 { 438 Number: newInt64(1329), 439 }, 440 }, 441 }, 442 Extra: "", 443 }, 444 }, 445 } 446 447 func TestConvert(t *testing.T) { 448 for _, test := range conversionTests { 449 t.Run(test.scenario, func(t *testing.T) { 450 to := parquet.SchemaOf(test.to) 451 from := parquet.SchemaOf(test.from) 452 453 conv, err := parquet.Convert(to, from) 454 if err != nil { 455 t.Fatal(err) 456 } 457 458 row := from.Deconstruct(nil, test.from) 459 rowbuf := []parquet.Row{row} 460 n, err := conv.Convert(rowbuf) 461 if err != nil { 462 t.Fatal(err) 463 } 464 if n != 1 { 465 t.Errorf("wrong number of rows got converted: want=1 got=%d", n) 466 } 467 row = rowbuf[0] 468 469 value := reflect.New(reflect.TypeOf(test.to)) 470 if err := to.Reconstruct(value.Interface(), row); err != nil { 471 t.Fatal(err) 472 } 473 474 value = value.Elem() 475 if !reflect.DeepEqual(value.Interface(), test.to) { 476 t.Errorf("converted value mismatch:\nwant = %#v\ngot = %#v", test.to, value.Interface()) 477 } 478 }) 479 } 480 } 481 482 func newInt64(i int64) *int64 { return &i } 483 func newString(s string) *string { return &s } 484 485 func TestConvertValue(t *testing.T) { 486 now := time.Unix(42, 0) 487 ms := now.UnixMilli() 488 us := now.UnixMicro() 489 ns := now.UnixNano() 490 491 msType := parquet.Timestamp(parquet.Millisecond).Type() 492 msVal := parquet.ValueOf(ms) 493 if msVal.Int64() != ms { 494 t.Errorf("converted value mismatch:\nwant = %+v\ngot = %+v", ms, msVal.Int64()) 495 } 496 497 usType := parquet.Timestamp(parquet.Microsecond).Type() 498 usVal := parquet.ValueOf(us) 499 if usVal.Int64() != us { 500 t.Errorf("converted value mismatch:\nwant = %+v\ngot = %+v", us, usVal.Int64()) 501 } 502 503 nsType := parquet.Timestamp(parquet.Nanosecond).Type() 504 nsVal := parquet.ValueOf(ns) 505 if nsVal.Int64() != ns { 506 t.Errorf("converted value mismatch:\nwant = %+v\ngot = %+v", ns, nsVal.Int64()) 507 } 508 509 var timestampConversionTests = [...]struct { 510 scenario string 511 fromType parquet.Type 512 fromValue parquet.Value 513 toType parquet.Type 514 toValue parquet.Value 515 }{ 516 { 517 scenario: "true to boolean", 518 fromType: parquet.BooleanType, 519 fromValue: parquet.BooleanValue(true), 520 toType: parquet.BooleanType, 521 toValue: parquet.BooleanValue(true), 522 }, 523 524 { 525 scenario: "true to int32", 526 fromType: parquet.BooleanType, 527 fromValue: parquet.BooleanValue(true), 528 toType: parquet.Int32Type, 529 toValue: parquet.Int32Value(1), 530 }, 531 532 { 533 scenario: "true to int64", 534 fromType: parquet.BooleanType, 535 fromValue: parquet.BooleanValue(true), 536 toType: parquet.Int64Type, 537 toValue: parquet.Int64Value(1), 538 }, 539 540 { 541 scenario: "true to int96", 542 fromType: parquet.BooleanType, 543 fromValue: parquet.BooleanValue(true), 544 toType: parquet.Int96Type, 545 toValue: parquet.Int96Value(deprecated.Int96{0: 1}), 546 }, 547 548 { 549 scenario: "true to float", 550 fromType: parquet.BooleanType, 551 fromValue: parquet.BooleanValue(true), 552 toType: parquet.FloatType, 553 toValue: parquet.FloatValue(1), 554 }, 555 556 { 557 scenario: "true to double", 558 fromType: parquet.BooleanType, 559 fromValue: parquet.BooleanValue(true), 560 toType: parquet.FloatType, 561 toValue: parquet.FloatValue(1), 562 }, 563 564 { 565 scenario: "true to byte array", 566 fromType: parquet.BooleanType, 567 fromValue: parquet.BooleanValue(true), 568 toType: parquet.ByteArrayType, 569 toValue: parquet.ByteArrayValue([]byte{1}), 570 }, 571 572 { 573 scenario: "true to fixed length byte array", 574 fromType: parquet.BooleanType, 575 fromValue: parquet.BooleanValue(true), 576 toType: parquet.FixedLenByteArrayType(4), 577 toValue: parquet.FixedLenByteArrayValue([]byte{1, 0, 0, 0}), 578 }, 579 580 { 581 scenario: "true to string", 582 fromType: parquet.BooleanType, 583 fromValue: parquet.BooleanValue(true), 584 toType: parquet.String().Type(), 585 toValue: parquet.ByteArrayValue([]byte(`true`)), 586 }, 587 588 { 589 scenario: "false to boolean", 590 fromType: parquet.BooleanType, 591 fromValue: parquet.BooleanValue(false), 592 toType: parquet.BooleanType, 593 toValue: parquet.BooleanValue(false), 594 }, 595 596 { 597 scenario: "false to int32", 598 fromType: parquet.BooleanType, 599 fromValue: parquet.BooleanValue(false), 600 toType: parquet.Int32Type, 601 toValue: parquet.Int32Value(0), 602 }, 603 604 { 605 scenario: "false to int64", 606 fromType: parquet.BooleanType, 607 fromValue: parquet.BooleanValue(false), 608 toType: parquet.Int64Type, 609 toValue: parquet.Int64Value(0), 610 }, 611 612 { 613 scenario: "false to int96", 614 fromType: parquet.BooleanType, 615 fromValue: parquet.BooleanValue(false), 616 toType: parquet.Int96Type, 617 toValue: parquet.Int96Value(deprecated.Int96{}), 618 }, 619 620 { 621 scenario: "false to float", 622 fromType: parquet.BooleanType, 623 fromValue: parquet.BooleanValue(false), 624 toType: parquet.FloatType, 625 toValue: parquet.FloatValue(0), 626 }, 627 628 { 629 scenario: "false to double", 630 fromType: parquet.BooleanType, 631 fromValue: parquet.BooleanValue(false), 632 toType: parquet.FloatType, 633 toValue: parquet.FloatValue(0), 634 }, 635 636 { 637 scenario: "false to byte array", 638 fromType: parquet.BooleanType, 639 fromValue: parquet.BooleanValue(false), 640 toType: parquet.ByteArrayType, 641 toValue: parquet.ByteArrayValue([]byte{0}), 642 }, 643 644 { 645 scenario: "false to fixed length byte array", 646 fromType: parquet.BooleanType, 647 fromValue: parquet.BooleanValue(false), 648 toType: parquet.FixedLenByteArrayType(4), 649 toValue: parquet.FixedLenByteArrayValue([]byte{0, 0, 0, 0}), 650 }, 651 652 { 653 scenario: "false to string", 654 fromType: parquet.BooleanType, 655 fromValue: parquet.BooleanValue(false), 656 toType: parquet.String().Type(), 657 toValue: parquet.ByteArrayValue([]byte(`false`)), 658 }, 659 660 { 661 scenario: "int32 to true", 662 fromType: parquet.Int32Type, 663 fromValue: parquet.Int32Value(10), 664 toType: parquet.BooleanType, 665 toValue: parquet.BooleanValue(true), 666 }, 667 668 { 669 scenario: "int32 to false", 670 fromType: parquet.Int32Type, 671 fromValue: parquet.Int32Value(0), 672 toType: parquet.BooleanType, 673 toValue: parquet.BooleanValue(false), 674 }, 675 676 { 677 scenario: "int32 to int32", 678 fromType: parquet.Int32Type, 679 fromValue: parquet.Int32Value(42), 680 toType: parquet.Int32Type, 681 toValue: parquet.Int32Value(42), 682 }, 683 684 { 685 scenario: "int32 to int64", 686 fromType: parquet.Int32Type, 687 fromValue: parquet.Int32Value(-21), 688 toType: parquet.Int64Type, 689 toValue: parquet.Int64Value(-21), 690 }, 691 692 { 693 scenario: "int32 to int96", 694 fromType: parquet.Int32Type, 695 fromValue: parquet.Int32Value(123), 696 toType: parquet.Int96Type, 697 toValue: parquet.Int96Value(deprecated.Int96{0: 123}), 698 }, 699 700 { 701 scenario: "int32 to float", 702 fromType: parquet.Int32Type, 703 fromValue: parquet.Int32Value(9), 704 toType: parquet.FloatType, 705 toValue: parquet.FloatValue(9), 706 }, 707 708 { 709 scenario: "int32 to double", 710 fromType: parquet.Int32Type, 711 fromValue: parquet.Int32Value(100), 712 toType: parquet.DoubleType, 713 toValue: parquet.DoubleValue(100), 714 }, 715 716 { 717 scenario: "int32 to byte array", 718 fromType: parquet.Int32Type, 719 fromValue: parquet.Int32Value(1 << 8), 720 toType: parquet.ByteArrayType, 721 toValue: parquet.ByteArrayValue([]byte{0, 1, 0, 0}), 722 }, 723 724 { 725 scenario: "int32 to fixed length byte array", 726 fromType: parquet.Int32Type, 727 fromValue: parquet.Int32Value(1 << 8), 728 toType: parquet.FixedLenByteArrayType(3), 729 toValue: parquet.FixedLenByteArrayValue([]byte{0, 1, 0}), 730 }, 731 732 { 733 scenario: "int32 to string", 734 fromType: parquet.Int32Type, 735 fromValue: parquet.Int32Value(12345), 736 toType: parquet.String().Type(), 737 toValue: parquet.ByteArrayValue([]byte(`12345`)), 738 }, 739 740 { 741 scenario: "int64 to true", 742 fromType: parquet.Int64Type, 743 fromValue: parquet.Int64Value(10), 744 toType: parquet.BooleanType, 745 toValue: parquet.BooleanValue(true), 746 }, 747 748 { 749 scenario: "int64 to false", 750 fromType: parquet.Int64Type, 751 fromValue: parquet.Int64Value(0), 752 toType: parquet.BooleanType, 753 toValue: parquet.BooleanValue(false), 754 }, 755 756 { 757 scenario: "int64 to int32", 758 fromType: parquet.Int64Type, 759 fromValue: parquet.Int64Value(-21), 760 toType: parquet.Int32Type, 761 toValue: parquet.Int32Value(-21), 762 }, 763 764 { 765 scenario: "int64 to int64", 766 fromType: parquet.Int64Type, 767 fromValue: parquet.Int64Value(42), 768 toType: parquet.Int64Type, 769 toValue: parquet.Int64Value(42), 770 }, 771 772 { 773 scenario: "int64 to int96", 774 fromType: parquet.Int64Type, 775 fromValue: parquet.Int64Value(123), 776 toType: parquet.Int96Type, 777 toValue: parquet.Int96Value(deprecated.Int96{0: 123}), 778 }, 779 780 { 781 scenario: "int64 to float", 782 fromType: parquet.Int64Type, 783 fromValue: parquet.Int64Value(9), 784 toType: parquet.FloatType, 785 toValue: parquet.FloatValue(9), 786 }, 787 788 { 789 scenario: "int64 to double", 790 fromType: parquet.Int64Type, 791 fromValue: parquet.Int64Value(100), 792 toType: parquet.DoubleType, 793 toValue: parquet.DoubleValue(100), 794 }, 795 796 { 797 scenario: "int64 to byte array", 798 fromType: parquet.Int64Type, 799 fromValue: parquet.Int64Value(1 << 8), 800 toType: parquet.ByteArrayType, 801 toValue: parquet.ByteArrayValue([]byte{0, 1, 0, 0, 0, 0, 0, 0}), 802 }, 803 804 { 805 scenario: "int64 to fixed length byte array", 806 fromType: parquet.Int64Type, 807 fromValue: parquet.Int64Value(1 << 8), 808 toType: parquet.FixedLenByteArrayType(3), 809 toValue: parquet.FixedLenByteArrayValue([]byte{0, 1, 0}), 810 }, 811 812 { 813 scenario: "int64 to string", 814 fromType: parquet.Int64Type, 815 fromValue: parquet.Int64Value(1234567890), 816 toType: parquet.String().Type(), 817 toValue: parquet.ByteArrayValue([]byte(`1234567890`)), 818 }, 819 820 { 821 scenario: "float to true", 822 fromType: parquet.FloatType, 823 fromValue: parquet.FloatValue(0.1), 824 toType: parquet.BooleanType, 825 toValue: parquet.BooleanValue(true), 826 }, 827 828 { 829 scenario: "float to false", 830 fromType: parquet.FloatType, 831 fromValue: parquet.FloatValue(0), 832 toType: parquet.BooleanType, 833 toValue: parquet.BooleanValue(false), 834 }, 835 836 { 837 scenario: "float to int32", 838 fromType: parquet.FloatType, 839 fromValue: parquet.FloatValue(9.9), 840 toType: parquet.Int32Type, 841 toValue: parquet.Int32Value(9), 842 }, 843 844 { 845 scenario: "float to int64", 846 fromType: parquet.FloatType, 847 fromValue: parquet.FloatValue(-1.5), 848 toType: parquet.Int64Type, 849 toValue: parquet.Int64Value(-1), 850 }, 851 852 { 853 scenario: "float to float", 854 fromType: parquet.FloatType, 855 fromValue: parquet.FloatValue(1.234), 856 toType: parquet.FloatType, 857 toValue: parquet.FloatValue(1.234), 858 }, 859 860 { 861 scenario: "float to double", 862 fromType: parquet.FloatType, 863 fromValue: parquet.FloatValue(-0.5), 864 toType: parquet.DoubleType, 865 toValue: parquet.DoubleValue(-0.5), 866 }, 867 868 { 869 scenario: "float to string", 870 fromType: parquet.FloatType, 871 fromValue: parquet.FloatValue(0.125), 872 toType: parquet.String().Type(), 873 toValue: parquet.ByteArrayValue([]byte(`0.125`)), 874 }, 875 876 { 877 scenario: "double to true", 878 fromType: parquet.DoubleType, 879 fromValue: parquet.DoubleValue(0.1), 880 toType: parquet.BooleanType, 881 toValue: parquet.BooleanValue(true), 882 }, 883 884 { 885 scenario: "double to false", 886 fromType: parquet.DoubleType, 887 fromValue: parquet.DoubleValue(0), 888 toType: parquet.BooleanType, 889 toValue: parquet.BooleanValue(false), 890 }, 891 892 { 893 scenario: "double to int32", 894 fromType: parquet.DoubleType, 895 fromValue: parquet.DoubleValue(9.9), 896 toType: parquet.Int32Type, 897 toValue: parquet.Int32Value(9), 898 }, 899 900 { 901 scenario: "double to int64", 902 fromType: parquet.DoubleType, 903 fromValue: parquet.DoubleValue(-1.5), 904 toType: parquet.Int64Type, 905 toValue: parquet.Int64Value(-1), 906 }, 907 908 { 909 scenario: "double to float", 910 fromType: parquet.DoubleType, 911 fromValue: parquet.DoubleValue(1.234), 912 toType: parquet.FloatType, 913 toValue: parquet.FloatValue(1.234), 914 }, 915 916 { 917 scenario: "double to double", 918 fromType: parquet.DoubleType, 919 fromValue: parquet.DoubleValue(-0.5), 920 toType: parquet.DoubleType, 921 toValue: parquet.DoubleValue(-0.5), 922 }, 923 924 { 925 scenario: "double to string", 926 fromType: parquet.DoubleType, 927 fromValue: parquet.DoubleValue(0.125), 928 toType: parquet.String().Type(), 929 toValue: parquet.ByteArrayValue([]byte(`0.125`)), 930 }, 931 932 { 933 scenario: "string to true", 934 fromType: parquet.String().Type(), 935 fromValue: parquet.ByteArrayValue([]byte(`true`)), 936 toType: parquet.BooleanType, 937 toValue: parquet.BooleanValue(true), 938 }, 939 940 { 941 scenario: "string to false", 942 fromType: parquet.String().Type(), 943 fromValue: parquet.ByteArrayValue([]byte(`false`)), 944 toType: parquet.BooleanType, 945 toValue: parquet.BooleanValue(false), 946 }, 947 948 { 949 scenario: "string to int32", 950 fromType: parquet.String().Type(), 951 fromValue: parquet.ByteArrayValue([]byte(`-21`)), 952 toType: parquet.Int32Type, 953 toValue: parquet.Int32Value(-21), 954 }, 955 956 { 957 scenario: "string to int64", 958 fromType: parquet.String().Type(), 959 fromValue: parquet.ByteArrayValue([]byte(`42`)), 960 toType: parquet.Int64Type, 961 toValue: parquet.Int64Value(42), 962 }, 963 964 { 965 scenario: "string to int96", 966 fromType: parquet.String().Type(), 967 fromValue: parquet.ByteArrayValue([]byte(`123`)), 968 toType: parquet.Int96Type, 969 toValue: parquet.Int96Value(deprecated.Int96{0: 123}), 970 }, 971 972 { 973 scenario: "string to float", 974 fromType: parquet.String().Type(), 975 fromValue: parquet.ByteArrayValue([]byte(`-0.5`)), 976 toType: parquet.FloatType, 977 toValue: parquet.FloatValue(-0.5), 978 }, 979 980 { 981 scenario: "string to double", 982 fromType: parquet.String().Type(), 983 fromValue: parquet.ByteArrayValue([]byte(`0.5`)), 984 toType: parquet.DoubleType, 985 toValue: parquet.DoubleValue(0.5), 986 }, 987 988 { 989 scenario: "string to byte array", 990 fromType: parquet.String().Type(), 991 fromValue: parquet.ByteArrayValue([]byte(`ABC`)), 992 toType: parquet.ByteArrayType, 993 toValue: parquet.ByteArrayValue([]byte(`ABC`)), 994 }, 995 996 { 997 scenario: "string to fixed length byte array", 998 fromType: parquet.String().Type(), 999 fromValue: parquet.ByteArrayValue([]byte(`99B816772522447EBF76821A7C5ADF65`)), 1000 toType: parquet.FixedLenByteArrayType(16), 1001 toValue: parquet.FixedLenByteArrayValue([]byte{ 1002 0x99, 0xb8, 0x16, 0x77, 0x25, 0x22, 0x44, 0x7e, 1003 0xbf, 0x76, 0x82, 0x1a, 0x7c, 0x5a, 0xdf, 0x65, 1004 }), 1005 }, 1006 1007 { 1008 scenario: "string to string", 1009 fromType: parquet.String().Type(), 1010 fromValue: parquet.ByteArrayValue([]byte(`Hello World!`)), 1011 toType: parquet.String().Type(), 1012 toValue: parquet.ByteArrayValue([]byte(`Hello World!`)), 1013 }, 1014 1015 { 1016 scenario: "string to date", 1017 fromType: parquet.String().Type(), 1018 fromValue: parquet.ByteArrayValue([]byte(`1970-01-03`)), 1019 toType: parquet.Date().Type(), 1020 toValue: parquet.Int32Value(2), 1021 }, 1022 1023 { 1024 scenario: "string to millisecond time", 1025 fromType: parquet.String().Type(), 1026 fromValue: parquet.ByteArrayValue([]byte(`12:34:56.789`)), 1027 toType: parquet.Time(parquet.Millisecond).Type(), 1028 toValue: parquet.Int32Value(45296789), 1029 }, 1030 1031 { 1032 scenario: "string to microsecond time", 1033 fromType: parquet.String().Type(), 1034 fromValue: parquet.ByteArrayValue([]byte(`12:34:56.789012`)), 1035 toType: parquet.Time(parquet.Microsecond).Type(), 1036 toValue: parquet.Int64Value(45296789012), 1037 }, 1038 1039 { 1040 scenario: "date to millisecond timestamp", 1041 fromType: parquet.Date().Type(), 1042 fromValue: parquet.Int32Value(19338), 1043 toType: parquet.Timestamp(parquet.Millisecond).Type(), 1044 toValue: parquet.Int64Value(1670803200000), 1045 }, 1046 1047 { 1048 scenario: "date to microsecond timestamp", 1049 fromType: parquet.Date().Type(), 1050 fromValue: parquet.Int32Value(19338), 1051 toType: parquet.Timestamp(parquet.Microsecond).Type(), 1052 toValue: parquet.Int64Value(1670803200000000), 1053 }, 1054 1055 { 1056 scenario: "date to string", 1057 fromType: parquet.Date().Type(), 1058 fromValue: parquet.Int32Value(18995), 1059 toType: parquet.String().Type(), 1060 toValue: parquet.ByteArrayValue([]byte(`2022-01-03`)), 1061 }, 1062 1063 { 1064 scenario: "millisecond time to string", 1065 fromType: parquet.Time(parquet.Millisecond).Type(), 1066 fromValue: parquet.Int32Value(45296789), 1067 toType: parquet.String().Type(), 1068 toValue: parquet.ByteArrayValue([]byte(`12:34:56.789`)), 1069 }, 1070 1071 { 1072 scenario: "microsecond time to string", 1073 fromType: parquet.Time(parquet.Microsecond).Type(), 1074 fromValue: parquet.Int64Value(45296789012), 1075 toType: parquet.String().Type(), 1076 toValue: parquet.ByteArrayValue([]byte(`12:34:56.789012`)), 1077 }, 1078 1079 { 1080 scenario: "millisecond timestamp to date", 1081 fromType: parquet.Timestamp(parquet.Millisecond).Type(), 1082 fromValue: parquet.Int64Value(1670888613000), 1083 toType: parquet.Date().Type(), 1084 toValue: parquet.Int32Value(19338), 1085 }, 1086 1087 { 1088 scenario: "microsecond timestamp to date", 1089 fromType: parquet.Timestamp(parquet.Microsecond).Type(), 1090 fromValue: parquet.Int64Value(1670888613000123), 1091 toType: parquet.Date().Type(), 1092 toValue: parquet.Int32Value(19338), 1093 }, 1094 1095 { 1096 scenario: "millisecond timestamp to millisecond time", 1097 fromType: parquet.Timestamp(parquet.Millisecond).Type(), 1098 fromValue: parquet.Int64Value(1670888613123), 1099 toType: parquet.Time(parquet.Millisecond).Type(), 1100 toValue: parquet.Int32Value(85413123), 1101 }, 1102 1103 { 1104 scenario: "millisecond timestamp to micronsecond time", 1105 fromType: parquet.Timestamp(parquet.Millisecond).Type(), 1106 fromValue: parquet.Int64Value(1670888613123), 1107 toType: parquet.Time(parquet.Microsecond).Type(), 1108 toValue: parquet.Int64Value(85413123000), 1109 }, 1110 1111 { 1112 scenario: "microsecond timestamp to millisecond time", 1113 fromType: parquet.Timestamp(parquet.Microsecond).Type(), 1114 fromValue: parquet.Int64Value(1670888613123456), 1115 toType: parquet.Time(parquet.Millisecond).Type(), 1116 toValue: parquet.Int32Value(85413123), 1117 }, 1118 1119 { 1120 scenario: "microsecond timestamp to micronsecond time", 1121 fromType: parquet.Timestamp(parquet.Microsecond).Type(), 1122 fromValue: parquet.Int64Value(1670888613123456), 1123 toType: parquet.Time(parquet.Microsecond).Type(), 1124 toValue: parquet.Int64Value(85413123456), 1125 }, 1126 1127 { 1128 scenario: "micros to nanos", 1129 fromType: usType, 1130 fromValue: usVal, 1131 toType: nsType, 1132 toValue: parquet.Int64Value(ns), 1133 }, 1134 1135 { 1136 scenario: "millis to nanos", 1137 fromType: msType, 1138 fromValue: msVal, 1139 toType: nsType, 1140 toValue: parquet.Int64Value(ns), 1141 }, 1142 1143 { 1144 scenario: "nanos to micros", 1145 fromType: nsType, 1146 fromValue: nsVal, 1147 toType: usType, 1148 toValue: parquet.Int64Value(us), 1149 }, 1150 1151 { 1152 scenario: "nanos to nanos", 1153 fromType: nsType, 1154 fromValue: nsVal, 1155 toType: nsType, 1156 toValue: parquet.Int64Value(ns), 1157 }, 1158 1159 { 1160 scenario: "int64 to nanos", 1161 fromType: parquet.Int64Type, 1162 fromValue: nsVal, 1163 toType: nsType, 1164 toValue: parquet.Int64Value(ns), 1165 }, 1166 1167 { 1168 scenario: "int64 to int64", 1169 fromType: parquet.Int64Type, 1170 fromValue: nsVal, 1171 toType: parquet.Int64Type, 1172 toValue: parquet.Int64Value(ns), 1173 }, 1174 } 1175 1176 for _, test := range timestampConversionTests { 1177 t.Run(test.scenario, func(t *testing.T) { 1178 // Set levels to ensure that they are retained by the conversion. 1179 from := test.fromValue.Level(1, 2, 3) 1180 want := test.toValue.Level(1, 2, 3) 1181 1182 got, err := test.toType.ConvertValue(from, test.fromType) 1183 if err != nil { 1184 t.Fatal(err) 1185 } 1186 1187 if !parquet.DeepEqual(want, got) { 1188 t.Errorf("converted value mismatch:\nwant = %+v\ngot = %+v", want, got) 1189 } 1190 }) 1191 } 1192 } 1193 1194 func TestMissingColumnChunk(t *testing.T) { 1195 type stringRow struct{ StringVal string } 1196 schema := parquet.SchemaOf(&stringRow{}) 1197 buffer := parquet.NewGenericBuffer[stringRow](schema) 1198 if _, err := buffer.Write([]stringRow{{"hello"}, {"world"}}); err != nil { 1199 t.Fatal(err) 1200 } 1201 1202 type boolRow struct{ BoolValue bool } 1203 conv := convertMissingColumn{ 1204 schema: parquet.SchemaOf(&boolRow{}), 1205 } 1206 boolRowGroup := parquet.ConvertRowGroup(buffer, conv) 1207 chunk := boolRowGroup.ColumnChunks()[0] 1208 1209 t.Run("chunk values", func(t *testing.T) { 1210 if chunk.NumValues() != buffer.NumRows() { 1211 t.Fatal("chunk values mismatch, got", chunk.NumValues(), "want", buffer.NumRows()) 1212 } 1213 }) 1214 1215 t.Run("slice page", func(t *testing.T) { 1216 page, err := chunk.Pages().ReadPage() 1217 if err != nil { 1218 t.Fatal(err) 1219 } 1220 1221 if page.NumValues() != buffer.NumRows() { 1222 t.Fatalf("page size mismatch: want = %d, got = %d", buffer.NumRows(), page.NumValues()) 1223 } 1224 if size := page.Slice(0, 1).NumValues(); size != 1 { 1225 t.Fatalf("page slice size mismatch: want = %d, got = %d", 1, size) 1226 } 1227 }) 1228 } 1229 1230 type convertMissingColumn struct { 1231 schema *parquet.Schema 1232 } 1233 1234 func (m convertMissingColumn) Column(_ int) int { return -1 } 1235 func (m convertMissingColumn) Schema() *parquet.Schema { return m.schema } 1236 func (m convertMissingColumn) Convert(rows []parquet.Row) (int, error) { return len(rows), nil }