github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/convert_test.go (about)

     1  package parquet_test
     2  
     3  import (
     4  	"reflect"
     5  	"testing"
     6  	"time"
     7  
     8  	"github.com/parquet-go/parquet-go"
     9  	"github.com/parquet-go/parquet-go/deprecated"
    10  )
    11  
    12  type AddressBook1 struct {
    13  	Owner             string   `parquet:"owner,zstd"`
    14  	OwnerPhoneNumbers []string `parquet:"ownerPhoneNumbers,gzip"`
    15  }
    16  
    17  type AddressBook2 struct {
    18  	Owner             string    `parquet:"owner,zstd"`
    19  	OwnerPhoneNumbers []string  `parquet:"ownerPhoneNumbers,gzip"`
    20  	Contacts          []Contact `parquet:"contacts"`
    21  	Extra             string    `parquet:"extra"`
    22  }
    23  
    24  type AddressBook3 struct {
    25  	Owner    string     `parquet:"owner,zstd"`
    26  	Contacts []Contact2 `parquet:"contacts"`
    27  }
    28  
    29  type Contact2 struct {
    30  	Name         string   `parquet:"name"`
    31  	PhoneNumbers []string `parquet:"phoneNumbers,zstd"`
    32  	Addresses    []string `parquet:"addresses,zstd"`
    33  }
    34  
    35  type AddressBook4 struct {
    36  	Owner    string     `parquet:"owner,zstd"`
    37  	Contacts []Contact2 `parquet:"contacts"`
    38  	Extra    string     `parquet:"extra"`
    39  }
    40  
    41  type SimpleNumber struct {
    42  	Number *int64 `parquet:"number,optional"`
    43  }
    44  
    45  type SimpleContact struct {
    46  	Numbers []SimpleNumber `parquet:"numbers"`
    47  }
    48  
    49  type SimpleAddressBook struct {
    50  	Name    string
    51  	Contact SimpleContact
    52  }
    53  
    54  type SimpleAddressBook2 struct {
    55  	Name    string
    56  	Contact SimpleContact
    57  	Extra   string
    58  }
    59  
    60  type ListOfIDs struct {
    61  	IDs []uint64
    62  }
    63  
    64  var conversionTests = [...]struct {
    65  	scenario string
    66  	from     interface{}
    67  	to       interface{}
    68  }{
    69  	{
    70  		scenario: "convert between rows which have the same schema",
    71  		from: AddressBook{
    72  			Owner: "Julien Le Dem",
    73  			OwnerPhoneNumbers: []string{
    74  				"555 123 4567",
    75  				"555 666 1337",
    76  			},
    77  			Contacts: []Contact{
    78  				{
    79  					Name:        "Dmitriy Ryaboy",
    80  					PhoneNumber: "555 987 6543",
    81  				},
    82  				{
    83  					Name: "Chris Aniszczyk",
    84  				},
    85  			},
    86  		},
    87  		to: AddressBook{
    88  			Owner: "Julien Le Dem",
    89  			OwnerPhoneNumbers: []string{
    90  				"555 123 4567",
    91  				"555 666 1337",
    92  			},
    93  			Contacts: []Contact{
    94  				{
    95  					Name:        "Dmitriy Ryaboy",
    96  					PhoneNumber: "555 987 6543",
    97  				},
    98  				{
    99  					Name: "Chris Aniszczyk",
   100  				},
   101  			},
   102  		},
   103  	},
   104  
   105  	{
   106  		scenario: "missing column",
   107  		from:     struct{ FirstName, LastName string }{FirstName: "Luke", LastName: "Skywalker"},
   108  		to:       struct{ LastName string }{LastName: "Skywalker"},
   109  	},
   110  
   111  	{
   112  		scenario: "missing optional column",
   113  		from: struct {
   114  			FirstName *string
   115  			LastName  string
   116  		}{FirstName: newString("Luke"), LastName: "Skywalker"},
   117  		to: struct{ LastName string }{LastName: "Skywalker"},
   118  	},
   119  
   120  	{
   121  		scenario: "missing repeated column",
   122  		from: struct {
   123  			ID    uint64
   124  			Names []string
   125  		}{ID: 42, Names: []string{"me", "myself", "I"}},
   126  		to: struct{ ID uint64 }{ID: 42},
   127  	},
   128  
   129  	{
   130  		scenario: "extra column",
   131  		from:     struct{ LastName string }{LastName: "Skywalker"},
   132  		to:       struct{ FirstName, LastName string }{LastName: "Skywalker"},
   133  	},
   134  
   135  	{
   136  		scenario: "extra optional column",
   137  		from:     struct{ ID uint64 }{ID: 2},
   138  		to: struct {
   139  			ID      uint64
   140  			Details *struct{ FirstName, LastName string }
   141  		}{ID: 2, Details: nil},
   142  	},
   143  
   144  	{
   145  		scenario: "extra repeated column",
   146  		from:     struct{ ID uint64 }{ID: 1},
   147  		to: struct {
   148  			ID    uint64
   149  			Names []string
   150  		}{ID: 1, Names: []string{}},
   151  	},
   152  
   153  	{
   154  		scenario: "extra required column from repeated",
   155  		from: struct{ ListOfIDs ListOfIDs }{
   156  			ListOfIDs: ListOfIDs{IDs: []uint64{0, 1, 2}},
   157  		},
   158  		to: struct {
   159  			MainID    uint64
   160  			ListOfIDs ListOfIDs
   161  		}{
   162  			ListOfIDs: ListOfIDs{IDs: []uint64{0, 1, 2}},
   163  		},
   164  	},
   165  
   166  	{
   167  		scenario: "extra fields in repeated group",
   168  		from: struct{ Books []AddressBook1 }{
   169  			Books: []AddressBook1{
   170  				{
   171  					Owner:             "me",
   172  					OwnerPhoneNumbers: []string{"123-456-7890", "321-654-0987"},
   173  				},
   174  				{
   175  					Owner:             "you",
   176  					OwnerPhoneNumbers: []string{"000-000-0000"},
   177  				},
   178  			},
   179  		},
   180  		to: struct{ Books []AddressBook2 }{
   181  			Books: []AddressBook2{
   182  				{
   183  					Owner:             "me",
   184  					OwnerPhoneNumbers: []string{"123-456-7890", "321-654-0987"},
   185  					Contacts:          []Contact{},
   186  				},
   187  				{
   188  					Owner:             "you",
   189  					OwnerPhoneNumbers: []string{"000-000-0000"},
   190  					Contacts:          []Contact{},
   191  				},
   192  			},
   193  		},
   194  	},
   195  
   196  	{
   197  		scenario: "extra column on complex struct",
   198  		from: AddressBook{
   199  			Owner:             "Julien Le Dem",
   200  			OwnerPhoneNumbers: []string{},
   201  			Contacts: []Contact{
   202  				{
   203  					Name:        "Dmitriy Ryaboy",
   204  					PhoneNumber: "555 987 6543",
   205  				},
   206  				{
   207  					Name: "Chris Aniszczyk",
   208  				},
   209  			},
   210  		},
   211  		to: AddressBook2{
   212  			Owner:             "Julien Le Dem",
   213  			OwnerPhoneNumbers: []string{},
   214  			Contacts: []Contact{
   215  				{
   216  					Name:        "Dmitriy Ryaboy",
   217  					PhoneNumber: "555 987 6543",
   218  				},
   219  				{
   220  					Name: "Chris Aniszczyk",
   221  				},
   222  			},
   223  		},
   224  	},
   225  
   226  	{
   227  		scenario: "required to optional leaf",
   228  		from:     struct{ Name string }{Name: "Luke"},
   229  		to:       struct{ Name *string }{Name: newString("Luke")},
   230  	},
   231  
   232  	{
   233  		scenario: "required to repeated leaf",
   234  		from:     struct{ Name string }{Name: "Luke"},
   235  		to:       struct{ Name []string }{Name: []string{"Luke"}},
   236  	},
   237  
   238  	{
   239  		scenario: "optional to required leaf",
   240  		from:     struct{ Name *string }{Name: newString("Luke")},
   241  		to:       struct{ Name string }{Name: "Luke"},
   242  	},
   243  
   244  	{
   245  		scenario: "optional to repeated leaf",
   246  		from:     struct{ Name *string }{Name: newString("Luke")},
   247  		to:       struct{ Name []string }{Name: []string{"Luke"}},
   248  	},
   249  
   250  	{
   251  		scenario: "optional to repeated leaf (null)",
   252  		from:     struct{ Name *string }{Name: nil},
   253  		to:       struct{ Name []string }{Name: []string{}},
   254  	},
   255  
   256  	{
   257  		scenario: "repeated to required leaf",
   258  		from:     struct{ Name []string }{Name: []string{"Luke", "Han", "Leia"}},
   259  		to:       struct{ Name string }{Name: "Luke"},
   260  	},
   261  
   262  	{
   263  		scenario: "repeated to optional leaf",
   264  		from:     struct{ Name []string }{Name: []string{"Luke", "Han", "Leia"}},
   265  		to:       struct{ Name *string }{Name: newString("Luke")},
   266  	},
   267  
   268  	{
   269  		scenario: "required to optional group",
   270  		from: struct{ Book AddressBook }{
   271  			Book: AddressBook{
   272  				Owner: "Julien Le Dem",
   273  				OwnerPhoneNumbers: []string{
   274  					"555 123 4567",
   275  					"555 666 1337",
   276  				},
   277  				Contacts: []Contact{
   278  					{
   279  						Name:        "Dmitriy Ryaboy",
   280  						PhoneNumber: "555 987 6543",
   281  					},
   282  					{
   283  						Name: "Chris Aniszczyk",
   284  					},
   285  				},
   286  			},
   287  		},
   288  		to: struct{ Book *AddressBook }{
   289  			Book: &AddressBook{
   290  				Owner: "Julien Le Dem",
   291  				OwnerPhoneNumbers: []string{
   292  					"555 123 4567",
   293  					"555 666 1337",
   294  				},
   295  				Contacts: []Contact{
   296  					{
   297  						Name:        "Dmitriy Ryaboy",
   298  						PhoneNumber: "555 987 6543",
   299  					},
   300  					{
   301  						Name: "Chris Aniszczyk",
   302  					},
   303  				},
   304  			},
   305  		},
   306  	},
   307  
   308  	{
   309  		scenario: "required to optional group (empty)",
   310  		from: struct{ Book AddressBook }{
   311  			Book: AddressBook{},
   312  		},
   313  		to: struct{ Book *AddressBook }{
   314  			Book: &AddressBook{
   315  				OwnerPhoneNumbers: []string{},
   316  				Contacts:          []Contact{},
   317  			},
   318  		},
   319  	},
   320  
   321  	{
   322  		scenario: "optional to required group (null)",
   323  		from: struct{ Book *AddressBook }{
   324  			Book: nil,
   325  		},
   326  		to: struct{ Book AddressBook }{
   327  			Book: AddressBook{
   328  				OwnerPhoneNumbers: []string{},
   329  				Contacts:          []Contact{},
   330  			},
   331  		},
   332  	},
   333  
   334  	{
   335  		scenario: "optional to repeated group (null)",
   336  		from:     struct{ Book *AddressBook }{Book: nil},
   337  		to:       struct{ Book []AddressBook }{Book: []AddressBook{}},
   338  	},
   339  
   340  	{
   341  		scenario: "optional to repeated optional group (null)",
   342  		from:     struct{ Book *AddressBook }{Book: nil},
   343  		to:       struct{ Book []*AddressBook }{Book: []*AddressBook{}},
   344  	},
   345  
   346  	{
   347  		scenario: "handle nested repeated elements during conversion",
   348  		from: AddressBook3{
   349  			Owner: "Julien Le Dem",
   350  			Contacts: []Contact2{
   351  				{
   352  					Name: "Dmitriy Ryaboy",
   353  					PhoneNumbers: []string{
   354  						"555 987 6543",
   355  						"555 123 4567",
   356  					},
   357  					Addresses: []string{},
   358  				},
   359  				{
   360  					Name: "Chris Aniszczyk",
   361  					PhoneNumbers: []string{
   362  						"555 345 8129",
   363  					},
   364  					Addresses: []string{
   365  						"42 Wallaby Way Sydney",
   366  						"1 White House Way",
   367  					},
   368  				},
   369  				{
   370  					Name: "Bob Ross",
   371  					PhoneNumbers: []string{
   372  						"555 198 3628",
   373  					},
   374  					Addresses: []string{
   375  						"::1",
   376  					},
   377  				},
   378  			},
   379  		},
   380  		to: AddressBook4{
   381  			Owner: "Julien Le Dem",
   382  			Contacts: []Contact2{
   383  				{
   384  					Name: "Dmitriy Ryaboy",
   385  					PhoneNumbers: []string{
   386  						"555 987 6543",
   387  						"555 123 4567",
   388  					},
   389  					Addresses: []string{},
   390  				},
   391  				{
   392  					Name: "Chris Aniszczyk",
   393  					PhoneNumbers: []string{
   394  						"555 345 8129",
   395  					},
   396  					Addresses: []string{
   397  						"42 Wallaby Way Sydney",
   398  						"1 White House Way",
   399  					},
   400  				},
   401  				{
   402  					Name: "Bob Ross",
   403  					PhoneNumbers: []string{
   404  						"555 198 3628",
   405  					},
   406  					Addresses: []string{
   407  						"::1",
   408  					},
   409  				},
   410  			},
   411  			Extra: "",
   412  		},
   413  	},
   414  
   415  	{
   416  		scenario: "handle nested repeated elements during conversion",
   417  		from: SimpleAddressBook{
   418  			Name: "New Contact",
   419  			Contact: SimpleContact{
   420  				Numbers: []SimpleNumber{
   421  					{
   422  						Number: nil,
   423  					},
   424  					{
   425  						Number: newInt64(1329),
   426  					},
   427  				},
   428  			},
   429  		},
   430  		to: SimpleAddressBook2{
   431  			Name: "New Contact",
   432  			Contact: SimpleContact{
   433  				Numbers: []SimpleNumber{
   434  					{
   435  						Number: nil,
   436  					},
   437  					{
   438  						Number: newInt64(1329),
   439  					},
   440  				},
   441  			},
   442  			Extra: "",
   443  		},
   444  	},
   445  }
   446  
   447  func TestConvert(t *testing.T) {
   448  	for _, test := range conversionTests {
   449  		t.Run(test.scenario, func(t *testing.T) {
   450  			to := parquet.SchemaOf(test.to)
   451  			from := parquet.SchemaOf(test.from)
   452  
   453  			conv, err := parquet.Convert(to, from)
   454  			if err != nil {
   455  				t.Fatal(err)
   456  			}
   457  
   458  			row := from.Deconstruct(nil, test.from)
   459  			rowbuf := []parquet.Row{row}
   460  			n, err := conv.Convert(rowbuf)
   461  			if err != nil {
   462  				t.Fatal(err)
   463  			}
   464  			if n != 1 {
   465  				t.Errorf("wrong number of rows got converted: want=1 got=%d", n)
   466  			}
   467  			row = rowbuf[0]
   468  
   469  			value := reflect.New(reflect.TypeOf(test.to))
   470  			if err := to.Reconstruct(value.Interface(), row); err != nil {
   471  				t.Fatal(err)
   472  			}
   473  
   474  			value = value.Elem()
   475  			if !reflect.DeepEqual(value.Interface(), test.to) {
   476  				t.Errorf("converted value mismatch:\nwant = %#v\ngot  = %#v", test.to, value.Interface())
   477  			}
   478  		})
   479  	}
   480  }
   481  
   482  func newInt64(i int64) *int64    { return &i }
   483  func newString(s string) *string { return &s }
   484  
   485  func TestConvertValue(t *testing.T) {
   486  	now := time.Unix(42, 0)
   487  	ms := now.UnixMilli()
   488  	us := now.UnixMicro()
   489  	ns := now.UnixNano()
   490  
   491  	msType := parquet.Timestamp(parquet.Millisecond).Type()
   492  	msVal := parquet.ValueOf(ms)
   493  	if msVal.Int64() != ms {
   494  		t.Errorf("converted value mismatch:\nwant = %+v\ngot  = %+v", ms, msVal.Int64())
   495  	}
   496  
   497  	usType := parquet.Timestamp(parquet.Microsecond).Type()
   498  	usVal := parquet.ValueOf(us)
   499  	if usVal.Int64() != us {
   500  		t.Errorf("converted value mismatch:\nwant = %+v\ngot  = %+v", us, usVal.Int64())
   501  	}
   502  
   503  	nsType := parquet.Timestamp(parquet.Nanosecond).Type()
   504  	nsVal := parquet.ValueOf(ns)
   505  	if nsVal.Int64() != ns {
   506  		t.Errorf("converted value mismatch:\nwant = %+v\ngot  = %+v", ns, nsVal.Int64())
   507  	}
   508  
   509  	var timestampConversionTests = [...]struct {
   510  		scenario  string
   511  		fromType  parquet.Type
   512  		fromValue parquet.Value
   513  		toType    parquet.Type
   514  		toValue   parquet.Value
   515  	}{
   516  		{
   517  			scenario:  "true to boolean",
   518  			fromType:  parquet.BooleanType,
   519  			fromValue: parquet.BooleanValue(true),
   520  			toType:    parquet.BooleanType,
   521  			toValue:   parquet.BooleanValue(true),
   522  		},
   523  
   524  		{
   525  			scenario:  "true to int32",
   526  			fromType:  parquet.BooleanType,
   527  			fromValue: parquet.BooleanValue(true),
   528  			toType:    parquet.Int32Type,
   529  			toValue:   parquet.Int32Value(1),
   530  		},
   531  
   532  		{
   533  			scenario:  "true to int64",
   534  			fromType:  parquet.BooleanType,
   535  			fromValue: parquet.BooleanValue(true),
   536  			toType:    parquet.Int64Type,
   537  			toValue:   parquet.Int64Value(1),
   538  		},
   539  
   540  		{
   541  			scenario:  "true to int96",
   542  			fromType:  parquet.BooleanType,
   543  			fromValue: parquet.BooleanValue(true),
   544  			toType:    parquet.Int96Type,
   545  			toValue:   parquet.Int96Value(deprecated.Int96{0: 1}),
   546  		},
   547  
   548  		{
   549  			scenario:  "true to float",
   550  			fromType:  parquet.BooleanType,
   551  			fromValue: parquet.BooleanValue(true),
   552  			toType:    parquet.FloatType,
   553  			toValue:   parquet.FloatValue(1),
   554  		},
   555  
   556  		{
   557  			scenario:  "true to double",
   558  			fromType:  parquet.BooleanType,
   559  			fromValue: parquet.BooleanValue(true),
   560  			toType:    parquet.FloatType,
   561  			toValue:   parquet.FloatValue(1),
   562  		},
   563  
   564  		{
   565  			scenario:  "true to byte array",
   566  			fromType:  parquet.BooleanType,
   567  			fromValue: parquet.BooleanValue(true),
   568  			toType:    parquet.ByteArrayType,
   569  			toValue:   parquet.ByteArrayValue([]byte{1}),
   570  		},
   571  
   572  		{
   573  			scenario:  "true to fixed length byte array",
   574  			fromType:  parquet.BooleanType,
   575  			fromValue: parquet.BooleanValue(true),
   576  			toType:    parquet.FixedLenByteArrayType(4),
   577  			toValue:   parquet.FixedLenByteArrayValue([]byte{1, 0, 0, 0}),
   578  		},
   579  
   580  		{
   581  			scenario:  "true to string",
   582  			fromType:  parquet.BooleanType,
   583  			fromValue: parquet.BooleanValue(true),
   584  			toType:    parquet.String().Type(),
   585  			toValue:   parquet.ByteArrayValue([]byte(`true`)),
   586  		},
   587  
   588  		{
   589  			scenario:  "false to boolean",
   590  			fromType:  parquet.BooleanType,
   591  			fromValue: parquet.BooleanValue(false),
   592  			toType:    parquet.BooleanType,
   593  			toValue:   parquet.BooleanValue(false),
   594  		},
   595  
   596  		{
   597  			scenario:  "false to int32",
   598  			fromType:  parquet.BooleanType,
   599  			fromValue: parquet.BooleanValue(false),
   600  			toType:    parquet.Int32Type,
   601  			toValue:   parquet.Int32Value(0),
   602  		},
   603  
   604  		{
   605  			scenario:  "false to int64",
   606  			fromType:  parquet.BooleanType,
   607  			fromValue: parquet.BooleanValue(false),
   608  			toType:    parquet.Int64Type,
   609  			toValue:   parquet.Int64Value(0),
   610  		},
   611  
   612  		{
   613  			scenario:  "false to int96",
   614  			fromType:  parquet.BooleanType,
   615  			fromValue: parquet.BooleanValue(false),
   616  			toType:    parquet.Int96Type,
   617  			toValue:   parquet.Int96Value(deprecated.Int96{}),
   618  		},
   619  
   620  		{
   621  			scenario:  "false to float",
   622  			fromType:  parquet.BooleanType,
   623  			fromValue: parquet.BooleanValue(false),
   624  			toType:    parquet.FloatType,
   625  			toValue:   parquet.FloatValue(0),
   626  		},
   627  
   628  		{
   629  			scenario:  "false to double",
   630  			fromType:  parquet.BooleanType,
   631  			fromValue: parquet.BooleanValue(false),
   632  			toType:    parquet.FloatType,
   633  			toValue:   parquet.FloatValue(0),
   634  		},
   635  
   636  		{
   637  			scenario:  "false to byte array",
   638  			fromType:  parquet.BooleanType,
   639  			fromValue: parquet.BooleanValue(false),
   640  			toType:    parquet.ByteArrayType,
   641  			toValue:   parquet.ByteArrayValue([]byte{0}),
   642  		},
   643  
   644  		{
   645  			scenario:  "false to fixed length byte array",
   646  			fromType:  parquet.BooleanType,
   647  			fromValue: parquet.BooleanValue(false),
   648  			toType:    parquet.FixedLenByteArrayType(4),
   649  			toValue:   parquet.FixedLenByteArrayValue([]byte{0, 0, 0, 0}),
   650  		},
   651  
   652  		{
   653  			scenario:  "false to string",
   654  			fromType:  parquet.BooleanType,
   655  			fromValue: parquet.BooleanValue(false),
   656  			toType:    parquet.String().Type(),
   657  			toValue:   parquet.ByteArrayValue([]byte(`false`)),
   658  		},
   659  
   660  		{
   661  			scenario:  "int32 to true",
   662  			fromType:  parquet.Int32Type,
   663  			fromValue: parquet.Int32Value(10),
   664  			toType:    parquet.BooleanType,
   665  			toValue:   parquet.BooleanValue(true),
   666  		},
   667  
   668  		{
   669  			scenario:  "int32 to false",
   670  			fromType:  parquet.Int32Type,
   671  			fromValue: parquet.Int32Value(0),
   672  			toType:    parquet.BooleanType,
   673  			toValue:   parquet.BooleanValue(false),
   674  		},
   675  
   676  		{
   677  			scenario:  "int32 to int32",
   678  			fromType:  parquet.Int32Type,
   679  			fromValue: parquet.Int32Value(42),
   680  			toType:    parquet.Int32Type,
   681  			toValue:   parquet.Int32Value(42),
   682  		},
   683  
   684  		{
   685  			scenario:  "int32 to int64",
   686  			fromType:  parquet.Int32Type,
   687  			fromValue: parquet.Int32Value(-21),
   688  			toType:    parquet.Int64Type,
   689  			toValue:   parquet.Int64Value(-21),
   690  		},
   691  
   692  		{
   693  			scenario:  "int32 to int96",
   694  			fromType:  parquet.Int32Type,
   695  			fromValue: parquet.Int32Value(123),
   696  			toType:    parquet.Int96Type,
   697  			toValue:   parquet.Int96Value(deprecated.Int96{0: 123}),
   698  		},
   699  
   700  		{
   701  			scenario:  "int32 to float",
   702  			fromType:  parquet.Int32Type,
   703  			fromValue: parquet.Int32Value(9),
   704  			toType:    parquet.FloatType,
   705  			toValue:   parquet.FloatValue(9),
   706  		},
   707  
   708  		{
   709  			scenario:  "int32 to double",
   710  			fromType:  parquet.Int32Type,
   711  			fromValue: parquet.Int32Value(100),
   712  			toType:    parquet.DoubleType,
   713  			toValue:   parquet.DoubleValue(100),
   714  		},
   715  
   716  		{
   717  			scenario:  "int32 to byte array",
   718  			fromType:  parquet.Int32Type,
   719  			fromValue: parquet.Int32Value(1 << 8),
   720  			toType:    parquet.ByteArrayType,
   721  			toValue:   parquet.ByteArrayValue([]byte{0, 1, 0, 0}),
   722  		},
   723  
   724  		{
   725  			scenario:  "int32 to fixed length byte array",
   726  			fromType:  parquet.Int32Type,
   727  			fromValue: parquet.Int32Value(1 << 8),
   728  			toType:    parquet.FixedLenByteArrayType(3),
   729  			toValue:   parquet.FixedLenByteArrayValue([]byte{0, 1, 0}),
   730  		},
   731  
   732  		{
   733  			scenario:  "int32 to string",
   734  			fromType:  parquet.Int32Type,
   735  			fromValue: parquet.Int32Value(12345),
   736  			toType:    parquet.String().Type(),
   737  			toValue:   parquet.ByteArrayValue([]byte(`12345`)),
   738  		},
   739  
   740  		{
   741  			scenario:  "int64 to true",
   742  			fromType:  parquet.Int64Type,
   743  			fromValue: parquet.Int64Value(10),
   744  			toType:    parquet.BooleanType,
   745  			toValue:   parquet.BooleanValue(true),
   746  		},
   747  
   748  		{
   749  			scenario:  "int64 to false",
   750  			fromType:  parquet.Int64Type,
   751  			fromValue: parquet.Int64Value(0),
   752  			toType:    parquet.BooleanType,
   753  			toValue:   parquet.BooleanValue(false),
   754  		},
   755  
   756  		{
   757  			scenario:  "int64 to int32",
   758  			fromType:  parquet.Int64Type,
   759  			fromValue: parquet.Int64Value(-21),
   760  			toType:    parquet.Int32Type,
   761  			toValue:   parquet.Int32Value(-21),
   762  		},
   763  
   764  		{
   765  			scenario:  "int64 to int64",
   766  			fromType:  parquet.Int64Type,
   767  			fromValue: parquet.Int64Value(42),
   768  			toType:    parquet.Int64Type,
   769  			toValue:   parquet.Int64Value(42),
   770  		},
   771  
   772  		{
   773  			scenario:  "int64 to int96",
   774  			fromType:  parquet.Int64Type,
   775  			fromValue: parquet.Int64Value(123),
   776  			toType:    parquet.Int96Type,
   777  			toValue:   parquet.Int96Value(deprecated.Int96{0: 123}),
   778  		},
   779  
   780  		{
   781  			scenario:  "int64 to float",
   782  			fromType:  parquet.Int64Type,
   783  			fromValue: parquet.Int64Value(9),
   784  			toType:    parquet.FloatType,
   785  			toValue:   parquet.FloatValue(9),
   786  		},
   787  
   788  		{
   789  			scenario:  "int64 to double",
   790  			fromType:  parquet.Int64Type,
   791  			fromValue: parquet.Int64Value(100),
   792  			toType:    parquet.DoubleType,
   793  			toValue:   parquet.DoubleValue(100),
   794  		},
   795  
   796  		{
   797  			scenario:  "int64 to byte array",
   798  			fromType:  parquet.Int64Type,
   799  			fromValue: parquet.Int64Value(1 << 8),
   800  			toType:    parquet.ByteArrayType,
   801  			toValue:   parquet.ByteArrayValue([]byte{0, 1, 0, 0, 0, 0, 0, 0}),
   802  		},
   803  
   804  		{
   805  			scenario:  "int64 to fixed length byte array",
   806  			fromType:  parquet.Int64Type,
   807  			fromValue: parquet.Int64Value(1 << 8),
   808  			toType:    parquet.FixedLenByteArrayType(3),
   809  			toValue:   parquet.FixedLenByteArrayValue([]byte{0, 1, 0}),
   810  		},
   811  
   812  		{
   813  			scenario:  "int64 to string",
   814  			fromType:  parquet.Int64Type,
   815  			fromValue: parquet.Int64Value(1234567890),
   816  			toType:    parquet.String().Type(),
   817  			toValue:   parquet.ByteArrayValue([]byte(`1234567890`)),
   818  		},
   819  
   820  		{
   821  			scenario:  "float to true",
   822  			fromType:  parquet.FloatType,
   823  			fromValue: parquet.FloatValue(0.1),
   824  			toType:    parquet.BooleanType,
   825  			toValue:   parquet.BooleanValue(true),
   826  		},
   827  
   828  		{
   829  			scenario:  "float to false",
   830  			fromType:  parquet.FloatType,
   831  			fromValue: parquet.FloatValue(0),
   832  			toType:    parquet.BooleanType,
   833  			toValue:   parquet.BooleanValue(false),
   834  		},
   835  
   836  		{
   837  			scenario:  "float to int32",
   838  			fromType:  parquet.FloatType,
   839  			fromValue: parquet.FloatValue(9.9),
   840  			toType:    parquet.Int32Type,
   841  			toValue:   parquet.Int32Value(9),
   842  		},
   843  
   844  		{
   845  			scenario:  "float to int64",
   846  			fromType:  parquet.FloatType,
   847  			fromValue: parquet.FloatValue(-1.5),
   848  			toType:    parquet.Int64Type,
   849  			toValue:   parquet.Int64Value(-1),
   850  		},
   851  
   852  		{
   853  			scenario:  "float to float",
   854  			fromType:  parquet.FloatType,
   855  			fromValue: parquet.FloatValue(1.234),
   856  			toType:    parquet.FloatType,
   857  			toValue:   parquet.FloatValue(1.234),
   858  		},
   859  
   860  		{
   861  			scenario:  "float to double",
   862  			fromType:  parquet.FloatType,
   863  			fromValue: parquet.FloatValue(-0.5),
   864  			toType:    parquet.DoubleType,
   865  			toValue:   parquet.DoubleValue(-0.5),
   866  		},
   867  
   868  		{
   869  			scenario:  "float to string",
   870  			fromType:  parquet.FloatType,
   871  			fromValue: parquet.FloatValue(0.125),
   872  			toType:    parquet.String().Type(),
   873  			toValue:   parquet.ByteArrayValue([]byte(`0.125`)),
   874  		},
   875  
   876  		{
   877  			scenario:  "double to true",
   878  			fromType:  parquet.DoubleType,
   879  			fromValue: parquet.DoubleValue(0.1),
   880  			toType:    parquet.BooleanType,
   881  			toValue:   parquet.BooleanValue(true),
   882  		},
   883  
   884  		{
   885  			scenario:  "double to false",
   886  			fromType:  parquet.DoubleType,
   887  			fromValue: parquet.DoubleValue(0),
   888  			toType:    parquet.BooleanType,
   889  			toValue:   parquet.BooleanValue(false),
   890  		},
   891  
   892  		{
   893  			scenario:  "double to int32",
   894  			fromType:  parquet.DoubleType,
   895  			fromValue: parquet.DoubleValue(9.9),
   896  			toType:    parquet.Int32Type,
   897  			toValue:   parquet.Int32Value(9),
   898  		},
   899  
   900  		{
   901  			scenario:  "double to int64",
   902  			fromType:  parquet.DoubleType,
   903  			fromValue: parquet.DoubleValue(-1.5),
   904  			toType:    parquet.Int64Type,
   905  			toValue:   parquet.Int64Value(-1),
   906  		},
   907  
   908  		{
   909  			scenario:  "double to float",
   910  			fromType:  parquet.DoubleType,
   911  			fromValue: parquet.DoubleValue(1.234),
   912  			toType:    parquet.FloatType,
   913  			toValue:   parquet.FloatValue(1.234),
   914  		},
   915  
   916  		{
   917  			scenario:  "double to double",
   918  			fromType:  parquet.DoubleType,
   919  			fromValue: parquet.DoubleValue(-0.5),
   920  			toType:    parquet.DoubleType,
   921  			toValue:   parquet.DoubleValue(-0.5),
   922  		},
   923  
   924  		{
   925  			scenario:  "double to string",
   926  			fromType:  parquet.DoubleType,
   927  			fromValue: parquet.DoubleValue(0.125),
   928  			toType:    parquet.String().Type(),
   929  			toValue:   parquet.ByteArrayValue([]byte(`0.125`)),
   930  		},
   931  
   932  		{
   933  			scenario:  "string to true",
   934  			fromType:  parquet.String().Type(),
   935  			fromValue: parquet.ByteArrayValue([]byte(`true`)),
   936  			toType:    parquet.BooleanType,
   937  			toValue:   parquet.BooleanValue(true),
   938  		},
   939  
   940  		{
   941  			scenario:  "string to false",
   942  			fromType:  parquet.String().Type(),
   943  			fromValue: parquet.ByteArrayValue([]byte(`false`)),
   944  			toType:    parquet.BooleanType,
   945  			toValue:   parquet.BooleanValue(false),
   946  		},
   947  
   948  		{
   949  			scenario:  "string to int32",
   950  			fromType:  parquet.String().Type(),
   951  			fromValue: parquet.ByteArrayValue([]byte(`-21`)),
   952  			toType:    parquet.Int32Type,
   953  			toValue:   parquet.Int32Value(-21),
   954  		},
   955  
   956  		{
   957  			scenario:  "string to int64",
   958  			fromType:  parquet.String().Type(),
   959  			fromValue: parquet.ByteArrayValue([]byte(`42`)),
   960  			toType:    parquet.Int64Type,
   961  			toValue:   parquet.Int64Value(42),
   962  		},
   963  
   964  		{
   965  			scenario:  "string to int96",
   966  			fromType:  parquet.String().Type(),
   967  			fromValue: parquet.ByteArrayValue([]byte(`123`)),
   968  			toType:    parquet.Int96Type,
   969  			toValue:   parquet.Int96Value(deprecated.Int96{0: 123}),
   970  		},
   971  
   972  		{
   973  			scenario:  "string to float",
   974  			fromType:  parquet.String().Type(),
   975  			fromValue: parquet.ByteArrayValue([]byte(`-0.5`)),
   976  			toType:    parquet.FloatType,
   977  			toValue:   parquet.FloatValue(-0.5),
   978  		},
   979  
   980  		{
   981  			scenario:  "string to double",
   982  			fromType:  parquet.String().Type(),
   983  			fromValue: parquet.ByteArrayValue([]byte(`0.5`)),
   984  			toType:    parquet.DoubleType,
   985  			toValue:   parquet.DoubleValue(0.5),
   986  		},
   987  
   988  		{
   989  			scenario:  "string to byte array",
   990  			fromType:  parquet.String().Type(),
   991  			fromValue: parquet.ByteArrayValue([]byte(`ABC`)),
   992  			toType:    parquet.ByteArrayType,
   993  			toValue:   parquet.ByteArrayValue([]byte(`ABC`)),
   994  		},
   995  
   996  		{
   997  			scenario:  "string to fixed length byte array",
   998  			fromType:  parquet.String().Type(),
   999  			fromValue: parquet.ByteArrayValue([]byte(`99B816772522447EBF76821A7C5ADF65`)),
  1000  			toType:    parquet.FixedLenByteArrayType(16),
  1001  			toValue: parquet.FixedLenByteArrayValue([]byte{
  1002  				0x99, 0xb8, 0x16, 0x77, 0x25, 0x22, 0x44, 0x7e,
  1003  				0xbf, 0x76, 0x82, 0x1a, 0x7c, 0x5a, 0xdf, 0x65,
  1004  			}),
  1005  		},
  1006  
  1007  		{
  1008  			scenario:  "string to string",
  1009  			fromType:  parquet.String().Type(),
  1010  			fromValue: parquet.ByteArrayValue([]byte(`Hello World!`)),
  1011  			toType:    parquet.String().Type(),
  1012  			toValue:   parquet.ByteArrayValue([]byte(`Hello World!`)),
  1013  		},
  1014  
  1015  		{
  1016  			scenario:  "string to date",
  1017  			fromType:  parquet.String().Type(),
  1018  			fromValue: parquet.ByteArrayValue([]byte(`1970-01-03`)),
  1019  			toType:    parquet.Date().Type(),
  1020  			toValue:   parquet.Int32Value(2),
  1021  		},
  1022  
  1023  		{
  1024  			scenario:  "string to millisecond time",
  1025  			fromType:  parquet.String().Type(),
  1026  			fromValue: parquet.ByteArrayValue([]byte(`12:34:56.789`)),
  1027  			toType:    parquet.Time(parquet.Millisecond).Type(),
  1028  			toValue:   parquet.Int32Value(45296789),
  1029  		},
  1030  
  1031  		{
  1032  			scenario:  "string to microsecond time",
  1033  			fromType:  parquet.String().Type(),
  1034  			fromValue: parquet.ByteArrayValue([]byte(`12:34:56.789012`)),
  1035  			toType:    parquet.Time(parquet.Microsecond).Type(),
  1036  			toValue:   parquet.Int64Value(45296789012),
  1037  		},
  1038  
  1039  		{
  1040  			scenario:  "date to millisecond timestamp",
  1041  			fromType:  parquet.Date().Type(),
  1042  			fromValue: parquet.Int32Value(19338),
  1043  			toType:    parquet.Timestamp(parquet.Millisecond).Type(),
  1044  			toValue:   parquet.Int64Value(1670803200000),
  1045  		},
  1046  
  1047  		{
  1048  			scenario:  "date to microsecond timestamp",
  1049  			fromType:  parquet.Date().Type(),
  1050  			fromValue: parquet.Int32Value(19338),
  1051  			toType:    parquet.Timestamp(parquet.Microsecond).Type(),
  1052  			toValue:   parquet.Int64Value(1670803200000000),
  1053  		},
  1054  
  1055  		{
  1056  			scenario:  "date to string",
  1057  			fromType:  parquet.Date().Type(),
  1058  			fromValue: parquet.Int32Value(18995),
  1059  			toType:    parquet.String().Type(),
  1060  			toValue:   parquet.ByteArrayValue([]byte(`2022-01-03`)),
  1061  		},
  1062  
  1063  		{
  1064  			scenario:  "millisecond time to string",
  1065  			fromType:  parquet.Time(parquet.Millisecond).Type(),
  1066  			fromValue: parquet.Int32Value(45296789),
  1067  			toType:    parquet.String().Type(),
  1068  			toValue:   parquet.ByteArrayValue([]byte(`12:34:56.789`)),
  1069  		},
  1070  
  1071  		{
  1072  			scenario:  "microsecond time to string",
  1073  			fromType:  parquet.Time(parquet.Microsecond).Type(),
  1074  			fromValue: parquet.Int64Value(45296789012),
  1075  			toType:    parquet.String().Type(),
  1076  			toValue:   parquet.ByteArrayValue([]byte(`12:34:56.789012`)),
  1077  		},
  1078  
  1079  		{
  1080  			scenario:  "millisecond timestamp to date",
  1081  			fromType:  parquet.Timestamp(parquet.Millisecond).Type(),
  1082  			fromValue: parquet.Int64Value(1670888613000),
  1083  			toType:    parquet.Date().Type(),
  1084  			toValue:   parquet.Int32Value(19338),
  1085  		},
  1086  
  1087  		{
  1088  			scenario:  "microsecond timestamp to date",
  1089  			fromType:  parquet.Timestamp(parquet.Microsecond).Type(),
  1090  			fromValue: parquet.Int64Value(1670888613000123),
  1091  			toType:    parquet.Date().Type(),
  1092  			toValue:   parquet.Int32Value(19338),
  1093  		},
  1094  
  1095  		{
  1096  			scenario:  "millisecond timestamp to millisecond time",
  1097  			fromType:  parquet.Timestamp(parquet.Millisecond).Type(),
  1098  			fromValue: parquet.Int64Value(1670888613123),
  1099  			toType:    parquet.Time(parquet.Millisecond).Type(),
  1100  			toValue:   parquet.Int32Value(85413123),
  1101  		},
  1102  
  1103  		{
  1104  			scenario:  "millisecond timestamp to micronsecond time",
  1105  			fromType:  parquet.Timestamp(parquet.Millisecond).Type(),
  1106  			fromValue: parquet.Int64Value(1670888613123),
  1107  			toType:    parquet.Time(parquet.Microsecond).Type(),
  1108  			toValue:   parquet.Int64Value(85413123000),
  1109  		},
  1110  
  1111  		{
  1112  			scenario:  "microsecond timestamp to millisecond time",
  1113  			fromType:  parquet.Timestamp(parquet.Microsecond).Type(),
  1114  			fromValue: parquet.Int64Value(1670888613123456),
  1115  			toType:    parquet.Time(parquet.Millisecond).Type(),
  1116  			toValue:   parquet.Int32Value(85413123),
  1117  		},
  1118  
  1119  		{
  1120  			scenario:  "microsecond timestamp to micronsecond time",
  1121  			fromType:  parquet.Timestamp(parquet.Microsecond).Type(),
  1122  			fromValue: parquet.Int64Value(1670888613123456),
  1123  			toType:    parquet.Time(parquet.Microsecond).Type(),
  1124  			toValue:   parquet.Int64Value(85413123456),
  1125  		},
  1126  
  1127  		{
  1128  			scenario:  "micros to nanos",
  1129  			fromType:  usType,
  1130  			fromValue: usVal,
  1131  			toType:    nsType,
  1132  			toValue:   parquet.Int64Value(ns),
  1133  		},
  1134  
  1135  		{
  1136  			scenario:  "millis to nanos",
  1137  			fromType:  msType,
  1138  			fromValue: msVal,
  1139  			toType:    nsType,
  1140  			toValue:   parquet.Int64Value(ns),
  1141  		},
  1142  
  1143  		{
  1144  			scenario:  "nanos to micros",
  1145  			fromType:  nsType,
  1146  			fromValue: nsVal,
  1147  			toType:    usType,
  1148  			toValue:   parquet.Int64Value(us),
  1149  		},
  1150  
  1151  		{
  1152  			scenario:  "nanos to nanos",
  1153  			fromType:  nsType,
  1154  			fromValue: nsVal,
  1155  			toType:    nsType,
  1156  			toValue:   parquet.Int64Value(ns),
  1157  		},
  1158  
  1159  		{
  1160  			scenario:  "int64 to nanos",
  1161  			fromType:  parquet.Int64Type,
  1162  			fromValue: nsVal,
  1163  			toType:    nsType,
  1164  			toValue:   parquet.Int64Value(ns),
  1165  		},
  1166  
  1167  		{
  1168  			scenario:  "int64 to int64",
  1169  			fromType:  parquet.Int64Type,
  1170  			fromValue: nsVal,
  1171  			toType:    parquet.Int64Type,
  1172  			toValue:   parquet.Int64Value(ns),
  1173  		},
  1174  	}
  1175  
  1176  	for _, test := range timestampConversionTests {
  1177  		t.Run(test.scenario, func(t *testing.T) {
  1178  			// Set levels to ensure that they are retained by the conversion.
  1179  			from := test.fromValue.Level(1, 2, 3)
  1180  			want := test.toValue.Level(1, 2, 3)
  1181  
  1182  			got, err := test.toType.ConvertValue(from, test.fromType)
  1183  			if err != nil {
  1184  				t.Fatal(err)
  1185  			}
  1186  
  1187  			if !parquet.DeepEqual(want, got) {
  1188  				t.Errorf("converted value mismatch:\nwant = %+v\ngot  = %+v", want, got)
  1189  			}
  1190  		})
  1191  	}
  1192  }
  1193  
  1194  func TestMissingColumnChunk(t *testing.T) {
  1195  	type stringRow struct{ StringVal string }
  1196  	schema := parquet.SchemaOf(&stringRow{})
  1197  	buffer := parquet.NewGenericBuffer[stringRow](schema)
  1198  	if _, err := buffer.Write([]stringRow{{"hello"}, {"world"}}); err != nil {
  1199  		t.Fatal(err)
  1200  	}
  1201  
  1202  	type boolRow struct{ BoolValue bool }
  1203  	conv := convertMissingColumn{
  1204  		schema: parquet.SchemaOf(&boolRow{}),
  1205  	}
  1206  	boolRowGroup := parquet.ConvertRowGroup(buffer, conv)
  1207  	chunk := boolRowGroup.ColumnChunks()[0]
  1208  
  1209  	t.Run("chunk values", func(t *testing.T) {
  1210  		if chunk.NumValues() != buffer.NumRows() {
  1211  			t.Fatal("chunk values mismatch, got", chunk.NumValues(), "want", buffer.NumRows())
  1212  		}
  1213  	})
  1214  
  1215  	t.Run("slice page", func(t *testing.T) {
  1216  		page, err := chunk.Pages().ReadPage()
  1217  		if err != nil {
  1218  			t.Fatal(err)
  1219  		}
  1220  
  1221  		if page.NumValues() != buffer.NumRows() {
  1222  			t.Fatalf("page size mismatch: want = %d, got = %d", buffer.NumRows(), page.NumValues())
  1223  		}
  1224  		if size := page.Slice(0, 1).NumValues(); size != 1 {
  1225  			t.Fatalf("page slice size mismatch: want = %d, got = %d", 1, size)
  1226  		}
  1227  	})
  1228  }
  1229  
  1230  type convertMissingColumn struct {
  1231  	schema *parquet.Schema
  1232  }
  1233  
  1234  func (m convertMissingColumn) Column(_ int) int                        { return -1 }
  1235  func (m convertMissingColumn) Schema() *parquet.Schema                 { return m.schema }
  1236  func (m convertMissingColumn) Convert(rows []parquet.Row) (int, error) { return len(rows), nil }