github.com/fraugster/parquet-go@v0.12.0/floor/writer_test.go (about)

     1  package floor
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"testing"
     7  	"time"
     8  
     9  	"github.com/davecgh/go-spew/spew"
    10  	goparquet "github.com/fraugster/parquet-go"
    11  	"github.com/fraugster/parquet-go/floor/interfaces"
    12  	"github.com/fraugster/parquet-go/parquet"
    13  	"github.com/fraugster/parquet-go/parquetschema"
    14  	"github.com/stretchr/testify/require"
    15  )
    16  
    17  func TestDecodeStruct(t *testing.T) {
    18  	testData := []struct {
    19  		Input          interface{}
    20  		ExpectedOutput map[string]interface{}
    21  		ExpectErr      bool
    22  		Schema         string
    23  	}{
    24  		{
    25  			Input:          struct{ Foo int16 }{Foo: 42},
    26  			ExpectedOutput: map[string]interface{}{"foo": int32(42)},
    27  			ExpectErr:      false,
    28  			Schema:         `message test { required int32 foo; }`,
    29  		},
    30  		{
    31  			Input:          struct{ Foo int }{Foo: 43},
    32  			ExpectedOutput: map[string]interface{}{"foo": int32(43)},
    33  			ExpectErr:      false,
    34  			Schema:         `message test { required int32 foo; }`,
    35  		},
    36  		{
    37  			Input:          struct{ Foo int8 }{Foo: 44},
    38  			ExpectedOutput: map[string]interface{}{"foo": int32(44)},
    39  			ExpectErr:      false,
    40  			Schema:         `message test { required int32 foo; }`,
    41  		},
    42  		{
    43  			Input:          struct{ Foo int32 }{Foo: 100000},
    44  			ExpectedOutput: map[string]interface{}{"foo": int32(100000)},
    45  			ExpectErr:      false,
    46  			Schema:         `message test { required int32 foo; }`,
    47  		},
    48  		{
    49  			Input:          struct{ Foo uint64 }{Foo: 1125899906842624},
    50  			ExpectedOutput: map[string]interface{}{"foo": int64(1125899906842624)},
    51  			ExpectErr:      false,
    52  			Schema:         `message test { required int64 foo; }`,
    53  		},
    54  		{
    55  			Input:          struct{ Foo uint }{Foo: 200000},
    56  			ExpectedOutput: map[string]interface{}{"foo": int32(200000)},
    57  			ExpectErr:      false,
    58  			Schema:         `message test { required int32 foo; }`,
    59  		},
    60  		{
    61  			Input:          struct{ Foo float32 }{Foo: 42.5},
    62  			ExpectedOutput: map[string]interface{}{"foo": float32(42.5)},
    63  			ExpectErr:      false,
    64  			Schema:         `message test { required float foo; }`,
    65  		},
    66  		{
    67  			Input:          struct{ Foo float64 }{Foo: 23.5},
    68  			ExpectedOutput: map[string]interface{}{"foo": float64(23.5)},
    69  			ExpectErr:      false,
    70  			Schema:         `message test { required double foo; }`,
    71  		},
    72  		{
    73  			Input:          struct{ Foo byte }{Foo: 1},
    74  			ExpectedOutput: map[string]interface{}{"foo": int32(1)},
    75  			ExpectErr:      false,
    76  			Schema:         `message test { required int32 foo; }`,
    77  		},
    78  		{
    79  			Input:          struct{ Foo string }{Foo: "bar"},
    80  			ExpectedOutput: map[string]interface{}{"foo": []byte("bar")},
    81  			ExpectErr:      false,
    82  			Schema:         `message test { required binary foo (STRING); }`,
    83  		},
    84  		{
    85  			Input:          struct{ Foo *string }{Foo: new(string)},
    86  			ExpectedOutput: map[string]interface{}{"foo": []byte("")},
    87  			ExpectErr:      false,
    88  			Schema:         `message test { optional binary foo (STRING); }`,
    89  		},
    90  		{
    91  			Input:          struct{ Foo *string }{},
    92  			ExpectedOutput: map[string]interface{}{},
    93  			ExpectErr:      false,
    94  			Schema:         `message test { optional binary foo (STRING); }`,
    95  		},
    96  		{
    97  			Input:          int(23),
    98  			ExpectedOutput: nil,
    99  			ExpectErr:      true,
   100  			Schema:         `message test { }`,
   101  		},
   102  		{
   103  			Input: struct {
   104  				Foo struct {
   105  					Bar int64
   106  				}
   107  				Quux *bool
   108  				Baz  uint32
   109  				Blub bool
   110  			}{},
   111  			ExpectedOutput: map[string]interface{}{"foo": map[string]interface{}{"bar": int64(0)}, "baz": int64(0), "blub": false},
   112  			ExpectErr:      false,
   113  			Schema:         `message test { required group foo { required int64 bar; } required int64 baz; optional boolean quux; required boolean blub; }`,
   114  		},
   115  		{
   116  			Input: struct {
   117  				Foo []bool
   118  			}{
   119  				Foo: []bool{false, true, false},
   120  			},
   121  			ExpectedOutput: map[string]interface{}{
   122  				"foo": map[string]interface{}{
   123  					"list": []map[string]interface{}{
   124  						{"element": false},
   125  						{"element": true},
   126  						{"element": false},
   127  					},
   128  				},
   129  			},
   130  			ExpectErr: false,
   131  			Schema: `message test {
   132  				required group foo (LIST) {
   133  					repeated group list {
   134  						required boolean element;
   135  					}
   136  				}
   137  			}`,
   138  		},
   139  		{
   140  			Input: struct {
   141  				Foo [5]uint16
   142  			}{
   143  				Foo: [5]uint16{1, 1, 2, 3, 5},
   144  			},
   145  			ExpectedOutput: map[string]interface{}{
   146  				"foo": map[string]interface{}{
   147  					"list": []map[string]interface{}{
   148  						{"element": int32(1)},
   149  						{"element": int32(1)},
   150  						{"element": int32(2)},
   151  						{"element": int32(3)},
   152  						{"element": int32(5)},
   153  					},
   154  				},
   155  			},
   156  			ExpectErr: false,
   157  			Schema: `message test {
   158  				required group foo (LIST) {
   159  					repeated group list {
   160  						required int32 element;
   161  					}
   162  				}
   163  			}`,
   164  		},
   165  		{
   166  			Input: struct {
   167  				Foo map[string]int64
   168  			}{
   169  				Foo: map[string]int64{
   170  					"hello": int64(23),
   171  				},
   172  			},
   173  			ExpectedOutput: map[string]interface{}{
   174  				"foo": map[string]interface{}{
   175  					"key_value": []map[string]interface{}{
   176  						{"key": []byte("hello"), "value": int64(23)},
   177  					},
   178  				},
   179  			},
   180  			ExpectErr: false,
   181  			Schema: `message test {
   182  				required group foo (MAP) {
   183  					repeated group key_value {
   184  						required binary key (STRING);
   185  						required int64 value;
   186  					}
   187  				}
   188  			}`,
   189  		},
   190  		{
   191  			Input: struct {
   192  				C chan int
   193  			}{},
   194  			ExpectedOutput: map[string]interface{}{},
   195  			ExpectErr:      false,
   196  			Schema:         `message foo { }`,
   197  		},
   198  		{
   199  			Input: struct {
   200  				Foo struct {
   201  					C   chan int
   202  					Bar int
   203  				}
   204  			}{},
   205  			ExpectedOutput: map[string]interface{}{"foo": map[string]interface{}{"bar": int64(0)}},
   206  			ExpectErr:      false,
   207  			Schema:         `message foo { required group foo { optional int64 bar; } }`,
   208  		},
   209  		{
   210  			Input: struct {
   211  				Foo []chan int
   212  			}{Foo: []chan int{make(chan int)}},
   213  			ExpectedOutput: nil,
   214  			ExpectErr:      true,
   215  			Schema:         `message foo { required group foo (LIST) { repeated group list { required int32 element; } } }`,
   216  		},
   217  		{
   218  			Input: &struct {
   219  				Bla int
   220  			}{Bla: 616},
   221  			ExpectedOutput: map[string]interface{}{"bla": int32(616)},
   222  			ExpectErr:      false,
   223  			Schema:         `message test { required int32 bla; }`,
   224  		},
   225  		{
   226  			Input: (*struct {
   227  				Bla int
   228  			})(nil),
   229  			ExpectedOutput: nil,
   230  			ExpectErr:      true,
   231  			Schema:         `message test { required int32 bla; }`,
   232  		},
   233  		{
   234  			Input: struct {
   235  				Date time.Time
   236  			}{
   237  				Date: time.Date(1970, 1, 10, 0, 0, 0, 0, time.UTC),
   238  			},
   239  			ExpectedOutput: map[string]interface{}{"date": int32(9)},
   240  			ExpectErr:      false,
   241  			Schema:         `message test { required int32 date (DATE); }`,
   242  		},
   243  		{
   244  			Input: struct {
   245  				Date time.Time
   246  			}{
   247  				Date: time.Date(1970, 1, 12, 23, 59, 59, 0, time.UTC),
   248  			},
   249  			ExpectedOutput: map[string]interface{}{"date": int32(11)},
   250  			ExpectErr:      false,
   251  			Schema:         `message test { required int32 date (DATE); }`,
   252  		},
   253  		{
   254  			Input: struct {
   255  				TS time.Time
   256  			}{
   257  				TS: time.Date(1970, 1, 1, 0, 0, 23, 0, time.UTC),
   258  			},
   259  			ExpectedOutput: map[string]interface{}{"ts": int64(23000)},
   260  			ExpectErr:      false,
   261  			Schema:         `message test { required int64 ts (TIMESTAMP(MILLIS, false)); }`,
   262  		},
   263  		{
   264  			Input: struct {
   265  				TS time.Time
   266  			}{
   267  				TS: time.Date(1970, 1, 1, 0, 0, 24, 0, time.UTC),
   268  			},
   269  			ExpectedOutput: map[string]interface{}{"ts": int64(24000000)},
   270  			ExpectErr:      false,
   271  			Schema:         `message test { required int64 ts (TIMESTAMP(MICROS, false)); }`,
   272  		},
   273  		{
   274  			Input: struct {
   275  				TS time.Time
   276  			}{
   277  				TS: time.Date(1970, 1, 1, 0, 0, 25, 2000, time.UTC),
   278  			},
   279  			ExpectedOutput: map[string]interface{}{"ts": int64(25000002000)},
   280  			ExpectErr:      false,
   281  			Schema:         `message test { required int64 ts (TIMESTAMP(NANOS, false)); }`,
   282  		},
   283  		{
   284  			Input: struct {
   285  				Lunch Time
   286  			}{
   287  				Lunch: MustTime(NewTime(12, 30, 0, 0)),
   288  			},
   289  			ExpectedOutput: map[string]interface{}{"lunch": int32(45000000)},
   290  			ExpectErr:      false,
   291  			Schema:         `message test { required int32 lunch (TIME(MILLIS, false)); }`,
   292  		},
   293  		{
   294  			Input: struct {
   295  				BeddyByes Time
   296  			}{
   297  				BeddyByes: MustTime(NewTime(20, 15, 30, 0)),
   298  			},
   299  			ExpectedOutput: map[string]interface{}{"beddybyes": int64(72930000000)},
   300  			ExpectErr:      false,
   301  			Schema:         `message test { required int64 beddybyes (TIME(MICROS, false)); }`,
   302  		},
   303  		{
   304  			Input: struct {
   305  				WakeyWakey Time
   306  			}{
   307  				WakeyWakey: MustTime(NewTime(7, 5, 59, 0)),
   308  			},
   309  			ExpectedOutput: map[string]interface{}{"wakeywakey": int64(25559000000000)},
   310  			ExpectErr:      false,
   311  			Schema:         `message test { required int64 wakeywakey (TIME(NANOS, false)); }`,
   312  		},
   313  		{
   314  			Input: struct {
   315  				Foo   string
   316  				Times []interface{}
   317  			}{
   318  				Foo:   "bar",
   319  				Times: []interface{}{"2021-10-29T20:06:47.960577000Z", 1635542684, 1635542811912, 1635542811912010, 1635542854925031000},
   320  			},
   321  			ExpectedOutput: map[string]interface{}{
   322  				"foo": []byte("bar"),
   323  				"times": map[string]interface{}{
   324  					"list": []map[string]interface{}{
   325  						{"element": goparquet.TimeToInt96(time.Date(2021, 10, 29, 20, 06, 47, 960577000, time.UTC))},
   326  						{"element": goparquet.TimeToInt96(time.Date(2021, 10, 29, 21, 24, 44, 0, time.UTC))},
   327  						{"element": goparquet.TimeToInt96(time.Date(2021, 10, 29, 21, 26, 51, 912000000, time.UTC))},
   328  						{"element": goparquet.TimeToInt96(time.Date(2021, 10, 29, 21, 26, 51, 912010000, time.UTC))},
   329  						{"element": goparquet.TimeToInt96(time.Date(2021, 10, 29, 21, 27, 34, 925031000, time.UTC))},
   330  					},
   331  				},
   332  			},
   333  			ExpectErr: false,
   334  			Schema: `message test {
   335  				optional binary foo (STRING);
   336  				optional group times (LIST) {
   337  					repeated group list {
   338  						required int96 element;
   339  					}
   340  				}
   341  			}`,
   342  		},
   343  		{
   344  			Input:          map[string]interface{}{"foo": "bar"},
   345  			ExpectedOutput: map[string]interface{}{"foo": []byte("bar")},
   346  			ExpectErr:      false,
   347  			Schema:         `message test { optional binary foo (STRING); }`,
   348  		},
   349  		{
   350  			Input: map[string]interface{}{"foo": "bar", "data": map[string]interface{}{"foo": "bar"}},
   351  			ExpectedOutput: map[string]interface{}{
   352  				"foo": []byte("bar"),
   353  				"data": map[string]interface{}{
   354  					"key_value": []map[string]interface{}{
   355  						{"key": []byte("foo"), "value": []byte("bar")},
   356  					},
   357  				}},
   358  			ExpectErr: false,
   359  			Schema: `message test {
   360  				optional binary foo (STRING);
   361  				required group data (MAP) {
   362  					repeated group key_value {
   363  						required binary key (STRING);
   364  						optional binary value (STRING);
   365  					}
   366  				}
   367  			}`,
   368  		},
   369  	}
   370  
   371  	for idx, tt := range testData {
   372  		t.Run(fmt.Sprintf("test_%d", idx), func(t *testing.T) {
   373  			sd, err := parquetschema.ParseSchemaDefinition(tt.Schema)
   374  			require.NoError(t, err, "%d. parsing schema failed", idx)
   375  			obj := interfaces.NewMarshallObject(nil)
   376  			m := &reflectMarshaller{obj: tt.Input, schemaDef: sd}
   377  			err = m.MarshalParquet(obj)
   378  			if tt.ExpectErr {
   379  				require.Error(t, err, "%d. expected error, but found none", idx)
   380  			} else {
   381  				require.NoError(t, err, "%d. expected no error, but found one", idx)
   382  				require.Equal(t, tt.ExpectedOutput, obj.GetData(), "%d. output mismatch; schema = %s", idx, tt.Schema)
   383  			}
   384  		})
   385  	}
   386  }
   387  
   388  func TestWriteFile(t *testing.T) {
   389  	_ = os.Mkdir("files", 0755)
   390  
   391  	sd, err := parquetschema.ParseSchemaDefinition(
   392  		`message test_msg {
   393  			required int64 foo;
   394  			optional binary bar (STRING);
   395  			optional group baz (LIST) {
   396  				repeated group list {
   397  					required int32 element;
   398  				}
   399  			}
   400  			optional int64 ts (TIMESTAMP(NANOS, false));
   401  			optional int64 time (TIME(NANOS, false));
   402  		}`)
   403  	require.NoError(t, err, "parsing schema definition failed")
   404  
   405  	t.Logf("schema definition: %s", spew.Sdump(sd))
   406  
   407  	hlWriter, err := NewFileWriter(
   408  		"files/test.parquet",
   409  		goparquet.WithCompressionCodec(parquet.CompressionCodec_SNAPPY),
   410  		goparquet.WithCreator("floor-unittest"),
   411  		goparquet.WithSchemaDefinition(sd),
   412  	)
   413  	require.NoError(t, err, "creating new file writer failed")
   414  
   415  	data := []struct {
   416  		Foo  int64
   417  		Bar  *string
   418  		Baz  []int32
   419  		Time *Time
   420  	}{
   421  		{23, strPtr("hello!"), []int32{23}, nil},
   422  		{42, strPtr("world!"), []int32{1, 1, 2, 3, 5}, nil},
   423  		{500, nil, nil, nil},
   424  		{750, strPtr("empty"), nil, nil},
   425  		{1000, strPtr("bye!"), []int32{2, 3, 5, 7, 11}, timePtr(MustTime(NewTime(16, 20, 0, 0)))},
   426  	}
   427  
   428  	for idx, d := range data {
   429  		require.NoError(t, hlWriter.Write(d), "%d. Write failed", idx)
   430  	}
   431  
   432  	require.NoError(t, hlWriter.Close())
   433  
   434  	rf, err := os.Open("files/test.parquet")
   435  	require.NoError(t, err)
   436  
   437  	reader, err := goparquet.NewFileReader(rf)
   438  	require.NoError(t, err)
   439  
   440  	n, err := reader.RowGroupNumRows()
   441  	require.NoError(t, err)
   442  	require.Equal(t, int64(len(data)), n)
   443  
   444  	expectedData := []map[string]interface{}{
   445  		{
   446  			"foo": int64(23),
   447  			"bar": []byte("hello!"),
   448  			"baz": map[string]interface{}{
   449  				"list": []map[string]interface{}{
   450  					{"element": int32(23)},
   451  				},
   452  			},
   453  		},
   454  		{
   455  			"foo": int64(42),
   456  			"bar": []byte("world!"),
   457  			"baz": map[string]interface{}{
   458  				"list": []map[string]interface{}{
   459  					{"element": int32(1)},
   460  					{"element": int32(1)},
   461  					{"element": int32(2)},
   462  					{"element": int32(3)},
   463  					{"element": int32(5)},
   464  				},
   465  			},
   466  		},
   467  		{
   468  			"foo": int64(500),
   469  		},
   470  		{
   471  			"foo": int64(750),
   472  			"bar": []byte("empty"),
   473  		},
   474  		{
   475  			"foo": int64(1000),
   476  			"bar": []byte("bye!"),
   477  			"baz": map[string]interface{}{
   478  				"list": []map[string]interface{}{
   479  					{"element": int32(2)},
   480  					{"element": int32(3)},
   481  					{"element": int32(5)},
   482  					{"element": int32(7)},
   483  					{"element": int32(11)},
   484  				},
   485  			},
   486  			"time": int64(58800000000000),
   487  		},
   488  	}
   489  
   490  	n, err = reader.RowGroupNumRows()
   491  	require.NoError(t, err)
   492  
   493  	for i := int64(0); i < n; i++ {
   494  		data, err := reader.NextRow()
   495  		require.NoError(t, err, "%d. reading record failed")
   496  		require.Equal(t, expectedData[i], data, "%d. data in parquet file differs from what's expected", i)
   497  	}
   498  }
   499  
   500  func timePtr(t Time) *Time {
   501  	return &t
   502  }
   503  
   504  func strPtr(s string) *string {
   505  	return &s
   506  }
   507  
   508  func TestWriteReadByteArrays(t *testing.T) {
   509  	_ = os.Mkdir("files", 0755)
   510  
   511  	sd, err := parquetschema.ParseSchemaDefinition(
   512  		`message test_msg {
   513  			required fixed_len_byte_array(4) foo;
   514  			optional fixed_len_byte_array(4) bar;
   515  			required binary baz;
   516  			optional binary quux;
   517  		}`)
   518  	require.NoError(t, err, "parsing schema definition failed")
   519  
   520  	t.Logf("schema definition: %s", spew.Sdump(sd))
   521  
   522  	hlWriter, err := NewFileWriter(
   523  		"files/bytearrays.parquet",
   524  		goparquet.WithCompressionCodec(parquet.CompressionCodec_SNAPPY),
   525  		goparquet.WithCreator("floor-unittest"),
   526  		goparquet.WithSchemaDefinition(sd),
   527  	)
   528  	require.NoError(t, err, "creating new file writer failed")
   529  
   530  	type testData struct {
   531  		Foo  [4]byte
   532  		Bar  []byte
   533  		Baz  []byte
   534  		Quux []byte
   535  	}
   536  
   537  	data := []testData{
   538  		{Foo: [4]byte{0, 1, 2, 3}, Bar: []byte{4, 5, 6, 7}, Baz: []byte{99}, Quux: []byte{100, 101}},
   539  		{Foo: [4]byte{8, 9, 10, 11}, Baz: []byte("hello world!")},
   540  		{Foo: [4]byte{12, 13, 14, 15}, Bar: []byte{16, 17, 18, 19}, Baz: []byte{155, 156, 157, 158, 159, 160}, Quux: []byte{180, 181, 182, 183}},
   541  	}
   542  
   543  	for idx, record := range data {
   544  		require.NoError(t, hlWriter.Write(record), "%d. writing record failed", idx)
   545  	}
   546  	require.NoError(t, hlWriter.Close())
   547  
   548  	hlReader, err := NewFileReader("files/bytearrays.parquet")
   549  	require.NoError(t, err, "creating new file reader failed")
   550  
   551  	var readData []testData
   552  
   553  	for hlReader.Next() {
   554  		var record testData
   555  		require.NoError(t, hlReader.Scan(&record))
   556  		readData = append(readData, record)
   557  	}
   558  
   559  	require.Equal(t, data, readData, "data written and read back doesn't match")
   560  }
   561  
   562  func TestWriteFileWithMarshallerThenReadWithUnmarshaller(t *testing.T) {
   563  	_ = os.Mkdir("files", 0755)
   564  
   565  	sd, err := parquetschema.ParseSchemaDefinition(
   566  		`message test_msg {
   567  			required binary foo (STRING);
   568  			required int64 bar;
   569  			required group baz (LIST) {
   570  				repeated group list {
   571  					required group element {
   572  						required int64 quux;
   573  					}
   574  				}
   575  			}
   576  		}`)
   577  	require.NoError(t, err, "parsing schema definition failed")
   578  
   579  	t.Logf("schema definition: %s", spew.Sdump(sd))
   580  
   581  	hlWriter, err := NewFileWriter(
   582  		"files/marshaller.parquet",
   583  		goparquet.WithCompressionCodec(parquet.CompressionCodec_SNAPPY),
   584  		goparquet.WithCreator("floor-unittest"),
   585  		goparquet.WithSchemaDefinition(sd),
   586  	)
   587  	require.NoError(t, err, "creating new file writer failed")
   588  
   589  	testData := &marshTestRecord{foo: "hello world!", bar: 1234567, baz: []marshTestGroup{{quux: 23}, {quux: 42}}}
   590  	require.NoError(t, hlWriter.Write(testData), "writing object using marshaller failed")
   591  
   592  	require.NoError(t, hlWriter.Close())
   593  
   594  	hlReader, err := NewFileReader("files/marshaller.parquet")
   595  	require.NoError(t, err, "opening file failed")
   596  
   597  	require.True(t, hlReader.Next())
   598  
   599  	readData := &marshTestRecord{}
   600  	require.NoError(t, hlReader.Scan(readData))
   601  
   602  	require.Equal(t, testData, readData, "written and read data don't match")
   603  	require.NoError(t, hlReader.Close())
   604  }
   605  
   606  func BenchmarkWriteFile(b *testing.B) {
   607  	_ = os.Mkdir("files", 0755)
   608  
   609  	sd, err := parquetschema.ParseSchemaDefinition(
   610  		`message test_msg {
   611  			required int64 foo;
   612  			optional binary bar (STRING);
   613  			optional group baz (LIST) {
   614  				repeated group list {
   615  					required int32 element;
   616  				}
   617  			}
   618  			optional int64 ts (TIMESTAMP(NANOS, false));
   619  			optional int64 time (TIME(NANOS, false));
   620  		}`)
   621  	require.NoError(b, err, "parsing schema definition failed")
   622  
   623  	hlWriter, err := NewFileWriter(
   624  		"files/test.parquet",
   625  		goparquet.WithCompressionCodec(parquet.CompressionCodec_SNAPPY),
   626  		goparquet.WithCreator("floor-unittest"),
   627  		goparquet.WithSchemaDefinition(sd),
   628  	)
   629  	require.NoError(b, err, "creating new file writer failed")
   630  	defer func() {
   631  		require.NoError(b, hlWriter.Close())
   632  	}()
   633  
   634  	data := struct {
   635  		Foo  int64
   636  		Bar  *string
   637  		Baz  []int32
   638  		Time *Time
   639  	}{
   640  		42, strPtr("world!"), []int32{1, 1, 2, 3, 5}, nil,
   641  	}
   642  
   643  	b.ResetTimer()
   644  	for i := 0; i < b.N; i++ {
   645  		_ = hlWriter.Write(data)
   646  	}
   647  }
   648  
   649  type marshTestRecord struct {
   650  	foo string
   651  	bar int64
   652  	baz []marshTestGroup
   653  }
   654  
   655  type marshTestGroup struct {
   656  	quux int64
   657  }
   658  
   659  func (r *marshTestRecord) MarshalParquet(obj interfaces.MarshalObject) error {
   660  	obj.AddField("foo").SetByteArray([]byte(r.foo))
   661  	obj.AddField("bar").SetInt64(r.bar)
   662  	list := obj.AddField("baz").List()
   663  	for _, b := range r.baz {
   664  		grp := list.Add().Group()
   665  		grp.AddField("quux").SetInt64(b.quux)
   666  	}
   667  
   668  	return nil
   669  }
   670  
   671  func (r *marshTestRecord) UnmarshalParquet(obj interfaces.UnmarshalObject) error {
   672  	foo := obj.GetField("foo")
   673  	if err := foo.Error(); err != nil {
   674  		return err
   675  	}
   676  
   677  	fooValue, err := foo.ByteArray()
   678  	if err != nil {
   679  		return err
   680  	}
   681  
   682  	r.foo = string(fooValue)
   683  
   684  	bar := obj.GetField("bar")
   685  	if err = bar.Error(); err != nil {
   686  		return err
   687  	}
   688  
   689  	barValue, err := bar.Int64()
   690  	if err != nil {
   691  		return err
   692  	}
   693  
   694  	r.bar = barValue
   695  
   696  	bazList, err := obj.GetField("baz").List()
   697  	if err != nil {
   698  		return err
   699  	}
   700  
   701  	for bazList.Next() {
   702  		v, err := bazList.Value()
   703  		if err != nil {
   704  			return err
   705  		}
   706  
   707  		grp, err := v.Group()
   708  		if err != nil {
   709  			return err
   710  		}
   711  
   712  		quux, err := grp.GetField("quux").Int64()
   713  		if err != nil {
   714  			return err
   715  		}
   716  
   717  		r.baz = append(r.baz, marshTestGroup{quux: quux})
   718  	}
   719  
   720  	return nil
   721  }
   722  
   723  type testMsg struct {
   724  	ID     int64
   725  	Foobar []string
   726  }
   727  
   728  func (m *testMsg) MarshalParquet(obj interfaces.MarshalObject) error {
   729  	obj.AddField("id").SetInt64(m.ID)
   730  	list := obj.AddField("foobar").List()
   731  	for _, elem := range m.Foobar {
   732  		list.Add().SetByteArray([]byte(elem))
   733  	}
   734  	return nil
   735  }
   736  
   737  func (m *testMsg) UnmarshalParquet(obj interfaces.UnmarshalObject) error {
   738  	id, err := obj.GetField("id").Int64()
   739  	if err != nil {
   740  		return err
   741  	}
   742  	m.ID = id
   743  	list, err := obj.GetField("foobar").List()
   744  	if err == interfaces.ErrFieldNotPresent {
   745  		return nil
   746  	}
   747  	if err != nil {
   748  		return err
   749  	}
   750  
   751  	for list.Next() {
   752  		v, err := list.Value()
   753  		if err != nil {
   754  			return err
   755  		}
   756  		vv, err := v.ByteArray()
   757  		if err != nil {
   758  			return err
   759  		}
   760  		m.Foobar = append(m.Foobar, string(vv))
   761  	}
   762  
   763  	return nil
   764  }
   765  
   766  func TestWriteEmptyList(t *testing.T) {
   767  	_ = os.Mkdir("files", 0755)
   768  
   769  	sd, err := parquetschema.ParseSchemaDefinition(
   770  		`message test_msg {
   771  			required int64 id;
   772  			optional group foobar (LIST) {
   773  				repeated group list {
   774  					required binary element (STRING);
   775  				}
   776  			}
   777  		}`)
   778  	require.NoError(t, err, "parsing schema definition failed")
   779  
   780  	t.Logf("schema definition: %s", spew.Sdump(sd))
   781  
   782  	hlWriter, err := NewFileWriter(
   783  		"files/emptylist.parquet",
   784  		goparquet.WithCompressionCodec(parquet.CompressionCodec_SNAPPY),
   785  		goparquet.WithCreator("floor-unittest"),
   786  		goparquet.WithSchemaDefinition(sd),
   787  	)
   788  	require.NoError(t, err, "creating new file writer failed")
   789  
   790  	testData1 := &testMsg{ID: 23, Foobar: nil}
   791  	require.NoError(t, hlWriter.Write(testData1), "writing object using marshaller failed")
   792  
   793  	testData2 := &testMsg{ID: 42, Foobar: []string{"so", "long", "and", "thanks", "for", "all", "the", "fish"}}
   794  	require.NoError(t, hlWriter.Write(testData2), "writing object using marshaller failed")
   795  
   796  	require.NoError(t, hlWriter.Write(testData1), "writing object using marshaller failed")
   797  	require.NoError(t, hlWriter.Write(testData2), "writing object using marshaller failed")
   798  
   799  	require.NoError(t, hlWriter.Close())
   800  
   801  	hlReader, err := NewFileReader("files/emptylist.parquet")
   802  	require.NoError(t, err, "opening file failed")
   803  
   804  	require.True(t, hlReader.Next())
   805  
   806  	readData1 := &testMsg{}
   807  	require.NoError(t, hlReader.Scan(readData1))
   808  	require.Equal(t, testData1, readData1, "written and read data don't match")
   809  
   810  	readData2 := &testMsg{}
   811  	require.NoError(t, hlReader.Scan(readData2))
   812  	require.Equal(t, testData1, readData2, "written and read data don't match")
   813  
   814  	readData3 := &testMsg{}
   815  	require.NoError(t, hlReader.Scan(readData3))
   816  	require.Equal(t, testData1, readData3, "written and read data don't match")
   817  
   818  	readData4 := &testMsg{}
   819  	require.NoError(t, hlReader.Scan(readData4))
   820  	require.Equal(t, testData1, readData4, "written and read data don't match")
   821  
   822  	require.NoError(t, hlReader.Close())
   823  }