github.com/fraugster/parquet-go@v0.12.0/floor/interfaces/marshaller_test.go (about)

     1  package interfaces
     2  
     3  import (
     4  	"fmt"
     5  	"testing"
     6  
     7  	"github.com/fraugster/parquet-go/parquetschema"
     8  	"github.com/stretchr/testify/require"
     9  )
    10  
    11  func TestObjectMarshalling(t *testing.T) {
    12  	obj := NewMarshallObject(nil)
    13  
    14  	obj.AddField("foo").SetInt64(23)
    15  	obj.AddField("bar").SetInt32(42)
    16  	obj.AddField("baz").SetBool(true)
    17  	obj.AddField("name").SetByteArray([]byte("John Doe"))
    18  	group := obj.AddField("my_group").Group()
    19  	group.AddField("foo1").SetFloat32(23.5)
    20  	group.AddField("bar1").SetFloat64(9000.5)
    21  
    22  	idList := obj.AddField("id_list").List()
    23  	idList.Add().SetInt64(int64(1))
    24  	idList.Add().SetInt64(int64(2))
    25  	idList.Add().SetInt64(int64(15))
    26  	idList.Add().SetInt64(int64(28))
    27  	idList.Add().SetInt64(int64(32))
    28  
    29  	dataMap := obj.AddField("data_map").Map()
    30  	for i := 0; i < 5; i++ {
    31  		elem := dataMap.Add()
    32  		elem.Key().SetByteArray([]byte(fmt.Sprintf("data%d", i)))
    33  		elem.Value().SetInt32(int32(i))
    34  	}
    35  
    36  	nestedDataMap := obj.AddField("nested_data_map").Map()
    37  	elem := nestedDataMap.Add()
    38  	elem.Key().SetInt64(23)
    39  	elem.Value().Group().AddField("foo").SetInt32(42)
    40  
    41  	groupList := obj.AddField("group_list").List()
    42  	for i := 0; i < 3; i++ {
    43  		group := groupList.Add().Group()
    44  		group.AddField("i").SetInt64(int64(i))
    45  	}
    46  
    47  	expectedData := map[string]interface{}{
    48  		"foo":  int64(23),
    49  		"bar":  int32(42),
    50  		"baz":  true,
    51  		"name": []byte("John Doe"),
    52  		"my_group": map[string]interface{}{
    53  			"foo1": float32(23.5),
    54  			"bar1": float64(9000.5),
    55  		},
    56  		"id_list": map[string]interface{}{
    57  			"list": []map[string]interface{}{
    58  				{
    59  					"element": int64(1),
    60  				},
    61  				{
    62  					"element": int64(2),
    63  				},
    64  				{
    65  					"element": int64(15),
    66  				},
    67  				{
    68  					"element": int64(28),
    69  				},
    70  				{
    71  					"element": int64(32),
    72  				},
    73  			},
    74  		},
    75  		"data_map": map[string]interface{}{
    76  			"key_value": []map[string]interface{}{
    77  				{
    78  					"key":   []byte("data0"),
    79  					"value": int32(0),
    80  				},
    81  				{
    82  					"key":   []byte("data1"),
    83  					"value": int32(1),
    84  				},
    85  				{
    86  					"key":   []byte("data2"),
    87  					"value": int32(2),
    88  				},
    89  				{
    90  					"key":   []byte("data3"),
    91  					"value": int32(3),
    92  				},
    93  				{
    94  					"key":   []byte("data4"),
    95  					"value": int32(4),
    96  				},
    97  			},
    98  		},
    99  		"nested_data_map": map[string]interface{}{
   100  			"key_value": []map[string]interface{}{
   101  				{
   102  					"key": int64(23),
   103  					"value": map[string]interface{}{
   104  						"foo": int32(42),
   105  					},
   106  				},
   107  			},
   108  		},
   109  		"group_list": map[string]interface{}{
   110  			"list": []map[string]interface{}{
   111  				{
   112  					"element": map[string]interface{}{
   113  						"i": int64(0),
   114  					},
   115  				},
   116  				{
   117  					"element": map[string]interface{}{
   118  						"i": int64(1),
   119  					},
   120  				},
   121  				{
   122  					"element": map[string]interface{}{
   123  						"i": int64(2),
   124  					},
   125  				},
   126  			},
   127  		},
   128  	}
   129  
   130  	require.Equal(t, expectedData, obj.GetData())
   131  }
   132  
   133  func TestObjectMarshallingWithSchema(t *testing.T) {
   134  	sd, err := parquetschema.ParseSchemaDefinition(
   135  		`message test {
   136  			required group emails (LIST) {
   137  				repeated group list {
   138  					required binary element (STRING);
   139  				}
   140  			}
   141  		}`)
   142  	require.NoError(t, err)
   143  
   144  	obj := NewMarshallObjectWithSchema(nil, sd)
   145  
   146  	emailList := obj.AddField("emails").List()
   147  	emailList.Add().SetByteArray([]byte("foo@example.com"))
   148  	emailList.Add().SetByteArray([]byte("bar@example.com"))
   149  
   150  	expectedData := map[string]interface{}{
   151  		"emails": map[string]interface{}{
   152  			"list": []map[string]interface{}{
   153  				{
   154  					"element": []byte("foo@example.com"),
   155  				},
   156  				{
   157  					"element": []byte("bar@example.com"),
   158  				},
   159  			},
   160  		},
   161  	}
   162  
   163  	require.Equal(t, expectedData, obj.GetData())
   164  }
   165  
   166  func TestObjectMarshallingWithAthenaCompatibleSchema(t *testing.T) {
   167  	sd, err := parquetschema.ParseSchemaDefinition(
   168  		`message test {
   169  			required group emails (LIST) {
   170  				repeated group bag {
   171  					required binary array_element (STRING);
   172  				}
   173  			}
   174  		}`)
   175  	require.NoError(t, err)
   176  
   177  	obj := NewMarshallObjectWithSchema(nil, sd)
   178  
   179  	emailList := obj.AddField("emails").List()
   180  	emailList.Add().SetByteArray([]byte("foo@example.com"))
   181  	emailList.Add().SetByteArray([]byte("bar@example.com"))
   182  
   183  	expectedData := map[string]interface{}{
   184  		"emails": map[string]interface{}{
   185  			"bag": []map[string]interface{}{
   186  				{
   187  					"array_element": []byte("foo@example.com"),
   188  				},
   189  				{
   190  					"array_element": []byte("bar@example.com"),
   191  				},
   192  			},
   193  		},
   194  	}
   195  
   196  	require.Equal(t, expectedData, obj.GetData())
   197  }