github.com/fraugster/parquet-go@v0.12.0/type_bytearray_test.go (about) 1 package goparquet 2 3 import ( 4 "bytes" 5 "testing" 6 7 "github.com/fraugster/parquet-go/parquet" 8 "github.com/stretchr/testify/assert" 9 10 "github.com/fraugster/parquet-go/parquetschema" 11 "github.com/stretchr/testify/require" 12 ) 13 14 func TestFuzzCrashByteArrayPlainDecoderNext(t *testing.T) { 15 data := []byte("PAR1\x15\x00\x15\xac\x02\x15\xac\x02,\x150\x15\x00\x15\x06\x15" + 16 "00\x01\x15\x02\x19,H\f00000000000" + 17 "0\x1500\x15\x0e\x15\x1d\x150\x18\x0500000%0\x15" + 18 "0\x1500\x160\x19\x1c\x19\x1c&0\x1c\x15\x0e\x190000" + 19 "\x19\x18\x0500000\x15\x00\x160\x16\xfa0\x16\xfa\x02&\b" + 20 "<\x18\x06000000\x18\x06000000\x1600" + 21 "\x19\x1c\x150\x150\x150000\x16\xfa0\x1600000" + 22 "00000000000000000000" + 23 "00000000000000000000" + 24 "00000000000000000000" + 25 "00000000000000000000" + 26 "00000000000000000000" + 27 "00000000000000000000" + 28 "00000000000000000000" + 29 "00000000000000000000" + 30 "00000000000000000000" + 31 "00000000000000000000" + 32 "0000000000000000000P" + 33 "\x01\x00\x00PAR1") 34 35 readAllData(t, data) 36 } 37 38 func TestRepeatedBinaryWithNil(t *testing.T) { 39 // this is here to somehow reproduce the issue discussed in https://github.com/fraugster/parquet-go/pull/8 40 sd, err := parquetschema.ParseSchemaDefinition(`message msg { 41 repeated binary foo; 42 }`) 43 require.NoError(t, err) 44 45 var buf bytes.Buffer 46 fw := NewFileWriter(&buf, WithSchemaDefinition(sd)) 47 48 err = fw.AddData(map[string]interface{}{ 49 "foo": [][]byte{ 50 []byte("hello"), 51 nil, 52 []byte("world!"), 53 }, 54 }) 55 require.NoError(t, err) 56 57 require.NoError(t, fw.Close()) 58 59 r, err := NewFileReader(bytes.NewReader(buf.Bytes())) 60 require.NoError(t, err) 61 62 row, err := r.NextRow() 63 require.NoError(t, err) 64 65 // here's a problem: we added nil, but got a []byte{}. 66 require.Equal(t, [][]byte{ 67 []byte("hello"), 68 {}, 69 []byte("world!"), 70 }, row["foo"]) 71 } 72 73 func TestByteArrayStore(t *testing.T) { 74 buf := &bytes.Buffer{} 75 pq := NewFileWriter(buf) 76 s1, err := NewByteArrayStore(parquet.Encoding_PLAIN, true, &ColumnParameters{}) 77 require.NoError(t, err) 78 s2, err := NewByteArrayStore(parquet.Encoding_PLAIN, true, &ColumnParameters{}) 79 require.NoError(t, err) 80 require.NoError(t, pq.AddColumnByPath([]string{"s1"}, NewDataColumn(s1, parquet.FieldRepetitionType_REQUIRED))) 81 require.NoError(t, pq.AddColumnByPath([]string{"s2"}, NewDataColumn(s2, parquet.FieldRepetitionType_REPEATED))) 82 83 // The old way is not effected 84 err = pq.AddData(map[string]interface{}{ 85 "s1": []byte("abc"), 86 "s2": [][]byte{ 87 []byte("a"), 88 []byte("b"), 89 []byte("c"), 90 }, 91 }) 92 assert.NoError(t, err) 93 // The new string data 94 err = pq.AddData(map[string]interface{}{ 95 "s1": "cba", 96 "s2": []string{ 97 "1", 98 "2", 99 "3", 100 }, 101 }) 102 assert.NoError(t, err) 103 require.NoError(t, pq.Close()) 104 105 pqr, err := NewFileReader(bytes.NewReader(buf.Bytes())) 106 assert.NoError(t, err) 107 108 r1, err := pqr.NextRow() 109 // The first one is equal to the input, since it is the proper type 110 require.Equal(t, r1, map[string]interface{}{ 111 "s1": []byte("abc"), 112 "s2": [][]byte{ 113 []byte("a"), 114 []byte("b"), 115 []byte("c"), 116 }}) 117 assert.NoError(t, err) 118 119 r2, err := pqr.NextRow() 120 // But since parquet do not keep the string type, the returned value here is []byte 121 require.Equal(t, r2, map[string]interface{}{ 122 "s1": []byte("cba"), 123 "s2": [][]byte{ 124 []byte("1"), 125 []byte("2"), 126 []byte("3"), 127 }}) 128 assert.NoError(t, err) 129 130 // There should be nothing left in the file 131 _, err = pqr.NextRow() 132 require.Error(t, err) 133 }