github.com/fraugster/parquet-go@v0.12.0/filereader_test.go (about) 1 package goparquet 2 3 import ( 4 "bytes" 5 "io" 6 "math/rand" 7 "testing" 8 9 "github.com/fraugster/parquet-go/parquetschema" 10 "github.com/stretchr/testify/require" 11 ) 12 13 func buildTestStream(t *testing.T) []byte { 14 schema, err := parquetschema.ParseSchemaDefinition(`message msg { 15 required int64 a; 16 required int64 b; 17 optional group x { 18 required int64 c; 19 required int64 d; 20 } 21 required group y { 22 required int64 e; 23 } 24 } 25 `) 26 require.NoError(t, err) 27 buf := &bytes.Buffer{} 28 pw := NewFileWriter(buf, WithSchemaDefinition(schema)) 29 for i := 0; i < 10000; i++ { 30 data := map[string]interface{}{ 31 "a": rand.Int63(), 32 "b": rand.Int63(), 33 "x": map[string]interface{}{ 34 "c": rand.Int63(), 35 "d": rand.Int63(), 36 }, 37 "y": map[string]interface{}{ 38 "e": rand.Int63(), 39 }, 40 } 41 require.NoError(t, pw.AddData(data)) 42 if i%100 == 0 { 43 require.NoError(t, pw.FlushRowGroup()) 44 } 45 } 46 require.NoError(t, pw.Close()) 47 return buf.Bytes() 48 } 49 50 func TestByteReaderSelected(t *testing.T) { 51 r := buildTestStream(t) 52 pr, err := NewFileReader(bytes.NewReader(r), "a") 53 require.NoError(t, err) 54 55 for { 56 data, err := pr.NextRow() 57 if err == io.EOF { 58 break 59 } 60 require.NoError(t, err) 61 require.Equal(t, 2, len(data)) 62 _, ok := data["a"] 63 require.True(t, ok) 64 y, ok := data["y"] 65 require.True(t, ok) 66 require.Empty(t, y) 67 } 68 } 69 70 func TestByteReaderSelectedInner(t *testing.T) { 71 r := buildTestStream(t) 72 pr, err := NewFileReader(bytes.NewReader(r), "x.c") 73 require.NoError(t, err) 74 75 for { 76 data, err := pr.NextRow() 77 if err == io.EOF { 78 break 79 } 80 require.NoError(t, err) 81 require.Equal(t, 2, len(data)) 82 x, ok := data["x"].(map[string]interface{}) 83 require.True(t, ok) 84 require.Equal(t, 1, len(x)) 85 y, ok := data["y"] 86 require.True(t, ok) 87 require.Empty(t, y) 88 } 89 } 90 91 func TestByteReaderSelectedInnerByColumnPath(t *testing.T) { 92 r := buildTestStream(t) 93 pr, err := NewFileReaderWithOptions(bytes.NewReader(r), WithColumnPaths(ColumnPath{"x", "c"})) 94 require.NoError(t, err) 95 96 for { 97 data, err := pr.NextRow() 98 if err == io.EOF { 99 break 100 } 101 require.NoError(t, err) 102 require.Equal(t, 2, len(data)) 103 x, ok := data["x"].(map[string]interface{}) 104 require.True(t, ok) 105 require.Equal(t, 1, len(x)) 106 y, ok := data["y"] 107 require.True(t, ok) 108 require.Empty(t, y) 109 } 110 } 111 112 func TestByteReaderSelectedInnerFull(t *testing.T) { 113 r := buildTestStream(t) 114 pr, err := NewFileReader(bytes.NewReader(r), "x") 115 require.NoError(t, err) 116 117 require.NotNil(t, pr.GetColumnByName("x.c")) 118 119 for { 120 data, err := pr.NextRow() 121 if err == io.EOF { 122 break 123 } 124 require.NoError(t, err) 125 require.Equal(t, 2, len(data)) 126 x, ok := data["x"].(map[string]interface{}) 127 require.True(t, ok) 128 require.Equal(t, 2, len(x)) 129 y, ok := data["y"] 130 require.True(t, ok) 131 require.Empty(t, y) 132 } 133 } 134 135 func TestByteReaderSelectedInnerFullByColumnPath(t *testing.T) { 136 r := buildTestStream(t) 137 pr, err := NewFileReaderWithOptions(bytes.NewReader(r), WithColumnPaths(ColumnPath{"x"})) 138 require.NoError(t, err) 139 140 require.NotNil(t, pr.GetColumnByPath(ColumnPath{"x", "c"})) 141 142 for { 143 data, err := pr.NextRow() 144 if err == io.EOF { 145 break 146 } 147 require.NoError(t, err) 148 require.Equal(t, 2, len(data)) 149 x, ok := data["x"].(map[string]interface{}) 150 require.True(t, ok) 151 require.Equal(t, 2, len(x)) 152 y, ok := data["y"] 153 require.True(t, ok) 154 require.Empty(t, y) 155 } 156 } 157 158 func TestByteReaderSelectedInnerFullSetSelectedColumns(t *testing.T) { 159 r := buildTestStream(t) 160 pr, err := NewFileReaderWithOptions(bytes.NewReader(r)) 161 require.NoError(t, err) 162 163 pr.SetSelectedColumns("x") 164 165 for { 166 data, err := pr.NextRow() 167 if err == io.EOF { 168 break 169 } 170 require.NoError(t, err) 171 require.Equal(t, 2, len(data)) 172 x, ok := data["x"].(map[string]interface{}) 173 require.True(t, ok) 174 require.Equal(t, 2, len(x)) 175 y, ok := data["y"] 176 require.True(t, ok) 177 require.Empty(t, y) 178 } 179 } 180 181 func TestByteReaderSelectedInnerFullSetSelectedColumnsByPath(t *testing.T) { 182 r := buildTestStream(t) 183 pr, err := NewFileReaderWithOptions(bytes.NewReader(r)) 184 require.NoError(t, err) 185 186 pr.SetSelectedColumnsByPath(ColumnPath{"x"}) 187 188 for { 189 data, err := pr.NextRow() 190 if err == io.EOF { 191 break 192 } 193 require.NoError(t, err) 194 require.Equal(t, 2, len(data)) 195 x, ok := data["x"].(map[string]interface{}) 196 require.True(t, ok) 197 require.Equal(t, 2, len(x)) 198 y, ok := data["y"] 199 require.True(t, ok) 200 require.Empty(t, y) 201 } 202 } 203 204 func TestIssue60(t *testing.T) { 205 sd, err := parquetschema.ParseSchemaDefinition(`message test { 206 required group population (LIST){ 207 repeated group list { 208 optional int64 element; 209 } 210 } 211 }`) 212 require.NoError(t, err) 213 214 var buf bytes.Buffer 215 fw := NewFileWriter(&buf, WithSchemaDefinition(sd)) 216 217 err = fw.AddData(map[string]interface{}{ 218 "population": map[string]interface{}{ 219 "list": []map[string]interface{}{ 220 {"element": int64(23)}, 221 {"element": nil}, 222 {"element": int64(42)}, 223 }, 224 }, 225 }) 226 require.NoError(t, err) 227 228 require.NoError(t, fw.Close()) 229 230 r, err := NewFileReader(bytes.NewReader(buf.Bytes())) 231 require.NoError(t, err) 232 233 row, err := r.NextRow() 234 require.NoError(t, err) 235 236 require.Equal(t, map[string]interface{}{ 237 "population": map[string]interface{}{ 238 "list": []map[string]interface{}{ 239 {"element": int64(23)}, 240 {}, 241 {"element": int64(42)}, 242 }, 243 }, 244 }, row) 245 246 t.Logf("row = %#v", row) 247 }