github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/writer_go18_test.go (about) 1 //go:build go1.18 2 3 package parquet_test 4 5 import ( 6 "bytes" 7 "io" 8 "math/rand" 9 "reflect" 10 "testing" 11 12 "github.com/segmentio/parquet-go" 13 ) 14 15 func BenchmarkGenericWriter(b *testing.B) { 16 benchmarkGenericWriter[benchmarkRowType](b) 17 benchmarkGenericWriter[booleanColumn](b) 18 benchmarkGenericWriter[int32Column](b) 19 benchmarkGenericWriter[int64Column](b) 20 benchmarkGenericWriter[floatColumn](b) 21 benchmarkGenericWriter[doubleColumn](b) 22 benchmarkGenericWriter[byteArrayColumn](b) 23 benchmarkGenericWriter[fixedLenByteArrayColumn](b) 24 benchmarkGenericWriter[stringColumn](b) 25 benchmarkGenericWriter[indexedStringColumn](b) 26 benchmarkGenericWriter[uuidColumn](b) 27 benchmarkGenericWriter[timeColumn](b) 28 benchmarkGenericWriter[timeInMillisColumn](b) 29 benchmarkGenericWriter[mapColumn](b) 30 benchmarkGenericWriter[decimalColumn](b) 31 benchmarkGenericWriter[contact](b) 32 benchmarkGenericWriter[paddedBooleanColumn](b) 33 benchmarkGenericWriter[optionalInt32Column](b) 34 benchmarkGenericWriter[repeatedInt32Column](b) 35 } 36 37 func benchmarkGenericWriter[Row generator[Row]](b *testing.B) { 38 var model Row 39 b.Run(reflect.TypeOf(model).Name(), func(b *testing.B) { 40 prng := rand.New(rand.NewSource(0)) 41 rows := make([]Row, benchmarkNumRows) 42 for i := range rows { 43 rows[i] = rows[i].generate(prng) 44 } 45 46 b.Run("go1.17", func(b *testing.B) { 47 writer := parquet.NewWriter(io.Discard, parquet.SchemaOf(rows[0])) 48 i := 0 49 benchmarkRowsPerSecond(b, func() int { 50 for j := 0; j < benchmarkRowsPerStep; j++ { 51 if err := writer.Write(&rows[i]); err != nil { 52 b.Fatal(err) 53 } 54 } 55 56 i += benchmarkRowsPerStep 57 i %= benchmarkNumRows 58 59 if i == 0 { 60 writer.Close() 61 writer.Reset(io.Discard) 62 } 63 return benchmarkRowsPerStep 64 }) 65 }) 66 67 b.Run("go1.18", func(b *testing.B) { 68 writer := parquet.NewGenericWriter[Row](io.Discard) 69 i := 0 70 benchmarkRowsPerSecond(b, func() int { 71 n, err := writer.Write(rows[i : i+benchmarkRowsPerStep]) 72 if err != nil { 73 b.Fatal(err) 74 } 75 76 i += benchmarkRowsPerStep 77 i %= benchmarkNumRows 78 79 if i == 0 { 80 writer.Close() 81 writer.Reset(io.Discard) 82 } 83 return n 84 }) 85 }) 86 }) 87 } 88 89 func TestIssue272(t *testing.T) { 90 type T2 struct { 91 X string `parquet:",dict,optional"` 92 } 93 94 type T1 struct { 95 TA *T2 96 TB *T2 97 } 98 99 type T struct { 100 T1 *T1 101 } 102 103 const nRows = 1 104 105 row := T{ 106 T1: &T1{ 107 TA: &T2{ 108 X: "abc", 109 }, 110 }, 111 } 112 113 rows := make([]T, nRows) 114 for i := range rows { 115 rows[i] = row 116 } 117 118 b := new(bytes.Buffer) 119 w := parquet.NewGenericWriter[T](b) 120 121 if _, err := w.Write(rows); err != nil { 122 t.Fatal(err) 123 } 124 if err := w.Close(); err != nil { 125 t.Fatal(err) 126 } 127 128 f := bytes.NewReader(b.Bytes()) 129 r := parquet.NewGenericReader[T](f) 130 131 parquetRows := make([]parquet.Row, nRows) 132 n, err := r.ReadRows(parquetRows) 133 if err != nil && err != io.EOF { 134 t.Fatal(err) 135 } 136 if n != nRows { 137 t.Fatalf("wrong number of rows read: want=%d got=%d", nRows, n) 138 } 139 for _, r := range parquetRows { 140 if d := r[0].DefinitionLevel(); d != 3 { 141 t.Errorf("wrong definition level for column 0: %d", d) 142 } 143 if d := r[1].DefinitionLevel(); d != 1 { 144 t.Errorf("wrong definition level for column 1: %d", d) 145 } 146 } 147 } 148 149 func TestIssue279(t *testing.T) { 150 type T2 struct { 151 Id int `parquet:",plain,optional"` 152 Name string `parquet:",plain,optional"` 153 } 154 155 type T1 struct { 156 TA []*T2 157 } 158 159 type T struct { 160 T1 *T1 161 } 162 163 const nRows = 1 164 165 row := T{ 166 T1: &T1{ 167 TA: []*T2{ 168 { 169 Id: 43, 170 Name: "john", 171 }, 172 }, 173 }, 174 } 175 176 rows := make([]T, nRows) 177 for i := range rows { 178 rows[i] = row 179 } 180 181 b := new(bytes.Buffer) 182 w := parquet.NewGenericWriter[T](b) 183 184 if _, err := w.Write(rows); err != nil { 185 t.Fatal(err) 186 } 187 if err := w.Close(); err != nil { 188 t.Fatal(err) 189 } 190 191 f := bytes.NewReader(b.Bytes()) 192 r := parquet.NewGenericReader[T](f) 193 194 parquetRows := make([]parquet.Row, nRows) 195 n, err := r.ReadRows(parquetRows) 196 if err != nil && err != io.EOF { 197 t.Fatal(err) 198 } 199 if n != nRows { 200 t.Fatalf("wrong number of rows read: want=%d got=%d", nRows, n) 201 } 202 for _, r := range parquetRows { 203 if d := r[0].DefinitionLevel(); d != 3 { 204 t.Errorf("wrong definition level for column 0: %d", d) 205 } 206 if d := r[1].DefinitionLevel(); d != 3 { 207 t.Errorf("wrong definition level for column 1: %d", d) 208 } 209 } 210 } 211 212 func TestIssue302(t *testing.T) { 213 tests := []struct { 214 name string 215 fn func(t *testing.T) 216 }{ 217 { 218 name: "SimpleMap", 219 fn: func(t *testing.T) { 220 type M map[string]int 221 222 type T struct { 223 M M `parquet:","` 224 } 225 226 b := new(bytes.Buffer) 227 _ = parquet.NewGenericWriter[T](b) 228 229 }, 230 }, 231 232 { 233 name: "MapWithValueTag", 234 fn: func(t *testing.T) { 235 type M map[string]int 236 237 type T struct { 238 M M `parquet:"," parquet-value:",zstd"` 239 } 240 241 b := new(bytes.Buffer) 242 _ = parquet.NewGenericWriter[T](b) 243 244 }, 245 }, 246 247 { 248 name: "MapWithOptionalTag", 249 fn: func(t *testing.T) { 250 type M map[string]int 251 252 type T struct { 253 M M `parquet:",optional"` 254 } 255 256 b := new(bytes.Buffer) 257 w := parquet.NewGenericWriter[T](b) 258 expect := []T{ 259 { 260 M: M{ 261 "Holden": 1, 262 "Naomi": 2, 263 }, 264 }, 265 { 266 M: nil, 267 }, 268 { 269 M: M{ 270 "Naomi": 1, 271 "Holden": 2, 272 }, 273 }, 274 } 275 _, err := w.Write(expect) 276 if err != nil { 277 t.Fatal(err) 278 } 279 if err = w.Close(); err != nil { 280 t.Fatal(err) 281 } 282 283 bufReader := bytes.NewReader(b.Bytes()) 284 r := parquet.NewGenericReader[T](bufReader) 285 values := make([]T, 3) 286 _, err = r.Read(values) 287 if !reflect.DeepEqual(expect, values) { 288 t.Fatalf("values do not match.\n\texpect: %v\n\tactual: %v", expect, values) 289 } 290 }, 291 }, 292 } 293 294 for _, test := range tests { 295 t.Run(test.name, test.fn) 296 } 297 } 298 299 func TestIssue347Writer(t *testing.T) { 300 type TestType struct { 301 Key int 302 } 303 304 b := new(bytes.Buffer) 305 // instantiating with concrete type shouldn't panic 306 _ = parquet.NewGenericWriter[TestType](b) 307 308 // instantiating with schema and interface type parameter shouldn't panic 309 schema := parquet.SchemaOf(TestType{}) 310 _ = parquet.NewGenericWriter[any](b, schema) 311 312 defer func() { 313 if r := recover(); r == nil { 314 t.Errorf("instantiating generic buffer without schema and with interface " + 315 "type parameter should panic") 316 } 317 }() 318 _ = parquet.NewGenericWriter[any](b) 319 } 320 321 func TestIssue375(t *testing.T) { 322 type Row struct{ FirstName, LastName string } 323 324 output := new(bytes.Buffer) 325 writer := parquet.NewGenericWriter[Row](output, parquet.MaxRowsPerRowGroup(10)) 326 327 rows := make([]Row, 100) 328 for i := range rows { 329 rows[i] = Row{ 330 FirstName: "0123456789"[i%10 : i%10+1], 331 LastName: "foo", 332 } 333 } 334 335 n, err := writer.Write(rows) 336 if err != nil { 337 t.Fatal(err) 338 } 339 if n != len(rows) { 340 t.Fatal("wrong number of rows written:", n) 341 } 342 343 if err := writer.Close(); err != nil { 344 t.Fatal(err) 345 } 346 347 f, err := parquet.OpenFile(bytes.NewReader(output.Bytes()), int64(output.Len())) 348 if err != nil { 349 t.Fatal(err) 350 } 351 352 rowGroups := f.RowGroups() 353 if len(rowGroups) != 10 { 354 t.Errorf("wrong number of row groups in parquet file: want=10 got=%d", len(rowGroups)) 355 } 356 } 357 358 func TestGenericSetKeyValueMetadata(t *testing.T) { 359 testKey := "test-key" 360 testValue := "test-value" 361 362 type Row struct{ FirstName, LastName string } 363 364 output := new(bytes.Buffer) 365 writer := parquet.NewGenericWriter[Row](output, parquet.MaxRowsPerRowGroup(10)) 366 367 rows := []Row{ 368 {FirstName: "First", LastName: "Last"}, 369 } 370 371 _, err := writer.Write(rows) 372 if err != nil { 373 t.Fatal(err) 374 } 375 376 writer.SetKeyValueMetadata(testKey, testValue) 377 378 err = writer.Close() 379 if err != nil { 380 t.Fatal(err) 381 } 382 383 f, err := parquet.OpenFile(bytes.NewReader(output.Bytes()), int64(output.Len())) 384 if err != nil { 385 t.Fatal(err) 386 } 387 388 value, ok := f.Lookup(testKey) 389 if !ok { 390 t.Fatalf("key/value metadata should have included %q", testKey) 391 } 392 if value != testValue { 393 t.Errorf("expected %q, got %q", testValue, value) 394 } 395 }