github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/row.go (about) 1 package parquet 2 3 import ( 4 "errors" 5 "fmt" 6 "io" 7 "reflect" 8 ) 9 10 const ( 11 defaultRowBufferSize = 42 12 ) 13 14 // Row represents a parquet row as a slice of values. 15 // 16 // Each value should embed a column index, repetition level, and definition 17 // level allowing the program to determine how to reconstruct the original 18 // object from the row. 19 type Row []Value 20 21 // MakeRow constructs a Row from a list of column values. 22 // 23 // The function panics if the column indexes of values in each column do not 24 // match their position in the argument list. 25 func MakeRow(columns ...[]Value) Row { return AppendRow(nil, columns...) } 26 27 // AppendRow appends to row the given list of column values. 28 // 29 // AppendRow can be used to construct a Row value from columns, while retaining 30 // the underlying memory buffer to avoid reallocation; for example: 31 // 32 // The function panics if the column indexes of values in each column do not 33 // match their position in the argument list. 34 func AppendRow(row Row, columns ...[]Value) Row { 35 numValues := 0 36 37 for expectedColumnIndex, column := range columns { 38 numValues += len(column) 39 40 for _, value := range column { 41 if value.columnIndex != ^int16(expectedColumnIndex) { 42 panic(fmt.Sprintf("value of column %d has column index %d", expectedColumnIndex, value.Column())) 43 } 44 } 45 } 46 47 if capacity := cap(row) - len(row); capacity < numValues { 48 row = append(make(Row, 0, len(row)+numValues), row...) 49 } 50 51 return appendRow(row, columns) 52 } 53 54 func appendRow(row Row, columns [][]Value) Row { 55 for _, column := range columns { 56 row = append(row, column...) 57 } 58 return row 59 } 60 61 // Clone creates a copy of the row which shares no pointers. 62 // 63 // This method is useful to capture rows after a call to RowReader.ReadRows when 64 // values need to be retained before the next call to ReadRows or after the lifespan 65 // of the reader. 66 func (row Row) Clone() Row { 67 clone := make(Row, len(row)) 68 for i := range row { 69 clone[i] = row[i].Clone() 70 } 71 return clone 72 } 73 74 // Equal returns true if row and other contain the same sequence of values. 75 func (row Row) Equal(other Row) bool { 76 if len(row) != len(other) { 77 return false 78 } 79 for i := range row { 80 if !Equal(row[i], other[i]) { 81 return false 82 } 83 if row[i].repetitionLevel != other[i].repetitionLevel { 84 return false 85 } 86 if row[i].definitionLevel != other[i].definitionLevel { 87 return false 88 } 89 if row[i].columnIndex != other[i].columnIndex { 90 return false 91 } 92 } 93 return true 94 } 95 96 // Range calls f for each column of row. 97 func (row Row) Range(f func(columnIndex int, columnValues []Value) bool) { 98 columnIndex := 0 99 100 for i := 0; i < len(row); { 101 j := i + 1 102 103 for j < len(row) && row[j].columnIndex == ^int16(columnIndex) { 104 j++ 105 } 106 107 if !f(columnIndex, row[i:j:j]) { 108 break 109 } 110 111 columnIndex++ 112 i = j 113 } 114 } 115 116 // RowSeeker is an interface implemented by readers of parquet rows which can be 117 // positioned at a specific row index. 118 type RowSeeker interface { 119 // Positions the stream on the given row index. 120 // 121 // Some implementations of the interface may only allow seeking forward. 122 // 123 // The method returns io.ErrClosedPipe if the stream had already been closed. 124 SeekToRow(int64) error 125 } 126 127 // RowReader reads a sequence of parquet rows. 128 type RowReader interface { 129 // ReadRows reads rows from the reader, returning the number of rows read 130 // into the buffer, and any error that occurred. Note that the rows read 131 // into the buffer are not safe for reuse after a subsequent call to 132 // ReadRows. Callers that want to reuse rows must copy the rows using Clone. 133 // 134 // When all rows have been read, the reader returns io.EOF to indicate the 135 // end of the sequence. It is valid for the reader to return both a non-zero 136 // number of rows and a non-nil error (including io.EOF). 137 // 138 // The buffer of rows passed as argument will be used to store values of 139 // each row read from the reader. If the rows are not nil, the backing array 140 // of the slices will be used as an optimization to avoid re-allocating new 141 // arrays. 142 // 143 // The application is expected to handle the case where ReadRows returns 144 // less rows than requested and no error, by looking at the first returned 145 // value from ReadRows, which is the number of rows that were read. 146 ReadRows([]Row) (int, error) 147 } 148 149 // RowReaderFrom reads parquet rows from reader. 150 type RowReaderFrom interface { 151 ReadRowsFrom(RowReader) (int64, error) 152 } 153 154 // RowReaderWithSchema is an extension of the RowReader interface which 155 // advertises the schema of rows returned by ReadRow calls. 156 type RowReaderWithSchema interface { 157 RowReader 158 Schema() *Schema 159 } 160 161 // RowReadSeeker is an interface implemented by row readers which support 162 // seeking to arbitrary row positions. 163 type RowReadSeeker interface { 164 RowReader 165 RowSeeker 166 } 167 168 // RowWriter writes parquet rows to an underlying medium. 169 type RowWriter interface { 170 // Writes rows to the writer, returning the number of rows written and any 171 // error that occurred. 172 // 173 // Because columnar operations operate on independent columns of values, 174 // writes of rows may not be atomic operations, and could result in some 175 // rows being partially written. The method returns the number of rows that 176 // were successfully written, but if an error occurs, values of the row(s) 177 // that failed to be written may have been partially committed to their 178 // columns. For that reason, applications should consider a write error as 179 // fatal and assume that they need to discard the state, they cannot retry 180 // the write nor recover the underlying file. 181 WriteRows([]Row) (int, error) 182 } 183 184 // RowWriterTo writes parquet rows to a writer. 185 type RowWriterTo interface { 186 WriteRowsTo(RowWriter) (int64, error) 187 } 188 189 // RowWriterWithSchema is an extension of the RowWriter interface which 190 // advertises the schema of rows expected to be passed to WriteRow calls. 191 type RowWriterWithSchema interface { 192 RowWriter 193 Schema() *Schema 194 } 195 196 // RowReaderFunc is a function type implementing the RowReader interface. 197 type RowReaderFunc func([]Row) (int, error) 198 199 func (f RowReaderFunc) ReadRows(rows []Row) (int, error) { return f(rows) } 200 201 // RowWriterFunc is a function type implementing the RowWriter interface. 202 type RowWriterFunc func([]Row) (int, error) 203 204 func (f RowWriterFunc) WriteRows(rows []Row) (int, error) { return f(rows) } 205 206 // MultiRowWriter constructs a RowWriter which dispatches writes to all the 207 // writers passed as arguments. 208 // 209 // When writing rows, if any of the writers returns an error, the operation is 210 // aborted and the error returned. If one of the writers did not error, but did 211 // not write all the rows, the operation is aborted and io.ErrShortWrite is 212 // returned. 213 // 214 // Rows are written sequentially to each writer in the order they are given to 215 // this function. 216 func MultiRowWriter(writers ...RowWriter) RowWriter { 217 m := &multiRowWriter{writers: make([]RowWriter, len(writers))} 218 copy(m.writers, writers) 219 return m 220 } 221 222 type multiRowWriter struct{ writers []RowWriter } 223 224 func (m *multiRowWriter) WriteRows(rows []Row) (int, error) { 225 for _, w := range m.writers { 226 n, err := w.WriteRows(rows) 227 if err != nil { 228 return n, err 229 } 230 if n != len(rows) { 231 return n, io.ErrShortWrite 232 } 233 } 234 return len(rows), nil 235 } 236 237 type forwardRowSeeker struct { 238 rows RowReader 239 seek int64 240 index int64 241 } 242 243 func (r *forwardRowSeeker) ReadRows(rows []Row) (int, error) { 244 for { 245 n, err := r.rows.ReadRows(rows) 246 247 if n > 0 && r.index < r.seek { 248 skip := r.seek - r.index 249 r.index += int64(n) 250 if skip >= int64(n) { 251 continue 252 } 253 254 for i, j := 0, int(skip); j < n; i++ { 255 rows[i] = append(rows[i][:0], rows[j]...) 256 } 257 258 n -= int(skip) 259 } 260 261 return n, err 262 } 263 } 264 265 func (r *forwardRowSeeker) SeekToRow(rowIndex int64) error { 266 if rowIndex >= r.index { 267 r.seek = rowIndex 268 return nil 269 } 270 return fmt.Errorf( 271 "SeekToRow: %T does not implement parquet.RowSeeker: cannot seek backward from row %d to %d", 272 r.rows, 273 r.index, 274 rowIndex, 275 ) 276 } 277 278 // CopyRows copies rows from src to dst. 279 // 280 // The underlying types of src and dst are tested to determine if they expose 281 // information about the schema of rows that are read and expected to be 282 // written. If the schema information are available but do not match, the 283 // function will attempt to automatically convert the rows from the source 284 // schema to the destination. 285 // 286 // As an optimization, the src argument may implement RowWriterTo to bypass 287 // the default row copy logic and provide its own. The dst argument may also 288 // implement RowReaderFrom for the same purpose. 289 // 290 // The function returns the number of rows written, or any error encountered 291 // other than io.EOF. 292 func CopyRows(dst RowWriter, src RowReader) (int64, error) { 293 return copyRows(dst, src, nil) 294 } 295 296 func copyRows(dst RowWriter, src RowReader, buf []Row) (written int64, err error) { 297 targetSchema := targetSchemaOf(dst) 298 sourceSchema := sourceSchemaOf(src) 299 300 if targetSchema != nil && sourceSchema != nil { 301 if !nodesAreEqual(targetSchema, sourceSchema) { 302 conv, err := Convert(targetSchema, sourceSchema) 303 if err != nil { 304 return 0, err 305 } 306 // The conversion effectively disables a potential optimization 307 // if the source reader implemented RowWriterTo. It is a trade off 308 // we are making to optimize for safety rather than performance. 309 // 310 // Entering this code path should not be the common case tho, it is 311 // most often used when parquet schemas are evolving, but we expect 312 // that the majority of files of an application to be sharing a 313 // common schema. 314 src = ConvertRowReader(src, conv) 315 } 316 } 317 318 if wt, ok := src.(RowWriterTo); ok { 319 return wt.WriteRowsTo(dst) 320 } 321 322 if rf, ok := dst.(RowReaderFrom); ok { 323 return rf.ReadRowsFrom(src) 324 } 325 326 if len(buf) == 0 { 327 buf = make([]Row, defaultRowBufferSize) 328 } 329 330 defer clearRows(buf) 331 332 for { 333 rn, err := src.ReadRows(buf) 334 335 if rn > 0 { 336 wn, err := dst.WriteRows(buf[:rn]) 337 if err != nil { 338 return written, err 339 } 340 341 written += int64(wn) 342 } 343 344 if err != nil { 345 if errors.Is(err, io.EOF) { 346 err = nil 347 } 348 return written, err 349 } 350 351 if rn == 0 { 352 return written, io.ErrNoProgress 353 } 354 } 355 } 356 357 func makeRows(n int) []Row { 358 buf := make([]Value, n) 359 row := make([]Row, n) 360 for i := range row { 361 row[i] = buf[i : i : i+1] 362 } 363 return row 364 } 365 366 func clearRows(rows []Row) { 367 for i, values := range rows { 368 clearValues(values) 369 rows[i] = values[:0] 370 } 371 } 372 373 func sourceSchemaOf(r RowReader) *Schema { 374 if rrs, ok := r.(RowReaderWithSchema); ok { 375 return rrs.Schema() 376 } 377 return nil 378 } 379 380 func targetSchemaOf(w RowWriter) *Schema { 381 if rws, ok := w.(RowWriterWithSchema); ok { 382 return rws.Schema() 383 } 384 return nil 385 } 386 387 // ============================================================================= 388 // Functions returning closures are marked with "go:noinline" below to prevent 389 // losing naming information of the closure in stack traces. 390 // 391 // Because some of the functions are very short (simply return a closure), the 392 // compiler inlines when at their call site, which result in the closure being 393 // named something like parquet.deconstructFuncOf.func2 instead of the original 394 // parquet.deconstructFuncOfLeaf.func1; the latter being much more meaningful 395 // when reading CPU or memory profiles. 396 // ============================================================================= 397 398 type levels struct { 399 repetitionDepth byte 400 repetitionLevel byte 401 definitionLevel byte 402 } 403 404 // deconstructFunc accepts a row, the current levels, the value to deserialize 405 // the current column onto, and returns the row minus the deserialied value(s) 406 // It recurses until it hits a leaf node, then deserializes that value 407 // individually as the base case. 408 type deconstructFunc func([][]Value, levels, reflect.Value) 409 410 func deconstructFuncOf(columnIndex int16, node Node) (int16, deconstructFunc) { 411 switch { 412 case node.Optional(): 413 return deconstructFuncOfOptional(columnIndex, node) 414 case node.Repeated(): 415 return deconstructFuncOfRepeated(columnIndex, node) 416 case isList(node): 417 return deconstructFuncOfList(columnIndex, node) 418 case isMap(node): 419 return deconstructFuncOfMap(columnIndex, node) 420 default: 421 return deconstructFuncOfRequired(columnIndex, node) 422 } 423 } 424 425 //go:noinline 426 func deconstructFuncOfOptional(columnIndex int16, node Node) (int16, deconstructFunc) { 427 columnIndex, deconstruct := deconstructFuncOf(columnIndex, Required(node)) 428 return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) { 429 if value.IsValid() { 430 if value.IsZero() { 431 value = reflect.Value{} 432 } else { 433 if value.Kind() == reflect.Ptr { 434 value = value.Elem() 435 } 436 levels.definitionLevel++ 437 } 438 } 439 deconstruct(columns, levels, value) 440 } 441 } 442 443 //go:noinline 444 func deconstructFuncOfRepeated(columnIndex int16, node Node) (int16, deconstructFunc) { 445 columnIndex, deconstruct := deconstructFuncOf(columnIndex, Required(node)) 446 return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) { 447 if value.Kind() == reflect.Interface { 448 value = value.Elem() 449 } 450 451 if !value.IsValid() || value.Len() == 0 { 452 deconstruct(columns, levels, reflect.Value{}) 453 return 454 } 455 456 levels.repetitionDepth++ 457 levels.definitionLevel++ 458 459 for i, n := 0, value.Len(); i < n; i++ { 460 deconstruct(columns, levels, value.Index(i)) 461 levels.repetitionLevel = levels.repetitionDepth 462 } 463 } 464 } 465 466 func deconstructFuncOfRequired(columnIndex int16, node Node) (int16, deconstructFunc) { 467 switch { 468 case node.Leaf(): 469 return deconstructFuncOfLeaf(columnIndex, node) 470 default: 471 return deconstructFuncOfGroup(columnIndex, node) 472 } 473 } 474 475 func deconstructFuncOfList(columnIndex int16, node Node) (int16, deconstructFunc) { 476 return deconstructFuncOf(columnIndex, Repeated(listElementOf(node))) 477 } 478 479 //go:noinline 480 func deconstructFuncOfMap(columnIndex int16, node Node) (int16, deconstructFunc) { 481 keyValue := mapKeyValueOf(node) 482 keyValueType := keyValue.GoType() 483 keyValueElem := keyValueType.Elem() 484 keyType := keyValueElem.Field(0).Type 485 valueType := keyValueElem.Field(1).Type 486 nextColumnIndex, deconstruct := deconstructFuncOf(columnIndex, schemaOf(keyValueElem)) 487 return nextColumnIndex, func(columns [][]Value, levels levels, mapValue reflect.Value) { 488 if !mapValue.IsValid() || mapValue.Len() == 0 { 489 deconstruct(columns, levels, reflect.Value{}) 490 return 491 } 492 493 levels.repetitionDepth++ 494 levels.definitionLevel++ 495 496 elem := reflect.New(keyValueElem).Elem() 497 k := elem.Field(0) 498 v := elem.Field(1) 499 500 for _, key := range mapValue.MapKeys() { 501 k.Set(key.Convert(keyType)) 502 v.Set(mapValue.MapIndex(key).Convert(valueType)) 503 deconstruct(columns, levels, elem) 504 levels.repetitionLevel = levels.repetitionDepth 505 } 506 } 507 } 508 509 //go:noinline 510 func deconstructFuncOfGroup(columnIndex int16, node Node) (int16, deconstructFunc) { 511 fields := node.Fields() 512 funcs := make([]deconstructFunc, len(fields)) 513 for i, field := range fields { 514 columnIndex, funcs[i] = deconstructFuncOf(columnIndex, field) 515 } 516 return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) { 517 if value.IsValid() { 518 for i, f := range funcs { 519 f(columns, levels, fields[i].Value(value)) 520 } 521 } else { 522 for _, f := range funcs { 523 f(columns, levels, value) 524 } 525 } 526 } 527 } 528 529 //go:noinline 530 func deconstructFuncOfLeaf(columnIndex int16, node Node) (int16, deconstructFunc) { 531 if columnIndex > MaxColumnIndex { 532 panic("row cannot be deconstructed because it has more than 127 columns") 533 } 534 typ := node.Type() 535 kind := typ.Kind() 536 lt := typ.LogicalType() 537 valueColumnIndex := ^columnIndex 538 return columnIndex + 1, func(columns [][]Value, levels levels, value reflect.Value) { 539 v := Value{} 540 541 if value.IsValid() { 542 v = makeValue(kind, lt, value) 543 } 544 545 v.repetitionLevel = levels.repetitionLevel 546 v.definitionLevel = levels.definitionLevel 547 v.columnIndex = valueColumnIndex 548 549 columns[columnIndex] = append(columns[columnIndex], v) 550 } 551 } 552 553 // "reconstructX" turns a Go value into a Go representation of a Parquet series 554 // of values 555 556 type reconstructFunc func(reflect.Value, levels, [][]Value) error 557 558 func reconstructFuncOf(columnIndex int16, node Node) (int16, reconstructFunc) { 559 switch { 560 case node.Optional(): 561 return reconstructFuncOfOptional(columnIndex, node) 562 case node.Repeated(): 563 return reconstructFuncOfRepeated(columnIndex, node) 564 case isList(node): 565 return reconstructFuncOfList(columnIndex, node) 566 case isMap(node): 567 return reconstructFuncOfMap(columnIndex, node) 568 default: 569 return reconstructFuncOfRequired(columnIndex, node) 570 } 571 } 572 573 //go:noinline 574 func reconstructFuncOfOptional(columnIndex int16, node Node) (int16, reconstructFunc) { 575 // We convert the optional func to required so that we eventually reach the 576 // leaf base-case. We're still using the heuristics of optional in the 577 // returned closure (see levels.definitionLevel++), but we don't actually do 578 // deserialization here, that happens in the leaf function, hence this line. 579 nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, Required(node)) 580 581 return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error { 582 levels.definitionLevel++ 583 584 if columns[0][0].definitionLevel < levels.definitionLevel { 585 value.Set(reflect.Zero(value.Type())) 586 return nil 587 } 588 589 if value.Kind() == reflect.Ptr { 590 if value.IsNil() { 591 value.Set(reflect.New(value.Type().Elem())) 592 } 593 value = value.Elem() 594 } 595 596 return reconstruct(value, levels, columns) 597 } 598 } 599 600 func setMakeSlice(v reflect.Value, n int) reflect.Value { 601 t := v.Type() 602 if t.Kind() == reflect.Interface { 603 t = reflect.TypeOf(([]interface{})(nil)) 604 } 605 s := reflect.MakeSlice(t, n, n) 606 v.Set(s) 607 return s 608 } 609 610 //go:noinline 611 func reconstructFuncOfRepeated(columnIndex int16, node Node) (int16, reconstructFunc) { 612 nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, Required(node)) 613 return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error { 614 levels.repetitionDepth++ 615 levels.definitionLevel++ 616 617 if columns[0][0].definitionLevel < levels.definitionLevel { 618 setMakeSlice(value, 0) 619 return nil 620 } 621 622 values := make([][]Value, len(columns)) 623 column := columns[0] 624 n := 0 625 626 for i, column := range columns { 627 values[i] = column[0:0:len(column)] 628 } 629 630 for i := 0; i < len(column); { 631 i++ 632 n++ 633 634 for i < len(column) && column[i].repetitionLevel > levels.repetitionDepth { 635 i++ 636 } 637 } 638 639 value = setMakeSlice(value, n) 640 641 for i := 0; i < n; i++ { 642 for j, column := range values { 643 column = column[:cap(column)] 644 if len(column) == 0 { 645 continue 646 } 647 648 k := 1 649 for k < len(column) && column[k].repetitionLevel > levels.repetitionDepth { 650 k++ 651 } 652 653 values[j] = column[:k] 654 } 655 656 if err := reconstruct(value.Index(i), levels, values); err != nil { 657 return err 658 } 659 660 for j, column := range values { 661 values[j] = column[len(column):len(column):cap(column)] 662 } 663 664 levels.repetitionLevel = levels.repetitionDepth 665 } 666 667 return nil 668 } 669 } 670 671 func reconstructFuncOfRequired(columnIndex int16, node Node) (int16, reconstructFunc) { 672 switch { 673 case node.Leaf(): 674 return reconstructFuncOfLeaf(columnIndex, node) 675 default: 676 return reconstructFuncOfGroup(columnIndex, node) 677 } 678 } 679 680 func reconstructFuncOfList(columnIndex int16, node Node) (int16, reconstructFunc) { 681 return reconstructFuncOf(columnIndex, Repeated(listElementOf(node))) 682 } 683 684 //go:noinline 685 func reconstructFuncOfMap(columnIndex int16, node Node) (int16, reconstructFunc) { 686 keyValue := mapKeyValueOf(node) 687 keyValueType := keyValue.GoType() 688 keyValueElem := keyValueType.Elem() 689 keyValueZero := reflect.Zero(keyValueElem) 690 nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, schemaOf(keyValueElem)) 691 return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error { 692 levels.repetitionDepth++ 693 levels.definitionLevel++ 694 695 if columns[0][0].definitionLevel < levels.definitionLevel { 696 value.Set(reflect.MakeMap(value.Type())) 697 return nil 698 } 699 700 values := make([][]Value, len(columns)) 701 column := columns[0] 702 t := value.Type() 703 if t.Kind() == reflect.Interface { 704 t = reflect.TypeOf((map[string]any)(nil)) 705 } 706 k := t.Key() 707 v := t.Elem() 708 n := 0 709 710 for i, column := range columns { 711 values[i] = column[0:0:len(column)] 712 } 713 714 for i := 0; i < len(column); { 715 i++ 716 n++ 717 718 for i < len(column) && column[i].repetitionLevel > levels.repetitionDepth { 719 i++ 720 } 721 } 722 723 if value.IsNil() { 724 m := reflect.MakeMapWithSize(t, n) 725 value.Set(m) 726 value = m // track map instead of interface{} for read[any]() 727 } 728 729 elem := reflect.New(keyValueElem).Elem() 730 for i := 0; i < n; i++ { 731 for j, column := range values { 732 column = column[:cap(column)] 733 k := 1 734 735 for k < len(column) && column[k].repetitionLevel > levels.repetitionDepth { 736 k++ 737 } 738 739 values[j] = column[:k] 740 } 741 742 if err := reconstruct(elem, levels, values); err != nil { 743 return err 744 } 745 746 for j, column := range values { 747 values[j] = column[len(column):len(column):cap(column)] 748 } 749 750 value.SetMapIndex(elem.Field(0).Convert(k), elem.Field(1).Convert(v)) 751 elem.Set(keyValueZero) 752 levels.repetitionLevel = levels.repetitionDepth 753 } 754 755 return nil 756 } 757 } 758 759 //go:noinline 760 func reconstructFuncOfGroup(columnIndex int16, node Node) (int16, reconstructFunc) { 761 fields := node.Fields() 762 funcs := make([]reconstructFunc, len(fields)) 763 columnOffsets := make([]int16, len(fields)) 764 firstColumnIndex := columnIndex 765 766 for i, field := range fields { 767 columnIndex, funcs[i] = reconstructFuncOf(columnIndex, field) 768 columnOffsets[i] = columnIndex - firstColumnIndex 769 } 770 771 return columnIndex, func(value reflect.Value, levels levels, columns [][]Value) error { 772 if value.Kind() == reflect.Interface { 773 value.Set(reflect.MakeMap(reflect.TypeOf((map[string]interface{})(nil)))) 774 value = value.Elem() 775 } 776 777 if value.Kind() == reflect.Map { 778 elemType := value.Type().Elem() 779 name := reflect.New(reflect.TypeOf("")).Elem() 780 elem := reflect.New(elemType).Elem() 781 zero := reflect.Zero(elemType) 782 783 if value.Len() > 0 { 784 value.Set(reflect.MakeMap(value.Type())) 785 } 786 787 off := int16(0) 788 789 for i, f := range funcs { 790 name.SetString(fields[i].Name()) 791 end := columnOffsets[i] 792 err := f(elem, levels, columns[off:end:end]) 793 if err != nil { 794 return fmt.Errorf("%s → %w", name, err) 795 } 796 off = end 797 value.SetMapIndex(name, elem) 798 elem.Set(zero) 799 } 800 } else { 801 off := int16(0) 802 803 for i, f := range funcs { 804 end := columnOffsets[i] 805 err := f(fields[i].Value(value), levels, columns[off:end:end]) 806 if err != nil { 807 return fmt.Errorf("%s → %w", fields[i].Name(), err) 808 } 809 off = end 810 } 811 } 812 813 return nil 814 } 815 } 816 817 //go:noinline 818 func reconstructFuncOfLeaf(columnIndex int16, node Node) (int16, reconstructFunc) { 819 typ := node.Type() 820 return columnIndex + 1, func(value reflect.Value, _ levels, columns [][]Value) error { 821 column := columns[0] 822 if len(column) == 0 { 823 return fmt.Errorf("no values found in parquet row for column %d", columnIndex) 824 } 825 return typ.AssignValue(value, column[0]) 826 } 827 }