github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/row.go (about) 1 package parquet 2 3 import ( 4 "errors" 5 "fmt" 6 "io" 7 "reflect" 8 ) 9 10 const ( 11 defaultRowBufferSize = 42 12 ) 13 14 // Row represents a parquet row as a slice of values. 15 // 16 // Each value should embed a column index, repetition level, and definition 17 // level allowing the program to determine how to reconstruct the original 18 // object from the row. 19 type Row []Value 20 21 // MakeRow constructs a Row from a list of column values. 22 // 23 // The function panics if the column indexes of values in each column do not 24 // match their position in the argument list. 25 func MakeRow(columns ...[]Value) Row { return AppendRow(nil, columns...) } 26 27 // AppendRow appends to row the given list of column values. 28 // 29 // AppendRow can be used to construct a Row value from columns, while retaining 30 // the underlying memory buffer to avoid reallocation; for example: 31 // 32 // The function panics if the column indexes of values in each column do not 33 // match their position in the argument list. 34 func AppendRow(row Row, columns ...[]Value) Row { 35 numValues := 0 36 37 for expectedColumnIndex, column := range columns { 38 numValues += len(column) 39 40 for _, value := range column { 41 if value.columnIndex != ^int16(expectedColumnIndex) { 42 panic(fmt.Sprintf("value of column %d has column index %d", expectedColumnIndex, value.Column())) 43 } 44 } 45 } 46 47 if capacity := cap(row) - len(row); capacity < numValues { 48 row = append(make(Row, 0, len(row)+numValues), row...) 49 } 50 51 return appendRow(row, columns) 52 } 53 54 func appendRow(row Row, columns [][]Value) Row { 55 for _, column := range columns { 56 row = append(row, column...) 57 } 58 return row 59 } 60 61 // Clone creates a copy of the row which shares no pointers. 
// Clone creates a copy of the row which shares no pointers.
//
// This method is useful to capture rows after a call to RowReader.ReadRows when
// values need to be retained before the next call to ReadRows or after the lifespan
// of the reader.
func (row Row) Clone() Row {
	clone := make(Row, len(row))
	for i := range row {
		// Each value is deep-copied so no memory is shared with the source.
		clone[i] = row[i].Clone()
	}
	return clone
}

// Equal returns true if row and other contain the same sequence of values.
//
// Two rows are only equal when, at every position, the values compare equal
// and carry the same repetition level, definition level, and column index.
func (row Row) Equal(other Row) bool {
	if len(row) != len(other) {
		return false
	}
	for i := range row {
		if !Equal(row[i], other[i]) {
			return false
		}
		if row[i].repetitionLevel != other[i].repetitionLevel {
			return false
		}
		if row[i].definitionLevel != other[i].definitionLevel {
			return false
		}
		if row[i].columnIndex != other[i].columnIndex {
			return false
		}
	}
	return true
}

// Range calls f for each column of row.
//
// Values are grouped into columns by their (ones'-complement) column index;
// iteration stops early if f returns false.
func (row Row) Range(f func(columnIndex int, columnValues []Value) bool) {
	columnIndex := 0

	for i := 0; i < len(row); {
		// Advance j to the end of the run of values belonging to columnIndex.
		j := i + 1

		for j < len(row) && row[j].columnIndex == ^int16(columnIndex) {
			j++
		}

		// The three-index slice prevents f from appending into row's storage.
		if !f(columnIndex, row[i:j:j]) {
			break
		}

		columnIndex++
		i = j
	}
}

// RowSeeker is an interface implemented by readers of parquet rows which can be
// positioned at a specific row index.
type RowSeeker interface {
	// Positions the stream on the given row index.
	//
	// Some implementations of the interface may only allow seeking forward.
	//
	// The method returns io.ErrClosedPipe if the stream had already been closed.
	SeekToRow(int64) error
}

// RowReader reads a sequence of parquet rows.
type RowReader interface {
	// ReadRows reads rows from the reader, returning the number of rows read
	// into the buffer, and any error that occurred. Note that the rows read
	// into the buffer are not safe for reuse after a subsequent call to
	// ReadRows. Callers that want to reuse rows must copy the rows using Clone.
	//
	// When all rows have been read, the reader returns io.EOF to indicate the
	// end of the sequence. It is valid for the reader to return both a non-zero
	// number of rows and a non-nil error (including io.EOF).
	//
	// The buffer of rows passed as argument will be used to store values of
	// each row read from the reader. If the rows are not nil, the backing array
	// of the slices will be used as an optimization to avoid re-allocating new
	// arrays.
	//
	// The application is expected to handle the case where ReadRows returns
	// less rows than requested and no error, by looking at the first returned
	// value from ReadRows, which is the number of rows that were read.
	ReadRows([]Row) (int, error)
}

// RowReaderFrom reads parquet rows from reader.
type RowReaderFrom interface {
	ReadRowsFrom(RowReader) (int64, error)
}

// RowReaderWithSchema is an extension of the RowReader interface which
// advertises the schema of rows returned by ReadRow calls.
type RowReaderWithSchema interface {
	RowReader
	Schema() *Schema
}

// RowReadSeeker is an interface implemented by row readers which support
// seeking to arbitrary row positions.
type RowReadSeeker interface {
	RowReader
	RowSeeker
}

// RowWriter writes parquet rows to an underlying medium.
type RowWriter interface {
	// Writes rows to the writer, returning the number of rows written and any
	// error that occurred.
	//
	// Because columnar operations operate on independent columns of values,
	// writes of rows may not be atomic operations, and could result in some
	// rows being partially written. The method returns the number of rows that
	// were successfully written, but if an error occurs, values of the row(s)
	// that failed to be written may have been partially committed to their
	// columns. For that reason, applications should consider a write error as
	// fatal and assume that they need to discard the state, they cannot retry
	// the write nor recover the underlying file.
	WriteRows([]Row) (int, error)
}

// RowWriterTo writes parquet rows to a writer.
type RowWriterTo interface {
	WriteRowsTo(RowWriter) (int64, error)
}

// RowWriterWithSchema is an extension of the RowWriter interface which
// advertises the schema of rows expected to be passed to WriteRow calls.
type RowWriterWithSchema interface {
	RowWriter
	Schema() *Schema
}

// RowReaderFunc is a function type implementing the RowReader interface.
type RowReaderFunc func([]Row) (int, error)

// ReadRows calls f, satisfying the RowReader interface.
func (f RowReaderFunc) ReadRows(rows []Row) (int, error) { return f(rows) }

// RowWriterFunc is a function type implementing the RowWriter interface.
type RowWriterFunc func([]Row) (int, error)

// WriteRows calls f, satisfying the RowWriter interface.
func (f RowWriterFunc) WriteRows(rows []Row) (int, error) { return f(rows) }

// MultiRowWriter constructs a RowWriter which dispatches writes to all the
// writers passed as arguments.
//
// When writing rows, if any of the writers returns an error, the operation is
// aborted and the error returned. If one of the writers did not error, but did
// not write all the rows, the operation is aborted and io.ErrShortWrite is
// returned.
//
// Rows are written sequentially to each writer in the order they are given to
// this function.
216 func MultiRowWriter(writers ...RowWriter) RowWriter { 217 m := &multiRowWriter{writers: make([]RowWriter, len(writers))} 218 copy(m.writers, writers) 219 return m 220 } 221 222 type multiRowWriter struct{ writers []RowWriter } 223 224 func (m *multiRowWriter) WriteRows(rows []Row) (int, error) { 225 for _, w := range m.writers { 226 n, err := w.WriteRows(rows) 227 if err != nil { 228 return n, err 229 } 230 if n != len(rows) { 231 return n, io.ErrShortWrite 232 } 233 } 234 return len(rows), nil 235 } 236 237 type forwardRowSeeker struct { 238 rows RowReader 239 seek int64 240 index int64 241 } 242 243 func (r *forwardRowSeeker) ReadRows(rows []Row) (int, error) { 244 for { 245 n, err := r.rows.ReadRows(rows) 246 247 if n > 0 && r.index < r.seek { 248 skip := r.seek - r.index 249 r.index += int64(n) 250 if skip >= int64(n) { 251 continue 252 } 253 254 for i, j := 0, int(skip); j < n; i++ { 255 rows[i] = append(rows[i][:0], rows[j]...) 256 } 257 258 n -= int(skip) 259 } 260 261 return n, err 262 } 263 } 264 265 func (r *forwardRowSeeker) SeekToRow(rowIndex int64) error { 266 if rowIndex >= r.index { 267 r.seek = rowIndex 268 return nil 269 } 270 return fmt.Errorf( 271 "SeekToRow: %T does not implement parquet.RowSeeker: cannot seek backward from row %d to %d", 272 r.rows, 273 r.index, 274 rowIndex, 275 ) 276 } 277 278 // CopyRows copies rows from src to dst. 279 // 280 // The underlying types of src and dst are tested to determine if they expose 281 // information about the schema of rows that are read and expected to be 282 // written. If the schema information are available but do not match, the 283 // function will attempt to automatically convert the rows from the source 284 // schema to the destination. 285 // 286 // As an optimization, the src argument may implement RowWriterTo to bypass 287 // the default row copy logic and provide its own. The dst argument may also 288 // implement RowReaderFrom for the same purpose. 
289 // 290 // The function returns the number of rows written, or any error encountered 291 // other than io.EOF. 292 func CopyRows(dst RowWriter, src RowReader) (int64, error) { 293 return copyRows(dst, src, nil) 294 } 295 296 func copyRows(dst RowWriter, src RowReader, buf []Row) (written int64, err error) { 297 targetSchema := targetSchemaOf(dst) 298 sourceSchema := sourceSchemaOf(src) 299 300 if targetSchema != nil && sourceSchema != nil { 301 if !nodesAreEqual(targetSchema, sourceSchema) { 302 conv, err := Convert(targetSchema, sourceSchema) 303 if err != nil { 304 return 0, err 305 } 306 // The conversion effectively disables a potential optimization 307 // if the source reader implemented RowWriterTo. It is a trade off 308 // we are making to optimize for safety rather than performance. 309 // 310 // Entering this code path should not be the common case tho, it is 311 // most often used when parquet schemas are evolving, but we expect 312 // that the majority of files of an application to be sharing a 313 // common schema. 
314 src = ConvertRowReader(src, conv) 315 } 316 } 317 318 if wt, ok := src.(RowWriterTo); ok { 319 return wt.WriteRowsTo(dst) 320 } 321 322 if rf, ok := dst.(RowReaderFrom); ok { 323 return rf.ReadRowsFrom(src) 324 } 325 326 if len(buf) == 0 { 327 buf = make([]Row, defaultRowBufferSize) 328 } 329 330 defer clearRows(buf) 331 332 for { 333 rn, err := src.ReadRows(buf) 334 335 if rn > 0 { 336 wn, err := dst.WriteRows(buf[:rn]) 337 if err != nil { 338 return written, err 339 } 340 341 written += int64(wn) 342 } 343 344 if err != nil { 345 if errors.Is(err, io.EOF) { 346 err = nil 347 } 348 return written, err 349 } 350 351 if rn == 0 { 352 return written, io.ErrNoProgress 353 } 354 } 355 } 356 357 func makeRows(n int) []Row { 358 buf := make([]Value, n) 359 row := make([]Row, n) 360 for i := range row { 361 row[i] = buf[i : i : i+1] 362 } 363 return row 364 } 365 366 func clearRows(rows []Row) { 367 for i, values := range rows { 368 clearValues(values) 369 rows[i] = values[:0] 370 } 371 } 372 373 func sourceSchemaOf(r RowReader) *Schema { 374 if rrs, ok := r.(RowReaderWithSchema); ok { 375 return rrs.Schema() 376 } 377 return nil 378 } 379 380 func targetSchemaOf(w RowWriter) *Schema { 381 if rws, ok := w.(RowWriterWithSchema); ok { 382 return rws.Schema() 383 } 384 return nil 385 } 386 387 // ============================================================================= 388 // Functions returning closures are marked with "go:noinline" below to prevent 389 // losing naming information of the closure in stack traces. 390 // 391 // Because some of the functions are very short (simply return a closure), the 392 // compiler inlines when at their call site, which result in the closure being 393 // named something like parquet.deconstructFuncOf.func2 instead of the original 394 // parquet.deconstructFuncOfLeaf.func1; the latter being much more meaningful 395 // when reading CPU or memory profiles. 
396 // ============================================================================= 397 398 type levels struct { 399 repetitionDepth byte 400 repetitionLevel byte 401 definitionLevel byte 402 } 403 404 // deconstructFunc accepts a row, the current levels, the value to deserialize 405 // the current column onto, and returns the row minus the deserialied value(s) 406 // It recurses until it hits a leaf node, then deserializes that value 407 // individually as the base case. 408 type deconstructFunc func([][]Value, levels, reflect.Value) 409 410 func deconstructFuncOf(columnIndex int16, node Node) (int16, deconstructFunc) { 411 switch { 412 case node.Optional(): 413 return deconstructFuncOfOptional(columnIndex, node) 414 case node.Repeated(): 415 return deconstructFuncOfRepeated(columnIndex, node) 416 case isList(node): 417 return deconstructFuncOfList(columnIndex, node) 418 case isMap(node): 419 return deconstructFuncOfMap(columnIndex, node) 420 default: 421 return deconstructFuncOfRequired(columnIndex, node) 422 } 423 } 424 425 //go:noinline 426 func deconstructFuncOfOptional(columnIndex int16, node Node) (int16, deconstructFunc) { 427 columnIndex, deconstruct := deconstructFuncOf(columnIndex, Required(node)) 428 return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) { 429 if value.IsValid() { 430 if value.IsZero() { 431 value = reflect.Value{} 432 } else { 433 if value.Kind() == reflect.Ptr { 434 value = value.Elem() 435 } 436 levels.definitionLevel++ 437 } 438 } 439 deconstruct(columns, levels, value) 440 } 441 } 442 443 //go:noinline 444 func deconstructFuncOfRepeated(columnIndex int16, node Node) (int16, deconstructFunc) { 445 columnIndex, deconstruct := deconstructFuncOf(columnIndex, Required(node)) 446 return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) { 447 if !value.IsValid() || value.Len() == 0 { 448 deconstruct(columns, levels, reflect.Value{}) 449 return 450 } 451 452 levels.repetitionDepth++ 453 
levels.definitionLevel++ 454 455 for i, n := 0, value.Len(); i < n; i++ { 456 deconstruct(columns, levels, value.Index(i)) 457 levels.repetitionLevel = levels.repetitionDepth 458 } 459 } 460 } 461 462 func deconstructFuncOfRequired(columnIndex int16, node Node) (int16, deconstructFunc) { 463 switch { 464 case node.Leaf(): 465 return deconstructFuncOfLeaf(columnIndex, node) 466 default: 467 return deconstructFuncOfGroup(columnIndex, node) 468 } 469 } 470 471 func deconstructFuncOfList(columnIndex int16, node Node) (int16, deconstructFunc) { 472 return deconstructFuncOf(columnIndex, Repeated(listElementOf(node))) 473 } 474 475 //go:noinline 476 func deconstructFuncOfMap(columnIndex int16, node Node) (int16, deconstructFunc) { 477 keyValue := mapKeyValueOf(node) 478 keyValueType := keyValue.GoType() 479 keyValueElem := keyValueType.Elem() 480 keyType := keyValueElem.Field(0).Type 481 valueType := keyValueElem.Field(1).Type 482 nextColumnIndex, deconstruct := deconstructFuncOf(columnIndex, schemaOf(keyValueElem)) 483 return nextColumnIndex, func(columns [][]Value, levels levels, mapValue reflect.Value) { 484 if !mapValue.IsValid() || mapValue.Len() == 0 { 485 deconstruct(columns, levels, reflect.Value{}) 486 return 487 } 488 489 levels.repetitionDepth++ 490 levels.definitionLevel++ 491 492 elem := reflect.New(keyValueElem).Elem() 493 k := elem.Field(0) 494 v := elem.Field(1) 495 496 for _, key := range mapValue.MapKeys() { 497 k.Set(key.Convert(keyType)) 498 v.Set(mapValue.MapIndex(key).Convert(valueType)) 499 deconstruct(columns, levels, elem) 500 levels.repetitionLevel = levels.repetitionDepth 501 } 502 } 503 } 504 505 //go:noinline 506 func deconstructFuncOfGroup(columnIndex int16, node Node) (int16, deconstructFunc) { 507 fields := node.Fields() 508 funcs := make([]deconstructFunc, len(fields)) 509 for i, field := range fields { 510 columnIndex, funcs[i] = deconstructFuncOf(columnIndex, field) 511 } 512 return columnIndex, func(columns [][]Value, levels levels, 
value reflect.Value) { 513 if value.IsValid() { 514 for i, f := range funcs { 515 f(columns, levels, fields[i].Value(value)) 516 } 517 } else { 518 for _, f := range funcs { 519 f(columns, levels, value) 520 } 521 } 522 } 523 } 524 525 //go:noinline 526 func deconstructFuncOfLeaf(columnIndex int16, node Node) (int16, deconstructFunc) { 527 if columnIndex > MaxColumnIndex { 528 panic("row cannot be deconstructed because it has more than 127 columns") 529 } 530 typ := node.Type() 531 kind := typ.Kind() 532 lt := typ.LogicalType() 533 valueColumnIndex := ^columnIndex 534 return columnIndex + 1, func(columns [][]Value, levels levels, value reflect.Value) { 535 v := Value{} 536 537 if value.IsValid() { 538 v = makeValue(kind, lt, value) 539 } 540 541 v.repetitionLevel = levels.repetitionLevel 542 v.definitionLevel = levels.definitionLevel 543 v.columnIndex = valueColumnIndex 544 545 columns[columnIndex] = append(columns[columnIndex], v) 546 } 547 } 548 549 // "reconstructX" turns a Go value into a Go representation of a Parquet series 550 // of values 551 552 type reconstructFunc func(reflect.Value, levels, [][]Value) error 553 554 func reconstructFuncOf(columnIndex int16, node Node) (int16, reconstructFunc) { 555 switch { 556 case node.Optional(): 557 return reconstructFuncOfOptional(columnIndex, node) 558 case node.Repeated(): 559 return reconstructFuncOfRepeated(columnIndex, node) 560 case isList(node): 561 return reconstructFuncOfList(columnIndex, node) 562 case isMap(node): 563 return reconstructFuncOfMap(columnIndex, node) 564 default: 565 return reconstructFuncOfRequired(columnIndex, node) 566 } 567 } 568 569 //go:noinline 570 func reconstructFuncOfOptional(columnIndex int16, node Node) (int16, reconstructFunc) { 571 // We convert the optional func to required so that we eventually reach the 572 // leaf base-case. 
We're still using the heuristics of optional in the 573 // returned closure (see levels.definitionLevel++), but we don't actually do 574 // deserialization here, that happens in the leaf function, hence this line. 575 nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, Required(node)) 576 577 return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error { 578 levels.definitionLevel++ 579 580 if columns[0][0].definitionLevel < levels.definitionLevel { 581 value.Set(reflect.Zero(value.Type())) 582 return nil 583 } 584 585 if value.Kind() == reflect.Ptr { 586 if value.IsNil() { 587 value.Set(reflect.New(value.Type().Elem())) 588 } 589 value = value.Elem() 590 } 591 592 return reconstruct(value, levels, columns) 593 } 594 } 595 596 func setMakeSlice(v reflect.Value, n int) reflect.Value { 597 t := v.Type() 598 if t.Kind() == reflect.Interface { 599 t = reflect.TypeOf(([]interface{})(nil)) 600 } 601 s := reflect.MakeSlice(t, n, n) 602 v.Set(s) 603 return s 604 } 605 606 //go:noinline 607 func reconstructFuncOfRepeated(columnIndex int16, node Node) (int16, reconstructFunc) { 608 nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, Required(node)) 609 return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error { 610 levels.repetitionDepth++ 611 levels.definitionLevel++ 612 613 if columns[0][0].definitionLevel < levels.definitionLevel { 614 setMakeSlice(value, 0) 615 return nil 616 } 617 618 values := make([][]Value, len(columns)) 619 column := columns[0] 620 n := 0 621 622 for i, column := range columns { 623 values[i] = column[0:0:len(column)] 624 } 625 626 for i := 0; i < len(column); { 627 i++ 628 n++ 629 630 for i < len(column) && column[i].repetitionLevel > levels.repetitionDepth { 631 i++ 632 } 633 } 634 635 value = setMakeSlice(value, n) 636 637 for i := 0; i < n; i++ { 638 for j, column := range values { 639 column = column[:cap(column)] 640 if len(column) == 0 { 641 continue 642 } 643 
644 k := 1 645 for k < len(column) && column[k].repetitionLevel > levels.repetitionDepth { 646 k++ 647 } 648 649 values[j] = column[:k] 650 } 651 652 if err := reconstruct(value.Index(i), levels, values); err != nil { 653 return err 654 } 655 656 for j, column := range values { 657 values[j] = column[len(column):len(column):cap(column)] 658 } 659 660 levels.repetitionLevel = levels.repetitionDepth 661 } 662 663 return nil 664 } 665 } 666 667 func reconstructFuncOfRequired(columnIndex int16, node Node) (int16, reconstructFunc) { 668 switch { 669 case node.Leaf(): 670 return reconstructFuncOfLeaf(columnIndex, node) 671 default: 672 return reconstructFuncOfGroup(columnIndex, node) 673 } 674 } 675 676 func reconstructFuncOfList(columnIndex int16, node Node) (int16, reconstructFunc) { 677 return reconstructFuncOf(columnIndex, Repeated(listElementOf(node))) 678 } 679 680 //go:noinline 681 func reconstructFuncOfMap(columnIndex int16, node Node) (int16, reconstructFunc) { 682 keyValue := mapKeyValueOf(node) 683 keyValueType := keyValue.GoType() 684 keyValueElem := keyValueType.Elem() 685 keyValueZero := reflect.Zero(keyValueElem) 686 nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, schemaOf(keyValueElem)) 687 return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error { 688 levels.repetitionDepth++ 689 levels.definitionLevel++ 690 691 if columns[0][0].definitionLevel < levels.definitionLevel { 692 value.Set(reflect.MakeMap(value.Type())) 693 return nil 694 } 695 696 values := make([][]Value, len(columns)) 697 column := columns[0] 698 t := value.Type() 699 k := t.Key() 700 v := t.Elem() 701 n := 0 702 703 for i, column := range columns { 704 values[i] = column[0:0:len(column)] 705 } 706 707 for i := 0; i < len(column); { 708 i++ 709 n++ 710 711 for i < len(column) && column[i].repetitionLevel > levels.repetitionDepth { 712 i++ 713 } 714 } 715 716 if value.IsNil() { 717 value.Set(reflect.MakeMapWithSize(t, n)) 718 } 719 720 elem := 
reflect.New(keyValueElem).Elem() 721 for i := 0; i < n; i++ { 722 for j, column := range values { 723 column = column[:cap(column)] 724 k := 1 725 726 for k < len(column) && column[k].repetitionLevel > levels.repetitionDepth { 727 k++ 728 } 729 730 values[j] = column[:k] 731 } 732 733 if err := reconstruct(elem, levels, values); err != nil { 734 return err 735 } 736 737 for j, column := range values { 738 values[j] = column[len(column):len(column):cap(column)] 739 } 740 741 value.SetMapIndex(elem.Field(0).Convert(k), elem.Field(1).Convert(v)) 742 elem.Set(keyValueZero) 743 levels.repetitionLevel = levels.repetitionDepth 744 } 745 746 return nil 747 } 748 } 749 750 //go:noinline 751 func reconstructFuncOfGroup(columnIndex int16, node Node) (int16, reconstructFunc) { 752 fields := node.Fields() 753 funcs := make([]reconstructFunc, len(fields)) 754 columnOffsets := make([]int16, len(fields)) 755 firstColumnIndex := columnIndex 756 757 for i, field := range fields { 758 columnIndex, funcs[i] = reconstructFuncOf(columnIndex, field) 759 columnOffsets[i] = columnIndex - firstColumnIndex 760 } 761 762 return columnIndex, func(value reflect.Value, levels levels, columns [][]Value) error { 763 if value.Kind() == reflect.Interface { 764 value.Set(reflect.MakeMap(reflect.TypeOf((map[string]interface{})(nil)))) 765 value = value.Elem() 766 } 767 768 if value.Kind() == reflect.Map { 769 elemType := value.Type().Elem() 770 name := reflect.New(reflect.TypeOf("")).Elem() 771 elem := reflect.New(elemType).Elem() 772 zero := reflect.Zero(elemType) 773 774 if value.Len() > 0 { 775 value.Set(reflect.MakeMap(value.Type())) 776 } 777 778 off := int16(0) 779 780 for i, f := range funcs { 781 name.SetString(fields[i].Name()) 782 end := columnOffsets[i] 783 err := f(elem, levels, columns[off:end:end]) 784 if err != nil { 785 return fmt.Errorf("%s → %w", name, err) 786 } 787 off = end 788 value.SetMapIndex(name, elem) 789 elem.Set(zero) 790 } 791 } else { 792 off := int16(0) 793 794 for i, f 
:= range funcs { 795 end := columnOffsets[i] 796 err := f(fields[i].Value(value), levels, columns[off:end:end]) 797 if err != nil { 798 return fmt.Errorf("%s → %w", fields[i].Name(), err) 799 } 800 off = end 801 } 802 } 803 804 return nil 805 } 806 } 807 808 //go:noinline 809 func reconstructFuncOfLeaf(columnIndex int16, node Node) (int16, reconstructFunc) { 810 typ := node.Type() 811 return columnIndex + 1, func(value reflect.Value, _ levels, columns [][]Value) error { 812 column := columns[0] 813 if len(column) == 0 { 814 return fmt.Errorf("no values found in parquet row for column %d", columnIndex) 815 } 816 return typ.AssignValue(value, column[0]) 817 } 818 }