github.com/apache/arrow/go/v14@v14.0.1/parquet/pqarrow/file_reader.go

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pqarrow

import (
	"context"
	"fmt"
	"io"
	"sync"
	"sync/atomic"

	"github.com/apache/arrow/go/v14/arrow"
	"github.com/apache/arrow/go/v14/arrow/array"
	"github.com/apache/arrow/go/v14/arrow/arrio"
	"github.com/apache/arrow/go/v14/arrow/memory"
	"github.com/apache/arrow/go/v14/parquet"
	"github.com/apache/arrow/go/v14/parquet/file"
	"github.com/apache/arrow/go/v14/parquet/schema"
	"golang.org/x/sync/errgroup"
	"golang.org/x/xerrors"
)

type itrFactory func(int, *file.Reader) *columnIterator

type readerCtx struct {
	rdr            *file.Reader
	mem            memory.Allocator
	colFactory     itrFactory
	filterLeaves   bool
	includedLeaves map[int]bool
}

func (r readerCtx) includesLeaf(idx int) bool {
	_, ok := r.includedLeaves[idx]
	return ok
}

// ReadTable is a convenience function to quickly and easily read a parquet file
// into an arrow table.
//
// The schema of the arrow table is generated based on the schema of the parquet file,
// including nested columns/lists/etc. in the same fashion as the FromParquetSchema
// function. This simply encapsulates creating a separate file.Reader and
// pqarrow.FileReader so that a single call suffices when you want to construct
// a table from the entire parquet file rather than reading it piecemeal.
func ReadTable(ctx context.Context, r parquet.ReaderAtSeeker, props *parquet.ReaderProperties, arrProps ArrowReadProperties, mem memory.Allocator) (arrow.Table, error) {
	pf, err := file.NewParquetReader(r, file.WithReadProps(props))
	if err != nil {
		return nil, err
	}

	reader, err := NewFileReader(pf, arrProps, mem)
	if err != nil {
		return nil, err
	}

	return reader.ReadTable(ctx)
}
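
// A minimal usage sketch for ReadTable (illustrative only, not part of the
// package: "data.parquet" is an assumed local file and most error handling
// is elided). It reads a whole parquet file into an arrow.Table:
//
//	f, err := os.Open("data.parquet")
//	if err != nil {
//		// handle error
//	}
//	defer f.Close()
//
//	tbl, err := ReadTable(context.Background(), f,
//		parquet.NewReaderProperties(memory.DefaultAllocator),
//		ArrowReadProperties{Parallel: true, BatchSize: 4096},
//		memory.DefaultAllocator)
//	if err != nil {
//		// handle error
//	}
//	defer tbl.Release()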

// FileReader is the base object for reading a parquet file into arrow object
// types.
//
// It provides utility functions for reading record batches, a table, subsets of
// columns / rowgroups, and so on.
type FileReader struct {
	mem memory.Allocator
	rdr *file.Reader

	Props    ArrowReadProperties
	Manifest *SchemaManifest
}

// NewFileReader constructs a reader for converting to Arrow objects from an existing
// parquet file reader object.
//
// Only returns an error if there is some error constructing the schema manifest from
// the parquet file metadata.
func NewFileReader(rdr *file.Reader, props ArrowReadProperties, mem memory.Allocator) (*FileReader, error) {
	manifest, err := NewSchemaManifest(rdr.MetaData().Schema, rdr.MetaData().KeyValueMetadata(), &props)
	if err != nil {
		return nil, err
	}

	return &FileReader{
		mem:      mem,
		rdr:      rdr,
		Props:    props,
		Manifest: manifest,
	}, nil
}

// Schema returns the arrow schema representation of the underlying file's schema.
func (fr *FileReader) Schema() (*arrow.Schema, error) {
	return FromParquet(fr.rdr.MetaData().Schema, &fr.Props, fr.rdr.MetaData().KeyValueMetadata())
}

type colReaderImpl interface {
	LoadBatch(nrecs int64) error
	BuildArray(boundedLen int64) (*arrow.Chunked, error)
	GetDefLevels() ([]int16, error)
	GetRepLevels() ([]int16, error)
	Field() *arrow.Field
	IsOrHasRepeatedChild() bool
	Retain()
	Release()
}

// ColumnReader is used for reading batches of data from a specific column
// across multiple row groups to return a chunked arrow array.
type ColumnReader struct {
	colReaderImpl
}

// NextBatch returns a chunked array after reading `size` values, potentially
// across multiple row groups.
func (c *ColumnReader) NextBatch(size int64) (*arrow.Chunked, error) {
	if err := c.LoadBatch(size); err != nil {
		return nil, err
	}
	return c.BuildArray(size)
}

type rdrCtxKey struct{}

func readerCtxFromContext(ctx context.Context) readerCtx {
	rdc := ctx.Value(rdrCtxKey{})
	if rdc != nil {
		return rdc.(readerCtx)
	}
	panic("no readerctx")
}

// ParquetReader returns the underlying parquet file reader that it was constructed with.
func (fr *FileReader) ParquetReader() *file.Reader { return fr.rdr }

// GetColumn returns a reader for pulling the data of leaf column index i
// across all row groups in the file.
func (fr *FileReader) GetColumn(ctx context.Context, i int) (*ColumnReader, error) {
	return fr.getColumnReader(ctx, i, fr.allRowGroupFactory())
}

func rowGroupFactory(rowGroups []int) itrFactory {
	return func(i int, rdr *file.Reader) *columnIterator {
		return &columnIterator{
			index:     i,
			rdr:       rdr,
			schema:    rdr.MetaData().Schema,
			rowGroups: rowGroups,
		}
	}
}

func (fr *FileReader) allRowGroupFactory() itrFactory {
	rowGroups := make([]int, fr.rdr.NumRowGroups())
	for idx := range rowGroups {
		rowGroups[idx] = idx
	}
	return rowGroupFactory(rowGroups)
}

// GetFieldReader returns a reader for the entire field at index i, which may involve
// reading multiple columns from the underlying parquet file if that field is a nested
// field.
//
// includedLeaves and rowGroups specify precisely which leaf indices and row groups
// to read.
func (fr *FileReader) GetFieldReader(ctx context.Context, i int, includedLeaves map[int]bool, rowGroups []int) (*ColumnReader, error) {
	ctx = context.WithValue(ctx, rdrCtxKey{}, readerCtx{
		rdr:            fr.rdr,
		mem:            fr.mem,
		colFactory:     rowGroupFactory(rowGroups),
		filterLeaves:   true,
		includedLeaves: includedLeaves,
	})
	return fr.getReader(ctx, &fr.Manifest.Fields[i], *fr.Manifest.Fields[i].Field)
}
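
// A short sketch of reading a single leaf column in batches (assumes an
// already-constructed *FileReader named fr and a context ctx; error handling
// elided):
//
//	cr, err := fr.GetColumn(ctx, 0)
//	if err != nil {
//		// handle error
//	}
//	defer cr.Release()
//
//	chunked, err := cr.NextBatch(1024) // up to 1024 values, possibly spanning row groups
//	if err != nil {
//		// handle error
//	}
//	defer chunked.Release()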

// GetFieldReaders retrieves readers for multiple fields at once, restricted to only
// the requested column indices and row groups. It returns a slice of the readers and
// the corresponding arrow.Schema for those columns.
func (fr *FileReader) GetFieldReaders(ctx context.Context, colIndices, rowGroups []int) ([]*ColumnReader, *arrow.Schema, error) {
	fieldIndices, err := fr.Manifest.GetFieldIndices(colIndices)
	if err != nil {
		return nil, nil, err
	}

	includedLeaves := make(map[int]bool)
	for _, col := range colIndices {
		includedLeaves[col] = true
	}

	out := make([]*ColumnReader, len(fieldIndices))
	outFields := make([]arrow.Field, len(fieldIndices))

	// Get the field readers concurrently: GetFieldReader issues read operations,
	// and doing so serially over a large number of columns is very slow,
	// especially when reading from cloud storage. Loading concurrently greatly
	// improves performance.
	g, gctx := errgroup.WithContext(ctx)
	if !fr.Props.Parallel {
		g.SetLimit(1)
	}
	for idx, fidx := range fieldIndices {
		idx, fidx := idx, fidx // create concurrent copy
		g.Go(func() error {
			rdr, err := fr.GetFieldReader(gctx, fidx, includedLeaves, rowGroups)
			if err != nil {
				return err
			}
			outFields[idx] = *rdr.Field()
			out[idx] = rdr
			return nil
		})
	}
	if err = g.Wait(); err != nil {
		return nil, nil, err
	}

	return out, arrow.NewSchema(outFields, fr.Manifest.SchemaMeta), nil
}

// RowGroup creates a reader that will *only* read from the requested row group.
func (fr *FileReader) RowGroup(idx int) RowGroupReader {
	return RowGroupReader{fr, idx}
}
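
// A brief sketch of restricting reads to one row group via RowGroup (fr and
// ctx as above; the column indices are illustrative):
//
//	tbl, err := fr.RowGroup(0).ReadTable(ctx, []int{0, 2})
//	if err != nil {
//		// handle error
//	}
//	defer tbl.Release()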

// ReadColumn reads data to create a chunked array only from the requested row groups.
func (fr *FileReader) ReadColumn(rowGroups []int, rdr *ColumnReader) (*arrow.Chunked, error) {
	recs := int64(0)
	for _, rg := range rowGroups {
		recs += fr.rdr.MetaData().RowGroups[rg].GetNumRows()
	}
	return rdr.NextBatch(recs)
}

// ReadTable reads the entire file into an arrow.Table.
func (fr *FileReader) ReadTable(ctx context.Context) (arrow.Table, error) {
	var (
		cols = []int{}
		rgs  = []int{}
	)
	for i := 0; i < fr.rdr.MetaData().Schema.NumColumns(); i++ {
		cols = append(cols, i)
	}
	for i := 0; i < fr.rdr.NumRowGroups(); i++ {
		rgs = append(rgs, i)
	}
	return fr.ReadRowGroups(ctx, cols, rgs)
}

func (fr *FileReader) checkCols(indices []int) (err error) {
	for _, col := range indices {
		if col < 0 || col >= fr.rdr.MetaData().Schema.NumColumns() {
			err = fmt.Errorf("invalid column index %d, file only has %d columns", col, fr.rdr.MetaData().Schema.NumColumns())
			break
		}
	}
	return
}

func (fr *FileReader) checkRowGroups(indices []int) (err error) {
	for _, rg := range indices {
		if rg < 0 || rg >= fr.rdr.NumRowGroups() {
			err = fmt.Errorf("invalid row group specified: %d, file only has %d row groups", rg, fr.rdr.NumRowGroups())
			break
		}
	}
	return
}

type readerInfo struct {
	rdr *ColumnReader
	idx int
}

type resultPair struct {
	idx  int
	data *arrow.Chunked
	err  error
}

// NOTE: this pattern is quite complicated. It could likely be simplified,
// but it works, and we are hesitant to change what works.

// ReadRowGroups is for generating an arrow.Table from the file, but filtering to only
// read the requested columns and row groups rather than the entire file, as ReadTable
// does.
func (fr *FileReader) ReadRowGroups(ctx context.Context, indices, rowGroups []int) (arrow.Table, error) {
	if err := fr.checkRowGroups(rowGroups); err != nil {
		return nil, err
	}
	if err := fr.checkCols(indices); err != nil {
		return nil, err
	}

	// TODO(mtopol): add optimizations for pre-buffering data options

	readers, sc, err := fr.GetFieldReaders(ctx, indices, rowGroups)
	if err != nil {
		return nil, err
	}

	// producer-consumer parallelization
	var (
		np      = 1
		wg      sync.WaitGroup
		ch      = make(chan readerInfo, len(readers))
		results = make(chan resultPair, 2)
	)

	if fr.Props.Parallel {
		np = len(readers)
	}

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	wg.Add(np) // fan-out to np readers
	for i := 0; i < np; i++ {
		go func() {
			defer wg.Done()
			for {
				select {
				case r, ok := <-ch:
					if !ok {
						return
					}

					chnked, err := fr.ReadColumn(rowGroups, r.rdr)
					// pass the resulting column data to the result channel
					// for the consumer goroutine to process
					results <- resultPair{r.idx, chnked, err}
				case <-ctx.Done(): // check if we were cancelled
					return
				}
			}
		}()
	}

	go func() {
		wg.Wait()
		close(results) // close the result channel when there's no more
	}()

	// pass pairs of reader and column index to the channel for the
	// goroutines to read the data
	for idx := range readers {
		defer readers[idx].Release()
		ch <- readerInfo{readers[idx], idx}
	}
	close(ch)

	// output slice of columns
	columns := make([]arrow.Column, len(sc.Fields()))
	defer releaseColumns(columns)
	for data := range results {
		if data.err != nil {
			err = data.err
			cancel()
			break
		}
		columns[data.idx] = *arrow.NewColumn(sc.Field(data.idx), data.data)
		data.data.Release()
	}

	if err != nil {
		// If we encountered an error, consume any waiting data on the channel
		// so the goroutines don't leak and memory can get cleaned up. We already
		// cancelled the context, so we're just consuming anything that was
		// already queued up.
		for data := range results {
			data.data.Release()
		}
		return nil, err
	}

	var nrows int
	if len(columns) > 0 {
		nrows = columns[0].Len()
	}

	return array.NewTable(sc, columns, int64(nrows)), nil
}
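
// A brief sketch of ReadRowGroups (fr and ctx as above): read only columns
// 0 and 1 from the first two row groups into a table. The indices here are
// illustrative:
//
//	tbl, err := fr.ReadRowGroups(ctx, []int{0, 1}, []int{0, 1})
//	if err != nil {
//		// handle error
//	}
//	defer tbl.Release()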

func (fr *FileReader) getColumnReader(ctx context.Context, i int, colFactory itrFactory) (*ColumnReader, error) {
	if i < 0 || i >= len(fr.Manifest.Fields) {
		return nil, fmt.Errorf("invalid column index chosen %d, there are only %d columns", i, len(fr.Manifest.Fields))
	}

	ctx = context.WithValue(ctx, rdrCtxKey{}, readerCtx{
		rdr:          fr.rdr,
		mem:          fr.mem,
		colFactory:   colFactory,
		filterLeaves: false,
	})

	return fr.getReader(ctx, &fr.Manifest.Fields[i], *fr.Manifest.Fields[i].Field)
}

// RecordReader is a Record Batch Reader that meets the interfaces for both
// array.RecordReader and arrio.Reader to allow easy progressive reading
// of record batches from the parquet file. Ideal for streaming.
type RecordReader interface {
	array.RecordReader
	arrio.Reader
}

// GetRecordReader returns a record reader that reads only the requested column
// indices and row groups.
//
// If colIndices or rowGroups is nil, it defaults to reading all of them.
func (fr *FileReader) GetRecordReader(ctx context.Context, colIndices, rowGroups []int) (RecordReader, error) {
	if err := fr.checkRowGroups(rowGroups); err != nil {
		return nil, err
	}

	if rowGroups == nil {
		rowGroups = make([]int, fr.rdr.NumRowGroups())
		for idx := range rowGroups {
			rowGroups[idx] = idx
		}
	}

	if err := fr.checkCols(colIndices); err != nil {
		return nil, err
	}

	if colIndices == nil {
		colIndices = make([]int, fr.rdr.MetaData().Schema.NumColumns())
		for idx := range colIndices {
			colIndices[idx] = idx
		}
	}

	// TODO(mtopol): add optimizations to pre-buffer data from the file

	readers, sc, err := fr.GetFieldReaders(ctx, colIndices, rowGroups)
	if err != nil {
		return nil, err
	}

	if len(readers) == 0 {
		return nil, xerrors.New("no leaf column readers matched col indices")
	}

	nrows := int64(0)
	for _, rg := range rowGroups {
		nrows += fr.rdr.MetaData().RowGroup(rg).NumRows()
	}

	return &recordReader{
		numRows:      nrows,
		batchSize:    fr.Props.BatchSize,
		parallel:     fr.Props.Parallel,
		sc:           sc,
		fieldReaders: readers,
		refCount:     1,
	}, nil
}
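
// A minimal streaming sketch with GetRecordReader (fr and ctx as above; nil,
// nil selects all columns and row groups). Each record is only valid until
// the next call to Next, so Retain it if it must outlive the loop:
//
//	rr, err := fr.GetRecordReader(ctx, nil, nil)
//	if err != nil {
//		// handle error
//	}
//	defer rr.Release()
//
//	for rr.Next() {
//		rec := rr.Record()
//		// process rec
//	}
//	if err := rr.Err(); err != nil && err != io.EOF {
//		// handle error
//	}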

func (fr *FileReader) getReader(ctx context.Context, field *SchemaField, arrowField arrow.Field) (out *ColumnReader, err error) {
	rctx := readerCtxFromContext(ctx)
	if len(field.Children) == 0 {
		if !field.IsLeaf() {
			return nil, xerrors.New("parquet non-leaf node has no children")
		}
		if rctx.filterLeaves && !rctx.includesLeaf(field.ColIndex) {
			return nil, nil
		}

		out, err = newLeafReader(&rctx, field.Field, rctx.colFactory(field.ColIndex, rctx.rdr), field.LevelInfo, fr.Props, fr.rdr.BufferPool())
		return
	}

	switch arrowField.Type.ID() {
	case arrow.EXTENSION:
		return nil, xerrors.New("extension type not implemented")
	case arrow.STRUCT:
		childReaders := make([]*ColumnReader, len(field.Children))
		childFields := make([]arrow.Field, len(field.Children))

		// Get child field readers concurrently: getReader causes a read
		// operation, and issuing those reads serially for structs with large
		// numbers of columns is very slow, especially when reading from cloud
		// storage. Loading concurrently greatly improves performance.
		g, gctx := errgroup.WithContext(ctx)
		if !fr.Props.Parallel {
			g.SetLimit(1)
		}

		for n, child := range field.Children {
			n, child := n, child
			g.Go(func() error {
				reader, err := fr.getReader(gctx, &child, *child.Field)
				if err != nil {
					return err
				}
				if reader == nil {
					return nil
				}
				childFields[n] = *child.Field
				childReaders[n] = reader
				return nil
			})
		}
		if err = g.Wait(); err != nil {
			return nil, err
		}

		// because we performed getReader concurrently, we need to prune out any empty readers
		for n := len(childReaders) - 1; n >= 0; n-- {
			if childReaders[n] == nil {
				childReaders = append(childReaders[:n], childReaders[n+1:]...)
				childFields = append(childFields[:n], childFields[n+1:]...)
			}
		}
		if len(childFields) == 0 {
			return nil, nil
		}
		filtered := arrow.Field{Name: arrowField.Name, Nullable: arrowField.Nullable,
			Metadata: arrowField.Metadata, Type: arrow.StructOf(childFields...)}
		out = newStructReader(&rctx, &filtered, field.LevelInfo, childReaders, fr.Props)
	case arrow.LIST, arrow.FIXED_SIZE_LIST, arrow.MAP:
		child := field.Children[0]
		childReader, err := fr.getReader(ctx, &child, *child.Field)
		if err != nil {
			return nil, err
		}
		if childReader == nil {
			return nil, nil
		}
		defer childReader.Release()

		switch arrowField.Type.(type) {
		case *arrow.MapType:
			// if leaf filtering dropped either the key or the value column,
			// the result can no longer be read as a map; fall back to a list
			if len(child.Children) != 2 {
				arrowField.Type = arrow.ListOf(childReader.Field().Type)
			}
			out = newListReader(&rctx, &arrowField, field.LevelInfo, childReader, fr.Props)
		case *arrow.ListType:
			out = newListReader(&rctx, &arrowField, field.LevelInfo, childReader, fr.Props)
		case *arrow.FixedSizeListType:
			out = newFixedSizeListReader(&rctx, &arrowField, field.LevelInfo, childReader, fr.Props)
		default:
			return nil, fmt.Errorf("unknown list type: %s", field.Field.String())
		}
	}
	return
}

// RowGroupReader is a reader for getting data only from a single row group of the file
// rather than having to repeatedly pass the index to functions on the reader.
type RowGroupReader struct {
	impl *FileReader
	idx  int
}

// ReadTable provides an arrow.Table consisting only of the columns requested for this row group.
func (rgr RowGroupReader) ReadTable(ctx context.Context, colIndices []int) (arrow.Table, error) {
	return rgr.impl.ReadRowGroups(ctx, colIndices, []int{rgr.idx})
}

// Column creates a reader for just the requested column chunk in only this row group.
func (rgr RowGroupReader) Column(idx int) ColumnChunkReader {
	return ColumnChunkReader{rgr.impl, idx, rgr.idx}
}

// ColumnChunkReader is a reader that reads only a single column chunk from a single
// column in a single row group.
type ColumnChunkReader struct {
	impl     *FileReader
	idx      int
	rowGroup int
}

// Read reads this reader's entire column chunk into a chunked array.
func (ccr ColumnChunkReader) Read(ctx context.Context) (*arrow.Chunked, error) {
	rdr, err := ccr.impl.getColumnReader(ctx, ccr.idx, rowGroupFactory([]int{ccr.rowGroup}))
	if err != nil {
		return nil, err
	}
	return ccr.impl.ReadColumn([]int{ccr.rowGroup}, rdr)
}
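
// A short sketch of reading a single column chunk (fr and ctx as above;
// column 2 of row group 0 is illustrative):
//
//	chunked, err := fr.RowGroup(0).Column(2).Read(ctx)
//	if err != nil {
//		// handle error
//	}
//	defer chunked.Release()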

type columnIterator struct {
	index     int
	rdr       *file.Reader
	schema    *schema.Schema
	rowGroups []int
}

func (c *columnIterator) NextChunk() (file.PageReader, error) {
	if len(c.rowGroups) == 0 {
		return nil, nil
	}

	rgr := c.rdr.RowGroup(c.rowGroups[0])
	c.rowGroups = c.rowGroups[1:]
	return rgr.GetColumnPageReader(c.index)
}

func (c *columnIterator) Descr() *schema.Column { return c.schema.Column(c.index) }

// recordReader is an implementation of arrio.Reader for streaming record batches
// from the parquet data.
type recordReader struct {
	numRows      int64
	batchSize    int64
	parallel     bool
	sc           *arrow.Schema
	fieldReaders []*ColumnReader
	cur          arrow.Record
	err          error

	refCount int64
}

func (r *recordReader) Retain() {
	atomic.AddInt64(&r.refCount, 1)
}

func (r *recordReader) Release() {
	if atomic.AddInt64(&r.refCount, -1) == 0 {
		if r.cur != nil {
			r.cur.Release()
			r.cur = nil
		}
		if r.fieldReaders == nil {
			return
		}
		for _, fr := range r.fieldReaders {
			fr.Release()
		}
		r.fieldReaders = nil
	}
}

func (r *recordReader) Schema() *arrow.Schema { return r.sc }

func (r *recordReader) next() bool {
	cols := make([]arrow.Array, len(r.sc.Fields()))
	defer releaseArrays(cols)
	readField := func(idx int, rdr *ColumnReader) error {
		data, err := rdr.NextBatch(r.batchSize)
		if err != nil {
			return err
		}
		defer data.Release()

		if data.Len() == 0 {
			return io.EOF
		}

		arrdata, err := chunksToSingle(data)
		if err != nil {
			return err
		}
		defer arrdata.Release()

		cols[idx] = array.MakeFromData(arrdata)
		return nil
	}

	if !r.parallel {
		for idx, rdr := range r.fieldReaders {
			if err := readField(idx, rdr); err != nil {
				r.err = err
				return false
			}
		}

		r.cur = array.NewRecord(r.sc, cols, -1)
		return true
	}

	var (
		wg    sync.WaitGroup
		np    = len(cols)
		ch    = make(chan int, np)
		errch = make(chan error, np)
	)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	wg.Add(np)
	for i := 0; i < np; i++ {
		go func() {
			defer wg.Done()
			for {
				select {
				case idx, ok := <-ch:
					if !ok {
						return
					}

					if err := readField(idx, r.fieldReaders[idx]); err != nil {
						errch <- err
						cancel()
						return
					}

				case <-ctx.Done():
					return
				}
			}
		}()
	}

	for idx := range r.fieldReaders {
		ch <- idx
	}
	close(ch)
	wg.Wait()
	close(errch)

	var ok bool
	// check for any errors
	if r.err, ok = <-errch; ok {
		// return the first error that was reported and drain
		// any remaining errors from the channel before returning
		for range errch {
		}
		return false
	}

	r.cur = array.NewRecord(r.sc, cols, -1)
	return true
}

func (r *recordReader) Next() bool {
	if r.cur != nil {
		r.cur.Release()
		r.cur = nil
	}

	if r.err != nil {
		return false
	}

	return r.next()
}

func (r *recordReader) Record() arrow.Record { return r.cur }

func (r *recordReader) Err() error { return r.err }

func (r *recordReader) Read() (arrow.Record, error) {
	if r.cur != nil {
		r.cur.Release()
		r.cur = nil
	}

	if !r.next() {
		return nil, r.err
	}

	return r.cur, nil
}
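
// Because recordReader implements arrio.Reader, records can also be pulled
// with Read until io.EOF. A brief sketch (rr as obtained from GetRecordReader
// above):
//
//	for {
//		rec, err := rr.Read()
//		if err == io.EOF {
//			break
//		} else if err != nil {
//			// handle error
//		}
//		// process rec; it is released on the next call to Read
//	}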