github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/reader_go18.go (about)

     1  //go:build go1.18
     2  
     3  package parquet
     4  
     5  import (
     6  	"io"
     7  	"reflect"
     8  )
     9  
    10  // GenericReader is similar to a Reader but uses a type parameter to define the
    11  // Go type representing the schema of rows being read.
    12  //
    13  // See GenericWriter for details about the benefits over the classic Reader API.
    14  type GenericReader[T any] struct {
    15  	base Reader
    16  	read readFunc[T]
    17  }
    18  
    19  // NewGenericReader is like NewReader but returns GenericReader[T] suited to write
    20  // rows of Go type T.
    21  //
    22  // The type parameter T should be a map, struct, or any. Any other types will
    23  // cause a panic at runtime. Type checking is a lot more effective when the
    24  // generic parameter is a struct type, using map and interface types is somewhat
    25  // similar to using a Writer.
    26  //
    27  // If the option list may explicitly declare a schema, it must be compatible
    28  // with the schema generated from T.
    29  func NewGenericReader[T any](input io.ReaderAt, options ...ReaderOption) *GenericReader[T] {
    30  	c, err := NewReaderConfig(options...)
    31  	if err != nil {
    32  		panic(err)
    33  	}
    34  
    35  	t := typeOf[T]()
    36  	if c.Schema == nil {
    37  		c.Schema = schemaOf(dereference(t))
    38  	}
    39  
    40  	f, err := openFile(input)
    41  	if err != nil {
    42  		panic(err)
    43  	}
    44  
    45  	r := &GenericReader[T]{
    46  		base: Reader{
    47  			file: reader{
    48  				schema:   c.Schema,
    49  				rowGroup: fileRowGroupOf(f),
    50  			},
    51  		},
    52  	}
    53  
    54  	if !nodesAreEqual(c.Schema, f.schema) {
    55  		r.base.file.rowGroup = convertRowGroupTo(r.base.file.rowGroup, c.Schema)
    56  	}
    57  
    58  	r.base.read.init(r.base.file.schema, r.base.file.rowGroup)
    59  	r.read = readFuncOf[T](t, r.base.file.schema)
    60  	return r
    61  }
    62  
    63  func NewGenericRowGroupReader[T any](rowGroup RowGroup, options ...ReaderOption) *GenericReader[T] {
    64  	c, err := NewReaderConfig(options...)
    65  	if err != nil {
    66  		panic(err)
    67  	}
    68  
    69  	t := typeOf[T]()
    70  	if c.Schema == nil {
    71  		c.Schema = schemaOf(dereference(t))
    72  	}
    73  
    74  	r := &GenericReader[T]{
    75  		base: Reader{
    76  			file: reader{
    77  				schema:   c.Schema,
    78  				rowGroup: rowGroup,
    79  			},
    80  		},
    81  	}
    82  
    83  	if !nodesAreEqual(c.Schema, rowGroup.Schema()) {
    84  		r.base.file.rowGroup = convertRowGroupTo(r.base.file.rowGroup, c.Schema)
    85  	}
    86  
    87  	r.base.read.init(r.base.file.schema, r.base.file.rowGroup)
    88  	r.read = readFuncOf[T](t, r.base.file.schema)
    89  	return r
    90  }
    91  
    92  func (r *GenericReader[T]) Reset() {
    93  	r.base.Reset()
    94  }
    95  
    96  func (r *GenericReader[T]) Read(rows []T) (int, error) {
    97  	return r.read(r, rows)
    98  }
    99  
   100  func (r *GenericReader[T]) ReadRows(rows []Row) (int, error) {
   101  	return r.base.ReadRows(rows)
   102  }
   103  
   104  func (r *GenericReader[T]) Schema() *Schema {
   105  	return r.base.Schema()
   106  }
   107  
   108  func (r *GenericReader[T]) NumRows() int64 {
   109  	return r.base.NumRows()
   110  }
   111  
   112  func (r *GenericReader[T]) SeekToRow(rowIndex int64) error {
   113  	return r.base.SeekToRow(rowIndex)
   114  }
   115  
   116  func (r *GenericReader[T]) Close() error {
   117  	return r.base.Close()
   118  }
   119  
   120  func (r *GenericReader[T]) readRows(rows []T) (int, error) {
   121  	if cap(r.base.rowbuf) < len(rows) {
   122  		r.base.rowbuf = make([]Row, len(rows))
   123  	} else {
   124  		r.base.rowbuf = r.base.rowbuf[:len(rows)]
   125  	}
   126  
   127  	n, err := r.base.ReadRows(r.base.rowbuf)
   128  	if n > 0 {
   129  		schema := r.base.Schema()
   130  
   131  		for i, row := range r.base.rowbuf[:n] {
   132  			if err := schema.Reconstruct(&rows[i], row); err != nil {
   133  				return i, err
   134  			}
   135  		}
   136  	}
   137  	return n, err
   138  }
   139  
   140  var (
   141  	_ Rows                = (*GenericReader[any])(nil)
   142  	_ RowReaderWithSchema = (*Reader)(nil)
   143  
   144  	_ Rows                = (*GenericReader[struct{}])(nil)
   145  	_ RowReaderWithSchema = (*GenericReader[struct{}])(nil)
   146  
   147  	_ Rows                = (*GenericReader[map[struct{}]struct{}])(nil)
   148  	_ RowReaderWithSchema = (*GenericReader[map[struct{}]struct{}])(nil)
   149  )
   150  
   151  type readFunc[T any] func(*GenericReader[T], []T) (int, error)
   152  
   153  func readFuncOf[T any](t reflect.Type, schema *Schema) readFunc[T] {
   154  	switch t.Kind() {
   155  	case reflect.Interface, reflect.Map:
   156  		return (*GenericReader[T]).readRows
   157  
   158  	case reflect.Struct:
   159  		return (*GenericReader[T]).readRows
   160  
   161  	case reflect.Pointer:
   162  		if e := t.Elem(); e.Kind() == reflect.Struct {
   163  			return (*GenericReader[T]).readRows
   164  		}
   165  	}
   166  	panic("cannot create reader for values of type " + t.String())
   167  }