github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/reader_go18.go (about) 1 //go:build go1.18 2 3 package parquet 4 5 import ( 6 "io" 7 "reflect" 8 ) 9 10 // GenericReader is similar to a Reader but uses a type parameter to define the 11 // Go type representing the schema of rows being read. 12 // 13 // See GenericWriter for details about the benefits over the classic Reader API. 14 type GenericReader[T any] struct { 15 base Reader 16 read readFunc[T] 17 } 18 19 // NewGenericReader is like NewReader but returns GenericReader[T] suited to write 20 // rows of Go type T. 21 // 22 // The type parameter T should be a map, struct, or any. Any other types will 23 // cause a panic at runtime. Type checking is a lot more effective when the 24 // generic parameter is a struct type, using map and interface types is somewhat 25 // similar to using a Writer. 26 // 27 // If the option list may explicitly declare a schema, it must be compatible 28 // with the schema generated from T. 29 func NewGenericReader[T any](input io.ReaderAt, options ...ReaderOption) *GenericReader[T] { 30 c, err := NewReaderConfig(options...) 31 if err != nil { 32 panic(err) 33 } 34 35 t := typeOf[T]() 36 if c.Schema == nil { 37 c.Schema = schemaOf(dereference(t)) 38 } 39 40 f, err := openFile(input) 41 if err != nil { 42 panic(err) 43 } 44 45 r := &GenericReader[T]{ 46 base: Reader{ 47 file: reader{ 48 schema: c.Schema, 49 rowGroup: fileRowGroupOf(f), 50 }, 51 }, 52 } 53 54 if !nodesAreEqual(c.Schema, f.schema) { 55 r.base.file.rowGroup = convertRowGroupTo(r.base.file.rowGroup, c.Schema) 56 } 57 58 r.base.read.init(r.base.file.schema, r.base.file.rowGroup) 59 r.read = readFuncOf[T](t, r.base.file.schema) 60 return r 61 } 62 63 func NewGenericRowGroupReader[T any](rowGroup RowGroup, options ...ReaderOption) *GenericReader[T] { 64 c, err := NewReaderConfig(options...) 65 if err != nil { 66 panic(err) 67 } 68 69 t := typeOf[T]() 70 if c.Schema == nil { 71 c.Schema = schemaOf(dereference(t)) 72 } 73 74 r := &GenericReader[T]{ 75 base: Reader{ 76 file: reader{ 77 schema: c.Schema, 78 rowGroup: rowGroup, 79 }, 80 }, 81 } 82 83 if !nodesAreEqual(c.Schema, rowGroup.Schema()) { 84 r.base.file.rowGroup = convertRowGroupTo(r.base.file.rowGroup, c.Schema) 85 } 86 87 r.base.read.init(r.base.file.schema, r.base.file.rowGroup) 88 r.read = readFuncOf[T](t, r.base.file.schema) 89 return r 90 } 91 92 func (r *GenericReader[T]) Reset() { 93 r.base.Reset() 94 } 95 96 func (r *GenericReader[T]) Read(rows []T) (int, error) { 97 return r.read(r, rows) 98 } 99 100 func (r *GenericReader[T]) ReadRows(rows []Row) (int, error) { 101 return r.base.ReadRows(rows) 102 } 103 104 func (r *GenericReader[T]) Schema() *Schema { 105 return r.base.Schema() 106 } 107 108 func (r *GenericReader[T]) NumRows() int64 { 109 return r.base.NumRows() 110 } 111 112 func (r *GenericReader[T]) SeekToRow(rowIndex int64) error { 113 return r.base.SeekToRow(rowIndex) 114 } 115 116 func (r *GenericReader[T]) Close() error { 117 return r.base.Close() 118 } 119 120 func (r *GenericReader[T]) readRows(rows []T) (int, error) { 121 if cap(r.base.rowbuf) < len(rows) { 122 r.base.rowbuf = make([]Row, len(rows)) 123 } else { 124 r.base.rowbuf = r.base.rowbuf[:len(rows)] 125 } 126 127 n, err := r.base.ReadRows(r.base.rowbuf) 128 if n > 0 { 129 schema := r.base.Schema() 130 131 for i, row := range r.base.rowbuf[:n] { 132 if err := schema.Reconstruct(&rows[i], row); err != nil { 133 return i, err 134 } 135 } 136 } 137 return n, err 138 } 139 140 var ( 141 _ Rows = (*GenericReader[any])(nil) 142 _ RowReaderWithSchema = (*Reader)(nil) 143 144 _ Rows = (*GenericReader[struct{}])(nil) 145 _ RowReaderWithSchema = (*GenericReader[struct{}])(nil) 146 147 _ Rows = (*GenericReader[map[struct{}]struct{}])(nil) 148 _ RowReaderWithSchema = (*GenericReader[map[struct{}]struct{}])(nil) 149 ) 150 151 type readFunc[T any] func(*GenericReader[T], []T) (int, error) 152 153 func readFuncOf[T any](t reflect.Type, schema *Schema) readFunc[T] { 154 switch t.Kind() { 155 case reflect.Interface, reflect.Map: 156 return (*GenericReader[T]).readRows 157 158 case reflect.Struct: 159 return (*GenericReader[T]).readRows 160 161 case reflect.Pointer: 162 if e := t.Elem(); e.Kind() == reflect.Struct { 163 return (*GenericReader[T]).readRows 164 } 165 } 166 panic("cannot create reader for values of type " + t.String()) 167 }