github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/buffer_go18.go (about) 1 //go:build go1.18 2 3 package parquet 4 5 import ( 6 "reflect" 7 "sort" 8 ) 9 10 // GenericBuffer is similar to a Buffer but uses a type parameter to define the 11 // Go type representing the schema of rows in the buffer. 12 // 13 // See GenericWriter for details about the benefits over the classic Buffer API. 14 type GenericBuffer[T any] struct { 15 base Buffer 16 write bufferFunc[T] 17 } 18 19 // NewGenericBuffer is like NewBuffer but returns a GenericBuffer[T] suited to write 20 // rows of Go type T. 21 // 22 // The type parameter T should be a map, struct, or any. Any other types will 23 // cause a panic at runtime. Type checking is a lot more effective when the 24 // generic parameter is a struct type, using map and interface types is somewhat 25 // similar to using a Writer. If using an interface type for the type parameter, 26 // then providing a schema at instantiation is required. 27 // 28 // If the option list may explicitly declare a schema, it must be compatible 29 // with the schema generated from T. 30 func NewGenericBuffer[T any](options ...RowGroupOption) *GenericBuffer[T] { 31 config, err := NewRowGroupConfig(options...) 32 if err != nil { 33 panic(err) 34 } 35 36 t := typeOf[T]() 37 if config.Schema == nil && t != nil { 38 config.Schema = schemaOf(dereference(t)) 39 } 40 41 if config.Schema == nil { 42 panic("generic buffer must be instantiated with schema or concrete type.") 43 } 44 45 buf := &GenericBuffer[T]{ 46 base: Buffer{config: config}, 47 } 48 buf.base.configure(config.Schema) 49 buf.write = bufferFuncOf[T](t, config.Schema) 50 return buf 51 } 52 53 func typeOf[T any]() reflect.Type { 54 var v T 55 return reflect.TypeOf(v) 56 } 57 58 type bufferFunc[T any] func(*GenericBuffer[T], []T) (int, error) 59 60 func bufferFuncOf[T any](t reflect.Type, schema *Schema) bufferFunc[T] { 61 if t == nil { 62 return (*GenericBuffer[T]).writeRows 63 } 64 switch t.Kind() { 65 case reflect.Interface, reflect.Map: 66 return (*GenericBuffer[T]).writeRows 67 68 case reflect.Struct: 69 return makeBufferFunc[T](t, schema) 70 71 case reflect.Pointer: 72 if e := t.Elem(); e.Kind() == reflect.Struct { 73 return makeBufferFunc[T](t, schema) 74 } 75 } 76 panic("cannot create buffer for values of type " + t.String()) 77 } 78 79 func makeBufferFunc[T any](t reflect.Type, schema *Schema) bufferFunc[T] { 80 writeRows := writeRowsFuncOf(t, schema, nil) 81 return func(buf *GenericBuffer[T], rows []T) (n int, err error) { 82 err = writeRows(buf.base.columns, makeArrayOf(rows), columnLevels{}) 83 if err == nil { 84 n = len(rows) 85 } 86 return n, err 87 } 88 } 89 90 func (buf *GenericBuffer[T]) Size() int64 { 91 return buf.base.Size() 92 } 93 94 func (buf *GenericBuffer[T]) NumRows() int64 { 95 return buf.base.NumRows() 96 } 97 98 func (buf *GenericBuffer[T]) ColumnChunks() []ColumnChunk { 99 return buf.base.ColumnChunks() 100 } 101 102 func (buf *GenericBuffer[T]) ColumnBuffers() []ColumnBuffer { 103 return buf.base.ColumnBuffers() 104 } 105 106 func (buf *GenericBuffer[T]) SortingColumns() []SortingColumn { 107 return buf.base.SortingColumns() 108 } 109 110 func (buf *GenericBuffer[T]) Len() int { 111 return buf.base.Len() 112 } 113 114 func (buf *GenericBuffer[T]) Less(i, j int) bool { 115 return buf.base.Less(i, j) 116 } 117 118 func (buf *GenericBuffer[T]) Swap(i, j int) { 119 buf.base.Swap(i, j) 120 } 121 122 func (buf *GenericBuffer[T]) Reset() { 123 buf.base.Reset() 124 } 125 126 func (buf *GenericBuffer[T]) Write(rows []T) (int, error) { 127 if len(rows) == 0 { 128 return 0, nil 129 } 130 return buf.write(buf, rows) 131 } 132 133 func (buf *GenericBuffer[T]) WriteRows(rows []Row) (int, error) { 134 return buf.base.WriteRows(rows) 135 } 136 137 func (buf *GenericBuffer[T]) WriteRowGroup(rowGroup RowGroup) (int64, error) { 138 return buf.base.WriteRowGroup(rowGroup) 139 } 140 141 func (buf *GenericBuffer[T]) Rows() Rows { 142 return buf.base.Rows() 143 } 144 145 func (buf *GenericBuffer[T]) Schema() *Schema { 146 return buf.base.Schema() 147 } 148 149 func (buf *GenericBuffer[T]) writeRows(rows []T) (int, error) { 150 if cap(buf.base.rowbuf) < len(rows) { 151 buf.base.rowbuf = make([]Row, len(rows)) 152 } else { 153 buf.base.rowbuf = buf.base.rowbuf[:len(rows)] 154 } 155 defer clearRows(buf.base.rowbuf) 156 157 schema := buf.base.Schema() 158 for i := range rows { 159 buf.base.rowbuf[i] = schema.Deconstruct(buf.base.rowbuf[i], &rows[i]) 160 } 161 162 return buf.base.WriteRows(buf.base.rowbuf) 163 } 164 165 var ( 166 _ RowGroup = (*GenericBuffer[any])(nil) 167 _ RowGroupWriter = (*GenericBuffer[any])(nil) 168 _ sort.Interface = (*GenericBuffer[any])(nil) 169 170 _ RowGroup = (*GenericBuffer[struct{}])(nil) 171 _ RowGroupWriter = (*GenericBuffer[struct{}])(nil) 172 _ sort.Interface = (*GenericBuffer[struct{}])(nil) 173 174 _ RowGroup = (*GenericBuffer[map[struct{}]struct{}])(nil) 175 _ RowGroupWriter = (*GenericBuffer[map[struct{}]struct{}])(nil) 176 _ sort.Interface = (*GenericBuffer[map[struct{}]struct{}])(nil) 177 )