github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/buffer_go18.go (about) 1 //go:build go1.18 2 3 package parquet 4 5 import ( 6 "reflect" 7 "sort" 8 ) 9 10 // GenericBuffer is similar to a Buffer but uses a type parameter to define the 11 // Go type representing the schema of rows in the buffer. 12 // 13 // See GenericWriter for details about the benefits over the classic Buffer API. 14 type GenericBuffer[T any] struct { 15 base Buffer 16 write bufferFunc[T] 17 } 18 19 // NewGenericBuffer is like NewBuffer but returns a GenericBuffer[T] suited to write 20 // rows of Go type T. 21 // 22 // The type parameter T should be a map, struct, or any. Any other types will 23 // cause a panic at runtime. Type checking is a lot more effective when the 24 // generic parameter is a struct type, using map and interface types is somewhat 25 // similar to using a Writer. 26 // 27 // If the option list may explicitly declare a schema, it must be compatible 28 // with the schema generated from T. 29 func NewGenericBuffer[T any](options ...RowGroupOption) *GenericBuffer[T] { 30 config, err := NewRowGroupConfig(options...) 31 if err != nil { 32 panic(err) 33 } 34 35 t := typeOf[T]() 36 if config.Schema == nil { 37 config.Schema = schemaOf(dereference(t)) 38 } 39 40 buf := &GenericBuffer[T]{ 41 base: Buffer{config: config}, 42 } 43 buf.base.configure(config.Schema) 44 buf.write = bufferFuncOf[T](t, config.Schema) 45 return buf 46 } 47 48 func typeOf[T any]() reflect.Type { 49 var v T 50 return reflect.TypeOf(v) 51 } 52 53 type bufferFunc[T any] func(*GenericBuffer[T], []T) (int, error) 54 55 func bufferFuncOf[T any](t reflect.Type, schema *Schema) bufferFunc[T] { 56 switch t.Kind() { 57 case reflect.Interface, reflect.Map: 58 return (*GenericBuffer[T]).writeRows 59 60 case reflect.Struct: 61 return makeBufferFunc[T](t, schema) 62 63 case reflect.Pointer: 64 if e := t.Elem(); e.Kind() == reflect.Struct { 65 return makeBufferFunc[T](t, schema) 66 } 67 } 68 panic("cannot create buffer for values of type " + t.String()) 69 } 70 71 func makeBufferFunc[T any](t reflect.Type, schema *Schema) bufferFunc[T] { 72 writeRows := writeRowsFuncOf(t, schema, nil) 73 return func(buf *GenericBuffer[T], rows []T) (n int, err error) { 74 err = writeRows(buf.base.columns, makeArrayOf(rows), columnLevels{}) 75 if err == nil { 76 n = len(rows) 77 } 78 return n, err 79 } 80 } 81 82 func (buf *GenericBuffer[T]) Size() int64 { 83 return buf.base.Size() 84 } 85 86 func (buf *GenericBuffer[T]) NumRows() int64 { 87 return buf.base.NumRows() 88 } 89 90 func (buf *GenericBuffer[T]) ColumnChunks() []ColumnChunk { 91 return buf.base.ColumnChunks() 92 } 93 94 func (buf *GenericBuffer[T]) ColumnBuffers() []ColumnBuffer { 95 return buf.base.ColumnBuffers() 96 } 97 98 func (buf *GenericBuffer[T]) SortingColumns() []SortingColumn { 99 return buf.base.SortingColumns() 100 } 101 102 func (buf *GenericBuffer[T]) Len() int { 103 return buf.base.Len() 104 } 105 106 func (buf *GenericBuffer[T]) Less(i, j int) bool { 107 return buf.base.Less(i, j) 108 } 109 110 func (buf *GenericBuffer[T]) Swap(i, j int) { 111 buf.base.Swap(i, j) 112 } 113 114 func (buf *GenericBuffer[T]) Reset() { 115 buf.base.Reset() 116 } 117 118 func (buf *GenericBuffer[T]) Write(rows []T) (int, error) { 119 if len(rows) == 0 { 120 return 0, nil 121 } 122 return buf.write(buf, rows) 123 } 124 125 func (buf *GenericBuffer[T]) WriteRows(rows []Row) (int, error) { 126 return buf.base.WriteRows(rows) 127 } 128 129 func (buf *GenericBuffer[T]) WriteRowGroup(rowGroup RowGroup) (int64, error) { 130 return buf.base.WriteRowGroup(rowGroup) 131 } 132 133 func (buf *GenericBuffer[T]) Rows() Rows { 134 return buf.base.Rows() 135 } 136 137 func (buf *GenericBuffer[T]) Schema() *Schema { 138 return buf.base.Schema() 139 } 140 141 func (buf *GenericBuffer[T]) writeRows(rows []T) (int, error) { 142 if cap(buf.base.rowbuf) < len(rows) { 143 buf.base.rowbuf = make([]Row, len(rows)) 144 } else { 145 buf.base.rowbuf = buf.base.rowbuf[:len(rows)] 146 } 147 defer clearRows(buf.base.rowbuf) 148 149 schema := buf.base.Schema() 150 for i := range rows { 151 buf.base.rowbuf[i] = schema.Deconstruct(buf.base.rowbuf[i], &rows[i]) 152 } 153 154 return buf.base.WriteRows(buf.base.rowbuf) 155 } 156 157 var ( 158 _ RowGroup = (*GenericBuffer[any])(nil) 159 _ RowGroupWriter = (*GenericBuffer[any])(nil) 160 _ sort.Interface = (*GenericBuffer[any])(nil) 161 162 _ RowGroup = (*GenericBuffer[struct{}])(nil) 163 _ RowGroupWriter = (*GenericBuffer[struct{}])(nil) 164 _ sort.Interface = (*GenericBuffer[struct{}])(nil) 165 166 _ RowGroup = (*GenericBuffer[map[struct{}]struct{}])(nil) 167 _ RowGroupWriter = (*GenericBuffer[map[struct{}]struct{}])(nil) 168 _ sort.Interface = (*GenericBuffer[map[struct{}]struct{}])(nil) 169 )