github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/buffer_go18.go (about)

     1  //go:build go1.18
     2  
     3  package parquet
     4  
     5  import (
     6  	"reflect"
     7  	"sort"
     8  )
     9  
    10  // GenericBuffer is similar to a Buffer but uses a type parameter to define the
    11  // Go type representing the schema of rows in the buffer.
    12  //
    13  // See GenericWriter for details about the benefits over the classic Buffer API.
    14  type GenericBuffer[T any] struct {
    15  	base  Buffer
    16  	write bufferFunc[T]
    17  }
    18  
    19  // NewGenericBuffer is like NewBuffer but returns a GenericBuffer[T] suited to write
    20  // rows of Go type T.
    21  //
    22  // The type parameter T should be a map, struct, or any. Any other types will
    23  // cause a panic at runtime. Type checking is a lot more effective when the
    24  // generic parameter is a struct type, using map and interface types is somewhat
    25  // similar to using a Writer.
    26  //
    27  // If the option list may explicitly declare a schema, it must be compatible
    28  // with the schema generated from T.
    29  func NewGenericBuffer[T any](options ...RowGroupOption) *GenericBuffer[T] {
    30  	config, err := NewRowGroupConfig(options...)
    31  	if err != nil {
    32  		panic(err)
    33  	}
    34  
    35  	t := typeOf[T]()
    36  	if config.Schema == nil {
    37  		config.Schema = schemaOf(dereference(t))
    38  	}
    39  
    40  	buf := &GenericBuffer[T]{
    41  		base: Buffer{config: config},
    42  	}
    43  	buf.base.configure(config.Schema)
    44  	buf.write = bufferFuncOf[T](t, config.Schema)
    45  	return buf
    46  }
    47  
    48  func typeOf[T any]() reflect.Type {
    49  	var v T
    50  	return reflect.TypeOf(v)
    51  }
    52  
    53  type bufferFunc[T any] func(*GenericBuffer[T], []T) (int, error)
    54  
    55  func bufferFuncOf[T any](t reflect.Type, schema *Schema) bufferFunc[T] {
    56  	switch t.Kind() {
    57  	case reflect.Interface, reflect.Map:
    58  		return (*GenericBuffer[T]).writeRows
    59  
    60  	case reflect.Struct:
    61  		return makeBufferFunc[T](t, schema)
    62  
    63  	case reflect.Pointer:
    64  		if e := t.Elem(); e.Kind() == reflect.Struct {
    65  			return makeBufferFunc[T](t, schema)
    66  		}
    67  	}
    68  	panic("cannot create buffer for values of type " + t.String())
    69  }
    70  
    71  func makeBufferFunc[T any](t reflect.Type, schema *Schema) bufferFunc[T] {
    72  	writeRows := writeRowsFuncOf(t, schema, nil)
    73  	return func(buf *GenericBuffer[T], rows []T) (n int, err error) {
    74  		err = writeRows(buf.base.columns, makeArrayOf(rows), columnLevels{})
    75  		if err == nil {
    76  			n = len(rows)
    77  		}
    78  		return n, err
    79  	}
    80  }
    81  
    82  func (buf *GenericBuffer[T]) Size() int64 {
    83  	return buf.base.Size()
    84  }
    85  
    86  func (buf *GenericBuffer[T]) NumRows() int64 {
    87  	return buf.base.NumRows()
    88  }
    89  
    90  func (buf *GenericBuffer[T]) ColumnChunks() []ColumnChunk {
    91  	return buf.base.ColumnChunks()
    92  }
    93  
    94  func (buf *GenericBuffer[T]) ColumnBuffers() []ColumnBuffer {
    95  	return buf.base.ColumnBuffers()
    96  }
    97  
    98  func (buf *GenericBuffer[T]) SortingColumns() []SortingColumn {
    99  	return buf.base.SortingColumns()
   100  }
   101  
   102  func (buf *GenericBuffer[T]) Len() int {
   103  	return buf.base.Len()
   104  }
   105  
   106  func (buf *GenericBuffer[T]) Less(i, j int) bool {
   107  	return buf.base.Less(i, j)
   108  }
   109  
   110  func (buf *GenericBuffer[T]) Swap(i, j int) {
   111  	buf.base.Swap(i, j)
   112  }
   113  
   114  func (buf *GenericBuffer[T]) Reset() {
   115  	buf.base.Reset()
   116  }
   117  
   118  func (buf *GenericBuffer[T]) Write(rows []T) (int, error) {
   119  	if len(rows) == 0 {
   120  		return 0, nil
   121  	}
   122  	return buf.write(buf, rows)
   123  }
   124  
   125  func (buf *GenericBuffer[T]) WriteRows(rows []Row) (int, error) {
   126  	return buf.base.WriteRows(rows)
   127  }
   128  
   129  func (buf *GenericBuffer[T]) WriteRowGroup(rowGroup RowGroup) (int64, error) {
   130  	return buf.base.WriteRowGroup(rowGroup)
   131  }
   132  
   133  func (buf *GenericBuffer[T]) Rows() Rows {
   134  	return buf.base.Rows()
   135  }
   136  
   137  func (buf *GenericBuffer[T]) Schema() *Schema {
   138  	return buf.base.Schema()
   139  }
   140  
   141  func (buf *GenericBuffer[T]) writeRows(rows []T) (int, error) {
   142  	if cap(buf.base.rowbuf) < len(rows) {
   143  		buf.base.rowbuf = make([]Row, len(rows))
   144  	} else {
   145  		buf.base.rowbuf = buf.base.rowbuf[:len(rows)]
   146  	}
   147  	defer clearRows(buf.base.rowbuf)
   148  
   149  	schema := buf.base.Schema()
   150  	for i := range rows {
   151  		buf.base.rowbuf[i] = schema.Deconstruct(buf.base.rowbuf[i], &rows[i])
   152  	}
   153  
   154  	return buf.base.WriteRows(buf.base.rowbuf)
   155  }
   156  
   157  var (
   158  	_ RowGroup       = (*GenericBuffer[any])(nil)
   159  	_ RowGroupWriter = (*GenericBuffer[any])(nil)
   160  	_ sort.Interface = (*GenericBuffer[any])(nil)
   161  
   162  	_ RowGroup       = (*GenericBuffer[struct{}])(nil)
   163  	_ RowGroupWriter = (*GenericBuffer[struct{}])(nil)
   164  	_ sort.Interface = (*GenericBuffer[struct{}])(nil)
   165  
   166  	_ RowGroup       = (*GenericBuffer[map[struct{}]struct{}])(nil)
   167  	_ RowGroupWriter = (*GenericBuffer[map[struct{}]struct{}])(nil)
   168  	_ sort.Interface = (*GenericBuffer[map[struct{}]struct{}])(nil)
   169  )