github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/buffer_go18.go (about)

     1  //go:build go1.18
     2  
     3  package parquet
     4  
     5  import (
     6  	"reflect"
     7  	"sort"
     8  )
     9  
// GenericBuffer is similar to a Buffer but uses a type parameter to define the
// Go type representing the schema of rows in the buffer.
//
// See GenericWriter for details about the benefits over the classic Buffer API.
type GenericBuffer[T any] struct {
	base  Buffer        // underlying Buffer that the methods of GenericBuffer delegate to
	write bufferFunc[T] // row-writing function chosen by NewGenericBuffer; invoked by Write
}
    18  
    19  // NewGenericBuffer is like NewBuffer but returns a GenericBuffer[T] suited to write
    20  // rows of Go type T.
    21  //
    22  // The type parameter T should be a map, struct, or any. Any other types will
    23  // cause a panic at runtime. Type checking is a lot more effective when the
    24  // generic parameter is a struct type, using map and interface types is somewhat
    25  // similar to using a Writer.  If using an interface type for the type parameter,
    26  // then providing a schema at instantiation is required.
    27  //
    28  // If the option list may explicitly declare a schema, it must be compatible
    29  // with the schema generated from T.
    30  func NewGenericBuffer[T any](options ...RowGroupOption) *GenericBuffer[T] {
    31  	config, err := NewRowGroupConfig(options...)
    32  	if err != nil {
    33  		panic(err)
    34  	}
    35  
    36  	t := typeOf[T]()
    37  	if config.Schema == nil && t != nil {
    38  		config.Schema = schemaOf(dereference(t))
    39  	}
    40  
    41  	if config.Schema == nil {
    42  		panic("generic buffer must be instantiated with schema or concrete type.")
    43  	}
    44  
    45  	buf := &GenericBuffer[T]{
    46  		base: Buffer{config: config},
    47  	}
    48  	buf.base.configure(config.Schema)
    49  	buf.write = bufferFuncOf[T](t, config.Schema)
    50  	return buf
    51  }
    52  
    53  func typeOf[T any]() reflect.Type {
    54  	var v T
    55  	return reflect.TypeOf(v)
    56  }
    57  
    58  type bufferFunc[T any] func(*GenericBuffer[T], []T) (int, error)
    59  
    60  func bufferFuncOf[T any](t reflect.Type, schema *Schema) bufferFunc[T] {
    61  	if t == nil {
    62  		return (*GenericBuffer[T]).writeRows
    63  	}
    64  	switch t.Kind() {
    65  	case reflect.Interface, reflect.Map:
    66  		return (*GenericBuffer[T]).writeRows
    67  
    68  	case reflect.Struct:
    69  		return makeBufferFunc[T](t, schema)
    70  
    71  	case reflect.Pointer:
    72  		if e := t.Elem(); e.Kind() == reflect.Struct {
    73  			return makeBufferFunc[T](t, schema)
    74  		}
    75  	}
    76  	panic("cannot create buffer for values of type " + t.String())
    77  }
    78  
    79  func makeBufferFunc[T any](t reflect.Type, schema *Schema) bufferFunc[T] {
    80  	writeRows := writeRowsFuncOf(t, schema, nil)
    81  	return func(buf *GenericBuffer[T], rows []T) (n int, err error) {
    82  		err = writeRows(buf.base.columns, makeArrayOf(rows), columnLevels{})
    83  		if err == nil {
    84  			n = len(rows)
    85  		}
    86  		return n, err
    87  	}
    88  }
    89  
// Size returns the size reported by the underlying Buffer.
func (buf *GenericBuffer[T]) Size() int64 {
	return buf.base.Size()
}
    93  
// NumRows returns the number of rows reported by the underlying Buffer.
func (buf *GenericBuffer[T]) NumRows() int64 {
	return buf.base.NumRows()
}
    97  
// ColumnChunks returns the column chunks of the underlying Buffer.
func (buf *GenericBuffer[T]) ColumnChunks() []ColumnChunk {
	return buf.base.ColumnChunks()
}
   101  
// ColumnBuffers returns the column buffers of the underlying Buffer.
func (buf *GenericBuffer[T]) ColumnBuffers() []ColumnBuffer {
	return buf.base.ColumnBuffers()
}
   105  
// SortingColumns returns the sorting columns of the underlying Buffer.
func (buf *GenericBuffer[T]) SortingColumns() []SortingColumn {
	return buf.base.SortingColumns()
}
   109  
// Len returns the length reported by the underlying Buffer. Together with
// Less and Swap it lets a GenericBuffer satisfy sort.Interface.
func (buf *GenericBuffer[T]) Len() int {
	return buf.base.Len()
}
   113  
// Less reports the ordering of the elements at indexes i and j as determined
// by the underlying Buffer. Together with Len and Swap it lets a GenericBuffer
// satisfy sort.Interface.
func (buf *GenericBuffer[T]) Less(i, j int) bool {
	return buf.base.Less(i, j)
}
   117  
// Swap exchanges the elements at indexes i and j in the underlying Buffer.
// Together with Len and Less it lets a GenericBuffer satisfy sort.Interface.
func (buf *GenericBuffer[T]) Swap(i, j int) {
	buf.base.Swap(i, j)
}
   121  
// Reset resets the underlying Buffer.
func (buf *GenericBuffer[T]) Reset() {
	buf.base.Reset()
}
   125  
   126  func (buf *GenericBuffer[T]) Write(rows []T) (int, error) {
   127  	if len(rows) == 0 {
   128  		return 0, nil
   129  	}
   130  	return buf.write(buf, rows)
   131  }
   132  
// WriteRows forwards rows to the underlying Buffer's WriteRows method and
// returns its results. It does not go through the typed write function used
// by Write.
func (buf *GenericBuffer[T]) WriteRows(rows []Row) (int, error) {
	return buf.base.WriteRows(rows)
}
   136  
// WriteRowGroup forwards rowGroup to the underlying Buffer's WriteRowGroup
// method and returns its results.
func (buf *GenericBuffer[T]) WriteRowGroup(rowGroup RowGroup) (int64, error) {
	return buf.base.WriteRowGroup(rowGroup)
}
   140  
// Rows returns the Rows value produced by the underlying Buffer.
func (buf *GenericBuffer[T]) Rows() Rows {
	return buf.base.Rows()
}
   144  
// Schema returns the schema of the underlying Buffer.
func (buf *GenericBuffer[T]) Schema() *Schema {
	return buf.base.Schema()
}
   148  
   149  func (buf *GenericBuffer[T]) writeRows(rows []T) (int, error) {
   150  	if cap(buf.base.rowbuf) < len(rows) {
   151  		buf.base.rowbuf = make([]Row, len(rows))
   152  	} else {
   153  		buf.base.rowbuf = buf.base.rowbuf[:len(rows)]
   154  	}
   155  	defer clearRows(buf.base.rowbuf)
   156  
   157  	schema := buf.base.Schema()
   158  	for i := range rows {
   159  		buf.base.rowbuf[i] = schema.Deconstruct(buf.base.rowbuf[i], &rows[i])
   160  	}
   161  
   162  	return buf.base.WriteRows(buf.base.rowbuf)
   163  }
   164  
// Compile-time assertions that *GenericBuffer[T] implements RowGroup,
// RowGroupWriter, and sort.Interface when T is an interface type, a struct
// type, and a map type.
var (
	_ RowGroup       = (*GenericBuffer[any])(nil)
	_ RowGroupWriter = (*GenericBuffer[any])(nil)
	_ sort.Interface = (*GenericBuffer[any])(nil)

	_ RowGroup       = (*GenericBuffer[struct{}])(nil)
	_ RowGroupWriter = (*GenericBuffer[struct{}])(nil)
	_ sort.Interface = (*GenericBuffer[struct{}])(nil)

	_ RowGroup       = (*GenericBuffer[map[struct{}]struct{}])(nil)
	_ RowGroupWriter = (*GenericBuffer[map[struct{}]struct{}])(nil)
	_ sort.Interface = (*GenericBuffer[map[struct{}]struct{}])(nil)
)