github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/col/coldata/vec.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // Package coldata exposes utilities for handling columnarized data.
    12  package coldata
    13  
    14  import (
    15  	"fmt"
    16  
    17  	"github.com/cockroachdb/cockroachdb-parser/pkg/col/typeconv"
    18  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/types"
    19  )
    20  
// Column is an interface that represents a raw array of a Go native type. It
// is the typeless backing storage of a Vec (see Vec.Col) and is what a
// ColumnFactory produces.
type Column interface {
	// Len returns the number of elements in the Column.
	Len() int
}
    26  
// SliceArgs represents the arguments passed in to Vec.Append and Nulls.set.
// Vec.Copy takes the same arguments.
type SliceArgs struct {
	// Src is the data being appended.
	Src Vec
	// Sel is an optional slice specifying indices to append to the destination
	// slice. Note that Src{Start,End}Idx apply to Sel.
	Sel []int
	// DestIdx is the first index that Append will append to.
	DestIdx int
	// SrcStartIdx is the index of the first element in Src that Append will
	// append.
	SrcStartIdx int
	// SrcEndIdx is the exclusive end index of Src, i.e. the element at the
	// index before SrcEndIdx is the last element appended to the destination
	// slice, similar to Src[SrcStartIdx:SrcEndIdx].
	SrcEndIdx int
}
    44  
// Vec is an interface that represents a column vector that's accessible by
// Go native types.
type Vec interface {
	// Type returns the type of data stored in this Vec. Consider whether
	// CanonicalTypeFamily() should be used instead.
	Type() *types.T
	// CanonicalTypeFamily returns the canonical type family of data stored in
	// this Vec.
	CanonicalTypeFamily() types.Family

	// Bool returns a bool slice.
	Bool() Bools
	// Int16 returns an int16 slice.
	Int16() Int16s
	// Int32 returns an int32 slice.
	Int32() Int32s
	// Int64 returns an int64 slice.
	Int64() Int64s
	// Float64 returns a float64 slice.
	Float64() Float64s
	// Bytes returns a flat Bytes representation.
	Bytes() *Bytes
	// Decimal returns an apd.Decimal slice.
	Decimal() Decimals
	// Timestamp returns a time.Time slice.
	Timestamp() Times
	// Interval returns a duration.Duration slice.
	Interval() Durations
	// JSON returns a vector of JSONs.
	JSON() *JSONs
	// Datum returns a vector of Datums.
	Datum() DatumVec

	// Col returns the raw, typeless backing storage for this Vec.
	Col() Column

	// SetCol sets the member column (in the case of mutable columns).
	SetCol(Column)

	// TemplateType returns an []interface{} and is used for operator templates.
	// Do not call this from normal code - it'll always panic.
	TemplateType() []interface{}

	// Append uses SliceArgs to append elements of a source Vec into this Vec.
	// It is logically equivalent to:
	// destVec = append(destVec[:args.DestIdx], args.Src[args.SrcStartIdx:args.SrcEndIdx]...)
	// An optional Sel slice can also be provided to apply a filter on the source
	// Vec.
	// Refer to the SliceArgs comment for specifics and TestAppend for examples.
	//
	// Note: Append()'ing from a Vector into itself is not supported.
	Append(SliceArgs)

	// Copy uses SliceArgs to copy elements of a source Vec into this Vec. It is
	// logically equivalent to:
	// copy(destVec[args.DestIdx:], args.Src[args.SrcStartIdx:args.SrcEndIdx])
	// An optional Sel slice can also be provided to apply a filter on the source
	// Vec.
	// Refer to the SliceArgs comment for specifics and TestCopy for examples.
	Copy(SliceArgs)

	// CopyWithReorderedSource copies a value at position order[sel[i]] in src
	// into the receiver at position sel[i]. len(sel) elements are copied.
	// Resulting values of elements not mentioned in sel are undefined after
	// this function.
	CopyWithReorderedSource(src Vec, sel, order []int)

	// Window returns a "window" into the Vec. A "window" is similar to Go's
	// slice of the current Vec from [start, end), but the returned object is
	// NOT allowed to be modified (modifying it might result in undefined
	// behavior).
	Window(start int, end int) Vec

	// MaybeHasNulls returns true if the column possibly has any null values, and
	// returns false if the column definitely has no null values.
	MaybeHasNulls() bool

	// Nulls returns the nulls vector for the column.
	Nulls() *Nulls

	// SetNulls sets the nulls vector for this column.
	SetNulls(Nulls)

	// Length returns the length of the slice that is underlying this Vec.
	Length() int

	// Capacity returns the capacity of the Go slice that is underlying this
	// Vec. Note that if there is no "slice" (like in the case of flat bytes),
	// then the "capacity" of such an object is equal to the number of elements.
	Capacity() int
}
   136  
   137  var _ Vec = &memColumn{}
   138  
// memColumn is a simple pass-through implementation of Vec that just casts
// a generic interface{} to the proper type when requested.
type memColumn struct {
	// t is the type this column was initialized with; it is returned by Type.
	t *types.T
	// canonicalTypeFamily is derived from t.Family() when the column is
	// initialized and is returned by CanonicalTypeFamily.
	canonicalTypeFamily types.Family
	// col is the typeless backing storage. The typed accessors (Bool, Int64,
	// ...) type-assert it to the concrete column type.
	col Column
	// nulls is the nulls vector of the column; Nulls returns a pointer to it.
	nulls Nulls
}
   147  
// ColumnFactory is an interface that can construct columns for Batches.
type ColumnFactory interface {
	// MakeColumn constructs a Column for the type t using the given length.
	MakeColumn(t *types.T, length int) Column
}
   152  
// defaultColumnFactory is the stateless ColumnFactory implementation that
// backs StandardColumnFactory.
type defaultColumnFactory struct{}
   154  
   155  // StandardColumnFactory is a factory that produces columns of types that are
   156  // explicitly supported by the vectorized engine (i.e. not datum-backed).
   157  var StandardColumnFactory ColumnFactory = &defaultColumnFactory{}
   158  
   159  func (cf *defaultColumnFactory) MakeColumn(t *types.T, length int) Column {
   160  	switch canonicalTypeFamily := typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()); canonicalTypeFamily {
   161  	case types.BoolFamily:
   162  		return make(Bools, length)
   163  	case types.BytesFamily:
   164  		return NewBytes(length)
   165  	case types.IntFamily:
   166  		switch t.Width() {
   167  		case 16:
   168  			return make(Int16s, length)
   169  		case 32:
   170  			return make(Int32s, length)
   171  		case 0, 64:
   172  			return make(Int64s, length)
   173  		default:
   174  			panic(fmt.Sprintf("unexpected integer width: %d", t.Width()))
   175  		}
   176  	case types.FloatFamily:
   177  		return make(Float64s, length)
   178  	case types.DecimalFamily:
   179  		return make(Decimals, length)
   180  	case types.TimestampTZFamily:
   181  		return make(Times, length)
   182  	case types.IntervalFamily:
   183  		return make(Durations, length)
   184  	case types.JsonFamily:
   185  		return NewJSONs(length)
   186  	default:
   187  		panic(fmt.Sprintf("StandardColumnFactory doesn't support %s", t))
   188  	}
   189  }
   190  
   191  // NewMemColumn returns a new memColumn, initialized with a length using the
   192  // given column factory.
   193  func NewMemColumn(t *types.T, length int, factory ColumnFactory) Vec {
   194  	var m memColumn
   195  	m.init(t, length, factory)
   196  	return &m
   197  }
   198  
   199  // init initializes the receiver with a length using the given column factory.
   200  func (m *memColumn) init(t *types.T, length int, factory ColumnFactory) {
   201  	*m = memColumn{
   202  		t:                   t,
   203  		canonicalTypeFamily: typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()),
   204  		col:                 factory.MakeColumn(t, length),
   205  		nulls:               NewNulls(length),
   206  	}
   207  }
   208  
// Type implements the Vec interface. It returns the type the column was
// initialized with.
func (m *memColumn) Type() *types.T {
	return m.t
}
   212  
// CanonicalTypeFamily implements the Vec interface. It returns the canonical
// type family that was computed when the column was initialized.
func (m *memColumn) CanonicalTypeFamily() types.Family {
	return m.canonicalTypeFamily
}
   216  
// SetCol implements the Vec interface. It replaces the backing storage with
// col; the stored type and the nulls vector are left untouched.
func (m *memColumn) SetCol(col Column) {
	m.col = col
}
   220  
   221  func (m *memColumn) Bool() Bools {
   222  	return m.col.(Bools)
   223  }
   224  
   225  func (m *memColumn) Int16() Int16s {
   226  	return m.col.(Int16s)
   227  }
   228  
   229  func (m *memColumn) Int32() Int32s {
   230  	return m.col.(Int32s)
   231  }
   232  
   233  func (m *memColumn) Int64() Int64s {
   234  	return m.col.(Int64s)
   235  }
   236  
   237  func (m *memColumn) Float64() Float64s {
   238  	return m.col.(Float64s)
   239  }
   240  
   241  func (m *memColumn) Bytes() *Bytes {
   242  	return m.col.(*Bytes)
   243  }
   244  
   245  func (m *memColumn) Decimal() Decimals {
   246  	return m.col.(Decimals)
   247  }
   248  
   249  func (m *memColumn) Timestamp() Times {
   250  	return m.col.(Times)
   251  }
   252  
   253  func (m *memColumn) Interval() Durations {
   254  	return m.col.(Durations)
   255  }
   256  
   257  func (m *memColumn) JSON() *JSONs {
   258  	return m.col.(*JSONs)
   259  }
   260  
   261  func (m *memColumn) Datum() DatumVec {
   262  	return m.col.(DatumVec)
   263  }
   264  
// Col implements the Vec interface. It returns the backing storage without
// asserting its concrete type.
func (m *memColumn) Col() Column {
	return m.col
}
   268  
// TemplateType implements the Vec interface. It exists only for operator
// templates and unconditionally panics when called.
func (m *memColumn) TemplateType() []interface{} {
	panic("don't call this from non template code")
}
   272  
// MaybeHasNulls implements the Vec interface. It reports the maybeHasNulls
// flag of the column's nulls vector.
func (m *memColumn) MaybeHasNulls() bool {
	return m.nulls.maybeHasNulls
}
   276  
// Nulls implements the Vec interface. It returns a pointer to the nulls
// vector stored inside the column (not a copy), so changes made through the
// pointer are visible to the column.
func (m *memColumn) Nulls() *Nulls {
	return &m.nulls
}
   280  
// SetNulls implements the Vec interface. It replaces the column's nulls
// vector with the Nulls value n, which is passed and stored by value.
func (m *memColumn) SetNulls(n Nulls) {
	m.nulls = n
}
   284  
// Length implements the Vec interface. It returns the number of elements
// reported by the backing column's Len method.
func (m *memColumn) Length() int {
	return m.col.Len()
}
   288  
   289  func (m *memColumn) Capacity() int {
   290  	switch m.CanonicalTypeFamily() {
   291  	case types.BoolFamily:
   292  		return cap(m.col.(Bools))
   293  	case types.BytesFamily:
   294  		return m.Bytes().Len()
   295  	case types.IntFamily:
   296  		switch m.t.Width() {
   297  		case 16:
   298  			return cap(m.col.(Int16s))
   299  		case 32:
   300  			return cap(m.col.(Int32s))
   301  		case 0, 64:
   302  			return cap(m.col.(Int64s))
   303  		default:
   304  			panic(fmt.Sprintf("unexpected int width: %d", m.t.Width()))
   305  		}
   306  	case types.FloatFamily:
   307  		return cap(m.col.(Float64s))
   308  	case types.DecimalFamily:
   309  		return cap(m.col.(Decimals))
   310  	case types.TimestampTZFamily:
   311  		return cap(m.col.(Times))
   312  	case types.IntervalFamily:
   313  		return cap(m.col.(Durations))
   314  	case types.JsonFamily:
   315  		return m.JSON().Len()
   316  	case typeconv.DatumVecCanonicalTypeFamily:
   317  		return m.col.(DatumVec).Cap()
   318  	default:
   319  		panic(fmt.Sprintf("unhandled type %s", m.t))
   320  	}
   321  }