github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/col/coldata/vec.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package coldata
    12  
    13  import (
    14  	"fmt"
    15  	"time"
    16  
    17  	"github.com/cockroachdb/apd"
    18  	"github.com/cockroachdb/cockroach/pkg/col/typeconv"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    20  	"github.com/cockroachdb/cockroach/pkg/util/duration"
    21  )
    22  
// Column is an interface that represents a raw array of a Go native type.
// It is declared as the empty interface, so it places no compile-time
// constraint on the value it holds; memColumn recovers the concrete type
// with a type assertion in its typed accessors.
type Column interface{}
    25  
// SliceArgs represents the arguments passed in to Vec.Append and Nulls.set.
// It describes which elements of a source Vec are taken and where in the
// destination they are placed.
type SliceArgs struct {
	// Src is the data being appended.
	Src Vec
	// Sel is an optional slice specifying indices to append to the destination
	// slice. Note that when Sel is provided, SrcStartIdx and SrcEndIdx apply to
	// Sel.
	Sel []int
	// DestIdx is the first index that Append will append to.
	DestIdx int
	// SrcStartIdx is the index of the first element in Src that Append will
	// append.
	SrcStartIdx int
	// SrcEndIdx is the exclusive end index of Src, i.e. the element at the index
	// just before SrcEndIdx is the last element appended to the destination
	// slice, similar to Src[SrcStartIdx:SrcEndIdx].
	SrcEndIdx int
}
    43  
// CopySliceArgs represents the extension of SliceArgs that is passed in to
// Vec.Copy. It embeds SliceArgs, so all of its fields are available directly.
type CopySliceArgs struct {
	SliceArgs
	// SelOnDest, if true, uses the selection vector as a lens into the
	// destination as well as the source. Normally, when SelOnDest is false, the
	// selection vector is applied to the source vector, but the results are
	// copied densely into the destination vector.
	SelOnDest bool
}
    54  
// Vec is an interface that represents a column vector that's accessible by
// Go native types.
//
// The typed accessors (Bool, Int16, ..., Datum) each expose the backing
// storage as one specific Go type, while Col exposes it untyped.
type Vec interface {
	// Type returns the type of data stored in this Vec. Consider whether
	// CanonicalTypeFamily() should be used instead.
	Type() *types.T
	// CanonicalTypeFamily returns the canonical type family of data stored in
	// this Vec.
	CanonicalTypeFamily() types.Family

	// TODO(jordan): is a bitmap or slice of bools better?
	// Bool returns a bool slice.
	Bool() []bool
	// Int16 returns an int16 slice.
	Int16() []int16
	// Int32 returns an int32 slice.
	Int32() []int32
	// Int64 returns an int64 slice.
	Int64() []int64
	// Float64 returns a float64 slice.
	Float64() []float64
	// Bytes returns a flat Bytes representation.
	Bytes() *Bytes
	// TODO(jordan): should this be [][]byte?
	// Decimal returns an apd.Decimal slice.
	Decimal() []apd.Decimal
	// Timestamp returns a time.Time slice.
	Timestamp() []time.Time
	// Interval returns a duration.Duration slice.
	Interval() []duration.Duration
	// Datum returns a vector of Datums.
	Datum() DatumVec

	// Col returns the raw, typeless backing storage for this Vec.
	Col() interface{}

	// SetCol sets the member column (in the case of mutable columns).
	SetCol(interface{})

	// TemplateType returns an []interface{} and is used for operator templates.
	// Do not call this from normal code - it'll always panic.
	TemplateType() []interface{}

	// Append uses SliceArgs to append elements of a source Vec into this Vec.
	// It is logically equivalent to:
	// destVec = append(destVec[:args.DestIdx], args.Src[args.SrcStartIdx:args.SrcEndIdx]...)
	// An optional Sel slice can also be provided to apply a filter on the source
	// Vec.
	// Refer to the SliceArgs comment for specifics and TestAppend for examples.
	Append(SliceArgs)

	// Copy uses CopySliceArgs to copy elements of a source Vec into this Vec. It is
	// logically equivalent to:
	// copy(destVec[args.DestIdx:], args.Src[args.SrcStartIdx:args.SrcEndIdx])
	// An optional Sel slice can also be provided to apply a filter on the source
	// Vec.
	// Refer to the CopySliceArgs comment for specifics and TestCopy for examples.
	Copy(CopySliceArgs)

	// Window returns a "window" into the Vec. A "window" is similar to Golang's
	// slice of the current Vec from [start, end), but the returned object is NOT
	// allowed to be modified (modifying it might result in undefined
	// behavior).
	Window(start int, end int) Vec

	// MaybeHasNulls returns true if the column possibly has any null values, and
	// returns false if the column definitely has no null values.
	MaybeHasNulls() bool

	// Nulls returns the nulls vector for the column.
	Nulls() *Nulls

	// SetNulls sets the nulls vector for this column.
	SetNulls(*Nulls)

	// Length returns the length of the slice that is underlying this Vec.
	Length() int

	// SetLength sets the length of the slice that is underlying this Vec. Note
	// that the length of the batch which this Vec belongs to "takes priority".
	SetLength(int)

	// Capacity returns the capacity of the Golang's slice that is underlying
	// this Vec. Note that if there is no "slice" (like in case of flat bytes),
	// the "capacity" of such object is undefined, and so is the behavior of this
	// method.
	Capacity() int
}
   143  
   144  var _ Vec = &memColumn{}
   145  
// memColumn is a simple pass-through implementation of Vec that just casts
// a generic interface{} to the proper type when requested.
//
// Fields:
//   - t is the type reported by Type().
//   - canonicalTypeFamily is the family reported by CanonicalTypeFamily();
//     NewMemColumn derives it from t.
//   - col is the untyped backing storage that the typed accessors assert on.
//   - nulls is the nulls vector, stored by value; Nulls() hands out a pointer
//     to it.
type memColumn struct {
	t                   *types.T
	canonicalTypeFamily types.Family
	col                 Column
	nulls               Nulls
}
   154  
// ColumnFactory is an interface that can construct columns for Batches.
type ColumnFactory interface {
	// MakeColumn returns the backing storage for a column of type t. n is the
	// size argument; defaultColumnFactory uses it as the length of the
	// allocated storage.
	MakeColumn(t *types.T, n int) Column
}
   159  
// defaultColumnFactory is the stateless ColumnFactory implementation that
// backs StandardColumnFactory.
type defaultColumnFactory struct{}
   161  
// StandardColumnFactory is a factory that produces columns of types that are
// explicitly supported by the vectorized engine (i.e. not datum-backed).
// Its MakeColumn panics when asked for any other type.
var StandardColumnFactory ColumnFactory = &defaultColumnFactory{}
   165  
   166  func (cf *defaultColumnFactory) MakeColumn(t *types.T, n int) Column {
   167  	switch canonicalTypeFamily := typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()); canonicalTypeFamily {
   168  	case types.BoolFamily:
   169  		return make([]bool, n)
   170  	case types.BytesFamily:
   171  		return NewBytes(n)
   172  	case types.IntFamily:
   173  		switch t.Width() {
   174  		case 16:
   175  			return make([]int16, n)
   176  		case 32:
   177  			return make([]int32, n)
   178  		case 0, 64:
   179  			return make([]int64, n)
   180  		default:
   181  			panic(fmt.Sprintf("unexpected integer width: %d", t.Width()))
   182  		}
   183  	case types.FloatFamily:
   184  		return make([]float64, n)
   185  	case types.DecimalFamily:
   186  		return make([]apd.Decimal, n)
   187  	case types.TimestampTZFamily:
   188  		return make([]time.Time, n)
   189  	case types.IntervalFamily:
   190  		return make([]duration.Duration, n)
   191  	default:
   192  		panic(fmt.Sprintf("StandardColumnFactory doesn't support %s", t))
   193  	}
   194  }
   195  
   196  // NewMemColumn returns a new memColumn, initialized with a length using the
   197  // given column factory.
   198  func NewMemColumn(t *types.T, n int, factory ColumnFactory) Vec {
   199  	return &memColumn{
   200  		t:                   t,
   201  		canonicalTypeFamily: typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()),
   202  		col:                 factory.MakeColumn(t, n),
   203  		nulls:               NewNulls(n),
   204  	}
   205  }
   206  
// Type returns the type this column was created with.
func (m *memColumn) Type() *types.T {
	return m.t
}
   210  
// CanonicalTypeFamily returns the canonical type family stored on the
// column; it is not recomputed on each call.
func (m *memColumn) CanonicalTypeFamily() types.Family {
	return m.canonicalTypeFamily
}
   214  
// SetCol replaces the backing storage with col. No check is made that col
// matches the column's type; a mismatch surfaces later as a panic in the
// typed accessors.
func (m *memColumn) SetCol(col interface{}) {
	m.col = col
}
   218  
// Bool returns the backing storage as a []bool. It panics if the storage
// holds any other type.
func (m *memColumn) Bool() []bool {
	return m.col.([]bool)
}
   222  
// Int16 returns the backing storage as a []int16. It panics if the storage
// holds any other type.
func (m *memColumn) Int16() []int16 {
	return m.col.([]int16)
}
   226  
// Int32 returns the backing storage as a []int32. It panics if the storage
// holds any other type.
func (m *memColumn) Int32() []int32 {
	return m.col.([]int32)
}
   230  
// Int64 returns the backing storage as a []int64. It panics if the storage
// holds any other type.
func (m *memColumn) Int64() []int64 {
	return m.col.([]int64)
}
   234  
// Float64 returns the backing storage as a []float64. It panics if the
// storage holds any other type.
func (m *memColumn) Float64() []float64 {
	return m.col.([]float64)
}
   238  
// Bytes returns the backing storage as a *Bytes. It panics if the storage
// holds any other type.
func (m *memColumn) Bytes() *Bytes {
	return m.col.(*Bytes)
}
   242  
// Decimal returns the backing storage as a []apd.Decimal. It panics if the
// storage holds any other type.
func (m *memColumn) Decimal() []apd.Decimal {
	return m.col.([]apd.Decimal)
}
   246  
// Timestamp returns the backing storage as a []time.Time. It panics if the
// storage holds any other type.
func (m *memColumn) Timestamp() []time.Time {
	return m.col.([]time.Time)
}
   250  
// Interval returns the backing storage as a []duration.Duration. It panics
// if the storage holds any other type.
func (m *memColumn) Interval() []duration.Duration {
	return m.col.([]duration.Duration)
}
   254  
// Datum returns the backing storage as a DatumVec. It panics if the storage
// does not implement DatumVec.
func (m *memColumn) Datum() DatumVec {
	return m.col.(DatumVec)
}
   258  
// Col returns the backing storage without asserting its concrete type.
func (m *memColumn) Col() interface{} {
	return m.col
}
   262  
// TemplateType exists only to satisfy Vec for operator templates. It always
// panics and never returns.
func (m *memColumn) TemplateType() []interface{} {
	panic("don't call this from non template code")
}
   266  
// MaybeHasNulls reports the maybeHasNulls flag of this column's nulls
// vector.
func (m *memColumn) MaybeHasNulls() bool {
	return m.nulls.maybeHasNulls
}
   270  
// Nulls returns a pointer to the nulls vector stored inside this column, so
// changes made through the pointer are visible to the column.
func (m *memColumn) Nulls() *Nulls {
	return &m.nulls
}
   274  
// SetNulls overwrites this column's nulls vector with a copy of the Nulls
// value that n points to. n must be non-nil; a nil n panics on dereference.
func (m *memColumn) SetNulls(n *Nulls) {
	m.nulls = *n
}
   278  
   279  func (m *memColumn) Length() int {
   280  	switch m.CanonicalTypeFamily() {
   281  	case types.BoolFamily:
   282  		return len(m.col.([]bool))
   283  	case types.BytesFamily:
   284  		return m.Bytes().Len()
   285  	case types.IntFamily:
   286  		switch m.t.Width() {
   287  		case 16:
   288  			return len(m.col.([]int16))
   289  		case 32:
   290  			return len(m.col.([]int32))
   291  		case 0, 64:
   292  			return len(m.col.([]int64))
   293  		default:
   294  			panic(fmt.Sprintf("unexpected int width: %d", m.t.Width()))
   295  		}
   296  	case types.FloatFamily:
   297  		return len(m.col.([]float64))
   298  	case types.DecimalFamily:
   299  		return len(m.col.([]apd.Decimal))
   300  	case types.TimestampTZFamily:
   301  		return len(m.col.([]time.Time))
   302  	case types.IntervalFamily:
   303  		return len(m.col.([]duration.Duration))
   304  	case typeconv.DatumVecCanonicalTypeFamily:
   305  		return m.col.(DatumVec).Len()
   306  	default:
   307  		panic(fmt.Sprintf("unhandled type %s", m.t))
   308  	}
   309  }
   310  
   311  func (m *memColumn) SetLength(l int) {
   312  	switch m.CanonicalTypeFamily() {
   313  	case types.BoolFamily:
   314  		m.col = m.col.([]bool)[:l]
   315  	case types.BytesFamily:
   316  		m.Bytes().SetLength(l)
   317  	case types.IntFamily:
   318  		switch m.t.Width() {
   319  		case 16:
   320  			m.col = m.col.([]int16)[:l]
   321  		case 32:
   322  			m.col = m.col.([]int32)[:l]
   323  		case 0, 64:
   324  			m.col = m.col.([]int64)[:l]
   325  		default:
   326  			panic(fmt.Sprintf("unexpected int width: %d", m.t.Width()))
   327  		}
   328  	case types.FloatFamily:
   329  		m.col = m.col.([]float64)[:l]
   330  	case types.DecimalFamily:
   331  		m.col = m.col.([]apd.Decimal)[:l]
   332  	case types.TimestampTZFamily:
   333  		m.col = m.col.([]time.Time)[:l]
   334  	case types.IntervalFamily:
   335  		m.col = m.col.([]duration.Duration)[:l]
   336  	case typeconv.DatumVecCanonicalTypeFamily:
   337  		m.col.(DatumVec).SetLength(l)
   338  	default:
   339  		panic(fmt.Sprintf("unhandled type %s", m.t))
   340  	}
   341  }
   342  
   343  func (m *memColumn) Capacity() int {
   344  	switch m.CanonicalTypeFamily() {
   345  	case types.BoolFamily:
   346  		return cap(m.col.([]bool))
   347  	case types.BytesFamily:
   348  		panic("Capacity should not be called on Vec of Bytes type")
   349  	case types.IntFamily:
   350  		switch m.t.Width() {
   351  		case 16:
   352  			return cap(m.col.([]int16))
   353  		case 32:
   354  			return cap(m.col.([]int32))
   355  		case 0, 64:
   356  			return cap(m.col.([]int64))
   357  		default:
   358  			panic(fmt.Sprintf("unexpected int width: %d", m.t.Width()))
   359  		}
   360  	case types.FloatFamily:
   361  		return cap(m.col.([]float64))
   362  	case types.DecimalFamily:
   363  		return cap(m.col.([]apd.Decimal))
   364  	case types.TimestampTZFamily:
   365  		return cap(m.col.([]time.Time))
   366  	case types.IntervalFamily:
   367  		return cap(m.col.([]duration.Duration))
   368  	case typeconv.DatumVecCanonicalTypeFamily:
   369  		return m.col.(DatumVec).Cap()
   370  	default:
   371  		panic(fmt.Sprintf("unhandled type %s", m.t))
   372  	}
   373  }