github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/utils.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"reflect"
    17  
    18  	"github.com/cockroachdb/apd"
    19  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/colcontainer"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    26  	"github.com/cockroachdb/cockroach/pkg/util/duration"
    27  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    28  	"github.com/cockroachdb/errors"
    29  )
    30  
    31  var (
    32  	zeroBoolColumn   = make([]bool, coldata.MaxBatchSize)
    33  	zeroIntColumn    = make([]int, coldata.MaxBatchSize)
    34  	zeroUint64Column = make([]uint64, coldata.MaxBatchSize)
    35  
    36  	zeroDecimalValue  apd.Decimal
    37  	zeroFloat64Value  float64
    38  	zeroInt16Value    int16
    39  	zeroInt32Value    int32
    40  	zeroInt64Value    int64
    41  	zeroIntervalValue duration.Duration
    42  )
    43  
    44  // overloadHelper is a utility struct that helps us avoid allocations
    45  // of temporary decimals on every overloaded operation with them as well as
    46  // plumbs through other useful information. In order for the templates to see
    47  // it correctly, a local variable named `_overloadHelper` of this type must be declared
    48  // before the inlined overloaded code.
    49  type overloadHelper struct {
    50  	tmpDec1, tmpDec2 apd.Decimal
    51  	binFn            *tree.BinOp
    52  }
    53  
    54  // makeWindowIntoBatch updates windowedBatch so that it provides a "window"
    55  // into inputBatch starting at tuple index startIdx. It handles selection
    56  // vectors on inputBatch as well (in which case windowedBatch will also have a
    57  // "windowed" selection vector).
    58  func makeWindowIntoBatch(
    59  	windowedBatch, inputBatch coldata.Batch, startIdx int, inputTypes []*types.T,
    60  ) {
    61  	inputBatchLen := inputBatch.Length()
    62  	windowStart := startIdx
    63  	windowEnd := inputBatchLen
    64  	if sel := inputBatch.Selection(); sel != nil {
    65  		// We have a selection vector on the input batch, and in order to avoid
    66  		// deselecting (i.e. moving the data over), we will provide an adjusted
    67  		// selection vector to the windowed batch as well.
    68  		windowedBatch.SetSelection(true)
    69  		windowIntoSel := sel[startIdx:inputBatchLen]
    70  		copy(windowedBatch.Selection(), windowIntoSel)
    71  		maxSelIdx := 0
    72  		for _, selIdx := range windowIntoSel {
    73  			if selIdx > maxSelIdx {
    74  				maxSelIdx = selIdx
    75  			}
    76  		}
    77  		windowStart = 0
    78  		windowEnd = maxSelIdx + 1
    79  	} else {
    80  		windowedBatch.SetSelection(false)
    81  	}
    82  	for i := range inputTypes {
    83  		window := inputBatch.ColVec(i).Window(windowStart, windowEnd)
    84  		windowedBatch.ReplaceCol(window, i)
    85  	}
    86  	windowedBatch.SetLength(inputBatchLen - startIdx)
    87  }
    88  
    89  func newPartitionerToOperator(
    90  	allocator *colmem.Allocator,
    91  	types []*types.T,
    92  	partitioner colcontainer.PartitionedQueue,
    93  	partitionIdx int,
    94  ) *partitionerToOperator {
    95  	return &partitionerToOperator{
    96  		partitioner:  partitioner,
    97  		partitionIdx: partitionIdx,
    98  		batch:        allocator.NewMemBatch(types),
    99  	}
   100  }
   101  
   102  // partitionerToOperator is an Operator that Dequeue's from the corresponding
   103  // partition on every call to Next. It is a converter from filled in
   104  // PartitionedQueue to Operator.
   105  type partitionerToOperator struct {
   106  	colexecbase.ZeroInputNode
   107  	NonExplainable
   108  
   109  	partitioner  colcontainer.PartitionedQueue
   110  	partitionIdx int
   111  	batch        coldata.Batch
   112  }
   113  
   114  var _ colexecbase.Operator = &partitionerToOperator{}
   115  
   116  func (p *partitionerToOperator) Init() {}
   117  
   118  func (p *partitionerToOperator) Next(ctx context.Context) coldata.Batch {
   119  	if err := p.partitioner.Dequeue(ctx, p.partitionIdx, p.batch); err != nil {
   120  		colexecerror.InternalError(err)
   121  	}
   122  	return p.batch
   123  }
   124  
   125  func newAppendOnlyBufferedBatch(
   126  	allocator *colmem.Allocator, typs []*types.T, initialSize int,
   127  ) *appendOnlyBufferedBatch {
   128  	batch := allocator.NewMemBatchWithSize(typs, initialSize)
   129  	return &appendOnlyBufferedBatch{
   130  		Batch:   batch,
   131  		colVecs: batch.ColVecs(),
   132  		typs:    typs,
   133  	}
   134  }
   135  
   136  // appendOnlyBufferedBatch is a wrapper around coldata.Batch that should be
   137  // used by operators that buffer many tuples into a single batch by appending
   138  // to it. It stores the length of the batch separately and intercepts calls to
   139  // Length() and SetLength() in order to avoid updating offsets on vectors of
   140  // types.Bytes type - which would result in a quadratic behavior - because
   141  // it is not necessary since coldata.Vec.Append maintains the correct offsets.
   142  //
   143  // Note: "appendOnly" in the name indicates that the tuples should *only* be
   144  // appended to the vectors (which can be done via explicit Vec.Append calls or
   145  // using utility append() method); however, this batch prohibits appending and
   146  // replacing of the vectors themselves.
   147  type appendOnlyBufferedBatch struct {
   148  	coldata.Batch
   149  
   150  	length  int
   151  	colVecs []coldata.Vec
   152  	typs    []*types.T
   153  }
   154  
   155  var _ coldata.Batch = &appendOnlyBufferedBatch{}
   156  
   157  func (b *appendOnlyBufferedBatch) Length() int {
   158  	return b.length
   159  }
   160  
   161  func (b *appendOnlyBufferedBatch) SetLength(n int) {
   162  	b.length = n
   163  }
   164  
   165  func (b *appendOnlyBufferedBatch) ColVec(i int) coldata.Vec {
   166  	return b.colVecs[i]
   167  }
   168  
   169  func (b *appendOnlyBufferedBatch) ColVecs() []coldata.Vec {
   170  	return b.colVecs
   171  }
   172  
   173  func (b *appendOnlyBufferedBatch) AppendCol(coldata.Vec) {
   174  	colexecerror.InternalError("AppendCol is prohibited on appendOnlyBufferedBatch")
   175  }
   176  
   177  func (b *appendOnlyBufferedBatch) ReplaceCol(coldata.Vec, int) {
   178  	colexecerror.InternalError("ReplaceCol is prohibited on appendOnlyBufferedBatch")
   179  }
   180  
   181  // append is a helper method that appends all tuples with indices in range
   182  // [startIdx, endIdx) from batch (paying attention to the selection vector)
   183  // into b.
   184  // NOTE: this does *not* perform memory accounting.
   185  func (b *appendOnlyBufferedBatch) append(batch coldata.Batch, startIdx, endIdx int) {
   186  	for i, colVec := range b.colVecs {
   187  		colVec.Append(
   188  			coldata.SliceArgs{
   189  				Src:         batch.ColVec(i),
   190  				Sel:         batch.Selection(),
   191  				DestIdx:     b.length,
   192  				SrcStartIdx: startIdx,
   193  				SrcEndIdx:   endIdx,
   194  			},
   195  		)
   196  	}
   197  	b.length += endIdx - startIdx
   198  }
   199  
   200  // getDatumToPhysicalFn returns a function for converting a datum of the given
   201  // ColumnType to the corresponding Go type.
   202  func getDatumToPhysicalFn(ct *types.T) func(tree.Datum) (interface{}, error) {
   203  	switch ct.Family() {
   204  	case types.BoolFamily:
   205  		return func(datum tree.Datum) (interface{}, error) {
   206  			d, ok := datum.(*tree.DBool)
   207  			if !ok {
   208  				return nil, errors.Errorf("expected *tree.DBool, found %s", reflect.TypeOf(datum))
   209  			}
   210  			return bool(*d), nil
   211  		}
   212  	case types.BytesFamily:
   213  		return func(datum tree.Datum) (interface{}, error) {
   214  			d, ok := datum.(*tree.DBytes)
   215  			if !ok {
   216  				return nil, errors.Errorf("expected *tree.DBytes, found %s", reflect.TypeOf(datum))
   217  			}
   218  			return encoding.UnsafeConvertStringToBytes(string(*d)), nil
   219  		}
   220  	case types.IntFamily:
   221  		switch ct.Width() {
   222  		case 16:
   223  			return func(datum tree.Datum) (interface{}, error) {
   224  				d, ok := datum.(*tree.DInt)
   225  				if !ok {
   226  					return nil, errors.Errorf("expected *tree.DInt, found %s", reflect.TypeOf(datum))
   227  				}
   228  				return int16(*d), nil
   229  			}
   230  		case 32:
   231  			return func(datum tree.Datum) (interface{}, error) {
   232  				d, ok := datum.(*tree.DInt)
   233  				if !ok {
   234  					return nil, errors.Errorf("expected *tree.DInt, found %s", reflect.TypeOf(datum))
   235  				}
   236  				return int32(*d), nil
   237  			}
   238  		case 0, 64:
   239  			return func(datum tree.Datum) (interface{}, error) {
   240  				d, ok := datum.(*tree.DInt)
   241  				if !ok {
   242  					return nil, errors.Errorf("expected *tree.DInt, found %s", reflect.TypeOf(datum))
   243  				}
   244  				return int64(*d), nil
   245  			}
   246  		}
   247  		colexecerror.InternalError(fmt.Sprintf("unhandled INT width %d", ct.Width()))
   248  	case types.DateFamily:
   249  		return func(datum tree.Datum) (interface{}, error) {
   250  			d, ok := datum.(*tree.DDate)
   251  			if !ok {
   252  				return nil, errors.Errorf("expected *tree.DDate, found %s", reflect.TypeOf(datum))
   253  			}
   254  			return d.UnixEpochDaysWithOrig(), nil
   255  		}
   256  	case types.FloatFamily:
   257  		return func(datum tree.Datum) (interface{}, error) {
   258  			d, ok := datum.(*tree.DFloat)
   259  			if !ok {
   260  				return nil, errors.Errorf("expected *tree.DFloat, found %s", reflect.TypeOf(datum))
   261  			}
   262  			return float64(*d), nil
   263  		}
   264  	case types.OidFamily:
   265  		return func(datum tree.Datum) (interface{}, error) {
   266  			d, ok := datum.(*tree.DOid)
   267  			if !ok {
   268  				return nil, errors.Errorf("expected *tree.DOid, found %s", reflect.TypeOf(datum))
   269  			}
   270  			return int64(d.DInt), nil
   271  		}
   272  	case types.StringFamily:
   273  		return func(datum tree.Datum) (interface{}, error) {
   274  			// Handle other STRING-related OID types, like oid.T_name.
   275  			wrapper, ok := datum.(*tree.DOidWrapper)
   276  			if ok {
   277  				datum = wrapper.Wrapped
   278  			}
   279  
   280  			d, ok := datum.(*tree.DString)
   281  			if !ok {
   282  				return nil, errors.Errorf("expected *tree.DString, found %s", reflect.TypeOf(datum))
   283  			}
   284  			return encoding.UnsafeConvertStringToBytes(string(*d)), nil
   285  		}
   286  	case types.DecimalFamily:
   287  		return func(datum tree.Datum) (interface{}, error) {
   288  			d, ok := datum.(*tree.DDecimal)
   289  			if !ok {
   290  				return nil, errors.Errorf("expected *tree.DDecimal, found %s", reflect.TypeOf(datum))
   291  			}
   292  			return d.Decimal, nil
   293  		}
   294  	case types.UuidFamily:
   295  		return func(datum tree.Datum) (interface{}, error) {
   296  			d, ok := datum.(*tree.DUuid)
   297  			if !ok {
   298  				return nil, errors.Errorf("expected *tree.DUuid, found %s", reflect.TypeOf(datum))
   299  			}
   300  			return d.UUID.GetBytesMut(), nil
   301  		}
   302  	case types.TimestampFamily:
   303  		return func(datum tree.Datum) (interface{}, error) {
   304  			d, ok := datum.(*tree.DTimestamp)
   305  			if !ok {
   306  				return nil, errors.Errorf("expected *tree.DTimestamp, found %s", reflect.TypeOf(datum))
   307  			}
   308  			return d.Time, nil
   309  		}
   310  	case types.TimestampTZFamily:
   311  		return func(datum tree.Datum) (interface{}, error) {
   312  			d, ok := datum.(*tree.DTimestampTZ)
   313  			if !ok {
   314  				return nil, errors.Errorf("expected *tree.DTimestampTZ, found %s", reflect.TypeOf(datum))
   315  			}
   316  			return d.Time, nil
   317  		}
   318  	case types.IntervalFamily:
   319  		return func(datum tree.Datum) (interface{}, error) {
   320  			d, ok := datum.(*tree.DInterval)
   321  			if !ok {
   322  				return nil, errors.Errorf("expected *tree.DInterval, found %s", reflect.TypeOf(datum))
   323  			}
   324  			return d.Duration, nil
   325  		}
   326  
   327  	// Types backed by tree.Datums.
   328  	case types.CollatedStringFamily:
   329  		return func(datum tree.Datum) (interface{}, error) {
   330  			d, ok := datum.(*tree.DCollatedString)
   331  			if !ok {
   332  				return nil, errors.Errorf("expected *tree.DCollatedString, found %s", reflect.TypeOf(datum))
   333  			}
   334  			return d, nil
   335  		}
   336  	case types.UnknownFamily:
   337  		return func(datum tree.Datum) (interface{}, error) {
   338  			if datum != tree.DNull {
   339  				return nil, errors.Errorf("unexpectedly datum is not tree.DNull for types.UnknownFamily: %+v", datum)
   340  			}
   341  			return tree.DNull, nil
   342  		}
   343  	case types.ArrayFamily:
   344  		return func(datum tree.Datum) (interface{}, error) {
   345  			d, ok := datum.(*tree.DArray)
   346  			if !ok {
   347  				return nil, errors.Errorf("expected *tree.DArray, found %s", reflect.TypeOf(datum))
   348  			}
   349  			return d, nil
   350  		}
   351  	case types.INetFamily:
   352  		return func(datum tree.Datum) (interface{}, error) {
   353  			d, ok := datum.(*tree.DIPAddr)
   354  			if !ok {
   355  				return nil, errors.Errorf("expected *tree.DIPAddr, found %s", reflect.TypeOf(datum))
   356  			}
   357  			return d, nil
   358  		}
   359  	case types.TimeFamily:
   360  		return func(datum tree.Datum) (interface{}, error) {
   361  			d, ok := datum.(*tree.DTime)
   362  			if !ok {
   363  				return nil, errors.Errorf("expected *tree.DTime, found %s", reflect.TypeOf(datum))
   364  			}
   365  			return d, nil
   366  		}
   367  	case types.JsonFamily:
   368  		return func(datum tree.Datum) (interface{}, error) {
   369  			d, ok := datum.(*tree.DJSON)
   370  			if !ok {
   371  				return nil, errors.Errorf("expected *tree.DJSON, found %s", reflect.TypeOf(datum))
   372  			}
   373  			return d, nil
   374  		}
   375  	case types.TimeTZFamily:
   376  		return func(datum tree.Datum) (interface{}, error) {
   377  			d, ok := datum.(*tree.DTimeTZ)
   378  			if !ok {
   379  				return nil, errors.Errorf("expected *tree.DTimeTZ, found %s", reflect.TypeOf(datum))
   380  			}
   381  			return d, nil
   382  		}
   383  	case types.TupleFamily:
   384  		return func(datum tree.Datum) (interface{}, error) {
   385  			d, ok := datum.(*tree.DTuple)
   386  			if !ok {
   387  				return nil, errors.Errorf("expected *tree.DTuple, found %s", reflect.TypeOf(datum))
   388  			}
   389  			return d, nil
   390  		}
   391  	case types.BitFamily:
   392  		return func(datum tree.Datum) (interface{}, error) {
   393  			d, ok := datum.(*tree.DBitArray)
   394  			if !ok {
   395  				return nil, errors.Errorf("expected *tree.DBitArray, found %s", reflect.TypeOf(datum))
   396  			}
   397  			return d, nil
   398  		}
   399  	case types.GeometryFamily:
   400  		return func(datum tree.Datum) (interface{}, error) {
   401  			d, ok := datum.(*tree.DGeometry)
   402  			if !ok {
   403  				return nil, errors.Errorf("expected *tree.DGeometry, found %s", reflect.TypeOf(datum))
   404  			}
   405  			return d, nil
   406  		}
   407  	case types.GeographyFamily:
   408  		return func(datum tree.Datum) (interface{}, error) {
   409  			d, ok := datum.(*tree.DGeography)
   410  			if !ok {
   411  				return nil, errors.Errorf("expected *tree.DGeography, found %s", reflect.TypeOf(datum))
   412  			}
   413  			return d, nil
   414  		}
   415  	case types.EnumFamily:
   416  		return func(datum tree.Datum) (interface{}, error) {
   417  			d, ok := datum.(*tree.DEnum)
   418  			if !ok {
   419  				return nil, errors.Errorf("expected *tree.DEnum, found %s", reflect.TypeOf(datum))
   420  			}
   421  			return d, nil
   422  		}
   423  	}
   424  	colexecerror.InternalError(fmt.Sprintf("unexpectedly unhandled type %s", ct.DebugString()))
   425  	// This code is unreachable, but the compiler cannot infer that.
   426  	return nil
   427  }