github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/builtin_funcs_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"math/rand"
    17  	"testing"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    20  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/parser"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/sem/builtins"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    27  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    28  	"github.com/stretchr/testify/require"
    29  )
    30  
    31  func TestBasicBuiltinFunctions(t *testing.T) {
    32  	defer leaktest.AfterTest(t)()
    33  	// Trick to get the init() for the builtins package to run.
    34  	_ = builtins.AllBuiltinNames
    35  	ctx := context.Background()
    36  	st := cluster.MakeTestingClusterSettings()
    37  	evalCtx := tree.MakeTestingEvalContext(st)
    38  	defer evalCtx.Stop(ctx)
    39  	flowCtx := &execinfra.FlowCtx{
    40  		EvalCtx: &evalCtx,
    41  		Cfg: &execinfra.ServerConfig{
    42  			Settings: st,
    43  		},
    44  	}
    45  
    46  	testCases := []struct {
    47  		desc         string
    48  		expr         string
    49  		inputCols    []int
    50  		inputTuples  tuples
    51  		inputTypes   []*types.T
    52  		outputTuples tuples
    53  	}{
    54  		{
    55  			desc:         "AbsVal",
    56  			expr:         "abs(@1)",
    57  			inputCols:    []int{0},
    58  			inputTuples:  tuples{{1}, {-2}},
    59  			inputTypes:   []*types.T{types.Int},
    60  			outputTuples: tuples{{1, 1}, {-2, 2}},
    61  		},
    62  		{
    63  			desc:         "StringLen",
    64  			expr:         "length(@1)",
    65  			inputCols:    []int{0},
    66  			inputTuples:  tuples{{"Hello"}, {"The"}},
    67  			inputTypes:   []*types.T{types.String},
    68  			outputTuples: tuples{{"Hello", 5}, {"The", 3}},
    69  		},
    70  	}
    71  
    72  	for _, tc := range testCases {
    73  		t.Run(tc.desc, func(t *testing.T) {
    74  			runTests(t, []tuples{tc.inputTuples}, tc.outputTuples, orderedVerifier,
    75  				func(input []colexecbase.Operator) (colexecbase.Operator, error) {
    76  					return createTestProjectingOperator(
    77  						ctx, flowCtx, input[0], tc.inputTypes,
    78  						tc.expr, false, /* canFallbackToRowexec */
    79  					)
    80  				})
    81  		})
    82  	}
    83  }
    84  
    85  func benchmarkBuiltinFunctions(b *testing.B, useSelectionVector bool, hasNulls bool) {
    86  	ctx := context.Background()
    87  	st := cluster.MakeTestingClusterSettings()
    88  	evalCtx := tree.MakeTestingEvalContext(st)
    89  	defer evalCtx.Stop(ctx)
    90  	flowCtx := &execinfra.FlowCtx{
    91  		EvalCtx: &evalCtx,
    92  		Cfg: &execinfra.ServerConfig{
    93  			Settings: st,
    94  		},
    95  	}
    96  
    97  	batch := testAllocator.NewMemBatch([]*types.T{types.Int})
    98  	col := batch.ColVec(0).Int64()
    99  
   100  	for i := 0; i < coldata.BatchSize(); i++ {
   101  		if float64(i) < float64(coldata.BatchSize())*selectivity {
   102  			col[i] = -1
   103  		} else {
   104  			col[i] = 1
   105  		}
   106  	}
   107  
   108  	if hasNulls {
   109  		for i := 0; i < coldata.BatchSize(); i++ {
   110  			if rand.Float64() < nullProbability {
   111  				batch.ColVec(0).Nulls().SetNull(i)
   112  			}
   113  		}
   114  	}
   115  
   116  	batch.SetLength(coldata.BatchSize())
   117  
   118  	if useSelectionVector {
   119  		batch.SetSelection(true)
   120  		sel := batch.Selection()
   121  		for i := 0; i < coldata.BatchSize(); i++ {
   122  			sel[i] = i
   123  		}
   124  	}
   125  
   126  	typs := []*types.T{types.Int}
   127  	source := colexecbase.NewRepeatableBatchSource(testAllocator, batch, typs)
   128  	op, err := createTestProjectingOperator(
   129  		ctx, flowCtx, source, typs,
   130  		"abs(@1)" /* projectingExpr */, false, /* canFallbackToRowexec */
   131  	)
   132  	require.NoError(b, err)
   133  	op.Init()
   134  
   135  	b.SetBytes(int64(8 * coldata.BatchSize()))
   136  	b.ResetTimer()
   137  	for i := 0; i < b.N; i++ {
   138  		op.Next(ctx)
   139  	}
   140  }
   141  
   142  func BenchmarkBuiltinFunctions(b *testing.B) {
   143  	_ = builtins.AllBuiltinNames
   144  	for _, useSel := range []bool{true, false} {
   145  		for _, hasNulls := range []bool{true, false} {
   146  			b.Run(fmt.Sprintf("useSel=%t,hasNulls=%t", useSel, hasNulls), func(b *testing.B) {
   147  				benchmarkBuiltinFunctions(b, useSel, hasNulls)
   148  			})
   149  		}
   150  	}
   151  }
   152  
   153  // Perform a comparison between the default substring operator
   154  // and the specialized operator.
   155  func BenchmarkCompareSpecializedOperators(b *testing.B) {
   156  	ctx := context.Background()
   157  	tctx := tree.NewTestingEvalContext(cluster.MakeTestingClusterSettings())
   158  
   159  	typs := []*types.T{types.String, types.Int, types.Int}
   160  	batch := testAllocator.NewMemBatch(typs)
   161  	outputIdx := 3
   162  	bCol := batch.ColVec(0).Bytes()
   163  	sCol := batch.ColVec(1).Int64()
   164  	eCol := batch.ColVec(2).Int64()
   165  	for i := 0; i < coldata.BatchSize(); i++ {
   166  		bCol.Set(i, []byte("hello there"))
   167  		sCol[i] = 1
   168  		eCol[i] = 4
   169  	}
   170  	batch.SetLength(coldata.BatchSize())
   171  	var source colexecbase.Operator
   172  	source = colexecbase.NewRepeatableBatchSource(testAllocator, batch, typs)
   173  	source = newVectorTypeEnforcer(testAllocator, source, types.Bytes, outputIdx)
   174  
   175  	// Set up the default operator.
   176  	expr, err := parser.ParseExpr("substring(@1, @2, @3)")
   177  	if err != nil {
   178  		b.Fatal(err)
   179  	}
   180  	inputCols := []int{0, 1, 2}
   181  	p := &mockTypeContext{typs: typs}
   182  	semaCtx := tree.MakeSemaContext()
   183  	semaCtx.IVarContainer = p
   184  	typedExpr, err := tree.TypeCheck(ctx, expr, &semaCtx, types.Any)
   185  	if err != nil {
   186  		b.Fatal(err)
   187  	}
   188  	defaultOp := &defaultBuiltinFuncOperator{
   189  		OneInputNode: NewOneInputNode(source),
   190  		allocator:    testAllocator,
   191  		evalCtx:      tctx,
   192  		funcExpr:     typedExpr.(*tree.FuncExpr),
   193  		outputIdx:    outputIdx,
   194  		columnTypes:  typs,
   195  		outputType:   types.String,
   196  		converter:    getDatumToPhysicalFn(types.String),
   197  		row:          make(tree.Datums, outputIdx),
   198  		argumentCols: inputCols,
   199  	}
   200  	defaultOp.Init()
   201  
   202  	// Set up the specialized substring operator.
   203  	specOp := newSubstringOperator(
   204  		testAllocator, typs, inputCols, outputIdx, source,
   205  	)
   206  	specOp.Init()
   207  
   208  	b.Run("DefaultBuiltinOperator", func(b *testing.B) {
   209  		b.SetBytes(int64(len("hello there") * coldata.BatchSize()))
   210  		b.ResetTimer()
   211  		for i := 0; i < b.N; i++ {
   212  			b := defaultOp.Next(ctx)
   213  			// Due to the flat byte updates, we have to reset the output
   214  			// bytes col after each next call.
   215  			b.ColVec(outputIdx).Bytes().Reset()
   216  		}
   217  	})
   218  
   219  	b.Run("SpecializedSubstringOperator", func(b *testing.B) {
   220  		b.SetBytes(int64(len("hello there") * coldata.BatchSize()))
   221  		b.ResetTimer()
   222  		for i := 0; i < b.N; i++ {
   223  			b := specOp.Next(ctx)
   224  			// Due to the flat byte updates, we have to reset the output
   225  			// bytes col after each next call.
   226  			b.ColVec(outputIdx).Bytes().Reset()
   227  		}
   228  	})
   229  }