github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/utils_test.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"math"
    17  	"math/rand"
    18  	"reflect"
    19  	"sort"
    20  	"strings"
    21  	"testing"
    22  	"testing/quick"
    23  
    24  	"github.com/cockroachdb/apd"
    25  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    26  	"github.com/cockroachdb/cockroach/pkg/col/coldataext"
    27  	"github.com/cockroachdb/cockroach/pkg/col/coldatatestutils"
    28  	"github.com/cockroachdb/cockroach/pkg/col/typeconv"
    29  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    30  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    31  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    32  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    33  	"github.com/cockroachdb/cockroach/pkg/sql/parser"
    34  	"github.com/cockroachdb/cockroach/pkg/sql/rowexec"
    35  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    36  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    37  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    38  	"github.com/cockroachdb/cockroach/pkg/util/json"
    39  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    40  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    41  	"github.com/cockroachdb/errors"
    42  	"github.com/pmezard/go-difflib/difflib"
    43  	"github.com/stretchr/testify/assert"
    44  	"github.com/stretchr/testify/require"
    45  )
    46  
// tuple represents a row with any-type columns. A nil element stands for a
// NULL value: less orders nil before any non-nil element, and opTestInput
// marks the corresponding position of the output vector as null.
type tuple []interface{}
    49  
    50  func (t tuple) String() string {
    51  	var sb strings.Builder
    52  	sb.WriteString("[")
    53  	for i := range t {
    54  		if i != 0 {
    55  			sb.WriteString(", ")
    56  		}
    57  		if d, ok := t[i].(apd.Decimal); ok {
    58  			sb.WriteString(d.String())
    59  		} else if d, ok := t[i].(*apd.Decimal); ok {
    60  			sb.WriteString(d.String())
    61  		} else if d, ok := t[i].([]byte); ok {
    62  			sb.WriteString(string(d))
    63  		} else {
    64  			sb.WriteString(fmt.Sprintf("%v", t[i]))
    65  		}
    66  	}
    67  	sb.WriteString("]")
    68  	return sb.String()
    69  }
    70  
    71  func (t tuple) less(other tuple) bool {
    72  	for i := range t {
    73  		// If either side is nil, we short circuit the comparison. For nil, we
    74  		// define: nil < {any_none_nil}
    75  		if t[i] == nil && other[i] == nil {
    76  			continue
    77  		} else if t[i] == nil && other[i] != nil {
    78  			return true
    79  		} else if t[i] != nil && other[i] == nil {
    80  			return false
    81  		}
    82  
    83  		lhsVal := reflect.ValueOf(t[i])
    84  		rhsVal := reflect.ValueOf(other[i])
    85  
    86  		// apd.Decimal are not comparable, so we check that first.
    87  		if lhsVal.Type().Name() == "Decimal" && lhsVal.CanInterface() {
    88  			lhsDecimal := lhsVal.Interface().(apd.Decimal)
    89  			rhsDecimal := rhsVal.Interface().(apd.Decimal)
    90  			cmp := (&lhsDecimal).CmpTotal(&rhsDecimal)
    91  			if cmp == 0 {
    92  				continue
    93  			} else if cmp == -1 {
    94  				return true
    95  			} else {
    96  				return false
    97  			}
    98  		}
    99  
   100  		// Since the expected values are provided as strings, we convert the json
   101  		// values here to strings so we can use the string lexical ordering. This is
   102  		// because json orders certain values differently (e.g. null) compared to
   103  		// string.
   104  		if strings.Contains(lhsVal.Type().String(), "json") {
   105  			lhsStr := lhsVal.Interface().(fmt.Stringer).String()
   106  			rhsStr := rhsVal.Interface().(fmt.Stringer).String()
   107  			if lhsStr == rhsStr {
   108  				continue
   109  			} else {
   110  				return lhsStr < rhsStr
   111  			}
   112  		}
   113  
   114  		// types.Bytes is represented as []uint8.
   115  		if lhsVal.Type().String() == "[]uint8" {
   116  			lhsStr := string(lhsVal.Interface().([]uint8))
   117  			rhsStr := string(rhsVal.Interface().([]uint8))
   118  			if lhsStr == rhsStr {
   119  				continue
   120  			} else if lhsStr < rhsStr {
   121  				return true
   122  			} else {
   123  				return false
   124  			}
   125  		}
   126  
   127  		// No need to compare these two elements when they are the same.
   128  		if t[i] == other[i] {
   129  			continue
   130  		}
   131  
   132  		switch typ := lhsVal.Type().Name(); typ {
   133  		case "int", "int16", "int32", "int64":
   134  			return lhsVal.Int() < rhsVal.Int()
   135  		case "uint", "uint16", "uint32", "uint64":
   136  			return lhsVal.Uint() < rhsVal.Uint()
   137  		case "float", "float64":
   138  			return lhsVal.Float() < rhsVal.Float()
   139  		case "bool":
   140  			return lhsVal.Bool() == false && rhsVal.Bool() == true
   141  		case "string":
   142  			return lhsVal.String() < rhsVal.String()
   143  		default:
   144  			colexecerror.InternalError(fmt.Sprintf("Unhandled comparison type: %s", typ))
   145  		}
   146  	}
   147  	return false
   148  }
   149  
   150  func (t tuple) clone() tuple {
   151  	b := make(tuple, len(t))
   152  	for i := range b {
   153  		b[i] = t[i]
   154  	}
   155  
   156  	return b
   157  }
   158  
// tuples represents a table with any-type columns; each element is one row
// of the table.
type tuples []tuple
   161  
   162  func (t tuples) clone() tuples {
   163  	b := make(tuples, len(t))
   164  	for i := range b {
   165  		b[i] = t[i].clone()
   166  	}
   167  	return b
   168  }
   169  
   170  func (t tuples) String() string {
   171  	var sb strings.Builder
   172  	sb.WriteString("[")
   173  	for i := range t {
   174  		if i != 0 {
   175  			sb.WriteString(", ")
   176  		}
   177  		sb.WriteString(t[i].String())
   178  	}
   179  	sb.WriteString("]")
   180  	return sb.String()
   181  }
   182  
   183  // sort returns a copy of sorted tuples.
   184  func (t tuples) sort() tuples {
   185  	b := make(tuples, len(t))
   186  	for i := range b {
   187  		b[i] = make(tuple, len(t[i]))
   188  		copy(b[i], t[i])
   189  	}
   190  	sort.SliceStable(b, func(i, j int) bool {
   191  		lhs := b[i]
   192  		rhs := b[j]
   193  		return lhs.less(rhs)
   194  	})
   195  	return b
   196  }
   197  
// verifierType selects one of the built-in strategies used to compare the
// output of the operator under test against the expected tuples.
type verifierType int

const (
	// orderedVerifier compares the input and output tuples, returning an error
	// if they're not identical.
	orderedVerifier verifierType = iota
	// unorderedVerifier compares the input and output tuples as sets, returning
	// an error if they aren't equal by set comparison (irrespective of order).
	unorderedVerifier
)

// verifierFn is a custom verification function. It receives the opTestOutput
// that wraps the operator under test and returns a non-nil error if the
// verification fails.
type verifierFn func(output *opTestOutput) error
   210  
   211  // maybeHasNulls is a helper function that returns whether any of the columns in b
   212  // (maybe) have nulls.
   213  func maybeHasNulls(b coldata.Batch) bool {
   214  	if b.Length() == 0 {
   215  		return false
   216  	}
   217  	for i := 0; i < b.Width(); i++ {
   218  		if b.ColVec(i).MaybeHasNulls() {
   219  			return true
   220  		}
   221  	}
   222  	return false
   223  }
   224  
// testRunner is the signature shared by runTestsWithTyps and
// runTestsWithoutAllNullsInjection: the testing handle, the input tuples, the
// type schemas of the inputs, the expected output tuples, the verifier, and
// the constructor of the operator under test.
type testRunner func(*testing.T, []tuples, [][]*types.T, tuples, interface{}, func([]colexecbase.Operator) (colexecbase.Operator, error))

// variableOutputBatchSizeInitializer is implemented by operators that can be
// initialized with variable output size batches. This allows runTests to
// increase test coverage of these operators.
type variableOutputBatchSizeInitializer interface {
	// initWithOutputBatchSize initializes the operator (it is called instead
	// of Init by the test harness) with the given output batch size.
	initWithOutputBatchSize(int)
}
   233  
// runTests is a helper that automatically runs your tests with varied batch
// sizes and with and without a random selection vector. The types of the
// input tuples are not specified (nil is passed to runTestsWithTyps).
// - tups is the sets of input tuples.
// - expected is the set of output tuples.
// - verifier determines how the output is checked against expected; it is
//   passed through unchanged to runTestsWithTyps.
// - constructor is a function that takes a list of input Operators and
//   returns the operator to test, or an error.
func runTests(
	t *testing.T,
	tups []tuples,
	expected tuples,
	verifier interface{},
	constructor func(inputs []colexecbase.Operator) (colexecbase.Operator, error),
) {
	runTestsWithTyps(t, tups, nil /* typs */, expected, verifier, constructor)
}
   249  
   250  // runTestsWithTyps is the same as runTests with an ability to specify the
   251  // types of the input tuples.
   252  // - typs is the type schema of the input tuples. Note that this is a multi-
   253  //   dimensional slice which allows for specifying different schemas for each
   254  //   of the inputs.
   255  func runTestsWithTyps(
   256  	t *testing.T,
   257  	tups []tuples,
   258  	typs [][]*types.T,
   259  	expected tuples,
   260  	verifier interface{},
   261  	constructor func(inputs []colexecbase.Operator) (colexecbase.Operator, error),
   262  ) {
   263  	runTestsWithoutAllNullsInjection(t, tups, typs, expected, verifier, constructor)
   264  
   265  	t.Run("allNullsInjection", func(t *testing.T) {
   266  		// This test replaces all values in the input tuples with nulls and ensures
   267  		// that the output is different from the "original" output (i.e. from the
   268  		// one that is returned without nulls injection).
   269  		onlyNullsInTheInput := true
   270  	OUTER:
   271  		for _, tup := range tups {
   272  			for i := 0; i < len(tup); i++ {
   273  				for j := 0; j < len(tup[i]); j++ {
   274  					if tup[i][j] != nil {
   275  						onlyNullsInTheInput = false
   276  						break OUTER
   277  					}
   278  				}
   279  			}
   280  		}
   281  		opConstructor := func(injectAllNulls bool) colexecbase.Operator {
   282  			inputSources := make([]colexecbase.Operator, len(tups))
   283  			var inputTypes []*types.T
   284  			for i, tup := range tups {
   285  				if typs != nil {
   286  					inputTypes = typs[i]
   287  				}
   288  				input := newOpTestInput(1 /* batchSize */, tup, inputTypes)
   289  				input.injectAllNulls = injectAllNulls
   290  				inputSources[i] = input
   291  			}
   292  			op, err := constructor(inputSources)
   293  			if err != nil {
   294  				t.Fatal(err)
   295  			}
   296  			op.Init()
   297  			return op
   298  		}
   299  		ctx := context.Background()
   300  		originalOp := opConstructor(false /* injectAllNulls */)
   301  		opWithNulls := opConstructor(true /* injectAllNulls */)
   302  		foundDifference := false
   303  		for {
   304  			originalBatch := originalOp.Next(ctx)
   305  			batchWithNulls := opWithNulls.Next(ctx)
   306  			if originalBatch.Length() != batchWithNulls.Length() {
   307  				foundDifference = true
   308  				break
   309  			}
   310  			if originalBatch.Length() == 0 {
   311  				break
   312  			}
   313  			var originalTuples, tuplesWithNulls tuples
   314  			for i := 0; i < originalBatch.Length(); i++ {
   315  				// We checked that the batches have the same length.
   316  				originalTuples = append(originalTuples, getTupleFromBatch(originalBatch, i))
   317  				tuplesWithNulls = append(tuplesWithNulls, getTupleFromBatch(batchWithNulls, i))
   318  			}
   319  			if err := assertTuplesSetsEqual(originalTuples, tuplesWithNulls); err != nil {
   320  				// err is non-nil which means that the batches are different.
   321  				foundDifference = true
   322  				break
   323  			}
   324  		}
   325  		if onlyNullsInTheInput {
   326  			require.False(t, foundDifference, "since there were only "+
   327  				"nulls in the input tuples, we expect for all nulls injection to not "+
   328  				"change the output")
   329  		} else {
   330  			require.True(t, foundDifference, "since there were "+
   331  				"non-nulls in the input tuples, we expect for all nulls injection to "+
   332  				"change the output")
   333  		}
   334  		if c, ok := originalOp.(IdempotentCloser); ok {
   335  			require.NoError(t, c.IdempotentClose(ctx))
   336  		}
   337  		if c, ok := opWithNulls.(IdempotentCloser); ok {
   338  			require.NoError(t, c.IdempotentClose(ctx))
   339  		}
   340  	})
   341  }
   342  
   343  // runTestsWithoutAllNullsInjection is the same as runTests, but it skips the
   344  // all nulls injection test. Use this only when the all nulls injection should
   345  // not change the output of the operator under testing.
   346  // NOTE: please leave a justification why you're using this variant of
   347  // runTests.
   348  func runTestsWithoutAllNullsInjection(
   349  	t *testing.T,
   350  	tups []tuples,
   351  	typs [][]*types.T,
   352  	expected tuples,
   353  	verifier interface{},
   354  	constructor func(inputs []colexecbase.Operator) (colexecbase.Operator, error),
   355  ) {
   356  	skipVerifySelAndNullsResets := true
   357  	var verifyFn verifierFn
   358  	switch v := verifier.(type) {
   359  	case verifierType:
   360  		switch v {
   361  		case orderedVerifier:
   362  			verifyFn = (*opTestOutput).Verify
   363  			// Note that this test makes sense only if we expect tuples to be
   364  			// returned in the same order (otherwise the second batch's selection
   365  			// vector or nulls info can be different and that is totally valid).
   366  			skipVerifySelAndNullsResets = false
   367  		case unorderedVerifier:
   368  			verifyFn = (*opTestOutput).VerifyAnyOrder
   369  		default:
   370  			colexecerror.InternalError(fmt.Sprintf("unexpected verifierType %d", v))
   371  		}
   372  	case verifierFn:
   373  		verifyFn = v
   374  	}
   375  	runTestsWithFn(t, tups, typs, func(t *testing.T, inputs []colexecbase.Operator) {
   376  		op, err := constructor(inputs)
   377  		if err != nil {
   378  			t.Fatal(err)
   379  		}
   380  		out := newOpTestOutput(op, expected)
   381  		if err := verifyFn(out); err != nil {
   382  			t.Fatal(err)
   383  		}
   384  	})
   385  
   386  	if !skipVerifySelAndNullsResets {
   387  		t.Run("verifySelAndNullResets", func(t *testing.T) {
   388  			// This test ensures that operators that "own their own batches", such as
   389  			// any operator that has to reshape its output, are not affected by
   390  			// downstream modification of batches.
   391  			// We run the main loop twice: once to determine what the operator would
   392  			// output on its second Next call (we need the first call to Next to get a
   393  			// reference to a batch to modify), and a second time to modify the batch
   394  			// and verify that this does not change the operator output.
   395  			// NOTE: this test makes sense only if the operator returns two non-zero
   396  			// length batches (if not, we short-circuit the test since the operator
   397  			// doesn't have to restore anything on a zero-length batch).
   398  			var (
   399  				secondBatchHasSelection, secondBatchHasNulls bool
   400  				inputTypes                                   []*types.T
   401  			)
   402  			for round := 0; round < 2; round++ {
   403  				inputSources := make([]colexecbase.Operator, len(tups))
   404  				for i, tup := range tups {
   405  					if typs != nil {
   406  						inputTypes = typs[i]
   407  					}
   408  					inputSources[i] = newOpTestInput(1 /* batchSize */, tup, inputTypes)
   409  				}
   410  				op, err := constructor(inputSources)
   411  				if err != nil {
   412  					t.Fatal(err)
   413  				}
   414  				if vbsiOp, ok := op.(variableOutputBatchSizeInitializer); ok {
   415  					// initialize the operator with a very small output batch size to
   416  					// increase the likelihood that multiple batches will be output.
   417  					vbsiOp.initWithOutputBatchSize(1)
   418  				} else {
   419  					op.Init()
   420  				}
   421  				ctx := context.Background()
   422  				b := op.Next(ctx)
   423  				if b.Length() == 0 {
   424  					return
   425  				}
   426  				if round == 1 {
   427  					if secondBatchHasSelection {
   428  						b.SetSelection(false)
   429  					} else {
   430  						b.SetSelection(true)
   431  					}
   432  					if secondBatchHasNulls {
   433  						// ResetInternalBatch will throw away the null information.
   434  						b.ResetInternalBatch()
   435  					} else {
   436  						for i := 0; i < b.Width(); i++ {
   437  							b.ColVec(i).Nulls().SetNulls()
   438  						}
   439  					}
   440  				}
   441  				b = op.Next(ctx)
   442  				if b.Length() == 0 {
   443  					return
   444  				}
   445  				if round == 0 {
   446  					secondBatchHasSelection = b.Selection() != nil
   447  					secondBatchHasNulls = maybeHasNulls(b)
   448  				}
   449  				if round == 1 {
   450  					if secondBatchHasSelection {
   451  						assert.NotNil(t, b.Selection())
   452  					} else {
   453  						assert.Nil(t, b.Selection())
   454  					}
   455  					if secondBatchHasNulls {
   456  						assert.True(t, maybeHasNulls(b))
   457  					} else {
   458  						assert.False(t, maybeHasNulls(b))
   459  					}
   460  				}
   461  				if c, ok := op.(IdempotentCloser); ok {
   462  					// Some operators need an explicit Close if not drained completely of
   463  					// input.
   464  					assert.NoError(t, c.IdempotentClose(ctx))
   465  				}
   466  			}
   467  		})
   468  	}
   469  
   470  	t.Run("randomNullsInjection", func(t *testing.T) {
   471  		// This test randomly injects nulls in the input tuples and ensures that
   472  		// the operator doesn't panic.
   473  		inputSources := make([]colexecbase.Operator, len(tups))
   474  		var inputTypes []*types.T
   475  		for i, tup := range tups {
   476  			if typs != nil {
   477  				inputTypes = typs[i]
   478  			}
   479  			input := newOpTestInput(1 /* batchSize */, tup, inputTypes)
   480  			input.injectRandomNulls = true
   481  			inputSources[i] = input
   482  		}
   483  		op, err := constructor(inputSources)
   484  		if err != nil {
   485  			t.Fatal(err)
   486  		}
   487  		op.Init()
   488  		ctx := context.Background()
   489  		for b := op.Next(ctx); b.Length() > 0; b = op.Next(ctx) {
   490  		}
   491  	})
   492  }
   493  
   494  // runTestsWithFn is like runTests, but the input function is responsible for
   495  // performing any required tests. Please note that runTestsWithFn is a worse
   496  // testing facility than runTests, because it can't get a handle on the operator
   497  // under test and therefore can't perform as many extra checks. You should
   498  // always prefer using runTests over runTestsWithFn.
   499  // - tups is the sets of input tuples.
   500  // - typs is the type schema of the input tuples. Note that this is a multi-
   501  //   dimensional slice which allows for specifying different schemas for each
   502  //   of the inputs. This can also be left nil in which case the types will be
   503  //   determined at the runtime looking at the first input tuple, and if the
   504  //   determination doesn't succeed for a value of the tuple (likely because
   505  //   it's a nil), then that column will be assumed by default of type Int64.
   506  // - test is a function that takes a list of input Operators and performs
   507  //   testing with t.
   508  func runTestsWithFn(
   509  	t *testing.T,
   510  	tups []tuples,
   511  	typs [][]*types.T,
   512  	test func(t *testing.T, inputs []colexecbase.Operator),
   513  ) {
   514  	// Run tests over batchSizes of 1, (sometimes) a batch size that is small but
   515  	// greater than 1, and a full coldata.BatchSize().
   516  	batchSizes := make([]int, 0, 3)
   517  	batchSizes = append(batchSizes, 1)
   518  	smallButGreaterThanOne := int(math.Trunc(.002 * float64(coldata.BatchSize())))
   519  	if smallButGreaterThanOne > 1 {
   520  		batchSizes = append(batchSizes, smallButGreaterThanOne)
   521  	}
   522  	batchSizes = append(batchSizes, coldata.BatchSize())
   523  
   524  	for _, batchSize := range batchSizes {
   525  		for _, useSel := range []bool{false, true} {
   526  			t.Run(fmt.Sprintf("batchSize=%d/sel=%t", batchSize, useSel), func(t *testing.T) {
   527  				inputSources := make([]colexecbase.Operator, len(tups))
   528  				var inputTypes []*types.T
   529  				if useSel {
   530  					for i, tup := range tups {
   531  						if typs != nil {
   532  							inputTypes = typs[i]
   533  						}
   534  						rng, _ := randutil.NewPseudoRand()
   535  						inputSources[i] = newOpTestSelInput(rng, batchSize, tup, inputTypes)
   536  					}
   537  				} else {
   538  					for i, tup := range tups {
   539  						if typs != nil {
   540  							inputTypes = typs[i]
   541  						}
   542  						inputSources[i] = newOpTestInput(batchSize, tup, inputTypes)
   543  					}
   544  				}
   545  				test(t, inputSources)
   546  			})
   547  		}
   548  	}
   549  }
   550  
   551  // runTestsWithFixedSel is a helper that (with a given fixed selection vector)
   552  // automatically runs your tests with varied batch sizes. Provide a test
   553  // function that takes a list of input Operators, which will give back the
   554  // tuples provided in batches.
   555  func runTestsWithFixedSel(
   556  	t *testing.T,
   557  	tups []tuples,
   558  	typs []*types.T,
   559  	sel []int,
   560  	test func(t *testing.T, inputs []colexecbase.Operator),
   561  ) {
   562  	for _, batchSize := range []int{1, 2, 3, 16, 1024} {
   563  		t.Run(fmt.Sprintf("batchSize=%d/fixedSel", batchSize), func(t *testing.T) {
   564  			inputSources := make([]colexecbase.Operator, len(tups))
   565  			for i, tup := range tups {
   566  				inputSources[i] = newOpFixedSelTestInput(sel, batchSize, tup, typs)
   567  			}
   568  			test(t, inputSources)
   569  		})
   570  	}
   571  }
   572  
   573  // setColVal is a test helper function to set the given value at the equivalent
   574  // col[idx]. This function is slow due to reflection.
   575  func setColVal(vec coldata.Vec, idx int, val interface{}) {
   576  	canonicalTypeFamily := vec.CanonicalTypeFamily()
   577  	if canonicalTypeFamily == types.BytesFamily {
   578  		var (
   579  			bytesVal []byte
   580  			ok       bool
   581  		)
   582  		bytesVal, ok = val.([]byte)
   583  		if !ok {
   584  			bytesVal = []byte(val.(string))
   585  		}
   586  		vec.Bytes().Set(idx, bytesVal)
   587  	} else if canonicalTypeFamily == types.DecimalFamily {
   588  		// setColVal is used in multiple places, therefore val can be either a float
   589  		// or apd.Decimal.
   590  		if decimalVal, ok := val.(apd.Decimal); ok {
   591  			vec.Decimal()[idx].Set(&decimalVal)
   592  		} else {
   593  			floatVal := val.(float64)
   594  			decimalVal, _, err := apd.NewFromString(fmt.Sprintf("%f", floatVal))
   595  			if err != nil {
   596  				colexecerror.InternalError(
   597  					fmt.Sprintf("unable to set decimal %f: %v", floatVal, err))
   598  			}
   599  			// .Set is used here instead of assignment to ensure the pointer address
   600  			// of the underlying storage for apd.Decimal remains the same. This can
   601  			// cause the code that does not properly use execgen package to fail.
   602  			vec.Decimal()[idx].Set(decimalVal)
   603  		}
   604  	} else if canonicalTypeFamily == typeconv.DatumVecCanonicalTypeFamily {
   605  		switch vec.Type().Family() {
   606  		case types.JsonFamily:
   607  			if jsonStr, ok := val.(string); ok {
   608  				jobj, err := json.ParseJSON(jsonStr)
   609  				if err != nil {
   610  					colexecerror.InternalError(
   611  						fmt.Sprintf("unable to parse json object: %v: %v", jobj, err))
   612  				}
   613  				vec.Datum().Set(idx, &tree.DJSON{JSON: jobj})
   614  			} else if jobj, ok := val.(json.JSON); ok {
   615  				vec.Datum().Set(idx, &tree.DJSON{JSON: jobj})
   616  			}
   617  		default:
   618  			colexecerror.InternalError(fmt.Sprintf("unexpected datum-backed type: %s", vec.Type()))
   619  		}
   620  	} else {
   621  		reflect.ValueOf(vec.Col()).Index(idx).Set(reflect.ValueOf(val).Convert(reflect.TypeOf(vec.Col()).Elem()))
   622  	}
   623  }
   624  
   625  // extrapolateTypesFromTuples determines the type schema based on the input
   626  // tuples.
   627  func extrapolateTypesFromTuples(tups tuples) []*types.T {
   628  	typs := make([]*types.T, len(tups[0]))
   629  	for i := range typs {
   630  		// Default type for test cases is Int64 in case the entire column is
   631  		// null and the type is indeterminate.
   632  		typs[i] = types.Int
   633  		for _, tup := range tups {
   634  			if tup[i] != nil {
   635  				typs[i] = typeconv.UnsafeFromGoType(tup[i])
   636  				break
   637  			}
   638  		}
   639  	}
   640  	return typs
   641  }
   642  
// opTestInput is an Operator that columnarizes test input in the form of
// tuples of arbitrary Go types. It's meant to be used in Operator unit tests
// in conjunction with opTestOutput like the following:
//
// inputTuples := tuples{
//   {1,2,3.3,true},
//   {5,6,7.0,false},
// }
// tupleSource := newOpTestInput(coldata.BatchSize(), inputTuples, nil /* typs */)
// opUnderTest := newFooOp(tupleSource, ...)
// output := newOpTestOutput(opUnderTest, expectedOutputTuples)
// if err := output.Verify(); err != nil {
//     t.Fatal(err)
// }
type opTestInput struct {
	colexecbase.ZeroInputNode

	// typs is the type schema of the tuples. If it is nil, Init derives it
	// from the tuples themselves.
	typs []*types.T

	// batchSize is the maximum number of tuples emitted by a single call to
	// Next.
	batchSize int
	// tuples holds the tuples that have not been emitted yet; Next consumes
	// them from the front.
	tuples tuples
	// batch is allocated in Init and is reused by every call to Next.
	batch coldata.Batch
	// useSel determines whether Next sets a randomly generated selection
	// vector on the batch.
	useSel bool
	// rng is used for shuffling the selection vector when useSel is true.
	rng *rand.Rand
	// selection maps the position of a tuple within the current batch to the
	// index in the column vectors at which that tuple is stored.
	selection []int

	// injectAllNulls determines whether opTestInput will replace all values in
	// the input tuples with nulls.
	injectAllNulls bool

	// injectRandomNulls determines whether opTestInput will randomly replace
	// each value in the input tuples with a null.
	injectRandomNulls bool
}

// Compile-time check that opTestInput implements colexecbase.Operator.
var _ colexecbase.Operator = &opTestInput{}
   679  
   680  // newOpTestInput returns a new opTestInput with the given input tuples and the
   681  // given type schema. If typs is nil, the input tuples are translated into
   682  // types automatically, using simple rules (e.g. integers always become Int64).
   683  func newOpTestInput(batchSize int, tuples tuples, typs []*types.T) *opTestInput {
   684  	ret := &opTestInput{
   685  		batchSize: batchSize,
   686  		tuples:    tuples,
   687  		typs:      typs,
   688  	}
   689  	return ret
   690  }
   691  
   692  func newOpTestSelInput(rng *rand.Rand, batchSize int, tuples tuples, typs []*types.T) *opTestInput {
   693  	ret := &opTestInput{
   694  		useSel:    true,
   695  		rng:       rng,
   696  		batchSize: batchSize,
   697  		tuples:    tuples,
   698  		typs:      typs,
   699  	}
   700  	return ret
   701  }
   702  
   703  func (s *opTestInput) Init() {
   704  	if s.typs == nil {
   705  		if len(s.tuples) == 0 {
   706  			colexecerror.InternalError("empty tuple source with no specified types")
   707  		}
   708  		s.typs = extrapolateTypesFromTuples(s.tuples)
   709  	}
   710  	s.batch = testAllocator.NewMemBatch(s.typs)
   711  
   712  	s.selection = make([]int, coldata.BatchSize())
   713  	for i := range s.selection {
   714  		s.selection[i] = i
   715  	}
   716  }
   717  
// Next columnarizes and returns the next (at most s.batchSize) tuples. The
// same batch object is reset and reused on every call; coldata.ZeroBatch is
// returned once all tuples have been consumed.
//
// If useSel is set, the tuples are stored at randomly chosen (but ascending)
// indices of the column vectors and a matching selection vector is set on the
// batch. A nil value in a tuple, as well as every value subject to the all
// nulls / random nulls injection, is marked as null in the output.
func (s *opTestInput) Next(context.Context) coldata.Batch {
	s.batch.ResetInternalBatch()
	if len(s.tuples) == 0 {
		return coldata.ZeroBatch
	}
	// Take up to s.batchSize tuples off the front of the remaining input.
	batchSize := s.batchSize
	if len(s.tuples) < batchSize {
		batchSize = len(s.tuples)
	}
	tups := s.tuples[:batchSize]
	s.tuples = s.tuples[batchSize:]

	// All tuples of the batch must have the same number of values.
	tupleLen := len(tups[0])
	for i := range tups {
		if len(tups[i]) != tupleLen {
			colexecerror.InternalError(fmt.Sprintf("mismatched tuple lens: found %+v expected %d vals",
				tups[i], tupleLen))
		}
	}

	if s.useSel {
		for i := range s.selection {
			s.selection[i] = i
		}
		// We have populated s.selection vector with possibly more indices than we
		// have actual tuples for, so some "default" tuples will be introduced but
		// will not be selected due to the length of the batch being equal to the
		// number of actual tuples.
		//
		// To introduce an element of chaos in the testing process we shuffle the
		// selection vector; however, in the real environment we expect that
		// indices in the selection vector to be in ascending order, so we sort
		// only those indices that correspond to the actual tuples. For example,
		// say we have 3 actual tuples, and after shuffling the selection vector
		// is [200, 50, 100, ...], so we sort only those 3 values to get to
		// [50, 100, 200, ...] in order to "scan" the selection vector in
		// sequential order.
		s.rng.Shuffle(len(s.selection), func(i, j int) {
			s.selection[i], s.selection[j] = s.selection[j], s.selection[i]
		})
		sort.Slice(s.selection[:batchSize], func(i, j int) bool {
			return s.selection[i] < s.selection[j]
		})
		// Any unused elements in the selection vector are set to a value larger
		// than the max batch size, so the test will panic if this part of the slice
		// is accidentally accessed.
		for i := range s.selection[batchSize:] {
			s.selection[batchSize+i] = coldata.BatchSize() + 1
		}

		s.batch.SetSelection(true)
		copy(s.batch.Selection(), s.selection)
	}

	// Reset nulls for all columns in this batch.
	for _, colVec := range s.batch.ColVecs() {
		if colVec.CanonicalTypeFamily() != types.UnknownFamily {
			colVec.Nulls().UnsetNulls()
		}
	}

	// The generator is re-created with the same fixed seed on every call, so
	// the sequence of random decisions below starts over for each batch.
	rng := rand.New(rand.NewSource(123))

	for i := range s.typs {
		vec := s.batch.ColVec(i)
		// Automatically convert the Go values into exec.Type slice elements using
		// reflection. This is slow, but acceptable for tests.
		col := reflect.ValueOf(vec.Col())
		for j := 0; j < batchSize; j++ {
			// If useSel is false, then the selection vector will contain
			// [0, ..., batchSize] in ascending order.
			outputIdx := s.selection[j]
			// Note that rng is consulted here only when the random nulls
			// injection is enabled (the && short-circuits otherwise).
			injectRandomNull := s.injectRandomNulls && rng.Float64() < 0.5
			if tups[j][i] == nil || s.injectAllNulls || injectRandomNull {
				vec.Nulls().SetNull(outputIdx)
				if rng.Float64() < 0.5 {
					// With 50% probability we set garbage data in the value to make sure
					// that it doesn't affect the computation when the value is actually
					// NULL. For the other 50% of cases we leave the data unset which
					// exercises other scenarios (like division by zero when the value is
					// actually NULL).
					canonicalTypeFamily := vec.CanonicalTypeFamily()
					if canonicalTypeFamily == types.DecimalFamily {
						d := apd.Decimal{}
						_, err := d.SetFloat64(rng.Float64())
						if err != nil {
							colexecerror.InternalError(fmt.Sprintf("%v", err))
						}
						col.Index(outputIdx).Set(reflect.ValueOf(d))
					} else if canonicalTypeFamily == types.BytesFamily {
						newBytes := make([]byte, rng.Intn(16)+1)
						rng.Read(newBytes)
						setColVal(vec, outputIdx, newBytes)
					} else if canonicalTypeFamily == typeconv.DatumVecCanonicalTypeFamily {
						switch vec.Type().Family() {
						case types.JsonFamily:
							newBytes := make([]byte, rng.Intn(16)+1)
							rng.Read(newBytes)
							// NOTE: this j (a JSON value) shadows the loop index j
							// for the remainder of this case only.
							j := json.FromString(string(newBytes))
							setColVal(vec, outputIdx, j)
						default:
							colexecerror.InternalError(fmt.Sprintf("unexpected datum-backed type: %s", vec.Type()))
						}
					} else if val, ok := quick.Value(reflect.TypeOf(vec.Col()).Elem(), rng); ok {
						// For all other types a random value of the column's
						// element type is generated.
						setColVal(vec, outputIdx, val.Interface())
					} else {
						colexecerror.InternalError(fmt.Sprintf("could not generate a random value of type %s", vec.Type()))
					}
				}
			} else {
				setColVal(vec, outputIdx, tups[j][i])
			}
		}
	}

	s.batch.SetLength(batchSize)
	return s.batch
}
   836  
   837  type opFixedSelTestInput struct {
   838  	colexecbase.ZeroInputNode
   839  
   840  	typs []*types.T
   841  
   842  	batchSize int
   843  	tuples    tuples
   844  	batch     coldata.Batch
   845  	sel       []int
   846  	// idx is the index of the tuple to be emitted next. We need to maintain it
   847  	// in case the provided selection vector or provided tuples (if sel is nil)
   848  	// is longer than requested batch size.
   849  	idx int
   850  }
   851  
   852  var _ colexecbase.Operator = &opFixedSelTestInput{}
   853  
   854  // newOpFixedSelTestInput returns a new opFixedSelTestInput with the given
   855  // input tuples and selection vector. The input tuples are translated into
   856  // types automatically, using simple rules (e.g. integers always become Int64).
   857  func newOpFixedSelTestInput(
   858  	sel []int, batchSize int, tuples tuples, typs []*types.T,
   859  ) *opFixedSelTestInput {
   860  	ret := &opFixedSelTestInput{
   861  		batchSize: batchSize,
   862  		sel:       sel,
   863  		tuples:    tuples,
   864  		typs:      typs,
   865  	}
   866  	return ret
   867  }
   868  
   869  func (s *opFixedSelTestInput) Init() {
   870  	if s.typs == nil {
   871  		if len(s.tuples) == 0 {
   872  			colexecerror.InternalError("empty tuple source with no specified types")
   873  		}
   874  		s.typs = extrapolateTypesFromTuples(s.tuples)
   875  	}
   876  
   877  	s.batch = testAllocator.NewMemBatch(s.typs)
   878  	tupleLen := len(s.tuples[0])
   879  	for _, i := range s.sel {
   880  		if len(s.tuples[i]) != tupleLen {
   881  			colexecerror.InternalError(fmt.Sprintf("mismatched tuple lens: found %+v expected %d vals",
   882  				s.tuples[i], tupleLen))
   883  		}
   884  	}
   885  
   886  	// Reset nulls for all columns in this batch.
   887  	for i := 0; i < s.batch.Width(); i++ {
   888  		s.batch.ColVec(i).Nulls().UnsetNulls()
   889  	}
   890  
   891  	if s.sel != nil {
   892  		s.batch.SetSelection(true)
   893  		// When non-nil selection vector is given, we convert all tuples into the
   894  		// Go values at once, and we'll be copying an appropriate chunk of the
   895  		// selection vector later in Next().
   896  		for i := range s.typs {
   897  			vec := s.batch.ColVec(i)
   898  			// Automatically convert the Go values into exec.Type slice elements using
   899  			// reflection. This is slow, but acceptable for tests.
   900  			for j := 0; j < len(s.tuples); j++ {
   901  				if s.tuples[j][i] == nil {
   902  					vec.Nulls().SetNull(j)
   903  				} else {
   904  					setColVal(vec, j, s.tuples[j][i])
   905  				}
   906  			}
   907  		}
   908  	}
   909  
   910  }
   911  
   912  func (s *opFixedSelTestInput) Next(context.Context) coldata.Batch {
   913  	var batchSize int
   914  	if s.sel == nil {
   915  		batchSize = s.batchSize
   916  		if len(s.tuples)-s.idx < batchSize {
   917  			batchSize = len(s.tuples) - s.idx
   918  		}
   919  		// When nil selection vector is given, we convert only the tuples that fit
   920  		// into the current batch (keeping the s.idx in mind).
   921  		for i := range s.typs {
   922  			vec := s.batch.ColVec(i)
   923  			vec.Nulls().UnsetNulls()
   924  			for j := 0; j < batchSize; j++ {
   925  				if s.tuples[s.idx+j][i] == nil {
   926  					vec.Nulls().SetNull(j)
   927  				} else {
   928  					// Automatically convert the Go values into exec.Type slice elements using
   929  					// reflection. This is slow, but acceptable for tests.
   930  					setColVal(vec, j, s.tuples[s.idx+j][i])
   931  				}
   932  			}
   933  		}
   934  	} else {
   935  		if s.idx == len(s.sel) {
   936  			return coldata.ZeroBatch
   937  		}
   938  		batchSize = s.batchSize
   939  		if len(s.sel)-s.idx < batchSize {
   940  			batchSize = len(s.sel) - s.idx
   941  		}
   942  		// All tuples have already been converted to the Go values, so we only need
   943  		// to set the right selection vector for s.batch.
   944  		copy(s.batch.Selection(), s.sel[s.idx:s.idx+batchSize])
   945  	}
   946  	s.batch.SetLength(batchSize)
   947  	s.idx += batchSize
   948  	return s.batch
   949  }
   950  
// opTestOutput is a test verification struct that ensures its input batches
// match some expected output tuples.
type opTestOutput struct {
	OneInputNode
	// expected holds the tuples that the input is expected to produce.
	expected tuples

	// curIdx is the position within batch of the tuple that next will return.
	curIdx int
	// batch is the batch most recently fetched from the input; it is nil until
	// next is called for the first time.
	batch  coldata.Batch
}
   960  
   961  // newOpTestOutput returns a new opTestOutput, initialized with the given input
   962  // to verify that the output is exactly equal to the expected tuples.
   963  func newOpTestOutput(input colexecbase.Operator, expected tuples) *opTestOutput {
   964  	input.Init()
   965  
   966  	return &opTestOutput{
   967  		OneInputNode: NewOneInputNode(input),
   968  		expected:     expected,
   969  	}
   970  }
   971  
   972  // getTupleFromBatch is a helper function that extracts a tuple at index
   973  // tupleIdx from batch.
   974  func getTupleFromBatch(batch coldata.Batch, tupleIdx int) tuple {
   975  	ret := make(tuple, batch.Width())
   976  	out := reflect.ValueOf(ret)
   977  	if sel := batch.Selection(); sel != nil {
   978  		tupleIdx = sel[tupleIdx]
   979  	}
   980  	for colIdx := range ret {
   981  		vec := batch.ColVec(colIdx)
   982  		if vec.Nulls().NullAt(tupleIdx) {
   983  			ret[colIdx] = nil
   984  		} else {
   985  			var val reflect.Value
   986  			if colBytes, ok := vec.Col().(*coldata.Bytes); ok {
   987  				val = reflect.ValueOf(append([]byte(nil), colBytes.Get(tupleIdx)...))
   988  			} else if vec.CanonicalTypeFamily() == types.DecimalFamily {
   989  				colDec := vec.Decimal()
   990  				var newDec apd.Decimal
   991  				newDec.Set(&colDec[tupleIdx])
   992  				val = reflect.ValueOf(newDec)
   993  			} else if vec.CanonicalTypeFamily() == typeconv.DatumVecCanonicalTypeFamily {
   994  				switch vec.Type().Family() {
   995  				case types.JsonFamily:
   996  					d := vec.Datum().Get(tupleIdx).(*coldataext.Datum).Datum
   997  					if d == tree.DNull {
   998  						val = reflect.ValueOf(tree.DNull)
   999  					} else {
  1000  						val = reflect.ValueOf(d.(*tree.DJSON).JSON)
  1001  					}
  1002  				default:
  1003  					colexecerror.InternalError(fmt.Sprintf("unexpected datum-backed type: %s", vec.Type()))
  1004  				}
  1005  			} else {
  1006  				val = reflect.ValueOf(vec.Col()).Index(tupleIdx)
  1007  			}
  1008  			out.Index(colIdx).Set(val)
  1009  		}
  1010  	}
  1011  	return ret
  1012  }
  1013  
  1014  func (r *opTestOutput) next(ctx context.Context) tuple {
  1015  	if r.batch == nil || r.curIdx >= r.batch.Length() {
  1016  		// Get a fresh batch.
  1017  		r.batch = r.input.Next(ctx)
  1018  		if r.batch.Length() == 0 {
  1019  			return nil
  1020  		}
  1021  		r.curIdx = 0
  1022  	}
  1023  	ret := getTupleFromBatch(r.batch, r.curIdx)
  1024  	r.curIdx++
  1025  	return ret
  1026  }
  1027  
  1028  // Verify ensures that the input to this opTestOutput produced the same results
  1029  // and in the same order as the ones expected in the opTestOutput's expected
  1030  // tuples, using a slow, reflection-based comparison method, returning an error
  1031  // if the input isn't equal to the expected.
  1032  func (r *opTestOutput) Verify() error {
  1033  	ctx := context.Background()
  1034  	var actual tuples
  1035  	for {
  1036  		tup := r.next(ctx)
  1037  		if tup == nil {
  1038  			break
  1039  		}
  1040  		actual = append(actual, tup)
  1041  	}
  1042  	return assertTuplesOrderedEqual(r.expected, actual)
  1043  }
  1044  
  1045  // VerifyAnyOrder ensures that the input to this opTestOutput produced the same
  1046  // results but in any order (meaning set comparison behavior is used) as the
  1047  // ones expected in the opTestOutput's expected tuples, using a slow,
  1048  // reflection-based comparison method, returning an error if the input isn't
  1049  // equal to the expected.
  1050  func (r *opTestOutput) VerifyAnyOrder() error {
  1051  	ctx := context.Background()
  1052  	var actual tuples
  1053  	for {
  1054  		tup := r.next(ctx)
  1055  		if tup == nil {
  1056  			break
  1057  		}
  1058  		actual = append(actual, tup)
  1059  	}
  1060  	return assertTuplesSetsEqual(r.expected, actual)
  1061  }
  1062  
  1063  // tupleEquals checks that two tuples are equal, using a slow,
  1064  // reflection-based method to do the comparison. Reflection is used so that
  1065  // values can be compared in a type-agnostic way.
  1066  func tupleEquals(expected tuple, actual tuple) bool {
  1067  	if len(expected) != len(actual) {
  1068  		return false
  1069  	}
  1070  	for i := 0; i < len(actual); i++ {
  1071  		if expected[i] == nil || actual[i] == nil {
  1072  			if expected[i] != nil || actual[i] != nil {
  1073  				return false
  1074  			}
  1075  		} else {
  1076  			// Special case for NaN, since it does not equal itself.
  1077  			if f1, ok := expected[i].(float64); ok {
  1078  				if f2, ok := actual[i].(float64); ok {
  1079  					if math.IsNaN(f1) && math.IsNaN(f2) {
  1080  						continue
  1081  					} else if !math.IsNaN(f1) && !math.IsNaN(f2) && math.Abs(f1-f2) < 1e-6 {
  1082  						continue
  1083  					}
  1084  				}
  1085  			}
  1086  			if d1, ok := actual[i].(apd.Decimal); ok {
  1087  				if f2, ok := expected[i].(float64); ok {
  1088  					d2, _, err := apd.NewFromString(fmt.Sprintf("%f", f2))
  1089  					if err == nil && d1.Cmp(d2) == 0 {
  1090  						continue
  1091  					} else {
  1092  						return false
  1093  					}
  1094  				}
  1095  			}
  1096  			if j1, ok := actual[i].(json.JSON); ok {
  1097  				if j2, ok := expected[i].(json.JSON); ok {
  1098  					if cmp, err := j1.Compare(j2); err == nil && cmp == 0 {
  1099  						continue
  1100  					}
  1101  				} else if str2, ok := expected[i].(string); ok {
  1102  					j2, err := json.ParseJSON(str2)
  1103  					if err != nil {
  1104  						return false
  1105  					}
  1106  					if cmp, err := j1.Compare(j2); err == nil && cmp == 0 {
  1107  						continue
  1108  					}
  1109  				}
  1110  				return false
  1111  			}
  1112  			if !reflect.DeepEqual(
  1113  				reflect.ValueOf(actual[i]).Convert(reflect.TypeOf(expected[i])).Interface(),
  1114  				expected[i],
  1115  			) || !reflect.DeepEqual(
  1116  				reflect.ValueOf(expected[i]).Convert(reflect.TypeOf(actual[i])).Interface(),
  1117  				actual[i],
  1118  			) {
  1119  				return false
  1120  			}
  1121  		}
  1122  	}
  1123  	return true
  1124  }
  1125  
  1126  func makeError(expected tuples, actual tuples) error {
  1127  	var expStr, actStr strings.Builder
  1128  	for i := range expected {
  1129  		expStr.WriteString(fmt.Sprintf("%d: %s\n", i, expected[i].String()))
  1130  	}
  1131  	for i := range actual {
  1132  		actStr.WriteString(fmt.Sprintf("%d: %s\n", i, actual[i].String()))
  1133  	}
  1134  
  1135  	diff := difflib.UnifiedDiff{
  1136  		A:       difflib.SplitLines(expStr.String()),
  1137  		B:       difflib.SplitLines(actStr.String()),
  1138  		Context: 100,
  1139  	}
  1140  	text, err := difflib.GetUnifiedDiffString(diff)
  1141  	if err != nil {
  1142  		return errors.Errorf("expected didn't match actual, failed to make diff %s", err)
  1143  	}
  1144  	return errors.Errorf("expected didn't match actual. diff:\n%s", text)
  1145  }
  1146  
  1147  // assertTuplesSetsEqual asserts that two sets of tuples are equal.
  1148  func assertTuplesSetsEqual(expected tuples, actual tuples) error {
  1149  	if len(expected) != len(actual) {
  1150  		return makeError(expected, actual)
  1151  	}
  1152  	actual = actual.sort()
  1153  	expected = expected.sort()
  1154  	return assertTuplesOrderedEqual(expected, actual)
  1155  }
  1156  
  1157  // assertTuplesOrderedEqual asserts that two permutations of tuples are equal
  1158  // in order.
  1159  func assertTuplesOrderedEqual(expected tuples, actual tuples) error {
  1160  	if len(expected) != len(actual) {
  1161  		return errors.Errorf("expected %+v, actual %+v", expected, actual)
  1162  	}
  1163  	for i := range expected {
  1164  		if !tupleEquals(expected[i], actual[i]) {
  1165  			return makeError(expected, actual)
  1166  		}
  1167  	}
  1168  	return nil
  1169  }
  1170  
  1171  // finiteBatchSource is an Operator that returns the same batch a specified
  1172  // number of times.
  1173  type finiteBatchSource struct {
  1174  	colexecbase.ZeroInputNode
  1175  
  1176  	repeatableBatch *colexecbase.RepeatableBatchSource
  1177  
  1178  	usableCount int
  1179  }
  1180  
  1181  var _ colexecbase.Operator = &finiteBatchSource{}
  1182  
  1183  // newFiniteBatchSource returns a new Operator initialized to return its input
  1184  // batch a specified number of times.
  1185  func newFiniteBatchSource(
  1186  	batch coldata.Batch, typs []*types.T, usableCount int,
  1187  ) *finiteBatchSource {
  1188  	return &finiteBatchSource{
  1189  		repeatableBatch: colexecbase.NewRepeatableBatchSource(testAllocator, batch, typs),
  1190  		usableCount:     usableCount,
  1191  	}
  1192  }
  1193  
  1194  func (f *finiteBatchSource) Init() {
  1195  	f.repeatableBatch.Init()
  1196  }
  1197  
  1198  func (f *finiteBatchSource) Next(ctx context.Context) coldata.Batch {
  1199  	if f.usableCount > 0 {
  1200  		f.usableCount--
  1201  		return f.repeatableBatch.Next(ctx)
  1202  	}
  1203  	return coldata.ZeroBatch
  1204  }
  1205  
  1206  func (f *finiteBatchSource) reset(usableCount int) {
  1207  	f.usableCount = usableCount
  1208  }
  1209  
  1210  // finiteChunksSource is an Operator that returns a batch specified number of
  1211  // times. The first matchLen columns of the batch are incremented every time
  1212  // (except for the first) the batch is returned to emulate source that is
  1213  // already ordered on matchLen columns.
  1214  type finiteChunksSource struct {
  1215  	colexecbase.ZeroInputNode
  1216  	repeatableBatch *colexecbase.RepeatableBatchSource
  1217  
  1218  	usableCount int
  1219  	matchLen    int
  1220  	adjustment  []int64
  1221  }
  1222  
  1223  var _ colexecbase.Operator = &finiteChunksSource{}
  1224  
  1225  func newFiniteChunksSource(
  1226  	batch coldata.Batch, typs []*types.T, usableCount int, matchLen int,
  1227  ) *finiteChunksSource {
  1228  	return &finiteChunksSource{
  1229  		repeatableBatch: colexecbase.NewRepeatableBatchSource(testAllocator, batch, typs),
  1230  		usableCount:     usableCount,
  1231  		matchLen:        matchLen,
  1232  	}
  1233  }
  1234  
  1235  func (f *finiteChunksSource) Init() {
  1236  	f.repeatableBatch.Init()
  1237  	f.adjustment = make([]int64, f.matchLen)
  1238  }
  1239  
  1240  func (f *finiteChunksSource) Next(ctx context.Context) coldata.Batch {
  1241  	if f.usableCount > 0 {
  1242  		f.usableCount--
  1243  		batch := f.repeatableBatch.Next(ctx)
  1244  		if f.matchLen > 0 && f.adjustment[0] == 0 {
  1245  			// We need to calculate the difference between the first and the last
  1246  			// tuples in batch in first matchLen columns so that in the following
  1247  			// calls to Next() the batch is adjusted such that tuples in consecutive
  1248  			// batches are ordered on the first matchLen columns.
  1249  			for col := 0; col < f.matchLen; col++ {
  1250  				firstValue := batch.ColVec(col).Int64()[0]
  1251  				lastValue := batch.ColVec(col).Int64()[batch.Length()-1]
  1252  				f.adjustment[col] = lastValue - firstValue + 1
  1253  			}
  1254  		} else {
  1255  			for i := 0; i < f.matchLen; i++ {
  1256  				int64Vec := batch.ColVec(i).Int64()
  1257  				for j := range int64Vec {
  1258  					int64Vec[j] += f.adjustment[i]
  1259  				}
  1260  				// We need to update the adjustments because RepeatableBatchSource
  1261  				// returns the original batch that it was instantiated with, and we
  1262  				// want to have constantly non-decreasing vectors.
  1263  				firstValue := batch.ColVec(i).Int64()[0]
  1264  				lastValue := batch.ColVec(i).Int64()[batch.Length()-1]
  1265  				f.adjustment[i] += lastValue - firstValue + 1
  1266  			}
  1267  		}
  1268  		return batch
  1269  	}
  1270  	return coldata.ZeroBatch
  1271  }
  1272  
  1273  func TestOpTestInputOutput(t *testing.T) {
  1274  	defer leaktest.AfterTest(t)()
  1275  	inputs := []tuples{
  1276  		{
  1277  			{1, 2, 100},
  1278  			{1, 3, -3},
  1279  			{0, 4, 5},
  1280  			{1, 5, 0},
  1281  		},
  1282  	}
  1283  	runTestsWithFn(t, inputs, nil /* typs */, func(t *testing.T, sources []colexecbase.Operator) {
  1284  		out := newOpTestOutput(sources[0], inputs[0])
  1285  
  1286  		if err := out.Verify(); err != nil {
  1287  			t.Fatal(err)
  1288  		}
  1289  	})
  1290  }
  1291  
  1292  func TestRepeatableBatchSource(t *testing.T) {
  1293  	defer leaktest.AfterTest(t)()
  1294  	typs := []*types.T{types.Int}
  1295  	batch := testAllocator.NewMemBatch(typs)
  1296  	batchLen := 10
  1297  	if coldata.BatchSize() < batchLen {
  1298  		batchLen = coldata.BatchSize()
  1299  	}
  1300  	batch.SetLength(batchLen)
  1301  	input := colexecbase.NewRepeatableBatchSource(testAllocator, batch, typs)
  1302  
  1303  	b := input.Next(context.Background())
  1304  	b.SetLength(0)
  1305  	b.SetSelection(true)
  1306  
  1307  	b = input.Next(context.Background())
  1308  	if b.Length() != batchLen {
  1309  		t.Fatalf("expected RepeatableBatchSource to reset batch length to %d, found %d", batchLen, b.Length())
  1310  	}
  1311  	if b.Selection() != nil {
  1312  		t.Fatalf("expected RepeatableBatchSource to reset selection vector, found %+v", b.Selection())
  1313  	}
  1314  }
  1315  
  1316  func TestRepeatableBatchSourceWithFixedSel(t *testing.T) {
  1317  	defer leaktest.AfterTest(t)()
  1318  	typs := []*types.T{types.Int}
  1319  	batch := testAllocator.NewMemBatch(typs)
  1320  	rng, _ := randutil.NewPseudoRand()
  1321  	batchSize := 10
  1322  	if batchSize > coldata.BatchSize() {
  1323  		batchSize = coldata.BatchSize()
  1324  	}
  1325  	sel := coldatatestutils.RandomSel(rng, batchSize, 0 /* probOfOmitting */)
  1326  	batchLen := len(sel)
  1327  	batch.SetLength(batchLen)
  1328  	batch.SetSelection(true)
  1329  	copy(batch.Selection(), sel)
  1330  	input := colexecbase.NewRepeatableBatchSource(testAllocator, batch, typs)
  1331  	b := input.Next(context.Background())
  1332  
  1333  	b.SetLength(0)
  1334  	b.SetSelection(false)
  1335  	b = input.Next(context.Background())
  1336  	if b.Length() != batchLen {
  1337  		t.Fatalf("expected RepeatableBatchSource to reset batch length to %d, found %d", batchLen, b.Length())
  1338  	}
  1339  	if b.Selection() == nil {
  1340  		t.Fatalf("expected RepeatableBatchSource to reset selection vector, expected %v but found %+v", sel, b.Selection())
  1341  	} else {
  1342  		for i := 0; i < batchLen; i++ {
  1343  			if b.Selection()[i] != sel[i] {
  1344  				t.Fatalf("expected RepeatableBatchSource to reset selection vector, expected %v but found %+v", sel, b.Selection())
  1345  			}
  1346  		}
  1347  	}
  1348  
  1349  	newSel := coldatatestutils.RandomSel(rng, 10 /* batchSize */, 0.2 /* probOfOmitting */)
  1350  	newBatchLen := len(sel)
  1351  	b.SetLength(newBatchLen)
  1352  	b.SetSelection(true)
  1353  	copy(b.Selection(), newSel)
  1354  	b = input.Next(context.Background())
  1355  	if b.Length() != batchLen {
  1356  		t.Fatalf("expected RepeatableBatchSource to reset batch length to %d, found %d", batchLen, b.Length())
  1357  	}
  1358  	if b.Selection() == nil {
  1359  		t.Fatalf("expected RepeatableBatchSource to reset selection vector, expected %v but found %+v", sel, b.Selection())
  1360  	} else {
  1361  		for i := 0; i < batchLen; i++ {
  1362  			if b.Selection()[i] != sel[i] {
  1363  				t.Fatalf("expected RepeatableBatchSource to reset selection vector, expected %v but found %+v", sel, b.Selection())
  1364  			}
  1365  		}
  1366  	}
  1367  }
  1368  
  1369  // chunkingBatchSource is a batch source that takes unlimited-size columns and
  1370  // chunks them into BatchSize()-sized chunks when Nexted.
  1371  type chunkingBatchSource struct {
  1372  	colexecbase.ZeroInputNode
  1373  	typs []*types.T
  1374  	cols []coldata.Vec
  1375  	len  int
  1376  
  1377  	curIdx int
  1378  	batch  coldata.Batch
  1379  }
  1380  
  1381  var _ colexecbase.Operator = &chunkingBatchSource{}
  1382  
  1383  // newChunkingBatchSource returns a new chunkingBatchSource with the given
  1384  // column types, columns, and length.
  1385  func newChunkingBatchSource(typs []*types.T, cols []coldata.Vec, len int) *chunkingBatchSource {
  1386  	return &chunkingBatchSource{
  1387  		typs: typs,
  1388  		cols: cols,
  1389  		len:  len,
  1390  	}
  1391  }
  1392  
  1393  func (c *chunkingBatchSource) Init() {
  1394  	c.batch = testAllocator.NewMemBatch(c.typs)
  1395  	for i := range c.cols {
  1396  		c.batch.ColVec(i).SetCol(c.cols[i].Col())
  1397  		c.batch.ColVec(i).SetNulls(c.cols[i].Nulls())
  1398  	}
  1399  }
  1400  
  1401  func (c *chunkingBatchSource) Next(context.Context) coldata.Batch {
  1402  	if c.curIdx >= c.len {
  1403  		return coldata.ZeroBatch
  1404  	}
  1405  	// Explicitly set to false since this could be modified by the downstream
  1406  	// operators. This is sufficient because both the vectors and the nulls are
  1407  	// explicitly set below. ResetInternalBatch cannot be used here because we're
  1408  	// operating on Windows into the vectors.
  1409  	c.batch.SetSelection(false)
  1410  	lastIdx := c.curIdx + coldata.BatchSize()
  1411  	if lastIdx > c.len {
  1412  		lastIdx = c.len
  1413  	}
  1414  	for i, vec := range c.batch.ColVecs() {
  1415  		vec.SetCol(c.cols[i].Window(c.curIdx, lastIdx).Col())
  1416  		nullsSlice := c.cols[i].Nulls().Slice(c.curIdx, lastIdx)
  1417  		vec.SetNulls(&nullsSlice)
  1418  	}
  1419  	c.batch.SetLength(lastIdx - c.curIdx)
  1420  	c.curIdx = lastIdx
  1421  	return c.batch
  1422  }
  1423  
  1424  func (c *chunkingBatchSource) reset() {
  1425  	c.curIdx = 0
  1426  }
  1427  
// joinTestCase is a helper struct shared by the hash and merge join unit
// tests. Not all fields have to be filled in, but init() method *must* be
// called.
//
// init() fills in the following defaults:
// - outputBatchSize is set to coldata.BatchSize() when it is zero;
// - leftDirections (rightDirections) is set to ascending for every entry of
//   leftTypes (rightTypes) when it is empty.
//
// mutateTypes() does not produce a Bytes variant of a test case whose onExpr
// is non-empty.
type joinTestCase struct {
	description           string
	joinType              sqlbase.JoinType
	leftTuples            tuples
	leftTypes             []*types.T
	leftOutCols           []uint32
	leftEqCols            []uint32
	leftDirections        []execinfrapb.Ordering_Column_Direction
	rightTuples           tuples
	rightTypes            []*types.T
	rightOutCols          []uint32
	rightEqCols           []uint32
	rightDirections       []execinfrapb.Ordering_Column_Direction
	leftEqColsAreKey      bool
	rightEqColsAreKey     bool
	expected              tuples
	outputBatchSize       int
	skipAllNullsInjection bool
	onExpr                execinfrapb.Expression
}
  1451  
  1452  func (tc *joinTestCase) init() {
  1453  	if tc.outputBatchSize == 0 {
  1454  		tc.outputBatchSize = coldata.BatchSize()
  1455  	}
  1456  
  1457  	if len(tc.leftDirections) == 0 {
  1458  		tc.leftDirections = make([]execinfrapb.Ordering_Column_Direction, len(tc.leftTypes))
  1459  		for i := range tc.leftDirections {
  1460  			tc.leftDirections[i] = execinfrapb.Ordering_Column_ASC
  1461  		}
  1462  	}
  1463  
  1464  	if len(tc.rightDirections) == 0 {
  1465  		tc.rightDirections = make([]execinfrapb.Ordering_Column_Direction, len(tc.rightTypes))
  1466  		for i := range tc.rightDirections {
  1467  			tc.rightDirections[i] = execinfrapb.Ordering_Column_ASC
  1468  		}
  1469  	}
  1470  }
  1471  
  1472  // mutateTypes returns a slice of joinTestCases with varied types. Assumes
  1473  // the input is made up of just int64s. Calling this
  1474  func (tc *joinTestCase) mutateTypes() []*joinTestCase {
  1475  	ret := []*joinTestCase{tc}
  1476  
  1477  	for _, typ := range []*types.T{types.Decimal, types.Bytes} {
  1478  		if typ.Identical(types.Bytes) {
  1479  			// Skip test cases with ON conditions for now, since those expect
  1480  			// numeric inputs.
  1481  			if !tc.onExpr.Empty() {
  1482  				continue
  1483  			}
  1484  		}
  1485  		newTc := *tc
  1486  		newTc.leftTypes = make([]*types.T, len(tc.leftTypes))
  1487  		newTc.rightTypes = make([]*types.T, len(tc.rightTypes))
  1488  		copy(newTc.leftTypes, tc.leftTypes)
  1489  		copy(newTc.rightTypes, tc.rightTypes)
  1490  		for _, typs := range [][]*types.T{newTc.leftTypes, newTc.rightTypes} {
  1491  			for i := range typs {
  1492  				if !typ.Identical(types.Int) {
  1493  					// We currently can only mutate test cases that are made up of int64
  1494  					// only.
  1495  					return ret
  1496  				}
  1497  				typs[i] = typ
  1498  			}
  1499  		}
  1500  		newTc.leftTuples = tc.leftTuples.clone()
  1501  		newTc.rightTuples = tc.rightTuples.clone()
  1502  		newTc.expected = tc.expected.clone()
  1503  
  1504  		for _, tups := range []tuples{newTc.leftTuples, newTc.rightTuples, newTc.expected} {
  1505  			for i := range tups {
  1506  				for j := range tups[i] {
  1507  					if tups[i][j] == nil {
  1508  						continue
  1509  					}
  1510  					switch typeconv.TypeFamilyToCanonicalTypeFamily(typ.Family()) {
  1511  					case types.DecimalFamily:
  1512  						var d apd.Decimal
  1513  						_, _ = d.SetFloat64(float64(tups[i][j].(int)))
  1514  						tups[i][j] = d
  1515  					case types.BytesFamily:
  1516  						tups[i][j] = fmt.Sprintf("%.10d", tups[i][j].(int))
  1517  					}
  1518  				}
  1519  			}
  1520  		}
  1521  		ret = append(ret, &newTc)
  1522  	}
  1523  	return ret
  1524  }
  1525  
// sortTestCase bundles the input tuples, their types, and the expected output
// of a single sorting test.
// NOTE(review): the fields are consumed by tests outside of this part of the
// file; presumably ordCols is the ordering to sort by, matchLen is the number
// of leading ordering columns the input is already sorted on, and k is a
// top-K limit - confirm against the callers.
type sortTestCase struct {
	description string
	tuples      tuples
	expected    tuples
	typs        []*types.T
	ordCols     []execinfrapb.Ordering_Column
	matchLen    int
	k           int
}
  1535  
  1536  // Mock typing context for the typechecker.
  1537  type mockTypeContext struct {
  1538  	typs []*types.T
  1539  }
  1540  
  1541  func (p *mockTypeContext) IndexedVarEval(idx int, ctx *tree.EvalContext) (tree.Datum, error) {
  1542  	return tree.DNull.Eval(ctx)
  1543  }
  1544  
  1545  func (p *mockTypeContext) IndexedVarResolvedType(idx int) *types.T {
  1546  	return p.typs[idx]
  1547  }
  1548  
  1549  func (p *mockTypeContext) IndexedVarNodeFormatter(idx int) tree.NodeFormatter {
  1550  	n := tree.Name(fmt.Sprintf("$%d", idx))
  1551  	return &n
  1552  }
  1553  
  1554  // createTestProjectingOperator creates a projecting operator that performs
  1555  // projectingExpr on input that has inputTypes as its output columns. It does
  1556  // so by making a noop processor core with post-processing step that passes
  1557  // through all input columns and renders an additional column using
  1558  // projectingExpr to create the render; then, the processor core is used to
  1559  // plan all necessary infrastructure using NewColOperator call.
  1560  // - canFallbackToRowexec determines whether NewColOperator will be able to use
  1561  // rowexec.NewProcessor to instantiate a wrapped rowexec processor. This should
  1562  // be false unless we expect that for some unit tests we will not be able to
  1563  // plan the "pure" vectorized operators.
  1564  func createTestProjectingOperator(
  1565  	ctx context.Context,
  1566  	flowCtx *execinfra.FlowCtx,
  1567  	input colexecbase.Operator,
  1568  	inputTypes []*types.T,
  1569  	projectingExpr string,
  1570  	canFallbackToRowexec bool,
  1571  ) (colexecbase.Operator, error) {
  1572  	expr, err := parser.ParseExpr(projectingExpr)
  1573  	if err != nil {
  1574  		return nil, err
  1575  	}
  1576  	p := &mockTypeContext{typs: inputTypes}
  1577  	semaCtx := tree.MakeSemaContext()
  1578  	semaCtx.IVarContainer = p
  1579  	typedExpr, err := tree.TypeCheck(ctx, expr, &semaCtx, types.Any)
  1580  	if err != nil {
  1581  		return nil, err
  1582  	}
  1583  	renderExprs := make([]execinfrapb.Expression, len(inputTypes)+1)
  1584  	for i := range inputTypes {
  1585  		renderExprs[i].Expr = fmt.Sprintf("@%d", i+1)
  1586  	}
  1587  	renderExprs[len(inputTypes)].LocalExpr = typedExpr
  1588  	spec := &execinfrapb.ProcessorSpec{
  1589  		Input: []execinfrapb.InputSyncSpec{{ColumnTypes: inputTypes}},
  1590  		Core: execinfrapb.ProcessorCoreUnion{
  1591  			Noop: &execinfrapb.NoopCoreSpec{},
  1592  		},
  1593  		Post: execinfrapb.PostProcessSpec{
  1594  			RenderExprs: renderExprs,
  1595  		},
  1596  	}
  1597  	args := NewColOperatorArgs{
  1598  		Spec:                spec,
  1599  		Inputs:              []colexecbase.Operator{input},
  1600  		StreamingMemAccount: testMemAcc,
  1601  	}
  1602  	if canFallbackToRowexec {
  1603  		args.ProcessorConstructor = rowexec.NewProcessor
  1604  	} else {
  1605  		// It is possible that there is a valid projecting operator with the
  1606  		// given input types, but the vectorized engine doesn't support it. In
  1607  		// such case in the production code we fall back to row-by-row engine,
  1608  		// but the caller of this method doesn't want such behavior. In order
  1609  		// to avoid a nil-pointer exception we mock out the processor
  1610  		// constructor.
  1611  		args.ProcessorConstructor = func(
  1612  			context.Context, *execinfra.FlowCtx, int32,
  1613  			*execinfrapb.ProcessorCoreUnion, *execinfrapb.PostProcessSpec,
  1614  			[]execinfra.RowSource, []execinfra.RowReceiver,
  1615  			[]execinfra.LocalProcessor) (execinfra.Processor, error) {
  1616  			return nil, errors.Errorf("fallback to rowexec is disabled")
  1617  		}
  1618  	}
  1619  	args.TestingKnobs.UseStreamingMemAccountForBuffering = true
  1620  	result, err := NewColOperator(ctx, flowCtx, args)
  1621  	if err != nil {
  1622  		return nil, err
  1623  	}
  1624  	return result.Op, nil
  1625  }