github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/utils_test.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package colexec 12 13 import ( 14 "context" 15 "fmt" 16 "math" 17 "math/rand" 18 "reflect" 19 "sort" 20 "strings" 21 "testing" 22 "testing/quick" 23 24 "github.com/cockroachdb/apd" 25 "github.com/cockroachdb/cockroach/pkg/col/coldata" 26 "github.com/cockroachdb/cockroach/pkg/col/coldataext" 27 "github.com/cockroachdb/cockroach/pkg/col/coldatatestutils" 28 "github.com/cockroachdb/cockroach/pkg/col/typeconv" 29 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase" 30 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror" 31 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 32 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 33 "github.com/cockroachdb/cockroach/pkg/sql/parser" 34 "github.com/cockroachdb/cockroach/pkg/sql/rowexec" 35 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 36 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 37 "github.com/cockroachdb/cockroach/pkg/sql/types" 38 "github.com/cockroachdb/cockroach/pkg/util/json" 39 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 40 "github.com/cockroachdb/cockroach/pkg/util/randutil" 41 "github.com/cockroachdb/errors" 42 "github.com/pmezard/go-difflib/difflib" 43 "github.com/stretchr/testify/assert" 44 "github.com/stretchr/testify/require" 45 ) 46 47 // tuple represents a row with any-type columns. 
48 type tuple []interface{} 49 50 func (t tuple) String() string { 51 var sb strings.Builder 52 sb.WriteString("[") 53 for i := range t { 54 if i != 0 { 55 sb.WriteString(", ") 56 } 57 if d, ok := t[i].(apd.Decimal); ok { 58 sb.WriteString(d.String()) 59 } else if d, ok := t[i].(*apd.Decimal); ok { 60 sb.WriteString(d.String()) 61 } else if d, ok := t[i].([]byte); ok { 62 sb.WriteString(string(d)) 63 } else { 64 sb.WriteString(fmt.Sprintf("%v", t[i])) 65 } 66 } 67 sb.WriteString("]") 68 return sb.String() 69 } 70 71 func (t tuple) less(other tuple) bool { 72 for i := range t { 73 // If either side is nil, we short circuit the comparison. For nil, we 74 // define: nil < {any_none_nil} 75 if t[i] == nil && other[i] == nil { 76 continue 77 } else if t[i] == nil && other[i] != nil { 78 return true 79 } else if t[i] != nil && other[i] == nil { 80 return false 81 } 82 83 lhsVal := reflect.ValueOf(t[i]) 84 rhsVal := reflect.ValueOf(other[i]) 85 86 // apd.Decimal are not comparable, so we check that first. 87 if lhsVal.Type().Name() == "Decimal" && lhsVal.CanInterface() { 88 lhsDecimal := lhsVal.Interface().(apd.Decimal) 89 rhsDecimal := rhsVal.Interface().(apd.Decimal) 90 cmp := (&lhsDecimal).CmpTotal(&rhsDecimal) 91 if cmp == 0 { 92 continue 93 } else if cmp == -1 { 94 return true 95 } else { 96 return false 97 } 98 } 99 100 // Since the expected values are provided as strings, we convert the json 101 // values here to strings so we can use the string lexical ordering. This is 102 // because json orders certain values differently (e.g. null) compared to 103 // string. 104 if strings.Contains(lhsVal.Type().String(), "json") { 105 lhsStr := lhsVal.Interface().(fmt.Stringer).String() 106 rhsStr := rhsVal.Interface().(fmt.Stringer).String() 107 if lhsStr == rhsStr { 108 continue 109 } else { 110 return lhsStr < rhsStr 111 } 112 } 113 114 // types.Bytes is represented as []uint8. 
115 if lhsVal.Type().String() == "[]uint8" { 116 lhsStr := string(lhsVal.Interface().([]uint8)) 117 rhsStr := string(rhsVal.Interface().([]uint8)) 118 if lhsStr == rhsStr { 119 continue 120 } else if lhsStr < rhsStr { 121 return true 122 } else { 123 return false 124 } 125 } 126 127 // No need to compare these two elements when they are the same. 128 if t[i] == other[i] { 129 continue 130 } 131 132 switch typ := lhsVal.Type().Name(); typ { 133 case "int", "int16", "int32", "int64": 134 return lhsVal.Int() < rhsVal.Int() 135 case "uint", "uint16", "uint32", "uint64": 136 return lhsVal.Uint() < rhsVal.Uint() 137 case "float", "float64": 138 return lhsVal.Float() < rhsVal.Float() 139 case "bool": 140 return lhsVal.Bool() == false && rhsVal.Bool() == true 141 case "string": 142 return lhsVal.String() < rhsVal.String() 143 default: 144 colexecerror.InternalError(fmt.Sprintf("Unhandled comparison type: %s", typ)) 145 } 146 } 147 return false 148 } 149 150 func (t tuple) clone() tuple { 151 b := make(tuple, len(t)) 152 for i := range b { 153 b[i] = t[i] 154 } 155 156 return b 157 } 158 159 // tuples represents a table with any-type columns. 160 type tuples []tuple 161 162 func (t tuples) clone() tuples { 163 b := make(tuples, len(t)) 164 for i := range b { 165 b[i] = t[i].clone() 166 } 167 return b 168 } 169 170 func (t tuples) String() string { 171 var sb strings.Builder 172 sb.WriteString("[") 173 for i := range t { 174 if i != 0 { 175 sb.WriteString(", ") 176 } 177 sb.WriteString(t[i].String()) 178 } 179 sb.WriteString("]") 180 return sb.String() 181 } 182 183 // sort returns a copy of sorted tuples. 
184 func (t tuples) sort() tuples { 185 b := make(tuples, len(t)) 186 for i := range b { 187 b[i] = make(tuple, len(t[i])) 188 copy(b[i], t[i]) 189 } 190 sort.SliceStable(b, func(i, j int) bool { 191 lhs := b[i] 192 rhs := b[j] 193 return lhs.less(rhs) 194 }) 195 return b 196 } 197 198 type verifierType int 199 200 const ( 201 // orderedVerifier compares the input and output tuples, returning an error 202 // if they're not identical. 203 orderedVerifier verifierType = iota 204 // unorderedVerifier compares the input and output tuples as sets, returning 205 // an error if they aren't equal by set comparison (irrespective of order). 206 unorderedVerifier 207 ) 208 209 type verifierFn func(output *opTestOutput) error 210 211 // maybeHasNulls is a helper function that returns whether any of the columns in b 212 // (maybe) have nulls. 213 func maybeHasNulls(b coldata.Batch) bool { 214 if b.Length() == 0 { 215 return false 216 } 217 for i := 0; i < b.Width(); i++ { 218 if b.ColVec(i).MaybeHasNulls() { 219 return true 220 } 221 } 222 return false 223 } 224 225 type testRunner func(*testing.T, []tuples, [][]*types.T, tuples, interface{}, func([]colexecbase.Operator) (colexecbase.Operator, error)) 226 227 // variableOutputBatchSizeInitializer is implemented by operators that can be 228 // initialized with variable output size batches. This allows runTests to 229 // increase test coverage of these operators. 230 type variableOutputBatchSizeInitializer interface { 231 initWithOutputBatchSize(int) 232 } 233 234 // runTests is a helper that automatically runs your tests with varied batch 235 // sizes and with and without a random selection vector. 236 // tups is the sets of input tuples. 237 // expected is the set of output tuples. 238 // constructor is a function that takes a list of input Operators and returns 239 // the operator to test, or an error. 
240 func runTests( 241 t *testing.T, 242 tups []tuples, 243 expected tuples, 244 verifier interface{}, 245 constructor func(inputs []colexecbase.Operator) (colexecbase.Operator, error), 246 ) { 247 runTestsWithTyps(t, tups, nil /* typs */, expected, verifier, constructor) 248 } 249 250 // runTestsWithTyps is the same as runTests with an ability to specify the 251 // types of the input tuples. 252 // - typs is the type schema of the input tuples. Note that this is a multi- 253 // dimensional slice which allows for specifying different schemas for each 254 // of the inputs. 255 func runTestsWithTyps( 256 t *testing.T, 257 tups []tuples, 258 typs [][]*types.T, 259 expected tuples, 260 verifier interface{}, 261 constructor func(inputs []colexecbase.Operator) (colexecbase.Operator, error), 262 ) { 263 runTestsWithoutAllNullsInjection(t, tups, typs, expected, verifier, constructor) 264 265 t.Run("allNullsInjection", func(t *testing.T) { 266 // This test replaces all values in the input tuples with nulls and ensures 267 // that the output is different from the "original" output (i.e. from the 268 // one that is returned without nulls injection). 
269 onlyNullsInTheInput := true 270 OUTER: 271 for _, tup := range tups { 272 for i := 0; i < len(tup); i++ { 273 for j := 0; j < len(tup[i]); j++ { 274 if tup[i][j] != nil { 275 onlyNullsInTheInput = false 276 break OUTER 277 } 278 } 279 } 280 } 281 opConstructor := func(injectAllNulls bool) colexecbase.Operator { 282 inputSources := make([]colexecbase.Operator, len(tups)) 283 var inputTypes []*types.T 284 for i, tup := range tups { 285 if typs != nil { 286 inputTypes = typs[i] 287 } 288 input := newOpTestInput(1 /* batchSize */, tup, inputTypes) 289 input.injectAllNulls = injectAllNulls 290 inputSources[i] = input 291 } 292 op, err := constructor(inputSources) 293 if err != nil { 294 t.Fatal(err) 295 } 296 op.Init() 297 return op 298 } 299 ctx := context.Background() 300 originalOp := opConstructor(false /* injectAllNulls */) 301 opWithNulls := opConstructor(true /* injectAllNulls */) 302 foundDifference := false 303 for { 304 originalBatch := originalOp.Next(ctx) 305 batchWithNulls := opWithNulls.Next(ctx) 306 if originalBatch.Length() != batchWithNulls.Length() { 307 foundDifference = true 308 break 309 } 310 if originalBatch.Length() == 0 { 311 break 312 } 313 var originalTuples, tuplesWithNulls tuples 314 for i := 0; i < originalBatch.Length(); i++ { 315 // We checked that the batches have the same length. 316 originalTuples = append(originalTuples, getTupleFromBatch(originalBatch, i)) 317 tuplesWithNulls = append(tuplesWithNulls, getTupleFromBatch(batchWithNulls, i)) 318 } 319 if err := assertTuplesSetsEqual(originalTuples, tuplesWithNulls); err != nil { 320 // err is non-nil which means that the batches are different. 
321 foundDifference = true 322 break 323 } 324 } 325 if onlyNullsInTheInput { 326 require.False(t, foundDifference, "since there were only "+ 327 "nulls in the input tuples, we expect for all nulls injection to not "+ 328 "change the output") 329 } else { 330 require.True(t, foundDifference, "since there were "+ 331 "non-nulls in the input tuples, we expect for all nulls injection to "+ 332 "change the output") 333 } 334 if c, ok := originalOp.(IdempotentCloser); ok { 335 require.NoError(t, c.IdempotentClose(ctx)) 336 } 337 if c, ok := opWithNulls.(IdempotentCloser); ok { 338 require.NoError(t, c.IdempotentClose(ctx)) 339 } 340 }) 341 } 342 343 // runTestsWithoutAllNullsInjection is the same as runTests, but it skips the 344 // all nulls injection test. Use this only when the all nulls injection should 345 // not change the output of the operator under testing. 346 // NOTE: please leave a justification why you're using this variant of 347 // runTests. 348 func runTestsWithoutAllNullsInjection( 349 t *testing.T, 350 tups []tuples, 351 typs [][]*types.T, 352 expected tuples, 353 verifier interface{}, 354 constructor func(inputs []colexecbase.Operator) (colexecbase.Operator, error), 355 ) { 356 skipVerifySelAndNullsResets := true 357 var verifyFn verifierFn 358 switch v := verifier.(type) { 359 case verifierType: 360 switch v { 361 case orderedVerifier: 362 verifyFn = (*opTestOutput).Verify 363 // Note that this test makes sense only if we expect tuples to be 364 // returned in the same order (otherwise the second batch's selection 365 // vector or nulls info can be different and that is totally valid). 
366 skipVerifySelAndNullsResets = false 367 case unorderedVerifier: 368 verifyFn = (*opTestOutput).VerifyAnyOrder 369 default: 370 colexecerror.InternalError(fmt.Sprintf("unexpected verifierType %d", v)) 371 } 372 case verifierFn: 373 verifyFn = v 374 } 375 runTestsWithFn(t, tups, typs, func(t *testing.T, inputs []colexecbase.Operator) { 376 op, err := constructor(inputs) 377 if err != nil { 378 t.Fatal(err) 379 } 380 out := newOpTestOutput(op, expected) 381 if err := verifyFn(out); err != nil { 382 t.Fatal(err) 383 } 384 }) 385 386 if !skipVerifySelAndNullsResets { 387 t.Run("verifySelAndNullResets", func(t *testing.T) { 388 // This test ensures that operators that "own their own batches", such as 389 // any operator that has to reshape its output, are not affected by 390 // downstream modification of batches. 391 // We run the main loop twice: once to determine what the operator would 392 // output on its second Next call (we need the first call to Next to get a 393 // reference to a batch to modify), and a second time to modify the batch 394 // and verify that this does not change the operator output. 395 // NOTE: this test makes sense only if the operator returns two non-zero 396 // length batches (if not, we short-circuit the test since the operator 397 // doesn't have to restore anything on a zero-length batch). 398 var ( 399 secondBatchHasSelection, secondBatchHasNulls bool 400 inputTypes []*types.T 401 ) 402 for round := 0; round < 2; round++ { 403 inputSources := make([]colexecbase.Operator, len(tups)) 404 for i, tup := range tups { 405 if typs != nil { 406 inputTypes = typs[i] 407 } 408 inputSources[i] = newOpTestInput(1 /* batchSize */, tup, inputTypes) 409 } 410 op, err := constructor(inputSources) 411 if err != nil { 412 t.Fatal(err) 413 } 414 if vbsiOp, ok := op.(variableOutputBatchSizeInitializer); ok { 415 // initialize the operator with a very small output batch size to 416 // increase the likelihood that multiple batches will be output. 
417 vbsiOp.initWithOutputBatchSize(1) 418 } else { 419 op.Init() 420 } 421 ctx := context.Background() 422 b := op.Next(ctx) 423 if b.Length() == 0 { 424 return 425 } 426 if round == 1 { 427 if secondBatchHasSelection { 428 b.SetSelection(false) 429 } else { 430 b.SetSelection(true) 431 } 432 if secondBatchHasNulls { 433 // ResetInternalBatch will throw away the null information. 434 b.ResetInternalBatch() 435 } else { 436 for i := 0; i < b.Width(); i++ { 437 b.ColVec(i).Nulls().SetNulls() 438 } 439 } 440 } 441 b = op.Next(ctx) 442 if b.Length() == 0 { 443 return 444 } 445 if round == 0 { 446 secondBatchHasSelection = b.Selection() != nil 447 secondBatchHasNulls = maybeHasNulls(b) 448 } 449 if round == 1 { 450 if secondBatchHasSelection { 451 assert.NotNil(t, b.Selection()) 452 } else { 453 assert.Nil(t, b.Selection()) 454 } 455 if secondBatchHasNulls { 456 assert.True(t, maybeHasNulls(b)) 457 } else { 458 assert.False(t, maybeHasNulls(b)) 459 } 460 } 461 if c, ok := op.(IdempotentCloser); ok { 462 // Some operators need an explicit Close if not drained completely of 463 // input. 464 assert.NoError(t, c.IdempotentClose(ctx)) 465 } 466 } 467 }) 468 } 469 470 t.Run("randomNullsInjection", func(t *testing.T) { 471 // This test randomly injects nulls in the input tuples and ensures that 472 // the operator doesn't panic. 473 inputSources := make([]colexecbase.Operator, len(tups)) 474 var inputTypes []*types.T 475 for i, tup := range tups { 476 if typs != nil { 477 inputTypes = typs[i] 478 } 479 input := newOpTestInput(1 /* batchSize */, tup, inputTypes) 480 input.injectRandomNulls = true 481 inputSources[i] = input 482 } 483 op, err := constructor(inputSources) 484 if err != nil { 485 t.Fatal(err) 486 } 487 op.Init() 488 ctx := context.Background() 489 for b := op.Next(ctx); b.Length() > 0; b = op.Next(ctx) { 490 } 491 }) 492 } 493 494 // runTestsWithFn is like runTests, but the input function is responsible for 495 // performing any required tests. 
Please note that runTestsWithFn is a worse 496 // testing facility than runTests, because it can't get a handle on the operator 497 // under test and therefore can't perform as many extra checks. You should 498 // always prefer using runTests over runTestsWithFn. 499 // - tups is the sets of input tuples. 500 // - typs is the type schema of the input tuples. Note that this is a multi- 501 // dimensional slice which allows for specifying different schemas for each 502 // of the inputs. This can also be left nil in which case the types will be 503 // determined at the runtime looking at the first input tuple, and if the 504 // determination doesn't succeed for a value of the tuple (likely because 505 // it's a nil), then that column will be assumed by default of type Int64. 506 // - test is a function that takes a list of input Operators and performs 507 // testing with t. 508 func runTestsWithFn( 509 t *testing.T, 510 tups []tuples, 511 typs [][]*types.T, 512 test func(t *testing.T, inputs []colexecbase.Operator), 513 ) { 514 // Run tests over batchSizes of 1, (sometimes) a batch size that is small but 515 // greater than 1, and a full coldata.BatchSize(). 
516 batchSizes := make([]int, 0, 3) 517 batchSizes = append(batchSizes, 1) 518 smallButGreaterThanOne := int(math.Trunc(.002 * float64(coldata.BatchSize()))) 519 if smallButGreaterThanOne > 1 { 520 batchSizes = append(batchSizes, smallButGreaterThanOne) 521 } 522 batchSizes = append(batchSizes, coldata.BatchSize()) 523 524 for _, batchSize := range batchSizes { 525 for _, useSel := range []bool{false, true} { 526 t.Run(fmt.Sprintf("batchSize=%d/sel=%t", batchSize, useSel), func(t *testing.T) { 527 inputSources := make([]colexecbase.Operator, len(tups)) 528 var inputTypes []*types.T 529 if useSel { 530 for i, tup := range tups { 531 if typs != nil { 532 inputTypes = typs[i] 533 } 534 rng, _ := randutil.NewPseudoRand() 535 inputSources[i] = newOpTestSelInput(rng, batchSize, tup, inputTypes) 536 } 537 } else { 538 for i, tup := range tups { 539 if typs != nil { 540 inputTypes = typs[i] 541 } 542 inputSources[i] = newOpTestInput(batchSize, tup, inputTypes) 543 } 544 } 545 test(t, inputSources) 546 }) 547 } 548 } 549 } 550 551 // runTestsWithFixedSel is a helper that (with a given fixed selection vector) 552 // automatically runs your tests with varied batch sizes. Provide a test 553 // function that takes a list of input Operators, which will give back the 554 // tuples provided in batches. 555 func runTestsWithFixedSel( 556 t *testing.T, 557 tups []tuples, 558 typs []*types.T, 559 sel []int, 560 test func(t *testing.T, inputs []colexecbase.Operator), 561 ) { 562 for _, batchSize := range []int{1, 2, 3, 16, 1024} { 563 t.Run(fmt.Sprintf("batchSize=%d/fixedSel", batchSize), func(t *testing.T) { 564 inputSources := make([]colexecbase.Operator, len(tups)) 565 for i, tup := range tups { 566 inputSources[i] = newOpFixedSelTestInput(sel, batchSize, tup, typs) 567 } 568 test(t, inputSources) 569 }) 570 } 571 } 572 573 // setColVal is a test helper function to set the given value at the equivalent 574 // col[idx]. This function is slow due to reflection. 
575 func setColVal(vec coldata.Vec, idx int, val interface{}) { 576 canonicalTypeFamily := vec.CanonicalTypeFamily() 577 if canonicalTypeFamily == types.BytesFamily { 578 var ( 579 bytesVal []byte 580 ok bool 581 ) 582 bytesVal, ok = val.([]byte) 583 if !ok { 584 bytesVal = []byte(val.(string)) 585 } 586 vec.Bytes().Set(idx, bytesVal) 587 } else if canonicalTypeFamily == types.DecimalFamily { 588 // setColVal is used in multiple places, therefore val can be either a float 589 // or apd.Decimal. 590 if decimalVal, ok := val.(apd.Decimal); ok { 591 vec.Decimal()[idx].Set(&decimalVal) 592 } else { 593 floatVal := val.(float64) 594 decimalVal, _, err := apd.NewFromString(fmt.Sprintf("%f", floatVal)) 595 if err != nil { 596 colexecerror.InternalError( 597 fmt.Sprintf("unable to set decimal %f: %v", floatVal, err)) 598 } 599 // .Set is used here instead of assignment to ensure the pointer address 600 // of the underlying storage for apd.Decimal remains the same. This can 601 // cause the code that does not properly use execgen package to fail. 602 vec.Decimal()[idx].Set(decimalVal) 603 } 604 } else if canonicalTypeFamily == typeconv.DatumVecCanonicalTypeFamily { 605 switch vec.Type().Family() { 606 case types.JsonFamily: 607 if jsonStr, ok := val.(string); ok { 608 jobj, err := json.ParseJSON(jsonStr) 609 if err != nil { 610 colexecerror.InternalError( 611 fmt.Sprintf("unable to parse json object: %v: %v", jobj, err)) 612 } 613 vec.Datum().Set(idx, &tree.DJSON{JSON: jobj}) 614 } else if jobj, ok := val.(json.JSON); ok { 615 vec.Datum().Set(idx, &tree.DJSON{JSON: jobj}) 616 } 617 default: 618 colexecerror.InternalError(fmt.Sprintf("unexpected datum-backed type: %s", vec.Type())) 619 } 620 } else { 621 reflect.ValueOf(vec.Col()).Index(idx).Set(reflect.ValueOf(val).Convert(reflect.TypeOf(vec.Col()).Elem())) 622 } 623 } 624 625 // extrapolateTypesFromTuples determines the type schema based on the input 626 // tuples. 
627 func extrapolateTypesFromTuples(tups tuples) []*types.T { 628 typs := make([]*types.T, len(tups[0])) 629 for i := range typs { 630 // Default type for test cases is Int64 in case the entire column is 631 // null and the type is indeterminate. 632 typs[i] = types.Int 633 for _, tup := range tups { 634 if tup[i] != nil { 635 typs[i] = typeconv.UnsafeFromGoType(tup[i]) 636 break 637 } 638 } 639 } 640 return typs 641 } 642 643 // opTestInput is an Operator that columnarizes test input in the form of 644 // tuples of arbitrary Go types. It's meant to be used in Operator unit tests 645 // in conjunction with opTestOutput like the following: 646 // 647 // inputTuples := tuples{ 648 // {1,2,3.3,true}, 649 // {5,6,7.0,false}, 650 // } 651 // tupleSource := newOpTestInput(inputTuples, types.Bool) 652 // opUnderTest := newFooOp(tupleSource, ...) 653 // output := newOpTestOutput(opUnderTest, expectedOutputTuples) 654 // if err := output.Verify(); err != nil { 655 // t.Fatal(err) 656 // } 657 type opTestInput struct { 658 colexecbase.ZeroInputNode 659 660 typs []*types.T 661 662 batchSize int 663 tuples tuples 664 batch coldata.Batch 665 useSel bool 666 rng *rand.Rand 667 selection []int 668 669 // injectAllNulls determines whether opTestInput will replace all values in 670 // the input tuples with nulls. 671 injectAllNulls bool 672 673 // injectRandomNulls determines whether opTestInput will randomly replace 674 // each value in the input tuples with a null. 675 injectRandomNulls bool 676 } 677 678 var _ colexecbase.Operator = &opTestInput{} 679 680 // newOpTestInput returns a new opTestInput with the given input tuples and the 681 // given type schema. If typs is nil, the input tuples are translated into 682 // types automatically, using simple rules (e.g. integers always become Int64). 
func newOpTestInput(batchSize int, tuples tuples, typs []*types.T) *opTestInput {
	ret := &opTestInput{
		batchSize: batchSize,
		tuples:    tuples,
		typs:      typs,
	}
	return ret
}

// newOpTestSelInput is like newOpTestInput, but the returned opTestInput emits
// its batches with a selection vector that is shuffled using rng.
func newOpTestSelInput(rng *rand.Rand, batchSize int, tuples tuples, typs []*types.T) *opTestInput {
	ret := &opTestInput{
		useSel:    true,
		rng:       rng,
		batchSize: batchSize,
		tuples:    tuples,
		typs:      typs,
	}
	return ret
}

// Init determines the type schema (extrapolating it from the tuples when none
// was provided), allocates the batch that is reused by every Next call, and
// sets the selection vector to the identity mapping. It raises an internal
// error if there are neither types nor tuples to derive them from.
func (s *opTestInput) Init() {
	if s.typs == nil {
		if len(s.tuples) == 0 {
			colexecerror.InternalError("empty tuple source with no specified types")
		}
		s.typs = extrapolateTypesFromTuples(s.tuples)
	}
	s.batch = testAllocator.NewMemBatch(s.typs)

	s.selection = make([]int, coldata.BatchSize())
	for i := range s.selection {
		s.selection[i] = i
	}
}

// Next consumes up to batchSize of the remaining tuples, writes them into the
// reused batch (through a freshly shuffled selection vector when useSel is
// set, and with nulls injected when requested) and returns that batch. Once
// all tuples have been consumed it returns coldata.ZeroBatch.
func (s *opTestInput) Next(context.Context) coldata.Batch {
	s.batch.ResetInternalBatch()
	if len(s.tuples) == 0 {
		return coldata.ZeroBatch
	}
	batchSize := s.batchSize
	if len(s.tuples) < batchSize {
		batchSize = len(s.tuples)
	}
	// Pop the tuples for this batch off the front of the remaining input.
	tups := s.tuples[:batchSize]
	s.tuples = s.tuples[batchSize:]

	// All tuples within a batch must have the same number of values.
	tupleLen := len(tups[0])
	for i := range tups {
		if len(tups[i]) != tupleLen {
			colexecerror.InternalError(fmt.Sprintf("mismatched tuple lens: found %+v expected %d vals",
				tups[i], tupleLen))
		}
	}

	if s.useSel {
		for i := range s.selection {
			s.selection[i] = i
		}
		// We have populated s.selection vector with possibly more indices than we
		// have actual tuples for, so some "default" tuples will be introduced but
		// will not be selected due to the length of the batch being equal to the
		// number of actual tuples.
		//
		// To introduce an element of chaos in the testing process we shuffle the
		// selection vector; however, in the real environment we expect that
		// indices in the selection vector to be in ascending order, so we sort
		// only those indices that correspond to the actual tuples. For example,
		// say we have 3 actual tuples, and after shuffling the selection vector
		// is [200, 50, 100, ...], so we sort only those 3 values to get to
		// [50, 100, 200, ...] in order to "scan" the selection vector in
		// sequential order.
		s.rng.Shuffle(len(s.selection), func(i, j int) {
			s.selection[i], s.selection[j] = s.selection[j], s.selection[i]
		})
		sort.Slice(s.selection[:batchSize], func(i, j int) bool {
			return s.selection[i] < s.selection[j]
		})
		// Any unused elements in the selection vector are set to a value larger
		// than the max batch size, so the test will panic if this part of the slice
		// is accidentally accessed.
		for i := range s.selection[batchSize:] {
			s.selection[batchSize+i] = coldata.BatchSize() + 1
		}

		s.batch.SetSelection(true)
		copy(s.batch.Selection(), s.selection)
	}

	// Reset nulls for all columns in this batch.
	for _, colVec := range s.batch.ColVecs() {
		if colVec.CanonicalTypeFamily() != types.UnknownFamily {
			colVec.Nulls().UnsetNulls()
		}
	}

	// Note that this generator is re-created with the same fixed seed on every
	// call to Next, so every call draws the same pseudo-random sequence for the
	// null injection and garbage data below.
	rng := rand.New(rand.NewSource(123))

	for i := range s.typs {
		vec := s.batch.ColVec(i)
		// Automatically convert the Go values into exec.Type slice elements using
		// reflection. This is slow, but acceptable for tests.
		col := reflect.ValueOf(vec.Col())
		for j := 0; j < batchSize; j++ {
			// If useSel is false, then the selection vector still holds the identity
			// mapping set up in Init, so outputIdx is simply j.
			outputIdx := s.selection[j]
			// rng is only consulted here when random null injection is enabled
			// (the && short-circuits otherwise).
			injectRandomNull := s.injectRandomNulls && rng.Float64() < 0.5
			if tups[j][i] == nil || s.injectAllNulls || injectRandomNull {
				vec.Nulls().SetNull(outputIdx)
				if rng.Float64() < 0.5 {
					// With 50% probability we set garbage data in the value to make sure
					// that it doesn't affect the computation when the value is actually
					// NULL. For the other 50% of cases we leave the data unset which
					// exercises other scenarios (like division by zero when the value is
					// actually NULL).
					canonicalTypeFamily := vec.CanonicalTypeFamily()
					if canonicalTypeFamily == types.DecimalFamily {
						d := apd.Decimal{}
						_, err := d.SetFloat64(rng.Float64())
						if err != nil {
							colexecerror.InternalError(fmt.Sprintf("%v", err))
						}
						col.Index(outputIdx).Set(reflect.ValueOf(d))
					} else if canonicalTypeFamily == types.BytesFamily {
						newBytes := make([]byte, rng.Intn(16)+1)
						rng.Read(newBytes)
						setColVal(vec, outputIdx, newBytes)
					} else if canonicalTypeFamily == typeconv.DatumVecCanonicalTypeFamily {
						switch vec.Type().Family() {
						case types.JsonFamily:
							newBytes := make([]byte, rng.Intn(16)+1)
							rng.Read(newBytes)
							// Note: this j shadows the loop index within this case.
							j := json.FromString(string(newBytes))
							setColVal(vec, outputIdx, j)
						default:
							colexecerror.InternalError(fmt.Sprintf("unexpected datum-backed type: %s", vec.Type()))
						}
					} else if val, ok := quick.Value(reflect.TypeOf(vec.Col()).Elem(), rng); ok {
						setColVal(vec, outputIdx, val.Interface())
					} else {
						colexecerror.InternalError(fmt.Sprintf("could not generate a random value of type %s", vec.Type()))
					}
				}
			} else {
				setColVal(vec, outputIdx, tups[j][i])
			}
		}
	}

	s.batch.SetLength(batchSize)
	return s.batch
}

// opFixedSelTestInput is an Operator that columnarizes test tuples and emits
// them in batches using a caller-provided, fixed selection vector (or no
// selection vector at all when sel is nil).
type opFixedSelTestInput struct {
	colexecbase.ZeroInputNode

	typs []*types.T

	batchSize int
	tuples    tuples
	batch     coldata.Batch
	sel       []int
	// idx is the index of the tuple to be emitted next. We need to maintain it
	// in case the provided selection vector or provided tuples (if sel is nil)
	// is longer than requested batch size.
	idx int
}

var _ colexecbase.Operator = &opFixedSelTestInput{}

// newOpFixedSelTestInput returns a new opFixedSelTestInput with the given
// input tuples and selection vector. If typs is nil, the input tuples are
// translated into types automatically when the input is initialized, using
// simple rules (e.g. integers always become Int64).
func newOpFixedSelTestInput(
	sel []int, batchSize int, tuples tuples, typs []*types.T,
) *opFixedSelTestInput {
	ret := &opFixedSelTestInput{
		batchSize: batchSize,
		sel:       sel,
		tuples:    tuples,
		typs:      typs,
	}
	return ret
}

// Init determines the type schema (extrapolating it from the tuples when none
// was provided), allocates the batch, and validates that every tuple
// referenced by the selection vector has the same number of values as the
// first tuple. When a selection vector was given, all tuples are written into
// the batch right away.
func (s *opFixedSelTestInput) Init() {
	if s.typs == nil {
		if len(s.tuples) == 0 {
			colexecerror.InternalError("empty tuple source with no specified types")
		}
		s.typs = extrapolateTypesFromTuples(s.tuples)
	}

	s.batch = testAllocator.NewMemBatch(s.typs)
	// Note that the first tuple is indexed unconditionally here.
	tupleLen := len(s.tuples[0])
	for _, i := range s.sel {
		if len(s.tuples[i]) != tupleLen {
			colexecerror.InternalError(fmt.Sprintf("mismatched tuple lens: found %+v expected %d vals",
				s.tuples[i], tupleLen))
		}
	}

	// Reset nulls for all columns in this batch.
	for i := 0; i < s.batch.Width(); i++ {
		s.batch.ColVec(i).Nulls().UnsetNulls()
	}

	if s.sel != nil {
		s.batch.SetSelection(true)
		// When non-nil selection vector is given, we convert all tuples into the
		// Go values at once, and we'll be copying an appropriate chunk of the
		// selection vector later in Next().
		for i := range s.typs {
			vec := s.batch.ColVec(i)
			// Automatically convert the Go values into exec.Type slice elements using
			// reflection. This is slow, but acceptable for tests.
			for j := 0; j < len(s.tuples); j++ {
				if s.tuples[j][i] == nil {
					vec.Nulls().SetNull(j)
				} else {
					setColVal(vec, j, s.tuples[j][i])
				}
			}
		}
	}
}

// Next returns the next batch of at most batchSize tuples. Without a selection
// vector the next chunk of tuples is written into the batch; once the tuples
// run out the batch is returned with length zero. With a selection vector the
// values are already in place, so only the next chunk of the selection vector
// is copied, and coldata.ZeroBatch is returned once it has been exhausted.
func (s *opFixedSelTestInput) Next(context.Context) coldata.Batch {
	var batchSize int
	if s.sel == nil {
		batchSize = s.batchSize
		if len(s.tuples)-s.idx < batchSize {
			batchSize = len(s.tuples) - s.idx
		}
		// When nil selection vector is given, we convert only the tuples that fit
		// into the current batch (keeping the s.idx in mind).
		for i := range s.typs {
			vec := s.batch.ColVec(i)
			vec.Nulls().UnsetNulls()
			for j := 0; j < batchSize; j++ {
				if s.tuples[s.idx+j][i] == nil {
					vec.Nulls().SetNull(j)
				} else {
					// Automatically convert the Go values into exec.Type slice elements using
					// reflection. This is slow, but acceptable for tests.
					setColVal(vec, j, s.tuples[s.idx+j][i])
				}
			}
		}
	} else {
		if s.idx == len(s.sel) {
			return coldata.ZeroBatch
		}
		batchSize = s.batchSize
		if len(s.sel)-s.idx < batchSize {
			batchSize = len(s.sel) - s.idx
		}
		// All tuples have already been converted to the Go values, so we only need
		// to set the right selection vector for s.batch.
		copy(s.batch.Selection(), s.sel[s.idx:s.idx+batchSize])
	}
	s.batch.SetLength(batchSize)
	s.idx += batchSize
	return s.batch
}

// opTestOutput is a test verification struct that ensures its input batches
// match some expected output tuples.
type opTestOutput struct {
	OneInputNode
	// expected holds the tuples that the input is expected to produce.
	expected tuples

	// curIdx is the position of the next tuple to read from batch.
	curIdx int
	// batch is the batch most recently fetched from the input.
	batch coldata.Batch
}

// newOpTestOutput returns a new opTestOutput, initialized with the given input
// to verify that the output is exactly equal to the expected tuples.
963 func newOpTestOutput(input colexecbase.Operator, expected tuples) *opTestOutput { 964 input.Init() 965 966 return &opTestOutput{ 967 OneInputNode: NewOneInputNode(input), 968 expected: expected, 969 } 970 } 971 972 // getTupleFromBatch is a helper function that extracts a tuple at index 973 // tupleIdx from batch. 974 func getTupleFromBatch(batch coldata.Batch, tupleIdx int) tuple { 975 ret := make(tuple, batch.Width()) 976 out := reflect.ValueOf(ret) 977 if sel := batch.Selection(); sel != nil { 978 tupleIdx = sel[tupleIdx] 979 } 980 for colIdx := range ret { 981 vec := batch.ColVec(colIdx) 982 if vec.Nulls().NullAt(tupleIdx) { 983 ret[colIdx] = nil 984 } else { 985 var val reflect.Value 986 if colBytes, ok := vec.Col().(*coldata.Bytes); ok { 987 val = reflect.ValueOf(append([]byte(nil), colBytes.Get(tupleIdx)...)) 988 } else if vec.CanonicalTypeFamily() == types.DecimalFamily { 989 colDec := vec.Decimal() 990 var newDec apd.Decimal 991 newDec.Set(&colDec[tupleIdx]) 992 val = reflect.ValueOf(newDec) 993 } else if vec.CanonicalTypeFamily() == typeconv.DatumVecCanonicalTypeFamily { 994 switch vec.Type().Family() { 995 case types.JsonFamily: 996 d := vec.Datum().Get(tupleIdx).(*coldataext.Datum).Datum 997 if d == tree.DNull { 998 val = reflect.ValueOf(tree.DNull) 999 } else { 1000 val = reflect.ValueOf(d.(*tree.DJSON).JSON) 1001 } 1002 default: 1003 colexecerror.InternalError(fmt.Sprintf("unexpected datum-backed type: %s", vec.Type())) 1004 } 1005 } else { 1006 val = reflect.ValueOf(vec.Col()).Index(tupleIdx) 1007 } 1008 out.Index(colIdx).Set(val) 1009 } 1010 } 1011 return ret 1012 } 1013 1014 func (r *opTestOutput) next(ctx context.Context) tuple { 1015 if r.batch == nil || r.curIdx >= r.batch.Length() { 1016 // Get a fresh batch. 
1017 r.batch = r.input.Next(ctx) 1018 if r.batch.Length() == 0 { 1019 return nil 1020 } 1021 r.curIdx = 0 1022 } 1023 ret := getTupleFromBatch(r.batch, r.curIdx) 1024 r.curIdx++ 1025 return ret 1026 } 1027 1028 // Verify ensures that the input to this opTestOutput produced the same results 1029 // and in the same order as the ones expected in the opTestOutput's expected 1030 // tuples, using a slow, reflection-based comparison method, returning an error 1031 // if the input isn't equal to the expected. 1032 func (r *opTestOutput) Verify() error { 1033 ctx := context.Background() 1034 var actual tuples 1035 for { 1036 tup := r.next(ctx) 1037 if tup == nil { 1038 break 1039 } 1040 actual = append(actual, tup) 1041 } 1042 return assertTuplesOrderedEqual(r.expected, actual) 1043 } 1044 1045 // VerifyAnyOrder ensures that the input to this opTestOutput produced the same 1046 // results but in any order (meaning set comparison behavior is used) as the 1047 // ones expected in the opTestOutput's expected tuples, using a slow, 1048 // reflection-based comparison method, returning an error if the input isn't 1049 // equal to the expected. 1050 func (r *opTestOutput) VerifyAnyOrder() error { 1051 ctx := context.Background() 1052 var actual tuples 1053 for { 1054 tup := r.next(ctx) 1055 if tup == nil { 1056 break 1057 } 1058 actual = append(actual, tup) 1059 } 1060 return assertTuplesSetsEqual(r.expected, actual) 1061 } 1062 1063 // tupleEquals checks that two tuples are equal, using a slow, 1064 // reflection-based method to do the comparison. Reflection is used so that 1065 // values can be compared in a type-agnostic way. 
1066 func tupleEquals(expected tuple, actual tuple) bool { 1067 if len(expected) != len(actual) { 1068 return false 1069 } 1070 for i := 0; i < len(actual); i++ { 1071 if expected[i] == nil || actual[i] == nil { 1072 if expected[i] != nil || actual[i] != nil { 1073 return false 1074 } 1075 } else { 1076 // Special case for NaN, since it does not equal itself. 1077 if f1, ok := expected[i].(float64); ok { 1078 if f2, ok := actual[i].(float64); ok { 1079 if math.IsNaN(f1) && math.IsNaN(f2) { 1080 continue 1081 } else if !math.IsNaN(f1) && !math.IsNaN(f2) && math.Abs(f1-f2) < 1e-6 { 1082 continue 1083 } 1084 } 1085 } 1086 if d1, ok := actual[i].(apd.Decimal); ok { 1087 if f2, ok := expected[i].(float64); ok { 1088 d2, _, err := apd.NewFromString(fmt.Sprintf("%f", f2)) 1089 if err == nil && d1.Cmp(d2) == 0 { 1090 continue 1091 } else { 1092 return false 1093 } 1094 } 1095 } 1096 if j1, ok := actual[i].(json.JSON); ok { 1097 if j2, ok := expected[i].(json.JSON); ok { 1098 if cmp, err := j1.Compare(j2); err == nil && cmp == 0 { 1099 continue 1100 } 1101 } else if str2, ok := expected[i].(string); ok { 1102 j2, err := json.ParseJSON(str2) 1103 if err != nil { 1104 return false 1105 } 1106 if cmp, err := j1.Compare(j2); err == nil && cmp == 0 { 1107 continue 1108 } 1109 } 1110 return false 1111 } 1112 if !reflect.DeepEqual( 1113 reflect.ValueOf(actual[i]).Convert(reflect.TypeOf(expected[i])).Interface(), 1114 expected[i], 1115 ) || !reflect.DeepEqual( 1116 reflect.ValueOf(expected[i]).Convert(reflect.TypeOf(actual[i])).Interface(), 1117 actual[i], 1118 ) { 1119 return false 1120 } 1121 } 1122 } 1123 return true 1124 } 1125 1126 func makeError(expected tuples, actual tuples) error { 1127 var expStr, actStr strings.Builder 1128 for i := range expected { 1129 expStr.WriteString(fmt.Sprintf("%d: %s\n", i, expected[i].String())) 1130 } 1131 for i := range actual { 1132 actStr.WriteString(fmt.Sprintf("%d: %s\n", i, actual[i].String())) 1133 } 1134 1135 diff := 
difflib.UnifiedDiff{ 1136 A: difflib.SplitLines(expStr.String()), 1137 B: difflib.SplitLines(actStr.String()), 1138 Context: 100, 1139 } 1140 text, err := difflib.GetUnifiedDiffString(diff) 1141 if err != nil { 1142 return errors.Errorf("expected didn't match actual, failed to make diff %s", err) 1143 } 1144 return errors.Errorf("expected didn't match actual. diff:\n%s", text) 1145 } 1146 1147 // assertTuplesSetsEqual asserts that two sets of tuples are equal. 1148 func assertTuplesSetsEqual(expected tuples, actual tuples) error { 1149 if len(expected) != len(actual) { 1150 return makeError(expected, actual) 1151 } 1152 actual = actual.sort() 1153 expected = expected.sort() 1154 return assertTuplesOrderedEqual(expected, actual) 1155 } 1156 1157 // assertTuplesOrderedEqual asserts that two permutations of tuples are equal 1158 // in order. 1159 func assertTuplesOrderedEqual(expected tuples, actual tuples) error { 1160 if len(expected) != len(actual) { 1161 return errors.Errorf("expected %+v, actual %+v", expected, actual) 1162 } 1163 for i := range expected { 1164 if !tupleEquals(expected[i], actual[i]) { 1165 return makeError(expected, actual) 1166 } 1167 } 1168 return nil 1169 } 1170 1171 // finiteBatchSource is an Operator that returns the same batch a specified 1172 // number of times. 1173 type finiteBatchSource struct { 1174 colexecbase.ZeroInputNode 1175 1176 repeatableBatch *colexecbase.RepeatableBatchSource 1177 1178 usableCount int 1179 } 1180 1181 var _ colexecbase.Operator = &finiteBatchSource{} 1182 1183 // newFiniteBatchSource returns a new Operator initialized to return its input 1184 // batch a specified number of times. 
1185 func newFiniteBatchSource( 1186 batch coldata.Batch, typs []*types.T, usableCount int, 1187 ) *finiteBatchSource { 1188 return &finiteBatchSource{ 1189 repeatableBatch: colexecbase.NewRepeatableBatchSource(testAllocator, batch, typs), 1190 usableCount: usableCount, 1191 } 1192 } 1193 1194 func (f *finiteBatchSource) Init() { 1195 f.repeatableBatch.Init() 1196 } 1197 1198 func (f *finiteBatchSource) Next(ctx context.Context) coldata.Batch { 1199 if f.usableCount > 0 { 1200 f.usableCount-- 1201 return f.repeatableBatch.Next(ctx) 1202 } 1203 return coldata.ZeroBatch 1204 } 1205 1206 func (f *finiteBatchSource) reset(usableCount int) { 1207 f.usableCount = usableCount 1208 } 1209 1210 // finiteChunksSource is an Operator that returns a batch specified number of 1211 // times. The first matchLen columns of the batch are incremented every time 1212 // (except for the first) the batch is returned to emulate source that is 1213 // already ordered on matchLen columns. 1214 type finiteChunksSource struct { 1215 colexecbase.ZeroInputNode 1216 repeatableBatch *colexecbase.RepeatableBatchSource 1217 1218 usableCount int 1219 matchLen int 1220 adjustment []int64 1221 } 1222 1223 var _ colexecbase.Operator = &finiteChunksSource{} 1224 1225 func newFiniteChunksSource( 1226 batch coldata.Batch, typs []*types.T, usableCount int, matchLen int, 1227 ) *finiteChunksSource { 1228 return &finiteChunksSource{ 1229 repeatableBatch: colexecbase.NewRepeatableBatchSource(testAllocator, batch, typs), 1230 usableCount: usableCount, 1231 matchLen: matchLen, 1232 } 1233 } 1234 1235 func (f *finiteChunksSource) Init() { 1236 f.repeatableBatch.Init() 1237 f.adjustment = make([]int64, f.matchLen) 1238 } 1239 1240 func (f *finiteChunksSource) Next(ctx context.Context) coldata.Batch { 1241 if f.usableCount > 0 { 1242 f.usableCount-- 1243 batch := f.repeatableBatch.Next(ctx) 1244 if f.matchLen > 0 && f.adjustment[0] == 0 { 1245 // We need to calculate the difference between the first and the last 
1246 // tuples in batch in first matchLen columns so that in the following 1247 // calls to Next() the batch is adjusted such that tuples in consecutive 1248 // batches are ordered on the first matchLen columns. 1249 for col := 0; col < f.matchLen; col++ { 1250 firstValue := batch.ColVec(col).Int64()[0] 1251 lastValue := batch.ColVec(col).Int64()[batch.Length()-1] 1252 f.adjustment[col] = lastValue - firstValue + 1 1253 } 1254 } else { 1255 for i := 0; i < f.matchLen; i++ { 1256 int64Vec := batch.ColVec(i).Int64() 1257 for j := range int64Vec { 1258 int64Vec[j] += f.adjustment[i] 1259 } 1260 // We need to update the adjustments because RepeatableBatchSource 1261 // returns the original batch that it was instantiated with, and we 1262 // want to have constantly non-decreasing vectors. 1263 firstValue := batch.ColVec(i).Int64()[0] 1264 lastValue := batch.ColVec(i).Int64()[batch.Length()-1] 1265 f.adjustment[i] += lastValue - firstValue + 1 1266 } 1267 } 1268 return batch 1269 } 1270 return coldata.ZeroBatch 1271 } 1272 1273 func TestOpTestInputOutput(t *testing.T) { 1274 defer leaktest.AfterTest(t)() 1275 inputs := []tuples{ 1276 { 1277 {1, 2, 100}, 1278 {1, 3, -3}, 1279 {0, 4, 5}, 1280 {1, 5, 0}, 1281 }, 1282 } 1283 runTestsWithFn(t, inputs, nil /* typs */, func(t *testing.T, sources []colexecbase.Operator) { 1284 out := newOpTestOutput(sources[0], inputs[0]) 1285 1286 if err := out.Verify(); err != nil { 1287 t.Fatal(err) 1288 } 1289 }) 1290 } 1291 1292 func TestRepeatableBatchSource(t *testing.T) { 1293 defer leaktest.AfterTest(t)() 1294 typs := []*types.T{types.Int} 1295 batch := testAllocator.NewMemBatch(typs) 1296 batchLen := 10 1297 if coldata.BatchSize() < batchLen { 1298 batchLen = coldata.BatchSize() 1299 } 1300 batch.SetLength(batchLen) 1301 input := colexecbase.NewRepeatableBatchSource(testAllocator, batch, typs) 1302 1303 b := input.Next(context.Background()) 1304 b.SetLength(0) 1305 b.SetSelection(true) 1306 1307 b = input.Next(context.Background()) 
1308 if b.Length() != batchLen { 1309 t.Fatalf("expected RepeatableBatchSource to reset batch length to %d, found %d", batchLen, b.Length()) 1310 } 1311 if b.Selection() != nil { 1312 t.Fatalf("expected RepeatableBatchSource to reset selection vector, found %+v", b.Selection()) 1313 } 1314 } 1315 1316 func TestRepeatableBatchSourceWithFixedSel(t *testing.T) { 1317 defer leaktest.AfterTest(t)() 1318 typs := []*types.T{types.Int} 1319 batch := testAllocator.NewMemBatch(typs) 1320 rng, _ := randutil.NewPseudoRand() 1321 batchSize := 10 1322 if batchSize > coldata.BatchSize() { 1323 batchSize = coldata.BatchSize() 1324 } 1325 sel := coldatatestutils.RandomSel(rng, batchSize, 0 /* probOfOmitting */) 1326 batchLen := len(sel) 1327 batch.SetLength(batchLen) 1328 batch.SetSelection(true) 1329 copy(batch.Selection(), sel) 1330 input := colexecbase.NewRepeatableBatchSource(testAllocator, batch, typs) 1331 b := input.Next(context.Background()) 1332 1333 b.SetLength(0) 1334 b.SetSelection(false) 1335 b = input.Next(context.Background()) 1336 if b.Length() != batchLen { 1337 t.Fatalf("expected RepeatableBatchSource to reset batch length to %d, found %d", batchLen, b.Length()) 1338 } 1339 if b.Selection() == nil { 1340 t.Fatalf("expected RepeatableBatchSource to reset selection vector, expected %v but found %+v", sel, b.Selection()) 1341 } else { 1342 for i := 0; i < batchLen; i++ { 1343 if b.Selection()[i] != sel[i] { 1344 t.Fatalf("expected RepeatableBatchSource to reset selection vector, expected %v but found %+v", sel, b.Selection()) 1345 } 1346 } 1347 } 1348 1349 newSel := coldatatestutils.RandomSel(rng, 10 /* batchSize */, 0.2 /* probOfOmitting */) 1350 newBatchLen := len(sel) 1351 b.SetLength(newBatchLen) 1352 b.SetSelection(true) 1353 copy(b.Selection(), newSel) 1354 b = input.Next(context.Background()) 1355 if b.Length() != batchLen { 1356 t.Fatalf("expected RepeatableBatchSource to reset batch length to %d, found %d", batchLen, b.Length()) 1357 } 1358 if b.Selection() 
== nil { 1359 t.Fatalf("expected RepeatableBatchSource to reset selection vector, expected %v but found %+v", sel, b.Selection()) 1360 } else { 1361 for i := 0; i < batchLen; i++ { 1362 if b.Selection()[i] != sel[i] { 1363 t.Fatalf("expected RepeatableBatchSource to reset selection vector, expected %v but found %+v", sel, b.Selection()) 1364 } 1365 } 1366 } 1367 } 1368 1369 // chunkingBatchSource is a batch source that takes unlimited-size columns and 1370 // chunks them into BatchSize()-sized chunks when Nexted. 1371 type chunkingBatchSource struct { 1372 colexecbase.ZeroInputNode 1373 typs []*types.T 1374 cols []coldata.Vec 1375 len int 1376 1377 curIdx int 1378 batch coldata.Batch 1379 } 1380 1381 var _ colexecbase.Operator = &chunkingBatchSource{} 1382 1383 // newChunkingBatchSource returns a new chunkingBatchSource with the given 1384 // column types, columns, and length. 1385 func newChunkingBatchSource(typs []*types.T, cols []coldata.Vec, len int) *chunkingBatchSource { 1386 return &chunkingBatchSource{ 1387 typs: typs, 1388 cols: cols, 1389 len: len, 1390 } 1391 } 1392 1393 func (c *chunkingBatchSource) Init() { 1394 c.batch = testAllocator.NewMemBatch(c.typs) 1395 for i := range c.cols { 1396 c.batch.ColVec(i).SetCol(c.cols[i].Col()) 1397 c.batch.ColVec(i).SetNulls(c.cols[i].Nulls()) 1398 } 1399 } 1400 1401 func (c *chunkingBatchSource) Next(context.Context) coldata.Batch { 1402 if c.curIdx >= c.len { 1403 return coldata.ZeroBatch 1404 } 1405 // Explicitly set to false since this could be modified by the downstream 1406 // operators. This is sufficient because both the vectors and the nulls are 1407 // explicitly set below. ResetInternalBatch cannot be used here because we're 1408 // operating on Windows into the vectors. 
1409 c.batch.SetSelection(false) 1410 lastIdx := c.curIdx + coldata.BatchSize() 1411 if lastIdx > c.len { 1412 lastIdx = c.len 1413 } 1414 for i, vec := range c.batch.ColVecs() { 1415 vec.SetCol(c.cols[i].Window(c.curIdx, lastIdx).Col()) 1416 nullsSlice := c.cols[i].Nulls().Slice(c.curIdx, lastIdx) 1417 vec.SetNulls(&nullsSlice) 1418 } 1419 c.batch.SetLength(lastIdx - c.curIdx) 1420 c.curIdx = lastIdx 1421 return c.batch 1422 } 1423 1424 func (c *chunkingBatchSource) reset() { 1425 c.curIdx = 0 1426 } 1427 1428 // joinTestCase is a helper struct shared by the hash and merge join unit 1429 // tests. Not all fields have to be filled in, but init() method *must* be 1430 // called. 1431 type joinTestCase struct { 1432 description string 1433 joinType sqlbase.JoinType 1434 leftTuples tuples 1435 leftTypes []*types.T 1436 leftOutCols []uint32 1437 leftEqCols []uint32 1438 leftDirections []execinfrapb.Ordering_Column_Direction 1439 rightTuples tuples 1440 rightTypes []*types.T 1441 rightOutCols []uint32 1442 rightEqCols []uint32 1443 rightDirections []execinfrapb.Ordering_Column_Direction 1444 leftEqColsAreKey bool 1445 rightEqColsAreKey bool 1446 expected tuples 1447 outputBatchSize int 1448 skipAllNullsInjection bool 1449 onExpr execinfrapb.Expression 1450 } 1451 1452 func (tc *joinTestCase) init() { 1453 if tc.outputBatchSize == 0 { 1454 tc.outputBatchSize = coldata.BatchSize() 1455 } 1456 1457 if len(tc.leftDirections) == 0 { 1458 tc.leftDirections = make([]execinfrapb.Ordering_Column_Direction, len(tc.leftTypes)) 1459 for i := range tc.leftDirections { 1460 tc.leftDirections[i] = execinfrapb.Ordering_Column_ASC 1461 } 1462 } 1463 1464 if len(tc.rightDirections) == 0 { 1465 tc.rightDirections = make([]execinfrapb.Ordering_Column_Direction, len(tc.rightTypes)) 1466 for i := range tc.rightDirections { 1467 tc.rightDirections[i] = execinfrapb.Ordering_Column_ASC 1468 } 1469 } 1470 } 1471 1472 // mutateTypes returns a slice of joinTestCases with varied types. 
Assumes 1473 // the input is made up of just int64s. Calling this 1474 func (tc *joinTestCase) mutateTypes() []*joinTestCase { 1475 ret := []*joinTestCase{tc} 1476 1477 for _, typ := range []*types.T{types.Decimal, types.Bytes} { 1478 if typ.Identical(types.Bytes) { 1479 // Skip test cases with ON conditions for now, since those expect 1480 // numeric inputs. 1481 if !tc.onExpr.Empty() { 1482 continue 1483 } 1484 } 1485 newTc := *tc 1486 newTc.leftTypes = make([]*types.T, len(tc.leftTypes)) 1487 newTc.rightTypes = make([]*types.T, len(tc.rightTypes)) 1488 copy(newTc.leftTypes, tc.leftTypes) 1489 copy(newTc.rightTypes, tc.rightTypes) 1490 for _, typs := range [][]*types.T{newTc.leftTypes, newTc.rightTypes} { 1491 for i := range typs { 1492 if !typ.Identical(types.Int) { 1493 // We currently can only mutate test cases that are made up of int64 1494 // only. 1495 return ret 1496 } 1497 typs[i] = typ 1498 } 1499 } 1500 newTc.leftTuples = tc.leftTuples.clone() 1501 newTc.rightTuples = tc.rightTuples.clone() 1502 newTc.expected = tc.expected.clone() 1503 1504 for _, tups := range []tuples{newTc.leftTuples, newTc.rightTuples, newTc.expected} { 1505 for i := range tups { 1506 for j := range tups[i] { 1507 if tups[i][j] == nil { 1508 continue 1509 } 1510 switch typeconv.TypeFamilyToCanonicalTypeFamily(typ.Family()) { 1511 case types.DecimalFamily: 1512 var d apd.Decimal 1513 _, _ = d.SetFloat64(float64(tups[i][j].(int))) 1514 tups[i][j] = d 1515 case types.BytesFamily: 1516 tups[i][j] = fmt.Sprintf("%.10d", tups[i][j].(int)) 1517 } 1518 } 1519 } 1520 } 1521 ret = append(ret, &newTc) 1522 } 1523 return ret 1524 } 1525 1526 type sortTestCase struct { 1527 description string 1528 tuples tuples 1529 expected tuples 1530 typs []*types.T 1531 ordCols []execinfrapb.Ordering_Column 1532 matchLen int 1533 k int 1534 } 1535 1536 // Mock typing context for the typechecker. 
1537 type mockTypeContext struct { 1538 typs []*types.T 1539 } 1540 1541 func (p *mockTypeContext) IndexedVarEval(idx int, ctx *tree.EvalContext) (tree.Datum, error) { 1542 return tree.DNull.Eval(ctx) 1543 } 1544 1545 func (p *mockTypeContext) IndexedVarResolvedType(idx int) *types.T { 1546 return p.typs[idx] 1547 } 1548 1549 func (p *mockTypeContext) IndexedVarNodeFormatter(idx int) tree.NodeFormatter { 1550 n := tree.Name(fmt.Sprintf("$%d", idx)) 1551 return &n 1552 } 1553 1554 // createTestProjectingOperator creates a projecting operator that performs 1555 // projectingExpr on input that has inputTypes as its output columns. It does 1556 // so by making a noop processor core with post-processing step that passes 1557 // through all input columns and renders an additional column using 1558 // projectingExpr to create the render; then, the processor core is used to 1559 // plan all necessary infrastructure using NewColOperator call. 1560 // - canFallbackToRowexec determines whether NewColOperator will be able to use 1561 // rowexec.NewProcessor to instantiate a wrapped rowexec processor. This should 1562 // be false unless we expect that for some unit tests we will not be able to 1563 // plan the "pure" vectorized operators. 
1564 func createTestProjectingOperator( 1565 ctx context.Context, 1566 flowCtx *execinfra.FlowCtx, 1567 input colexecbase.Operator, 1568 inputTypes []*types.T, 1569 projectingExpr string, 1570 canFallbackToRowexec bool, 1571 ) (colexecbase.Operator, error) { 1572 expr, err := parser.ParseExpr(projectingExpr) 1573 if err != nil { 1574 return nil, err 1575 } 1576 p := &mockTypeContext{typs: inputTypes} 1577 semaCtx := tree.MakeSemaContext() 1578 semaCtx.IVarContainer = p 1579 typedExpr, err := tree.TypeCheck(ctx, expr, &semaCtx, types.Any) 1580 if err != nil { 1581 return nil, err 1582 } 1583 renderExprs := make([]execinfrapb.Expression, len(inputTypes)+1) 1584 for i := range inputTypes { 1585 renderExprs[i].Expr = fmt.Sprintf("@%d", i+1) 1586 } 1587 renderExprs[len(inputTypes)].LocalExpr = typedExpr 1588 spec := &execinfrapb.ProcessorSpec{ 1589 Input: []execinfrapb.InputSyncSpec{{ColumnTypes: inputTypes}}, 1590 Core: execinfrapb.ProcessorCoreUnion{ 1591 Noop: &execinfrapb.NoopCoreSpec{}, 1592 }, 1593 Post: execinfrapb.PostProcessSpec{ 1594 RenderExprs: renderExprs, 1595 }, 1596 } 1597 args := NewColOperatorArgs{ 1598 Spec: spec, 1599 Inputs: []colexecbase.Operator{input}, 1600 StreamingMemAccount: testMemAcc, 1601 } 1602 if canFallbackToRowexec { 1603 args.ProcessorConstructor = rowexec.NewProcessor 1604 } else { 1605 // It is possible that there is a valid projecting operator with the 1606 // given input types, but the vectorized engine doesn't support it. In 1607 // such case in the production code we fall back to row-by-row engine, 1608 // but the caller of this method doesn't want such behavior. In order 1609 // to avoid a nil-pointer exception we mock out the processor 1610 // constructor. 
1611 args.ProcessorConstructor = func( 1612 context.Context, *execinfra.FlowCtx, int32, 1613 *execinfrapb.ProcessorCoreUnion, *execinfrapb.PostProcessSpec, 1614 []execinfra.RowSource, []execinfra.RowReceiver, 1615 []execinfra.LocalProcessor) (execinfra.Processor, error) { 1616 return nil, errors.Errorf("fallback to rowexec is disabled") 1617 } 1618 } 1619 args.TestingKnobs.UseStreamingMemAccountForBuffering = true 1620 result, err := NewColOperator(ctx, flowCtx, args) 1621 if err != nil { 1622 return nil, err 1623 } 1624 return result.Op, nil 1625 }