github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/distsql/columnar_operators_test.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package distsql

import (
	"context"
	"fmt"
	"math/rand"
	"sort"
	"strings"
	"testing"

	"github.com/cockroachdb/cockroach/pkg/col/coldata"
	"github.com/cockroachdb/cockroach/pkg/col/typeconv"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/sql/colexec"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/randutil"
	"github.com/stretchr/testify/require"
)

const nullProbability = 0.2
const randTypesProbability = 0.5

func TestAggregatorAgainstProcessor(t *testing.T) {
	defer leaktest.AfterTest(t)()
	st := cluster.MakeTestingClusterSettings()
	evalCtx := tree.MakeTestingEvalContext(st)
	defer evalCtx.Stop(context.Background())

	rng, seed := randutil.NewPseudoRand()
	nRuns := 20
	nRows := 100
	const (
		maxNumGroupingCols = 3
		nextGroupProb      = 0.2
	)
	groupingCols := make([]uint32, maxNumGroupingCols)
	orderingCols := make([]execinfrapb.Ordering_Column, maxNumGroupingCols)
	for i := uint32(0); i < maxNumGroupingCols; i++ {
		groupingCols[i] = i
		orderingCols[i].ColIdx = i
	}
	var da sqlbase.DatumAlloc

	deterministicAggFns := make([]execinfrapb.AggregatorSpec_Func, 0, len(colexec.SupportedAggFns)-1)
	for _, aggFn := range colexec.SupportedAggFns {
		if aggFn == execinfrapb.AggregatorSpec_ANY_NOT_NULL {
			// We skip ANY_NOT_NULL aggregate function because it returns
			// non-deterministic results.
			continue
		}
		deterministicAggFns = append(deterministicAggFns, aggFn)
	}
	aggregations := make([]execinfrapb.AggregatorSpec_Aggregation, len(deterministicAggFns))
	for _, hashAgg := range []bool{false, true} {
		for numGroupingCols := 1; numGroupingCols <= maxNumGroupingCols; numGroupingCols++ {
			for i, aggFn := range deterministicAggFns {
				aggregations[i].Func = aggFn
				aggregations[i].ColIdx = []uint32{uint32(i + numGroupingCols)}
			}
			inputTypes := make([]*types.T, len(aggregations)+numGroupingCols)
			for i := 0; i < numGroupingCols; i++ {
				inputTypes[i] = types.Int
			}
			outputTypes := make([]*types.T, len(aggregations))

			for run := 0; run < nRuns; run++ {
				var rows sqlbase.EncDatumRows
				// We will be grouping based on the first numGroupingCols columns (which
				// we already set to be of INT types) with the values for the column set
				// manually below.
				for i := range aggregations {
					aggFn := aggregations[i].Func
					var aggTyp *types.T
					for {
						aggTyp = sqlbase.RandType(rng)
						aggInputTypes := []*types.T{aggTyp}
						if aggFn == execinfrapb.AggregatorSpec_COUNT_ROWS {
							// Count rows takes no arguments.
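							// Clearing ColIdx and the input types means that GetAggregateInfo
							// below is asked about COUNT_ROWS with a zero-length argument
							// list, which is how its output type is resolved.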
							aggregations[i].ColIdx = []uint32{}
							aggInputTypes = aggInputTypes[:0]
						}
						if _, outputType, err := execinfrapb.GetAggregateInfo(aggFn, aggInputTypes...); err == nil {
							outputTypes[i] = outputType
							break
						}
					}
					inputTypes[i+numGroupingCols] = aggTyp
				}
				rows = sqlbase.RandEncDatumRowsOfTypes(rng, nRows, inputTypes)
				groupIdx := 0
				for _, row := range rows {
					for i := 0; i < numGroupingCols; i++ {
						if rng.Float64() < nullProbability {
							row[i] = sqlbase.EncDatum{Datum: tree.DNull}
						} else {
							row[i] = sqlbase.EncDatum{Datum: tree.NewDInt(tree.DInt(groupIdx))}
							if rng.Float64() < nextGroupProb {
								groupIdx++
							}
						}
					}
				}

				aggregatorSpec := &execinfrapb.AggregatorSpec{
					Type:         execinfrapb.AggregatorSpec_NON_SCALAR,
					GroupCols:    groupingCols[:numGroupingCols],
					Aggregations: aggregations,
				}
				if !hashAgg {
					aggregatorSpec.OrderedGroupCols = groupingCols[:numGroupingCols]
					orderedCols := execinfrapb.ConvertToColumnOrdering(
						execinfrapb.Ordering{Columns: orderingCols[:numGroupingCols]},
					)
					// Although we build the input rows in "non-decreasing" order, it is
					// possible that some NULL values are present here and there, so we
					// need to sort the rows to satisfy the ordering conditions.
					sort.Slice(rows, func(i, j int) bool {
						cmp, err := rows[i].Compare(inputTypes, &da, orderedCols, &evalCtx, rows[j])
						if err != nil {
							t.Fatal(err)
						}
						return cmp < 0
					})
				}
				pspec := &execinfrapb.ProcessorSpec{
					Input: []execinfrapb.InputSyncSpec{{ColumnTypes: inputTypes}},
					Core:  execinfrapb.ProcessorCoreUnion{Aggregator: aggregatorSpec},
				}
				args := verifyColOperatorArgs{
					anyOrder:    hashAgg,
					inputTypes:  [][]*types.T{inputTypes},
					inputs:      []sqlbase.EncDatumRows{rows},
					outputTypes: outputTypes,
					pspec:       pspec,
				}
				if err := verifyColOperator(args); err != nil {
					// Columnar aggregators check whether an overflow occurs whereas
					// processors don't, so we simply swallow the integer out of range
					// error if such occurs and move on.
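					// Any other error is treated as a genuine failure: the seed and a
					// CREATE TABLE/INSERT repro of the input are printed below before
					// failing the test.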
					if strings.Contains(err.Error(), tree.ErrIntOutOfRange.Error()) {
						continue
					}
					fmt.Printf("--- seed = %d run = %d hash = %t ---\n",
						seed, run, hashAgg)
					prettyPrintTypes(inputTypes, "t" /* tableName */)
					prettyPrintInput(rows, inputTypes, "t" /* tableName */)
					t.Fatal(err)
				}
			}
		}
	}
}

func TestDistinctAgainstProcessor(t *testing.T) {
	defer leaktest.AfterTest(t)()
	var da sqlbase.DatumAlloc
	evalCtx := tree.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
	defer evalCtx.Stop(context.Background())

	rng, seed := randutil.NewPseudoRand()
	nRuns := 10
	nRows := 10
	maxCols := 3
	maxNum := 3
	intTyps := make([]*types.T, maxCols)
	for i := range intTyps {
		intTyps[i] = types.Int
	}

	for run := 0; run < nRuns; run++ {
		for nCols := 1; nCols <= maxCols; nCols++ {
			for nDistinctCols := 1; nDistinctCols <= nCols; nDistinctCols++ {
				for nOrderedCols := 0; nOrderedCols <= nDistinctCols; nOrderedCols++ {
					var (
						rows       sqlbase.EncDatumRows
						inputTypes []*types.T
						ordCols    []execinfrapb.Ordering_Column
					)
					if rng.Float64() < randTypesProbability {
						inputTypes = generateRandomSupportedTypes(rng, nCols)
						rows = sqlbase.RandEncDatumRowsOfTypes(rng, nRows, inputTypes)
					} else {
						inputTypes = intTyps[:nCols]
						rows = sqlbase.MakeRandIntRowsInRange(rng, nRows, nCols, maxNum, nullProbability)
					}
					distinctCols := make([]uint32, nDistinctCols)
					for i, distinctCol := range rng.Perm(nCols)[:nDistinctCols] {
						distinctCols[i] = uint32(distinctCol)
					}
					orderedCols := make([]uint32, nOrderedCols)
					for i, orderedColIdx := range rng.Perm(nDistinctCols)[:nOrderedCols] {
						// From the set of distinct columns we need to choose nOrderedCols
						// to be in the ordered columns set.
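						// Note that orderedColIdx indexes into distinctCols (not into the
						// table's columns), so the ordered columns are always a subset of
						// the distinct columns.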
						orderedCols[i] = distinctCols[orderedColIdx]
					}
					ordCols = make([]execinfrapb.Ordering_Column, nOrderedCols)
					for i, col := range orderedCols {
						ordCols[i] = execinfrapb.Ordering_Column{
							ColIdx: col,
						}
					}
					sort.Slice(rows, func(i, j int) bool {
						cmp, err := rows[i].Compare(
							inputTypes, &da,
							execinfrapb.ConvertToColumnOrdering(execinfrapb.Ordering{Columns: ordCols}),
							&evalCtx, rows[j],
						)
						if err != nil {
							t.Fatal(err)
						}
						return cmp < 0
					})

					spec := &execinfrapb.DistinctSpec{
						DistinctColumns: distinctCols,
						OrderedColumns:  orderedCols,
					}
					pspec := &execinfrapb.ProcessorSpec{
						Input: []execinfrapb.InputSyncSpec{{ColumnTypes: inputTypes}},
						Core:  execinfrapb.ProcessorCoreUnion{Distinct: spec},
					}
					args := verifyColOperatorArgs{
						anyOrder:    false,
						inputTypes:  [][]*types.T{inputTypes},
						inputs:      []sqlbase.EncDatumRows{rows},
						outputTypes: inputTypes,
						pspec:       pspec,
					}
					if err := verifyColOperator(args); err != nil {
						fmt.Printf("--- seed = %d run = %d nCols = %d distinct cols = %v ordered cols = %v ---\n",
							seed, run, nCols, distinctCols, orderedCols)
						prettyPrintTypes(inputTypes, "t" /* tableName */)
						prettyPrintInput(rows, inputTypes, "t" /* tableName */)
						t.Fatal(err)
					}
				}
			}
		}
	}
}

func TestSorterAgainstProcessor(t *testing.T) {
	defer leaktest.AfterTest(t)()
	st := cluster.MakeTestingClusterSettings()
	evalCtx := tree.MakeTestingEvalContext(st)
	defer evalCtx.Stop(context.Background())

	rng, seed := randutil.NewPseudoRand()
	nRuns := 5
	nRows := 8 * coldata.BatchSize()
	maxCols := 5
	maxNum := 10
	intTyps := make([]*types.T, maxCols)
	for i := range intTyps {
		intTyps[i] = types.Int
	}

	for _, spillForced := range []bool{false, true} {
		for run := 0; run < nRuns; run++ {
			for nCols := 1; nCols <= maxCols; nCols++ {
				// We will try both general sort and top K sort.
				for _, topK := range []uint64{0, uint64(1 + rng.Intn(64))} {
					var (
						rows       sqlbase.EncDatumRows
						inputTypes []*types.T
					)
					if rng.Float64() < randTypesProbability {
						inputTypes = generateRandomSupportedTypes(rng, nCols)
						rows = sqlbase.RandEncDatumRowsOfTypes(rng, nRows, inputTypes)
					} else {
						inputTypes = intTyps[:nCols]
						rows = sqlbase.MakeRandIntRowsInRange(rng, nRows, nCols, maxNum, nullProbability)
					}

					// Note: we're only generating column orderings on all nCols columns since
					// if there are columns not in the ordering, the results are not fully
					// deterministic.
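					// When topK > 0, the sorter output is additionally run through Limit
					// and Offset in the PostProcessSpec below (with limit+offset equal to
					// topK), which is how the top K sort gets exercised.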
					orderingCols := generateColumnOrdering(rng, nCols, nCols)
					sorterSpec := &execinfrapb.SorterSpec{
						OutputOrdering: execinfrapb.Ordering{Columns: orderingCols},
					}
					var limit, offset uint64
					if topK > 0 {
						offset = uint64(rng.Intn(int(topK)))
						limit = topK - offset
					}
					pspec := &execinfrapb.ProcessorSpec{
						Input: []execinfrapb.InputSyncSpec{{ColumnTypes: inputTypes}},
						Core:  execinfrapb.ProcessorCoreUnion{Sorter: sorterSpec},
						Post:  execinfrapb.PostProcessSpec{Limit: limit, Offset: offset},
					}
					args := verifyColOperatorArgs{
						inputTypes:     [][]*types.T{inputTypes},
						inputs:         []sqlbase.EncDatumRows{rows},
						outputTypes:    inputTypes,
						pspec:          pspec,
						forceDiskSpill: spillForced,
					}
					if spillForced {
						args.numForcedRepartitions = 2 + rng.Intn(3)
					}
					if err := verifyColOperator(args); err != nil {
						fmt.Printf("--- seed = %d spillForced = %t nCols = %d K = %d ---\n",
							seed, spillForced, nCols, topK)
						prettyPrintTypes(inputTypes, "t" /* tableName */)
						prettyPrintInput(rows, inputTypes, "t" /* tableName */)
						t.Fatal(err)
					}
				}
			}
		}
	}
}

func TestSortChunksAgainstProcessor(t *testing.T) {
	defer leaktest.AfterTest(t)()
	var da sqlbase.DatumAlloc
	st := cluster.MakeTestingClusterSettings()
	evalCtx := tree.MakeTestingEvalContext(st)
	defer evalCtx.Stop(context.Background())

	rng, seed := randutil.NewPseudoRand()
	nRuns := 5
	nRows := 5 * coldata.BatchSize() / 4
	maxCols := 3
	maxNum := 10
	intTyps := make([]*types.T, maxCols)
	for i := range intTyps {
		intTyps[i] = types.Int
	}

	for _, spillForced := range []bool{false, true} {
		for run := 0; run < nRuns; run++ {
			for nCols := 2; nCols <= maxCols; nCols++ {
				for matchLen := 1; matchLen < nCols; matchLen++ {
					var (
						rows       sqlbase.EncDatumRows
						inputTypes []*types.T
					)
					if rng.Float64() < randTypesProbability {
						inputTypes = generateRandomSupportedTypes(rng, nCols)
						rows = sqlbase.RandEncDatumRowsOfTypes(rng, nRows, inputTypes)
					} else {
						inputTypes = intTyps[:nCols]
						rows = sqlbase.MakeRandIntRowsInRange(rng, nRows, nCols, maxNum, nullProbability)
					}

					// Note: we're only generating column orderings on all nCols columns since
					// if there are columns not in the ordering, the results are not fully
					// deterministic.
					orderingCols := generateColumnOrdering(rng, nCols, nCols)
					matchedCols := execinfrapb.ConvertToColumnOrdering(execinfrapb.Ordering{Columns: orderingCols[:matchLen]})
					// Presort the input on first matchLen columns.
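					// This models the "sort chunks" case: the input already arrives
					// ordered on the first matchLen columns of the desired ordering, so
					// the sorter only needs to sort rows within each such chunk.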
					sort.Slice(rows, func(i, j int) bool {
						cmp, err := rows[i].Compare(inputTypes, &da, matchedCols, &evalCtx, rows[j])
						if err != nil {
							t.Fatal(err)
						}
						return cmp < 0
					})

					sorterSpec := &execinfrapb.SorterSpec{
						OutputOrdering:   execinfrapb.Ordering{Columns: orderingCols},
						OrderingMatchLen: uint32(matchLen),
					}
					pspec := &execinfrapb.ProcessorSpec{
						Input: []execinfrapb.InputSyncSpec{{ColumnTypes: inputTypes}},
						Core:  execinfrapb.ProcessorCoreUnion{Sorter: sorterSpec},
					}
					args := verifyColOperatorArgs{
						inputTypes:     [][]*types.T{inputTypes},
						inputs:         []sqlbase.EncDatumRows{rows},
						outputTypes:    inputTypes,
						pspec:          pspec,
						forceDiskSpill: spillForced,
					}
					if err := verifyColOperator(args); err != nil {
						fmt.Printf("--- seed = %d spillForced = %t orderingCols = %v matchLen = %d run = %d ---\n",
							seed, spillForced, orderingCols, matchLen, run)
						prettyPrintTypes(inputTypes, "t" /* tableName */)
						prettyPrintInput(rows, inputTypes, "t" /* tableName */)
						t.Fatal(err)
					}
				}
			}
		}
	}
}

func TestHashJoinerAgainstProcessor(t *testing.T) {
	defer leaktest.AfterTest(t)()
	evalCtx := tree.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
	defer evalCtx.Stop(context.Background())

	type hjTestSpec struct {
		joinType        sqlbase.JoinType
		onExprSupported bool
	}
	testSpecs := []hjTestSpec{
		{
			joinType:        sqlbase.InnerJoin,
			onExprSupported: true,
		},
		{
			joinType: sqlbase.LeftOuterJoin,
		},
		{
			joinType: sqlbase.RightOuterJoin,
		},
		{
			joinType: sqlbase.FullOuterJoin,
		},
		{
			joinType: sqlbase.LeftSemiJoin,
		},
		{
			joinType: sqlbase.LeftAntiJoin,
		},
		{
			joinType: sqlbase.IntersectAllJoin,
		},
		{
			joinType: sqlbase.ExceptAllJoin,
		},
	}

	rng, seed := randutil.NewPseudoRand()
	nRuns := 3
	nRows := 10
	maxCols := 3
	maxNum := 5
	intTyps := make([]*types.T, maxCols)
	for i := range intTyps {
		intTyps[i] = types.Int
	}

	for _, spillForced := range []bool{false, true} {
		for run := 0; run < nRuns; run++ {
			for _, testSpec := range testSpecs {
				for nCols := 1; nCols <= maxCols; nCols++ {
					for nEqCols := 1; nEqCols <= nCols; nEqCols++ {
						for _, addFilter := range getAddFilterOptions(testSpec.joinType, nEqCols < nCols) {
							triedWithoutOnExpr, triedWithOnExpr := false, false
							if !testSpec.onExprSupported {
								triedWithOnExpr = true
							}
							for !triedWithoutOnExpr || !triedWithOnExpr {
								var (
									lRows, rRows             sqlbase.EncDatumRows
									lEqCols, rEqCols         []uint32
									lInputTypes, rInputTypes []*types.T
									usingRandomTypes         bool
								)
								if rng.Float64() < randTypesProbability {
									lInputTypes = generateRandomSupportedTypes(rng, nCols)
									lEqCols = generateEqualityColumns(rng, nCols, nEqCols)
									rInputTypes = append(rInputTypes[:0], lInputTypes...)
									rEqCols = append(rEqCols[:0], lEqCols...)
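									// The right side starts as a copy of the left schema. The shuffle
									// below also swaps the corresponding right-side types so that
									// each pair of equality columns stays type-aligned.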
									rng.Shuffle(nEqCols, func(i, j int) {
										iColIdx, jColIdx := rEqCols[i], rEqCols[j]
										rInputTypes[iColIdx], rInputTypes[jColIdx] = rInputTypes[jColIdx], rInputTypes[iColIdx]
										rEqCols[i], rEqCols[j] = rEqCols[j], rEqCols[i]
									})
									rInputTypes = generateRandomComparableTypes(rng, rInputTypes)
									lRows = sqlbase.RandEncDatumRowsOfTypes(rng, nRows, lInputTypes)
									rRows = sqlbase.RandEncDatumRowsOfTypes(rng, nRows, rInputTypes)
									usingRandomTypes = true
								} else {
									lInputTypes = intTyps[:nCols]
									rInputTypes = lInputTypes
									lRows = sqlbase.MakeRandIntRowsInRange(rng, nRows, nCols, maxNum, nullProbability)
									rRows = sqlbase.MakeRandIntRowsInRange(rng, nRows, nCols, maxNum, nullProbability)
									lEqCols = generateEqualityColumns(rng, nCols, nEqCols)
									rEqCols = generateEqualityColumns(rng, nCols, nEqCols)
								}

								var outputTypes []*types.T
								if testSpec.joinType.ShouldIncludeRightColsInOutput() {
									outputTypes = append(lInputTypes, rInputTypes...)
								} else {
									outputTypes = lInputTypes
								}
								outputColumns := make([]uint32, len(outputTypes))
								for i := range outputColumns {
									outputColumns[i] = uint32(i)
								}

								var filter, onExpr execinfrapb.Expression
								if addFilter {
									colTypes := append(lInputTypes, rInputTypes...)
									filter = generateFilterExpr(
										rng, nCols, nEqCols, colTypes, usingRandomTypes,
										!testSpec.joinType.ShouldIncludeRightColsInOutput(),
									)
								}
								if triedWithoutOnExpr {
									colTypes := append(lInputTypes, rInputTypes...)
									onExpr = generateFilterExpr(
										rng, nCols, nEqCols, colTypes, usingRandomTypes, false, /* forceLeftSide */
									)
								}
								hjSpec := &execinfrapb.HashJoinerSpec{
									LeftEqColumns:  lEqCols,
									RightEqColumns: rEqCols,
									OnExpr:         onExpr,
									Type:           testSpec.joinType,
								}
								pspec := &execinfrapb.ProcessorSpec{
									Input: []execinfrapb.InputSyncSpec{
										{ColumnTypes: lInputTypes},
										{ColumnTypes: rInputTypes},
									},
									Core: execinfrapb.ProcessorCoreUnion{HashJoiner: hjSpec},
									Post: execinfrapb.PostProcessSpec{
										Projection:    true,
										OutputColumns: outputColumns,
										Filter:        filter,
									},
								}
								args := verifyColOperatorArgs{
									anyOrder:       true,
									inputTypes:     [][]*types.T{lInputTypes, rInputTypes},
									inputs:         []sqlbase.EncDatumRows{lRows, rRows},
									outputTypes:    outputTypes,
									pspec:          pspec,
									forceDiskSpill: spillForced,
									// It is possible that we have a filter that is always false, and this
									// will allow us to plan a zero operator which always returns a zero
									// batch. In such case, the spilling might not occur and that's ok.
									forcedDiskSpillMightNotOccur: !filter.Empty() || !onExpr.Empty(),
									numForcedRepartitions:        2,
									rng:                          rng,
								}
								if testSpec.joinType.IsSetOpJoin() && nEqCols < nCols {
									// The output of set operation joins is not fully
									// deterministic when there are non-equality
									// columns, however, the rows must match on the
									// equality columns between vectorized and row
									// executions.
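									// Restrict the comparison to the left equality columns (the
									// output of these joins contains only the left columns).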
									args.colIdxsToCheckForEquality = make([]int, nEqCols)
									for i := range args.colIdxsToCheckForEquality {
										args.colIdxsToCheckForEquality[i] = int(lEqCols[i])
									}
								}

								if err := verifyColOperator(args); err != nil {
									fmt.Printf("--- spillForced = %t join type = %s onExpr = %q"+
										" filter = %q seed = %d run = %d ---\n",
										spillForced, testSpec.joinType.String(), onExpr.Expr, filter.Expr, seed, run)
									fmt.Printf("--- lEqCols = %v rEqCols = %v ---\n", lEqCols, rEqCols)
									prettyPrintTypes(lInputTypes, "left_table" /* tableName */)
									prettyPrintTypes(rInputTypes, "right_table" /* tableName */)
									prettyPrintInput(lRows, lInputTypes, "left_table" /* tableName */)
									prettyPrintInput(rRows, rInputTypes, "right_table" /* tableName */)
									t.Fatal(err)
								}
								if onExpr.Expr == "" {
									triedWithoutOnExpr = true
								} else {
									triedWithOnExpr = true
								}
							}
						}
					}
				}
			}
		}
	}
}

// generateEqualityColumns produces a random permutation of nEqCols random
// columns on a table with nCols columns, so nEqCols must be not greater than
// nCols.
func generateEqualityColumns(rng *rand.Rand, nCols int, nEqCols int) []uint32 {
	if nEqCols > nCols {
		panic("nEqCols > nCols in generateEqualityColumns")
	}
	eqCols := make([]uint32, 0, nEqCols)
	for _, eqCol := range rng.Perm(nCols)[:nEqCols] {
		eqCols = append(eqCols, uint32(eqCol))
	}
	return eqCols
}

func TestMergeJoinerAgainstProcessor(t *testing.T) {
	defer leaktest.AfterTest(t)()
	var da sqlbase.DatumAlloc
	evalCtx := tree.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
	defer evalCtx.Stop(context.Background())

	type mjTestSpec struct {
		joinType        sqlbase.JoinType
		anyOrder        bool
		onExprSupported bool
	}
	testSpecs := []mjTestSpec{
		{
			joinType:        sqlbase.InnerJoin,
			onExprSupported: true,
		},
		{
			joinType: sqlbase.LeftOuterJoin,
		},
		{
			joinType: sqlbase.RightOuterJoin,
		},
		{
			joinType: sqlbase.FullOuterJoin,
			// FULL OUTER JOIN doesn't guarantee any ordering on its output (since it
			// is ambiguous), so we're comparing the outputs as sets.
			anyOrder: true,
		},
		{
			joinType: sqlbase.LeftSemiJoin,
		},
		{
			joinType: sqlbase.LeftAntiJoin,
		},
		{
			joinType: sqlbase.IntersectAllJoin,
		},
		{
			joinType: sqlbase.ExceptAllJoin,
		},
	}

	rng, seed := randutil.NewPseudoRand()
	nRuns := 3
	nRows := 10
	maxCols := 3
	maxNum := 5
	intTyps := make([]*types.T, maxCols)
	for i := range intTyps {
		intTyps[i] = types.Int
	}

	for run := 0; run < nRuns; run++ {
		for _, testSpec := range testSpecs {
			for nCols := 1; nCols <= maxCols; nCols++ {
				for nOrderingCols := 1; nOrderingCols <= nCols; nOrderingCols++ {
					for _, addFilter := range getAddFilterOptions(testSpec.joinType, nOrderingCols < nCols) {
						triedWithoutOnExpr, triedWithOnExpr := false, false
						if !testSpec.onExprSupported {
							triedWithOnExpr = true
						}
						for !triedWithoutOnExpr || !triedWithOnExpr {
							var (
								lRows, rRows                 sqlbase.EncDatumRows
								lInputTypes, rInputTypes     []*types.T
								lOrderingCols, rOrderingCols []execinfrapb.Ordering_Column
								usingRandomTypes             bool
							)
							if rng.Float64() < randTypesProbability {
								lInputTypes = generateRandomSupportedTypes(rng, nCols)
								lOrderingCols = generateColumnOrdering(rng, nCols, nOrderingCols)
								rInputTypes = append(rInputTypes[:0], lInputTypes...)
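								// As in the hash join test above, the right side starts as a copy
								// of the left schema, and the shuffle below swaps the right-side
								// types together with the ordering columns so that each pair of
								// ordering columns stays type-aligned.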
								rOrderingCols = append(rOrderingCols[:0], lOrderingCols...)
								rng.Shuffle(nOrderingCols, func(i, j int) {
									iColIdx, jColIdx := rOrderingCols[i].ColIdx, rOrderingCols[j].ColIdx
									rInputTypes[iColIdx], rInputTypes[jColIdx] = rInputTypes[jColIdx], rInputTypes[iColIdx]
									rOrderingCols[i], rOrderingCols[j] = rOrderingCols[j], rOrderingCols[i]
								})
								rInputTypes = generateRandomComparableTypes(rng, rInputTypes)
								lRows = sqlbase.RandEncDatumRowsOfTypes(rng, nRows, lInputTypes)
								rRows = sqlbase.RandEncDatumRowsOfTypes(rng, nRows, rInputTypes)
								usingRandomTypes = true
							} else {
								lInputTypes = intTyps[:nCols]
								rInputTypes = lInputTypes
								lRows = sqlbase.MakeRandIntRowsInRange(rng, nRows, nCols, maxNum, nullProbability)
								rRows = sqlbase.MakeRandIntRowsInRange(rng, nRows, nCols, maxNum, nullProbability)
								lOrderingCols = generateColumnOrdering(rng, nCols, nOrderingCols)
								rOrderingCols = generateColumnOrdering(rng, nCols, nOrderingCols)
							}
							// Set the directions of both columns to be the same.
							for i, lCol := range lOrderingCols {
								rOrderingCols[i].Direction = lCol.Direction
							}

							lMatchedCols := execinfrapb.ConvertToColumnOrdering(execinfrapb.Ordering{Columns: lOrderingCols})
							rMatchedCols := execinfrapb.ConvertToColumnOrdering(execinfrapb.Ordering{Columns: rOrderingCols})
							sort.Slice(lRows, func(i, j int) bool {
								cmp, err := lRows[i].Compare(lInputTypes, &da, lMatchedCols, &evalCtx, lRows[j])
								if err != nil {
									t.Fatal(err)
								}
								return cmp < 0
							})
							sort.Slice(rRows, func(i, j int) bool {
								cmp, err := rRows[i].Compare(rInputTypes, &da, rMatchedCols, &evalCtx, rRows[j])
								if err != nil {
									t.Fatal(err)
								}
								return cmp < 0
							})
							var outputTypes []*types.T
							if testSpec.joinType.ShouldIncludeRightColsInOutput() {
								outputTypes = append(lInputTypes, rInputTypes...)
							} else {
								outputTypes = lInputTypes
							}
							outputColumns := make([]uint32, len(outputTypes))
							for i := range outputColumns {
								outputColumns[i] = uint32(i)
							}

							var filter, onExpr execinfrapb.Expression
							if addFilter {
								colTypes := append(lInputTypes, rInputTypes...)
								filter = generateFilterExpr(
									rng, nCols, nOrderingCols, colTypes, usingRandomTypes,
									!testSpec.joinType.ShouldIncludeRightColsInOutput(),
								)
							}
							if triedWithoutOnExpr {
								colTypes := append(lInputTypes, rInputTypes...)
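								// An ON expression is generated only on the second pass, i.e.
								// after this configuration has already been tried without one.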
								onExpr = generateFilterExpr(
									rng, nCols, nOrderingCols, colTypes, usingRandomTypes, false, /* forceLeftSide */
								)
							}
							mjSpec := &execinfrapb.MergeJoinerSpec{
								OnExpr:        onExpr,
								LeftOrdering:  execinfrapb.Ordering{Columns: lOrderingCols},
								RightOrdering: execinfrapb.Ordering{Columns: rOrderingCols},
								Type:          testSpec.joinType,
								NullEquality:  testSpec.joinType.IsSetOpJoin(),
							}
							pspec := &execinfrapb.ProcessorSpec{
								Input: []execinfrapb.InputSyncSpec{{ColumnTypes: lInputTypes}, {ColumnTypes: rInputTypes}},
								Core:  execinfrapb.ProcessorCoreUnion{MergeJoiner: mjSpec},
								Post:  execinfrapb.PostProcessSpec{Projection: true, OutputColumns: outputColumns, Filter: filter},
							}
							args := verifyColOperatorArgs{
								anyOrder:    testSpec.anyOrder,
								inputTypes:  [][]*types.T{lInputTypes, rInputTypes},
								inputs:      []sqlbase.EncDatumRows{lRows, rRows},
								outputTypes: outputTypes,
								pspec:       pspec,
								rng:         rng,
							}
							if testSpec.joinType.IsSetOpJoin() && nOrderingCols < nCols {
								// The output of set operation joins is not fully
								// deterministic when there are non-equality
								// columns, however, the rows must match on the
								// equality columns between vectorized and row
								// executions.
								args.colIdxsToCheckForEquality = make([]int, nOrderingCols)
								for i := range args.colIdxsToCheckForEquality {
									args.colIdxsToCheckForEquality[i] = int(lOrderingCols[i].ColIdx)
								}
							}
							if err := verifyColOperator(args); err != nil {
								fmt.Printf("--- join type = %s onExpr = %q filter = %q seed = %d run = %d ---\n",
									testSpec.joinType.String(), onExpr.Expr, filter.Expr, seed, run)
								fmt.Printf("--- left ordering = %v right ordering = %v ---\n", lOrderingCols, rOrderingCols)
								prettyPrintTypes(lInputTypes, "left_table" /* tableName */)
								prettyPrintTypes(rInputTypes, "right_table" /* tableName */)
								prettyPrintInput(lRows, lInputTypes, "left_table" /* tableName */)
								prettyPrintInput(rRows, rInputTypes, "right_table" /* tableName */)
								t.Fatal(err)
							}
							if onExpr.Expr == "" {
								triedWithoutOnExpr = true
							} else {
								triedWithOnExpr = true
							}
						}
					}
				}
			}
		}
	}
}

// generateColumnOrdering produces a random ordering of nOrderingCols columns
// on a table with nCols columns, so nOrderingCols must be not greater than
// nCols.
func generateColumnOrdering(
	rng *rand.Rand, nCols int, nOrderingCols int,
) []execinfrapb.Ordering_Column {
	if nOrderingCols > nCols {
		panic("nOrderingCols > nCols in generateColumnOrdering")
	}

	orderingCols := make([]execinfrapb.Ordering_Column, nOrderingCols)
	for i, col := range rng.Perm(nCols)[:nOrderingCols] {
		orderingCols[i] = execinfrapb.Ordering_Column{
			ColIdx:    uint32(col),
			Direction: execinfrapb.Ordering_Column_Direction(rng.Intn(2)),
		}
	}
	return orderingCols
}

func getAddFilterOptions(joinType sqlbase.JoinType, nonEqualityColsPresent bool) []bool {
	if joinType.IsSetOpJoin() && nonEqualityColsPresent {
		// Output of set operation join when rows have non equality columns is
		// not deterministic, so applying a filter on top of it can produce
		// arbitrary results, and we skip such configuration.
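		// Note that this only disables the post-processing filter; ON
		// expressions are gated separately via onExprSupported in the test
		// specs.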
		return []bool{false}
	}
	return []bool{false, true}
}

// generateFilterExpr populates an execinfrapb.Expression that contains a
// single comparison which can be either comparing a column from the left
// against a column from the right or comparing a column from either side
// against a constant.
// If forceConstComparison is true, then the comparison against the constant
// will be used.
// If forceLeftSide is true, then the comparison of a column from the left
// against a constant will be used.
func generateFilterExpr(
	rng *rand.Rand,
	nCols int,
	nEqCols int,
	colTypes []*types.T,
	forceConstComparison bool,
	forceLeftSide bool,
) execinfrapb.Expression {
	var comparison string
	r := rng.Float64()
	if r < 0.25 {
		comparison = "<"
	} else if r < 0.5 {
		comparison = ">"
	} else if r < 0.75 {
		comparison = "="
	} else {
		comparison = "<>"
	}
	// When all columns are used in equality comparison between inputs, there is
	// only one interesting case when a column from either side is compared
	// against a constant. The second conditional is us choosing to compare
	// against a constant.
	if nCols == nEqCols || rng.Float64() < 0.33 || forceConstComparison || forceLeftSide {
		colIdx := rng.Intn(nCols)
		if !forceLeftSide && rng.Float64() >= 0.5 {
			// Use right side.
			colIdx += nCols
		}
		constDatum := sqlbase.RandDatum(rng, colTypes[colIdx], true /* nullOk */)
		constDatumString := constDatum.String()
		switch colTypes[colIdx].Family() {
		case types.FloatFamily, types.DecimalFamily:
			if strings.Contains(strings.ToLower(constDatumString), "nan") ||
				strings.Contains(strings.ToLower(constDatumString), "inf") {
				// We need to surround special numerical values with quotes.
				constDatumString = fmt.Sprintf("'%s'", constDatumString)
			}
		}
		return execinfrapb.Expression{Expr: fmt.Sprintf("@%d %s %s", colIdx+1, comparison, constDatumString)}
	}
	// We will compare a column from the left against a column from the right.
	leftColIdx := rng.Intn(nCols) + 1
	rightColIdx := rng.Intn(nCols) + nCols + 1
	return execinfrapb.Expression{Expr: fmt.Sprintf("@%d %s @%d", leftColIdx, comparison, rightColIdx)}
}

func TestWindowFunctionsAgainstProcessor(t *testing.T) {
	defer leaktest.AfterTest(t)()

	rng, seed := randutil.NewPseudoRand()
	nRows := 2 * coldata.BatchSize()
	maxCols := 4
	maxNum := 10
	typs := make([]*types.T, maxCols)
	for i := range typs {
		// TODO(yuzefovich): randomize the types of the columns once we support
		// window functions that take in arguments.
		typs[i] = types.Int
	}
	for windowFn := range colexec.SupportedWindowFns {
		for _, partitionBy := range [][]uint32{
			{},     // No PARTITION BY clause.
			{0},    // Partitioning on the first input column.
			{0, 1}, // Partitioning on the first and second input columns.
		} {
			for _, nOrderingCols := range []int{
				0, // No ORDER BY clause.
				1, // ORDER BY on at most one column.
				2, // ORDER BY on at most two columns.
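				// These are upper bounds: generateOrderingGivenPartitionBy drops any
				// column that already appears in PARTITION BY, so the actual ORDER BY
				// can end up shorter.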
			} {
				for nCols := 1; nCols <= maxCols; nCols++ {
					if len(partitionBy) > nCols || nOrderingCols > nCols {
						continue
					}
					inputTypes := typs[:nCols:nCols]
					rows := sqlbase.MakeRandIntRowsInRange(rng, nRows, nCols, maxNum, nullProbability)

					windowerSpec := &execinfrapb.WindowerSpec{
						PartitionBy: partitionBy,
						WindowFns: []execinfrapb.WindowerSpec_WindowFn{
							{
								Func:         execinfrapb.WindowerSpec_Func{WindowFunc: &windowFn},
								Ordering:     generateOrderingGivenPartitionBy(rng, nCols, nOrderingCols, partitionBy),
								OutputColIdx: uint32(nCols),
							},
						},
					}
					if windowFn == execinfrapb.WindowerSpec_ROW_NUMBER &&
						len(partitionBy)+len(windowerSpec.WindowFns[0].Ordering.Columns) < nCols {
						// The output of row_number is not deterministic if there are
						// columns that are not present in either PARTITION BY or ORDER BY
						// clauses, so we skip such a configuration.
						continue
					}

					pspec := &execinfrapb.ProcessorSpec{
						Input: []execinfrapb.InputSyncSpec{{ColumnTypes: inputTypes}},
						Core:  execinfrapb.ProcessorCoreUnion{Windower: windowerSpec},
					}
					// Currently, we only support window functions that take no
					// arguments, so we leave the second argument empty.
					_, outputType, err := execinfrapb.GetWindowFunctionInfo(execinfrapb.WindowerSpec_Func{WindowFunc: &windowFn})
					require.NoError(t, err)
					args := verifyColOperatorArgs{
						anyOrder:    true,
						inputTypes:  [][]*types.T{inputTypes},
						inputs:      []sqlbase.EncDatumRows{rows},
						outputTypes: append(inputTypes, outputType),
						pspec:       pspec,
					}
					if err := verifyColOperator(args); err != nil {
						fmt.Printf("seed = %d\n", seed)
						prettyPrintTypes(inputTypes, "t" /* tableName */)
						prettyPrintInput(rows, inputTypes, "t" /* tableName */)
						t.Fatal(err)
					}
				}
			}
		}
	}
}

// generateRandomSupportedTypes generates nCols random types that are supported
// by the vectorized engine.
func generateRandomSupportedTypes(rng *rand.Rand, nCols int) []*types.T {
	typs := make([]*types.T, 0, nCols)
	for len(typs) < nCols {
		typ := sqlbase.RandType(rng)
		if typeconv.TypeFamilyToCanonicalTypeFamily(typ.Family()) == typeconv.DatumVecCanonicalTypeFamily {
			// At the moment, we disallow datum-backed types.
			// TODO(yuzefovich): remove this.
			continue
		}
		typs = append(typs, typ)
	}
	return typs
}

// generateRandomComparableTypes generates random types that are supported by
// the vectorized engine and are such that they are comparable to the
// corresponding types in inputTypes.
func generateRandomComparableTypes(rng *rand.Rand, inputTypes []*types.T) []*types.T {
	typs := make([]*types.T, len(inputTypes))
	for i, inputType := range inputTypes {
		for {
			typ := sqlbase.RandType(rng)
			if typeconv.TypeFamilyToCanonicalTypeFamily(typ.Family()) == typeconv.DatumVecCanonicalTypeFamily {
				// At the moment, we disallow datum-backed types.
				// TODO(yuzefovich): remove this.
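				// (Datum-backed means that the type has no native columnar
				// representation, mirroring the check in
				// generateRandomSupportedTypes above.)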
				continue
			}
			comparable := false
			for _, cmpOverloads := range tree.CmpOps[tree.LT] {
				o := cmpOverloads.(*tree.CmpOp)
				if inputType.Equivalent(o.LeftType) && typ.Equivalent(o.RightType) {
					if (typ.Family() == types.DateFamily && inputType.Family() != types.DateFamily) ||
						(typ.Family() != types.DateFamily && inputType.Family() == types.DateFamily) {
						// We map Dates to int64 and don't have casts from int64 to
						// timestamps (and there is a comparison between dates and
						// timestamps).
						continue
					}
					comparable = true
					break
				}
			}
			if comparable {
				typs[i] = typ
				break
			}
		}
	}
	return typs
}

// generateOrderingGivenPartitionBy produces a random ordering of up to
// nOrderingCols columns on a table with nCols columns such that only columns
// not present in partitionBy are used. This is useful to simulate how
// optimizer plans window functions - for example, with an OVER clause as
// (PARTITION BY a ORDER BY a DESC), the optimizer will omit the ORDER BY
// clause entirely.
func generateOrderingGivenPartitionBy(
	rng *rand.Rand, nCols int, nOrderingCols int, partitionBy []uint32,
) execinfrapb.Ordering {
	var ordering execinfrapb.Ordering
	if nOrderingCols == 0 || len(partitionBy) == nCols {
		return ordering
	}
	ordering = execinfrapb.Ordering{Columns: make([]execinfrapb.Ordering_Column, 0, nOrderingCols)}
	for len(ordering.Columns) == 0 {
		for _, ordCol := range generateColumnOrdering(rng, nCols, nOrderingCols) {
			usedInPartitionBy := false
			for _, p := range partitionBy {
				if p == ordCol.ColIdx {
					usedInPartitionBy = true
					break
				}
			}
			if !usedInPartitionBy {
				ordering.Columns = append(ordering.Columns, ordCol)
			}
		}
	}
	return ordering
}

// prettyPrintTypes prints out typs as a CREATE TABLE statement.
func prettyPrintTypes(typs []*types.T, tableName string) {
	fmt.Printf("CREATE TABLE %s(", tableName)
	colName := byte('a')
	for typIdx, typ := range typs {
		if typIdx < len(typs)-1 {
			fmt.Printf("%c %s, ", colName, typ.SQLStandardName())
		} else {
			fmt.Printf("%c %s);\n", colName, typ.SQLStandardName())
		}
		colName++
	}
}

// prettyPrintInput prints out rows as INSERT INTO tableName VALUES statement.
func prettyPrintInput(rows sqlbase.EncDatumRows, inputTypes []*types.T, tableName string) {
	fmt.Printf("INSERT INTO %s VALUES\n", tableName)
	for rowIdx, row := range rows {
		fmt.Printf("(%s", row[0].String(inputTypes[0]))
		for i := range row[1:] {
			fmt.Printf(", %s", row[i+1].String(inputTypes[i+1]))
		}
		if rowIdx < len(rows)-1 {
			fmt.Printf("),\n")
		} else {
			fmt.Printf(");\n")
		}
	}
}
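
// Example_generateColumnOrdering is a small illustrative sketch (not part of
// the original test suite) showing the shape of the orderings produced by
// generateColumnOrdering: a random subset of column indices, each paired with
// a random ASC/DESC direction. It has no Output comment, so it is only
// compile-checked, never executed as a test.
func Example_generateColumnOrdering() {
	rng, _ := randutil.NewPseudoRand()
	for _, col := range generateColumnOrdering(rng, 4 /* nCols */, 2 /* nOrderingCols */) {
		// Column indices are zero-based in the spec but one-based in the SQL-level
		// expressions produced by generateFilterExpr, hence the +1 here.
		fmt.Printf("@%d %s\n", col.ColIdx+1, col.Direction)
	}
}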