github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/distsql/columnar_utils_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package distsql
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"math"
    17  	"math/rand"
    18  	"strconv"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/base"
    21  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    22  	"github.com/cockroachdb/cockroach/pkg/col/coldataext"
    23  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/colcontainer"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/colexec"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
    28  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    29  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    30  	"github.com/cockroachdb/cockroach/pkg/sql/rowexec"
    31  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    32  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    33  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    34  	"github.com/cockroachdb/cockroach/pkg/storage"
    35  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    36  	"github.com/cockroachdb/errors"
    37  )
    38  
    39  type verifyColOperatorArgs struct {
    40  	// anyOrder determines whether the results should be matched in order (when
    41  	// anyOrder is false) or as sets (when anyOrder is true).
    42  	anyOrder bool
    43  	// colIdxsToCheckForEquality determines which columns of the rows to use
    44  	// for equality check. If left unset, full rows are compared. Use this
    45  	// with caution and leave a comment that justifies using this knob.
    46  	colIdxsToCheckForEquality []int
    47  	inputTypes                [][]*types.T
    48  	inputs                    []sqlbase.EncDatumRows
    49  	outputTypes               []*types.T
    50  	pspec                     *execinfrapb.ProcessorSpec
    51  	// forceDiskSpill, if set, will force the operator to spill to disk.
    52  	forceDiskSpill bool
    53  	// forcedDiskSpillMightNotOccur determines whether we error out if
    54  	// forceDiskSpill is true but the spilling doesn't occur. Please leave an
    55  	// explanation for why that could be the case.
    56  	forcedDiskSpillMightNotOccur bool
    57  	// numForcedRepartitions specifies a number of "repartitions" that a
    58  	// disk-backed operator should be forced to perform. "Repartition" can mean
    59  	// different things depending on the operator (for example, for hash joiner
    60  	// it is dividing original partition into multiple new partitions; for sorter
    61  	// it is merging already created partitions into new one before proceeding
    62  	// to the next partition from the input).
    63  	numForcedRepartitions int
    64  	// rng (if set) will be used to randomize batch size.
    65  	rng *rand.Rand
    66  }
    67  
    68  // verifyColOperator passes inputs through both the processor defined by pspec
    69  // and the corresponding columnar operator and verifies that the results match.
    70  func verifyColOperator(args verifyColOperatorArgs) error {
    71  	const floatPrecision = 0.0000001
    72  	rng := args.rng
    73  	if rng == nil {
    74  		rng, _ = randutil.NewPseudoRand()
    75  	}
    76  	if rng.Float64() < 0.5 {
    77  		randomBatchSize := 1 + rng.Intn(3)
    78  		fmt.Printf("coldata.BatchSize() is set to %d\n", randomBatchSize)
    79  		if err := coldata.SetBatchSizeForTests(randomBatchSize); err != nil {
    80  			return err
    81  		}
    82  	}
    83  
    84  	ctx := context.Background()
    85  	st := cluster.MakeTestingClusterSettings()
    86  	tempEngine, tempFS, err := storage.NewTempEngine(ctx, storage.DefaultStorageEngine, base.DefaultTestTempStorageConfig(st), base.DefaultTestStoreSpec)
    87  	if err != nil {
    88  		return err
    89  	}
    90  	defer tempEngine.Close()
    91  
    92  	evalCtx := tree.MakeTestingEvalContext(st)
    93  	defer evalCtx.Stop(ctx)
    94  	diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
    95  	defer diskMonitor.Stop(ctx)
    96  	flowCtx := &execinfra.FlowCtx{
    97  		EvalCtx: &evalCtx,
    98  		Cfg: &execinfra.ServerConfig{
    99  			Settings:    st,
   100  			TempStorage: tempEngine,
   101  			DiskMonitor: diskMonitor,
   102  		},
   103  	}
   104  	flowCtx.Cfg.TestingKnobs.ForceDiskSpill = args.forceDiskSpill
   105  
   106  	inputsProc := make([]execinfra.RowSource, len(args.inputs))
   107  	inputsColOp := make([]execinfra.RowSource, len(args.inputs))
   108  	for i, input := range args.inputs {
   109  		inputsProc[i] = execinfra.NewRepeatableRowSource(args.inputTypes[i], input)
   110  		inputsColOp[i] = execinfra.NewRepeatableRowSource(args.inputTypes[i], input)
   111  	}
   112  
   113  	proc, err := rowexec.NewProcessor(
   114  		ctx, flowCtx, 0, &args.pspec.Core, &args.pspec.Post,
   115  		inputsProc, []execinfra.RowReceiver{nil}, nil,
   116  	)
   117  	if err != nil {
   118  		return err
   119  	}
   120  	outProc, ok := proc.(execinfra.RowSource)
   121  	if !ok {
   122  		return errors.New("processor is unexpectedly not a RowSource")
   123  	}
   124  
   125  	acc := evalCtx.Mon.MakeBoundAccount()
   126  	defer acc.Close(ctx)
   127  	testAllocator := colmem.NewAllocator(ctx, &acc, coldataext.NewExtendedColumnFactory(&evalCtx))
   128  	columnarizers := make([]colexecbase.Operator, len(args.inputs))
   129  	for i, input := range inputsColOp {
   130  		c, err := colexec.NewColumnarizer(ctx, testAllocator, flowCtx, int32(i)+1, input)
   131  		if err != nil {
   132  			return err
   133  		}
   134  		columnarizers[i] = c
   135  	}
   136  
   137  	constructorArgs := colexec.NewColOperatorArgs{
   138  		Spec:                 args.pspec,
   139  		Inputs:               columnarizers,
   140  		StreamingMemAccount:  &acc,
   141  		ProcessorConstructor: rowexec.NewProcessor,
   142  		DiskQueueCfg:         colcontainer.DiskQueueCfg{FS: tempFS},
   143  		FDSemaphore:          colexecbase.NewTestingSemaphore(256),
   144  	}
   145  	var spilled bool
   146  	if args.forceDiskSpill {
   147  		constructorArgs.TestingKnobs.SpillingCallbackFn = func() { spilled = true }
   148  	}
   149  	constructorArgs.TestingKnobs.NumForcedRepartitions = args.numForcedRepartitions
   150  	result, err := colexec.NewColOperator(ctx, flowCtx, constructorArgs)
   151  	if err != nil {
   152  		return err
   153  	}
   154  	defer func() {
   155  		for _, memAccount := range result.OpAccounts {
   156  			memAccount.Close(ctx)
   157  		}
   158  		for _, memMonitor := range result.OpMonitors {
   159  			memMonitor.Stop(ctx)
   160  		}
   161  	}()
   162  
   163  	outColOp, err := colexec.NewMaterializer(
   164  		flowCtx,
   165  		int32(len(args.inputs))+2,
   166  		result.Op,
   167  		args.outputTypes,
   168  		nil, /* output */
   169  		result.MetadataSources,
   170  		nil, /* toClose */
   171  		nil, /* outputStatsToTrace */
   172  		nil, /* cancelFlow */
   173  	)
   174  	if err != nil {
   175  		return err
   176  	}
   177  
   178  	outProc.Start(ctx)
   179  	outColOp.Start(ctx)
   180  	defer outProc.ConsumerClosed()
   181  	defer outColOp.ConsumerClosed()
   182  
   183  	printRowForChecking := func(r sqlbase.EncDatumRow) []string {
   184  		res := make([]string, len(args.outputTypes))
   185  		for i, col := range r {
   186  			res[i] = col.String(args.outputTypes[i])
   187  		}
   188  		return res
   189  	}
   190  	var procRows, colOpRows [][]string
   191  	var procMetas, colOpMetas []execinfrapb.ProducerMetadata
   192  	for {
   193  		rowProc, metaProc := outProc.Next()
   194  		if rowProc != nil {
   195  			procRows = append(procRows, printRowForChecking(rowProc))
   196  		}
   197  		if metaProc != nil {
   198  			if metaProc.Err == nil {
   199  				return errors.Errorf("unexpectedly processor returned non-error "+
   200  					"meta\n%+v", metaProc)
   201  			}
   202  			procMetas = append(procMetas, *metaProc)
   203  		}
   204  		rowColOp, metaColOp := outColOp.Next()
   205  		if rowColOp != nil {
   206  			colOpRows = append(colOpRows, printRowForChecking(rowColOp))
   207  		}
   208  		if metaColOp != nil {
   209  			if metaColOp.Err == nil {
   210  				return errors.Errorf("unexpectedly columnar operator returned "+
   211  					"non-error meta\n%+v", metaColOp)
   212  			}
   213  			colOpMetas = append(colOpMetas, *metaColOp)
   214  		}
   215  
   216  		if rowProc == nil && metaProc == nil &&
   217  			rowColOp == nil && metaColOp == nil {
   218  			break
   219  		}
   220  	}
   221  
   222  	if len(procMetas) != len(colOpMetas) {
   223  		return errors.Errorf("different number of metas returned:\n"+
   224  			"processor returned\n%+v\n\ncolumnar operator returned\n%+v",
   225  			procMetas, colOpMetas)
   226  	}
   227  	// It is possible that a query will hit an error (for example, integer out of
   228  	// range). We then expect that both the processor and the operator returned
   229  	// such error.
   230  	if len(procMetas) > 1 {
   231  		return errors.Errorf("unexpectedly multiple metas returned:\n"+
   232  			"processor returned\n%+v\n\ncolumnar operator returned\n%+v",
   233  			procMetas, colOpMetas)
   234  	} else if len(procMetas) == 1 {
   235  		procErr := procMetas[0].Err.Error()
   236  		colOpErr := colOpMetas[0].Err.Error()
   237  		if procErr != colOpErr {
   238  			return errors.Errorf("different errors returned:\n"+
   239  				"processor return\n%+v\ncolumnar operator returned\n%+v",
   240  				procMetas[0].Err, colOpMetas[0].Err)
   241  		}
   242  		// The errors are the same, so the rows that were returned do not matter.
   243  		return nil
   244  	}
   245  
   246  	if len(procRows) != len(colOpRows) {
   247  		return errors.Errorf("different number of rows returned:\n"+
   248  			"processor returned\n%+v\n\ncolumnar operator returned\n%+v\n"+
   249  			"processor metas\n%+v\ncolumnar operator metas\n%+v\n",
   250  			procRows, colOpRows, procMetas, colOpMetas)
   251  	}
   252  
   253  	printRowsOutput := func(rows [][]string) string {
   254  		res := ""
   255  		for i, row := range rows {
   256  			res = fmt.Sprintf("%s\n%d: %v", res, i, row)
   257  		}
   258  		return res
   259  	}
   260  
   261  	datumsMatch := func(expected, actual string, typ *types.T) (bool, error) {
   262  		switch typ.Family() {
   263  		case types.FloatFamily:
   264  			// Some operations on floats (for example, aggregation) can produce
   265  			// slightly different results in the row-by-row and vectorized engines.
   266  			// That's why we handle them separately.
   267  
   268  			// We first try direct string matching. If that succeeds, then great!
   269  			if expected == actual {
   270  				return true, nil
   271  			}
   272  			// If only one of the values is NULL, then the datums do not match.
   273  			if expected == `NULL` || actual == `NULL` {
   274  				return false, nil
   275  			}
   276  			// Now we will try parsing both strings as floats and check whether they
   277  			// are within allowed precision from each other.
   278  			expFloat, err := strconv.ParseFloat(expected, 64)
   279  			if err != nil {
   280  				return false, err
   281  			}
   282  			actualFloat, err := strconv.ParseFloat(actual, 64)
   283  			if err != nil {
   284  				return false, err
   285  			}
   286  			return math.Abs(expFloat-actualFloat) < floatPrecision, nil
   287  		default:
   288  			return expected == actual, nil
   289  		}
   290  	}
   291  
   292  	colIdxsToCheckForEquality := args.colIdxsToCheckForEquality
   293  	if len(colIdxsToCheckForEquality) == 0 {
   294  		colIdxsToCheckForEquality = make([]int, len(args.outputTypes))
   295  		for i := range colIdxsToCheckForEquality {
   296  			colIdxsToCheckForEquality[i] = i
   297  		}
   298  	}
   299  	if args.anyOrder {
   300  		used := make([]bool, len(colOpRows))
   301  		for i, expStrRow := range procRows {
   302  			rowMatched := false
   303  			for j, retStrRow := range colOpRows {
   304  				if used[j] {
   305  					continue
   306  				}
   307  				foundDifference := false
   308  				for _, colIdx := range colIdxsToCheckForEquality {
   309  					match, err := datumsMatch(expStrRow[colIdx], retStrRow[colIdx], args.outputTypes[colIdx])
   310  					if err != nil {
   311  						return errors.Errorf("error while parsing datum in rows\n%v\n%v\n%s",
   312  							expStrRow, retStrRow, err.Error())
   313  					}
   314  					if !match {
   315  						foundDifference = true
   316  						break
   317  					}
   318  				}
   319  				if !foundDifference {
   320  					rowMatched = true
   321  					used[j] = true
   322  					break
   323  				}
   324  			}
   325  			if !rowMatched {
   326  				return errors.Errorf("different results: no match found for row %d of processor output\n"+
   327  					"processor output:%s\n\ncolumnar operator output:%s",
   328  					i, printRowsOutput(procRows), printRowsOutput(colOpRows))
   329  			}
   330  		}
   331  	} else {
   332  		for i, expStrRow := range procRows {
   333  			retStrRow := colOpRows[i]
   334  			// anyOrder is false, so the result rows must match in the same order.
   335  			for _, colIdx := range colIdxsToCheckForEquality {
   336  				match, err := datumsMatch(expStrRow[colIdx], retStrRow[colIdx], args.outputTypes[colIdx])
   337  				if err != nil {
   338  					return errors.Errorf("error while parsing datum in rows\n%v\n%v\n%s",
   339  						expStrRow, retStrRow, err.Error())
   340  				}
   341  				if !match {
   342  					return errors.Errorf(
   343  						"different results on row %d;\nexpected:\n%s\ngot:\n%s",
   344  						i, expStrRow, retStrRow,
   345  					)
   346  				}
   347  			}
   348  		}
   349  	}
   350  
   351  	if args.forceDiskSpill {
   352  		// Check that the spilling did occur.
   353  		if !spilled && !args.forcedDiskSpillMightNotOccur {
   354  			return errors.Errorf("expected spilling to disk but it did *not* occur")
   355  		}
   356  	}
   357  	return nil
   358  }