// github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/distsql/columnar_utils_test.go (about)

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package distsql

import (
	"context"
	"fmt"
	"math"
	"math/rand"
	"strconv"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/col/coldata"
	"github.com/cockroachdb/cockroach/pkg/col/coldataext"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/sql/colcontainer"
	"github.com/cockroachdb/cockroach/pkg/sql/colexec"
	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/rowexec"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/util/randutil"
	"github.com/cockroachdb/errors"
)

// verifyColOperatorArgs bundles the inputs and testing knobs consumed by
// verifyColOperator.
type verifyColOperatorArgs struct {
	// anyOrder determines whether the results should be matched in order (when
	// anyOrder is false) or as sets (when anyOrder is true).
	anyOrder bool
	// colIdxsToCheckForEquality determines which columns of the rows to use
	// for equality check. If left unset, full rows are compared. Use this
	// with caution and leave a comment that justifies using this knob.
	colIdxsToCheckForEquality []int
	// inputTypes holds the column types of each input; inputTypes[i] describes
	// the rows in inputs[i].
	inputTypes [][]*types.T
	// inputs holds the rows of each input. Every input is fed to both the
	// row-by-row processor and the columnar operator through separate
	// repeatable row sources.
	inputs []sqlbase.EncDatumRows
	// outputTypes holds the types of the output columns. They are handed to
	// the materializer and are used to stringify and compare output datums.
	outputTypes []*types.T
	// pspec is the processor spec under test. Its Core and Post are used to
	// construct the row-by-row processor, and the whole spec is used to
	// construct the columnar operator.
	pspec *execinfrapb.ProcessorSpec
	// forceDiskSpill, if set, will force the operator to spill to disk.
	forceDiskSpill bool
	// forcedDiskSpillMightNotOccur determines whether we error out if
	// forceDiskSpill is true but the spilling doesn't occur. Please leave an
	// explanation for why that could be the case.
	forcedDiskSpillMightNotOccur bool
	// numForcedRepartitions specifies a number of "repartitions" that a
	// disk-backed operator should be forced to perform. "Repartition" can mean
	// different things depending on the operator (for example, for hash joiner
	// it is dividing original partition into multiple new partitions; for sorter
	// it is merging already created partitions into new one before proceeding
	// to the next partition from the input).
	numForcedRepartitions int
	// rng (if set) will be used to randomize batch size. If it is nil, a new
	// pseudo-random generator is created for that purpose.
	rng *rand.Rand
}

// verifyColOperator passes inputs through both the processor defined by pspec
// and the corresponding columnar operator and verifies that the results match.
// It returns nil when the two outputs agree (or when both sides produce the
// same single error), and an error describing the discrepancy or the setup
// failure otherwise.
70 func verifyColOperator(args verifyColOperatorArgs) error { 71 const floatPrecision = 0.0000001 72 rng := args.rng 73 if rng == nil { 74 rng, _ = randutil.NewPseudoRand() 75 } 76 if rng.Float64() < 0.5 { 77 randomBatchSize := 1 + rng.Intn(3) 78 fmt.Printf("coldata.BatchSize() is set to %d\n", randomBatchSize) 79 if err := coldata.SetBatchSizeForTests(randomBatchSize); err != nil { 80 return err 81 } 82 } 83 84 ctx := context.Background() 85 st := cluster.MakeTestingClusterSettings() 86 tempEngine, tempFS, err := storage.NewTempEngine(ctx, storage.DefaultStorageEngine, base.DefaultTestTempStorageConfig(st), base.DefaultTestStoreSpec) 87 if err != nil { 88 return err 89 } 90 defer tempEngine.Close() 91 92 evalCtx := tree.MakeTestingEvalContext(st) 93 defer evalCtx.Stop(ctx) 94 diskMonitor := execinfra.NewTestDiskMonitor(ctx, st) 95 defer diskMonitor.Stop(ctx) 96 flowCtx := &execinfra.FlowCtx{ 97 EvalCtx: &evalCtx, 98 Cfg: &execinfra.ServerConfig{ 99 Settings: st, 100 TempStorage: tempEngine, 101 DiskMonitor: diskMonitor, 102 }, 103 } 104 flowCtx.Cfg.TestingKnobs.ForceDiskSpill = args.forceDiskSpill 105 106 inputsProc := make([]execinfra.RowSource, len(args.inputs)) 107 inputsColOp := make([]execinfra.RowSource, len(args.inputs)) 108 for i, input := range args.inputs { 109 inputsProc[i] = execinfra.NewRepeatableRowSource(args.inputTypes[i], input) 110 inputsColOp[i] = execinfra.NewRepeatableRowSource(args.inputTypes[i], input) 111 } 112 113 proc, err := rowexec.NewProcessor( 114 ctx, flowCtx, 0, &args.pspec.Core, &args.pspec.Post, 115 inputsProc, []execinfra.RowReceiver{nil}, nil, 116 ) 117 if err != nil { 118 return err 119 } 120 outProc, ok := proc.(execinfra.RowSource) 121 if !ok { 122 return errors.New("processor is unexpectedly not a RowSource") 123 } 124 125 acc := evalCtx.Mon.MakeBoundAccount() 126 defer acc.Close(ctx) 127 testAllocator := colmem.NewAllocator(ctx, &acc, coldataext.NewExtendedColumnFactory(&evalCtx)) 128 columnarizers := 
make([]colexecbase.Operator, len(args.inputs)) 129 for i, input := range inputsColOp { 130 c, err := colexec.NewColumnarizer(ctx, testAllocator, flowCtx, int32(i)+1, input) 131 if err != nil { 132 return err 133 } 134 columnarizers[i] = c 135 } 136 137 constructorArgs := colexec.NewColOperatorArgs{ 138 Spec: args.pspec, 139 Inputs: columnarizers, 140 StreamingMemAccount: &acc, 141 ProcessorConstructor: rowexec.NewProcessor, 142 DiskQueueCfg: colcontainer.DiskQueueCfg{FS: tempFS}, 143 FDSemaphore: colexecbase.NewTestingSemaphore(256), 144 } 145 var spilled bool 146 if args.forceDiskSpill { 147 constructorArgs.TestingKnobs.SpillingCallbackFn = func() { spilled = true } 148 } 149 constructorArgs.TestingKnobs.NumForcedRepartitions = args.numForcedRepartitions 150 result, err := colexec.NewColOperator(ctx, flowCtx, constructorArgs) 151 if err != nil { 152 return err 153 } 154 defer func() { 155 for _, memAccount := range result.OpAccounts { 156 memAccount.Close(ctx) 157 } 158 for _, memMonitor := range result.OpMonitors { 159 memMonitor.Stop(ctx) 160 } 161 }() 162 163 outColOp, err := colexec.NewMaterializer( 164 flowCtx, 165 int32(len(args.inputs))+2, 166 result.Op, 167 args.outputTypes, 168 nil, /* output */ 169 result.MetadataSources, 170 nil, /* toClose */ 171 nil, /* outputStatsToTrace */ 172 nil, /* cancelFlow */ 173 ) 174 if err != nil { 175 return err 176 } 177 178 outProc.Start(ctx) 179 outColOp.Start(ctx) 180 defer outProc.ConsumerClosed() 181 defer outColOp.ConsumerClosed() 182 183 printRowForChecking := func(r sqlbase.EncDatumRow) []string { 184 res := make([]string, len(args.outputTypes)) 185 for i, col := range r { 186 res[i] = col.String(args.outputTypes[i]) 187 } 188 return res 189 } 190 var procRows, colOpRows [][]string 191 var procMetas, colOpMetas []execinfrapb.ProducerMetadata 192 for { 193 rowProc, metaProc := outProc.Next() 194 if rowProc != nil { 195 procRows = append(procRows, printRowForChecking(rowProc)) 196 } 197 if metaProc != nil { 198 if 
metaProc.Err == nil { 199 return errors.Errorf("unexpectedly processor returned non-error "+ 200 "meta\n%+v", metaProc) 201 } 202 procMetas = append(procMetas, *metaProc) 203 } 204 rowColOp, metaColOp := outColOp.Next() 205 if rowColOp != nil { 206 colOpRows = append(colOpRows, printRowForChecking(rowColOp)) 207 } 208 if metaColOp != nil { 209 if metaColOp.Err == nil { 210 return errors.Errorf("unexpectedly columnar operator returned "+ 211 "non-error meta\n%+v", metaColOp) 212 } 213 colOpMetas = append(colOpMetas, *metaColOp) 214 } 215 216 if rowProc == nil && metaProc == nil && 217 rowColOp == nil && metaColOp == nil { 218 break 219 } 220 } 221 222 if len(procMetas) != len(colOpMetas) { 223 return errors.Errorf("different number of metas returned:\n"+ 224 "processor returned\n%+v\n\ncolumnar operator returned\n%+v", 225 procMetas, colOpMetas) 226 } 227 // It is possible that a query will hit an error (for example, integer out of 228 // range). We then expect that both the processor and the operator returned 229 // such error. 230 if len(procMetas) > 1 { 231 return errors.Errorf("unexpectedly multiple metas returned:\n"+ 232 "processor returned\n%+v\n\ncolumnar operator returned\n%+v", 233 procMetas, colOpMetas) 234 } else if len(procMetas) == 1 { 235 procErr := procMetas[0].Err.Error() 236 colOpErr := colOpMetas[0].Err.Error() 237 if procErr != colOpErr { 238 return errors.Errorf("different errors returned:\n"+ 239 "processor return\n%+v\ncolumnar operator returned\n%+v", 240 procMetas[0].Err, colOpMetas[0].Err) 241 } 242 // The errors are the same, so the rows that were returned do not matter. 
243 return nil 244 } 245 246 if len(procRows) != len(colOpRows) { 247 return errors.Errorf("different number of rows returned:\n"+ 248 "processor returned\n%+v\n\ncolumnar operator returned\n%+v\n"+ 249 "processor metas\n%+v\ncolumnar operator metas\n%+v\n", 250 procRows, colOpRows, procMetas, colOpMetas) 251 } 252 253 printRowsOutput := func(rows [][]string) string { 254 res := "" 255 for i, row := range rows { 256 res = fmt.Sprintf("%s\n%d: %v", res, i, row) 257 } 258 return res 259 } 260 261 datumsMatch := func(expected, actual string, typ *types.T) (bool, error) { 262 switch typ.Family() { 263 case types.FloatFamily: 264 // Some operations on floats (for example, aggregation) can produce 265 // slightly different results in the row-by-row and vectorized engines. 266 // That's why we handle them separately. 267 268 // We first try direct string matching. If that succeeds, then great! 269 if expected == actual { 270 return true, nil 271 } 272 // If only one of the values is NULL, then the datums do not match. 273 if expected == `NULL` || actual == `NULL` { 274 return false, nil 275 } 276 // Now we will try parsing both strings as floats and check whether they 277 // are within allowed precision from each other. 
278 expFloat, err := strconv.ParseFloat(expected, 64) 279 if err != nil { 280 return false, err 281 } 282 actualFloat, err := strconv.ParseFloat(actual, 64) 283 if err != nil { 284 return false, err 285 } 286 return math.Abs(expFloat-actualFloat) < floatPrecision, nil 287 default: 288 return expected == actual, nil 289 } 290 } 291 292 colIdxsToCheckForEquality := args.colIdxsToCheckForEquality 293 if len(colIdxsToCheckForEquality) == 0 { 294 colIdxsToCheckForEquality = make([]int, len(args.outputTypes)) 295 for i := range colIdxsToCheckForEquality { 296 colIdxsToCheckForEquality[i] = i 297 } 298 } 299 if args.anyOrder { 300 used := make([]bool, len(colOpRows)) 301 for i, expStrRow := range procRows { 302 rowMatched := false 303 for j, retStrRow := range colOpRows { 304 if used[j] { 305 continue 306 } 307 foundDifference := false 308 for _, colIdx := range colIdxsToCheckForEquality { 309 match, err := datumsMatch(expStrRow[colIdx], retStrRow[colIdx], args.outputTypes[colIdx]) 310 if err != nil { 311 return errors.Errorf("error while parsing datum in rows\n%v\n%v\n%s", 312 expStrRow, retStrRow, err.Error()) 313 } 314 if !match { 315 foundDifference = true 316 break 317 } 318 } 319 if !foundDifference { 320 rowMatched = true 321 used[j] = true 322 break 323 } 324 } 325 if !rowMatched { 326 return errors.Errorf("different results: no match found for row %d of processor output\n"+ 327 "processor output:%s\n\ncolumnar operator output:%s", 328 i, printRowsOutput(procRows), printRowsOutput(colOpRows)) 329 } 330 } 331 } else { 332 for i, expStrRow := range procRows { 333 retStrRow := colOpRows[i] 334 // anyOrder is false, so the result rows must match in the same order. 
335 for _, colIdx := range colIdxsToCheckForEquality { 336 match, err := datumsMatch(expStrRow[colIdx], retStrRow[colIdx], args.outputTypes[colIdx]) 337 if err != nil { 338 return errors.Errorf("error while parsing datum in rows\n%v\n%v\n%s", 339 expStrRow, retStrRow, err.Error()) 340 } 341 if !match { 342 return errors.Errorf( 343 "different results on row %d;\nexpected:\n%s\ngot:\n%s", 344 i, expStrRow, retStrRow, 345 ) 346 } 347 } 348 } 349 } 350 351 if args.forceDiskSpill { 352 // Check that the spilling did occur. 353 if !spilled && !args.forcedDiskSpillMightNotOccur { 354 return errors.Errorf("expected spilling to disk but it did *not* occur") 355 } 356 } 357 return nil 358 }