github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/benchmark_test.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"context"
	"encoding/base64"
	"fmt"
	"math/rand"
	"sort"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/ast"
	"github.com/whtcorpsinc/log"
	"github.com/whtcorpsinc/milevadb/causet/embedded"
	"github.com/whtcorpsinc/milevadb/causet/property"
	"github.com/whtcorpsinc/milevadb/causet/soliton"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/memex/aggregation"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/disk"
	"github.com/whtcorpsinc/milevadb/soliton/memory"
	"github.com/whtcorpsinc/milevadb/soliton/mock"
	"github.com/whtcorpsinc/milevadb/soliton/stringutil"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/stochastikctx/variable"
	"github.com/whtcorpsinc/milevadb/types"
	"go.uber.org/zap/zapembedded"
)

var (
	_ InterlockingDirectorate = &mockDataSource{}
	_ embedded.PhysicalCauset = &mockDataPhysicalCauset{}

	wideString = strings.Repeat("x", 5*1024)
)

type mockDataSourceParameters struct {
	schemaReplicant *memex.Schema
	genDataFunc     func(event int, typ *types.FieldType) interface{}
	ndvs            []int  // number of distinct values on defCausumns[i]; zero represents no limit
	orders          []bool // defCausumns[i] should be ordered if orders[i] is true
	rows            int    // number of rows the DataSource should output
	ctx             stochastikctx.Context
}

type mockDataSource struct {
	baseInterlockingDirectorate
	p        mockDataSourceParameters
	genData  []*chunk.Chunk
	chunks   []*chunk.Chunk
	chunkPtr int
}

type mockDataPhysicalCauset struct {
	MockPhysicalCauset
	schemaReplicant *memex.Schema
	exec            InterlockingDirectorate
}

func (mp *mockDataPhysicalCauset) GetInterlockingDirectorate() InterlockingDirectorate {
	return mp.exec
}

func (mp *mockDataPhysicalCauset) Schema() *memex.Schema {
	return mp.schemaReplicant
}

func (mp *mockDataPhysicalCauset) ExplainID() fmt.Stringer {
	return stringutil.MemoizeStr(func() string {
		return "mockData_0"
	})
}

func (mp *mockDataPhysicalCauset) Stats() *property.StatsInfo {
	return nil
}

func (mp *mockDataPhysicalCauset) SelectBlockOffset() int {
	return 0
}

func (mds *mockDataSource) genDefCausCausets(defCaus int) (results []interface{}) {
	typ := mds.retFieldTypes[defCaus]
	order := false
	if defCaus < len(mds.p.orders) {
		order = mds.p.orders[defCaus]
	}
	rows := mds.p.rows
	NDV := 0
	if defCaus < len(mds.p.ndvs) {
		NDV = mds.p.ndvs[defCaus]
	}
	results = make([]interface{}, 0, rows)
	if NDV == 0 {
		if mds.p.genDataFunc == nil {
			for i := 0; i < rows; i++ {
				results = append(results, mds.randCauset(typ))
			}
		} else {
			for i := 0; i < rows; i++ {
				results = append(results, mds.p.genDataFunc(i, typ))
			}
		}
	} else {
		datumSet := make(map[string]bool, NDV)
		datums := make([]interface{}, 0, NDV)
		for len(datums) < NDV {
			d := mds.randCauset(typ)
			str := fmt.Sprintf("%v", d)
			if datumSet[str] {
				continue
			}
			datumSet[str] = true
			datums = append(datums, d)
		}

		for i := 0; i < rows; i++ {
			results = append(results, datums[rand.Intn(NDV)])
		}
	}

	if order {
		sort.Slice(results, func(i, j int) bool {
			switch typ.Tp {
			case allegrosql.TypeLong, allegrosql.TypeLonglong:
				return results[i].(int64) < results[j].(int64)
			case allegrosql.TypeDouble:
				return results[i].(float64) < results[j].(float64)
			case allegrosql.TypeVarString:
				return results[i].(string) < results[j].(string)
			default:
				panic("not implement")
			}
		})
	}

	return
}

func (mds *mockDataSource) randCauset(typ *types.FieldType) interface{} {
	switch typ.Tp {
	case allegrosql.TypeLong, allegrosql.TypeLonglong:
		return int64(rand.Int())
	case allegrosql.TypeDouble, allegrosql.TypeFloat:
		return rand.Float64()
	case allegrosql.TypeNewDecimal:
		var d types.MyDecimal
		return d.FromInt(int64(rand.Int()))
	case allegrosql.TypeVarString:
		buff := make([]byte, 10)
		rand.Read(buff)
		return base64.RawURLEncoding.EncodeToString(buff)
	default:
		panic("not implement")
	}
}

func (mds *mockDataSource) prepareChunks() {
	mds.chunks = make([]*chunk.Chunk, len(mds.genData))
	for i := range mds.chunks {
		mds.chunks[i] = mds.genData[i].CopyConstruct()
	}
	mds.chunkPtr = 0
}

func (mds *mockDataSource) Next(ctx context.Context, req *chunk.Chunk) error {
	if mds.chunkPtr >= len(mds.chunks) {
		req.Reset()
		return nil
	}
	dataChk := mds.chunks[mds.chunkPtr]
	dataChk.SwapDeferredCausets(req)
	mds.chunkPtr++
	return nil
}

func buildMockDataSource(opt mockDataSourceParameters) *mockDataSource {
	baseInterDirc := newBaseInterlockingDirectorate(opt.ctx, opt.schemaReplicant, 0)
	m := &mockDataSource{baseInterDirc, opt, nil, nil, 0}
	rTypes := retTypes(m)
	defCausData := make([][]interface{}, len(rTypes))
	for i := 0; i < len(rTypes); i++ {
		defCausData[i] = m.genDefCausCausets(i)
	}

	m.genData = make([]*chunk.Chunk, (m.p.rows+m.maxChunkSize-1)/m.maxChunkSize)
	for i := range m.genData {
		m.genData[i] = chunk.NewChunkWithCapacity(retTypes(m), m.maxChunkSize)
	}

	for i := 0; i < m.p.rows; i++ {
		idx := i / m.maxChunkSize
		retTypes := retTypes(m)
		for defCausIdx := 0; defCausIdx < len(rTypes); defCausIdx++ {
			switch retTypes[defCausIdx].Tp {
			case allegrosql.TypeLong, allegrosql.TypeLonglong:
				m.genData[idx].AppendInt64(defCausIdx, defCausData[defCausIdx][i].(int64))
			case allegrosql.TypeDouble, allegrosql.TypeFloat:
				m.genData[idx].AppendFloat64(defCausIdx, defCausData[defCausIdx][i].(float64))
			case allegrosql.TypeNewDecimal:
				m.genData[idx].AppendMyDecimal(defCausIdx, defCausData[defCausIdx][i].(*types.MyDecimal))
			case allegrosql.TypeVarString:
				m.genData[idx].AppendString(defCausIdx, defCausData[defCausIdx][i].(string))
			default:
				panic("not implement")
			}
		}
	}
	return m
}

func buildMockDataSourceWithIndex(opt mockDataSourceParameters, index []int) *mockDataSource {
	opt.orders = make([]bool, len(opt.schemaReplicant.DeferredCausets))
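	// Mark each indexed defCausumn as ordered so the generated data follows the index order.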
	for _, idx := range index {
		opt.orders[idx] = true
	}
	return buildMockDataSource(opt)
}

// aggTestCase has a fixed schemaReplicant (aggDefCaus Double, groupBy LongLong).
type aggTestCase struct {
	execType    string // "hash" or "stream"
	aggFunc     string // sum, avg, count ....
	groupByNDV  int    // the number of distinct group-by keys
	hasDistinct bool
	rows        int
	concurrency int
	ctx         stochastikctx.Context
}

func (a aggTestCase) defCausumns() []*memex.DeferredCauset {
	return []*memex.DeferredCauset{
		{Index: 0, RetType: types.NewFieldType(allegrosql.TypeDouble)},
		{Index: 1, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
	}
}

func (a aggTestCase) String() string {
	return fmt.Sprintf("(execType:%v, aggFunc:%v, ndv:%v, hasDistinct:%v, rows:%v, concurrency:%v)",
		a.execType, a.aggFunc, a.groupByNDV, a.hasDistinct, a.rows, a.concurrency)
}

func defaultAggTestCase(exec string) *aggTestCase {
	ctx := mock.NewContext()
	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
	return &aggTestCase{exec, ast.AggFuncSum, 1000, false, 10000000, 4, ctx}
}

func buildHashAggInterlockingDirectorate(ctx stochastikctx.Context, src InterlockingDirectorate, schemaReplicant *memex.Schema,
	aggFuncs []*aggregation.AggFuncDesc, groupItems []memex.Expression) InterlockingDirectorate {
	plan := new(embedded.PhysicalHashAgg)
	plan.AggFuncs = aggFuncs
	plan.GroupByItems = groupItems
	plan.SetSchema(schemaReplicant)
	plan.Init(ctx, nil, 0)
	plan.SetChildren(nil)
	b := newInterlockingDirectorateBuilder(ctx, nil)
	exec := b.build(plan)
	hashAgg := exec.(*HashAggInterDirc)
	hashAgg.children[0] = src
	return exec
}

func buildStreamAggInterlockingDirectorate(ctx stochastikctx.Context, src InterlockingDirectorate, schemaReplicant *memex.Schema,
	aggFuncs []*aggregation.AggFuncDesc, groupItems []memex.Expression) InterlockingDirectorate {
	plan := new(embedded.PhysicalStreamAgg)
	plan.AggFuncs = aggFuncs
	plan.GroupByItems = groupItems
	plan.SetSchema(schemaReplicant)
	plan.Init(ctx, nil, 0)
	plan.SetChildren(nil)
	b := newInterlockingDirectorateBuilder(ctx, nil)
	exec := b.build(plan)
	streamAgg := exec.(*StreamAggInterDirc)
	streamAgg.children[0] = src
	return exec
}

func builPosetDaggInterlockingDirectorate(b *testing.B, testCase *aggTestCase, child InterlockingDirectorate) InterlockingDirectorate {
	ctx := testCase.ctx
	if err := ctx.GetStochastikVars().SetSystemVar(variable.MilevaDBHashAggFinalConcurrency, fmt.Sprintf("%v", testCase.concurrency)); err != nil {
		b.Fatal(err)
	}
	if err := ctx.GetStochastikVars().SetSystemVar(variable.MilevaDBHashAggPartialConcurrency, fmt.Sprintf("%v", testCase.concurrency)); err != nil {
		b.Fatal(err)
	}

	childDefCauss := testCase.defCausumns()
	schemaReplicant := memex.NewSchema(childDefCauss...)
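	// Per aggTestCase's fixed schema: defCausumns()[0] (Double) feeds the aggregate
	// function, and defCausumns()[1] (LongLong) is the group-by key.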
	groupBy := []memex.Expression{childDefCauss[1]}
	aggFunc, err := aggregation.NewAggFuncDesc(testCase.ctx, testCase.aggFunc, []memex.Expression{childDefCauss[0]}, testCase.hasDistinct)
	if err != nil {
		b.Fatal(err)
	}
	aggFuncs := []*aggregation.AggFuncDesc{aggFunc}

	var aggInterDirc InterlockingDirectorate
	switch testCase.execType {
	case "hash":
		aggInterDirc = buildHashAggInterlockingDirectorate(testCase.ctx, child, schemaReplicant, aggFuncs, groupBy)
	case "stream":
		aggInterDirc = buildStreamAggInterlockingDirectorate(testCase.ctx, child, schemaReplicant, aggFuncs, groupBy)
	default:
		b.Fatal("not implement")
	}
	return aggInterDirc
}

func benchmarkAggInterDircWithCase(b *testing.B, casTest *aggTestCase) {
	defcaus := casTest.defCausumns()
	orders := []bool{false, casTest.execType == "stream"}
	dataSource := buildMockDataSource(mockDataSourceParameters{
		schemaReplicant: memex.NewSchema(defcaus...),
		ndvs:            []int{0, casTest.groupByNDV},
		orders:          orders,
		rows:            casTest.rows,
		ctx:             casTest.ctx,
	})

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer() // prepare a new agg-interlock
		aggInterDirc := builPosetDaggInterlockingDirectorate(b, casTest, dataSource)
		tmpCtx := context.Background()
		chk := newFirstChunk(aggInterDirc)
		dataSource.prepareChunks()

		b.StartTimer()
		if err := aggInterDirc.Open(tmpCtx); err != nil {
			b.Fatal(err)
		}
		for {
			if err := aggInterDirc.Next(tmpCtx, chk); err != nil {
				b.Fatal(err)
			}
			if chk.NumEvents() == 0 {
				break
			}
		}

		if err := aggInterDirc.Close(); err != nil {
			b.Fatal(err)
		}
		b.StopTimer()
	}
}

func BenchmarkAggEvents(b *testing.B) {
	rows := []int{100000, 1000000, 10000000}
	concurrencies := []int{1, 4, 8, 15, 20, 30, 40}
	for _, event := range rows {
		for _, con := range concurrencies {
			for _, exec := range []string{"hash", "stream"} {
				if exec == "stream" && con > 1 {
					continue
				}
				cas := defaultAggTestCase(exec)
				cas.rows = event
				cas.concurrency = con
				b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
					benchmarkAggInterDircWithCase(b, cas)
				})
			}
		}
	}
}

func BenchmarkAggGroupByNDV(b *testing.B) {
	NDVs := []int{10, 100, 1000, 10000, 100000, 1000000, 10000000}
	for _, NDV := range NDVs {
		for _, exec := range []string{"hash", "stream"} {
			cas := defaultAggTestCase(exec)
			cas.groupByNDV = NDV
			b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
				benchmarkAggInterDircWithCase(b, cas)
			})
		}
	}
}

func BenchmarkAggConcurrency(b *testing.B) {
	concs := []int{1, 4, 8, 15, 20, 30, 40}
	for _, con := range concs {
		for _, exec := range []string{"hash", "stream"} {
			if exec == "stream" && con > 1 {
				continue
			}
			cas := defaultAggTestCase(exec)
			cas.concurrency = con
			b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
				benchmarkAggInterDircWithCase(b, cas)
			})
		}
	}
}

func BenchmarkAggDistinct(b *testing.B) {
	rows := []int{100000, 1000000, 10000000}
	distincts := []bool{false, true}
	for _, event := range rows {
		for _, exec := range []string{"hash", "stream"} {
			for _, distinct := range distincts {
				cas := defaultAggTestCase(exec)
				cas.rows = event
				cas.hasDistinct = distinct
				b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
					benchmarkAggInterDircWithCase(b, cas)
				})
			}
		}
	}
}

func buildWindowInterlockingDirectorate(ctx stochastikctx.Context, windowFunc string, funcs int, frame *embedded.WindowFrame, srcInterDirc InterlockingDirectorate, schemaReplicant *memex.Schema, partitionBy []*memex.DeferredCauset, concurrency int, dataSourceSorted bool) InterlockingDirectorate {
	src := &mockDataPhysicalCauset{
		schemaReplicant: srcInterDirc.Schema(),
		exec:            srcInterDirc,
	}

	win := new(embedded.PhysicalWindow)
	win.WindowFuncDescs = make([]*aggregation.WindowFuncDesc, 0)
	winSchema := schemaReplicant.Clone()
	for i := 0; i < funcs; i++ {
		var args []memex.Expression
		switch windowFunc {
		case ast.WindowFuncNtile:
			args = append(args, &memex.Constant{Value: types.NewUintCauset(2)})
		case ast.WindowFuncNthValue:
			args = append(args, partitionBy[0], &memex.Constant{Value: types.NewUintCauset(2)})
		case ast.AggFuncSum:
			args = append(args, src.Schema().DeferredCausets[0])
		case ast.AggFuncAvg:
			args = append(args, src.Schema().DeferredCausets[0])
		case ast.AggFuncBitXor:
			args = append(args, src.Schema().DeferredCausets[0])
		case ast.AggFuncMax, ast.AggFuncMin:
			args = append(args, src.Schema().DeferredCausets[0])
		default:
			args = append(args, partitionBy[0])
		}
		desc, _ := aggregation.NewWindowFuncDesc(ctx, windowFunc, args)

		win.WindowFuncDescs = append(win.WindowFuncDescs, desc)
		winSchema.Append(&memex.DeferredCauset{
			UniqueID: 10 + (int64)(i),
			RetType:  types.NewFieldType(allegrosql.TypeLonglong),
		})
	}
	for _, defCaus := range partitionBy {
		win.PartitionBy = append(win.PartitionBy, property.Item{DefCaus: defCaus})
	}
	win.Frame = frame
	win.OrderBy = nil

	win.SetSchema(winSchema)
	win.Init(ctx, nil, 0)

	var tail embedded.PhysicalCauset = win
	if !dataSourceSorted {
		byItems := make([]*soliton.ByItems, 0, len(partitionBy))
		for _, defCaus := range partitionBy {
			byItems = append(byItems, &soliton.ByItems{Expr: defCaus, Desc: false})
		}
		sort := &embedded.PhysicalSort{ByItems: byItems}
		sort.SetChildren(src)
		win.SetChildren(sort)
		tail = sort
	} else {
		win.SetChildren(src)
	}

	var plan embedded.PhysicalCauset
	if concurrency > 1 {
		byItems := make([]memex.Expression, 0, len(win.PartitionBy))
		for _, item := range win.PartitionBy {
			byItems = append(byItems, item.DefCaus)
		}

		plan = embedded.PhysicalShuffle{
			Concurrency:  concurrency,
			Tail:         tail,
			DataSource:   src,
			SplitterType: embedded.PartitionHashSplitterType,
			HashByItems:  byItems,
		}.Init(ctx, nil, 0)
		plan.SetChildren(win)
	} else {
		plan = win
	}

	b := newInterlockingDirectorateBuilder(ctx, nil)
	exec := b.build(plan)
	return exec
}

// windowTestCase has a fixed schemaReplicant (defCaus Double, partitionBy LongLong, rawData VarString(16), defCaus LongLong).
type windowTestCase struct {
	windowFunc string
	numFunc    int // The number of windowFuncs. Default: 1.
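	// frame is the optional window frame; nil means no explicit frame is set on the plan.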
	frame            *embedded.WindowFrame
	ndv              int // the number of distinct group-by keys
	rows             int
	concurrency      int
	dataSourceSorted bool
	ctx              stochastikctx.Context
	rawDataSmall     string
	defCausumns      []*memex.DeferredCauset // the defCausumns of the mock schemaReplicant
}

func (a windowTestCase) String() string {
	return fmt.Sprintf("(func:%v, aggDefCausType:%s, numFunc:%v, ndv:%v, rows:%v, sorted:%v, concurrency:%v)",
		a.windowFunc, a.defCausumns[0].RetType, a.numFunc, a.ndv, a.rows, a.dataSourceSorted, a.concurrency)
}

func defaultWindowTestCase() *windowTestCase {
	ctx := mock.NewContext()
	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
	return &windowTestCase{ast.WindowFuncEventNumber, 1, nil, 1000, 10000000, 1, true, ctx, strings.Repeat("x", 16),
		[]*memex.DeferredCauset{
			{Index: 0, RetType: types.NewFieldType(allegrosql.TypeDouble)},
			{Index: 1, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
			{Index: 2, RetType: types.NewFieldType(allegrosql.TypeVarString)},
			{Index: 3, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
		}}
}

func benchmarkWindowInterDircWithCase(b *testing.B, casTest *windowTestCase) {
	ctx := casTest.ctx
	if err := ctx.GetStochastikVars().SetSystemVar(variable.MilevaDBWindowConcurrency, fmt.Sprintf("%v", casTest.concurrency)); err != nil {
		b.Fatal(err)
	}

	defcaus := casTest.defCausumns
	dataSource := buildMockDataSource(mockDataSourceParameters{
		schemaReplicant: memex.NewSchema(defcaus...),
		ndvs:            []int{0, casTest.ndv, 0, 0},
		orders:          []bool{false, casTest.dataSourceSorted, false, false},
		rows:            casTest.rows,
		ctx:             casTest.ctx,
	})

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer() // prepare a new window-interlock
		childDefCauss := casTest.defCausumns
		schemaReplicant := memex.NewSchema(childDefCauss...)
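		// childDefCauss[1:2] (the LongLong defCausumn) serves as the partition key,
		// matching the fixed windowTestCase schemaReplicant.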
		windowInterDirc := buildWindowInterlockingDirectorate(casTest.ctx, casTest.windowFunc, casTest.numFunc, casTest.frame, dataSource, schemaReplicant, childDefCauss[1:2], casTest.concurrency, casTest.dataSourceSorted)
		tmpCtx := context.Background()
		chk := newFirstChunk(windowInterDirc)
		dataSource.prepareChunks()

		b.StartTimer()
		if err := windowInterDirc.Open(tmpCtx); err != nil {
			b.Fatal(err)
		}
		for {
			if err := windowInterDirc.Next(tmpCtx, chk); err != nil {
				b.Fatal(err)
			}
			if chk.NumEvents() == 0 {
				break
			}
		}

		if err := windowInterDirc.Close(); err != nil {
			b.Fatal(err)
		}
		b.StopTimer()
	}
}

func BenchmarkWindowEvents(b *testing.B) {
	b.ReportAllocs()
	rows := []int{1000, 100000}
	ndvs := []int{10, 1000}
	concs := []int{1, 2, 4}
	for _, event := range rows {
		for _, ndv := range ndvs {
			for _, con := range concs {
				cas := defaultWindowTestCase()
				cas.rows = event
				cas.ndv = ndv
				cas.concurrency = con
				cas.dataSourceSorted = false
				cas.windowFunc = ast.WindowFuncEventNumber // cheapest
				b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
					benchmarkWindowInterDircWithCase(b, cas)
				})
			}
		}
	}
}

func BenchmarkWindowFunctions(b *testing.B) {
	b.ReportAllocs()
	windowFuncs := []string{
		ast.WindowFuncEventNumber,
		ast.WindowFuncRank,
		ast.WindowFuncDenseRank,
		ast.WindowFuncCumeDist,
		ast.WindowFuncPercentRank,
		ast.WindowFuncNtile,
		ast.WindowFuncLead,
		ast.WindowFuncLag,
		ast.WindowFuncFirstValue,
		ast.WindowFuncLastValue,
		ast.WindowFuncNthValue,
	}
	concs := []int{1, 4}
	for _, windowFunc := range windowFuncs {
		for _, con := range concs {
			cas := defaultWindowTestCase()
			cas.rows = 100000
			cas.ndv = 1000
			cas.concurrency = con
			cas.dataSourceSorted = false
			cas.windowFunc = windowFunc
			b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
				benchmarkWindowInterDircWithCase(b, cas)
			})
		}
	}
}

func BenchmarkWindowFunctionsWithFrame(b *testing.B) {
	b.ReportAllocs()
	windowFuncs := []string{
		ast.WindowFuncEventNumber,
		ast.AggFuncBitXor,
	}
	numFuncs := []int{1, 5}
	frames := []*embedded.WindowFrame{
		{Type: ast.Events, Start: &embedded.FrameBound{UnBounded: true}, End: &embedded.FrameBound{Type: ast.CurrentEvent}},
	}
	sortTypes := []bool{false, true}
	concs := []int{1, 2, 3, 4, 5, 6}
	for i, windowFunc := range windowFuncs {
		for _, sorted := range sortTypes {
			for _, numFunc := range numFuncs {
				for _, con := range concs {
					cas := defaultWindowTestCase()
					cas.rows = 100000
					cas.ndv = 1000
					cas.concurrency = con
					cas.dataSourceSorted = sorted
					cas.windowFunc = windowFunc
					cas.numFunc = numFunc
					if i < len(frames) {
						cas.frame = frames[i]
					}
					b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
						benchmarkWindowInterDircWithCase(b, cas)
					})
				}
			}
		}
	}
}

func BenchmarkWindowFunctionsAggWindowProcessorAboutFrame(b *testing.B) {
	b.ReportAllocs()
	windowFunc := ast.AggFuncMax
	frame := &embedded.WindowFrame{Type: ast.Events, Start: &embedded.FrameBound{UnBounded: true}, End: &embedded.FrameBound{UnBounded: true}}
	cas := defaultWindowTestCase()
	cas.rows = 10000
	cas.ndv = 10
	cas.concurrency = 1
	cas.dataSourceSorted = false
	cas.windowFunc = windowFunc
	cas.numFunc = 1
	cas.frame = frame
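	// The UNBOUNDED PRECEDING .. UNBOUNDED FOLLOWING frame spans each whole
	// partition, exercising the full-frame path of the agg window processor.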
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkWindowInterDircWithCase(b, cas)
	})
}

func baseBenchmarkWindowFunctionsWithSlidingWindow(b *testing.B, frameType ast.FrameType) {
	b.ReportAllocs()
	windowFuncs := []struct {
		aggFunc         string
		aggDefCausTypes byte
	}{
		{ast.AggFuncSum, allegrosql.TypeFloat},
		{ast.AggFuncSum, allegrosql.TypeNewDecimal},
		{ast.AggFuncCount, allegrosql.TypeLong},
		{ast.AggFuncAvg, allegrosql.TypeFloat},
		{ast.AggFuncAvg, allegrosql.TypeNewDecimal},
		{ast.AggFuncBitXor, allegrosql.TypeLong},
		{ast.AggFuncMax, allegrosql.TypeLong},
		{ast.AggFuncMax, allegrosql.TypeFloat},
		{ast.AggFuncMin, allegrosql.TypeLong},
		{ast.AggFuncMin, allegrosql.TypeFloat},
	}
	event := 100000
	ndv := 100
	frame := &embedded.WindowFrame{
		Type:  frameType,
		Start: &embedded.FrameBound{Type: ast.Preceding, Num: 10},
		End:   &embedded.FrameBound{Type: ast.Following, Num: 10},
	}
	for _, windowFunc := range windowFuncs {
		cas := defaultWindowTestCase()
		cas.ctx.GetStochastikVars().WindowingUseHighPrecision = false
		cas.rows = event
		cas.ndv = ndv
		cas.windowFunc = windowFunc.aggFunc
		cas.frame = frame
		cas.defCausumns[0].RetType.Tp = windowFunc.aggDefCausTypes
		b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
			benchmarkWindowInterDircWithCase(b, cas)
		})
	}
}

func BenchmarkWindowFunctionsWithSlidingWindow(b *testing.B) {
	baseBenchmarkWindowFunctionsWithSlidingWindow(b, ast.Events)
	baseBenchmarkWindowFunctionsWithSlidingWindow(b, ast.Ranges)
}

type hashJoinTestCase struct {
	rows               int
	defcaus            []*types.FieldType
	concurrency        int
	ctx                stochastikctx.Context
	keyIdx             []int
	joinType           embedded.JoinType
	disk               bool
	useOuterToBuild    bool
	rawData            string
	childrenUsedSchema [][]bool
}

func (tc hashJoinTestCase) defCausumns() []*memex.DeferredCauset {
	ret := make([]*memex.DeferredCauset, 0)
	for i, t := range tc.defcaus {
		defCausumn := &memex.DeferredCauset{Index: i, RetType: t, UniqueID: tc.ctx.GetStochastikVars().AllocCausetDeferredCausetID()}
		ret = append(ret, defCausumn)
	}
	return ret
}

func (tc hashJoinTestCase) String() string {
	return fmt.Sprintf("(rows:%v, defcaus:%v, concurrency:%v, joinKeyIdx: %v, disk:%v)",
		tc.rows, tc.defcaus, tc.concurrency, tc.keyIdx, tc.disk)
}

func defaultHashJoinTestCase(defcaus []*types.FieldType, joinType embedded.JoinType, useOuterToBuild bool) *hashJoinTestCase {
	ctx := mock.NewContext()
	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
	ctx.GetStochastikVars().StmtCtx.MemTracker = memory.NewTracker(-1, -1)
	ctx.GetStochastikVars().StmtCtx.DiskTracker = disk.NewTracker(-1, -1)
	ctx.GetStochastikVars().SetIndexLookupJoinConcurrency(4)
	tc := &hashJoinTestCase{rows: 100000, concurrency: 4, ctx: ctx, keyIdx: []int{0, 1}, rawData: wideString}
	tc.defcaus = defcaus
	tc.useOuterToBuild = useOuterToBuild
	tc.joinType = joinType
	return tc
}

func prepare4HashJoin(testCase *hashJoinTestCase, innerInterDirc, outerInterDirc InterlockingDirectorate) *HashJoinInterDirc {
	if testCase.useOuterToBuild {
		innerInterDirc, outerInterDirc = outerInterDirc, innerInterDirc
	}
	defcaus0 := innerInterDirc.Schema().DeferredCausets
	defcaus1 := outerInterDirc.Schema().DeferredCausets

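	// With childrenUsedSchema set, only the marked defCausumns survive in the join
	// schema (inline projection); otherwise every defCausumn of both children is kept.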
	joinSchema := memex.NewSchema()
	if testCase.childrenUsedSchema != nil {
		for i, used := range testCase.childrenUsedSchema[0] {
			if used {
				joinSchema.Append(defcaus0[i])
			}
		}
		for i, used := range testCase.childrenUsedSchema[1] {
			if used {
				joinSchema.Append(defcaus1[i])
			}
		}
	} else {
		joinSchema.Append(defcaus0...)
		joinSchema.Append(defcaus1...)
	}

	joinKeys := make([]*memex.DeferredCauset, 0, len(testCase.keyIdx))
	for _, keyIdx := range testCase.keyIdx {
		joinKeys = append(joinKeys, defcaus0[keyIdx])
	}
	probeKeys := make([]*memex.DeferredCauset, 0, len(testCase.keyIdx))
	for _, keyIdx := range testCase.keyIdx {
		probeKeys = append(probeKeys, defcaus1[keyIdx])
	}
	e := &HashJoinInterDirc{
		baseInterlockingDirectorate: newBaseInterlockingDirectorate(testCase.ctx, joinSchema, 5, innerInterDirc, outerInterDirc),
		concurrency:                 uint(testCase.concurrency),
		joinType:                    testCase.joinType, // 0 for InnerJoin, 1 for LeftOuterJoin, 2 for RightOuterJoin
		isOuterJoin:                 false,
		buildKeys:                   joinKeys,
		probeKeys:                   probeKeys,
		buildSideInterDirc:          innerInterDirc,
		probeSideInterDirc:          outerInterDirc,
		buildSideEstCount:           float64(testCase.rows),
		useOuterToBuild:             testCase.useOuterToBuild,
	}

	childrenUsedSchema := markChildrenUsedDefCauss(e.Schema(), e.children[0].Schema(), e.children[1].Schema())
	defaultValues := make([]types.Causet, e.buildSideInterDirc.Schema().Len())
	lhsTypes, rhsTypes := retTypes(innerInterDirc), retTypes(outerInterDirc)
	e.joiners = make([]joiner, e.concurrency)
	for i := uint(0); i < e.concurrency; i++ {
		e.joiners[i] = newJoiner(testCase.ctx, e.joinType, true, defaultValues,
			nil, lhsTypes, rhsTypes, childrenUsedSchema)
	}
	memLimit := int64(-1)
	if testCase.disk {
		memLimit = 1
	}
	t := memory.NewTracker(-1, memLimit)
	t.SetSuperCowOrNoCausetOnExceed(nil)
	t2 := disk.NewTracker(-1, -1)
	e.ctx.GetStochastikVars().StmtCtx.MemTracker = t
	e.ctx.GetStochastikVars().StmtCtx.DiskTracker = t2
	return e
}

func benchmarkHashJoinInterDircWithCase(b *testing.B, casTest *hashJoinTestCase) {
	opt1 := mockDataSourceParameters{
		rows: casTest.rows,
		ctx:  casTest.ctx,
		genDataFunc: func(event int, typ *types.FieldType) interface{} {
			switch typ.Tp {
			case allegrosql.TypeLong, allegrosql.TypeLonglong:
				return int64(event)
			case allegrosql.TypeVarString:
				return casTest.rawData
			case allegrosql.TypeDouble:
				return float64(event)
			default:
				panic("not implement")
			}
		},
	}
	opt2 := opt1
	opt1.schemaReplicant = memex.NewSchema(casTest.defCausumns()...)
	opt2.schemaReplicant = memex.NewSchema(casTest.defCausumns()...)
	dataSource1 := buildMockDataSource(opt1)
	dataSource2 := buildMockDataSource(opt2)
	// Test spill result.
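	// This untimed first run validates the expected spill-to-disk behaviour
	// (casTest.disk) before the measured iterations start.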
	benchmarkHashJoinInterDirc(b, casTest, dataSource1, dataSource2, true)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		benchmarkHashJoinInterDirc(b, casTest, dataSource1, dataSource2, false)
	}
}

func benchmarkHashJoinInterDirc(b *testing.B, casTest *hashJoinTestCase, opt1, opt2 *mockDataSource, testResult bool) {
	b.StopTimer()
	exec := prepare4HashJoin(casTest, opt1, opt2)
	tmpCtx := context.Background()
	chk := newFirstChunk(exec)
	opt1.prepareChunks()
	opt2.prepareChunks()

	totalEvent := 0
	b.StartTimer()
	if err := exec.Open(tmpCtx); err != nil {
		b.Fatal(err)
	}
	for {
		if err := exec.Next(tmpCtx, chk); err != nil {
			b.Fatal(err)
		}
		if chk.NumEvents() == 0 {
			break
		}
		totalEvent += chk.NumEvents()
	}

	if testResult {
		time.Sleep(200 * time.Millisecond)
		if spilled := exec.rowContainer.alreadySpilledSafeForTest(); spilled != casTest.disk {
			b.Fatal("wrong usage with disk:", spilled, casTest.disk)
		}
	}

	if err := exec.Close(); err != nil {
		b.Fatal(err)
	}
	b.StopTimer()
	if totalEvent == 0 {
		b.Fatal("totalEvent == 0")
	}
}

func BenchmarkHashJoinInlineProjection(b *testing.B) {
	defcaus := []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeVarString),
	}

	b.ReportAllocs()

	{
		cas := defaultHashJoinTestCase(defcaus, 0, false)
		cas.keyIdx = []int{0}
		cas.childrenUsedSchema = [][]bool{
			{false, true},
			{false, false},
		}
		b.Run("InlineProjection:ON", func(b *testing.B) {
			benchmarkHashJoinInterDircWithCase(b, cas)
		})
	}

	{
		cas := defaultHashJoinTestCase(defcaus, 0, false)
		cas.keyIdx = []int{0}
		b.Run("InlineProjection:OFF", func(b *testing.B) {
			benchmarkHashJoinInterDircWithCase(b, cas)
		})
	}
}

func BenchmarkHashJoinInterDirc(b *testing.B) {
	lvl := log.GetLevel()
	log.SetLevel(zapembedded.ErrorLevel)
	defer log.SetLevel(lvl)

	defcaus := []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeVarString),
	}

	b.ReportAllocs()
	cas := defaultHashJoinTestCase(defcaus, 0, false)
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas.keyIdx = []int{0}
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas.keyIdx = []int{0}
	cas.disk = true
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	// Replace the wide string defCausumn with a double defCausumn.
	defcaus = []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeDouble),
	}

	cas = defaultHashJoinTestCase(defcaus, 0, false)
	cas.keyIdx = []int{0}
	cas.rows = 5
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas = defaultHashJoinTestCase(defcaus, 0, false)
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas.keyIdx = []int{0}
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})
}

func BenchmarkOuterHashJoinInterDirc(b *testing.B) {
	lvl := log.GetLevel()
	log.SetLevel(zapembedded.ErrorLevel)
	defer log.SetLevel(lvl)

	defcaus := []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeVarString),
	}

	b.ReportAllocs()
	cas := defaultHashJoinTestCase(defcaus, 2, true)
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas.keyIdx = []int{0}
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas.keyIdx = []int{0}
	cas.disk = true
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	// Replace the wide string defCausumn with a double defCausumn.
	defcaus = []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeDouble),
	}

	cas = defaultHashJoinTestCase(defcaus, 2, true)
	cas.keyIdx = []int{0}
	cas.rows = 5
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas = defaultHashJoinTestCase(defcaus, 2, true)
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas.keyIdx = []int{0}
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})
}

func benchmarkBuildHashBlockForList(b *testing.B, casTest *hashJoinTestCase) {
	opt := mockDataSourceParameters{
		schemaReplicant: memex.NewSchema(casTest.defCausumns()...),
		rows:            casTest.rows,
		ctx:             casTest.ctx,
		genDataFunc: func(event int, typ *types.FieldType) interface{} {
			switch typ.Tp {
			case allegrosql.TypeLong, allegrosql.TypeLonglong:
				return int64(event)
			case allegrosql.TypeVarString:
				return casTest.rawData
			default:
				panic("not implement")
			}
		},
	}
	dataSource1 := buildMockDataSource(opt)
	dataSource2 := buildMockDataSource(opt)

	dataSource1.prepareChunks()
	benchmarkBuildHashBlock(b, casTest, dataSource1, dataSource2, true)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		benchmarkBuildHashBlock(b, casTest, dataSource1, dataSource2, false)
	}
}

func benchmarkBuildHashBlock(b *testing.B, casTest *hashJoinTestCase, dataSource1, dataSource2 *mockDataSource, testResult bool) {
	b.StopTimer()
	exec := prepare4HashJoin(casTest, dataSource1, dataSource2)
	tmpCtx := context.Background()
	if err := exec.Open(tmpCtx); err != nil {
		b.Fatal(err)
	}
	exec.prepared = true

	innerResultCh := make(chan *chunk.Chunk, len(dataSource1.chunks))
	for _, chk := range dataSource1.chunks {
		innerResultCh <- chk
	}
	close(innerResultCh)

	b.StartTimer()
	if err := exec.buildHashBlockForList(innerResultCh); err != nil {
		b.Fatal(err)
	}

	if testResult {
		time.Sleep(200 * time.Millisecond)
		if exec.rowContainer.alreadySpilledSafeForTest() != casTest.disk {
			b.Fatal("wrong usage with disk")
		}
	}

	if err := exec.Close(); err != nil {
		b.Fatal(err)
	}
	b.StopTimer()
}

func BenchmarkBuildHashBlockForList(b *testing.B) {
	lvl := log.GetLevel()
	log.SetLevel(zapembedded.ErrorLevel)
	defer log.SetLevel(lvl)

	defcaus := []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeVarString),
	}

	b.ReportAllocs()
	cas := defaultHashJoinTestCase(defcaus, 0, false)
	rows := []int{10, 100000}
	keyIdxs := [][]int{{0, 1}, {0}}
	disks := []bool{false, true}
	for _, event := range rows {
		for _, keyIdx := range keyIdxs {
			for _, disk := range disks {
				cas.rows = event
				cas.keyIdx = keyIdx
				cas.disk = disk
				b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
					benchmarkBuildHashBlockForList(b, cas)
				})
			}
		}
	}
}

type indexJoinTestCase struct {
	outerEvents     int
	innerEvents     int
	concurrency     int
	ctx             stochastikctx.Context
	outerJoinKeyIdx []int
	innerJoinKeyIdx []int
	innerIdx        []int
	needOuterSort   bool
	rawData         string
}

func (tc indexJoinTestCase) defCausumns() []*memex.DeferredCauset {
	return []*memex.DeferredCauset{
		{Index: 0, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
		{Index: 1, RetType: types.NewFieldType(allegrosql.TypeDouble)},
		{Index: 2, RetType: types.NewFieldType(allegrosql.TypeVarString)},
	}
}

func defaultIndexJoinTestCase() *indexJoinTestCase {
	ctx := mock.NewContext()
	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
	ctx.GetStochastikVars().SnapshotTS = 1
	ctx.GetStochastikVars().StmtCtx.MemTracker = memory.NewTracker(-1, -1)
	ctx.GetStochastikVars().StmtCtx.DiskTracker = disk.NewTracker(-1, -1)
	tc := &indexJoinTestCase{
		outerEvents:     100000,
		innerEvents:     variable.DefMaxChunkSize * 100,
		concurrency:     4,
		ctx:             ctx,
		outerJoinKeyIdx: []int{0, 1},
		innerJoinKeyIdx: []int{0, 1},
		innerIdx:        []int{0, 1},
		rawData:         wideString,
	}
	return tc
}

func (tc indexJoinTestCase) String() string {
	return fmt.Sprintf("(outerEvents:%v, innerEvents:%v, concurrency:%v, outerJoinKeyIdx: %v, innerJoinKeyIdx: %v, NeedOuterSort:%v)",
		tc.outerEvents, tc.innerEvents, tc.concurrency, tc.outerJoinKeyIdx, tc.innerJoinKeyIdx, tc.needOuterSort)
}

func (tc indexJoinTestCase) getMockDataSourceOptByEvents(rows int) mockDataSourceParameters {
	return mockDataSourceParameters{
		schemaReplicant: memex.NewSchema(tc.defCausumns()...),
		rows:            rows,
		ctx:             tc.ctx,
		genDataFunc: func(event int, typ *types.FieldType) interface{} {
			switch typ.Tp {
			case allegrosql.TypeLong, allegrosql.TypeLonglong:
				return int64(event)
			case allegrosql.TypeDouble:
				return float64(event)
			case allegrosql.TypeVarString:
				return tc.rawData
			default:
				panic("not implement")
			}
		},
	}
}

func prepare4IndexInnerHashJoin(tc *indexJoinTestCase, outerDS *mockDataSource, innerDS *mockDataSource) InterlockingDirectorate {
	outerDefCauss, innerDefCauss := tc.defCausumns(), tc.defCausumns()
	joinSchema := memex.NewSchema(outerDefCauss...)
	joinSchema.Append(innerDefCauss...)
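	// The join schema is the outer defCausumns followed by the inner defCausumns.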
	leftTypes, rightTypes := retTypes(outerDS), retTypes(innerDS)
	defaultValues := make([]types.Causet, len(innerDefCauss))
	defCausLens := make([]int, len(innerDefCauss))
	for i := range defCausLens {
		defCausLens[i] = types.UnspecifiedLength
	}
	keyOff2IdxOff := make([]int, len(tc.outerJoinKeyIdx))
	for i := range keyOff2IdxOff {
		keyOff2IdxOff[i] = i
	}
	e := &IndexLookUpJoin{
		baseInterlockingDirectorate: newBaseInterlockingDirectorate(tc.ctx, joinSchema, 1, outerDS),
		outerCtx: outerCtx{
			rowTypes:    leftTypes,
			keyDefCauss: tc.outerJoinKeyIdx,
		},
		innerCtx: innerCtx{
			readerBuilder: &dataReaderBuilder{Causet: &mockPhysicalIndexReader{e: innerDS}, interlockBuilder: newInterlockingDirectorateBuilder(tc.ctx, nil)},
			rowTypes:      rightTypes,
			defCausLens:   defCausLens,
			keyDefCauss:   tc.innerJoinKeyIdx,
		},
		workerWg:         new(sync.WaitGroup),
		joiner:           newJoiner(tc.ctx, 0, false, defaultValues, nil, leftTypes, rightTypes, nil),
		isOuterJoin:      false,
		keyOff2IdxOff:    keyOff2IdxOff,
		lastDefCausHelper: nil,
	}
	e.joinResult = newFirstChunk(e)
	return e
}

func prepare4IndexOuterHashJoin(tc *indexJoinTestCase, outerDS *mockDataSource, innerDS *mockDataSource) InterlockingDirectorate {
	e := prepare4IndexInnerHashJoin(tc, outerDS, innerDS).(*IndexLookUpJoin)
	idxHash := &IndexNestedLoopHashJoin{IndexLookUpJoin: *e}
	concurrency := tc.concurrency
	idxHash.joiners = make([]joiner, concurrency)
	for i := 0; i < concurrency; i++ {
		idxHash.joiners[i] = e.joiner.Clone()
	}
	return idxHash
}

func prepare4IndexMergeJoin(tc *indexJoinTestCase, outerDS *mockDataSource, innerDS *mockDataSource) InterlockingDirectorate {
	outerDefCauss, innerDefCauss := tc.defCausumns(), tc.defCausumns()
	joinSchema := memex.NewSchema(outerDefCauss...)
	joinSchema.Append(innerDefCauss...)
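	// Collect the join keys on both sides; the index merge join depends on rows
	// arriving ordered on these keys.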
	outerJoinKeys := make([]*memex.DeferredCauset, 0, len(tc.outerJoinKeyIdx))
	innerJoinKeys := make([]*memex.DeferredCauset, 0, len(tc.innerJoinKeyIdx))
	for _, keyIdx := range tc.outerJoinKeyIdx {
		outerJoinKeys = append(outerJoinKeys, outerDefCauss[keyIdx])
	}
	for _, keyIdx := range tc.innerJoinKeyIdx {
		innerJoinKeys = append(innerJoinKeys, innerDefCauss[keyIdx])
	}
	leftTypes, rightTypes := retTypes(outerDS), retTypes(innerDS)
	defaultValues := make([]types.Causet, len(innerDefCauss))
	defCausLens := make([]int, len(innerDefCauss))
	for i := range defCausLens {
		defCausLens[i] = types.UnspecifiedLength
	}
	keyOff2IdxOff := make([]int, len(outerJoinKeys))
	for i := range keyOff2IdxOff {
		keyOff2IdxOff[i] = i
	}

	compareFuncs := make([]memex.CompareFunc, 0, len(outerJoinKeys))
	outerCompareFuncs := make([]memex.CompareFunc, 0, len(outerJoinKeys))
	for i := range outerJoinKeys {
		compareFuncs = append(compareFuncs, memex.GetCmpFunction(nil, outerJoinKeys[i], innerJoinKeys[i]))
		outerCompareFuncs = append(outerCompareFuncs, memex.GetCmpFunction(nil, outerJoinKeys[i], outerJoinKeys[i]))
	}
	e := &IndexLookUpMergeJoin{
		baseInterlockingDirectorate: newBaseInterlockingDirectorate(tc.ctx, joinSchema, 2, outerDS),
		outerMergeCtx: outerMergeCtx{
			rowTypes:      leftTypes,
			keyDefCauss:   tc.outerJoinKeyIdx,
			joinKeys:      outerJoinKeys,
			needOuterSort: tc.needOuterSort,
			compareFuncs:  outerCompareFuncs,
		},
		innerMergeCtx: innerMergeCtx{
			readerBuilder: &dataReaderBuilder{Causet: &mockPhysicalIndexReader{e: innerDS}, interlockBuilder: newInterlockingDirectorateBuilder(tc.ctx, nil)},
			rowTypes:      rightTypes,
			joinKeys:      innerJoinKeys,
			defCausLens:   defCausLens,
			keyDefCauss:   tc.innerJoinKeyIdx,
			compareFuncs:  compareFuncs,
		},
		workerWg:         new(sync.WaitGroup),
		isOuterJoin:      false,
		keyOff2IdxOff:    keyOff2IdxOff,
		lastDefCausHelper: nil,
	}
	concurrency := e.ctx.GetStochastikVars().IndexLookupJoinConcurrency()
	joiners := make([]joiner, concurrency)
	for i := 0; i < concurrency; i++ {
		joiners[i] = newJoiner(tc.ctx, 0, false, defaultValues, nil, leftTypes, rightTypes, nil)
	}
	e.joiners = joiners
	return e
}

type indexJoinType int8

const (
	indexInnerHashJoin indexJoinType = iota
	indexOuterHashJoin
	indexMergeJoin
)

func benchmarHoTTexJoinInterDircWithCase(
	b *testing.B,
	tc *indexJoinTestCase,
	outerDS *mockDataSource,
	innerDS *mockDataSource,
	execType indexJoinType,
) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer()
		var exec InterlockingDirectorate
		switch execType {
		case indexInnerHashJoin:
			exec = prepare4IndexInnerHashJoin(tc, outerDS, innerDS)
		case indexOuterHashJoin:
			exec = prepare4IndexOuterHashJoin(tc, outerDS, innerDS)
		case indexMergeJoin:
			exec = prepare4IndexMergeJoin(tc, outerDS, innerDS)
		}

		tmpCtx := context.Background()
		chk := newFirstChunk(exec)
		outerDS.prepareChunks()
		innerDS.prepareChunks()

		b.StartTimer()
		if err := exec.Open(tmpCtx); err != nil {
			b.Fatal(err)
		}
		for {
			if err := exec.Next(tmpCtx, chk); err != nil {
				b.Fatal(err)
			}
			if chk.NumEvents() == 0 {
				break
			}
		}

		if err := exec.Close(); err != nil {
			b.Fatal(err)
		}
		b.StopTimer()
	}
}

func BenchmarHoTTexJoinInterDirc(b *testing.B) {
	lvl := log.GetLevel()
	log.SetLevel(zapembedded.ErrorLevel)
	defer log.SetLevel(lvl)

	b.ReportAllocs()
	tc := defaultIndexJoinTestCase()
	outerOpt := tc.getMockDataSourceOptByEvents(tc.outerEvents)
	innerOpt := tc.getMockDataSourceOptByEvents(tc.innerEvents)
	outerDS := buildMockDataSourceWithIndex(outerOpt, tc.innerIdx)
	innerDS := buildMockDataSourceWithIndex(innerOpt, tc.innerIdx)

	tc.needOuterSort = true
	b.Run(fmt.Sprintf("index merge join need outer sort %v", tc), func(b *testing.B) {
		benchmarHoTTexJoinInterDircWithCase(b, tc, outerDS, innerDS, indexMergeJoin)
	})

	tc.needOuterSort = false
	b.Run(fmt.Sprintf("index merge join %v", tc), func(b *testing.B) {
		benchmarHoTTexJoinInterDircWithCase(b, tc, outerDS, innerDS, indexMergeJoin)
	})

	b.Run(fmt.Sprintf("index inner hash join %v", tc), func(b *testing.B) {
		benchmarHoTTexJoinInterDircWithCase(b, tc, outerDS, innerDS, indexInnerHashJoin)
	})

	b.Run(fmt.Sprintf("index outer hash join %v", tc), func(b *testing.B) {
		benchmarHoTTexJoinInterDircWithCase(b, tc, outerDS, innerDS, indexOuterHashJoin)
	})
}

type mergeJoinTestCase struct {
	indexJoinTestCase
	childrenUsedSchema [][]bool
}

func prepare4MergeJoin(tc *mergeJoinTestCase, leftInterDirc, rightInterDirc *mockDataSource) *MergeJoinInterDirc {
	outerDefCauss, innerDefCauss := tc.defCausumns(), tc.defCausumns()

	joinSchema := memex.NewSchema()
	if tc.childrenUsedSchema != nil {
		for i, used := range tc.childrenUsedSchema[0] {
			if used {
				joinSchema.Append(outerDefCauss[i])
			}
		}
		for i, used := range tc.childrenUsedSchema[1] {
			if used {
				joinSchema.Append(innerDefCauss[i])
			}
		}
	} else {
		joinSchema.Append(outerDefCauss...)
		joinSchema.Append(innerDefCauss...)
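		// No inline projection: keep every defCausumn from both children.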
	}

	outerJoinKeys := make([]*memex.DeferredCauset, 0, len(tc.outerJoinKeyIdx))
	innerJoinKeys := make([]*memex.DeferredCauset, 0, len(tc.innerJoinKeyIdx))
	for _, keyIdx := range tc.outerJoinKeyIdx {
		outerJoinKeys = append(outerJoinKeys, outerDefCauss[keyIdx])
	}
	for _, keyIdx := range tc.innerJoinKeyIdx {
		innerJoinKeys = append(innerJoinKeys, innerDefCauss[keyIdx])
	}
	compareFuncs := make([]memex.CompareFunc, 0, len(outerJoinKeys))
	outerCompareFuncs := make([]memex.CompareFunc, 0, len(outerJoinKeys))
	for i := range outerJoinKeys {
		compareFuncs = append(compareFuncs, memex.GetCmpFunction(nil, outerJoinKeys[i], innerJoinKeys[i]))
		outerCompareFuncs = append(outerCompareFuncs, memex.GetCmpFunction(nil, outerJoinKeys[i], outerJoinKeys[i]))
	}

	defaultValues := make([]types.Causet, len(innerDefCauss))

	// only benchmark inner join
	e := &MergeJoinInterDirc{
		stmtCtx:                     tc.ctx.GetStochastikVars().StmtCtx,
		baseInterlockingDirectorate: newBaseInterlockingDirectorate(tc.ctx, joinSchema, 3, leftInterDirc, rightInterDirc),
		compareFuncs:                compareFuncs,
		isOuterJoin:                 false,
	}

	e.joiner = newJoiner(
		tc.ctx,
		0,
		false,
		defaultValues,
		nil,
		retTypes(leftInterDirc),
		retTypes(rightInterDirc),
		tc.childrenUsedSchema,
	)

	e.innerBlock = &mergeJoinBlock{
		isInner:    true,
		childIndex: 1,
		joinKeys:   innerJoinKeys,
	}

	e.outerBlock = &mergeJoinBlock{
		childIndex: 0,
		filters:    nil,
		joinKeys:   outerJoinKeys,
	}

	return e
}

func defaultMergeJoinTestCase() *mergeJoinTestCase {
	return &mergeJoinTestCase{*defaultIndexJoinTestCase(), nil}
}

func newMergeJoinBenchmark(numOuterEvents, numInnerDup, numInnerRedundant int) (tc *mergeJoinTestCase, innerDS, outerDS *mockDataSource) {
	ctx := mock.NewContext()
	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
	ctx.GetStochastikVars().SnapshotTS = 1
	ctx.GetStochastikVars().StmtCtx.MemTracker = memory.NewTracker(-1, -1)
	ctx.GetStochastikVars().StmtCtx.DiskTracker = disk.NewTracker(-1, -1)

	numInnerEvents := numOuterEvents*numInnerDup + numInnerRedundant
	itc := &indexJoinTestCase{
		outerEvents:     numOuterEvents,
		innerEvents:     numInnerEvents,
		concurrency:     4,
		ctx:             ctx,
		outerJoinKeyIdx: []int{0, 1},
		innerJoinKeyIdx: []int{0, 1},
		innerIdx:        []int{0, 1},
		rawData:         wideString,
	}
	tc = &mergeJoinTestCase{*itc, nil}
	outerOpt := mockDataSourceParameters{
		schemaReplicant: memex.NewSchema(tc.defCausumns()...),
		rows:            numOuterEvents,
		ctx:             tc.ctx,
		genDataFunc: func(event int, typ *types.FieldType) interface{} {
			switch typ.Tp {
			case allegrosql.TypeLong, allegrosql.TypeLonglong:
				return int64(event)
			case allegrosql.TypeDouble:
				return float64(event)
			case allegrosql.TypeVarString:
				return tc.rawData
			default:
				panic("not implement")
			}
		},
	}

	innerOpt := mockDataSourceParameters{
		schemaReplicant: memex.NewSchema(tc.defCausumns()...),
		rows:            numInnerEvents,
		ctx:             tc.ctx,
		genDataFunc: func(event int, typ *types.FieldType) interface{} {
			event = event / numInnerDup
			switch typ.Tp {
			case allegrosql.TypeLong, allegrosql.TypeLonglong:
				return int64(event)
			case allegrosql.TypeDouble:
				return float64(event)
			case allegrosql.TypeVarString:
				return tc.rawData
			default:
				panic("not implement")
			}
		},
	}

	innerDS = buildMockDataSource(innerOpt)
	outerDS = buildMockDataSource(outerOpt)

	return
}

type mergeJoinType int8

const (
	innerMergeJoin mergeJoinType = iota
)

func benchmarkMergeJoinInterDircWithCase(b *testing.B, tc *mergeJoinTestCase, innerDS, outerDS *mockDataSource, joinType mergeJoinType) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer()
		var exec InterlockingDirectorate
		switch joinType {
		case innerMergeJoin:
			exec = prepare4MergeJoin(tc, innerDS, outerDS)
		}

		tmpCtx := context.Background()
		chk := newFirstChunk(exec)
		outerDS.prepareChunks()
		innerDS.prepareChunks()

		b.StartTimer()
		if err := exec.Open(tmpCtx); err != nil {
			b.Fatal(err)
		}
		for {
			if err := exec.Next(tmpCtx, chk); err != nil {
				b.Fatal(err)
			}
			if chk.NumEvents() == 0 {
				break
			}
		}

		if err := exec.Close(); err != nil {
			b.Fatal(err)
		}
		b.StopTimer()
	}
}

func BenchmarkMergeJoinInterDirc(b *testing.B) {
	lvl := log.GetLevel()
	log.SetLevel(zapembedded.ErrorLevel)
	defer log.SetLevel(lvl)
	b.ReportAllocs()

	totalEvents := 300000

	innerDupAndRedundant := [][]int{
		{1, 0},
		{100, 0},
		{10000, 0},
		{1, 30000},
	}

	childrenUsedSchemas := [][][]bool{
		nil,
		{
			{true, false, false},
			{false, true, false},
		},
	}

	for _, params := range innerDupAndRedundant {
		numInnerDup, numInnerRedundant := params[0], params[1]
		for _, childrenUsedSchema := range childrenUsedSchemas {
			tc, innerDS, outerDS := newMergeJoinBenchmark(totalEvents/numInnerDup, numInnerDup, numInnerRedundant)
			inlineProj := false
			if childrenUsedSchema != nil {
				inlineProj = true
				tc.childrenUsedSchema = childrenUsedSchema
			}

			b.Run(fmt.Sprintf("merge join %v InlineProj:%v", tc, inlineProj), func(b *testing.B) {
				benchmarkMergeJoinInterDircWithCase(b, tc, outerDS, innerDS, innerMergeJoin)
			})
		}
	}
}

type sortCase struct {
	rows       int
	orderByIdx []int
	ndvs       []int
	ctx        stochastikctx.Context
}

func (tc sortCase) defCausumns() []*memex.DeferredCauset {
	return []*memex.DeferredCauset{
		{Index: 0, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
		{Index: 1, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
	}
}

func (tc sortCase) String() string {
	return fmt.Sprintf("(rows:%v, orderBy:%v, ndvs: %v)", tc.rows, tc.orderByIdx, tc.ndvs)
}

func defaultSortTestCase() *sortCase {
	ctx := mock.NewContext()
	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
	ctx.GetStochastikVars().StmtCtx.MemTracker = memory.NewTracker(-1, -1)
	tc := &sortCase{rows: 300000, orderByIdx: []int{0, 1}, ndvs: []int{0, 0}, ctx: ctx}
	return tc
}

func benchmarkSortInterDirc(b *testing.B, cas *sortCase) {
	opt := mockDataSourceParameters{
		schemaReplicant: memex.NewSchema(cas.defCausumns()...),
		rows:            cas.rows,
		ctx:             cas.ctx,
		ndvs:            cas.ndvs,
	}
	dataSource := buildMockDataSource(opt)
	exec := &SortInterDirc{
		baseInterlockingDirectorate: newBaseInterlockingDirectorate(cas.ctx, dataSource.schemaReplicant, 4, dataSource),
		ByItems:                     make([]*soliton.ByItems, 0, len(cas.orderByIdx)),
		schemaReplicant:             dataSource.schemaReplicant,
	}
	for _, idx := range cas.orderByIdx {
		exec.ByItems = append(exec.ByItems, &soliton.ByItems{Expr: cas.defCausumns()[idx]})
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer()
		tmpCtx := context.Background()
		chk := newFirstChunk(exec)
		dataSource.prepareChunks()

		b.StartTimer()
		if err := exec.Open(tmpCtx); err != nil {
			b.Fatal(err)
		}
		for {
			if err := exec.Next(tmpCtx, chk); err != nil {
				b.Fatal(err)
			}
			if chk.NumEvents() == 0 {
				break
			}
		}

		if err := exec.Close(); err != nil {
			b.Fatal(err)
		}
		b.StopTimer()
	}
}

func BenchmarkSortInterDirc(b *testing.B) {
	b.ReportAllocs()
	cas := defaultSortTestCase()
	// all random data
	cas.ndvs = []int{0, 0}
	cas.orderByIdx = []int{0, 1}
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkSortInterDirc(b, cas)
	})

	ndvs := []int{1, 10000}
	for _, ndv := range ndvs {
		cas.ndvs = []int{ndv, 0}
		cas.orderByIdx = []int{0, 1}
		b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
			benchmarkSortInterDirc(b, cas)
		})

		cas.ndvs = []int{ndv, 0}
		cas.orderByIdx = []int{0}
		b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
			benchmarkSortInterDirc(b, cas)
		})

		cas.ndvs = []int{ndv, 0}
		cas.orderByIdx = []int{1}
		b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
			benchmarkSortInterDirc(b, cas)
		})
	}
}
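
// A typical invocation runs one benchmark family with allocation stats, for
// example (package path assumed relative to the repository root):
//
//	go test -run=^$ -bench=BenchmarkHashJoinInterDirc -benchmem ./interlock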