github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/benchmark_test.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"context"
	"encoding/base64"
	"fmt"
	"math/rand"
	"sort"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/ast"
	"github.com/whtcorpsinc/log"
	"github.com/whtcorpsinc/milevadb/causet/embedded"
	"github.com/whtcorpsinc/milevadb/causet/property"
	"github.com/whtcorpsinc/milevadb/causet/soliton"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/memex/aggregation"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/disk"
	"github.com/whtcorpsinc/milevadb/soliton/memory"
	"github.com/whtcorpsinc/milevadb/soliton/mock"
	"github.com/whtcorpsinc/milevadb/soliton/stringutil"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/stochastikctx/variable"
	"github.com/whtcorpsinc/milevadb/types"
	"go.uber.org/zap/zapembedded"
)

var (
	_          InterlockingDirectorate = &mockDataSource{}
	_          embedded.PhysicalCauset = &mockDataPhysicalCauset{}
	wideString                         = strings.Repeat("x", 5*1024)
)

type mockDataSourceParameters struct {
	schemaReplicant *memex.Schema
	genDataFunc     func(event int, typ *types.FieldType) interface{}
	ndvs            []int  // number of distinct values of defCausumns[i]; zero means no limit
	orders          []bool // defCausumns[i] should be ordered if orders[i] is true
	rows            int    // number of rows the DataSource should output
	ctx             stochastikctx.Context
}

type mockDataSource struct {
	baseInterlockingDirectorate
	p        mockDataSourceParameters
	genData  []*chunk.Chunk
	chunks   []*chunk.Chunk
	chunkPtr int
}

type mockDataPhysicalCauset struct {
	MockPhysicalCauset
	schemaReplicant *memex.Schema
	exec            InterlockingDirectorate
}

func (mp *mockDataPhysicalCauset) GetInterlockingDirectorate() InterlockingDirectorate {
	return mp.exec
}

func (mp *mockDataPhysicalCauset) Schema() *memex.Schema {
	return mp.schemaReplicant
}

func (mp *mockDataPhysicalCauset) ExplainID() fmt.Stringer {
	return stringutil.MemoizeStr(func() string {
		return "mockData_0"
	})
}

func (mp *mockDataPhysicalCauset) Stats() *property.StatsInfo {
	return nil
}

func (mp *mockDataPhysicalCauset) SelectBlockOffset() int {
	return 0
}

func (mds *mockDataSource) genDefCausCausets(defCaus int) (results []interface{}) {
	typ := mds.retFieldTypes[defCaus]
	order := false
	if defCaus < len(mds.p.orders) {
		order = mds.p.orders[defCaus]
	}
	rows := mds.p.rows
	NDV := 0
	if defCaus < len(mds.p.ndvs) {
		NDV = mds.p.ndvs[defCaus]
	}
	results = make([]interface{}, 0, rows)
	if NDV == 0 {
		if mds.p.genDataFunc == nil {
			for i := 0; i < rows; i++ {
				results = append(results, mds.randCauset(typ))
			}
		} else {
			for i := 0; i < rows; i++ {
				results = append(results, mds.p.genDataFunc(i, typ))
			}
		}
	} else {
		datumSet := make(map[string]bool, NDV)
		datums := make([]interface{}, 0, NDV)
		for len(datums) < NDV {
			d := mds.randCauset(typ)
			str := fmt.Sprintf("%v", d)
			if datumSet[str] {
				continue
			}
			datumSet[str] = true
			datums = append(datums, d)
		}

		for i := 0; i < rows; i++ {
			results = append(results, datums[rand.Intn(NDV)])
		}
	}

	if order {
		sort.Slice(results, func(i, j int) bool {
			switch typ.Tp {
			case allegrosql.TypeLong, allegrosql.TypeLonglong:
				return results[i].(int64) < results[j].(int64)
			case allegrosql.TypeDouble:
				return results[i].(float64) < results[j].(float64)
			case allegrosql.TypeVarString:
				return results[i].(string) < results[j].(string)
			default:
				panic("not implemented")
			}
		})
	}

	return
}

func (mds *mockDataSource) randCauset(typ *types.FieldType) interface{} {
	switch typ.Tp {
	case allegrosql.TypeLong, allegrosql.TypeLonglong:
		return int64(rand.Int())
	case allegrosql.TypeDouble, allegrosql.TypeFloat:
		return rand.Float64()
	case allegrosql.TypeNewDecimal:
		var d types.MyDecimal
		return d.FromInt(int64(rand.Int()))
	case allegrosql.TypeVarString:
		buff := make([]byte, 10)
		rand.Read(buff)
		return base64.RawURLEncoding.EncodeToString(buff)
	default:
		panic("not implemented")
	}
}

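// prepareChunks rebuilds the consumable chunk list from the pristine genData
// chunks, so each benchmark iteration replays the same generated data; Next
// hands the chunks out destructively via SwapDeferredCausets.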
func (mds *mockDataSource) prepareChunks() {
	mds.chunks = make([]*chunk.Chunk, len(mds.genData))
	for i := range mds.chunks {
		mds.chunks[i] = mds.genData[i].CopyConstruct()
	}
	mds.chunkPtr = 0
}

func (mds *mockDataSource) Next(ctx context.Context, req *chunk.Chunk) error {
	if mds.chunkPtr >= len(mds.chunks) {
		req.Reset()
		return nil
	}
	dataChk := mds.chunks[mds.chunkPtr]
	dataChk.SwapDeferredCausets(req)
	mds.chunkPtr++
	return nil
}

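// buildMockDataSource materializes every defCausumn up front via
// genDefCausCausets and packs the generated events into maxChunkSize-sized
// chunks held in genData; prepareChunks later copies them for consumption.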
func buildMockDataSource(opt mockDataSourceParameters) *mockDataSource {
	baseInterDirc := newBaseInterlockingDirectorate(opt.ctx, opt.schemaReplicant, 0)
	m := &mockDataSource{baseInterDirc, opt, nil, nil, 0}
	rTypes := retTypes(m)
	defCausData := make([][]interface{}, len(rTypes))
	for i := 0; i < len(rTypes); i++ {
		defCausData[i] = m.genDefCausCausets(i)
	}

	m.genData = make([]*chunk.Chunk, (m.p.rows+m.maxChunkSize-1)/m.maxChunkSize)
	for i := range m.genData {
		m.genData[i] = chunk.NewChunkWithCapacity(retTypes(m), m.maxChunkSize)
	}

	for i := 0; i < m.p.rows; i++ {
		idx := i / m.maxChunkSize
		for defCausIdx := 0; defCausIdx < len(rTypes); defCausIdx++ {
			switch rTypes[defCausIdx].Tp {
			case allegrosql.TypeLong, allegrosql.TypeLonglong:
				m.genData[idx].AppendInt64(defCausIdx, defCausData[defCausIdx][i].(int64))
			case allegrosql.TypeDouble, allegrosql.TypeFloat:
				m.genData[idx].AppendFloat64(defCausIdx, defCausData[defCausIdx][i].(float64))
			case allegrosql.TypeNewDecimal:
				m.genData[idx].AppendMyDecimal(defCausIdx, defCausData[defCausIdx][i].(*types.MyDecimal))
			case allegrosql.TypeVarString:
				m.genData[idx].AppendString(defCausIdx, defCausData[defCausIdx][i].(string))
			default:
				panic("not implemented")
			}
		}
	}
	return m
}

func buildMockDataSourceWithIndex(opt mockDataSourceParameters, index []int) *mockDataSource {
	opt.orders = make([]bool, len(opt.schemaReplicant.DeferredCausets))
	for _, idx := range index {
		opt.orders[idx] = true
	}
	return buildMockDataSource(opt)
}
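
// A minimal, hedged sketch of how the helpers above fit together; the schema,
// sizes, and NDVs below are illustrative only, and this helper is not
// referenced by any benchmark in this file.
func exampleBuildMockDataSource() *mockDataSource {
	ctx := mock.NewContext()
	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
	defCauss := []*memex.DeferredCauset{
		{Index: 0, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
		{Index: 1, RetType: types.NewFieldType(allegrosql.TypeVarString)},
	}
	return buildMockDataSource(mockDataSourceParameters{
		schemaReplicant: memex.NewSchema(defCauss...),
		ndvs:            []int{0, 16},        // defCausumn 1 draws from 16 distinct values
		orders:          []bool{false, true}, // defCausumn 1 is emitted in sorted order
		rows:            1024,
		ctx:             ctx,
	})
}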

// aggTestCase has a fixed schemaReplicant (aggDefCaus Double, groupBy LongLong).
type aggTestCase struct {
	execType    string // "hash" or "stream"
	aggFunc     string // sum, avg, count ....
	groupByNDV  int    // the number of distinct group-by keys
	hasDistinct bool
	rows        int
	concurrency int
	ctx         stochastikctx.Context
}

func (a aggTestCase) defCausumns() []*memex.DeferredCauset {
	return []*memex.DeferredCauset{
		{Index: 0, RetType: types.NewFieldType(allegrosql.TypeDouble)},
		{Index: 1, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
	}
}

func (a aggTestCase) String() string {
	return fmt.Sprintf("(execType:%v, aggFunc:%v, ndv:%v, hasDistinct:%v, rows:%v, concurrency:%v)",
		a.execType, a.aggFunc, a.groupByNDV, a.hasDistinct, a.rows, a.concurrency)
}

func defaultAggTestCase(exec string) *aggTestCase {
	ctx := mock.NewContext()
	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
	return &aggTestCase{exec, ast.AggFuncSum, 1000, false, 10000000, 4, ctx}
}

func buildHashAggInterlockingDirectorate(ctx stochastikctx.Context, src InterlockingDirectorate, schemaReplicant *memex.Schema,
	aggFuncs []*aggregation.AggFuncDesc, groupItems []memex.Expression) InterlockingDirectorate {
	plan := new(embedded.PhysicalHashAgg)
	plan.AggFuncs = aggFuncs
	plan.GroupByItems = groupItems
	plan.SetSchema(schemaReplicant)
	plan.Init(ctx, nil, 0)
	plan.SetChildren(nil)
	b := newInterlockingDirectorateBuilder(ctx, nil)
	exec := b.build(plan)
	hashAgg := exec.(*HashAggInterDirc)
	hashAgg.children[0] = src
	return exec
}

func buildStreamAggInterlockingDirectorate(ctx stochastikctx.Context, src InterlockingDirectorate, schemaReplicant *memex.Schema,
	aggFuncs []*aggregation.AggFuncDesc, groupItems []memex.Expression) InterlockingDirectorate {
	plan := new(embedded.PhysicalStreamAgg)
	plan.AggFuncs = aggFuncs
	plan.GroupByItems = groupItems
	plan.SetSchema(schemaReplicant)
	plan.Init(ctx, nil, 0)
	plan.SetChildren(nil)
	b := newInterlockingDirectorateBuilder(ctx, nil)
	exec := b.build(plan)
	streamAgg := exec.(*StreamAggInterDirc)
	streamAgg.children[0] = src
	return exec
}
func buildAggInterlockingDirectorate(b *testing.B, testCase *aggTestCase, child InterlockingDirectorate) InterlockingDirectorate {
	ctx := testCase.ctx
	if err := ctx.GetStochastikVars().SetSystemVar(variable.MilevaDBHashAggFinalConcurrency, fmt.Sprintf("%v", testCase.concurrency)); err != nil {
		b.Fatal(err)
	}
	if err := ctx.GetStochastikVars().SetSystemVar(variable.MilevaDBHashAggPartialConcurrency, fmt.Sprintf("%v", testCase.concurrency)); err != nil {
		b.Fatal(err)
	}

	childDefCauss := testCase.defCausumns()
	schemaReplicant := memex.NewSchema(childDefCauss...)
	groupBy := []memex.Expression{childDefCauss[1]}
	aggFunc, err := aggregation.NewAggFuncDesc(testCase.ctx, testCase.aggFunc, []memex.Expression{childDefCauss[0]}, testCase.hasDistinct)
	if err != nil {
		b.Fatal(err)
	}
	aggFuncs := []*aggregation.AggFuncDesc{aggFunc}

	var aggInterDirc InterlockingDirectorate
	switch testCase.execType {
	case "hash":
		aggInterDirc = buildHashAggInterlockingDirectorate(testCase.ctx, child, schemaReplicant, aggFuncs, groupBy)
	case "stream":
		aggInterDirc = buildStreamAggInterlockingDirectorate(testCase.ctx, child, schemaReplicant, aggFuncs, groupBy)
	default:
		b.Fatal("not implemented")
	}
	return aggInterDirc
}

func benchmarkAggInterDircWithCase(b *testing.B, casTest *aggTestCase) {
	defcaus := casTest.defCausumns()
	orders := []bool{false, casTest.execType == "stream"}
	dataSource := buildMockDataSource(mockDataSourceParameters{
		schemaReplicant: memex.NewSchema(defcaus...),
		ndvs:            []int{0, casTest.groupByNDV},
		orders:          orders,
		rows:            casTest.rows,
		ctx:             casTest.ctx,
	})

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer() // prepare a new agg-interlock
		aggInterDirc := buildAggInterlockingDirectorate(b, casTest, dataSource)
		tmpCtx := context.Background()
		chk := newFirstChunk(aggInterDirc)
		dataSource.prepareChunks()

		b.StartTimer()
		if err := aggInterDirc.Open(tmpCtx); err != nil {
			b.Fatal(err)
		}
		for {
			if err := aggInterDirc.Next(tmpCtx, chk); err != nil {
				b.Fatal(err)
			}
			if chk.NumEvents() == 0 {
				break
			}
		}

		if err := aggInterDirc.Close(); err != nil {
			b.Fatal(err)
		}
		b.StopTimer()
	}
}
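
// The benchmarks in this file all drive an InterlockingDirectorate through the
// same Open/Next/Close loop. A hedged sketch of that loop as a standalone
// helper (exhaustInterlockingDirectorate is a hypothetical name, unused by the
// benchmarks themselves):
func exhaustInterlockingDirectorate(exec InterlockingDirectorate) error {
	ctx := context.Background()
	if err := exec.Open(ctx); err != nil {
		return err
	}
	chk := newFirstChunk(exec)
	for {
		if err := exec.Next(ctx, chk); err != nil {
			return err
		}
		if chk.NumEvents() == 0 { // an empty chunk signals exhaustion
			break
		}
	}
	return exec.Close()
}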

func BenchmarkAggEvents(b *testing.B) {
	rows := []int{100000, 1000000, 10000000}
	concurrencies := []int{1, 4, 8, 15, 20, 30, 40}
	for _, event := range rows {
		for _, con := range concurrencies {
			for _, exec := range []string{"hash", "stream"} {
				if exec == "stream" && con > 1 {
					continue
				}
				cas := defaultAggTestCase(exec)
				cas.rows = event
				cas.concurrency = con
				b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
					benchmarkAggInterDircWithCase(b, cas)
				})
			}
		}
	}
}

func BenchmarkAggGroupByNDV(b *testing.B) {
	NDVs := []int{10, 100, 1000, 10000, 100000, 1000000, 10000000}
	for _, NDV := range NDVs {
		for _, exec := range []string{"hash", "stream"} {
			cas := defaultAggTestCase(exec)
			cas.groupByNDV = NDV
			b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
				benchmarkAggInterDircWithCase(b, cas)
			})
		}
	}
}

func BenchmarkAggConcurrency(b *testing.B) {
	concs := []int{1, 4, 8, 15, 20, 30, 40}
	for _, con := range concs {
		for _, exec := range []string{"hash", "stream"} {
			if exec == "stream" && con > 1 {
				continue
			}
			cas := defaultAggTestCase(exec)
			cas.concurrency = con
			b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
				benchmarkAggInterDircWithCase(b, cas)
			})
		}
	}
}

func BenchmarkAggDistinct(b *testing.B) {
	rows := []int{100000, 1000000, 10000000}
	distincts := []bool{false, true}
	for _, event := range rows {
		for _, exec := range []string{"hash", "stream"} {
			for _, distinct := range distincts {
				cas := defaultAggTestCase(exec)
				cas.rows = event
				cas.hasDistinct = distinct
				b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
					benchmarkAggInterDircWithCase(b, cas)
				})
			}
		}
	}
}

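// buildWindowInterlockingDirectorate wires a PhysicalWindow (plus an optional
// PhysicalSort when the source is unsorted, and a PhysicalShuffle when
// concurrency > 1) on top of the mock source, then builds the interlock.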
func buildWindowInterlockingDirectorate(ctx stochastikctx.Context, windowFunc string, funcs int, frame *embedded.WindowFrame, srcInterDirc InterlockingDirectorate, schemaReplicant *memex.Schema, partitionBy []*memex.DeferredCauset, concurrency int, dataSourceSorted bool) InterlockingDirectorate {
	src := &mockDataPhysicalCauset{
		schemaReplicant: srcInterDirc.Schema(),
		exec:            srcInterDirc,
	}

	win := new(embedded.PhysicalWindow)
	win.WindowFuncDescs = make([]*aggregation.WindowFuncDesc, 0)
	winSchema := schemaReplicant.Clone()
	for i := 0; i < funcs; i++ {
		var args []memex.Expression
		switch windowFunc {
		case ast.WindowFuncNtile:
			args = append(args, &memex.Constant{Value: types.NewUintCauset(2)})
		case ast.WindowFuncNthValue:
			args = append(args, partitionBy[0], &memex.Constant{Value: types.NewUintCauset(2)})
		case ast.AggFuncSum, ast.AggFuncAvg, ast.AggFuncBitXor, ast.AggFuncMax, ast.AggFuncMin:
			args = append(args, src.Schema().DeferredCausets[0])
		default:
			args = append(args, partitionBy[0])
		}
		desc, _ := aggregation.NewWindowFuncDesc(ctx, windowFunc, args) // error ignored: the descs built here are known to be valid

		win.WindowFuncDescs = append(win.WindowFuncDescs, desc)
		winSchema.Append(&memex.DeferredCauset{
			UniqueID: 10 + (int64)(i),
			RetType:  types.NewFieldType(allegrosql.TypeLonglong),
		})
	}
	for _, defCaus := range partitionBy {
		win.PartitionBy = append(win.PartitionBy, property.Item{DefCaus: defCaus})
	}
	win.Frame = frame
	win.OrderBy = nil

	win.SetSchema(winSchema)
	win.Init(ctx, nil, 0)

	var tail embedded.PhysicalCauset = win
	if !dataSourceSorted {
		byItems := make([]*soliton.ByItems, 0, len(partitionBy))
		for _, defCaus := range partitionBy {
			byItems = append(byItems, &soliton.ByItems{Expr: defCaus, Desc: false})
		}
		sort := &embedded.PhysicalSort{ByItems: byItems}
		sort.SetChildren(src)
		win.SetChildren(sort)
		tail = sort
	} else {
		win.SetChildren(src)
	}

	var plan embedded.PhysicalCauset
	if concurrency > 1 {
		byItems := make([]memex.Expression, 0, len(win.PartitionBy))
		for _, item := range win.PartitionBy {
			byItems = append(byItems, item.DefCaus)
		}

		plan = embedded.PhysicalShuffle{
			Concurrency:  concurrency,
			Tail:         tail,
			DataSource:   src,
			SplitterType: embedded.PartitionHashSplitterType,
			HashByItems:  byItems,
		}.Init(ctx, nil, 0)
		plan.SetChildren(win)
	} else {
		plan = win
	}

	b := newInterlockingDirectorateBuilder(ctx, nil)
	exec := b.build(plan)
	return exec
}

// windowTestCase has a fixed schemaReplicant (defCaus Double, partitionBy LongLong, rawData VarString(16), defCaus LongLong).
type windowTestCase struct {
	windowFunc       string
	numFunc          int // The number of windowFuncs. Default: 1.
	frame            *embedded.WindowFrame
	ndv              int // the number of distinct group-by keys
	rows             int
	concurrency      int
	dataSourceSorted bool
	ctx              stochastikctx.Context
	rawDataSmall     string
	defCausumns      []*memex.DeferredCauset // the defCausumns of mock schemaReplicant
}

func (a windowTestCase) String() string {
	return fmt.Sprintf("(func:%v, aggDefCausType:%s, numFunc:%v, ndv:%v, rows:%v, sorted:%v, concurrency:%v)",
		a.windowFunc, a.defCausumns[0].RetType, a.numFunc, a.ndv, a.rows, a.dataSourceSorted, a.concurrency)
}

func defaultWindowTestCase() *windowTestCase {
	ctx := mock.NewContext()
	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
	return &windowTestCase{ast.WindowFuncEventNumber, 1, nil, 1000, 10000000, 1, true, ctx, strings.Repeat("x", 16),
		[]*memex.DeferredCauset{
			{Index: 0, RetType: types.NewFieldType(allegrosql.TypeDouble)},
			{Index: 1, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
			{Index: 2, RetType: types.NewFieldType(allegrosql.TypeVarString)},
			{Index: 3, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
		}}
}
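
// A hedged sketch of customizing the default case: SUM over a 10-preceding /
// 10-following Events frame, mirroring the frame construction used by
// baseBenchmarkWindowFunctionsWithSlidingWindow below. Values are illustrative
// and this helper is unused.
func exampleSlidingWindowCase() *windowTestCase {
	cas := defaultWindowTestCase()
	cas.windowFunc = ast.AggFuncSum
	cas.frame = &embedded.WindowFrame{
		Type:  ast.Events,
		Start: &embedded.FrameBound{Type: ast.Preceding, Num: 10},
		End:   &embedded.FrameBound{Type: ast.Following, Num: 10},
	}
	return cas
}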

func benchmarkWindowInterDircWithCase(b *testing.B, casTest *windowTestCase) {
	ctx := casTest.ctx
	if err := ctx.GetStochastikVars().SetSystemVar(variable.MilevaDBWindowConcurrency, fmt.Sprintf("%v", casTest.concurrency)); err != nil {
		b.Fatal(err)
	}

	defcaus := casTest.defCausumns
	dataSource := buildMockDataSource(mockDataSourceParameters{
		schemaReplicant: memex.NewSchema(defcaus...),
		ndvs:            []int{0, casTest.ndv, 0, 0},
		orders:          []bool{false, casTest.dataSourceSorted, false, false},
		rows:            casTest.rows,
		ctx:             casTest.ctx,
	})

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer() // prepare a new window-interlock
		childDefCauss := casTest.defCausumns
		schemaReplicant := memex.NewSchema(childDefCauss...)
		windowInterDirc := buildWindowInterlockingDirectorate(casTest.ctx, casTest.windowFunc, casTest.numFunc, casTest.frame, dataSource, schemaReplicant, childDefCauss[1:2], casTest.concurrency, casTest.dataSourceSorted)
		tmpCtx := context.Background()
		chk := newFirstChunk(windowInterDirc)
		dataSource.prepareChunks()

		b.StartTimer()
		if err := windowInterDirc.Open(tmpCtx); err != nil {
			b.Fatal(err)
		}
		for {
			if err := windowInterDirc.Next(tmpCtx, chk); err != nil {
				b.Fatal(err)
			}
			if chk.NumEvents() == 0 {
				break
			}
		}

		if err := windowInterDirc.Close(); err != nil {
			b.Fatal(err)
		}
		b.StopTimer()
	}
}

func BenchmarkWindowEvents(b *testing.B) {
	b.ReportAllocs()
	rows := []int{1000, 100000}
	ndvs := []int{10, 1000}
	concs := []int{1, 2, 4}
	for _, event := range rows {
		for _, ndv := range ndvs {
			for _, con := range concs {
				cas := defaultWindowTestCase()
				cas.rows = event
				cas.ndv = ndv
				cas.concurrency = con
				cas.dataSourceSorted = false
				cas.windowFunc = ast.WindowFuncEventNumber // cheapest
				b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
					benchmarkWindowInterDircWithCase(b, cas)
				})
			}
		}
	}
}

func BenchmarkWindowFunctions(b *testing.B) {
	b.ReportAllocs()
	windowFuncs := []string{
		ast.WindowFuncEventNumber,
		ast.WindowFuncRank,
		ast.WindowFuncDenseRank,
		ast.WindowFuncCumeDist,
		ast.WindowFuncPercentRank,
		ast.WindowFuncNtile,
		ast.WindowFuncLead,
		ast.WindowFuncLag,
		ast.WindowFuncFirstValue,
		ast.WindowFuncLastValue,
		ast.WindowFuncNthValue,
	}
	concs := []int{1, 4}
	for _, windowFunc := range windowFuncs {
		for _, con := range concs {
			cas := defaultWindowTestCase()
			cas.rows = 100000
			cas.ndv = 1000
			cas.concurrency = con
			cas.dataSourceSorted = false
			cas.windowFunc = windowFunc
			b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
				benchmarkWindowInterDircWithCase(b, cas)
			})
		}
	}
}

func BenchmarkWindowFunctionsWithFrame(b *testing.B) {
	b.ReportAllocs()
	windowFuncs := []string{
		ast.WindowFuncEventNumber,
		ast.AggFuncBitXor,
	}
	numFuncs := []int{1, 5}
	frames := []*embedded.WindowFrame{
		{Type: ast.Events, Start: &embedded.FrameBound{UnBounded: true}, End: &embedded.FrameBound{Type: ast.CurrentEvent}},
	}
	sortTypes := []bool{false, true}
	concs := []int{1, 2, 3, 4, 5, 6}
	for i, windowFunc := range windowFuncs {
		for _, sorted := range sortTypes {
			for _, numFunc := range numFuncs {
				for _, con := range concs {
					cas := defaultWindowTestCase()
					cas.rows = 100000
					cas.ndv = 1000
					cas.concurrency = con
					cas.dataSourceSorted = sorted
					cas.windowFunc = windowFunc
					cas.numFunc = numFunc
					if i < len(frames) {
						cas.frame = frames[i]
					}
					b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
						benchmarkWindowInterDircWithCase(b, cas)
					})
				}
			}
		}
	}
}

func BenchmarkWindowFunctionsAggWindowProcessorAboutFrame(b *testing.B) {
	b.ReportAllocs()
	windowFunc := ast.AggFuncMax
	frame := &embedded.WindowFrame{Type: ast.Events, Start: &embedded.FrameBound{UnBounded: true}, End: &embedded.FrameBound{UnBounded: true}}
	cas := defaultWindowTestCase()
	cas.rows = 10000
	cas.ndv = 10
	cas.concurrency = 1
	cas.dataSourceSorted = false
	cas.windowFunc = windowFunc
	cas.numFunc = 1
	cas.frame = frame
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkWindowInterDircWithCase(b, cas)
	})
}

func baseBenchmarkWindowFunctionsWithSlidingWindow(b *testing.B, frameType ast.FrameType) {
	b.ReportAllocs()
	windowFuncs := []struct {
		aggFunc         string
		aggDefCausTypes byte
	}{
		{ast.AggFuncSum, allegrosql.TypeFloat},
		{ast.AggFuncSum, allegrosql.TypeNewDecimal},
		{ast.AggFuncCount, allegrosql.TypeLong},
		{ast.AggFuncAvg, allegrosql.TypeFloat},
		{ast.AggFuncAvg, allegrosql.TypeNewDecimal},
		{ast.AggFuncBitXor, allegrosql.TypeLong},
		{ast.AggFuncMax, allegrosql.TypeLong},
		{ast.AggFuncMax, allegrosql.TypeFloat},
		{ast.AggFuncMin, allegrosql.TypeLong},
		{ast.AggFuncMin, allegrosql.TypeFloat},
	}
	event := 100000
	ndv := 100
	frame := &embedded.WindowFrame{
		Type:  frameType,
		Start: &embedded.FrameBound{Type: ast.Preceding, Num: 10},
		End:   &embedded.FrameBound{Type: ast.Following, Num: 10},
	}
	for _, windowFunc := range windowFuncs {
		cas := defaultWindowTestCase()
		cas.ctx.GetStochastikVars().WindowingUseHighPrecision = false
		cas.rows = event
		cas.ndv = ndv
		cas.windowFunc = windowFunc.aggFunc
		cas.frame = frame
		cas.defCausumns[0].RetType.Tp = windowFunc.aggDefCausTypes
		b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
			benchmarkWindowInterDircWithCase(b, cas)
		})
	}
}

func BenchmarkWindowFunctionsWithSlidingWindow(b *testing.B) {
	baseBenchmarkWindowFunctionsWithSlidingWindow(b, ast.Events)
	baseBenchmarkWindowFunctionsWithSlidingWindow(b, ast.Ranges)
}

type hashJoinTestCase struct {
	rows               int
	defcaus            []*types.FieldType
	concurrency        int
	ctx                stochastikctx.Context
	keyIdx             []int
	joinType           embedded.JoinType
	disk               bool
	useOuterToBuild    bool
	rawData            string
	childrenUsedSchema [][]bool
}

func (tc hashJoinTestCase) defCausumns() []*memex.DeferredCauset {
	ret := make([]*memex.DeferredCauset, 0)
	for i, t := range tc.defcaus {
		defCausumn := &memex.DeferredCauset{Index: i, RetType: t, UniqueID: tc.ctx.GetStochastikVars().AllocCausetDeferredCausetID()}
		ret = append(ret, defCausumn)
	}
	return ret
}

func (tc hashJoinTestCase) String() string {
	return fmt.Sprintf("(rows:%v, defcaus:%v, concurrency:%v, joinKeyIdx: %v, disk:%v)",
		tc.rows, tc.defcaus, tc.concurrency, tc.keyIdx, tc.disk)
}

func defaultHashJoinTestCase(defcaus []*types.FieldType, joinType embedded.JoinType, useOuterToBuild bool) *hashJoinTestCase {
	ctx := mock.NewContext()
	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
	ctx.GetStochastikVars().StmtCtx.MemTracker = memory.NewTracker(-1, -1)
	ctx.GetStochastikVars().StmtCtx.DiskTracker = disk.NewTracker(-1, -1)
	ctx.GetStochastikVars().SetIndexLookupJoinConcurrency(4)
	tc := &hashJoinTestCase{rows: 100000, concurrency: 4, ctx: ctx, keyIdx: []int{0, 1}, rawData: wideString}
	tc.defcaus = defcaus
	tc.useOuterToBuild = useOuterToBuild
	tc.joinType = joinType
	return tc
}

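// prepare4HashJoin assembles a HashJoinInterDirc over the two mock sources.
// When useOuterToBuild is set the sides are swapped so the outer side feeds
// the build phase; when disk is set the memory quota is capped at one byte so
// the event container must spill to disk.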
func prepare4HashJoin(testCase *hashJoinTestCase, innerInterDirc, outerInterDirc InterlockingDirectorate) *HashJoinInterDirc {
	if testCase.useOuterToBuild {
		innerInterDirc, outerInterDirc = outerInterDirc, innerInterDirc
	}
	defcaus0 := innerInterDirc.Schema().DeferredCausets
	defcaus1 := outerInterDirc.Schema().DeferredCausets

	joinSchema := memex.NewSchema()
	if testCase.childrenUsedSchema != nil {
		for i, used := range testCase.childrenUsedSchema[0] {
			if used {
				joinSchema.Append(defcaus0[i])
			}
		}
		for i, used := range testCase.childrenUsedSchema[1] {
			if used {
				joinSchema.Append(defcaus1[i])
			}
		}
	} else {
		joinSchema.Append(defcaus0...)
		joinSchema.Append(defcaus1...)
	}

	joinKeys := make([]*memex.DeferredCauset, 0, len(testCase.keyIdx))
	for _, keyIdx := range testCase.keyIdx {
		joinKeys = append(joinKeys, defcaus0[keyIdx])
	}
	probeKeys := make([]*memex.DeferredCauset, 0, len(testCase.keyIdx))
	for _, keyIdx := range testCase.keyIdx {
		probeKeys = append(probeKeys, defcaus1[keyIdx])
	}
	e := &HashJoinInterDirc{
		baseInterlockingDirectorate: newBaseInterlockingDirectorate(testCase.ctx, joinSchema, 5, innerInterDirc, outerInterDirc),
		concurrency:                 uint(testCase.concurrency),
		joinType:                    testCase.joinType, // 0 for InnerJoin, 1 for LeftOuterJoin, 2 for RightOuterJoin
		isOuterJoin:                 false,
		buildKeys:                   joinKeys,
		probeKeys:                   probeKeys,
		buildSideInterDirc:          innerInterDirc,
		probeSideInterDirc:          outerInterDirc,
		buildSideEstCount:           float64(testCase.rows),
		useOuterToBuild:             testCase.useOuterToBuild,
	}

	childrenUsedSchema := markChildrenUsedDefCauss(e.Schema(), e.children[0].Schema(), e.children[1].Schema())
	defaultValues := make([]types.Causet, e.buildSideInterDirc.Schema().Len())
	lhsTypes, rhsTypes := retTypes(innerInterDirc), retTypes(outerInterDirc)
	e.joiners = make([]joiner, e.concurrency)
	for i := uint(0); i < e.concurrency; i++ {
		e.joiners[i] = newJoiner(testCase.ctx, e.joinType, true, defaultValues,
			nil, lhsTypes, rhsTypes, childrenUsedSchema)
	}
	memLimit := int64(-1) // -1 means no quota
	if testCase.disk {
		memLimit = 1 // a one-byte quota forces the event container to spill
	}
	t := memory.NewTracker(-1, memLimit)
	t.SetSuperCowOrNoCausetOnExceed(nil)
	t2 := disk.NewTracker(-1, -1)
	e.ctx.GetStochastikVars().StmtCtx.MemTracker = t
	e.ctx.GetStochastikVars().StmtCtx.DiskTracker = t2
	return e
}
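
// A hedged sketch of a disk-spilling case built from the helpers above; the
// join type value and key choice mirror BenchmarkHashJoinInterDirc, and the
// helper itself is illustrative and unused.
func exampleSpillingHashJoinCase() *hashJoinTestCase {
	defcaus := []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeVarString),
	}
	cas := defaultHashJoinTestCase(defcaus, 0, false) // joinType 0: inner join
	cas.keyIdx = []int{0}
	cas.disk = true // prepare4HashJoin will cap the memory quota at one byte
	return cas
}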

func benchmarkHashJoinInterDircWithCase(b *testing.B, casTest *hashJoinTestCase) {
	opt1 := mockDataSourceParameters{
		rows: casTest.rows,
		ctx:  casTest.ctx,
		genDataFunc: func(event int, typ *types.FieldType) interface{} {
			switch typ.Tp {
			case allegrosql.TypeLong, allegrosql.TypeLonglong:
				return int64(event)
			case allegrosql.TypeVarString:
				return casTest.rawData
			case allegrosql.TypeDouble:
				return float64(event)
			default:
				panic("not implemented")
			}
		},
	}
	opt2 := opt1
	opt1.schemaReplicant = memex.NewSchema(casTest.defCausumns()...)
	opt2.schemaReplicant = memex.NewSchema(casTest.defCausumns()...)
	dataSource1 := buildMockDataSource(opt1)
	dataSource2 := buildMockDataSource(opt2)
	// Test the spill result once before the timed iterations.
	benchmarkHashJoinInterDirc(b, casTest, dataSource1, dataSource2, true)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		benchmarkHashJoinInterDirc(b, casTest, dataSource1, dataSource2, false)
	}
}

func benchmarkHashJoinInterDirc(b *testing.B, casTest *hashJoinTestCase, dataSource1, dataSource2 *mockDataSource, testResult bool) {
	b.StopTimer()
	exec := prepare4HashJoin(casTest, dataSource1, dataSource2)
	tmpCtx := context.Background()
	chk := newFirstChunk(exec)
	dataSource1.prepareChunks()
	dataSource2.prepareChunks()

	totalEvent := 0
	b.StartTimer()
	if err := exec.Open(tmpCtx); err != nil {
		b.Fatal(err)
	}
	for {
		if err := exec.Next(tmpCtx, chk); err != nil {
			b.Fatal(err)
		}
		if chk.NumEvents() == 0 {
			break
		}
		totalEvent += chk.NumEvents()
	}

	if testResult {
		time.Sleep(200 * time.Millisecond) // give any background spill a moment to finish before checking
		if spilled := exec.rowContainer.alreadySpilledSafeForTest(); spilled != casTest.disk {
			b.Fatal("wrong usage with disk:", spilled, casTest.disk)
		}
	}

	if err := exec.Close(); err != nil {
		b.Fatal(err)
	}
	b.StopTimer()
	if totalEvent == 0 {
		b.Fatal("totalEvent == 0")
	}
}

func BenchmarkHashJoinInlineProjection(b *testing.B) {
	defcaus := []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeVarString),
	}

	b.ReportAllocs()

	{
		cas := defaultHashJoinTestCase(defcaus, 0, false)
		cas.keyIdx = []int{0}
		cas.childrenUsedSchema = [][]bool{
			{false, true},
			{false, false},
		}
		b.Run("InlineProjection:ON", func(b *testing.B) {
			benchmarkHashJoinInterDircWithCase(b, cas)
		})
	}

	{
		cas := defaultHashJoinTestCase(defcaus, 0, false)
		cas.keyIdx = []int{0}
		b.Run("InlineProjection:OFF", func(b *testing.B) {
			benchmarkHashJoinInterDircWithCase(b, cas)
		})
	}
}

func BenchmarkHashJoinInterDirc(b *testing.B) {
	lvl := log.GetLevel()
	log.SetLevel(zapembedded.ErrorLevel)
	defer log.SetLevel(lvl)

	defcaus := []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeVarString),
	}

	b.ReportAllocs()
	cas := defaultHashJoinTestCase(defcaus, 0, false)
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas.keyIdx = []int{0}
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas.keyIdx = []int{0}
	cas.disk = true
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	// Replace the wide string defCausumn with a double defCausumn.
	defcaus = []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeDouble),
	}

	cas = defaultHashJoinTestCase(defcaus, 0, false)
	cas.keyIdx = []int{0}
	cas.rows = 5
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas = defaultHashJoinTestCase(defcaus, 0, false)
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas.keyIdx = []int{0}
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})
}

func BenchmarkOuterHashJoinInterDirc(b *testing.B) {
	lvl := log.GetLevel()
	log.SetLevel(zapembedded.ErrorLevel)
	defer log.SetLevel(lvl)

	defcaus := []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeVarString),
	}

	b.ReportAllocs()
	cas := defaultHashJoinTestCase(defcaus, 2, true)
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas.keyIdx = []int{0}
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas.keyIdx = []int{0}
	cas.disk = true
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	// Replace the wide string defCausumn with a double defCausumn.
	defcaus = []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeDouble),
	}

	cas = defaultHashJoinTestCase(defcaus, 2, true)
	cas.keyIdx = []int{0}
	cas.rows = 5
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas = defaultHashJoinTestCase(defcaus, 2, true)
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})

	cas.keyIdx = []int{0}
	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
		benchmarkHashJoinInterDircWithCase(b, cas)
	})
}

func benchmarkBuildHashBlockForList(b *testing.B, casTest *hashJoinTestCase) {
	opt := mockDataSourceParameters{
		schemaReplicant: memex.NewSchema(casTest.defCausumns()...),
		rows:            casTest.rows,
		ctx:             casTest.ctx,
		genDataFunc: func(event int, typ *types.FieldType) interface{} {
			switch typ.Tp {
			case allegrosql.TypeLong, allegrosql.TypeLonglong:
				return int64(event)
			case allegrosql.TypeVarString:
				return casTest.rawData
			default:
				panic("not implemented")
			}
		},
	}
	dataSource1 := buildMockDataSource(opt)
	dataSource2 := buildMockDataSource(opt)

	dataSource1.prepareChunks()
	benchmarkBuildHashBlock(b, casTest, dataSource1, dataSource2, true)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		benchmarkBuildHashBlock(b, casTest, dataSource1, dataSource2, false)
	}
}

func benchmarkBuildHashBlock(b *testing.B, casTest *hashJoinTestCase, dataSource1, dataSource2 *mockDataSource, testResult bool) {
	b.StopTimer()
	exec := prepare4HashJoin(casTest, dataSource1, dataSource2)
	tmpCtx := context.Background()
	if err := exec.Open(tmpCtx); err != nil {
		b.Fatal(err)
	}
	exec.prepared = true

	innerResultCh := make(chan *chunk.Chunk, len(dataSource1.chunks))
	for _, chk := range dataSource1.chunks {
		innerResultCh <- chk
	}
	close(innerResultCh)

	b.StartTimer()
	if err := exec.buildHashBlockForList(innerResultCh); err != nil {
		b.Fatal(err)
	}

	if testResult {
		time.Sleep(200 * time.Millisecond)
		if exec.rowContainer.alreadySpilledSafeForTest() != casTest.disk {
			b.Fatal("wrong usage with disk")
		}
	}

	if err := exec.Close(); err != nil {
		b.Fatal(err)
	}
	b.StopTimer()
}

func BenchmarkBuildHashBlockForList(b *testing.B) {
	lvl := log.GetLevel()
	log.SetLevel(zapembedded.ErrorLevel)
	defer log.SetLevel(lvl)

	defcaus := []*types.FieldType{
		types.NewFieldType(allegrosql.TypeLonglong),
		types.NewFieldType(allegrosql.TypeVarString),
	}

	b.ReportAllocs()
	cas := defaultHashJoinTestCase(defcaus, 0, false)
	rows := []int{10, 100000}
	keyIdxs := [][]int{{0, 1}, {0}}
	disks := []bool{false, true}
	for _, event := range rows {
		for _, keyIdx := range keyIdxs {
			for _, disk := range disks {
				cas.rows = event
				cas.keyIdx = keyIdx
				cas.disk = disk
				b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
					benchmarkBuildHashBlockForList(b, cas)
				})
			}
		}
	}
}

type indexJoinTestCase struct {
	outerEvents     int
	innerEvents     int
	concurrency     int
	ctx             stochastikctx.Context
	outerJoinKeyIdx []int
	innerJoinKeyIdx []int
	innerIdx        []int
	needOuterSort   bool
	rawData         string
}

func (tc indexJoinTestCase) defCausumns() []*memex.DeferredCauset {
	return []*memex.DeferredCauset{
		{Index: 0, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
		{Index: 1, RetType: types.NewFieldType(allegrosql.TypeDouble)},
		{Index: 2, RetType: types.NewFieldType(allegrosql.TypeVarString)},
	}
}

func defaultIndexJoinTestCase() *indexJoinTestCase {
	ctx := mock.NewContext()
	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
	ctx.GetStochastikVars().SnapshotTS = 1
	ctx.GetStochastikVars().StmtCtx.MemTracker = memory.NewTracker(-1, -1)
	ctx.GetStochastikVars().StmtCtx.DiskTracker = disk.NewTracker(-1, -1)
	tc := &indexJoinTestCase{
		outerEvents:     100000,
		innerEvents:     variable.DefMaxChunkSize * 100,
		concurrency:     4,
		ctx:             ctx,
		outerJoinKeyIdx: []int{0, 1},
		innerJoinKeyIdx: []int{0, 1},
		innerIdx:        []int{0, 1},
		rawData:         wideString,
	}
	return tc
}

func (tc indexJoinTestCase) String() string {
	return fmt.Sprintf("(outerEvents:%v, innerEvents:%v, concurrency:%v, outerJoinKeyIdx: %v, innerJoinKeyIdx: %v, NeedOuterSort:%v)",
		tc.outerEvents, tc.innerEvents, tc.concurrency, tc.outerJoinKeyIdx, tc.innerJoinKeyIdx, tc.needOuterSort)
}

func (tc indexJoinTestCase) getMockDataSourceOptByEvents(rows int) mockDataSourceParameters {
	return mockDataSourceParameters{
		schemaReplicant: memex.NewSchema(tc.defCausumns()...),
		rows:            rows,
		ctx:             tc.ctx,
		genDataFunc: func(event int, typ *types.FieldType) interface{} {
			switch typ.Tp {
			case allegrosql.TypeLong, allegrosql.TypeLonglong:
				return int64(event)
			case allegrosql.TypeDouble:
				return float64(event)
			case allegrosql.TypeVarString:
				return tc.rawData
			default:
				panic("not implemented")
			}
		},
	}
}

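// prepare4IndexInnerHashJoin builds an IndexLookUpJoin whose inner side is
// replayed through a mockPhysicalIndexReader, so the benchmark exercises the
// join machinery without a real storage layer underneath.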
func prepare4IndexInnerHashJoin(tc *indexJoinTestCase, outerDS *mockDataSource, innerDS *mockDataSource) InterlockingDirectorate {
	outerDefCauss, innerDefCauss := tc.defCausumns(), tc.defCausumns()
	joinSchema := memex.NewSchema(outerDefCauss...)
	joinSchema.Append(innerDefCauss...)
	leftTypes, rightTypes := retTypes(outerDS), retTypes(innerDS)
	defaultValues := make([]types.Causet, len(innerDefCauss))
	defCausLens := make([]int, len(innerDefCauss))
	for i := range defCausLens {
		defCausLens[i] = types.UnspecifiedLength
	}
	keyOff2IdxOff := make([]int, len(tc.outerJoinKeyIdx))
	for i := range keyOff2IdxOff {
		keyOff2IdxOff[i] = i
	}
	e := &IndexLookUpJoin{
		baseInterlockingDirectorate: newBaseInterlockingDirectorate(tc.ctx, joinSchema, 1, outerDS),
		outerCtx: outerCtx{
			rowTypes:    leftTypes,
			keyDefCauss: tc.outerJoinKeyIdx,
		},
		innerCtx: innerCtx{
			readerBuilder: &dataReaderBuilder{Causet: &mockPhysicalIndexReader{e: innerDS}, interlockBuilder: newInterlockingDirectorateBuilder(tc.ctx, nil)},
			rowTypes:      rightTypes,
			defCausLens:   defCausLens,
			keyDefCauss:   tc.innerJoinKeyIdx,
		},
		workerWg:          new(sync.WaitGroup),
		joiner:            newJoiner(tc.ctx, 0, false, defaultValues, nil, leftTypes, rightTypes, nil),
		isOuterJoin:       false,
		keyOff2IdxOff:     keyOff2IdxOff,
		lastDefCausHelper: nil,
	}
	e.joinResult = newFirstChunk(e)
	return e
}

func prepare4IndexOuterHashJoin(tc *indexJoinTestCase, outerDS *mockDataSource, innerDS *mockDataSource) InterlockingDirectorate {
	e := prepare4IndexInnerHashJoin(tc, outerDS, innerDS).(*IndexLookUpJoin)
	idxHash := &IndexNestedLoopHashJoin{IndexLookUpJoin: *e}
	concurrency := tc.concurrency
	idxHash.joiners = make([]joiner, concurrency)
	for i := 0; i < concurrency; i++ {
		idxHash.joiners[i] = e.joiner.Clone()
	}
	return idxHash
}

func prepare4IndexMergeJoin(tc *indexJoinTestCase, outerDS *mockDataSource, innerDS *mockDataSource) InterlockingDirectorate {
	outerDefCauss, innerDefCauss := tc.defCausumns(), tc.defCausumns()
	joinSchema := memex.NewSchema(outerDefCauss...)
	joinSchema.Append(innerDefCauss...)
	outerJoinKeys := make([]*memex.DeferredCauset, 0, len(tc.outerJoinKeyIdx))
	innerJoinKeys := make([]*memex.DeferredCauset, 0, len(tc.innerJoinKeyIdx))
	for _, keyIdx := range tc.outerJoinKeyIdx {
		outerJoinKeys = append(outerJoinKeys, outerDefCauss[keyIdx])
	}
	for _, keyIdx := range tc.innerJoinKeyIdx {
		innerJoinKeys = append(innerJoinKeys, innerDefCauss[keyIdx])
	}
	leftTypes, rightTypes := retTypes(outerDS), retTypes(innerDS)
	defaultValues := make([]types.Causet, len(innerDefCauss))
	defCausLens := make([]int, len(innerDefCauss))
	for i := range defCausLens {
		defCausLens[i] = types.UnspecifiedLength
	}
	keyOff2IdxOff := make([]int, len(outerJoinKeys))
	for i := range keyOff2IdxOff {
		keyOff2IdxOff[i] = i
	}

	compareFuncs := make([]memex.CompareFunc, 0, len(outerJoinKeys))
	outerCompareFuncs := make([]memex.CompareFunc, 0, len(outerJoinKeys))
	for i := range outerJoinKeys {
		compareFuncs = append(compareFuncs, memex.GetCmpFunction(nil, outerJoinKeys[i], innerJoinKeys[i]))
		outerCompareFuncs = append(outerCompareFuncs, memex.GetCmpFunction(nil, outerJoinKeys[i], outerJoinKeys[i]))
	}
	e := &IndexLookUpMergeJoin{
		baseInterlockingDirectorate: newBaseInterlockingDirectorate(tc.ctx, joinSchema, 2, outerDS),
		outerMergeCtx: outerMergeCtx{
			rowTypes:      leftTypes,
			keyDefCauss:   tc.outerJoinKeyIdx,
			joinKeys:      outerJoinKeys,
			needOuterSort: tc.needOuterSort,
			compareFuncs:  outerCompareFuncs,
		},
		innerMergeCtx: innerMergeCtx{
			readerBuilder: &dataReaderBuilder{Causet: &mockPhysicalIndexReader{e: innerDS}, interlockBuilder: newInterlockingDirectorateBuilder(tc.ctx, nil)},
			rowTypes:      rightTypes,
			joinKeys:      innerJoinKeys,
			defCausLens:   defCausLens,
			keyDefCauss:   tc.innerJoinKeyIdx,
			compareFuncs:  compareFuncs,
		},
		workerWg:          new(sync.WaitGroup),
		isOuterJoin:       false,
		keyOff2IdxOff:     keyOff2IdxOff,
		lastDefCausHelper: nil,
	}
	concurrency := e.ctx.GetStochastikVars().IndexLookupJoinConcurrency()
	joiners := make([]joiner, concurrency)
	for i := 0; i < concurrency; i++ {
		joiners[i] = newJoiner(tc.ctx, 0, false, defaultValues, nil, leftTypes, rightTypes, nil)
	}
	e.joiners = joiners
	return e
}

type indexJoinType int8

const (
	indexInnerHashJoin indexJoinType = iota
	indexOuterHashJoin
	indexMergeJoin
)

func benchmarkIndexJoinInterDircWithCase(
	b *testing.B,
	tc *indexJoinTestCase,
	outerDS *mockDataSource,
	innerDS *mockDataSource,
	execType indexJoinType,
) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer()
		var exec InterlockingDirectorate
		switch execType {
		case indexInnerHashJoin:
			exec = prepare4IndexInnerHashJoin(tc, outerDS, innerDS)
		case indexOuterHashJoin:
			exec = prepare4IndexOuterHashJoin(tc, outerDS, innerDS)
		case indexMergeJoin:
			exec = prepare4IndexMergeJoin(tc, outerDS, innerDS)
		}

		tmpCtx := context.Background()
		chk := newFirstChunk(exec)
		outerDS.prepareChunks()
		innerDS.prepareChunks()

		b.StartTimer()
		if err := exec.Open(tmpCtx); err != nil {
			b.Fatal(err)
		}
		for {
			if err := exec.Next(tmpCtx, chk); err != nil {
				b.Fatal(err)
			}
			if chk.NumEvents() == 0 {
				break
			}
		}

		if err := exec.Close(); err != nil {
			b.Fatal(err)
		}
		b.StopTimer()
	}
}

func BenchmarkIndexJoinInterDirc(b *testing.B) {
	lvl := log.GetLevel()
	log.SetLevel(zapembedded.ErrorLevel)
	defer log.SetLevel(lvl)

	b.ReportAllocs()
	tc := defaultIndexJoinTestCase()
	outerOpt := tc.getMockDataSourceOptByEvents(tc.outerEvents)
	innerOpt := tc.getMockDataSourceOptByEvents(tc.innerEvents)
	outerDS := buildMockDataSourceWithIndex(outerOpt, tc.innerIdx)
	innerDS := buildMockDataSourceWithIndex(innerOpt, tc.innerIdx)

	tc.needOuterSort = true
	b.Run(fmt.Sprintf("index merge join need outer sort %v", tc), func(b *testing.B) {
		benchmarkIndexJoinInterDircWithCase(b, tc, outerDS, innerDS, indexMergeJoin)
	})

	tc.needOuterSort = false
	b.Run(fmt.Sprintf("index merge join %v", tc), func(b *testing.B) {
		benchmarkIndexJoinInterDircWithCase(b, tc, outerDS, innerDS, indexMergeJoin)
	})

	b.Run(fmt.Sprintf("index inner hash join %v", tc), func(b *testing.B) {
		benchmarkIndexJoinInterDircWithCase(b, tc, outerDS, innerDS, indexInnerHashJoin)
	})

	b.Run(fmt.Sprintf("index outer hash join %v", tc), func(b *testing.B) {
		benchmarkIndexJoinInterDircWithCase(b, tc, outerDS, innerDS, indexOuterHashJoin)
	})
}

type mergeJoinTestCase struct {
	indexJoinTestCase
	childrenUsedSchema [][]bool
}

func prepare4MergeJoin(tc *mergeJoinTestCase, leftInterDirc, rightInterDirc *mockDataSource) *MergeJoinInterDirc {
	outerDefCauss, innerDefCauss := tc.defCausumns(), tc.defCausumns()

	joinSchema := memex.NewSchema()
	if tc.childrenUsedSchema != nil {
		for i, used := range tc.childrenUsedSchema[0] {
			if used {
				joinSchema.Append(outerDefCauss[i])
			}
		}
		for i, used := range tc.childrenUsedSchema[1] {
			if used {
				joinSchema.Append(innerDefCauss[i])
			}
		}
	} else {
		joinSchema.Append(outerDefCauss...)
		joinSchema.Append(innerDefCauss...)
	}

	outerJoinKeys := make([]*memex.DeferredCauset, 0, len(tc.outerJoinKeyIdx))
	innerJoinKeys := make([]*memex.DeferredCauset, 0, len(tc.innerJoinKeyIdx))
	for _, keyIdx := range tc.outerJoinKeyIdx {
		outerJoinKeys = append(outerJoinKeys, outerDefCauss[keyIdx])
	}
	for _, keyIdx := range tc.innerJoinKeyIdx {
		innerJoinKeys = append(innerJoinKeys, innerDefCauss[keyIdx])
	}
	compareFuncs := make([]memex.CompareFunc, 0, len(outerJoinKeys))
	outerCompareFuncs := make([]memex.CompareFunc, 0, len(outerJoinKeys))
	for i := range outerJoinKeys {
		compareFuncs = append(compareFuncs, memex.GetCmpFunction(nil, outerJoinKeys[i], innerJoinKeys[i]))
		outerCompareFuncs = append(outerCompareFuncs, memex.GetCmpFunction(nil, outerJoinKeys[i], outerJoinKeys[i]))
	}

	defaultValues := make([]types.Causet, len(innerDefCauss))

	// only benchmark inner join
	e := &MergeJoinInterDirc{
		stmtCtx:                     tc.ctx.GetStochastikVars().StmtCtx,
		baseInterlockingDirectorate: newBaseInterlockingDirectorate(tc.ctx, joinSchema, 3, leftInterDirc, rightInterDirc),
		compareFuncs:                compareFuncs,
		isOuterJoin:                 false,
	}

	e.joiner = newJoiner(
		tc.ctx,
		0,
		false,
		defaultValues,
		nil,
		retTypes(leftInterDirc),
		retTypes(rightInterDirc),
		tc.childrenUsedSchema,
	)

	e.innerBlock = &mergeJoinBlock{
		isInner:    true,
		childIndex: 1,
		joinKeys:   innerJoinKeys,
	}

	e.outerBlock = &mergeJoinBlock{
		childIndex: 0,
		filters:    nil,
		joinKeys:   outerJoinKeys,
	}

	return e
}
  1462  
  1463  func defaultMergeJoinTestCase() *mergeJoinTestCase {
  1464  	return &mergeJoinTestCase{*defaultIndexJoinTestCase(), nil}
  1465  }
  1466  
  1467  func newMergeJoinBenchmark(numOuterEvents, numInnerDup, numInnerRedundant int) (tc *mergeJoinTestCase, innerDS, outerDS *mockDataSource) {
  1468  	ctx := mock.NewContext()
  1469  	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
  1470  	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
  1471  	ctx.GetStochastikVars().SnapshotTS = 1
  1472  	ctx.GetStochastikVars().StmtCtx.MemTracker = memory.NewTracker(-1, -1)
  1473  	ctx.GetStochastikVars().StmtCtx.DiskTracker = disk.NewTracker(-1, -1)
  1474  
  1475  	numInnerEvents := numOuterEvents*numInnerDup + numInnerRedundant
  1476  	itc := &indexJoinTestCase{
  1477  		outerEvents:     numOuterEvents,
  1478  		innerEvents:     numInnerEvents,
  1479  		concurrency:     4,
  1480  		ctx:             ctx,
  1481  		outerJoinKeyIdx: []int{0, 1},
  1482  		innerJoinKeyIdx: []int{0, 1},
  1483  		innerIdx:        []int{0, 1},
  1484  		rawData:         wideString,
  1485  	}
  1486  	tc = &mergeJoinTestCase{*itc, nil}
  1487  	outerOpt := mockDataSourceParameters{
  1488  		schemaReplicant: memex.NewSchema(tc.defCausumns()...),
  1489  		rows:            numOuterEvents,
  1490  		ctx:             tc.ctx,
  1491  		genDataFunc: func(event int, typ *types.FieldType) interface{} {
  1492  			switch typ.Tp {
  1493  			case allegrosql.TypeLong, allegrosql.TypeLonglong:
  1494  				return int64(event)
  1495  			case allegrosql.TypeDouble:
  1496  				return float64(event)
  1497  			case allegrosql.TypeVarString:
  1498  				return tc.rawData
  1499  			default:
  1500  				panic("not implemented")
  1501  			}
  1502  		},
  1503  	}
  1504  
  1505  	innerOpt := mockDataSourceParameters{
  1506  		schemaReplicant: memex.NewSchema(tc.defCausumns()...),
  1507  		rows:            numInnerEvents,
  1508  		ctx:             tc.ctx,
  1509  		genDataFunc: func(event int, typ *types.FieldType) interface{} {
  1510  			event = event / numInnerDup // every numInnerDup consecutive events share one join key
  1511  			switch typ.Tp {
  1512  			case allegrosql.TypeLong, allegrosql.TypeLonglong:
  1513  				return int64(event)
  1514  			case allegrosql.TypeDouble:
  1515  				return float64(event)
  1516  			case allegrosql.TypeVarString:
  1517  				return tc.rawData
  1518  			default:
  1519  				panic("not implemented")
  1520  			}
  1521  		},
  1522  	}
  1523  
  1524  	innerDS = buildMockDataSource(innerOpt)
  1525  	outerDS = buildMockDataSource(outerOpt)
  1526  
  1527  	return
  1528  }
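
// Inner-block sizing above follows
// numInnerEvents = numOuterEvents*numInnerDup + numInnerRedundant, and the
// inner genDataFunc divides each event index by numInnerDup, so every join
// key repeats numInnerDup times. For example:
//
//	// 3000 outer rows, each key duplicated 100 times, no redundant padding:
//	// 3000*100 + 0 = 300000 inner rows over 3000 distinct keys.
//	tc, innerDS, outerDS := newMergeJoinBenchmark(3000, 100, 0)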
  1529  
  1530  type mergeJoinType int8
  1531  
  1532  const (
  1533  	innerMergeJoin mergeJoinType = iota
  1534  )
  1535  
  1536  func benchmarkMergeJoinInterDircWithCase(b *testing.B, tc *mergeJoinTestCase, outerDS, innerDS *mockDataSource, joinType mergeJoinType) {
  1537  	b.ResetTimer()
  1538  	for i := 0; i < b.N; i++ {
  1539  		b.StopTimer()
  1540  		var exec InterlockingDirectorate
  1541  		switch joinType {
  1542  		case innerMergeJoin:
  1543  			exec = prepare4MergeJoin(tc, outerDS, innerDS)
  1544  		}
  1545  
  1546  		tmpCtx := context.Background()
  1547  		chk := newFirstChunk(exec)
  1548  		outerDS.prepareChunks()
  1549  		innerDS.prepareChunks()
  1550  
  1551  		b.StartTimer()
  1552  		if err := exec.Open(tmpCtx); err != nil {
  1553  			b.Fatal(err)
  1554  		}
  1555  		for {
  1556  			if err := exec.Next(tmpCtx, chk); err != nil {
  1557  				b.Fatal(err)
  1558  			}
  1559  			if chk.NumEvents() == 0 {
  1560  				break
  1561  			}
  1562  		}
  1563  
  1564  		if err := exec.Close(); err != nil {
  1565  			b.Fatal(err)
  1566  		}
  1567  		b.StopTimer()
  1568  	}
  1569  }
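
// Note the StopTimer/StartTimer bracketing above: executor construction,
// chunk allocation, and prepareChunks() all run with the clock stopped, so
// only Open/Next/Close is measured. benchmarkSortInterDirc below follows the
// same pattern.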
  1570  
  1571  func BenchmarkMergeJoinInterDirc(b *testing.B) {
  1572  	lvl := log.GetLevel()
  1573  	log.SetLevel(zapembedded.ErrorLevel)
  1574  	defer log.SetLevel(lvl)
  1575  	b.ReportAllocs()
  1576  
  1577  	totalEvents := 300000
  1578  
  1579  	innerDupAndRedundant := [][]int{
  1580  		{1, 0},
  1581  		{100, 0},
  1582  		{10000, 0},
  1583  		{1, 30000},
  1584  	}
  1585  
  1586  	childrenUsedSchemas := [][][]bool{
  1587  		nil,
  1588  		{
  1589  			{true, false, false},
  1590  			{false, true, false},
  1591  		},
  1592  	}
  1593  
  1594  	for _, params := range innerDupAndRedundant {
  1595  		numInnerDup, numInnerRedundant := params[0], params[1]
  1596  		for _, childrenUsedSchema := range childrenUsedSchemas {
  1597  			tc, innerDS, outerDS := newMergeJoinBenchmark(totalEvents/numInnerDup, numInnerDup, numInnerRedundant)
  1598  			inlineProj := false
  1599  			if childrenUsedSchema != nil {
  1600  				inlineProj = true
  1601  				tc.childrenUsedSchema = childrenUsedSchema
  1602  			}
  1603  
  1604  			b.Run(fmt.Sprintf("merge join %v InlineProj:%v", tc, inlineProj), func(b *testing.B) {
  1605  				benchmarkMergeJoinInterDircWithCase(b, tc, outerDS, innerDS, innerMergeJoin)
  1606  			})
  1607  		}
  1608  	}
  1609  }
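
// A hypothetical invocation for running just these merge-join cases (the
// package path is an assumption; adjust it to the local checkout):
//
//	go test -run=NONE -bench='BenchmarkMergeJoinInterDirc' -benchmem ./interlock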
  1610  
  1611  type sortCase struct {
  1612  	rows       int
  1613  	orderByIdx []int
  1614  	ndvs       []int
  1615  	ctx        stochastikctx.Context
  1616  }
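
// ndvs[i] bounds the number of distinct values generated for column i, with
// 0 meaning unconstrained random data (see the "all random data" case in
// BenchmarkSortInterDirc below); orderByIdx lists the column indexes turned
// into ByItems. A low-cardinality sketch:
//
//	cas := defaultSortTestCase()
//	cas.ndvs = []int{10, 0}      // ~10 distinct values in column 0
//	cas.orderByIdx = []int{0, 1} // sort by column 0, then column 1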
  1617  
  1618  func (tc sortCase) defCausumns() []*memex.DeferredCauset {
  1619  	return []*memex.DeferredCauset{
  1620  		{Index: 0, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
  1621  		{Index: 1, RetType: types.NewFieldType(allegrosql.TypeLonglong)},
  1622  	}
  1623  }
  1624  
  1625  func (tc sortCase) String() string {
  1626  	return fmt.Sprintf("(rows:%v, orderBy:%v, ndvs:%v)", tc.rows, tc.orderByIdx, tc.ndvs)
  1627  }
  1628  
  1629  func defaultSortTestCase() *sortCase {
  1630  	ctx := mock.NewContext()
  1631  	ctx.GetStochastikVars().InitChunkSize = variable.DefInitChunkSize
  1632  	ctx.GetStochastikVars().MaxChunkSize = variable.DefMaxChunkSize
  1633  	ctx.GetStochastikVars().StmtCtx.MemTracker = memory.NewTracker(-1, -1)
  1634  	tc := &sortCase{rows: 300000, orderByIdx: []int{0, 1}, ndvs: []int{0, 0}, ctx: ctx}
  1635  	return tc
  1636  }
  1637  
  1638  func benchmarkSortInterDirc(b *testing.B, cas *sortCase) {
  1639  	opt := mockDataSourceParameters{
  1640  		schemaReplicant: memex.NewSchema(cas.defCausumns()...),
  1641  		rows:            cas.rows,
  1642  		ctx:             cas.ctx,
  1643  		ndvs:            cas.ndvs,
  1644  	}
  1645  	dataSource := buildMockDataSource(opt)
  1646  	exec := &SortInterDirc{
  1647  		baseInterlockingDirectorate: newBaseInterlockingDirectorate(cas.ctx, dataSource.schemaReplicant, 4, dataSource),
  1648  		ByItems:                     make([]*soliton.ByItems, 0, len(cas.orderByIdx)),
  1649  		schemaReplicant:             dataSource.schemaReplicant,
  1650  	}
  1651  	for _, idx := range cas.orderByIdx {
  1652  		exec.ByItems = append(exec.ByItems, &soliton.ByItems{Expr: cas.defCausumns()[idx]})
  1653  	}
  1654  	b.ResetTimer()
  1655  	for i := 0; i < b.N; i++ {
  1656  		b.StopTimer()
  1657  		tmpCtx := context.Background()
  1658  		chk := newFirstChunk(exec)
  1659  		dataSource.prepareChunks()
  1660  
  1661  		b.StartTimer()
  1662  		if err := exec.Open(tmpCtx); err != nil {
  1663  			b.Fatal(err)
  1664  		}
  1665  		for {
  1666  			if err := exec.Next(tmpCtx, chk); err != nil {
  1667  				b.Fatal(err)
  1668  			}
  1669  			if chk.NumEvents() == 0 {
  1670  				break
  1671  			}
  1672  		}
  1673  
  1674  		if err := exec.Close(); err != nil {
  1675  			b.Fatal(err)
  1676  		}
  1677  		b.StopTimer()
  1678  	}
  1679  }
  1680  
  1681  func BenchmarkSortInterDirc(b *testing.B) {
  1682  	b.ReportAllocs()
  1683  	cas := defaultSortTestCase()
  1684  	// all random data: no NDV limit on either column
  1685  	cas.ndvs = []int{0, 0}
  1686  	cas.orderByIdx = []int{0, 1}
  1687  	b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
  1688  		benchmarkSortInterDirc(b, cas)
  1689  	})
  1690  
  1691  	ndvs := []int{1, 10000}
  1692  	for _, ndv := range ndvs {
  1693  		cas.ndvs = []int{ndv, 0}
  1694  		cas.orderByIdx = []int{0, 1}
  1695  		b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
  1696  			benchmarkSortInterDirc(b, cas)
  1697  		})
  1698  
  1699  		cas.ndvs = []int{ndv, 0}
  1700  		cas.orderByIdx = []int{0}
  1701  		b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
  1702  			benchmarkSortInterDirc(b, cas)
  1703  		})
  1704  
  1705  		cas.ndvs = []int{ndv, 0}
  1706  		cas.orderByIdx = []int{1}
  1707  		b.Run(fmt.Sprintf("%v", cas), func(b *testing.B) {
  1708  			benchmarkSortInterDirc(b, cas)
  1709  		})
  1710  	}
  1711  }