github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/single/join_test.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package single
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"testing"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/common/hashmap"
    23  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    24  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    25  	"github.com/matrixorigin/matrixone/pkg/container/types"
    26  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    27  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    28  	"github.com/matrixorigin/matrixone/pkg/sql/colexec/hashbuild"
    29  	"github.com/matrixorigin/matrixone/pkg/sql/plan/function"
    30  	"github.com/matrixorigin/matrixone/pkg/testutil"
    31  	"github.com/matrixorigin/matrixone/pkg/vm"
    32  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    33  	"github.com/stretchr/testify/require"
    34  )
    35  
    36  const (
    37  	Rows          = 10     // default rows
    38  	BenchmarkRows = 100000 // default rows for benchmark
    39  )
    40  
    41  // add unit tests for cases
    42  type joinTestCase struct {
    43  	arg    *Argument
    44  	flgs   []bool // flgs[i] == true: nullable
    45  	types  []types.Type
    46  	proc   *process.Process
    47  	cancel context.CancelFunc
    48  	barg   *hashbuild.Argument
    49  }
    50  
    51  var (
    52  	tcs []joinTestCase
    53  )
    54  
    55  func init() {
    56  	tcs = []joinTestCase{
    57  		newTestCase([]bool{false}, []types.Type{types.T_int8.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0)},
    58  			[][]*plan.Expr{
    59  				{
    60  					newExpr(0, types.T_int8.ToType()),
    61  				},
    62  				{
    63  					newExpr(0, types.T_int8.ToType()),
    64  				},
    65  			}),
    66  	}
    67  }
    68  
    69  func TestString(t *testing.T) {
    70  	buf := new(bytes.Buffer)
    71  	for _, tc := range tcs {
    72  		tc.arg.String(buf)
    73  	}
    74  }
    75  
    76  func TestJoin(t *testing.T) {
    77  	for _, tc := range tcs {
    78  		bats := hashBuild(t, tc)
    79  		if jm, ok := bats[0].AuxData.(*hashmap.JoinMap); ok {
    80  			jm.SetDupCount(int64(1))
    81  		}
    82  		err := tc.arg.Prepare(tc.proc)
    83  		require.NoError(t, err)
    84  		tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
    85  		tc.proc.Reg.MergeReceivers[0].Ch <- batch.EmptyBatch
    86  		tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
    87  		tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
    88  		tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
    89  		tc.proc.Reg.MergeReceivers[0].Ch <- nil
    90  		tc.proc.Reg.MergeReceivers[1].Ch <- bats[0]
    91  		tc.proc.Reg.MergeReceivers[1].Ch <- bats[1]
    92  		tc.proc.Reg.MergeReceivers[0].Ch <- nil
    93  		tc.proc.Reg.MergeReceivers[1].Ch <- nil
    94  		for {
    95  			ok, err := tc.arg.Call(tc.proc)
    96  			if ok.Status == vm.ExecStop || err != nil {
    97  				break
    98  			}
    99  		}
   100  		tc.arg.Free(tc.proc, false, nil)
   101  		tc.proc.FreeVectors()
   102  		require.Equal(t, int64(0), tc.proc.Mp().CurrNB())
   103  	}
   104  	for _, tc := range tcs {
   105  		err := tc.arg.Prepare(tc.proc)
   106  		require.NoError(t, err)
   107  		tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   108  		tc.proc.Reg.MergeReceivers[0].Ch <- batch.EmptyBatch
   109  		tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   110  		tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   111  		tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   112  		tc.proc.Reg.MergeReceivers[0].Ch <- nil
   113  		tc.proc.Reg.MergeReceivers[1].Ch <- nil
   114  		tc.proc.Reg.MergeReceivers[0].Ch <- nil
   115  		tc.proc.Reg.MergeReceivers[1].Ch <- nil
   116  		for {
   117  			ok, err := tc.arg.Call(tc.proc)
   118  			if ok.Status == vm.ExecStop || err != nil {
   119  				break
   120  			}
   121  		}
   122  		tc.arg.Free(tc.proc, false, nil)
   123  		tc.proc.FreeVectors()
   124  		require.Equal(t, int64(0), tc.proc.Mp().CurrNB())
   125  	}
   126  }
   127  
   128  func BenchmarkJoin(b *testing.B) {
   129  	for i := 0; i < b.N; i++ {
   130  		tcs = []joinTestCase{
   131  			newTestCase([]bool{false}, []types.Type{types.T_int8.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0), colexec.NewResultPos(1, 0)},
   132  				[][]*plan.Expr{
   133  					{
   134  						newExpr(0, types.T_int8.ToType()),
   135  					},
   136  					{
   137  						newExpr(0, types.T_int8.ToType()),
   138  					},
   139  				}),
   140  			newTestCase([]bool{true}, []types.Type{types.T_int8.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0), colexec.NewResultPos(1, 0)},
   141  				[][]*plan.Expr{
   142  					{
   143  						newExpr(0, types.T_int8.ToType()),
   144  					},
   145  					{
   146  						newExpr(0, types.T_int8.ToType()),
   147  					},
   148  				}),
   149  		}
   150  		t := new(testing.T)
   151  		for _, tc := range tcs {
   152  			bats := hashBuild(t, tc)
   153  			err := tc.arg.Prepare(tc.proc)
   154  			require.NoError(t, err)
   155  			tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   156  			tc.proc.Reg.MergeReceivers[0].Ch <- batch.EmptyBatch
   157  			tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   158  			tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   159  			tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   160  			tc.proc.Reg.MergeReceivers[0].Ch <- nil
   161  			tc.proc.Reg.MergeReceivers[1].Ch <- bats[0]
   162  			tc.proc.Reg.MergeReceivers[1].Ch <- bats[1]
   163  			for {
   164  				ok, err := tc.arg.Call(tc.proc)
   165  				if ok.Status == vm.ExecStop || err != nil {
   166  					break
   167  				}
   168  			}
   169  		}
   170  	}
   171  }
   172  
   173  func newExpr(pos int32, typ types.Type) *plan.Expr {
   174  	return &plan.Expr{
   175  		Typ: plan.Type{
   176  			Scale: typ.Scale,
   177  			Width: typ.Width,
   178  			Id:    int32(typ.Oid),
   179  		},
   180  		Expr: &plan.Expr_Col{
   181  			Col: &plan.ColRef{
   182  				ColPos: pos,
   183  			},
   184  		},
   185  	}
   186  }
   187  
   188  func newTestCase(flgs []bool, ts []types.Type, rp []colexec.ResultPos, cs [][]*plan.Expr) joinTestCase {
   189  	proc := testutil.NewProcessWithMPool(mpool.MustNewZero())
   190  	proc.Reg.MergeReceivers = make([]*process.WaitRegister, 2)
   191  	ctx, cancel := context.WithCancel(context.Background())
   192  	proc.Reg.MergeReceivers[0] = &process.WaitRegister{
   193  		Ctx: ctx,
   194  		Ch:  make(chan *batch.Batch, 10),
   195  	}
   196  	proc.Reg.MergeReceivers[1] = &process.WaitRegister{
   197  		Ctx: ctx,
   198  		Ch:  make(chan *batch.Batch, 10),
   199  	}
   200  	fr, _ := function.GetFunctionByName(ctx, "=", ts)
   201  	fid := fr.GetEncodedOverloadID()
   202  	args := make([]*plan.Expr, 0, 2)
   203  	args = append(args, &plan.Expr{
   204  		Typ: plan.Type{
   205  			Id: int32(ts[0].Oid),
   206  		},
   207  		Expr: &plan.Expr_Col{
   208  			Col: &plan.ColRef{
   209  				RelPos: 0,
   210  				ColPos: 0,
   211  			},
   212  		},
   213  	})
   214  	args = append(args, &plan.Expr{
   215  		Typ: plan.Type{
   216  			Id: int32(ts[0].Oid),
   217  		},
   218  		Expr: &plan.Expr_Col{
   219  			Col: &plan.ColRef{
   220  				RelPos: 1,
   221  				ColPos: 0,
   222  			},
   223  		},
   224  	})
   225  	cond := &plan.Expr{
   226  		Typ: plan.Type{
   227  			Id: int32(types.T_bool),
   228  		},
   229  		Expr: &plan.Expr_F{
   230  			F: &plan.Function{
   231  				Args: args,
   232  				Func: &plan.ObjectRef{Obj: fid, ObjName: "="},
   233  			},
   234  		},
   235  	}
   236  	return joinTestCase{
   237  		types:  ts,
   238  		flgs:   flgs,
   239  		proc:   proc,
   240  		cancel: cancel,
   241  		arg: &Argument{
   242  			Typs:       ts,
   243  			Result:     rp,
   244  			Conditions: cs,
   245  			Cond:       cond,
   246  			OperatorBase: vm.OperatorBase{
   247  				OperatorInfo: vm.OperatorInfo{
   248  					Idx:     1,
   249  					IsFirst: false,
   250  					IsLast:  false,
   251  				},
   252  			},
   253  		},
   254  		barg: &hashbuild.Argument{
   255  			Typs:        ts,
   256  			NeedHashMap: true,
   257  			Conditions:  cs[1],
   258  			OperatorBase: vm.OperatorBase{
   259  				OperatorInfo: vm.OperatorInfo{
   260  					Idx:     0,
   261  					IsFirst: false,
   262  					IsLast:  false,
   263  				},
   264  			},
   265  			NeedAllocateSels: true,
   266  			NeedMergedBatch:  true,
   267  		},
   268  	}
   269  }
   270  
   271  func hashBuild(t *testing.T, tc joinTestCase) []*batch.Batch {
   272  	err := tc.barg.Prepare(tc.proc)
   273  	require.NoError(t, err)
   274  	tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   275  	for _, r := range tc.proc.Reg.MergeReceivers {
   276  		r.Ch <- nil
   277  	}
   278  	ok1, err := tc.barg.Call(tc.proc)
   279  	require.NoError(t, err)
   280  	require.Equal(t, false, ok1.Status == vm.ExecStop)
   281  	ok2, err := tc.barg.Call(tc.proc)
   282  	require.NoError(t, err)
   283  	require.Equal(t, false, ok2.Status == vm.ExecStop)
   284  	return []*batch.Batch{ok1.Batch, ok2.Batch}
   285  }
   286  
   287  // create a new block based on the type information, flgs[i] == ture: has null
   288  func newBatch(ts []types.Type, proc *process.Process, rows int64) *batch.Batch {
   289  	return testutil.NewBatch(ts, false, int(rows), proc.Mp())
   290  }