github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/join/join_test.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package join
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"testing"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/common/hashmap"
    23  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    24  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    25  	"github.com/matrixorigin/matrixone/pkg/container/types"
    26  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    27  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    28  	"github.com/matrixorigin/matrixone/pkg/sql/colexec/hashbuild"
    29  	"github.com/matrixorigin/matrixone/pkg/sql/plan/function"
    30  	"github.com/matrixorigin/matrixone/pkg/testutil"
    31  	"github.com/matrixorigin/matrixone/pkg/vm"
    32  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    33  	"github.com/stretchr/testify/require"
    34  )
    35  
const (
	Rows          = 10     // number of rows in each batch generated by the tests in this file
	BenchmarkRows = 100000 // row count reserved for benchmarks; not referenced by the code visible in this file
)
    40  
// joinTestCase bundles everything one join test scenario needs: the join
// operator argument under test, the hash-build operator argument that
// produces its build-side input, and the process both of them run in.
type joinTestCase struct {
	arg    *Argument           // join operator argument under test
	flgs   []bool              // flgs[i] == true: nullable (stored by newTestCase; not read by the code visible in this file)
	types  []types.Type        // column types handed to newBatch when generating input batches
	proc   *process.Process    // process whose merge receivers carry the input batches
	cancel context.CancelFunc  // cancel function of the context attached to both merge receivers
	barg   *hashbuild.Argument // hash-build operator argument run by hashBuildWithBatch
}
    50  
var (
	// tcs holds the shared join test cases; it is populated in init and
	// iterated by TestString and TestJoin.
	tcs []joinTestCase
)
    54  
    55  func init() {
    56  	tcs = []joinTestCase{
    57  		newTestCase([]bool{false}, []types.Type{types.T_int8.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0)},
    58  			[][]*plan.Expr{
    59  				{
    60  					newExpr(0, types.T_int8.ToType()),
    61  				},
    62  				{
    63  					newExpr(0, types.T_int8.ToType()),
    64  				},
    65  			}),
    66  		newTestCase([]bool{true}, []types.Type{types.T_int8.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0), colexec.NewResultPos(1, 0)},
    67  			[][]*plan.Expr{
    68  				{
    69  					newExpr(0, types.T_int8.ToType()),
    70  				},
    71  				{
    72  					newExpr(0, types.T_int8.ToType()),
    73  				},
    74  			}),
    75  	}
    76  }
    77  
    78  func TestString(t *testing.T) {
    79  	buf := new(bytes.Buffer)
    80  	for _, tc := range tcs {
    81  		tc.arg.String(buf)
    82  	}
    83  }
    84  
    85  func TestJoin(t *testing.T) {
    86  	for _, tc := range tcs {
    87  		nb0 := tc.proc.Mp().CurrNB()
    88  		bats := hashBuild(t, tc)
    89  		if jm, ok := bats[0].AuxData.(*hashmap.JoinMap); ok {
    90  			jm.SetDupCount(int64(1))
    91  		}
    92  		err := tc.arg.Prepare(tc.proc)
    93  		require.NoError(t, err)
    94  		tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
    95  		tc.proc.Reg.MergeReceivers[0].Ch <- batch.EmptyBatch
    96  		tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
    97  		tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
    98  		tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
    99  		tc.proc.Reg.MergeReceivers[0].Ch <- nil
   100  		tc.proc.Reg.MergeReceivers[1].Ch <- bats[0]
   101  		tc.proc.Reg.MergeReceivers[1].Ch <- bats[1]
   102  		tc.proc.Reg.MergeReceivers[0].Ch <- nil
   103  		tc.proc.Reg.MergeReceivers[1].Ch <- nil
   104  		for {
   105  			ok, err := tc.arg.Call(tc.proc)
   106  			if ok.Status == vm.ExecStop || err != nil {
   107  				break
   108  			}
   109  		}
   110  		tc.arg.Free(tc.proc, false, nil)
   111  		tc.proc.FreeVectors()
   112  		nb1 := tc.proc.Mp().CurrNB()
   113  		require.Equal(t, nb0, nb1)
   114  	}
   115  }
   116  
   117  /*
   118  func TestLowCardinalityJoin(t *testing.T) {
   119  	tc := newTestCase([]bool{false}, []types.Type{types.T_varchar.ToType()}, []colexec.ResultPos{colexec.NewResultPos(1, 0)},
   120  		[][]*plan.Expr{
   121  			{
   122  				newExpr(0, types.T_varchar.ToType()),
   123  			},
   124  			{
   125  				newExpr(0, types.T_varchar.ToType()),
   126  			},
   127  		})
   128  	tc.arg.Cond = nil // only numeric type can be compared
   129  
   130  	values0 := []string{"a", "b", "a", "c", "b", "c", "a", "a"}
   131  	v0 := testutil.NewVector(len(values0), types.T_varchar.ToType(), tc.proc.Mp(), false, values0)
   132  	constructIndex(t, v0, tc.proc.Mp())
   133  
   134  	// hashbuild
   135  	bat := hashBuildWithBatch(t, tc, testutil.NewBatchWithVectors([]*vector.Vector{v0}, nil))
   136  
   137  	values1 := []string{"c", "d", "c", "c", "b", "a", "b", "d", "a", "b"}
   138  	v1 := testutil.NewVector(len(values1), types.T_varchar.ToType(), tc.proc.Mp(), false, values1)
   139  
   140  	// probe
   141  	// only the join column of right table is indexed
   142  	rbat := probeWithBatches(t, tc, testutil.NewBatchWithVectors([]*vector.Vector{v1}, nil), bat)
   143  
   144  	result := rbat.Vecs[0]
   145  	require.NotNil(t, result.Index())
   146  	resultIdx := result.Index().(*index.LowCardinalityIndex)
   147  	require.Equal(
   148  		t,
   149  		[]uint16{3, 3, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2},
   150  		vector.MustFixedCol[uint16](resultIdx.GetPoses()),
   151  	)
   152  }
   153  
   154  func TestLowCardinalityIndexesJoin(t *testing.T) {
   155  	tc := newTestCase([]bool{false}, []types.Type{types.T_varchar.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0)},
   156  		[][]*plan.Expr{
   157  			{
   158  				newExpr(0, types.T_varchar.ToType()),
   159  			},
   160  			{
   161  				newExpr(0, types.T_varchar.ToType()),
   162  			},
   163  		})
   164  	tc.arg.Cond = nil // only numeric type can be compared
   165  
   166  	values0 := []string{"a", "b", "a", "c", "b", "c", "a", "a"}
   167  	v0 := testutil.NewVector(len(values0), types.T_varchar.ToType(), tc.proc.Mp(), false, values0)
   168  	constructIndex(t, v0, tc.proc.Mp())
   169  
   170  	// hashbuild
   171  	bat := hashBuildWithBatch(t, tc, testutil.NewBatchWithVectors([]*vector.Vector{v0}, nil))
   172  
   173  	values1 := []string{"c", "d", "c", "c", "b", "a", "b", "d", "a", "b"}
   174  	v1 := testutil.NewVector(len(values1), types.T_varchar.ToType(), tc.proc.Mp(), false, values1)
   175  	constructIndex(t, v1, tc.proc.Mp())
   176  
   177  	// probe
   178  	// the join columns of both left table and right table are indexed
   179  	rbat := probeWithBatches(t, tc, testutil.NewBatchWithVectors([]*vector.Vector{v1}, nil), bat)
   180  
   181  	result := rbat.Vecs[0]
   182  	require.NotNil(t, result.Index())
   183  	resultIdx := result.Index().(*index.LowCardinalityIndex)
   184  	require.Equal(
   185  		t,
   186  		[]uint16{1, 1, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 3, 3},
   187  		vector.MustFixedCol[uint16](resultIdx.GetPoses()),
   188  	)
   189  }
   190  */
   191  
   192  func BenchmarkJoin(b *testing.B) {
   193  	for i := 0; i < b.N; i++ {
   194  		tcs = []joinTestCase{
   195  			newTestCase([]bool{false}, []types.Type{types.T_int8.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0), colexec.NewResultPos(1, 0)},
   196  				[][]*plan.Expr{
   197  					{
   198  						newExpr(0, types.T_int8.ToType()),
   199  					},
   200  					{
   201  						newExpr(0, types.T_int8.ToType()),
   202  					},
   203  				}),
   204  			newTestCase([]bool{true}, []types.Type{types.T_int8.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0), colexec.NewResultPos(1, 0)},
   205  				[][]*plan.Expr{
   206  					{
   207  						newExpr(0, types.T_int8.ToType()),
   208  					},
   209  					{
   210  						newExpr(0, types.T_int8.ToType()),
   211  					},
   212  				}),
   213  		}
   214  		t := new(testing.T)
   215  		for _, tc := range tcs {
   216  			bats := hashBuild(t, tc)
   217  			err := tc.arg.Prepare(tc.proc)
   218  			require.NoError(t, err)
   219  			tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   220  			tc.proc.Reg.MergeReceivers[0].Ch <- batch.EmptyBatch
   221  			tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   222  			tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   223  			tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows)
   224  			tc.proc.Reg.MergeReceivers[0].Ch <- nil
   225  			tc.proc.Reg.MergeReceivers[1].Ch <- bats[0]
   226  			tc.proc.Reg.MergeReceivers[1].Ch <- bats[1]
   227  			tc.proc.Reg.MergeReceivers[0].Ch <- nil
   228  			tc.proc.Reg.MergeReceivers[1].Ch <- nil
   229  			for {
   230  				ok, err := tc.arg.Call(tc.proc)
   231  				if ok.Status == vm.ExecStop || err != nil {
   232  					break
   233  				}
   234  			}
   235  		}
   236  	}
   237  }
   238  
   239  func newExpr(pos int32, typ types.Type) *plan.Expr {
   240  	return &plan.Expr{
   241  		Typ: plan.Type{
   242  			Scale: typ.Scale,
   243  			Width: typ.Width,
   244  			Id:    int32(typ.Oid),
   245  		},
   246  		Expr: &plan.Expr_Col{
   247  			Col: &plan.ColRef{
   248  				ColPos: pos,
   249  			},
   250  		},
   251  	}
   252  }
   253  
   254  func newTestCase(flgs []bool, ts []types.Type, rp []colexec.ResultPos, cs [][]*plan.Expr) joinTestCase {
   255  	proc := testutil.NewProcessWithMPool(mpool.MustNewZero())
   256  	proc.Reg.MergeReceivers = make([]*process.WaitRegister, 2)
   257  	ctx, cancel := context.WithCancel(context.Background())
   258  	proc.Reg.MergeReceivers[0] = &process.WaitRegister{
   259  		Ctx: ctx,
   260  		Ch:  make(chan *batch.Batch, 10),
   261  	}
   262  	proc.Reg.MergeReceivers[1] = &process.WaitRegister{
   263  		Ctx: ctx,
   264  		Ch:  make(chan *batch.Batch, 3),
   265  	}
   266  	fr, _ := function.GetFunctionByName(ctx, "=", ts)
   267  	fid := fr.GetEncodedOverloadID()
   268  	args := make([]*plan.Expr, 0, 2)
   269  	args = append(args, &plan.Expr{
   270  		Typ: plan.Type{
   271  			Id: int32(ts[0].Oid),
   272  		},
   273  		Expr: &plan.Expr_Col{
   274  			Col: &plan.ColRef{
   275  				RelPos: 0,
   276  				ColPos: 0,
   277  			},
   278  		},
   279  	})
   280  	args = append(args, &plan.Expr{
   281  		Typ: plan.Type{
   282  			Id: int32(ts[0].Oid),
   283  		},
   284  		Expr: &plan.Expr_Col{
   285  			Col: &plan.ColRef{
   286  				RelPos: 1,
   287  				ColPos: 0,
   288  			},
   289  		},
   290  	})
   291  	cond := &plan.Expr{
   292  		Typ: plan.Type{
   293  			Id: int32(types.T_bool),
   294  		},
   295  		Expr: &plan.Expr_F{
   296  			F: &plan.Function{
   297  				Args: args,
   298  				Func: &plan.ObjectRef{Obj: fid, ObjName: "="},
   299  			},
   300  		},
   301  	}
   302  	return joinTestCase{
   303  		types:  ts,
   304  		flgs:   flgs,
   305  		proc:   proc,
   306  		cancel: cancel,
   307  		arg: &Argument{
   308  			Typs:       ts,
   309  			Result:     rp,
   310  			Conditions: cs,
   311  			Cond:       cond,
   312  			OperatorBase: vm.OperatorBase{
   313  				OperatorInfo: vm.OperatorInfo{
   314  					Idx:     1,
   315  					IsFirst: false,
   316  					IsLast:  false,
   317  				},
   318  			},
   319  		},
   320  		barg: &hashbuild.Argument{
   321  			Typs:            ts,
   322  			NeedHashMap:     true,
   323  			Conditions:      cs[1],
   324  			NeedMergedBatch: true,
   325  			OperatorBase: vm.OperatorBase{
   326  				OperatorInfo: vm.OperatorInfo{
   327  					Idx:     0,
   328  					IsFirst: false,
   329  					IsLast:  false,
   330  				},
   331  			},
   332  			NeedAllocateSels: true,
   333  		},
   334  	}
   335  }
   336  
   337  func hashBuild(t *testing.T, tc joinTestCase) []*batch.Batch {
   338  	return hashBuildWithBatch(t, tc, newBatch(tc.types, tc.proc, Rows))
   339  }
   340  
   341  func hashBuildWithBatch(t *testing.T, tc joinTestCase, bat *batch.Batch) []*batch.Batch {
   342  	err := tc.barg.Prepare(tc.proc)
   343  	require.NoError(t, err)
   344  	tc.proc.Reg.MergeReceivers[0].Ch <- bat
   345  	for _, r := range tc.proc.Reg.MergeReceivers {
   346  		r.Ch <- nil
   347  	}
   348  	ok1, err := tc.barg.Call(tc.proc)
   349  	require.NoError(t, err)
   350  	require.Equal(t, false, ok1.Status == vm.ExecStop)
   351  	ok2, err := tc.barg.Call(tc.proc)
   352  	require.NoError(t, err)
   353  	require.Equal(t, false, ok2.Status == vm.ExecStop)
   354  	return []*batch.Batch{ok1.Batch, ok2.Batch}
   355  }
   356  
   357  // create a new block based on the type information, flgs[i] == ture: has null
   358  func newBatch(ts []types.Type, proc *process.Process, rows int64) *batch.Batch {
   359  	return testutil.NewBatch(ts, false, int(rows), proc.Mp())
   360  }
   361  
   362  /*
   363  func constructIndex(t *testing.T, v *vector.Vector, m *mpool.MPool) {
   364  	idx, err := index.New(*v.GetType(), m)
   365  	require.NoError(t, err)
   366  
   367  	err = idx.InsertBatch(v)
   368  	require.NoError(t, err)
   369  
   370  	v.SetIndex(idx)
   371  }
   372  */