github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/join/join_test.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package join 16 17 import ( 18 "bytes" 19 "context" 20 "testing" 21 22 "github.com/matrixorigin/matrixone/pkg/common/hashmap" 23 "github.com/matrixorigin/matrixone/pkg/common/mpool" 24 "github.com/matrixorigin/matrixone/pkg/container/batch" 25 "github.com/matrixorigin/matrixone/pkg/container/types" 26 "github.com/matrixorigin/matrixone/pkg/pb/plan" 27 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 28 "github.com/matrixorigin/matrixone/pkg/sql/colexec/hashbuild" 29 "github.com/matrixorigin/matrixone/pkg/sql/plan/function" 30 "github.com/matrixorigin/matrixone/pkg/testutil" 31 "github.com/matrixorigin/matrixone/pkg/vm" 32 "github.com/matrixorigin/matrixone/pkg/vm/process" 33 "github.com/stretchr/testify/require" 34 ) 35 36 const ( 37 Rows = 10 // default rows 38 BenchmarkRows = 100000 // default rows for benchmark 39 ) 40 41 // add unit tests for cases 42 type joinTestCase struct { 43 arg *Argument 44 flgs []bool // flgs[i] == true: nullable 45 types []types.Type 46 proc *process.Process 47 cancel context.CancelFunc 48 barg *hashbuild.Argument 49 } 50 51 var ( 52 tcs []joinTestCase 53 ) 54 55 func init() { 56 tcs = []joinTestCase{ 57 newTestCase([]bool{false}, []types.Type{types.T_int8.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0)}, 58 [][]*plan.Expr{ 59 { 60 newExpr(0, types.T_int8.ToType()), 61 }, 62 { 63 newExpr(0, types.T_int8.ToType()), 64 }, 65 }), 66 newTestCase([]bool{true}, []types.Type{types.T_int8.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0), colexec.NewResultPos(1, 0)}, 67 [][]*plan.Expr{ 68 { 69 newExpr(0, types.T_int8.ToType()), 70 }, 71 { 72 newExpr(0, types.T_int8.ToType()), 73 }, 74 }), 75 } 76 } 77 78 func TestString(t *testing.T) { 79 buf := new(bytes.Buffer) 80 for _, tc := range tcs { 81 tc.arg.String(buf) 82 } 83 } 84 85 func TestJoin(t *testing.T) { 86 for _, tc := range tcs { 87 nb0 := tc.proc.Mp().CurrNB() 88 bats := hashBuild(t, tc) 89 if jm, ok := bats[0].AuxData.(*hashmap.JoinMap); ok { 90 jm.SetDupCount(int64(1)) 91 } 92 err := tc.arg.Prepare(tc.proc) 93 require.NoError(t, err) 94 tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows) 95 tc.proc.Reg.MergeReceivers[0].Ch <- batch.EmptyBatch 96 tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows) 97 tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows) 98 tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows) 99 tc.proc.Reg.MergeReceivers[0].Ch <- nil 100 tc.proc.Reg.MergeReceivers[1].Ch <- bats[0] 101 tc.proc.Reg.MergeReceivers[1].Ch <- bats[1] 102 tc.proc.Reg.MergeReceivers[0].Ch <- nil 103 tc.proc.Reg.MergeReceivers[1].Ch <- nil 104 for { 105 ok, err := tc.arg.Call(tc.proc) 106 if ok.Status == vm.ExecStop || err != nil { 107 break 108 } 109 } 110 tc.arg.Free(tc.proc, false, nil) 111 tc.proc.FreeVectors() 112 nb1 := tc.proc.Mp().CurrNB() 113 require.Equal(t, nb0, nb1) 114 } 115 } 116 117 /* 118 func TestLowCardinalityJoin(t *testing.T) { 119 tc := newTestCase([]bool{false}, []types.Type{types.T_varchar.ToType()}, []colexec.ResultPos{colexec.NewResultPos(1, 0)}, 120 [][]*plan.Expr{ 121 { 122 newExpr(0, types.T_varchar.ToType()), 123 }, 124 { 125 newExpr(0, types.T_varchar.ToType()), 126 }, 127 }) 128 tc.arg.Cond = nil // only numeric type can be compared 129 130 values0 := []string{"a", "b", "a", "c", "b", "c", "a", "a"} 131 v0 := testutil.NewVector(len(values0), types.T_varchar.ToType(), tc.proc.Mp(), false, values0) 132 constructIndex(t, v0, tc.proc.Mp()) 133 134 // hashbuild 135 bat := hashBuildWithBatch(t, tc, testutil.NewBatchWithVectors([]*vector.Vector{v0}, nil)) 136 137 values1 := []string{"c", "d", "c", "c", "b", "a", "b", "d", "a", "b"} 138 v1 := testutil.NewVector(len(values1), types.T_varchar.ToType(), tc.proc.Mp(), false, values1) 139 140 // probe 141 // only the join column of right table is indexed 142 rbat := probeWithBatches(t, tc, testutil.NewBatchWithVectors([]*vector.Vector{v1}, nil), bat) 143 144 result := rbat.Vecs[0] 145 require.NotNil(t, result.Index()) 146 resultIdx := result.Index().(*index.LowCardinalityIndex) 147 require.Equal( 148 t, 149 []uint16{3, 3, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2}, 150 vector.MustFixedCol[uint16](resultIdx.GetPoses()), 151 ) 152 } 153 154 func TestLowCardinalityIndexesJoin(t *testing.T) { 155 tc := newTestCase([]bool{false}, []types.Type{types.T_varchar.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0)}, 156 [][]*plan.Expr{ 157 { 158 newExpr(0, types.T_varchar.ToType()), 159 }, 160 { 161 newExpr(0, types.T_varchar.ToType()), 162 }, 163 }) 164 tc.arg.Cond = nil // only numeric type can be compared 165 166 values0 := []string{"a", "b", "a", "c", "b", "c", "a", "a"} 167 v0 := testutil.NewVector(len(values0), types.T_varchar.ToType(), tc.proc.Mp(), false, values0) 168 constructIndex(t, v0, tc.proc.Mp()) 169 170 // hashbuild 171 bat := hashBuildWithBatch(t, tc, testutil.NewBatchWithVectors([]*vector.Vector{v0}, nil)) 172 173 values1 := []string{"c", "d", "c", "c", "b", "a", "b", "d", "a", "b"} 174 v1 := testutil.NewVector(len(values1), types.T_varchar.ToType(), tc.proc.Mp(), false, values1) 175 constructIndex(t, v1, tc.proc.Mp()) 176 177 // probe 178 // the join columns of both left table and right table are indexed 179 rbat := probeWithBatches(t, tc, testutil.NewBatchWithVectors([]*vector.Vector{v1}, nil), bat) 180 181 result := rbat.Vecs[0] 182 require.NotNil(t, result.Index()) 183 resultIdx := result.Index().(*index.LowCardinalityIndex) 184 require.Equal( 185 t, 186 []uint16{1, 1, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 3, 3}, 187 vector.MustFixedCol[uint16](resultIdx.GetPoses()), 188 ) 189 } 190 */ 191 192 func BenchmarkJoin(b *testing.B) { 193 for i := 0; i < b.N; i++ { 194 tcs = []joinTestCase{ 195 newTestCase([]bool{false}, []types.Type{types.T_int8.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0), colexec.NewResultPos(1, 0)}, 196 [][]*plan.Expr{ 197 { 198 newExpr(0, types.T_int8.ToType()), 199 }, 200 { 201 newExpr(0, types.T_int8.ToType()), 202 }, 203 }), 204 newTestCase([]bool{true}, []types.Type{types.T_int8.ToType()}, []colexec.ResultPos{colexec.NewResultPos(0, 0), colexec.NewResultPos(1, 0)}, 205 [][]*plan.Expr{ 206 { 207 newExpr(0, types.T_int8.ToType()), 208 }, 209 { 210 newExpr(0, types.T_int8.ToType()), 211 }, 212 }), 213 } 214 t := new(testing.T) 215 for _, tc := range tcs { 216 bats := hashBuild(t, tc) 217 err := tc.arg.Prepare(tc.proc) 218 require.NoError(t, err) 219 tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows) 220 tc.proc.Reg.MergeReceivers[0].Ch <- batch.EmptyBatch 221 tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows) 222 tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows) 223 tc.proc.Reg.MergeReceivers[0].Ch <- newBatch(tc.types, tc.proc, Rows) 224 tc.proc.Reg.MergeReceivers[0].Ch <- nil 225 tc.proc.Reg.MergeReceivers[1].Ch <- bats[0] 226 tc.proc.Reg.MergeReceivers[1].Ch <- bats[1] 227 tc.proc.Reg.MergeReceivers[0].Ch <- nil 228 tc.proc.Reg.MergeReceivers[1].Ch <- nil 229 for { 230 ok, err := tc.arg.Call(tc.proc) 231 if ok.Status == vm.ExecStop || err != nil { 232 break 233 } 234 } 235 } 236 } 237 } 238 239 func newExpr(pos int32, typ types.Type) *plan.Expr { 240 return &plan.Expr{ 241 Typ: plan.Type{ 242 Scale: typ.Scale, 243 Width: typ.Width, 244 Id: int32(typ.Oid), 245 }, 246 Expr: &plan.Expr_Col{ 247 Col: &plan.ColRef{ 248 ColPos: pos, 249 }, 250 }, 251 } 252 } 253 254 func newTestCase(flgs []bool, ts []types.Type, rp []colexec.ResultPos, cs [][]*plan.Expr) joinTestCase { 255 proc := testutil.NewProcessWithMPool(mpool.MustNewZero()) 256 proc.Reg.MergeReceivers = make([]*process.WaitRegister, 2) 257 ctx, cancel := context.WithCancel(context.Background()) 258 proc.Reg.MergeReceivers[0] = &process.WaitRegister{ 259 Ctx: ctx, 260 Ch: make(chan *batch.Batch, 10), 261 } 262 proc.Reg.MergeReceivers[1] = &process.WaitRegister{ 263 Ctx: ctx, 264 Ch: make(chan *batch.Batch, 3), 265 } 266 fr, _ := function.GetFunctionByName(ctx, "=", ts) 267 fid := fr.GetEncodedOverloadID() 268 args := make([]*plan.Expr, 0, 2) 269 args = append(args, &plan.Expr{ 270 Typ: plan.Type{ 271 Id: int32(ts[0].Oid), 272 }, 273 Expr: &plan.Expr_Col{ 274 Col: &plan.ColRef{ 275 RelPos: 0, 276 ColPos: 0, 277 }, 278 }, 279 }) 280 args = append(args, &plan.Expr{ 281 Typ: plan.Type{ 282 Id: int32(ts[0].Oid), 283 }, 284 Expr: &plan.Expr_Col{ 285 Col: &plan.ColRef{ 286 RelPos: 1, 287 ColPos: 0, 288 }, 289 }, 290 }) 291 cond := &plan.Expr{ 292 Typ: plan.Type{ 293 Id: int32(types.T_bool), 294 }, 295 Expr: &plan.Expr_F{ 296 F: &plan.Function{ 297 Args: args, 298 Func: &plan.ObjectRef{Obj: fid, ObjName: "="}, 299 }, 300 }, 301 } 302 return joinTestCase{ 303 types: ts, 304 flgs: flgs, 305 proc: proc, 306 cancel: cancel, 307 arg: &Argument{ 308 Typs: ts, 309 Result: rp, 310 Conditions: cs, 311 Cond: cond, 312 OperatorBase: vm.OperatorBase{ 313 OperatorInfo: vm.OperatorInfo{ 314 Idx: 1, 315 IsFirst: false, 316 IsLast: false, 317 }, 318 }, 319 }, 320 barg: &hashbuild.Argument{ 321 Typs: ts, 322 NeedHashMap: true, 323 Conditions: cs[1], 324 NeedMergedBatch: true, 325 OperatorBase: vm.OperatorBase{ 326 OperatorInfo: vm.OperatorInfo{ 327 Idx: 0, 328 IsFirst: false, 329 IsLast: false, 330 }, 331 }, 332 NeedAllocateSels: true, 333 }, 334 } 335 } 336 337 func hashBuild(t *testing.T, tc joinTestCase) []*batch.Batch { 338 return hashBuildWithBatch(t, tc, newBatch(tc.types, tc.proc, Rows)) 339 } 340 341 func hashBuildWithBatch(t *testing.T, tc joinTestCase, bat *batch.Batch) []*batch.Batch { 342 err := tc.barg.Prepare(tc.proc) 343 require.NoError(t, err) 344 tc.proc.Reg.MergeReceivers[0].Ch <- bat 345 for _, r := range tc.proc.Reg.MergeReceivers { 346 r.Ch <- nil 347 } 348 ok1, err := tc.barg.Call(tc.proc) 349 require.NoError(t, err) 350 require.Equal(t, false, ok1.Status == vm.ExecStop) 351 ok2, err := tc.barg.Call(tc.proc) 352 require.NoError(t, err) 353 require.Equal(t, false, ok2.Status == vm.ExecStop) 354 return []*batch.Batch{ok1.Batch, ok2.Batch} 355 } 356 357 // create a new block based on the type information, flgs[i] == ture: has null 358 func newBatch(ts []types.Type, proc *process.Process, rows int64) *batch.Batch { 359 return testutil.NewBatch(ts, false, int(rows), proc.Mp()) 360 } 361 362 /* 363 func constructIndex(t *testing.T, v *vector.Vector, m *mpool.MPool) { 364 idx, err := index.New(*v.GetType(), m) 365 require.NoError(t, err) 366 367 err = idx.InsertBatch(v) 368 require.NoError(t, err) 369 370 v.SetIndex(idx) 371 } 372 */