github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/index_lookup_hash_join.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"context"
	"fmt"
	"hash"
	"hash/fnv"
	"runtime/trace"
	"sync"
	"sync/atomic"
	"time"

	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/failpoint"
	causetembedded "github.com/whtcorpsinc/milevadb/causet/embedded"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/soliton"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/codec"
	"github.com/whtcorpsinc/milevadb/soliton/memory"
	"github.com/whtcorpsinc/milevadb/soliton/ranger"
)

// numResChkHold indicates the number of resource chunks that an inner worker
// holds at the same time.
// It is used in two separate cases:
//  1. IndexMergeJoin
//  2. IndexNestedLoopHashJoin, when IndexNestedLoopHashJoin.keepOuterOrder is true.
// Otherwise, there will be at most `concurrency` resource chunks throughout
// the execution of IndexNestedLoopHashJoin.
const numResChkHold = 4

// IndexNestedLoopHashJoin employs one outer worker and N inner workers to
// execute concurrently. The output order is not promised.
//
// The execution flow is very similar to IndexLookUpReader:
//  1. The outer worker reads N outer rows, builds a task and sends it to the
//     inner worker channel.
//  2. The inner worker receives the tasks and does 3 things for every task:
//     1. builds a hash table from the outer rows
//     2. builds key ranges from the outer rows and fetches the inner rows
//     3. probes the hash table and sends the join result to the main thread
//        channel.
//     Note: steps 1 and 2 run concurrently.
//  3. The main thread receives the join results.
type IndexNestedLoopHashJoin struct {
	IndexLookUpJoin
	resultCh          chan *indexHashJoinResult
	joinChkResourceCh []chan *chunk.Chunk
	// We build an individual joiner for each inner worker when using chunk-based
	// execution, to avoid concurrent access to joiner.chk and joiner.selected.
	joiners        []joiner
	keepOuterOrder bool
	curTask        *indexHashJoinTask
	// taskCh is only used when `keepOuterOrder` is true.
	taskCh chan *indexHashJoinTask

	stats *indexLookUpJoinRuntimeStats
}

type indexHashJoinOuterWorker struct {
	outerWorker
	innerCh        chan *indexHashJoinTask
	keepOuterOrder bool
	// taskCh is only used when the outer order needs to be promised.
	taskCh chan *indexHashJoinTask
}
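
// indexHashJoinInnerWorker is the inner-side worker of the join. For every
// task it receives, it builds a hash table from the outer events, fetches the
// matching inner events, probes the hash table and emits join results (see
// handleTask below for the details).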
type indexHashJoinInnerWorker struct {
	innerWorker
	matchedOuterPtrs  []chunk.EventPtr
	joiner            joiner
	joinChkResourceCh chan *chunk.Chunk
	// resultCh is valid only when indexNestedLoopHashJoin does not need to
	// keep the outer order. Otherwise, it will be nil.
	resultCh         chan *indexHashJoinResult
	taskCh           <-chan *indexHashJoinTask
	wg               *sync.WaitGroup
	joinKeyBuf       []byte
	outerEventStatus []outerEventStatusFlag
}

type indexHashJoinResult struct {
	chk *chunk.Chunk
	err error
	src chan<- *chunk.Chunk
}

type indexHashJoinTask struct {
	*lookUpJoinTask
	outerEventStatus [][]outerEventStatusFlag
	lookupMap        baseHashBlock
	err              error
	keepOuterOrder   bool
	// resultCh is only used when the outer order needs to be promised.
	resultCh chan *indexHashJoinResult
	// matchedInnerEventPtrs is only valid when the outer order needs to be
	// promised. Otherwise, it will be nil.
	// len(matchedInnerEventPtrs) equals lookUpJoinTask.outerResult.NumChunks(),
	// and every element matchedInnerEventPtrs[chkIdx][rowIdx] indicates the
	// matched inner event ptrs of the corresponding outer event.
	matchedInnerEventPtrs [][][]chunk.EventPtr
}

// Open implements the IndexNestedLoopHashJoin InterlockingDirectorate interface.
func (e *IndexNestedLoopHashJoin) Open(ctx context.Context) error {
	// Be careful, very dirty hack in this line!!!
	// IndexLookUpJoin needs to rebuild the interlock (the dataReaderBuilder)
	// during execution. However, `interlock.Next()` is evaluated lazily, when
	// the RecordSet result is drained.
	// Lazy evaluation means the saved stochastik context may change between the
	// interlock's building and its running.
	// A specific sequence for example:
	//
	//  e := buildInterlockingDirectorate()   // txn at build time
	//  recordSet := runStmt(e)
	//  stochastik.CommitTxn()                // txn closed
	//  recordSet.Next()
	//  e.dataReaderBuilder.Build()           // txn is used again, which is already closed
	//
	// The trick here is that `getSnapshotTS` caches the snapshot ts in the
	// dataReaderBuilder, so even if the txn is destroyed later, the
	// dataReaderBuilder can still use the cached snapshot ts to construct PosetDag.
	_, err := e.innerCtx.readerBuilder.getSnapshotTS()
	if err != nil {
		return err
	}

	err = e.children[0].Open(ctx)
	if err != nil {
		return err
	}
	e.memTracker = memory.NewTracker(e.id, -1)
	e.memTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.MemTracker)
	e.innerPtrBytes = make([][]byte, 0, 8)
	if e.runtimeStats != nil {
		e.stats = &indexLookUpJoinRuntimeStats{}
		e.ctx.GetStochastikVars().StmtCtx.RuntimeStatsDefCausl.RegisterStats(e.id, e.stats)
	}
	e.startWorkers(ctx)
	return nil
}
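
// startWorkers wires up the following goroutine/channel topology (a summary
// of the code below, not an exhaustive picture):
//
//	outer worker --innerCh--> inner workers[0..concurrency-1] --resultCh--> Next
//
// When keepOuterOrder is true, the shared resultCh is not used; instead every
// task carries its own resultCh, and tasks are additionally forwarded through
// e.taskCh so that runInOrder can consume the results task by task, in the
// order the tasks were produced.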
func (e *IndexNestedLoopHashJoin) startWorkers(ctx context.Context) {
	concurrency := e.ctx.GetStochastikVars().IndexLookupJoinConcurrency()
	if e.stats != nil {
		e.stats.concurrency = concurrency
	}
	workerCtx, cancelFunc := context.WithCancel(ctx)
	e.cancelFunc = cancelFunc
	innerCh := make(chan *indexHashJoinTask, concurrency)
	if e.keepOuterOrder {
		e.taskCh = make(chan *indexHashJoinTask, concurrency)
	}
	e.workerWg.Add(1)
	ow := e.newOuterWorker(innerCh)
	go soliton.WithRecovery(func() { ow.run(workerCtx) }, e.finishJoinWorkers)

	if !e.keepOuterOrder {
		e.resultCh = make(chan *indexHashJoinResult, concurrency)
	} else {
		// When `keepOuterOrder` is true, each task holds its own `resultCh`,
		// thus we do not need a global resultCh.
		e.resultCh = nil
	}
	e.joinChkResourceCh = make([]chan *chunk.Chunk, concurrency)
	for i := 0; i < concurrency; i++ {
		if !e.keepOuterOrder {
			e.joinChkResourceCh[i] = make(chan *chunk.Chunk, 1)
			e.joinChkResourceCh[i] <- newFirstChunk(e)
		} else {
			e.joinChkResourceCh[i] = make(chan *chunk.Chunk, numResChkHold)
			for j := 0; j < numResChkHold; j++ {
				e.joinChkResourceCh[i] <- newFirstChunk(e)
			}
		}
	}

	e.workerWg.Add(concurrency)
	for i := 0; i < concurrency; i++ {
		workerID := i
		go soliton.WithRecovery(func() { e.newInnerWorker(innerCh, workerID).run(workerCtx, cancelFunc) }, e.finishJoinWorkers)
	}
	go e.wait4JoinWorkers()
}

func (e *IndexNestedLoopHashJoin) finishJoinWorkers(r interface{}) {
	if r != nil {
		e.resultCh <- &indexHashJoinResult{
			err: errors.New(fmt.Sprintf("%v", r)),
		}
		if e.cancelFunc != nil {
			e.cancelFunc()
		}
	}
	e.workerWg.Done()
}

func (e *IndexNestedLoopHashJoin) wait4JoinWorkers() {
	e.workerWg.Wait()
	if e.resultCh != nil {
		close(e.resultCh)
	}
	if e.taskCh != nil {
		close(e.taskCh)
	}
}

// Next implements the IndexNestedLoopHashJoin InterlockingDirectorate interface.
func (e *IndexNestedLoopHashJoin) Next(ctx context.Context, req *chunk.Chunk) error {
	req.Reset()
	if e.keepOuterOrder {
		return e.runInOrder(ctx, req)
	}
	// unordered run
	var (
		result *indexHashJoinResult
		ok     bool
	)
	select {
	case result, ok = <-e.resultCh:
		if !ok {
			return nil
		}
		if result.err != nil {
			return result.err
		}
	case <-ctx.Done():
		return ctx.Err()
	}
	req.SwapDeferredCausets(result.chk)
	result.src <- result.chk
	return nil
}

func (e *IndexNestedLoopHashJoin) runInOrder(ctx context.Context, req *chunk.Chunk) error {
	var (
		result *indexHashJoinResult
		ok     bool
	)
	for {
		if e.isDryUpTasks(ctx) {
			return nil
		}
		select {
		case result, ok = <-e.curTask.resultCh:
			if !ok {
				// The current task is drained; move on to the next one.
				e.curTask = nil
				continue
			}
			if result.err != nil {
				return result.err
			}
		case <-ctx.Done():
			return ctx.Err()
		}
		req.SwapDeferredCausets(result.chk)
		result.src <- result.chk
		return nil
	}
}

// isDryUpTasks indicates whether all the tasks have been processed.
func (e *IndexNestedLoopHashJoin) isDryUpTasks(ctx context.Context) bool {
	if e.curTask != nil {
		return false
	}
	var ok bool
	select {
	case e.curTask, ok = <-e.taskCh:
		if !ok {
			return true
		}
	case <-ctx.Done():
		return true
	}
	return false
}
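
// A note on chunk recycling: every indexHashJoinResult carries its source
// channel in `src` (an inner worker's joinChkResourceCh). After Next/runInOrder
// swaps the payload into `req` via SwapDeferredCausets, the now-empty chunk is
// sent back on `src`, so each worker's pool of result chunks stays bounded
// (one chunk, or numResChkHold of them when the outer order is kept).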

// Close implements the IndexNestedLoopHashJoin InterlockingDirectorate interface.
func (e *IndexNestedLoopHashJoin) Close() error {
	if e.cancelFunc != nil {
		e.cancelFunc()
		e.cancelFunc = nil
	}
	// Drain the result/task channels to unblock any workers still sending,
	// so they can observe the cancellation above and exit.
	if e.resultCh != nil {
		for range e.resultCh {
		}
		e.resultCh = nil
	}
	if e.taskCh != nil {
		for range e.taskCh {
		}
		e.taskCh = nil
	}
	for i := range e.joinChkResourceCh {
		close(e.joinChkResourceCh[i])
	}
	e.joinChkResourceCh = nil
	return e.baseInterlockingDirectorate.Close()
}

func (ow *indexHashJoinOuterWorker) run(ctx context.Context) {
	defer trace.StartRegion(ctx, "IndexHashJoinOuterWorker").End()
	defer close(ow.innerCh)
	for {
		task, err := ow.buildTask(ctx)
		failpoint.Inject("testIndexHashJoinOuterWorkerErr", func() {
			err = errors.New("mocHoTTexHashJoinOuterWorkerErr")
		})
		if err != nil {
			task = &indexHashJoinTask{err: err}
			if ow.keepOuterOrder {
				task.keepOuterOrder, task.resultCh = true, make(chan *indexHashJoinResult, 1)
				ow.pushToChan(ctx, task, ow.taskCh)
			}
			ow.pushToChan(ctx, task, ow.innerCh)
			return
		}
		if task == nil {
			return
		}
		if finished := ow.pushToChan(ctx, task, ow.innerCh); finished {
			return
		}
		if ow.keepOuterOrder {
			if finished := ow.pushToChan(ctx, task, ow.taskCh); finished {
				return
			}
		}
	}
}

func (ow *indexHashJoinOuterWorker) buildTask(ctx context.Context) (*indexHashJoinTask, error) {
	task, err := ow.outerWorker.buildTask(ctx)
	if task == nil || err != nil {
		return nil, err
	}
	var (
		resultCh              chan *indexHashJoinResult
		matchedInnerEventPtrs [][][]chunk.EventPtr
	)
	if ow.keepOuterOrder {
		resultCh = make(chan *indexHashJoinResult, numResChkHold)
		matchedInnerEventPtrs = make([][][]chunk.EventPtr, task.outerResult.NumChunks())
		for i := range matchedInnerEventPtrs {
			matchedInnerEventPtrs[i] = make([][]chunk.EventPtr, task.outerResult.GetChunk(i).NumEvents())
		}
	}
	numChks := task.outerResult.NumChunks()
	outerEventStatus := make([][]outerEventStatusFlag, numChks)
	for i := 0; i < numChks; i++ {
		outerEventStatus[i] = make([]outerEventStatusFlag, task.outerResult.GetChunk(i).NumEvents())
	}
	return &indexHashJoinTask{
		lookUpJoinTask:        task,
		outerEventStatus:      outerEventStatus,
		keepOuterOrder:        ow.keepOuterOrder,
		resultCh:              resultCh,
		matchedInnerEventPtrs: matchedInnerEventPtrs,
	}, nil
}

// pushToChan sends the task to dst and returns whether the execution has
// finished, i.e. the context was canceled before the send could complete.
func (ow *indexHashJoinOuterWorker) pushToChan(ctx context.Context, task *indexHashJoinTask, dst chan<- *indexHashJoinTask) bool {
	select {
	case <-ctx.Done():
		return true
	case dst <- task:
	}
	return false
}

func (e *IndexNestedLoopHashJoin) newOuterWorker(innerCh chan *indexHashJoinTask) *indexHashJoinOuterWorker {
	ow := &indexHashJoinOuterWorker{
		outerWorker: outerWorker{
			outerCtx:         e.outerCtx,
			ctx:              e.ctx,
			interlock:        e.children[0],
			batchSize:        32,
			maxBatchSize:     e.ctx.GetStochastikVars().IndexJoinBatchSize,
			parentMemTracker: e.memTracker,
			lookup:           &e.IndexLookUpJoin,
		},
		innerCh:        innerCh,
		keepOuterOrder: e.keepOuterOrder,
		taskCh:         e.taskCh,
	}
	return ow
}
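
// newInnerWorker creates the workerID-th inner worker. Every worker gets its
// own joiner, chunk resource channel, cloned index ranges and (when present)
// fresh compare-filter temporaries, so the workers share no mutable state.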
func (e *IndexNestedLoopHashJoin) newInnerWorker(taskCh chan *indexHashJoinTask, workerID int) *indexHashJoinInnerWorker {
	// Since multiple inner workers run concurrently, we have to copy the join's
	// indexRanges for every worker to avoid a data race.
	copiedRanges := make([]*ranger.Range, 0, len(e.indexRanges))
	for _, ran := range e.indexRanges {
		copiedRanges = append(copiedRanges, ran.Clone())
	}
	var innerStats *innerWorkerRuntimeStats
	if e.stats != nil {
		innerStats = &e.stats.innerWorker
	}
	iw := &indexHashJoinInnerWorker{
		innerWorker: innerWorker{
			innerCtx:      e.innerCtx,
			outerCtx:      e.outerCtx,
			ctx:           e.ctx,
			interlockChk:  chunk.NewChunkWithCapacity(e.innerCtx.rowTypes, e.maxChunkSize),
			indexRanges:   copiedRanges,
			keyOff2IdxOff: e.keyOff2IdxOff,
			stats:         innerStats,
		},
		taskCh:            taskCh,
		joiner:            e.joiners[workerID],
		joinChkResourceCh: e.joinChkResourceCh[workerID],
		resultCh:          e.resultCh,
		matchedOuterPtrs:  make([]chunk.EventPtr, 0, e.maxChunkSize),
		joinKeyBuf:        make([]byte, 1),
		outerEventStatus:  make([]outerEventStatusFlag, 0, e.maxChunkSize),
	}
	if e.lastDefCausHelper != nil {
		// nextCwf.TmpConstant needs to be reset for every individual inner
		// worker to avoid a data race when the inner workers are running
		// concurrently.
		nextCwf := *e.lastDefCausHelper
		nextCwf.TmpConstant = make([]*memex.Constant, len(e.lastDefCausHelper.TmpConstant))
		for i := range e.lastDefCausHelper.TmpConstant {
			nextCwf.TmpConstant[i] = &memex.Constant{RetType: nextCwf.TargetDefCaus.RetType}
		}
		iw.nextDefCausCompareFilters = &nextCwf
	}
	return iw
}

func (iw *indexHashJoinInnerWorker) run(ctx context.Context, cancelFunc context.CancelFunc) {
	defer trace.StartRegion(ctx, "IndexHashJoinInnerWorker").End()
	var task *indexHashJoinTask
	joinResult, ok := iw.getNewJoinResult(ctx)
	if !ok {
		cancelFunc()
		return
	}
	h, resultCh := fnv.New64(), iw.resultCh
	for {
		select {
		case <-ctx.Done():
			return
		case task, ok = <-iw.taskCh:
		}
		if !ok {
			break
		}
		// We need to init resultCh before the err is returned.
		if task.keepOuterOrder {
			resultCh = task.resultCh
		}
		if task.err != nil {
			joinResult.err = task.err
			break
		}
		err := iw.handleTask(ctx, task, joinResult, h, resultCh)
		if err != nil {
			joinResult.err = err
			break
		}
		if task.keepOuterOrder {
			// We need to get a new result holder here because the old
			// `joinResult` has been sent to the `resultCh` or to the
			// `joinChkResourceCh`.
			joinResult, ok = iw.getNewJoinResult(ctx)
			if !ok {
				cancelFunc()
				return
			}
		}
	}
	failpoint.Inject("testIndexHashJoinInnerWorkerErr", func() {
		joinResult.err = errors.New("mocHoTTexHashJoinInnerWorkerErr")
	})
	if joinResult.err != nil {
		resultCh <- joinResult
		return
	}
	// When task.keepOuterOrder is TRUE (resultCh != iw.resultCh), the last
	// joinResult has already been checked when the task was processed, thus
	// we do not need to check it here again.
	if resultCh == iw.resultCh && joinResult.chk != nil && joinResult.chk.NumEvents() > 0 {
		select {
		case resultCh <- joinResult:
		case <-ctx.Done():
			return
		}
	}
}
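
// getNewJoinResult fetches an empty chunk from the worker's joinChkResourceCh
// and wraps it into an indexHashJoinResult whose `src` points back at that
// channel. It returns false if the context is canceled or the resource
// channel is closed.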
func (iw *indexHashJoinInnerWorker) getNewJoinResult(ctx context.Context) (*indexHashJoinResult, bool) {
	joinResult := &indexHashJoinResult{
		src: iw.joinChkResourceCh,
	}
	ok := true
	select {
	case joinResult.chk, ok = <-iw.joinChkResourceCh:
	case <-ctx.Done():
		return nil, false
	}
	return joinResult, ok
}

func (iw *indexHashJoinInnerWorker) buildHashBlockForOuterResult(ctx context.Context, task *indexHashJoinTask, h hash.Hash64) {
	if iw.stats != nil {
		start := time.Now()
		defer func() {
			atomic.AddInt64(&iw.stats.build, int64(time.Since(start)))
		}()
	}
	buf, numChks := make([]byte, 1), task.outerResult.NumChunks()
	task.lookupMap = newUnsafeHashBlock(task.outerResult.Len())
	for chkIdx := 0; chkIdx < numChks; chkIdx++ {
		chk := task.outerResult.GetChunk(chkIdx)
		numEvents := chk.NumEvents()
	OUTER:
		for rowIdx := 0; rowIdx < numEvents; rowIdx++ {
			if task.outerMatch != nil && !task.outerMatch[chkIdx][rowIdx] {
				continue
			}
			event := chk.GetEvent(rowIdx)
			keyDefCausIdx := iw.outerCtx.keyDefCauss
			// Events with a NULL join key can never match, so they are not
			// inserted into the hash table.
			for _, i := range keyDefCausIdx {
				if event.IsNull(i) {
					continue OUTER
				}
			}
			h.Reset()
			err := codec.HashChunkEvent(iw.ctx.GetStochastikVars().StmtCtx, h, event, iw.outerCtx.rowTypes, keyDefCausIdx, buf)
			failpoint.Inject("testIndexHashJoinBuildErr", func() {
				err = errors.New("mocHoTTexHashJoinBuildErr")
			})
			if err != nil {
				// This panic will be recovered by the invoker.
				panic(err.Error())
			}
			rowPtr := chunk.EventPtr{ChkIdx: uint32(chkIdx), EventIdx: uint32(rowIdx)}
			task.lookupMap.Put(h.Sum64(), rowPtr)
		}
	}
}

func (iw *indexHashJoinInnerWorker) fetchInnerResults(ctx context.Context, task *lookUpJoinTask) error {
	lookUpContents, err := iw.constructLookupContent(task)
	if err != nil {
		return err
	}
	return iw.innerWorker.fetchInnerResults(ctx, task, lookUpContents)
}

func (iw *indexHashJoinInnerWorker) handleHashJoinInnerWorkerPanic(r interface{}) {
	if r != nil {
		iw.resultCh <- &indexHashJoinResult{err: errors.Errorf("%v", r)}
	}
	iw.wg.Done()
}
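
// handleTask builds the hash table of the task's outer events in a background
// goroutine while fetching the inner events on the current one (steps 1 and 2
// of the execution flow run concurrently), then probes the hash table in
// either unordered or in-order mode, depending on task.keepOuterOrder.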
func (iw *indexHashJoinInnerWorker) handleTask(ctx context.Context, task *indexHashJoinTask, joinResult *indexHashJoinResult, h hash.Hash64, resultCh chan *indexHashJoinResult) error {
	var joinStartTime time.Time
	if iw.stats != nil {
		start := time.Now()
		defer func() {
			endTime := time.Now()
			atomic.AddInt64(&iw.stats.totalTime, int64(endTime.Sub(start)))
			atomic.AddInt64(&iw.stats.join, int64(endTime.Sub(joinStartTime)))
		}()
	}

	iw.wg = &sync.WaitGroup{}
	iw.wg.Add(1)
	// TODO(XuHuaiyu): we may always use the smaller side to build the hash table.
	go soliton.WithRecovery(func() { iw.buildHashBlockForOuterResult(ctx, task, h) }, iw.handleHashJoinInnerWorkerPanic)
	err := iw.fetchInnerResults(ctx, task.lookUpJoinTask)
	if err != nil {
		return err
	}
	iw.wg.Wait()

	joinStartTime = time.Now()
	if !task.keepOuterOrder {
		return iw.doJoinUnordered(ctx, task, joinResult, h, resultCh)
	}
	return iw.doJoinInOrder(ctx, task, joinResult, h, resultCh)
}

func (iw *indexHashJoinInnerWorker) doJoinUnordered(ctx context.Context, task *indexHashJoinTask, joinResult *indexHashJoinResult, h hash.Hash64, resultCh chan *indexHashJoinResult) error {
	var ok bool
	iter := chunk.NewIterator4List(task.innerResult)
	for event := iter.Begin(); event != iter.End(); event = iter.Next() {
		ok, joinResult = iw.joinMatchedInnerEvent2Chunk(ctx, event, task, joinResult, h, iw.joinKeyBuf)
		if !ok {
			return errors.New("indexHashJoinInnerWorker.doJoinUnordered failed")
		}
	}
	// Emit the non-matched outer events for outer/anti joins.
	for chkIdx, outerEventStatus := range task.outerEventStatus {
		chk := task.outerResult.GetChunk(chkIdx)
		for rowIdx, val := range outerEventStatus {
			if val == outerEventMatched {
				continue
			}
			iw.joiner.onMissMatch(val == outerEventHasNull, chk.GetEvent(rowIdx), joinResult.chk)
			if joinResult.chk.IsFull() {
				select {
				case resultCh <- joinResult:
				case <-ctx.Done():
					return ctx.Err()
				}
				joinResult, ok = iw.getNewJoinResult(ctx)
				if !ok {
					return errors.New("indexHashJoinInnerWorker.doJoinUnordered failed")
				}
			}
		}
	}
	return nil
}

func (iw *indexHashJoinInnerWorker) getMatchedOuterEvents(innerEvent chunk.Event, task *indexHashJoinTask, h hash.Hash64, buf []byte) (matchedEvents []chunk.Event, matchedEventPtr []chunk.EventPtr, err error) {
	h.Reset()
	err = codec.HashChunkEvent(iw.ctx.GetStochastikVars().StmtCtx, h, innerEvent, iw.rowTypes, iw.keyDefCauss, buf)
	if err != nil {
		return nil, nil, err
	}
	iw.matchedOuterPtrs = task.lookupMap.Get(h.Sum64())
	if len(iw.matchedOuterPtrs) == 0 {
		return nil, nil, nil
	}
	joinType := JoinerType(iw.joiner)
	isSemiJoin := joinType == causetembedded.SemiJoin || joinType == causetembedded.LeftOuterSemiJoin
	matchedEvents = make([]chunk.Event, 0, len(iw.matchedOuterPtrs))
	matchedEventPtr = make([]chunk.EventPtr, 0, len(iw.matchedOuterPtrs))
	for _, ptr := range iw.matchedOuterPtrs {
		outerEvent := task.outerResult.GetEvent(ptr)
		// Hash values may collide, so the join keys of the candidate events
		// are compared for real equality. For semi joins, an outer event
		// that has already matched needs no further matches and is skipped.
		ok, err := codec.EqualChunkEvent(iw.ctx.GetStochastikVars().StmtCtx, innerEvent, iw.rowTypes, iw.keyDefCauss, outerEvent, iw.outerCtx.rowTypes, iw.outerCtx.keyDefCauss)
		if err != nil {
			return nil, nil, err
		}
		if !ok || (task.outerEventStatus[ptr.ChkIdx][ptr.EventIdx] == outerEventMatched && isSemiJoin) {
			continue
		}
		matchedEvents = append(matchedEvents, outerEvent)
		matchedEventPtr = append(matchedEventPtr, chunk.EventPtr{ChkIdx: ptr.ChkIdx, EventIdx: ptr.EventIdx})
	}
	return matchedEvents, matchedEventPtr, nil
}
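
// joinMatchedInnerEvent2Chunk joins a single inner event against all of its
// matched outer events on the unordered path: it appends joined events to
// joinResult.chk, flushes the chunk to iw.resultCh whenever it fills up, and
// records the outcome in task.outerEventStatus. It returns false on error or
// when a fresh result holder cannot be obtained.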
func (iw *indexHashJoinInnerWorker) joinMatchedInnerEvent2Chunk(ctx context.Context, innerEvent chunk.Event, task *indexHashJoinTask,
	joinResult *indexHashJoinResult, h hash.Hash64, buf []byte) (bool, *indexHashJoinResult) {
	matchedOuterEvents, matchedOuterEventPtr, err := iw.getMatchedOuterEvents(innerEvent, task, h, buf)
	if err != nil {
		joinResult.err = err
		return false, joinResult
	}
	if len(matchedOuterEvents) == 0 {
		return true, joinResult
	}
	var (
		ok     bool
		iter   = chunk.NewIterator4Slice(matchedOuterEvents)
		cursor = 0
	)
	for iter.Begin(); iter.Current() != iter.End(); {
		iw.outerEventStatus, err = iw.joiner.tryToMatchOuters(iter, innerEvent, joinResult.chk, iw.outerEventStatus)
		if err != nil {
			joinResult.err = err
			return false, joinResult
		}
		// Upgrade the status flags monotonically (unmatched < hasNull <
		// matched): an event already marked matched or has-null is never
		// downgraded back to unmatched.
		for _, status := range iw.outerEventStatus {
			chkIdx, rowIdx := matchedOuterEventPtr[cursor].ChkIdx, matchedOuterEventPtr[cursor].EventIdx
			if status == outerEventMatched || task.outerEventStatus[chkIdx][rowIdx] == outerEventUnmatched {
				task.outerEventStatus[chkIdx][rowIdx] = status
			}
			cursor++
		}
		if joinResult.chk.IsFull() {
			select {
			case iw.resultCh <- joinResult:
			case <-ctx.Done():
			}
			joinResult, ok = iw.getNewJoinResult(ctx)
			if !ok {
				return false, joinResult
			}
		}
	}
	return true, joinResult
}

func (iw *indexHashJoinInnerWorker) defCauslectMatchedInnerPtrs4OuterEvents(ctx context.Context, innerEvent chunk.Event, innerEventPtr chunk.EventPtr,
	task *indexHashJoinTask, h hash.Hash64, buf []byte) error {
	_, matchedOuterEventIdx, err := iw.getMatchedOuterEvents(innerEvent, task, h, buf)
	if err != nil {
		return err
	}
	for _, outerEventPtr := range matchedOuterEventIdx {
		chkIdx, rowIdx := outerEventPtr.ChkIdx, outerEventPtr.EventIdx
		task.matchedInnerEventPtrs[chkIdx][rowIdx] = append(task.matchedInnerEventPtrs[chkIdx][rowIdx], innerEventPtr)
	}
	return nil
}
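
// For the in-order path below, note the lifecycle of a task's resultCh: it is
// created by the outer worker with capacity numResChkHold, filled here, and
// closed by the deferred function of doJoinInOrder; that close is what lets
// runInOrder advance to the next task.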

// doJoinInOrder follows these steps:
//  1. collect all the matched inner event ptrs for every outer event
//  2. do the join work:
//     2.1 collect all the matched inner events using the collected ptrs for
//         every outer event
//     2.2 call tryToMatchInners for every outer event
//     2.3 call onMissMatch when no inner events are matched
func (iw *indexHashJoinInnerWorker) doJoinInOrder(ctx context.Context, task *indexHashJoinTask, joinResult *indexHashJoinResult, h hash.Hash64, resultCh chan *indexHashJoinResult) (err error) {
	defer func() {
		if err == nil && joinResult.chk != nil {
			if joinResult.chk.NumEvents() > 0 {
				// Flush the last, partially filled chunk.
				select {
				case resultCh <- joinResult:
				case <-ctx.Done():
					return
				}
			} else {
				// Return the unused chunk to the resource pool.
				joinResult.src <- joinResult.chk
			}
		}
		close(resultCh)
	}()
	for i, numChunks := 0, task.innerResult.NumChunks(); i < numChunks; i++ {
		for j, chk := 0, task.innerResult.GetChunk(i); j < chk.NumEvents(); j++ {
			event := chk.GetEvent(j)
			ptr := chunk.EventPtr{ChkIdx: uint32(i), EventIdx: uint32(j)}
			err = iw.defCauslectMatchedInnerPtrs4OuterEvents(ctx, event, ptr, task, h, iw.joinKeyBuf)
			if err != nil {
				return err
			}
		}
	}
	// TODO: matchedInnerEventPtrs and matchedInnerEvents can be moved to the inner worker.
	matchedInnerEvents := make([]chunk.Event, len(task.matchedInnerEventPtrs))
	var hasMatched, hasNull, ok bool
	for chkIdx, innerEventPtrs4Chk := range task.matchedInnerEventPtrs {
		for outerEventIdx, innerEventPtrs := range innerEventPtrs4Chk {
			matchedInnerEvents, hasMatched, hasNull = matchedInnerEvents[:0], false, false
			outerEvent := task.outerResult.GetChunk(chkIdx).GetEvent(outerEventIdx)
			for _, ptr := range innerEventPtrs {
				matchedInnerEvents = append(matchedInnerEvents, task.innerResult.GetEvent(ptr))
			}
			iter := chunk.NewIterator4Slice(matchedInnerEvents)
			for iter.Begin(); iter.Current() != iter.End(); {
				matched, isNull, err := iw.joiner.tryToMatchInners(outerEvent, iter, joinResult.chk)
				if err != nil {
					return err
				}
				hasMatched, hasNull = matched || hasMatched, isNull || hasNull
				if joinResult.chk.IsFull() {
					select {
					case resultCh <- joinResult:
					case <-ctx.Done():
						return ctx.Err()
					}
					joinResult, ok = iw.getNewJoinResult(ctx)
					if !ok {
						return errors.New("indexHashJoinInnerWorker.doJoinInOrder failed")
					}
				}
			}
			if !hasMatched {
				iw.joiner.onMissMatch(hasNull, outerEvent, joinResult.chk)
			}
		}
	}
	return nil
}