github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/index_lookup_merge_join.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package interlock 15 16 import ( 17 "context" 18 "fmt" 19 "runtime" 20 "runtime/trace" 21 "sort" 22 "sync" 23 "sync/atomic" 24 25 "github.com/whtcorpsinc/BerolinaSQL/allegrosql" 26 "github.com/whtcorpsinc/BerolinaSQL/terror" 27 "github.com/whtcorpsinc/errors" 28 "github.com/whtcorpsinc/failpoint" 29 causetembedded "github.com/whtcorpsinc/milevadb/causet/embedded" 30 "github.com/whtcorpsinc/milevadb/memex" 31 "github.com/whtcorpsinc/milevadb/soliton/chunk" 32 "github.com/whtcorpsinc/milevadb/soliton/execdetails" 33 "github.com/whtcorpsinc/milevadb/soliton/logutil" 34 "github.com/whtcorpsinc/milevadb/soliton/memory" 35 "github.com/whtcorpsinc/milevadb/soliton/ranger" 36 "github.com/whtcorpsinc/milevadb/stochastikctx" 37 "github.com/whtcorpsinc/milevadb/types" 38 "go.uber.org/zap" 39 ) 40 41 // IndexLookUpMergeJoin realizes IndexLookUpJoin by merge join 42 // It preserves the order of the outer causet and support batch lookup. 43 // 44 // The execution flow is very similar to IndexLookUpReader: 45 // 1. outerWorker read N outer rows, build a task and send it to result channel and inner worker channel. 46 // 2. The innerWorker receives the task, builds key ranges from outer rows and fetch inner rows, then do merge join. 47 // 3. main thread receives the task and fetch results from the channel in task one by one. 48 // 4. If channel has been closed, main thread receives the next task. 49 type IndexLookUpMergeJoin struct { 50 baseInterlockingDirectorate 51 52 resultCh <-chan *lookUpMergeJoinTask 53 cancelFunc context.CancelFunc 54 workerWg *sync.WaitGroup 55 56 outerMergeCtx outerMergeCtx 57 innerMergeCtx innerMergeCtx 58 59 joiners []joiner 60 joinChkResourceCh []chan *chunk.Chunk 61 isOuterJoin bool 62 63 requiredEvents int64 64 65 task *lookUpMergeJoinTask 66 67 indexRanges []*ranger.Range 68 keyOff2IdxOff []int 69 70 // lastDefCausHelper causetstore the information for last defCaus if there's complicated filter like defCaus > x_defCaus and defCaus < x_defCaus + 100. 71 lastDefCausHelper *causetembedded.DefCausWithCmpFuncManager 72 73 memTracker *memory.Tracker // track memory usage 74 } 75 76 type outerMergeCtx struct { 77 rowTypes []*types.FieldType 78 joinKeys []*memex.DeferredCauset 79 keyDefCauss []int 80 filter memex.CNFExprs 81 needOuterSort bool 82 compareFuncs []memex.CompareFunc 83 } 84 85 type innerMergeCtx struct { 86 readerBuilder *dataReaderBuilder 87 rowTypes []*types.FieldType 88 joinKeys []*memex.DeferredCauset 89 keyDefCauss []int 90 compareFuncs []memex.CompareFunc 91 defCausLens []int 92 desc bool 93 keyOff2KeyOffOrderByIdx []int 94 } 95 96 type lookUpMergeJoinTask struct { 97 outerResult *chunk.List 98 outerOrderIdx []chunk.EventPtr 99 100 innerResult *chunk.Chunk 101 innerIter chunk.Iterator 102 103 sameKeyInnerEvents []chunk.Event 104 sameKeyIter chunk.Iterator 105 106 doneErr error 107 results chan *indexMergeJoinResult 108 109 memTracker *memory.Tracker 110 } 111 112 type outerMergeWorker struct { 113 outerMergeCtx 114 115 lookup *IndexLookUpMergeJoin 116 117 ctx stochastikctx.Context 118 interlock InterlockingDirectorate 119 120 maxBatchSize int 121 batchSize int 122 123 nextDefCausCompareFilters *causetembedded.DefCausWithCmpFuncManager 124 125 resultCh chan<- *lookUpMergeJoinTask 126 innerCh chan<- *lookUpMergeJoinTask 127 128 parentMemTracker *memory.Tracker 129 } 130 131 type innerMergeWorker struct { 132 innerMergeCtx 133 134 taskCh <-chan *lookUpMergeJoinTask 135 joinChkResourceCh chan *chunk.Chunk 136 outerMergeCtx outerMergeCtx 137 ctx stochastikctx.Context 138 innerInterDirc InterlockingDirectorate 139 joiner joiner 140 retFieldTypes []*types.FieldType 141 142 maxChunkSize int 143 indexRanges []*ranger.Range 144 nextDefCausCompareFilters *causetembedded.DefCausWithCmpFuncManager 145 keyOff2IdxOff []int 146 } 147 148 type indexMergeJoinResult struct { 149 chk *chunk.Chunk 150 src chan<- *chunk.Chunk 151 } 152 153 // Open implements the InterlockingDirectorate interface 154 func (e *IndexLookUpMergeJoin) Open(ctx context.Context) error { 155 // Be careful, very dirty replog in this line!!! 156 // IndexLookMergeUpJoin need to rebuild interlock (the dataReaderBuilder) during 157 // executing. However `interlock.Next()` is lazy evaluation when the RecordSet 158 // result is drained. 159 // Lazy evaluation means the saved stochastik context may change during interlock's 160 // building and its running. 161 // A specific sequence for example: 162 // 163 // e := buildInterlockingDirectorate() // txn at build time 164 // recordSet := runStmt(e) 165 // stochastik.CommitTxn() // txn closed 166 // recordSet.Next() 167 // e.dataReaderBuilder.Build() // txn is used again, which is already closed 168 // 169 // The trick here is `getSnapshotTS` will cache snapshot ts in the dataReaderBuilder, 170 // so even txn is destroyed later, the dataReaderBuilder could still use the 171 // cached snapshot ts to construct PosetDag. 172 _, err := e.innerMergeCtx.readerBuilder.getSnapshotTS() 173 if err != nil { 174 return err 175 } 176 177 err = e.children[0].Open(ctx) 178 if err != nil { 179 return err 180 } 181 e.memTracker = memory.NewTracker(e.id, -1) 182 e.memTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.MemTracker) 183 e.startWorkers(ctx) 184 return nil 185 } 186 187 func (e *IndexLookUpMergeJoin) startWorkers(ctx context.Context) { 188 // TODO: consider another stochastik currency variable for index merge join. 189 // Because its parallelization is not complete. 190 concurrency := e.ctx.GetStochastikVars().IndexLookupJoinConcurrency() 191 resultCh := make(chan *lookUpMergeJoinTask, concurrency) 192 e.resultCh = resultCh 193 e.joinChkResourceCh = make([]chan *chunk.Chunk, concurrency) 194 for i := 0; i < concurrency; i++ { 195 e.joinChkResourceCh[i] = make(chan *chunk.Chunk, numResChkHold) 196 for j := 0; j < numResChkHold; j++ { 197 e.joinChkResourceCh[i] <- chunk.NewChunkWithCapacity(e.retFieldTypes, e.maxChunkSize) 198 } 199 } 200 workerCtx, cancelFunc := context.WithCancel(ctx) 201 e.cancelFunc = cancelFunc 202 innerCh := make(chan *lookUpMergeJoinTask, concurrency) 203 e.workerWg.Add(1) 204 go e.newOuterWorker(resultCh, innerCh).run(workerCtx, e.workerWg, e.cancelFunc) 205 e.workerWg.Add(concurrency) 206 for i := 0; i < concurrency; i++ { 207 go e.newInnerMergeWorker(innerCh, i).run(workerCtx, e.workerWg, e.cancelFunc) 208 } 209 } 210 211 func (e *IndexLookUpMergeJoin) newOuterWorker(resultCh, innerCh chan *lookUpMergeJoinTask) *outerMergeWorker { 212 omw := &outerMergeWorker{ 213 outerMergeCtx: e.outerMergeCtx, 214 ctx: e.ctx, 215 lookup: e, 216 interlock: e.children[0], 217 resultCh: resultCh, 218 innerCh: innerCh, 219 batchSize: 32, 220 maxBatchSize: e.ctx.GetStochastikVars().IndexJoinBatchSize, 221 parentMemTracker: e.memTracker, 222 nextDefCausCompareFilters: e.lastDefCausHelper, 223 } 224 failpoint.Inject("testIssue18068", func() { 225 omw.batchSize = 1 226 }) 227 return omw 228 } 229 230 func (e *IndexLookUpMergeJoin) newInnerMergeWorker(taskCh chan *lookUpMergeJoinTask, workID int) *innerMergeWorker { 231 // Since multiple inner workers run concurrently, we should copy join's indexRanges for every worker to avoid data race. 232 copiedRanges := make([]*ranger.Range, 0, len(e.indexRanges)) 233 for _, ran := range e.indexRanges { 234 copiedRanges = append(copiedRanges, ran.Clone()) 235 } 236 imw := &innerMergeWorker{ 237 innerMergeCtx: e.innerMergeCtx, 238 outerMergeCtx: e.outerMergeCtx, 239 taskCh: taskCh, 240 ctx: e.ctx, 241 indexRanges: copiedRanges, 242 keyOff2IdxOff: e.keyOff2IdxOff, 243 joiner: e.joiners[workID], 244 joinChkResourceCh: e.joinChkResourceCh[workID], 245 retFieldTypes: e.retFieldTypes, 246 maxChunkSize: e.maxChunkSize, 247 } 248 if e.lastDefCausHelper != nil { 249 // nextCwf.TmpConstant needs to be reset for every individual 250 // inner worker to avoid data race when the inner workers is running 251 // concurrently. 252 nextCwf := *e.lastDefCausHelper 253 nextCwf.TmpConstant = make([]*memex.Constant, len(e.lastDefCausHelper.TmpConstant)) 254 for i := range e.lastDefCausHelper.TmpConstant { 255 nextCwf.TmpConstant[i] = &memex.Constant{RetType: nextCwf.TargetDefCaus.RetType} 256 } 257 imw.nextDefCausCompareFilters = &nextCwf 258 } 259 return imw 260 } 261 262 // Next implements the InterlockingDirectorate interface 263 func (e *IndexLookUpMergeJoin) Next(ctx context.Context, req *chunk.Chunk) error { 264 if e.isOuterJoin { 265 atomic.StoreInt64(&e.requiredEvents, int64(req.RequiredEvents())) 266 } 267 req.Reset() 268 if e.task == nil { 269 e.getFinishedTask(ctx) 270 } 271 for e.task != nil { 272 select { 273 case result, ok := <-e.task.results: 274 if !ok { 275 if e.task.doneErr != nil { 276 return e.task.doneErr 277 } 278 e.getFinishedTask(ctx) 279 continue 280 } 281 req.SwapDeferredCausets(result.chk) 282 result.src <- result.chk 283 return nil 284 case <-ctx.Done(): 285 return ctx.Err() 286 } 287 } 288 289 return nil 290 } 291 292 func (e *IndexLookUpMergeJoin) getFinishedTask(ctx context.Context) { 293 select { 294 case e.task = <-e.resultCh: 295 case <-ctx.Done(): 296 e.task = nil 297 } 298 299 // TODO: reuse the finished task memory to build tasks. 300 } 301 302 func (omw *outerMergeWorker) run(ctx context.Context, wg *sync.WaitGroup, cancelFunc context.CancelFunc) { 303 defer trace.StartRegion(ctx, "IndexLookupMergeJoinOuterWorker").End() 304 defer func() { 305 if r := recover(); r != nil { 306 task := &lookUpMergeJoinTask{ 307 doneErr: errors.New(fmt.Sprintf("%v", r)), 308 results: make(chan *indexMergeJoinResult, numResChkHold), 309 } 310 close(task.results) 311 omw.resultCh <- task 312 cancelFunc() 313 } 314 close(omw.resultCh) 315 close(omw.innerCh) 316 wg.Done() 317 }() 318 for { 319 task, err := omw.buildTask(ctx) 320 if err != nil { 321 task.doneErr = err 322 close(task.results) 323 omw.pushToChan(ctx, task, omw.resultCh) 324 return 325 } 326 failpoint.Inject("mocHoTTexMergeJoinOOMPanic", nil) 327 if task == nil { 328 return 329 } 330 331 if finished := omw.pushToChan(ctx, task, omw.innerCh); finished { 332 return 333 } 334 335 if finished := omw.pushToChan(ctx, task, omw.resultCh); finished { 336 return 337 } 338 } 339 } 340 341 func (omw *outerMergeWorker) pushToChan(ctx context.Context, task *lookUpMergeJoinTask, dst chan<- *lookUpMergeJoinTask) (finished bool) { 342 select { 343 case <-ctx.Done(): 344 return true 345 case dst <- task: 346 } 347 return false 348 } 349 350 // buildTask builds a lookUpMergeJoinTask and read outer rows. 351 // When err is not nil, task must not be nil to send the error to the main thread via task 352 func (omw *outerMergeWorker) buildTask(ctx context.Context) (*lookUpMergeJoinTask, error) { 353 task := &lookUpMergeJoinTask{ 354 results: make(chan *indexMergeJoinResult, numResChkHold), 355 outerResult: chunk.NewList(omw.rowTypes, omw.interlock.base().initCap, omw.interlock.base().maxChunkSize), 356 } 357 task.memTracker = memory.NewTracker(memory.LabelForSimpleTask, -1) 358 task.memTracker.AttachTo(omw.parentMemTracker) 359 360 omw.increaseBatchSize() 361 requiredEvents := omw.batchSize 362 if omw.lookup.isOuterJoin { 363 requiredEvents = int(atomic.LoadInt64(&omw.lookup.requiredEvents)) 364 } 365 if requiredEvents <= 0 || requiredEvents > omw.maxBatchSize { 366 requiredEvents = omw.maxBatchSize 367 } 368 for requiredEvents > 0 { 369 execChk := newFirstChunk(omw.interlock) 370 err := Next(ctx, omw.interlock, execChk) 371 if err != nil { 372 return task, err 373 } 374 if execChk.NumEvents() == 0 { 375 break 376 } 377 378 task.outerResult.Add(execChk) 379 requiredEvents -= execChk.NumEvents() 380 task.memTracker.Consume(execChk.MemoryUsage()) 381 } 382 383 if task.outerResult.Len() == 0 { 384 return nil, nil 385 } 386 387 return task, nil 388 } 389 390 func (omw *outerMergeWorker) increaseBatchSize() { 391 if omw.batchSize < omw.maxBatchSize { 392 omw.batchSize *= 2 393 } 394 if omw.batchSize > omw.maxBatchSize { 395 omw.batchSize = omw.maxBatchSize 396 } 397 } 398 399 func (imw *innerMergeWorker) run(ctx context.Context, wg *sync.WaitGroup, cancelFunc context.CancelFunc) { 400 defer trace.StartRegion(ctx, "IndexLookupMergeJoinInnerWorker").End() 401 var task *lookUpMergeJoinTask 402 defer func() { 403 wg.Done() 404 if r := recover(); r != nil { 405 if task != nil { 406 task.doneErr = errors.Errorf("%v", r) 407 close(task.results) 408 } 409 buf := make([]byte, 4096) 410 stackSize := runtime.Stack(buf, false) 411 buf = buf[:stackSize] 412 logutil.Logger(ctx).Error("innerMergeWorker panicked", zap.String("stack", string(buf))) 413 cancelFunc() 414 } 415 }() 416 417 for ok := true; ok; { 418 select { 419 case task, ok = <-imw.taskCh: 420 if !ok { 421 return 422 } 423 case <-ctx.Done(): 424 return 425 } 426 427 err := imw.handleTask(ctx, task) 428 task.doneErr = err 429 close(task.results) 430 } 431 } 432 433 func (imw *innerMergeWorker) handleTask(ctx context.Context, task *lookUpMergeJoinTask) (err error) { 434 numOuterChks := task.outerResult.NumChunks() 435 var outerMatch [][]bool 436 if imw.outerMergeCtx.filter != nil { 437 outerMatch = make([][]bool, numOuterChks) 438 for i := 0; i < numOuterChks; i++ { 439 chk := task.outerResult.GetChunk(i) 440 outerMatch[i] = make([]bool, chk.NumEvents()) 441 outerMatch[i], err = memex.VectorizedFilter(imw.ctx, imw.outerMergeCtx.filter, chunk.NewIterator4Chunk(chk), outerMatch[i]) 442 if err != nil { 443 return err 444 } 445 } 446 } 447 task.outerOrderIdx = make([]chunk.EventPtr, 0, task.outerResult.Len()) 448 for i := 0; i < numOuterChks; i++ { 449 numEvent := task.outerResult.GetChunk(i).NumEvents() 450 for j := 0; j < numEvent; j++ { 451 if len(outerMatch) == 0 || outerMatch[i][j] { 452 task.outerOrderIdx = append(task.outerOrderIdx, chunk.EventPtr{ChkIdx: uint32(i), EventIdx: uint32(j)}) 453 } 454 } 455 } 456 task.memTracker.Consume(int64(cap(task.outerOrderIdx))) 457 failpoint.Inject("IndexMergeJoinMockOOM", func(val failpoint.Value) { 458 if val.(bool) { 459 panic("OOM test index merge join doesn't hang here.") 460 } 461 }) 462 // needOuterSort means the outer side property items can't guarantee the order of join keys. 463 // Because the necessary condition of merge join is both outer and inner keep order of join keys. 464 // In this case, we need sort the outer side. 465 if imw.outerMergeCtx.needOuterSort { 466 sort.Slice(task.outerOrderIdx, func(i, j int) bool { 467 idxI, idxJ := task.outerOrderIdx[i], task.outerOrderIdx[j] 468 rowI, rowJ := task.outerResult.GetEvent(idxI), task.outerResult.GetEvent(idxJ) 469 var cmp int64 470 var err error 471 for _, keyOff := range imw.keyOff2KeyOffOrderByIdx { 472 joinKey := imw.outerMergeCtx.joinKeys[keyOff] 473 cmp, _, err = imw.outerMergeCtx.compareFuncs[keyOff](imw.ctx, joinKey, joinKey, rowI, rowJ) 474 terror.Log(err) 475 if cmp != 0 { 476 break 477 } 478 } 479 if cmp != 0 || imw.nextDefCausCompareFilters == nil { 480 return (cmp < 0 && !imw.desc) || (cmp > 0 && imw.desc) 481 } 482 cmp = int64(imw.nextDefCausCompareFilters.CompareEvent(rowI, rowJ)) 483 return (cmp < 0 && !imw.desc) || (cmp > 0 && imw.desc) 484 }) 485 } 486 dLookUpKeys, err := imw.constructCausetLookupKeys(task) 487 if err != nil { 488 return err 489 } 490 dLookUpKeys = imw.deduFIDelatumLookUpKeys(dLookUpKeys) 491 // If the order requires descending, the deDupedLookUpContents is keep descending order before. 492 // So at the end, we should generate the ascending deDupedLookUpContents to build the correct range for inner read. 493 if imw.desc { 494 lenKeys := len(dLookUpKeys) 495 for i := 0; i < lenKeys/2; i++ { 496 dLookUpKeys[i], dLookUpKeys[lenKeys-i-1] = dLookUpKeys[lenKeys-i-1], dLookUpKeys[i] 497 } 498 } 499 imw.innerInterDirc, err = imw.readerBuilder.buildInterlockingDirectorateForIndexJoin(ctx, dLookUpKeys, imw.indexRanges, imw.keyOff2IdxOff, imw.nextDefCausCompareFilters) 500 if err != nil { 501 return err 502 } 503 defer terror.Call(imw.innerInterDirc.Close) 504 _, err = imw.fetchNextInnerResult(ctx, task) 505 if err != nil { 506 return err 507 } 508 err = imw.doMergeJoin(ctx, task) 509 return err 510 } 511 512 func (imw *innerMergeWorker) fetchNewChunkWhenFull(ctx context.Context, task *lookUpMergeJoinTask, chk **chunk.Chunk) (continueJoin bool) { 513 if !(*chk).IsFull() { 514 return true 515 } 516 select { 517 case task.results <- &indexMergeJoinResult{*chk, imw.joinChkResourceCh}: 518 case <-ctx.Done(): 519 return false 520 } 521 var ok bool 522 select { 523 case *chk, ok = <-imw.joinChkResourceCh: 524 if !ok { 525 return false 526 } 527 case <-ctx.Done(): 528 return false 529 } 530 (*chk).Reset() 531 return true 532 } 533 534 func (imw *innerMergeWorker) doMergeJoin(ctx context.Context, task *lookUpMergeJoinTask) (err error) { 535 chk := <-imw.joinChkResourceCh 536 defer func() { 537 if chk == nil { 538 return 539 } 540 if chk.NumEvents() > 0 { 541 select { 542 case task.results <- &indexMergeJoinResult{chk, imw.joinChkResourceCh}: 543 case <-ctx.Done(): 544 return 545 } 546 } else { 547 imw.joinChkResourceCh <- chk 548 } 549 }() 550 551 initCmpResult := 1 552 if imw.innerMergeCtx.desc { 553 initCmpResult = -1 554 } 555 noneInnerEventsRemain := task.innerResult.NumEvents() == 0 556 557 for _, outerIdx := range task.outerOrderIdx { 558 outerEvent := task.outerResult.GetEvent(outerIdx) 559 hasMatch, hasNull, cmpResult := false, false, initCmpResult 560 // If it has iterated out all inner rows and the inner rows with same key is empty, 561 // that means the outer event needn't match any inner rows. 562 if noneInnerEventsRemain && len(task.sameKeyInnerEvents) == 0 { 563 goto missMatch 564 } 565 if len(task.sameKeyInnerEvents) > 0 { 566 cmpResult, err = imw.compare(outerEvent, task.sameKeyIter.Begin()) 567 if err != nil { 568 return err 569 } 570 } 571 if (cmpResult > 0 && !imw.innerMergeCtx.desc) || (cmpResult < 0 && imw.innerMergeCtx.desc) { 572 if noneInnerEventsRemain { 573 task.sameKeyInnerEvents = task.sameKeyInnerEvents[:0] 574 goto missMatch 575 } 576 noneInnerEventsRemain, err = imw.fetchInnerEventsWithSameKey(ctx, task, outerEvent) 577 if err != nil { 578 return err 579 } 580 } 581 582 for task.sameKeyIter.Current() != task.sameKeyIter.End() { 583 matched, isNull, err := imw.joiner.tryToMatchInners(outerEvent, task.sameKeyIter, chk) 584 if err != nil { 585 return err 586 } 587 hasMatch = hasMatch || matched 588 hasNull = hasNull || isNull 589 if !imw.fetchNewChunkWhenFull(ctx, task, &chk) { 590 return nil 591 } 592 } 593 594 missMatch: 595 if !hasMatch { 596 imw.joiner.onMissMatch(hasNull, outerEvent, chk) 597 if !imw.fetchNewChunkWhenFull(ctx, task, &chk) { 598 return nil 599 } 600 } 601 } 602 603 return nil 604 } 605 606 // fetchInnerEventsWithSameKey defCauslects the inner rows having the same key with one outer event. 607 func (imw *innerMergeWorker) fetchInnerEventsWithSameKey(ctx context.Context, task *lookUpMergeJoinTask, key chunk.Event) (noneInnerEvents bool, err error) { 608 task.sameKeyInnerEvents = task.sameKeyInnerEvents[:0] 609 curEvent := task.innerIter.Current() 610 var cmpRes int 611 for cmpRes, err = imw.compare(key, curEvent); ((cmpRes >= 0 && !imw.desc) || (cmpRes <= 0 && imw.desc)) && err == nil; cmpRes, err = imw.compare(key, curEvent) { 612 if cmpRes == 0 { 613 task.sameKeyInnerEvents = append(task.sameKeyInnerEvents, curEvent) 614 } 615 curEvent = task.innerIter.Next() 616 if curEvent == task.innerIter.End() { 617 curEvent, err = imw.fetchNextInnerResult(ctx, task) 618 if err != nil || task.innerResult.NumEvents() == 0 { 619 break 620 } 621 } 622 } 623 task.sameKeyIter = chunk.NewIterator4Slice(task.sameKeyInnerEvents) 624 task.sameKeyIter.Begin() 625 noneInnerEvents = task.innerResult.NumEvents() == 0 626 return 627 } 628 629 func (imw *innerMergeWorker) compare(outerEvent, innerEvent chunk.Event) (int, error) { 630 for _, keyOff := range imw.innerMergeCtx.keyOff2KeyOffOrderByIdx { 631 cmp, _, err := imw.innerMergeCtx.compareFuncs[keyOff](imw.ctx, imw.outerMergeCtx.joinKeys[keyOff], imw.innerMergeCtx.joinKeys[keyOff], outerEvent, innerEvent) 632 if err != nil || cmp != 0 { 633 return int(cmp), err 634 } 635 } 636 return 0, nil 637 } 638 639 func (imw *innerMergeWorker) constructCausetLookupKeys(task *lookUpMergeJoinTask) ([]*indexJoinLookUpContent, error) { 640 numEvents := len(task.outerOrderIdx) 641 dLookUpKeys := make([]*indexJoinLookUpContent, 0, numEvents) 642 for i := 0; i < numEvents; i++ { 643 dLookUpKey, err := imw.constructCausetLookupKey(task, task.outerOrderIdx[i]) 644 if err != nil { 645 return nil, err 646 } 647 if dLookUpKey == nil { 648 continue 649 } 650 dLookUpKeys = append(dLookUpKeys, dLookUpKey) 651 } 652 653 return dLookUpKeys, nil 654 } 655 656 func (imw *innerMergeWorker) constructCausetLookupKey(task *lookUpMergeJoinTask, rowIdx chunk.EventPtr) (*indexJoinLookUpContent, error) { 657 outerEvent := task.outerResult.GetEvent(rowIdx) 658 sc := imw.ctx.GetStochastikVars().StmtCtx 659 keyLen := len(imw.keyDefCauss) 660 dLookupKey := make([]types.Causet, 0, keyLen) 661 for i, keyDefCaus := range imw.outerMergeCtx.keyDefCauss { 662 outerValue := outerEvent.GetCauset(keyDefCaus, imw.outerMergeCtx.rowTypes[keyDefCaus]) 663 // Join-on-condition can be promised to be equal-condition in 664 // IndexNestedLoopJoin, thus the filter will always be false if 665 // outerValue is null, and we don't need to lookup it. 666 if outerValue.IsNull() { 667 return nil, nil 668 } 669 innerDefCausType := imw.rowTypes[imw.keyDefCauss[i]] 670 innerValue, err := outerValue.ConvertTo(sc, innerDefCausType) 671 if err != nil { 672 // If the converted outerValue overflows, we don't need to lookup it. 673 if terror.ErrorEqual(err, types.ErrOverflow) { 674 return nil, nil 675 } 676 if terror.ErrorEqual(err, types.ErrTruncated) && (innerDefCausType.Tp == allegrosql.TypeSet || innerDefCausType.Tp == allegrosql.TypeEnum) { 677 return nil, nil 678 } 679 return nil, err 680 } 681 cmp, err := outerValue.CompareCauset(sc, &innerValue) 682 if err != nil { 683 return nil, err 684 } 685 if cmp != 0 { 686 // If the converted outerValue is not equal to the origin outerValue, we don't need to lookup it. 687 return nil, nil 688 } 689 dLookupKey = append(dLookupKey, innerValue) 690 } 691 return &indexJoinLookUpContent{keys: dLookupKey, event: task.outerResult.GetEvent(rowIdx)}, nil 692 } 693 694 func (imw *innerMergeWorker) deduFIDelatumLookUpKeys(lookUpContents []*indexJoinLookUpContent) []*indexJoinLookUpContent { 695 if len(lookUpContents) < 2 { 696 return lookUpContents 697 } 698 sc := imw.ctx.GetStochastikVars().StmtCtx 699 deDupedLookUpContents := lookUpContents[:1] 700 for i := 1; i < len(lookUpContents); i++ { 701 cmp := compareEvent(sc, lookUpContents[i].keys, lookUpContents[i-1].keys) 702 if cmp != 0 || (imw.nextDefCausCompareFilters != nil && imw.nextDefCausCompareFilters.CompareEvent(lookUpContents[i].event, lookUpContents[i-1].event) != 0) { 703 deDupedLookUpContents = append(deDupedLookUpContents, lookUpContents[i]) 704 } 705 } 706 return deDupedLookUpContents 707 } 708 709 // fetchNextInnerResult defCauslects a chunk of inner results from inner child interlock. 710 func (imw *innerMergeWorker) fetchNextInnerResult(ctx context.Context, task *lookUpMergeJoinTask) (beginEvent chunk.Event, err error) { 711 task.innerResult = chunk.NewChunkWithCapacity(retTypes(imw.innerInterDirc), imw.ctx.GetStochastikVars().MaxChunkSize) 712 err = Next(ctx, imw.innerInterDirc, task.innerResult) 713 task.innerIter = chunk.NewIterator4Chunk(task.innerResult) 714 beginEvent = task.innerIter.Begin() 715 return 716 } 717 718 // Close implements the InterlockingDirectorate interface. 719 func (e *IndexLookUpMergeJoin) Close() error { 720 if e.cancelFunc != nil { 721 e.cancelFunc() 722 e.cancelFunc = nil 723 } 724 if e.resultCh != nil { 725 for range e.resultCh { 726 } 727 e.resultCh = nil 728 } 729 e.joinChkResourceCh = nil 730 // joinChkResourceCh is to recycle result chunks, used by inner worker. 731 // resultCh is the main thread get the results, used by main thread and inner worker. 732 // cancelFunc control the outer worker and outer worker close the task channel. 733 e.workerWg.Wait() 734 e.memTracker = nil 735 if e.runtimeStats != nil { 736 concurrency := cap(e.resultCh) 737 runtimeStats := &execdetails.RuntimeStatsWithConcurrencyInfo{} 738 runtimeStats.SetConcurrencyInfo(execdetails.NewConcurrencyInfo("Concurrency", concurrency)) 739 e.ctx.GetStochastikVars().StmtCtx.RuntimeStatsDefCausl.RegisterStats(e.id, runtimeStats) 740 } 741 return e.baseInterlockingDirectorate.Close() 742 }