github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/index_lookup_join.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"bytes"
	"context"
	"runtime"
	"runtime/trace"
	"sort"
	"strconv"
	"sync"
	"sync/atomic"
	"time"
	"unsafe"

	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/errors"
	causetembedded "github.com/whtcorpsinc/milevadb/causet/embedded"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/codec"
	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/memory"
	"github.com/whtcorpsinc/milevadb/soliton/mvmap"
	"github.com/whtcorpsinc/milevadb/soliton/ranger"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx"
	"github.com/whtcorpsinc/milevadb/types"
	"go.uber.org/zap"
)

var _ InterlockingDirectorate = &IndexLookUpJoin{}

// IndexLookUpJoin employs one outer worker and N inner workers to execute concurrently.
// It preserves the order of the outer causet and supports batch lookup.
//
// The execution flow is very similar to IndexLookUpReader:
// 1. The outer worker reads N outer rows, builds a task and sends it to the result channel and the inner worker channel.
// 2. An inner worker receives the task, builds key ranges from the outer rows, fetches the inner rows and builds the inner event hash map.
// 3. The main thread receives the task and waits for the inner worker to finish handling it.
// 4. The main thread joins each outer event by looking it up in the task's inner event hash map.
type IndexLookUpJoin struct {
	baseInterlockingDirectorate

	resultCh   <-chan *lookUpJoinTask
	cancelFunc context.CancelFunc
	workerWg   *sync.WaitGroup

	outerCtx outerCtx
	innerCtx innerCtx

	task       *lookUpJoinTask
	joinResult *chunk.Chunk
	innerIter  chunk.Iterator

	joiner      joiner
	isOuterJoin bool

	requiredEvents int64

	indexRanges   []*ranger.Range
	keyOff2IdxOff []int
	innerPtrBytes [][]byte

	// lastDefCausHelper stores the information for the last defCaus if there's a complicated filter like defCaus > x_defCaus and defCaus < x_defCaus + 100.
	lastDefCausHelper *causetembedded.DefCausWithCmpFuncManager

	memTracker *memory.Tracker // track memory usage.

	stats *indexLookUpJoinRuntimeStats
}

type outerCtx struct {
	rowTypes    []*types.FieldType
	keyDefCauss []int
	filter      memex.CNFExprs
}

type innerCtx struct {
	readerBuilder    *dataReaderBuilder
	rowTypes         []*types.FieldType
	keyDefCauss      []int
	defCausLens      []int
	hasPrefixDefCaus bool
}

type lookUpJoinTask struct {
	outerResult *chunk.List
	outerMatch  [][]bool

	innerResult       *chunk.List
	encodedLookUpKeys []*chunk.Chunk
	lookupMap         *mvmap.MVMap
	matchedInners     []chunk.Event

	doneCh   chan error
	cursor   chunk.EventPtr
	hasMatch bool
	hasNull  bool

	memTracker *memory.Tracker // track memory usage.
}

type outerWorker struct {
	outerCtx

	lookup *IndexLookUpJoin

	ctx       stochastikctx.Context
	interlock InterlockingDirectorate

	maxBatchSize int
	batchSize    int

	resultCh chan<- *lookUpJoinTask
	innerCh  chan<- *lookUpJoinTask

	parentMemTracker *memory.Tracker
}

type innerWorker struct {
	innerCtx

	taskCh       <-chan *lookUpJoinTask
	outerCtx     outerCtx
	ctx          stochastikctx.Context
	interlockChk *chunk.Chunk

	indexRanges               []*ranger.Range
	nextDefCausCompareFilters *causetembedded.DefCausWithCmpFuncManager
	keyOff2IdxOff             []int
	stats                     *innerWorkerRuntimeStats
}

// Open implements the InterlockingDirectorate interface.
func (e *IndexLookUpJoin) Open(ctx context.Context) error {
	// Be careful, very dirty replog in this line!!!
	// IndexLookUpJoin needs to rebuild the interlock (the dataReaderBuilder) during
	// execution. However, `interlock.Next()` is evaluated lazily, as the RecordSet
	// result is drained.
	// Lazy evaluation means the saved stochastik context may change between the
	// interlock's building and its running.
	// A specific sequence, for example:
	//
	//	e := buildInterlockingDirectorate()   // txn at build time
	//	recordSet := runStmt(e)
	//	stochastik.CommitTxn()                // txn closed
	//	recordSet.Next()
	//	e.dataReaderBuilder.Build()           // txn is used again, which is already closed
	//
	// The trick here is that `getSnapshotTS` caches the snapshot ts in the dataReaderBuilder,
	// so even if the txn is destroyed later, the dataReaderBuilder can still use the
	// cached snapshot ts to construct the PosetDag.
	_, err := e.innerCtx.readerBuilder.getSnapshotTS()
	if err != nil {
		return err
	}

	err = e.children[0].Open(ctx)
	if err != nil {
		return err
	}
	e.memTracker = memory.NewTracker(e.id, -1)
	e.memTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.MemTracker)
	e.innerPtrBytes = make([][]byte, 0, 8)
	if e.runtimeStats != nil {
		e.stats = &indexLookUpJoinRuntimeStats{}
		e.ctx.GetStochastikVars().StmtCtx.RuntimeStatsDefCausl.RegisterStats(e.id, e.stats)
	}
	e.startWorkers(ctx)
	return nil
}

func (e *IndexLookUpJoin) startWorkers(ctx context.Context) {
	concurrency := e.ctx.GetStochastikVars().IndexLookupJoinConcurrency()
	if e.stats != nil {
		e.stats.concurrency = concurrency
	}
	resultCh := make(chan *lookUpJoinTask, concurrency)
	e.resultCh = resultCh
	workerCtx, cancelFunc := context.WithCancel(ctx)
	e.cancelFunc = cancelFunc
	innerCh := make(chan *lookUpJoinTask, concurrency)
	e.workerWg.Add(1)
	go e.newOuterWorker(resultCh, innerCh).run(workerCtx, e.workerWg)
	e.workerWg.Add(concurrency)
	for i := 0; i < concurrency; i++ {
		go e.newInnerWorker(innerCh).run(workerCtx, e.workerWg)
	}
}

func (e *IndexLookUpJoin) newOuterWorker(resultCh, innerCh chan *lookUpJoinTask) *outerWorker {
	ow := &outerWorker{
		outerCtx:         e.outerCtx,
		ctx:              e.ctx,
		interlock:        e.children[0],
		resultCh:         resultCh,
		innerCh:          innerCh,
		batchSize:        32,
		maxBatchSize:     e.ctx.GetStochastikVars().IndexJoinBatchSize,
		parentMemTracker: e.memTracker,
		lookup:           e,
	}
	return ow
}

func (e *IndexLookUpJoin) newInnerWorker(taskCh chan *lookUpJoinTask) *innerWorker {
	// Since multiple inner workers run concurrently, we should copy the join's indexRanges for every worker to avoid data races.
	copiedRanges := make([]*ranger.Range, 0, len(e.indexRanges))
	for _, ran := range e.indexRanges {
		copiedRanges = append(copiedRanges, ran.Clone())
	}

	var innerStats *innerWorkerRuntimeStats
	if e.stats != nil {
		innerStats = &e.stats.innerWorker
	}
	iw := &innerWorker{
		innerCtx:      e.innerCtx,
		outerCtx:      e.outerCtx,
		taskCh:        taskCh,
		ctx:           e.ctx,
		interlockChk:  chunk.NewChunkWithCapacity(e.innerCtx.rowTypes, e.maxChunkSize),
		indexRanges:   copiedRanges,
		keyOff2IdxOff: e.keyOff2IdxOff,
		stats:         innerStats,
	}
	if e.lastDefCausHelper != nil {
		// nextCwf.TmpConstant needs to be reset for every individual
		// inner worker to avoid data races when the inner workers are
		// running concurrently.
		nextCwf := *e.lastDefCausHelper
		nextCwf.TmpConstant = make([]*memex.Constant, len(e.lastDefCausHelper.TmpConstant))
		for i := range e.lastDefCausHelper.TmpConstant {
			nextCwf.TmpConstant[i] = &memex.Constant{RetType: nextCwf.TargetDefCaus.RetType}
		}
		iw.nextDefCausCompareFilters = &nextCwf
	}
	return iw
}

// Next implements the InterlockingDirectorate interface.
func (e *IndexLookUpJoin) Next(ctx context.Context, req *chunk.Chunk) error {
	if e.isOuterJoin {
		atomic.StoreInt64(&e.requiredEvents, int64(req.RequiredEvents()))
	}
	req.Reset()
	e.joinResult.Reset()
	for {
		task, err := e.getFinishedTask(ctx)
		if err != nil {
			return err
		}
		if task == nil {
			return nil
		}
		startTime := time.Now()
		if e.innerIter == nil || e.innerIter.Current() == e.innerIter.End() {
			e.lookUpMatchedInners(task, task.cursor)
			e.innerIter = chunk.NewIterator4Slice(task.matchedInners)
			e.innerIter.Begin()
		}

		outerEvent := task.outerResult.GetEvent(task.cursor)
		if e.innerIter.Current() != e.innerIter.End() {
			matched, isNull, err := e.joiner.tryToMatchInners(outerEvent, e.innerIter, req)
			if err != nil {
				return err
			}
			task.hasMatch = task.hasMatch || matched
			task.hasNull = task.hasNull || isNull
		}
		if e.innerIter.Current() == e.innerIter.End() {
			if !task.hasMatch {
				e.joiner.onMissMatch(task.hasNull, outerEvent, req)
			}
			task.cursor.EventIdx++
			if int(task.cursor.EventIdx) == task.outerResult.GetChunk(int(task.cursor.ChkIdx)).NumEvents() {
				task.cursor.ChkIdx++
				task.cursor.EventIdx = 0
			}
			task.hasMatch = false
			task.hasNull = false
		}
		if e.stats != nil {
			atomic.AddInt64(&e.stats.probe, int64(time.Since(startTime)))
		}
		if req.IsFull() {
			return nil
		}
	}
}

func (e *IndexLookUpJoin) getFinishedTask(ctx context.Context) (*lookUpJoinTask, error) {
	task := e.task
	if task != nil && int(task.cursor.ChkIdx) < task.outerResult.NumChunks() {
		return task, nil
	}

	select {
	case task = <-e.resultCh:
	case <-ctx.Done():
		return nil, ctx.Err()
	}
	if task == nil {
		return nil, nil
	}

	select {
	case err := <-task.doneCh:
		if err != nil {
			return nil, err
		}
	case <-ctx.Done():
		return nil, ctx.Err()
	}

	e.task = task
	return task, nil
}

func (e *IndexLookUpJoin) lookUpMatchedInners(task *lookUpJoinTask, rowPtr chunk.EventPtr) {
	outerKey := task.encodedLookUpKeys[rowPtr.ChkIdx].GetEvent(int(rowPtr.EventIdx)).GetBytes(0)
	e.innerPtrBytes = task.lookupMap.Get(outerKey, e.innerPtrBytes[:0])
	task.matchedInners = task.matchedInners[:0]

	for _, b := range e.innerPtrBytes {
		ptr := *(*chunk.EventPtr)(unsafe.Pointer(&b[0]))
		matchedInner := task.innerResult.GetEvent(ptr)
		task.matchedInners = append(task.matchedInners, matchedInner)
	}
}

func (ow *outerWorker) run(ctx context.Context, wg *sync.WaitGroup) {
	defer trace.StartRegion(ctx, "IndexLookupJoinOuterWorker").End()
	defer func() {
		if r := recover(); r != nil {
			buf := make([]byte, 4096)
			stackSize := runtime.Stack(buf, false)
			buf = buf[:stackSize]
			logutil.Logger(ctx).Error("outerWorker panicked", zap.String("stack", string(buf)))
			task := &lookUpJoinTask{doneCh: make(chan error, 1)}
			task.doneCh <- errors.Errorf("%v", r)
			ow.pushToChan(ctx, task, ow.resultCh)
		}
		close(ow.resultCh)
		close(ow.innerCh)
		wg.Done()
	}()
	for {
		task, err := ow.buildTask(ctx)
		if err != nil {
			task.doneCh <- err
			ow.pushToChan(ctx, task, ow.resultCh)
			return
		}
		if task == nil {
			return
		}

		if finished := ow.pushToChan(ctx, task, ow.innerCh); finished {
			return
		}

		if finished := ow.pushToChan(ctx, task, ow.resultCh); finished {
			return
		}
	}
}

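// Note on ordering: the single outer worker publishes each task to innerCh
// (where an idle inner worker fills in the inner rows) and then to resultCh.
// Because the main thread consumes resultCh in the order the tasks were built
// and waits on each task's doneCh, the join output preserves the order of the
// outer rows even though inner lookups run concurrently.
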
func (ow *outerWorker) pushToChan(ctx context.Context, task *lookUpJoinTask, dst chan<- *lookUpJoinTask) bool {
	select {
	case <-ctx.Done():
		return true
	case dst <- task:
	}
	return false
}

// buildTask builds a lookUpJoinTask and reads outer rows.
// When err is not nil, task must not be nil so that the error can be sent to the main thread via the task.
func (ow *outerWorker) buildTask(ctx context.Context) (*lookUpJoinTask, error) {
	task := &lookUpJoinTask{
		doneCh:      make(chan error, 1),
		outerResult: newList(ow.interlock),
		lookupMap:   mvmap.NewMVMap(),
	}
	task.memTracker = memory.NewTracker(-1, -1)
	task.outerResult.GetMemTracker().AttachTo(task.memTracker)
	task.memTracker.AttachTo(ow.parentMemTracker)

	ow.increaseBatchSize()
	requiredEvents := ow.batchSize
	if ow.lookup.isOuterJoin {
		// If it is an outer join, push requiredEvents down.
		// Note: buildTask is triggered when `Open` is called, but
		// ow.lookup.requiredEvents is set when `Next` is called. Thus we check
		// whether it's 0 here.
		if parentRequired := int(atomic.LoadInt64(&ow.lookup.requiredEvents)); parentRequired != 0 {
			requiredEvents = parentRequired
		}
	}
	maxChunkSize := ow.ctx.GetStochastikVars().MaxChunkSize
	for requiredEvents > task.outerResult.Len() {
		chk := chunk.NewChunkWithCapacity(ow.outerCtx.rowTypes, maxChunkSize)
		chk = chk.SetRequiredEvents(requiredEvents, maxChunkSize)
		err := Next(ctx, ow.interlock, chk)
		if err != nil {
			return task, err
		}
		if chk.NumEvents() == 0 {
			break
		}

		task.outerResult.Add(chk)
	}
	if task.outerResult.Len() == 0 {
		return nil, nil
	}
	numChks := task.outerResult.NumChunks()
	if ow.filter != nil {
		task.outerMatch = make([][]bool, task.outerResult.NumChunks())
		var err error
		for i := 0; i < numChks; i++ {
			chk := task.outerResult.GetChunk(i)
			outerMatch := make([]bool, 0, chk.NumEvents())
			task.memTracker.Consume(int64(cap(outerMatch)))
			task.outerMatch[i], err = memex.VectorizedFilter(ow.ctx, ow.filter, chunk.NewIterator4Chunk(chk), outerMatch)
			if err != nil {
				return task, err
			}
		}
	}
	task.encodedLookUpKeys = make([]*chunk.Chunk, task.outerResult.NumChunks())
	for i := range task.encodedLookUpKeys {
		task.encodedLookUpKeys[i] = chunk.NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(allegrosql.TypeBlob)}, task.outerResult.GetChunk(i).NumEvents())
	}
	return task, nil
}

func (ow *outerWorker) increaseBatchSize() {
	if ow.batchSize < ow.maxBatchSize {
		ow.batchSize *= 2
	}
	if ow.batchSize > ow.maxBatchSize {
		ow.batchSize = ow.maxBatchSize
	}
}

func (iw *innerWorker) run(ctx context.Context, wg *sync.WaitGroup) {
	defer trace.StartRegion(ctx, "IndexLookupJoinInnerWorker").End()
	var task *lookUpJoinTask
	defer func() {
		if r := recover(); r != nil {
			buf := make([]byte, 4096)
			stackSize := runtime.Stack(buf, false)
			buf = buf[:stackSize]
			logutil.Logger(ctx).Error("innerWorker panicked", zap.String("stack", string(buf)))
			// "task != nil" is guaranteed when a panic happens here.
			task.doneCh <- errors.Errorf("%v", r)
		}
		wg.Done()
	}()

	for ok := true; ok; {
		select {
		case task, ok = <-iw.taskCh:
			if !ok {
				return
			}
		case <-ctx.Done():
			return
		}

		err := iw.handleTask(ctx, task)
		task.doneCh <- err
	}
}

type indexJoinLookUpContent struct {
	keys  []types.Causet
	event chunk.Event
}

func (iw *innerWorker) handleTask(ctx context.Context, task *lookUpJoinTask) error {
	if iw.stats != nil {
		start := time.Now()
		defer func() {
			atomic.AddInt64(&iw.stats.totalTime, int64(time.Since(start)))
		}()
	}
	lookUpContents, err := iw.constructLookupContent(task)
	if err != nil {
		return err
	}
	err = iw.fetchInnerResults(ctx, task, lookUpContents)
	if err != nil {
		return err
	}
	err = iw.buildLookUpMap(task)
	if err != nil {
		return err
	}
	return nil
}

func (iw *innerWorker) constructLookupContent(task *lookUpJoinTask) ([]*indexJoinLookUpContent, error) {
	if iw.stats != nil {
		start := time.Now()
		defer func() {
			atomic.AddInt64(&iw.stats.task, 1)
			atomic.AddInt64(&iw.stats.construct, int64(time.Since(start)))
		}()
	}
	lookUpContents := make([]*indexJoinLookUpContent, 0, task.outerResult.Len())
	keyBuf := make([]byte, 0, 64)
	for chkIdx := 0; chkIdx < task.outerResult.NumChunks(); chkIdx++ {
		chk := task.outerResult.GetChunk(chkIdx)
		numEvents := chk.NumEvents()
		for rowIdx := 0; rowIdx < numEvents; rowIdx++ {
			dLookUpKey, err := iw.constructCausetLookupKey(task, chkIdx, rowIdx)
			if err != nil {
				return nil, err
			}
			if dLookUpKey == nil {
				// Append a null to keep the lookUpKeys the same length as the outer result.
				task.encodedLookUpKeys[chkIdx].AppendNull(0)
				continue
			}
			keyBuf = keyBuf[:0]
			keyBuf, err = codec.EncodeKey(iw.ctx.GetStochastikVars().StmtCtx, keyBuf, dLookUpKey...)
			if err != nil {
				return nil, err
			}
			// CausetStore the encoded lookup key in the chunk, so we can use it to look up the matched inners directly.
			task.encodedLookUpKeys[chkIdx].AppendBytes(0, keyBuf)
			if iw.hasPrefixDefCaus {
				for i := range iw.outerCtx.keyDefCauss {
					// If it's a prefix defCausumn, try to fix it.
					if iw.defCausLens[i] != types.UnspecifiedLength {
						ranger.CutCausetByPrefixLen(&dLookUpKey[i], iw.defCausLens[i], iw.rowTypes[iw.keyDefCauss[i]])
					}
				}
				// dLookUpKey is sorted and deduplicated at sortAndDedupLookUpContents,
				// so we don't need to do it here.
			}
			lookUpContents = append(lookUpContents, &indexJoinLookUpContent{keys: dLookUpKey, event: chk.GetEvent(rowIdx)})
		}
	}

	for i := range task.encodedLookUpKeys {
		task.memTracker.Consume(task.encodedLookUpKeys[i].MemoryUsage())
	}
	lookUpContents = iw.sortAndDedupLookUpContents(lookUpContents)
	return lookUpContents, nil
}

func (iw *innerWorker) constructCausetLookupKey(task *lookUpJoinTask, chkIdx, rowIdx int) ([]types.Causet, error) {
	if task.outerMatch != nil && !task.outerMatch[chkIdx][rowIdx] {
		return nil, nil
	}
	outerEvent := task.outerResult.GetChunk(chkIdx).GetEvent(rowIdx)
	sc := iw.ctx.GetStochastikVars().StmtCtx
	keyLen := len(iw.keyDefCauss)
	dLookupKey := make([]types.Causet, 0, keyLen)
	for i, keyDefCaus := range iw.outerCtx.keyDefCauss {
		outerValue := outerEvent.GetCauset(keyDefCaus, iw.outerCtx.rowTypes[keyDefCaus])
		// The join-on condition is guaranteed to be an equality condition in
		// IndexNestedLoopJoin, thus the filter is always false if outerValue
		// is null, and we don't need to look it up.
		if outerValue.IsNull() {
			return nil, nil
		}
		innerDefCausType := iw.rowTypes[iw.keyDefCauss[i]]
		innerValue, err := outerValue.ConvertTo(sc, innerDefCausType)
		if err != nil {
			// If the converted outerValue overflows, we don't need to look it up.
			if terror.ErrorEqual(err, types.ErrOverflow) {
				return nil, nil
			}
			if terror.ErrorEqual(err, types.ErrTruncated) && (innerDefCausType.Tp == allegrosql.TypeSet || innerDefCausType.Tp == allegrosql.TypeEnum) {
				return nil, nil
			}
			return nil, err
		}
		cmp, err := outerValue.CompareCauset(sc, &innerValue)
		if err != nil {
			return nil, err
		}
		if cmp != 0 {
			// If the converted outerValue is not equal to the original outerValue, we don't need to look it up.
			return nil, nil
		}
		dLookupKey = append(dLookupKey, innerValue)
	}
	return dLookupKey, nil
}

func (iw *innerWorker) sortAndDedupLookUpContents(lookUpContents []*indexJoinLookUpContent) []*indexJoinLookUpContent {
	if len(lookUpContents) < 2 {
		return lookUpContents
	}
	sc := iw.ctx.GetStochastikVars().StmtCtx
	sort.Slice(lookUpContents, func(i, j int) bool {
		cmp := compareEvent(sc, lookUpContents[i].keys, lookUpContents[j].keys)
		if cmp != 0 || iw.nextDefCausCompareFilters == nil {
			return cmp < 0
		}
		return iw.nextDefCausCompareFilters.CompareEvent(lookUpContents[i].event, lookUpContents[j].event) < 0
	})
	deDupedLookupKeys := lookUpContents[:1]
	for i := 1; i < len(lookUpContents); i++ {
		cmp := compareEvent(sc, lookUpContents[i].keys, lookUpContents[i-1].keys)
		if cmp != 0 || (iw.nextDefCausCompareFilters != nil && iw.nextDefCausCompareFilters.CompareEvent(lookUpContents[i].event, lookUpContents[i-1].event) != 0) {
			deDupedLookupKeys = append(deDupedLookupKeys, lookUpContents[i])
		}
	}
	return deDupedLookupKeys
}

func compareEvent(sc *stmtctx.StatementContext, left, right []types.Causet) int {
	for idx := 0; idx < len(left); idx++ {
		cmp, err := left[idx].CompareCauset(sc, &right[idx])
		// We only compare rows of the same type, so no error will be returned.
		terror.Log(err)
		if cmp > 0 {
			return 1
		} else if cmp < 0 {
			return -1
		}
	}
	return 0
}

func (iw *innerWorker) fetchInnerResults(ctx context.Context, task *lookUpJoinTask, lookUpContent []*indexJoinLookUpContent) error {
	if iw.stats != nil {
		start := time.Now()
		defer func() {
			atomic.AddInt64(&iw.stats.fetch, int64(time.Since(start)))
		}()
	}
	innerInterDirc, err := iw.readerBuilder.buildInterlockingDirectorateForIndexJoin(ctx, lookUpContent, iw.indexRanges, iw.keyOff2IdxOff, iw.nextDefCausCompareFilters)
	if err != nil {
		return err
	}
	defer terror.Call(innerInterDirc.Close)
	innerResult := chunk.NewList(retTypes(innerInterDirc), iw.ctx.GetStochastikVars().MaxChunkSize, iw.ctx.GetStochastikVars().MaxChunkSize)
	innerResult.GetMemTracker().SetLabel(memory.LabelForBuildSideResult)
	innerResult.GetMemTracker().AttachTo(task.memTracker)
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		err := Next(ctx, innerInterDirc, iw.interlockChk)
		if err != nil {
			return err
		}
		if iw.interlockChk.NumEvents() == 0 {
			break
		}
		innerResult.Add(iw.interlockChk)
		iw.interlockChk = newFirstChunk(innerInterDirc)
	}
	task.innerResult = innerResult
	return nil
}

func (iw *innerWorker) buildLookUpMap(task *lookUpJoinTask) error {
	if iw.stats != nil {
		start := time.Now()
		defer func() {
			atomic.AddInt64(&iw.stats.build, int64(time.Since(start)))
		}()
	}
	keyBuf := make([]byte, 0, 64)
	valBuf := make([]byte, 8)
	for i := 0; i < task.innerResult.NumChunks(); i++ {
		chk := task.innerResult.GetChunk(i)
		for j := 0; j < chk.NumEvents(); j++ {
			innerEvent := chk.GetEvent(j)
			if iw.hasNullInJoinKey(innerEvent) {
				continue
			}

			keyBuf = keyBuf[:0]
			for _, keyDefCaus := range iw.keyDefCauss {
				d := innerEvent.GetCauset(keyDefCaus, iw.rowTypes[keyDefCaus])
				var err error
				keyBuf, err = codec.EncodeKey(iw.ctx.GetStochastikVars().StmtCtx, keyBuf, d)
				if err != nil {
					return err
				}
			}
			rowPtr := chunk.EventPtr{ChkIdx: uint32(i), EventIdx: uint32(j)}
			*(*chunk.EventPtr)(unsafe.Pointer(&valBuf[0])) = rowPtr
			task.lookupMap.Put(keyBuf, valBuf)
		}
	}
	return nil
}

func (iw *innerWorker) hasNullInJoinKey(event chunk.Event) bool {
	for _, ordinal := range iw.keyDefCauss {
		if event.IsNull(ordinal) {
			return true
		}
	}
	return false
}

// Close implements the InterlockingDirectorate interface.
func (e *IndexLookUpJoin) Close() error {
	if e.cancelFunc != nil {
		e.cancelFunc()
	}
	e.workerWg.Wait()
	e.memTracker = nil
	e.task = nil
	return e.baseInterlockingDirectorate.Close()
}

type indexLookUpJoinRuntimeStats struct {
	concurrency int
	probe       int64
	innerWorker innerWorkerRuntimeStats
}

type innerWorkerRuntimeStats struct {
	totalTime int64
	task      int64
	construct int64
	fetch     int64
	build     int64
	join      int64
}

func (e *indexLookUpJoinRuntimeStats) String() string {
	buf := bytes.NewBuffer(make([]byte, 0, 16))
	if e.innerWorker.totalTime > 0 {
		buf.WriteString("inner:{total:")
		buf.WriteString(time.Duration(e.innerWorker.totalTime).String())
		buf.WriteString(", concurrency:")
		if e.concurrency > 0 {
			buf.WriteString(strconv.Itoa(e.concurrency))
		} else {
			buf.WriteString("OFF")
		}
		buf.WriteString(", task:")
		buf.WriteString(strconv.FormatInt(e.innerWorker.task, 10))
		buf.WriteString(", construct:")
		buf.WriteString(time.Duration(e.innerWorker.construct).String())
		buf.WriteString(", fetch:")
		buf.WriteString(time.Duration(e.innerWorker.fetch).String())
		buf.WriteString(", build:")
		buf.WriteString(time.Duration(e.innerWorker.build).String())
		if e.innerWorker.join > 0 {
			buf.WriteString(", join:")
			buf.WriteString(time.Duration(e.innerWorker.join).String())
		}
		buf.WriteString("}")
	}
	if e.probe > 0 {
		buf.WriteString(", probe:")
		buf.WriteString(time.Duration(e.probe).String())
	}
	return buf.String()
}

func (e *indexLookUpJoinRuntimeStats) Clone() execdetails.RuntimeStats {
	return &indexLookUpJoinRuntimeStats{
		concurrency: e.concurrency,
		probe:       e.probe,
		innerWorker: e.innerWorker,
	}
}

func (e *indexLookUpJoinRuntimeStats) Merge(rs execdetails.RuntimeStats) {
	tmp, ok := rs.(*indexLookUpJoinRuntimeStats)
	if !ok {
		return
	}
	e.probe += tmp.probe
	e.innerWorker.totalTime += tmp.innerWorker.totalTime
	e.innerWorker.task += tmp.innerWorker.task
	e.innerWorker.construct += tmp.innerWorker.construct
	e.innerWorker.fetch += tmp.innerWorker.fetch
	e.innerWorker.build += tmp.innerWorker.build
	e.innerWorker.join += tmp.innerWorker.join
}

// Tp implements the RuntimeStats interface.
func (e *indexLookUpJoinRuntimeStats) Tp() int {
	return execdetails.TpIndexLookUpJoinRuntimeStats
}
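
// For reference, a populated indexLookUpJoinRuntimeStats renders via String()
// roughly as follows (durations and counts are illustrative only):
//
//	inner:{total:1s, concurrency:5, task:2, construct:100ms, fetch:700ms, build:50ms}, probe:200ms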