github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/interlock/index_lookup_hash_join.go

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
         // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package interlock
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"hash"
    20  	"hash/fnv"
    21  	"runtime/trace"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"github.com/whtcorpsinc/errors"
    27  	"github.com/whtcorpsinc/failpoint"
    28  	causetembedded "github.com/whtcorpsinc/milevadb/causet/embedded"
    29  	"github.com/whtcorpsinc/milevadb/memex"
    30  	"github.com/whtcorpsinc/milevadb/soliton"
    31  	"github.com/whtcorpsinc/milevadb/soliton/chunk"
    32  	"github.com/whtcorpsinc/milevadb/soliton/codec"
    33  	"github.com/whtcorpsinc/milevadb/soliton/memory"
    34  	"github.com/whtcorpsinc/milevadb/soliton/ranger"
    35  )
    36  
    37  // numResChkHold indicates the number of resource chunks that an inner worker
    38  // holds at the same time.
     39  // It is used in the following two cases:
    40  // 1. IndexMergeJoin
    41  // 2. IndexNestedLoopHashJoin:
    42  //    It's used when IndexNestedLoopHashJoin.keepOuterOrder is true.
    43  //    Otherwise, there will be at most `concurrency` resource chunks throughout
    44  //    the execution of IndexNestedLoopHashJoin.
    45  const numResChkHold = 4
    46  
    47  // IndexNestedLoopHashJoin employs one outer worker and N inner workers to
     48  // execute concurrently. The output order is not guaranteed.
    49  //
    50  // The execution flow is very similar to IndexLookUpReader:
    51  // 1. The outer worker reads N outer rows, builds a task and sends it to the
    52  // inner worker channel.
    53  // 2. The inner worker receives the tasks and does 3 things for every task:
     54  //    1. builds a hash lookup map from the outer rows
     55  //    2. builds key ranges from the outer rows and fetches the inner rows
     56  //    3. probes the hash lookup map and sends the join result to the main thread channel.
     57  //    Note: steps 1 and 2 run concurrently.
    58  // 3. The main thread receives the join results.
    59  type IndexNestedLoopHashJoin struct {
    60  	IndexLookUpJoin
    61  	resultCh          chan *indexHashJoinResult
    62  	joinChkResourceCh []chan *chunk.Chunk
     63  	// We build an individual joiner for each inner worker when using chunk-based
     64  	// execution, to avoid concurrent access to joiner.chk and joiner.selected.
    65  	joiners        []joiner
    66  	keepOuterOrder bool
    67  	curTask        *indexHashJoinTask
    68  	// taskCh is only used when `keepOuterOrder` is true.
    69  	taskCh chan *indexHashJoinTask
    70  
    71  	stats *indexLookUpJoinRuntimeStats
    72  }
    73  
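         // indexHashJoinOuterWorker reads outer events in batches and wraps them into
         // indexHashJoinTasks for the inner workers.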
    74  type indexHashJoinOuterWorker struct {
    75  	outerWorker
    76  	innerCh        chan *indexHashJoinTask
    77  	keepOuterOrder bool
     78  	// taskCh is only used when the outer order needs to be preserved.
    79  	taskCh chan *indexHashJoinTask
    80  }
    81  
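         // indexHashJoinInnerWorker handles one task at a time: it builds the hash lookup
         // map of the outer events, fetches the matching inner events and produces the
         // join results.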
    82  type indexHashJoinInnerWorker struct {
    83  	innerWorker
    84  	matchedOuterPtrs  []chunk.EventPtr
    85  	joiner            joiner
    86  	joinChkResourceCh chan *chunk.Chunk
     87  	// resultCh is valid only when indexNestedLoopHashJoin does not need to keep
     88  	// the outer order. Otherwise, it will be nil.
    89  	resultCh         chan *indexHashJoinResult
    90  	taskCh           <-chan *indexHashJoinTask
    91  	wg               *sync.WaitGroup
    92  	joinKeyBuf       []byte
    93  	outerEventStatus []outerEventStatusFlag
    94  }
    95  
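         // indexHashJoinResult carries one result chunk together with the channel the
         // chunk should be recycled to after it has been consumed.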
    96  type indexHashJoinResult struct {
    97  	chk *chunk.Chunk
    98  	err error
    99  	src chan<- *chunk.Chunk
   100  }
   101  
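         // indexHashJoinTask extends lookUpJoinTask with the per-task state of the hash
         // join: the outer event status, the hash lookup map and, when the outer order is
         // preserved, a dedicated result channel and the matched inner event ptrs.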
   102  type indexHashJoinTask struct {
   103  	*lookUpJoinTask
   104  	outerEventStatus [][]outerEventStatusFlag
   105  	lookupMap        baseHashBlock
   106  	err              error
   107  	keepOuterOrder   bool
    108  	// resultCh is only used when the outer order needs to be preserved.
    109  	resultCh chan *indexHashJoinResult
    110  	// matchedInnerEventPtrs is only valid when the outer order needs to be
    111  	// preserved. Otherwise, it will be nil.
    112  	// len(matchedInnerEventPtrs) equals
    113  	// lookUpJoinTask.outerResult.NumChunks(), and every element
    114  	// matchedInnerEventPtrs[chkIdx][rowIdx] holds the matched inner event ptrs
    115  	// of the corresponding outer event.
   116  	matchedInnerEventPtrs [][][]chunk.EventPtr
   117  }
   118  
   119  // Open implements the IndexNestedLoopHashJoin InterlockingDirectorate interface.
   120  func (e *IndexNestedLoopHashJoin) Open(ctx context.Context) error {
    121  	// Be careful, very dirty hack in this line!!!
    122  	// IndexLookUpJoin needs to rebuild the interlock (the dataReaderBuilder) during
    123  	// execution. However, `interlock.Next()` is lazily evaluated: it only runs when
    124  	// the RecordSet result is drained.
    125  	// Lazy evaluation means the saved stochastik context may change between the
    126  	// interlock's building and its running.
    127  	// A specific sequence, for example:
    128  	//
    129  	// e := buildInterlockingDirectorate()   // txn at build time
    130  	// recordSet := runStmt(e)
    131  	// stochastik.CommitTxn()    // txn closed
    132  	// recordSet.Next()
    133  	// e.dataReaderBuilder.Build() // txn is used again, but it is already closed
    134  	//
    135  	// The trick here is that `getSnapshotTS` caches the snapshot ts in the dataReaderBuilder,
    136  	// so even if the txn is destroyed later, the dataReaderBuilder can still use the
    137  	// cached snapshot ts to construct the PosetDag.
   138  	_, err := e.innerCtx.readerBuilder.getSnapshotTS()
   139  	if err != nil {
   140  		return err
   141  	}
   142  
   143  	err = e.children[0].Open(ctx)
   144  	if err != nil {
   145  		return err
   146  	}
   147  	e.memTracker = memory.NewTracker(e.id, -1)
   148  	e.memTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.MemTracker)
   149  	e.innerPtrBytes = make([][]byte, 0, 8)
   150  	if e.runtimeStats != nil {
   151  		e.stats = &indexLookUpJoinRuntimeStats{}
   152  		e.ctx.GetStochastikVars().StmtCtx.RuntimeStatsDefCausl.RegisterStats(e.id, e.stats)
   153  	}
   154  	e.startWorkers(ctx)
   155  	return nil
   156  }
   157  
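         // startWorkers starts one outer worker and `concurrency` inner workers under a
         // cancellable context, prepares the join chunk resource channels, and spawns
         // wait4JoinWorkers to close the result/task channels once all workers finish.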
   158  func (e *IndexNestedLoopHashJoin) startWorkers(ctx context.Context) {
   159  	concurrency := e.ctx.GetStochastikVars().IndexLookupJoinConcurrency()
   160  	if e.stats != nil {
   161  		e.stats.concurrency = concurrency
   162  	}
   163  	workerCtx, cancelFunc := context.WithCancel(ctx)
   164  	e.cancelFunc = cancelFunc
   165  	innerCh := make(chan *indexHashJoinTask, concurrency)
   166  	if e.keepOuterOrder {
   167  		e.taskCh = make(chan *indexHashJoinTask, concurrency)
   168  	}
   169  	e.workerWg.Add(1)
   170  	ow := e.newOuterWorker(innerCh)
   171  	go soliton.WithRecovery(func() { ow.run(workerCtx) }, e.finishJoinWorkers)
   172  
   173  	if !e.keepOuterOrder {
   174  		e.resultCh = make(chan *indexHashJoinResult, concurrency)
   175  	} else {
    176  		// When `keepOuterOrder` is true, each task holds its own `resultCh`,
    177  		// so we do not need a global resultCh.
   178  		e.resultCh = nil
   179  	}
   180  	e.joinChkResourceCh = make([]chan *chunk.Chunk, concurrency)
   181  	for i := 0; i < concurrency; i++ {
   182  		if !e.keepOuterOrder {
   183  			e.joinChkResourceCh[i] = make(chan *chunk.Chunk, 1)
   184  			e.joinChkResourceCh[i] <- newFirstChunk(e)
   185  		} else {
   186  			e.joinChkResourceCh[i] = make(chan *chunk.Chunk, numResChkHold)
   187  			for j := 0; j < numResChkHold; j++ {
   188  				e.joinChkResourceCh[i] <- newFirstChunk(e)
   189  			}
   190  		}
   191  	}
   192  
   193  	e.workerWg.Add(concurrency)
   194  	for i := 0; i < concurrency; i++ {
   195  		workerID := i
   196  		go soliton.WithRecovery(func() { e.newInnerWorker(innerCh, workerID).run(workerCtx, cancelFunc) }, e.finishJoinWorkers)
   197  	}
   198  	go e.wait4JoinWorkers()
   199  }
   200  
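         // finishJoinWorkers is the recovery handler of every worker goroutine: a panic is
         // forwarded as an error to resultCh and the whole join is cancelled; in any case
         // the worker is marked as done on workerWg.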
   201  func (e *IndexNestedLoopHashJoin) finishJoinWorkers(r interface{}) {
   202  	if r != nil {
   203  		e.resultCh <- &indexHashJoinResult{
   204  			err: errors.New(fmt.Sprintf("%v", r)),
   205  		}
   206  		if e.cancelFunc != nil {
   207  			e.cancelFunc()
   208  		}
   209  	}
   210  	e.workerWg.Done()
   211  }
   212  
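         // wait4JoinWorkers waits for all worker goroutines to finish, then closes resultCh
         // and taskCh so that Next can observe the end of the result stream.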
   213  func (e *IndexNestedLoopHashJoin) wait4JoinWorkers() {
   214  	e.workerWg.Wait()
   215  	if e.resultCh != nil {
   216  		close(e.resultCh)
   217  	}
   218  	if e.taskCh != nil {
   219  		close(e.taskCh)
   220  	}
   221  }
   222  
   223  // Next implements the IndexNestedLoopHashJoin InterlockingDirectorate interface.
   224  func (e *IndexNestedLoopHashJoin) Next(ctx context.Context, req *chunk.Chunk) error {
   225  	req.Reset()
   226  	if e.keepOuterOrder {
   227  		return e.runInOrder(ctx, req)
   228  	}
   229  	// unordered run
   230  	var (
   231  		result *indexHashJoinResult
   232  		ok     bool
   233  	)
   234  	select {
   235  	case result, ok = <-e.resultCh:
   236  		if !ok {
   237  			return nil
   238  		}
   239  		if result.err != nil {
   240  			return result.err
   241  		}
   242  	case <-ctx.Done():
   243  		return ctx.Err()
   244  	}
   245  	req.SwapDeferredCausets(result.chk)
   246  	result.src <- result.chk
   247  	return nil
   248  }
   249  
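         // runInOrder fetches join results task by task, in the order the tasks were built
         // by the outer worker, so that the outer order is preserved.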
   250  func (e *IndexNestedLoopHashJoin) runInOrder(ctx context.Context, req *chunk.Chunk) error {
   251  	var (
   252  		result *indexHashJoinResult
   253  		ok     bool
   254  	)
   255  	for {
   256  		if e.isDryUpTasks(ctx) {
   257  			return nil
   258  		}
   259  		select {
   260  		case result, ok = <-e.curTask.resultCh:
   261  			if !ok {
   262  				e.curTask = nil
   263  				continue
   264  			}
   265  			if result.err != nil {
   266  				return result.err
   267  			}
   268  		case <-ctx.Done():
   269  			return ctx.Err()
   270  		}
   271  		req.SwapDeferredCausets(result.chk)
   272  		result.src <- result.chk
   273  		return nil
   274  	}
   275  }
   276  
   277  // isDryUpTasks indicates whether all the tasks have been processed.
   278  func (e *IndexNestedLoopHashJoin) isDryUpTasks(ctx context.Context) bool {
   279  	if e.curTask != nil {
   280  		return false
   281  	}
   282  	var ok bool
   283  	select {
   284  	case e.curTask, ok = <-e.taskCh:
   285  		if !ok {
   286  			return true
   287  		}
   288  	case <-ctx.Done():
   289  		return true
   290  	}
   291  	return false
   292  }
   293  
   294  // Close implements the IndexNestedLoopHashJoin InterlockingDirectorate interface.
   295  func (e *IndexNestedLoopHashJoin) Close() error {
   296  	if e.cancelFunc != nil {
   297  		e.cancelFunc()
   298  		e.cancelFunc = nil
   299  	}
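         	// Drain the result and task channels so that workers blocked on a send can exit.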
   300  	if e.resultCh != nil {
   301  		for range e.resultCh {
   302  		}
   303  		e.resultCh = nil
   304  	}
   305  	if e.taskCh != nil {
   306  		for range e.taskCh {
   307  		}
   308  		e.taskCh = nil
   309  	}
   310  	for i := range e.joinChkResourceCh {
   311  		close(e.joinChkResourceCh[i])
   312  	}
   313  	e.joinChkResourceCh = nil
   314  	return e.baseInterlockingDirectorate.Close()
   315  }
   316  
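         // run keeps building tasks from the outer child and pushes them to the inner
         // workers' channel (and, when the outer order must be preserved, to taskCh for the
         // main thread) until the outer child is exhausted, an error occurs or the context
         // is cancelled.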
   317  func (ow *indexHashJoinOuterWorker) run(ctx context.Context) {
   318  	defer trace.StartRegion(ctx, "IndexHashJoinOuterWorker").End()
   319  	defer close(ow.innerCh)
   320  	for {
   321  		task, err := ow.buildTask(ctx)
   322  		failpoint.Inject("testIndexHashJoinOuterWorkerErr", func() {
    323  			err = errors.New("mockIndexHashJoinOuterWorkerErr")
   324  		})
   325  		if err != nil {
   326  			task = &indexHashJoinTask{err: err}
   327  			if ow.keepOuterOrder {
   328  				task.keepOuterOrder, task.resultCh = true, make(chan *indexHashJoinResult, 1)
   329  				ow.pushToChan(ctx, task, ow.taskCh)
   330  			}
   331  			ow.pushToChan(ctx, task, ow.innerCh)
   332  			return
   333  		}
   334  		if task == nil {
   335  			return
   336  		}
   337  		if finished := ow.pushToChan(ctx, task, ow.innerCh); finished {
   338  			return
   339  		}
   340  		if ow.keepOuterOrder {
   341  			if finished := ow.pushToChan(ctx, task, ow.taskCh); finished {
   342  				return
   343  			}
   344  		}
   345  	}
   346  }
   347  
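         // buildTask wraps outerWorker.buildTask and attaches the per-task bookkeeping: the
         // outer event status matrix and, when the outer order must be preserved, a
         // dedicated resultCh and the matchedInnerEventPtrs slots.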
   348  func (ow *indexHashJoinOuterWorker) buildTask(ctx context.Context) (*indexHashJoinTask, error) {
   349  	task, err := ow.outerWorker.buildTask(ctx)
   350  	if task == nil || err != nil {
   351  		return nil, err
   352  	}
   353  	var (
   354  		resultCh              chan *indexHashJoinResult
   355  		matchedInnerEventPtrs [][][]chunk.EventPtr
   356  	)
   357  	if ow.keepOuterOrder {
   358  		resultCh = make(chan *indexHashJoinResult, numResChkHold)
   359  		matchedInnerEventPtrs = make([][][]chunk.EventPtr, task.outerResult.NumChunks())
   360  		for i := range matchedInnerEventPtrs {
   361  			matchedInnerEventPtrs[i] = make([][]chunk.EventPtr, task.outerResult.GetChunk(i).NumEvents())
   362  		}
   363  	}
   364  	numChks := task.outerResult.NumChunks()
   365  	outerEventStatus := make([][]outerEventStatusFlag, numChks)
   366  	for i := 0; i < numChks; i++ {
   367  		outerEventStatus[i] = make([]outerEventStatusFlag, task.outerResult.GetChunk(i).NumEvents())
   368  	}
   369  	return &indexHashJoinTask{
   370  		lookUpJoinTask:        task,
   371  		outerEventStatus:      outerEventStatus,
   372  		keepOuterOrder:        ow.keepOuterOrder,
   373  		resultCh:              resultCh,
   374  		matchedInnerEventPtrs: matchedInnerEventPtrs,
   375  	}, nil
   376  }
   377  
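         // pushToChan sends task to dst; it returns true if the context is done before the
         // send completes.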
   378  func (ow *indexHashJoinOuterWorker) pushToChan(ctx context.Context, task *indexHashJoinTask, dst chan<- *indexHashJoinTask) bool {
   379  	select {
   380  	case <-ctx.Done():
   381  		return true
   382  	case dst <- task:
   383  	}
   384  	return false
   385  }
   386  
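         // newOuterWorker constructs the single outer worker that reads outer events in
         // batches and builds lookup tasks for the inner workers.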
   387  func (e *IndexNestedLoopHashJoin) newOuterWorker(innerCh chan *indexHashJoinTask) *indexHashJoinOuterWorker {
   388  	ow := &indexHashJoinOuterWorker{
   389  		outerWorker: outerWorker{
   390  			outerCtx:         e.outerCtx,
   391  			ctx:              e.ctx,
   392  			interlock:        e.children[0],
   393  			batchSize:        32,
   394  			maxBatchSize:     e.ctx.GetStochastikVars().IndexJoinBatchSize,
   395  			parentMemTracker: e.memTracker,
   396  			lookup:           &e.IndexLookUpJoin,
   397  		},
   398  		innerCh:        innerCh,
   399  		keepOuterOrder: e.keepOuterOrder,
   400  		taskCh:         e.taskCh,
   401  	}
   402  	return ow
   403  }
   404  
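         // newInnerWorker builds the inner worker for one goroutine. The index ranges and,
         // if present, lastDefCausHelper's TmpConstant are copied per worker so that
         // concurrently running workers do not share mutable state.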
   405  func (e *IndexNestedLoopHashJoin) newInnerWorker(taskCh chan *indexHashJoinTask, workerID int) *indexHashJoinInnerWorker {
   406  	// Since multiple inner workers run concurrently, we should copy join's indexRanges for every worker to avoid data race.
   407  	copiedRanges := make([]*ranger.Range, 0, len(e.indexRanges))
   408  	for _, ran := range e.indexRanges {
   409  		copiedRanges = append(copiedRanges, ran.Clone())
   410  	}
   411  	var innerStats *innerWorkerRuntimeStats
   412  	if e.stats != nil {
   413  		innerStats = &e.stats.innerWorker
   414  	}
   415  	iw := &indexHashJoinInnerWorker{
   416  		innerWorker: innerWorker{
   417  			innerCtx:      e.innerCtx,
   418  			outerCtx:      e.outerCtx,
   419  			ctx:           e.ctx,
   420  			interlockChk:  chunk.NewChunkWithCapacity(e.innerCtx.rowTypes, e.maxChunkSize),
   421  			indexRanges:   copiedRanges,
   422  			keyOff2IdxOff: e.keyOff2IdxOff,
   423  			stats:         innerStats,
   424  		},
   425  		taskCh:            taskCh,
   426  		joiner:            e.joiners[workerID],
   427  		joinChkResourceCh: e.joinChkResourceCh[workerID],
   428  		resultCh:          e.resultCh,
   429  		matchedOuterPtrs:  make([]chunk.EventPtr, 0, e.maxChunkSize),
   430  		joinKeyBuf:        make([]byte, 1),
   431  		outerEventStatus:  make([]outerEventStatusFlag, 0, e.maxChunkSize),
   432  	}
   433  	if e.lastDefCausHelper != nil {
   434  		// nextCwf.TmpConstant needs to be reset for every individual
    435  		// inner worker to avoid data races when the inner workers are running
   436  		// concurrently.
   437  		nextCwf := *e.lastDefCausHelper
   438  		nextCwf.TmpConstant = make([]*memex.Constant, len(e.lastDefCausHelper.TmpConstant))
   439  		for i := range e.lastDefCausHelper.TmpConstant {
   440  			nextCwf.TmpConstant[i] = &memex.Constant{RetType: nextCwf.TargetDefCaus.RetType}
   441  		}
   442  		iw.nextDefCausCompareFilters = &nextCwf
   443  	}
   444  	return iw
   445  }
   446  
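         // run is the inner worker's main loop: it takes tasks from taskCh and handles them
         // one by one; errors and, in the unordered case, the final non-empty chunk are sent
         // to the proper result channel.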
   447  func (iw *indexHashJoinInnerWorker) run(ctx context.Context, cancelFunc context.CancelFunc) {
   448  	defer trace.StartRegion(ctx, "IndexHashJoinInnerWorker").End()
   449  	var task *indexHashJoinTask
   450  	joinResult, ok := iw.getNewJoinResult(ctx)
   451  	if !ok {
   452  		cancelFunc()
   453  		return
   454  	}
   455  	h, resultCh := fnv.New64(), iw.resultCh
   456  	for {
   457  		select {
   458  		case <-ctx.Done():
   459  			return
   460  		case task, ok = <-iw.taskCh:
   461  		}
   462  		if !ok {
   463  			break
   464  		}
   465  		// We need to init resultCh before the err is returned.
   466  		if task.keepOuterOrder {
   467  			resultCh = task.resultCh
   468  		}
   469  		if task.err != nil {
   470  			joinResult.err = task.err
   471  			break
   472  		}
   473  		err := iw.handleTask(ctx, task, joinResult, h, resultCh)
   474  		if err != nil {
   475  			joinResult.err = err
   476  			break
   477  		}
   478  		if task.keepOuterOrder {
   479  			// We need to get a new result holder here because the old
    480  			// `joinResult` has been sent to the `resultCh` or to the
   481  			// `joinChkResourceCh`.
   482  			joinResult, ok = iw.getNewJoinResult(ctx)
   483  			if !ok {
   484  				cancelFunc()
   485  				return
   486  			}
   487  		}
   488  	}
   489  	failpoint.Inject("testIndexHashJoinInnerWorkerErr", func() {
    490  		joinResult.err = errors.New("mockIndexHashJoinInnerWorkerErr")
   491  	})
   492  	if joinResult.err != nil {
   493  		resultCh <- joinResult
   494  		return
   495  	}
    496  	// When task.keepOuterOrder is true (resultCh != iw.resultCh), the last
    497  	// joinResult is checked when a task has been processed, so we do
    498  	// not need to check it here again.
   499  	if resultCh == iw.resultCh && joinResult.chk != nil && joinResult.chk.NumEvents() > 0 {
   500  		select {
   501  		case resultCh <- joinResult:
   502  		case <-ctx.Done():
   503  			return
   504  		}
   505  	}
   506  }
   507  
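         // getNewJoinResult takes a free chunk from joinChkResourceCh and wraps it in an
         // indexHashJoinResult; it returns false when the context is done or the resource
         // channel is closed.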
   508  func (iw *indexHashJoinInnerWorker) getNewJoinResult(ctx context.Context) (*indexHashJoinResult, bool) {
   509  	joinResult := &indexHashJoinResult{
   510  		src: iw.joinChkResourceCh,
   511  	}
   512  	ok := true
   513  	select {
   514  	case joinResult.chk, ok = <-iw.joinChkResourceCh:
   515  	case <-ctx.Done():
   516  		return nil, false
   517  	}
   518  	return joinResult, ok
   519  }
   520  
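         // buildHashBlockForOuterResult hashes the join key of every usable outer event and
         // records its EventPtr in task.lookupMap; events filtered out by outerMatch or
         // containing a NULL join key are skipped.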
   521  func (iw *indexHashJoinInnerWorker) buildHashBlockForOuterResult(ctx context.Context, task *indexHashJoinTask, h hash.Hash64) {
   522  	if iw.stats != nil {
   523  		start := time.Now()
   524  		defer func() {
   525  			atomic.AddInt64(&iw.stats.build, int64(time.Since(start)))
   526  		}()
   527  	}
   528  	buf, numChks := make([]byte, 1), task.outerResult.NumChunks()
   529  	task.lookupMap = newUnsafeHashBlock(task.outerResult.Len())
   530  	for chkIdx := 0; chkIdx < numChks; chkIdx++ {
   531  		chk := task.outerResult.GetChunk(chkIdx)
   532  		numEvents := chk.NumEvents()
   533  	OUTER:
   534  		for rowIdx := 0; rowIdx < numEvents; rowIdx++ {
   535  			if task.outerMatch != nil && !task.outerMatch[chkIdx][rowIdx] {
   536  				continue
   537  			}
   538  			event := chk.GetEvent(rowIdx)
   539  			keyDefCausIdx := iw.outerCtx.keyDefCauss
   540  			for _, i := range keyDefCausIdx {
   541  				if event.IsNull(i) {
   542  					continue OUTER
   543  				}
   544  			}
   545  			h.Reset()
   546  			err := codec.HashChunkEvent(iw.ctx.GetStochastikVars().StmtCtx, h, event, iw.outerCtx.rowTypes, keyDefCausIdx, buf)
   547  			failpoint.Inject("testIndexHashJoinBuildErr", func() {
    548  				err = errors.New("mockIndexHashJoinBuildErr")
   549  			})
   550  			if err != nil {
   551  				// This panic will be recovered by the invoker.
   552  				panic(err.Error())
   553  			}
   554  			rowPtr := chunk.EventPtr{ChkIdx: uint32(chkIdx), EventIdx: uint32(rowIdx)}
   555  			task.lookupMap.Put(h.Sum64(), rowPtr)
   556  		}
   557  	}
   558  }
   559  
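         // fetchInnerResults constructs the lookup contents from the task's outer events and
         // delegates to innerWorker.fetchInnerResults to read the matching inner events.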
   560  func (iw *indexHashJoinInnerWorker) fetchInnerResults(ctx context.Context, task *lookUpJoinTask) error {
   561  	lookUpContents, err := iw.constructLookupContent(task)
   562  	if err != nil {
   563  		return err
   564  	}
   565  	return iw.innerWorker.fetchInnerResults(ctx, task, lookUpContents)
   566  }
   567  
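         // handleHashJoinInnerWorkerPanic is the recovery handler of the goroutine that
         // builds the hash lookup map; a panic is reported on resultCh and the wait group is
         // released.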
   568  func (iw *indexHashJoinInnerWorker) handleHashJoinInnerWorkerPanic(r interface{}) {
   569  	if r != nil {
   570  		iw.resultCh <- &indexHashJoinResult{err: errors.Errorf("%v", r)}
   571  	}
   572  	iw.wg.Done()
   573  }
   574  
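         // handleTask builds the hash lookup map of the task's outer events and fetches the
         // inner events concurrently, then probes the map to produce the join results,
         // either unordered or preserving the outer order.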
   575  func (iw *indexHashJoinInnerWorker) handleTask(ctx context.Context, task *indexHashJoinTask, joinResult *indexHashJoinResult, h hash.Hash64, resultCh chan *indexHashJoinResult) error {
   576  	var joinStartTime time.Time
   577  	if iw.stats != nil {
   578  		start := time.Now()
   579  		defer func() {
   580  			endTime := time.Now()
   581  			atomic.AddInt64(&iw.stats.totalTime, int64(endTime.Sub(start)))
   582  			atomic.AddInt64(&iw.stats.join, int64(endTime.Sub(joinStartTime)))
   583  		}()
   584  	}
   585  
   586  	iw.wg = &sync.WaitGroup{}
   587  	iw.wg.Add(1)
   588  	// TODO(XuHuaiyu): we may always use the smaller side to build the hashblock.
   589  	go soliton.WithRecovery(func() { iw.buildHashBlockForOuterResult(ctx, task, h) }, iw.handleHashJoinInnerWorkerPanic)
   590  	err := iw.fetchInnerResults(ctx, task.lookUpJoinTask)
   591  	if err != nil {
   592  		return err
   593  	}
   594  	iw.wg.Wait()
   595  
   596  	joinStartTime = time.Now()
   597  	if !task.keepOuterOrder {
   598  		return iw.doJoinUnordered(ctx, task, joinResult, h, resultCh)
   599  	}
   600  	return iw.doJoinInOrder(ctx, task, joinResult, h, resultCh)
   601  }
   602  
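         // doJoinUnordered probes the hash lookup map with every inner event and appends the
         // matched results to the current chunk, flushing it to resultCh whenever it is
         // full; outer events that never matched are then handled via onMissMatch.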
   603  func (iw *indexHashJoinInnerWorker) doJoinUnordered(ctx context.Context, task *indexHashJoinTask, joinResult *indexHashJoinResult, h hash.Hash64, resultCh chan *indexHashJoinResult) error {
   604  	var ok bool
   605  	iter := chunk.NewIterator4List(task.innerResult)
   606  	for event := iter.Begin(); event != iter.End(); event = iter.Next() {
   607  		ok, joinResult = iw.joinMatchedInnerEvent2Chunk(ctx, event, task, joinResult, h, iw.joinKeyBuf)
   608  		if !ok {
   609  			return errors.New("indexHashJoinInnerWorker.doJoinUnordered failed")
   610  		}
   611  	}
   612  	for chkIdx, outerEventStatus := range task.outerEventStatus {
   613  		chk := task.outerResult.GetChunk(chkIdx)
   614  		for rowIdx, val := range outerEventStatus {
   615  			if val == outerEventMatched {
   616  				continue
   617  			}
   618  			iw.joiner.onMissMatch(val == outerEventHasNull, chk.GetEvent(rowIdx), joinResult.chk)
   619  			if joinResult.chk.IsFull() {
   620  				select {
   621  				case resultCh <- joinResult:
   622  				case <-ctx.Done():
   623  					return ctx.Err()
   624  				}
   625  				joinResult, ok = iw.getNewJoinResult(ctx)
   626  				if !ok {
   627  					return errors.New("indexHashJoinInnerWorker.doJoinUnordered failed")
   628  				}
   629  			}
   630  		}
   631  	}
   632  	return nil
   633  }
   634  
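         // getMatchedOuterEvents returns the outer events whose join key equals that of
         // innerEvent, together with their EventPtrs; hash collisions are filtered out by an
         // exact key comparison, and for semi joins outer events that are already matched
         // are skipped.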
   635  func (iw *indexHashJoinInnerWorker) getMatchedOuterEvents(innerEvent chunk.Event, task *indexHashJoinTask, h hash.Hash64, buf []byte) (matchedEvents []chunk.Event, matchedEventPtr []chunk.EventPtr, err error) {
   636  	h.Reset()
   637  	err = codec.HashChunkEvent(iw.ctx.GetStochastikVars().StmtCtx, h, innerEvent, iw.rowTypes, iw.keyDefCauss, buf)
   638  	if err != nil {
   639  		return nil, nil, err
   640  	}
   641  	iw.matchedOuterPtrs = task.lookupMap.Get(h.Sum64())
   642  	if len(iw.matchedOuterPtrs) == 0 {
   643  		return nil, nil, nil
   644  	}
   645  	joinType := JoinerType(iw.joiner)
   646  	isSemiJoin := joinType == causetembedded.SemiJoin || joinType == causetembedded.LeftOuterSemiJoin
   647  	matchedEvents = make([]chunk.Event, 0, len(iw.matchedOuterPtrs))
   648  	matchedEventPtr = make([]chunk.EventPtr, 0, len(iw.matchedOuterPtrs))
   649  	for _, ptr := range iw.matchedOuterPtrs {
   650  		outerEvent := task.outerResult.GetEvent(ptr)
   651  		ok, err := codec.EqualChunkEvent(iw.ctx.GetStochastikVars().StmtCtx, innerEvent, iw.rowTypes, iw.keyDefCauss, outerEvent, iw.outerCtx.rowTypes, iw.outerCtx.keyDefCauss)
   652  		if err != nil {
   653  			return nil, nil, err
   654  		}
   655  		if !ok || (task.outerEventStatus[ptr.ChkIdx][ptr.EventIdx] == outerEventMatched && isSemiJoin) {
   656  			continue
   657  		}
   658  		matchedEvents = append(matchedEvents, outerEvent)
   659  		matchedEventPtr = append(matchedEventPtr, chunk.EventPtr{ChkIdx: ptr.ChkIdx, EventIdx: ptr.EventIdx})
   660  	}
   661  	return matchedEvents, matchedEventPtr, nil
   662  }
   663  
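         // joinMatchedInnerEvent2Chunk joins innerEvent with all of its matched outer
         // events, updates task.outerEventStatus accordingly and flushes full chunks to
         // iw.resultCh.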
   664  func (iw *indexHashJoinInnerWorker) joinMatchedInnerEvent2Chunk(ctx context.Context, innerEvent chunk.Event, task *indexHashJoinTask,
   665  	joinResult *indexHashJoinResult, h hash.Hash64, buf []byte) (bool, *indexHashJoinResult) {
   666  	matchedOuterEvents, matchedOuterEventPtr, err := iw.getMatchedOuterEvents(innerEvent, task, h, buf)
   667  	if err != nil {
   668  		joinResult.err = err
   669  		return false, joinResult
   670  	}
   671  	if len(matchedOuterEvents) == 0 {
   672  		return true, joinResult
   673  	}
   674  	var (
   675  		ok     bool
   676  		iter   = chunk.NewIterator4Slice(matchedOuterEvents)
   677  		cursor = 0
   678  	)
   679  	for iter.Begin(); iter.Current() != iter.End(); {
   680  		iw.outerEventStatus, err = iw.joiner.tryToMatchOuters(iter, innerEvent, joinResult.chk, iw.outerEventStatus)
   681  		if err != nil {
   682  			joinResult.err = err
   683  			return false, joinResult
   684  		}
   685  		for _, status := range iw.outerEventStatus {
   686  			chkIdx, rowIdx := matchedOuterEventPtr[cursor].ChkIdx, matchedOuterEventPtr[cursor].EventIdx
   687  			if status == outerEventMatched || task.outerEventStatus[chkIdx][rowIdx] == outerEventUnmatched {
   688  				task.outerEventStatus[chkIdx][rowIdx] = status
   689  			}
   690  			cursor++
   691  		}
   692  		if joinResult.chk.IsFull() {
   693  			select {
   694  			case iw.resultCh <- joinResult:
   695  			case <-ctx.Done():
   696  			}
   697  			joinResult, ok = iw.getNewJoinResult(ctx)
   698  			if !ok {
   699  				return false, joinResult
   700  			}
   701  		}
   702  	}
   703  	return true, joinResult
   704  }
   705  
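         // defCauslectMatchedInnerPtrs4OuterEvents records innerEventPtr in
         // task.matchedInnerEventPtrs for every outer event matched by innerEvent; it is
         // only used when the outer order needs to be preserved.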
   706  func (iw *indexHashJoinInnerWorker) defCauslectMatchedInnerPtrs4OuterEvents(ctx context.Context, innerEvent chunk.Event, innerEventPtr chunk.EventPtr,
   707  	task *indexHashJoinTask, h hash.Hash64, buf []byte) error {
   708  	_, matchedOuterEventIdx, err := iw.getMatchedOuterEvents(innerEvent, task, h, buf)
   709  	if err != nil {
   710  		return err
   711  	}
   712  	for _, outerEventPtr := range matchedOuterEventIdx {
   713  		chkIdx, rowIdx := outerEventPtr.ChkIdx, outerEventPtr.EventIdx
   714  		task.matchedInnerEventPtrs[chkIdx][rowIdx] = append(task.matchedInnerEventPtrs[chkIdx][rowIdx], innerEventPtr)
   715  	}
   716  	return nil
   717  }
   718  
    719  // doJoinInOrder follows these steps:
    720  // 1. collect all the matched inner event ptrs for every outer event
    721  // 2. do the join work:
    722  //   2.1 collect all the matched inner rows using the collected ptrs for every outer event
    723  //   2.2 call tryToMatchInners for every outer event
    724  //   2.3 call onMissMatch when no inner rows are matched
   725  func (iw *indexHashJoinInnerWorker) doJoinInOrder(ctx context.Context, task *indexHashJoinTask, joinResult *indexHashJoinResult, h hash.Hash64, resultCh chan *indexHashJoinResult) (err error) {
   726  	defer func() {
   727  		if err == nil && joinResult.chk != nil {
   728  			if joinResult.chk.NumEvents() > 0 {
   729  				select {
   730  				case resultCh <- joinResult:
   731  				case <-ctx.Done():
   732  					return
   733  				}
   734  			} else {
   735  				joinResult.src <- joinResult.chk
   736  			}
   737  		}
   738  		close(resultCh)
   739  	}()
   740  	for i, numChunks := 0, task.innerResult.NumChunks(); i < numChunks; i++ {
   741  		for j, chk := 0, task.innerResult.GetChunk(i); j < chk.NumEvents(); j++ {
   742  			event := chk.GetEvent(j)
   743  			ptr := chunk.EventPtr{ChkIdx: uint32(i), EventIdx: uint32(j)}
   744  			err = iw.defCauslectMatchedInnerPtrs4OuterEvents(ctx, event, ptr, task, h, iw.joinKeyBuf)
   745  			if err != nil {
   746  				return err
   747  			}
   748  		}
   749  	}
   750  	// TODO: matchedInnerEventPtrs and matchedInnerEvents can be moved to inner worker.
   751  	matchedInnerEvents := make([]chunk.Event, len(task.matchedInnerEventPtrs))
   752  	var hasMatched, hasNull, ok bool
   753  	for chkIdx, innerEventPtrs4Chk := range task.matchedInnerEventPtrs {
   754  		for outerEventIdx, innerEventPtrs := range innerEventPtrs4Chk {
   755  			matchedInnerEvents, hasMatched, hasNull = matchedInnerEvents[:0], false, false
   756  			outerEvent := task.outerResult.GetChunk(chkIdx).GetEvent(outerEventIdx)
   757  			for _, ptr := range innerEventPtrs {
   758  				matchedInnerEvents = append(matchedInnerEvents, task.innerResult.GetEvent(ptr))
   759  			}
   760  			iter := chunk.NewIterator4Slice(matchedInnerEvents)
   761  			for iter.Begin(); iter.Current() != iter.End(); {
   762  				matched, isNull, err := iw.joiner.tryToMatchInners(outerEvent, iter, joinResult.chk)
   763  				if err != nil {
   764  					return err
   765  				}
   766  				hasMatched, hasNull = matched || hasMatched, isNull || hasNull
   767  				if joinResult.chk.IsFull() {
   768  					select {
   769  					case resultCh <- joinResult:
   770  					case <-ctx.Done():
   771  						return ctx.Err()
   772  					}
   773  					joinResult, ok = iw.getNewJoinResult(ctx)
   774  					if !ok {
   775  						return errors.New("indexHashJoinInnerWorker.doJoinInOrder failed")
   776  					}
   777  				}
   778  			}
   779  			if !hasMatched {
   780  				iw.joiner.onMissMatch(hasNull, outerEvent, joinResult.chk)
   781  			}
   782  		}
   783  	}
   784  	return nil
   785  }