github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/parallel_apply.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package interlock
    15  
    16  import (
    17  	"context"
    18  	"runtime/trace"
    19  	"sync"
    20  	"sync/atomic"
    21  
    22  	"github.com/whtcorpsinc/errors"
    23  	"github.com/whtcorpsinc/BerolinaSQL/terror"
    24  	"github.com/whtcorpsinc/milevadb/memex"
    25  	"github.com/whtcorpsinc/milevadb/soliton/chunk"
    26  	"github.com/whtcorpsinc/milevadb/soliton/codec"
    27  	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
    28  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    29  	"github.com/whtcorpsinc/milevadb/soliton/memory"
    30  	"go.uber.org/zap"
    31  )
    32  
// result is the message sent through resultChkCh from the workers to Next.
// A non-nil err reports a worker failure; a nil chk together with a nil err
// tells Next that there is no more data.
type result struct {
	chk *chunk.Chunk // joined events produced by an inner worker, or nil
	err error        // error or recovered panic raised by a worker, or nil
}
    37  
// outerEvent is the message sent through outerEventCh from the outer worker
// to the inner workers: one outer-side event plus the outcome of the outer filter.
type outerEvent struct {
	event      *chunk.Event
	selected bool // whether this event passed the outer-side filter
}
    42  
// ParallelNestedLoopApplyInterDirc is the interlock for apply.
// One outer worker reads the outer side and hands its events to
// `concurrency` inner workers; each inner worker evaluates the inner side for
// the outer events it receives and sends joined chunks back to Next.
type ParallelNestedLoopApplyInterDirc struct {
	baseInterlockingDirectorate

	// outer-side fields
	// cursor is not referenced in the visible part of this file.
	cursor        int
	// outerInterDirc is the outer child; it is read only by the outer worker.
	outerInterDirc     InterlockingDirectorate
	// outerFilter is evaluated on every outer chunk by the outer worker.
	outerFilter   memex.CNFExprs
	// outerList keeps every outer chunk that has been read so far.
	outerList     *chunk.List
	// outerEventMutex is not referenced in the visible part of this file.
	outerEventMutex sync.Mutex
	// outer makes outer events rejected by outerFilter go through the
	// joiner's onMissMatch instead of being dropped silently.
	outer         bool

	// inner-side fields
	// use slices since the inner side is paralleled
	// (every slice below is indexed by the inner worker id)
	corDefCauss       [][]*memex.CorrelatedDeferredCauset // correlated columns, filled from the current outer event
	innerFilter   []memex.CNFExprs // filter applied to the events read from the inner child
	innerInterDircs    []InterlockingDirectorate // inner child of each worker, re-opened for every cache miss
	innerList     []*chunk.List // filtered inner events for the current outer event
	innerChunk    []*chunk.Chunk // scratch chunk used to read the inner child
	innerSelected [][]bool // scratch buffer holding the inner filter results
	innerIter     []chunk.Iterator // iterator over innerList, consumed by the joiner
	outerEvent      []*chunk.Event // outer event currently processed by each worker
	hasMatch      []bool // whether the current outer event matched any inner event
	hasNull       []bool // whether matching the current outer event produced a NULL result
	joiners       []joiner

	// fields about concurrency control
	concurrency int // number of inner workers
	started     uint32 // set to 1 by the first Next call, which launches the workers
	freeChkCh   chan *chunk.Chunk // empty chunks handed to the inner workers; Next recycles chunks into it
	resultChkCh chan result // results consumed by Next
	outerEventCh  chan outerEvent // outer events; closed by the outer worker when the outer side is exhausted
	exit        chan struct{} // closed by Close to ask all workers to stop
	workerWg    sync.WaitGroup // tracks the outer worker and the inner workers
	notifyWg    sync.WaitGroup // tracks the notify worker

	// fields about cache
	cache              *applyCache // inner-side results keyed by the encoded correlated column values
	useCache           bool
	cacheHitCounter    int64 // updated atomically by the inner workers
	cacheAccessCounter int64 // updated atomically by the inner workers
	cacheLock          sync.RWMutex // guards cache

	memTracker *memory.Tracker // track memory usage.
}
    88  
    89  // Open implements the InterlockingDirectorate interface.
    90  func (e *ParallelNestedLoopApplyInterDirc) Open(ctx context.Context) error {
    91  	err := e.outerInterDirc.Open(ctx)
    92  	if err != nil {
    93  		return err
    94  	}
    95  	e.memTracker = memory.NewTracker(e.id, -1)
    96  	e.memTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.MemTracker)
    97  
    98  	e.outerList = chunk.NewList(retTypes(e.outerInterDirc), e.initCap, e.maxChunkSize)
    99  	e.outerList.GetMemTracker().SetLabel(memory.LabelForOuterList)
   100  	e.outerList.GetMemTracker().AttachTo(e.memTracker)
   101  
   102  	e.innerList = make([]*chunk.List, e.concurrency)
   103  	e.innerChunk = make([]*chunk.Chunk, e.concurrency)
   104  	e.innerSelected = make([][]bool, e.concurrency)
   105  	e.innerIter = make([]chunk.Iterator, e.concurrency)
   106  	e.outerEvent = make([]*chunk.Event, e.concurrency)
   107  	e.hasMatch = make([]bool, e.concurrency)
   108  	e.hasNull = make([]bool, e.concurrency)
   109  	for i := 0; i < e.concurrency; i++ {
   110  		e.innerChunk[i] = newFirstChunk(e.innerInterDircs[i])
   111  		e.innerList[i] = chunk.NewList(retTypes(e.innerInterDircs[i]), e.initCap, e.maxChunkSize)
   112  		e.innerList[i].GetMemTracker().SetLabel(memory.LabelForInnerList)
   113  		e.innerList[i].GetMemTracker().AttachTo(e.memTracker)
   114  	}
   115  
   116  	e.freeChkCh = make(chan *chunk.Chunk, e.concurrency)
   117  	e.resultChkCh = make(chan result, e.concurrency+1) // innerWorkers + outerWorker
   118  	e.outerEventCh = make(chan outerEvent)
   119  	e.exit = make(chan struct{})
   120  	for i := 0; i < e.concurrency; i++ {
   121  		e.freeChkCh <- newFirstChunk(e)
   122  	}
   123  
   124  	if e.useCache {
   125  		if e.cache, err = newApplyCache(e.ctx); err != nil {
   126  			return err
   127  		}
   128  		e.cache.GetMemTracker().AttachTo(e.memTracker)
   129  	}
   130  	return nil
   131  }
   132  
   133  // Next implements the InterlockingDirectorate interface.
   134  func (e *ParallelNestedLoopApplyInterDirc) Next(ctx context.Context, req *chunk.Chunk) (err error) {
   135  	if atomic.CompareAndSwapUint32(&e.started, 0, 1) {
   136  		e.workerWg.Add(1)
   137  		go e.outerWorker(ctx)
   138  		for i := 0; i < e.concurrency; i++ {
   139  			e.workerWg.Add(1)
   140  			workID := i
   141  			go e.innerWorker(ctx, workID)
   142  		}
   143  		e.notifyWg.Add(1)
   144  		go e.notifyWorker(ctx)
   145  	}
   146  	result := <-e.resultChkCh
   147  	if result.err != nil {
   148  		return result.err
   149  	}
   150  	if result.chk == nil { // no more data
   151  		req.Reset()
   152  		return nil
   153  	}
   154  	req.SwapDeferredCausets(result.chk)
   155  	e.freeChkCh <- result.chk
   156  	return nil
   157  }
   158  
   159  // Close implements the InterlockingDirectorate interface.
   160  func (e *ParallelNestedLoopApplyInterDirc) Close() error {
   161  	e.memTracker = nil
   162  	err := e.outerInterDirc.Close()
   163  	if atomic.LoadUint32(&e.started) == 1 {
   164  		close(e.exit)
   165  		e.notifyWg.Wait()
   166  		e.started = 0
   167  	}
   168  
   169  	if e.runtimeStats != nil {
   170  		runtimeStats := newJoinRuntimeStats()
   171  		e.ctx.GetStochastikVars().StmtCtx.RuntimeStatsDefCausl.RegisterStats(e.id, runtimeStats)
   172  		if e.useCache {
   173  			var hitRatio float64
   174  			if e.cacheAccessCounter > 0 {
   175  				hitRatio = float64(e.cacheHitCounter) / float64(e.cacheAccessCounter)
   176  			}
   177  			runtimeStats.setCacheInfo(true, hitRatio)
   178  		} else {
   179  			runtimeStats.setCacheInfo(false, 0)
   180  		}
   181  		runtimeStats.SetConcurrencyInfo(execdetails.NewConcurrencyInfo("Concurrency", e.concurrency))
   182  	}
   183  	return err
   184  }
   185  
// notifyWorker waits for the outer worker and all inner workers to finish and
// then puts a result with a nil chunk and a nil error into resultChkCh, which
// Next interprets as "no more data".
func (e *ParallelNestedLoopApplyInterDirc) notifyWorker(ctx context.Context) {
	// handleWorkerPanic has to be the deferred function itself so that its
	// recover() call takes effect; it also marks notifyWg as done.
	defer e.handleWorkerPanic(ctx, &e.notifyWg)
	e.workerWg.Wait()
	// putResult gives up without sending if e.exit is closed first.
	e.putResult(nil, nil)
}
   193  
   194  func (e *ParallelNestedLoopApplyInterDirc) outerWorker(ctx context.Context) {
   195  	defer trace.StartRegion(ctx, "ParallelApplyOuterWorker").End()
   196  	defer e.handleWorkerPanic(ctx, &e.workerWg)
   197  	var selected []bool
   198  	var err error
   199  	for {
   200  		chk := newFirstChunk(e.outerInterDirc)
   201  		if err := Next(ctx, e.outerInterDirc, chk); err != nil {
   202  			e.putResult(nil, err)
   203  			return
   204  		}
   205  		if chk.NumEvents() == 0 {
   206  			close(e.outerEventCh)
   207  			return
   208  		}
   209  		e.outerList.Add(chk)
   210  		outerIter := chunk.NewIterator4Chunk(chk)
   211  		selected, err = memex.VectorizedFilter(e.ctx, e.outerFilter, outerIter, selected)
   212  		if err != nil {
   213  			e.putResult(nil, err)
   214  			return
   215  		}
   216  		for i := 0; i < chk.NumEvents(); i++ {
   217  			event := chk.GetEvent(i)
   218  			select {
   219  			case e.outerEventCh <- outerEvent{&event, selected[i]}:
   220  			case <-e.exit:
   221  				return
   222  			}
   223  		}
   224  	}
   225  }
   226  
   227  func (e *ParallelNestedLoopApplyInterDirc) innerWorker(ctx context.Context, id int) {
   228  	defer trace.StartRegion(ctx, "ParallelApplyInnerWorker").End()
   229  	defer e.handleWorkerPanic(ctx, &e.workerWg)
   230  	for {
   231  		var chk *chunk.Chunk
   232  		select {
   233  		case chk = <-e.freeChkCh:
   234  		case <-e.exit:
   235  			return
   236  		}
   237  		err := e.fillInnerChunk(ctx, id, chk)
   238  		if err == nil && chk.NumEvents() == 0 { // no more data, this goroutine can exit
   239  			return
   240  		}
   241  		if e.putResult(chk, err) {
   242  			return
   243  		}
   244  	}
   245  }
   246  
   247  func (e *ParallelNestedLoopApplyInterDirc) putResult(chk *chunk.Chunk, err error) (exit bool) {
   248  	select {
   249  	case e.resultChkCh <- result{chk, err}:
   250  		return false
   251  	case <-e.exit:
   252  		return true
   253  	}
   254  }
   255  
   256  func (e *ParallelNestedLoopApplyInterDirc) handleWorkerPanic(ctx context.Context, wg *sync.WaitGroup) {
   257  	if r := recover(); r != nil {
   258  		err := errors.Errorf("%v", r)
   259  		logutil.Logger(ctx).Error("parallel nested loop join worker panicked", zap.Error(err), zap.Stack("stack"))
   260  		e.resultChkCh <- result{nil, err}
   261  	}
   262  	if wg != nil {
   263  		wg.Done()
   264  	}
   265  }
   266  
   267  // fetchAllInners reads all data from the inner causet and stores them in a List.
   268  func (e *ParallelNestedLoopApplyInterDirc) fetchAllInners(ctx context.Context, id int) (err error) {
   269  	var key []byte
   270  	for _, defCaus := range e.corDefCauss[id] {
   271  		*defCaus.Data = e.outerEvent[id].GetCauset(defCaus.Index, defCaus.RetType)
   272  		if e.useCache {
   273  			if key, err = codec.EncodeKey(e.ctx.GetStochastikVars().StmtCtx, key, *defCaus.Data); err != nil {
   274  				return err
   275  			}
   276  		}
   277  	}
   278  	if e.useCache { // look up the cache
   279  		atomic.AddInt64(&e.cacheAccessCounter, 1)
   280  		e.cacheLock.RLock()
   281  		value, err := e.cache.Get(key)
   282  		e.cacheLock.RUnlock()
   283  		if err != nil {
   284  			return err
   285  		}
   286  		if value != nil {
   287  			e.innerList[id] = value
   288  			atomic.AddInt64(&e.cacheHitCounter, 1)
   289  			return nil
   290  		}
   291  	}
   292  
   293  	err = e.innerInterDircs[id].Open(ctx)
   294  	defer terror.Call(e.innerInterDircs[id].Close)
   295  	if err != nil {
   296  		return err
   297  	}
   298  
   299  	if e.useCache {
   300  		// create a new one in this case since it may be in the cache
   301  		e.innerList[id] = chunk.NewList(retTypes(e.innerInterDircs[id]), e.initCap, e.maxChunkSize)
   302  	} else {
   303  		e.innerList[id].Reset()
   304  	}
   305  
   306  	innerIter := chunk.NewIterator4Chunk(e.innerChunk[id])
   307  	for {
   308  		err := Next(ctx, e.innerInterDircs[id], e.innerChunk[id])
   309  		if err != nil {
   310  			return err
   311  		}
   312  		if e.innerChunk[id].NumEvents() == 0 {
   313  			break
   314  		}
   315  
   316  		e.innerSelected[id], err = memex.VectorizedFilter(e.ctx, e.innerFilter[id], innerIter, e.innerSelected[id])
   317  		if err != nil {
   318  			return err
   319  		}
   320  		for event := innerIter.Begin(); event != innerIter.End(); event = innerIter.Next() {
   321  			if e.innerSelected[id][event.Idx()] {
   322  				e.innerList[id].AppendEvent(event)
   323  			}
   324  		}
   325  	}
   326  
   327  	if e.useCache { // uFIDelate the cache
   328  		e.cacheLock.Lock()
   329  		defer e.cacheLock.Unlock()
   330  		if _, err := e.cache.Set(key, e.innerList[id]); err != nil {
   331  			return err
   332  		}
   333  	}
   334  	return nil
   335  }
   336  
   337  func (e *ParallelNestedLoopApplyInterDirc) fetchNextOuterEvent(id int, req *chunk.Chunk) (event *chunk.Event, exit bool) {
   338  	for {
   339  		select {
   340  		case outerEvent, ok := <-e.outerEventCh:
   341  			if !ok { // no more data
   342  				return nil, false
   343  			}
   344  			if !outerEvent.selected {
   345  				if e.outer {
   346  					e.joiners[id].onMissMatch(false, *outerEvent.event, req)
   347  					if req.IsFull() {
   348  						return nil, false
   349  					}
   350  				}
   351  				continue // try the next outer event
   352  			}
   353  			return outerEvent.event, false
   354  		case <-e.exit:
   355  			return nil, true
   356  		}
   357  	}
   358  }
   359  
   360  func (e *ParallelNestedLoopApplyInterDirc) fillInnerChunk(ctx context.Context, id int, req *chunk.Chunk) (err error) {
   361  	req.Reset()
   362  	for {
   363  		if e.innerIter[id] == nil || e.innerIter[id].Current() == e.innerIter[id].End() {
   364  			if e.outerEvent[id] != nil && !e.hasMatch[id] {
   365  				e.joiners[id].onMissMatch(e.hasNull[id], *e.outerEvent[id], req)
   366  			}
   367  			var exit bool
   368  			e.outerEvent[id], exit = e.fetchNextOuterEvent(id, req)
   369  			if exit || req.IsFull() || e.outerEvent[id] == nil {
   370  				return nil
   371  			}
   372  
   373  			e.hasMatch[id] = false
   374  			e.hasNull[id] = false
   375  
   376  			err = e.fetchAllInners(ctx, id)
   377  			if err != nil {
   378  				return err
   379  			}
   380  			e.innerIter[id] = chunk.NewIterator4List(e.innerList[id])
   381  			e.innerIter[id].Begin()
   382  		}
   383  
   384  		matched, isNull, err := e.joiners[id].tryToMatchInners(*e.outerEvent[id], e.innerIter[id], req)
   385  		e.hasMatch[id] = e.hasMatch[id] || matched
   386  		e.hasNull[id] = e.hasNull[id] || isNull
   387  
   388  		if err != nil || req.IsFull() {
   389  			return err
   390  		}
   391  	}
   392  }