github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/shuffle.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"context"
	"sync"

	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/failpoint"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/twmb/murmur3"
	"go.uber.org/zap"
)

// ShuffleInterDirc is the interlock that runs other interlocks in a parallel manner.
//  1. It fetches chunks from `DataSource`.
//  2. It splits tuples from `DataSource` into N partitions (only "split by hash" is implemented so far).
//  3. It invokes N workers in parallel, assigns each partition as the input of one worker, and executes the child interlocks.
//  4. It collects the outputs from each worker, then sends them to its parent.
//
//                                +-------------+
//                        +-------| Main Thread |
//                        |       +------+------+
//                        |              ^
//                        |              |
//                        |              +
//                        v             +++
//                 outputHolderCh       | | outputCh (1 x Concurrency)
//                        v             +++
//                        |              ^
//                        |              |
//                        |      +-------+-------+
//                        v      |               |
//                 +--------------+             +--------------+
//          +----- |    worker    |   .......   |    worker    |  worker (N Concurrency): child interlock, e.g. WindowInterDirc (+SortInterDirc)
//          |      +------------+-+             +-+------------+
//          |                 ^                 ^
//          |                 |                 |
//          |                +-+  +-+  ......  +-+
//          |                | |  | |          | |
//          |                ...  ...          ...  inputCh (Concurrency x 1)
//          v                | |  | |          | |
//    inputHolderCh          +++  +++          +++
//          v                 ^    ^            ^
//          |                 |    |            |
//          |          +------o----+            |
//          |          |      +-----------------+-----+
//          |          |                              |
//          |      +---+------------+------------+----+-----------+
//          |      |              Partition Splitter              |
//          |      +--------------+-+------------+-+--------------+
//          |                             ^
//          |                             |
//          |             +---------------v-----------------+
//          +---------->  |    fetch data from DataSource   |
//                        +---------------------------------+
//
////////////////////////////////////////////////////////////////////////////////////////
type ShuffleInterDirc struct {
	baseInterlockingDirectorate
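	// concurrency is the number of partitions and therefore the number of
	// shuffleWorkers running in parallel; workers holds one worker per partition.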
	concurrency int
	workers     []*shuffleWorker

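	// prepared is set once Next has started the background fetcher and worker
	// goroutines; executed is set once outputCh has been fully drained.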
	prepared bool
	executed bool

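	// splitter routes every event of a chunk to one of the workers;
	// dataSource is the child interlock whose output gets shuffled.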
	splitter   partitionSplitter
	dataSource InterlockingDirectorate

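	// finishCh is closed in Close to ask all goroutines to exit.
	// outputCh carries finished chunks (or errors) from the workers to Next.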
	finishCh chan struct{}
	outputCh chan *shuffleOutput
}

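// shuffleOutput is the unit passed from a worker to the main thread: either a
// chunk of results or an error. giveBackCh returns the chunk to its worker for
// reuse once the main thread has consumed it.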
type shuffleOutput struct {
	chk        *chunk.Chunk
	err        error
	giveBackCh chan *chunk.Chunk
}

// Open implements the InterlockingDirectorate Open interface.
func (e *ShuffleInterDirc) Open(ctx context.Context) error {
	if err := e.dataSource.Open(ctx); err != nil {
		return err
	}
	if err := e.baseInterlockingDirectorate.Open(ctx); err != nil {
		return err
	}

	e.prepared = false
	e.finishCh = make(chan struct{}, 1)
	e.outputCh = make(chan *shuffleOutput, e.concurrency)

	for _, w := range e.workers {
		w.finishCh = e.finishCh

		w.inputCh = make(chan *chunk.Chunk, 1)
		w.inputHolderCh = make(chan *chunk.Chunk, 1)
		w.outputCh = e.outputCh
		w.outputHolderCh = make(chan *chunk.Chunk, 1)

		if err := w.childInterDirc.Open(ctx); err != nil {
			return err
		}

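		// Seed the holder channels with one empty chunk each, so the data
		// fetcher and the worker always have a buffer to fill; the chunks are
		// recycled through these holder channels afterwards.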
		w.inputHolderCh <- newFirstChunk(e.dataSource)
		w.outputHolderCh <- newFirstChunk(e)
	}

	return nil
}

// Close implements the InterlockingDirectorate Close interface.
func (e *ShuffleInterDirc) Close() error {
	if !e.prepared {
		for _, w := range e.workers {
			close(w.inputHolderCh)
			close(w.inputCh)
			close(w.outputHolderCh)
		}
		close(e.outputCh)
	}
	close(e.finishCh)
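	// Drain the input and output channels so that goroutines blocked on sends
	// can observe finishCh and exit.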
	for _, w := range e.workers {
		for range w.inputCh {
		}
	}
	for range e.outputCh { // workers exit before `e.outputCh` is closed.
	}
	e.executed = false

	if e.runtimeStats != nil {
		runtimeStats := &execdetails.RuntimeStatsWithConcurrencyInfo{}
		runtimeStats.SetConcurrencyInfo(execdetails.NewConcurrencyInfo("ShuffleConcurrency", e.concurrency))
		e.ctx.GetStochastikVars().StmtCtx.RuntimeStatsDefCausl.RegisterStats(e.id, runtimeStats)
	}

	err := e.dataSource.Close()
	err1 := e.baseInterlockingDirectorate.Close()
	if err != nil {
		return errors.Trace(err)
	}
	return errors.Trace(err1)
}

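// prepare4ParallelInterDirc starts the data-fetching goroutine, one goroutine per
// worker, and a final goroutine that closes outputCh once all workers are done.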
func (e *ShuffleInterDirc) prepare4ParallelInterDirc(ctx context.Context) {
	go e.fetchDataAndSplit(ctx)

	waitGroup := &sync.WaitGroup{}
	waitGroup.Add(len(e.workers))
	for _, w := range e.workers {
		go w.run(ctx, waitGroup)
	}

	go e.waitWorkerAndCloseOutput(waitGroup)
}

func (e *ShuffleInterDirc) waitWorkerAndCloseOutput(waitGroup *sync.WaitGroup) {
	waitGroup.Wait()
	close(e.outputCh)
}

// Next implements the InterlockingDirectorate Next interface.
func (e *ShuffleInterDirc) Next(ctx context.Context, req *chunk.Chunk) error {
	req.Reset()
	if !e.prepared {
		e.prepare4ParallelInterDirc(ctx)
		e.prepared = true
	}

	failpoint.Inject("shuffleError", func(val failpoint.Value) {
		if val.(bool) {
			failpoint.Return(errors.New("ShuffleInterDirc.Next error"))
		}
	})

	if e.executed {
		return nil
	}

	result, ok := <-e.outputCh
	if !ok {
		e.executed = true
		return nil
	}
	if result.err != nil {
		return result.err
	}
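	// Move the events into the caller's chunk, then hand the consumed chunk
	// back to the worker through giveBackCh so it can be reused.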
	req.SwapDeferredCausets(result.chk) // `shuffleWorker` will not send an empty `result.chk` to `e.outputCh`.
	result.giveBackCh <- result.chk

	return nil
}

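// recoveryShuffleInterDirc turns a panic recovered in a shuffle goroutine into an
// error on the output channel, so that Next can report it, and logs the stack.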
func recoveryShuffleInterDirc(output chan *shuffleOutput, r interface{}) {
	err := errors.Errorf("%v", r)
	output <- &shuffleOutput{err: err}
	logutil.BgLogger().Error("shuffle panicked", zap.Error(err), zap.Stack("stack"))
}

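// fetchDataAndSplit reads chunks from dataSource, routes every event to the
// worker chosen by the splitter, and sends a worker's buffer chunk over its
// inputCh whenever it fills up. Partially filled buffers are flushed once the
// data source is exhausted.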
func (e *ShuffleInterDirc) fetchDataAndSplit(ctx context.Context) {
	var (
		err           error
		workerIndices []int
	)
	results := make([]*chunk.Chunk, len(e.workers))
	chk := newFirstChunk(e.dataSource)

	defer func() {
		if r := recover(); r != nil {
			recoveryShuffleInterDirc(e.outputCh, r)
		}
		for _, w := range e.workers {
			close(w.inputCh)
		}
	}()

	for {
		err = Next(ctx, e.dataSource, chk)
		if err != nil {
			e.outputCh <- &shuffleOutput{err: err}
			return
		}
		if chk.NumEvents() == 0 {
			break
		}

		workerIndices, err = e.splitter.split(e.ctx, chk, workerIndices)
		if err != nil {
			e.outputCh <- &shuffleOutput{err: err}
			return
		}
		numEvents := chk.NumEvents()
		for i := 0; i < numEvents; i++ {
			workerIdx := workerIndices[i]
			w := e.workers[workerIdx]

			if results[workerIdx] == nil {
				select {
				case <-e.finishCh:
					return
				case results[workerIdx] = <-w.inputHolderCh:
					break
				}
			}
			results[workerIdx].AppendEvent(chk.GetEvent(i))
			if results[workerIdx].IsFull() {
				w.inputCh <- results[workerIdx]
				results[workerIdx] = nil
			}
		}
	}
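	// Flush the partially filled buffers left over after the data source is exhausted.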
	for i, w := range e.workers {
		if results[i] != nil {
			w.inputCh <- results[i]
			results[i] = nil
		}
	}
}

var _ InterlockingDirectorate = &shuffleWorker{}

// shuffleWorker is one of the parallel workers of ShuffleInterDirc; it executes the
// child interlocks over its own partition of the data.
type shuffleWorker struct {
	baseInterlockingDirectorate
	childInterDirc InterlockingDirectorate

	finishCh <-chan struct{}
	executed bool

	// Workers get input chunks from the data-fetcher goroutine through `inputCh`,
	//   and send results to the main thread through `outputCh`.
	// `inputHolderCh` and `outputHolderCh` are the "chunk holder" channels paired with
	//   `inputCh` and `outputCh` respectively: they give the `*Chunk` back to its
	//   producer, so data is transported in a streaming manner and chunks are reused.
	inputCh        chan *chunk.Chunk
	inputHolderCh  chan *chunk.Chunk
	outputCh       chan *shuffleOutput
	outputHolderCh chan *chunk.Chunk
}

// Open implements the InterlockingDirectorate Open interface.
func (e *shuffleWorker) Open(ctx context.Context) error {
	if err := e.baseInterlockingDirectorate.Open(ctx); err != nil {
		return err
	}
	e.executed = false
	return nil
}

// Close implements the InterlockingDirectorate Close interface.
func (e *shuffleWorker) Close() error {
	return errors.Trace(e.baseInterlockingDirectorate.Close())
}

// Next implements the InterlockingDirectorate Next interface.
// It is called by the `Tail` interlock within "shuffle" to fetch the partition's data from `DataSource` through `inputCh`.
func (e *shuffleWorker) Next(ctx context.Context, req *chunk.Chunk) error {
	req.Reset()
	if e.executed {
		return nil
	}
	select {
	case <-e.finishCh:
		e.executed = true
		return nil
	case result, ok := <-e.inputCh:
		if !ok || result.NumEvents() == 0 {
			e.executed = true
			return nil
		}
		req.SwapDeferredCausets(result)
		e.inputHolderCh <- result
		return nil
	}
}

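// run is the body of a worker goroutine: it takes an empty chunk from
// outputHolderCh, fills it by driving the child interlocks (which pull the
// partition's data through shuffleWorker.Next), and sends the filled chunk to
// outputCh, until the data is exhausted or finishCh is closed.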
func (e *shuffleWorker) run(ctx context.Context, waitGroup *sync.WaitGroup) {
	defer func() {
		if r := recover(); r != nil {
			recoveryShuffleInterDirc(e.outputCh, r)
		}
		waitGroup.Done()
	}()

	for {
		select {
		case <-e.finishCh:
			return
		case chk := <-e.outputHolderCh:
			if err := Next(ctx, e.childInterDirc, chk); err != nil {
				e.outputCh <- &shuffleOutput{err: err}
				return
			}

			// Should not send an empty `chk` to `e.outputCh`.
			if chk.NumEvents() == 0 {
				return
			}
			e.outputCh <- &shuffleOutput{chk: chk, giveBackCh: e.outputHolderCh}
		}
	}
}

var _ partitionSplitter = &partitionHashSplitter{}

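// partitionSplitter decides, for every event of the input chunk, which worker
// it should be routed to, returning one worker index per event.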
type partitionSplitter interface {
	split(ctx stochastikctx.Context, input *chunk.Chunk, workerIndices []int) ([]int, error)
}

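// partitionHashSplitter assigns events to workers by hashing the byItems
// memexs of each event with murmur3 and taking the result modulo numWorkers.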
type partitionHashSplitter struct {
	byItems    []memex.Expression
	numWorkers int
	hashKeys   [][]byte
}

func (s *partitionHashSplitter) split(ctx stochastikctx.Context, input *chunk.Chunk, workerIndices []int) ([]int, error) {
	var err error
	s.hashKeys, err = getGroupKey(ctx, input, s.hashKeys, s.byItems)
	if err != nil {
		return workerIndices, err
	}
	workerIndices = workerIndices[:0]
	numEvents := input.NumEvents()
	for i := 0; i < numEvents; i++ {
		workerIndices = append(workerIndices, int(murmur3.Sum32(s.hashKeys[i]))%s.numWorkers)
	}
	return workerIndices, nil
}