github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/interlock/projection.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package interlock
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"runtime/trace"
    20  	"sync"
    21  	"sync/atomic"
    22  
    23  	"github.com/whtcorpsinc/errors"
    24  	"github.com/whtcorpsinc/milevadb/memex"
    25  	"github.com/whtcorpsinc/milevadb/stochastikctx"
    26  	"github.com/whtcorpsinc/milevadb/soliton"
    27  	"github.com/whtcorpsinc/milevadb/soliton/chunk"
    28  	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
    29  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    30  	"github.com/whtcorpsinc/milevadb/soliton/memory"
    31  	"go.uber.org/zap"
    32  )
    33  
    34  // This file contains the implementation of the physical Projection Operator:
    35  // https://en.wikipedia.org/wiki/Projection_(relational_algebra)
    36  //
    37  // NOTE:
    38  // 1. The number of "projectionWorker" is controlled by the global stochastik
    39  //    variable "milevadb_projection_concurrency".
    40  // 2. Unparallel version is used when one of the following situations occurs:
    41  //    a. "milevadb_projection_concurrency" is set to 0.
    42  //    b. The estimated input size is smaller than "milevadb_max_chunk_size".
    43  //    c. This projection can not be executed vectorially.
    44  
    45  type projectionInput struct {
    46  	chk          *chunk.Chunk
    47  	targetWorker *projectionWorker
    48  }
    49  
    50  type projectionOutput struct {
    51  	chk  *chunk.Chunk
    52  	done chan error
    53  }
    54  
    55  // ProjectionInterDirc implements the physical Projection Operator:
    56  // https://en.wikipedia.org/wiki/Projection_(relational_algebra)
    57  type ProjectionInterDirc struct {
    58  	baseInterlockingDirectorate
    59  
    60  	evaluatorSuit *memex.EvaluatorSuite
    61  
    62  	finishCh    chan struct{}
    63  	outputCh    chan *projectionOutput
    64  	fetcher     projectionInputFetcher
    65  	numWorkers  int64
    66  	workers     []*projectionWorker
    67  	childResult *chunk.Chunk
    68  
    69  	// parentReqEvents indicates how many rows the parent interlock is
    70  	// requiring. It is set when parallelInterDircute() is called and used by the
    71  	// concurrent projectionInputFetcher.
    72  	//
    73  	// NOTE: It should be protected by atomic operations.
    74  	parentReqEvents int64
    75  
    76  	memTracker *memory.Tracker
    77  	wg         sync.WaitGroup
    78  
    79  	calculateNoDelay bool
    80  	prepared         bool
    81  }
    82  
    83  // Open implements the InterlockingDirectorate Open interface.
    84  func (e *ProjectionInterDirc) Open(ctx context.Context) error {
    85  	if err := e.baseInterlockingDirectorate.Open(ctx); err != nil {
    86  		return err
    87  	}
    88  	return e.open(ctx)
    89  }
    90  
    91  func (e *ProjectionInterDirc) open(ctx context.Context) error {
    92  	e.prepared = false
    93  	e.parentReqEvents = int64(e.maxChunkSize)
    94  
    95  	e.memTracker = memory.NewTracker(e.id, -1)
    96  	e.memTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.MemTracker)
    97  
    98  	// For now a Projection can not be executed vectorially only because it
    99  	// contains "SetVar" or "GetVar" functions, in this scenario this
   100  	// Projection can not be executed parallelly.
   101  	if e.numWorkers > 0 && !e.evaluatorSuit.Vectorizable() {
   102  		e.numWorkers = 0
   103  	}
   104  
   105  	if e.isUnparallelInterDirc() {
   106  		e.childResult = newFirstChunk(e.children[0])
   107  		e.memTracker.Consume(e.childResult.MemoryUsage())
   108  	}
   109  
   110  	return nil
   111  }
   112  
   113  // Next implements the InterlockingDirectorate Next interface.
   114  //
   115  // Here we explain the execution flow of the parallel projection implementation.
   116  // There are 3 main components:
   117  //   1. "projectionInputFetcher": Fetch input "Chunk" from child.
   118  //   2. "projectionWorker":       Do the projection work.
   119  //   3. "ProjectionInterDirc.Next":    Return result to parent.
   120  //
   121  // 1. "projectionInputFetcher" gets its input and output resources from its
   122  // "inputCh" and "outputCh" channel, once the input and output resources are
   123  // abtained, it fetches child's result into "input.chk" and:
   124  //   a. Dispatches this input to the worker specified in "input.targetWorker"
   125  //   b. Dispatches this output to the main thread: "ProjectionInterDirc.Next"
   126  //   c. Dispatches this output to the worker specified in "input.targetWorker"
   127  // It is finished and exited once:
   128  //   a. There is no more input from child.
   129  //   b. "ProjectionInterDirc" close the "globalFinishCh"
   130  //
   131  // 2. "projectionWorker" gets its input and output resources from its
   132  // "inputCh" and "outputCh" channel, once the input and output resources are
   133  // abtained, it calculates the projection result use "input.chk" as the input
   134  // and "output.chk" as the output, once the calculation is done, it:
   135  //   a. Sends "nil" or error to "output.done" to mark this input is finished.
   136  //   b. Returns the "input" resource to "projectionInputFetcher.inputCh"
   137  // They are finished and exited once:
   138  //   a. "ProjectionInterDirc" closes the "globalFinishCh"
   139  //
   140  // 3. "ProjectionInterDirc.Next" gets its output resources from its "outputCh" channel.
   141  // After receiving an output from "outputCh", it should wait to receive a "nil"
   142  // or error from "output.done" channel. Once a "nil" or error is received:
   143  //   a. Returns this output to its parent
   144  //   b. Returns the "output" resource to "projectionInputFetcher.outputCh"
   145  //
   146  //  +-----------+----------------------+--------------------------+
   147  //  |           |                      |                          |
   148  //  |  +--------+---------+   +--------+---------+       +--------+---------+
   149  //  |  | projectionWorker |   + projectionWorker |  ...  + projectionWorker |
   150  //  |  +------------------+   +------------------+       +------------------+
   151  //  |       ^       ^              ^       ^                  ^       ^
   152  //  |       |       |              |       |                  |       |
   153  //  |    inputCh outputCh       inputCh outputCh           inputCh outputCh
   154  //  |       ^       ^              ^       ^                  ^       ^
   155  //  |       |       |              |       |                  |       |
   156  //  |                              |       |
   157  //  |                              |       +----------------->outputCh
   158  //  |                              |       |                      |
   159  //  |                              |       |                      v
   160  //  |                      +-------+-------+--------+   +---------------------+
   161  //  |                      | projectionInputFetcher |   | ProjectionInterDirc.Next |
   162  //  |                      +------------------------+   +---------+-----------+
   163  //  |                              ^       ^                      |
   164  //  |                              |       |                      |
   165  //  |                           inputCh outputCh                  |
   166  //  |                              ^       ^                      |
   167  //  |                              |       |                      |
   168  //  +------------------------------+       +----------------------+
   169  //
   170  func (e *ProjectionInterDirc) Next(ctx context.Context, req *chunk.Chunk) error {
   171  	req.GrowAndReset(e.maxChunkSize)
   172  	if e.isUnparallelInterDirc() {
   173  		return e.unParallelInterDircute(ctx, req)
   174  	}
   175  	return e.parallelInterDircute(ctx, req)
   176  
   177  }
   178  
   179  func (e *ProjectionInterDirc) isUnparallelInterDirc() bool {
   180  	return e.numWorkers <= 0
   181  }
   182  
   183  func (e *ProjectionInterDirc) unParallelInterDircute(ctx context.Context, chk *chunk.Chunk) error {
   184  	// transmit the requiredEvents
   185  	e.childResult.SetRequiredEvents(chk.RequiredEvents(), e.maxChunkSize)
   186  	mSize := e.childResult.MemoryUsage()
   187  	err := Next(ctx, e.children[0], e.childResult)
   188  	e.memTracker.Consume(e.childResult.MemoryUsage() - mSize)
   189  	if err != nil {
   190  		return err
   191  	}
   192  	if e.childResult.NumEvents() == 0 {
   193  		return nil
   194  	}
   195  	err = e.evaluatorSuit.Run(e.ctx, e.childResult, chk)
   196  	return err
   197  }
   198  
   199  func (e *ProjectionInterDirc) parallelInterDircute(ctx context.Context, chk *chunk.Chunk) error {
   200  	atomic.StoreInt64(&e.parentReqEvents, int64(chk.RequiredEvents()))
   201  	if !e.prepared {
   202  		e.prepare(ctx)
   203  		e.prepared = true
   204  	}
   205  
   206  	output, ok := <-e.outputCh
   207  	if !ok {
   208  		return nil
   209  	}
   210  
   211  	err := <-output.done
   212  	if err != nil {
   213  		return err
   214  	}
   215  	mSize := output.chk.MemoryUsage()
   216  	chk.SwapDeferredCausets(output.chk)
   217  	e.memTracker.Consume(output.chk.MemoryUsage() - mSize)
   218  	e.fetcher.outputCh <- output
   219  	return nil
   220  }
   221  
   222  func (e *ProjectionInterDirc) prepare(ctx context.Context) {
   223  	e.finishCh = make(chan struct{})
   224  	e.outputCh = make(chan *projectionOutput, e.numWorkers)
   225  
   226  	// Initialize projectionInputFetcher.
   227  	e.fetcher = projectionInputFetcher{
   228  		proj:           e,
   229  		child:          e.children[0],
   230  		globalFinishCh: e.finishCh,
   231  		globalOutputCh: e.outputCh,
   232  		inputCh:        make(chan *projectionInput, e.numWorkers),
   233  		outputCh:       make(chan *projectionOutput, e.numWorkers),
   234  	}
   235  
   236  	// Initialize projectionWorker.
   237  	e.workers = make([]*projectionWorker, 0, e.numWorkers)
   238  	for i := int64(0); i < e.numWorkers; i++ {
   239  		e.workers = append(e.workers, &projectionWorker{
   240  			proj:            e,
   241  			sctx:            e.ctx,
   242  			evaluatorSuit:   e.evaluatorSuit,
   243  			globalFinishCh:  e.finishCh,
   244  			inputGiveBackCh: e.fetcher.inputCh,
   245  			inputCh:         make(chan *projectionInput, 1),
   246  			outputCh:        make(chan *projectionOutput, 1),
   247  		})
   248  
   249  		inputChk := newFirstChunk(e.children[0])
   250  		e.memTracker.Consume(inputChk.MemoryUsage())
   251  		e.fetcher.inputCh <- &projectionInput{
   252  			chk:          inputChk,
   253  			targetWorker: e.workers[i],
   254  		}
   255  
   256  		outputChk := newFirstChunk(e)
   257  		e.memTracker.Consume(outputChk.MemoryUsage())
   258  		e.fetcher.outputCh <- &projectionOutput{
   259  			chk:  outputChk,
   260  			done: make(chan error, 1),
   261  		}
   262  	}
   263  
   264  	e.wg.Add(1)
   265  	go e.fetcher.run(ctx)
   266  
   267  	for i := range e.workers {
   268  		e.wg.Add(1)
   269  		go e.workers[i].run(ctx)
   270  	}
   271  }
   272  
   273  func (e *ProjectionInterDirc) drainInputCh(ch chan *projectionInput) {
   274  	close(ch)
   275  	for item := range ch {
   276  		if item.chk != nil {
   277  			e.memTracker.Consume(-item.chk.MemoryUsage())
   278  		}
   279  	}
   280  }
   281  
   282  func (e *ProjectionInterDirc) drainOutputCh(ch chan *projectionOutput) {
   283  	close(ch)
   284  	for item := range ch {
   285  		if item.chk != nil {
   286  			e.memTracker.Consume(-item.chk.MemoryUsage())
   287  		}
   288  	}
   289  }
   290  
   291  // Close implements the InterlockingDirectorate Close interface.
   292  func (e *ProjectionInterDirc) Close() error {
   293  	if e.isUnparallelInterDirc() {
   294  		e.memTracker.Consume(-e.childResult.MemoryUsage())
   295  		e.childResult = nil
   296  	}
   297  	if e.prepared {
   298  		close(e.finishCh)
   299  		e.wg.Wait() // Wait for fetcher and workers to finish and exit.
   300  
   301  		// clear fetcher
   302  		e.drainInputCh(e.fetcher.inputCh)
   303  		e.drainOutputCh(e.fetcher.outputCh)
   304  
   305  		// clear workers
   306  		for _, w := range e.workers {
   307  			e.drainInputCh(w.inputCh)
   308  			e.drainOutputCh(w.outputCh)
   309  		}
   310  	}
   311  	if e.baseInterlockingDirectorate.runtimeStats != nil {
   312  		runtimeStats := &execdetails.RuntimeStatsWithConcurrencyInfo{}
   313  		if e.isUnparallelInterDirc() {
   314  			runtimeStats.SetConcurrencyInfo(execdetails.NewConcurrencyInfo("Concurrency", 0))
   315  		} else {
   316  			runtimeStats.SetConcurrencyInfo(execdetails.NewConcurrencyInfo("Concurrency", int(e.numWorkers)))
   317  		}
   318  		e.ctx.GetStochastikVars().StmtCtx.RuntimeStatsDefCausl.RegisterStats(e.id, runtimeStats)
   319  	}
   320  	return e.baseInterlockingDirectorate.Close()
   321  }
   322  
   323  type projectionInputFetcher struct {
   324  	proj           *ProjectionInterDirc
   325  	child          InterlockingDirectorate
   326  	globalFinishCh <-chan struct{}
   327  	globalOutputCh chan<- *projectionOutput
   328  	wg             sync.WaitGroup
   329  
   330  	inputCh  chan *projectionInput
   331  	outputCh chan *projectionOutput
   332  }
   333  
   334  // run gets projectionInputFetcher's input and output resources from its
   335  // "inputCh" and "outputCh" channel, once the input and output resources are
   336  // abtained, it fetches child's result into "input.chk" and:
   337  //   a. Dispatches this input to the worker specified in "input.targetWorker"
   338  //   b. Dispatches this output to the main thread: "ProjectionInterDirc.Next"
   339  //   c. Dispatches this output to the worker specified in "input.targetWorker"
   340  //
   341  // It is finished and exited once:
   342  //   a. There is no more input from child.
   343  //   b. "ProjectionInterDirc" close the "globalFinishCh"
   344  func (f *projectionInputFetcher) run(ctx context.Context) {
   345  	defer trace.StartRegion(ctx, "ProjectionFetcher").End()
   346  	var output *projectionOutput
   347  	defer func() {
   348  		if r := recover(); r != nil {
   349  			recoveryProjection(output, r)
   350  		}
   351  		close(f.globalOutputCh)
   352  		f.proj.wg.Done()
   353  	}()
   354  
   355  	for {
   356  		input := readProjectionInput(f.inputCh, f.globalFinishCh)
   357  		if input == nil {
   358  			return
   359  		}
   360  		targetWorker := input.targetWorker
   361  
   362  		output = readProjectionOutput(f.outputCh, f.globalFinishCh)
   363  		if output == nil {
   364  			f.proj.memTracker.Consume(-input.chk.MemoryUsage())
   365  			return
   366  		}
   367  
   368  		f.globalOutputCh <- output
   369  
   370  		requiredEvents := atomic.LoadInt64(&f.proj.parentReqEvents)
   371  		input.chk.SetRequiredEvents(int(requiredEvents), f.proj.maxChunkSize)
   372  		mSize := input.chk.MemoryUsage()
   373  		err := Next(ctx, f.child, input.chk)
   374  		f.proj.memTracker.Consume(input.chk.MemoryUsage() - mSize)
   375  		if err != nil || input.chk.NumEvents() == 0 {
   376  			output.done <- err
   377  			f.proj.memTracker.Consume(-input.chk.MemoryUsage())
   378  			return
   379  		}
   380  
   381  		targetWorker.inputCh <- input
   382  		targetWorker.outputCh <- output
   383  	}
   384  }
   385  
   386  type projectionWorker struct {
   387  	proj            *ProjectionInterDirc
   388  	sctx            stochastikctx.Context
   389  	evaluatorSuit   *memex.EvaluatorSuite
   390  	globalFinishCh  <-chan struct{}
   391  	inputGiveBackCh chan<- *projectionInput
   392  
   393  	// channel "input" and "output" is :
   394  	// a. initialized by "ProjectionInterDirc.prepare"
   395  	// b. written	  by "projectionInputFetcher.run"
   396  	// c. read    	  by "projectionWorker.run"
   397  	inputCh  chan *projectionInput
   398  	outputCh chan *projectionOutput
   399  }
   400  
   401  // run gets projectionWorker's input and output resources from its
   402  // "inputCh" and "outputCh" channel, once the input and output resources are
   403  // abtained, it calculate the projection result use "input.chk" as the input
   404  // and "output.chk" as the output, once the calculation is done, it:
   405  //   a. Sends "nil" or error to "output.done" to mark this input is finished.
   406  //   b. Returns the "input" resource to "projectionInputFetcher.inputCh".
   407  //
   408  // It is finished and exited once:
   409  //   a. "ProjectionInterDirc" closes the "globalFinishCh".
   410  func (w *projectionWorker) run(ctx context.Context) {
   411  	defer trace.StartRegion(ctx, "ProjectionWorker").End()
   412  	var output *projectionOutput
   413  	defer func() {
   414  		if r := recover(); r != nil {
   415  			recoveryProjection(output, r)
   416  		}
   417  		w.proj.wg.Done()
   418  	}()
   419  	for {
   420  		input := readProjectionInput(w.inputCh, w.globalFinishCh)
   421  		if input == nil {
   422  			return
   423  		}
   424  
   425  		output = readProjectionOutput(w.outputCh, w.globalFinishCh)
   426  		if output == nil {
   427  			return
   428  		}
   429  
   430  		mSize := output.chk.MemoryUsage() + input.chk.MemoryUsage()
   431  		err := w.evaluatorSuit.Run(w.sctx, input.chk, output.chk)
   432  		w.proj.memTracker.Consume(output.chk.MemoryUsage() + input.chk.MemoryUsage() - mSize)
   433  		output.done <- err
   434  
   435  		if err != nil {
   436  			return
   437  		}
   438  
   439  		w.inputGiveBackCh <- input
   440  	}
   441  }
   442  
   443  func recoveryProjection(output *projectionOutput, r interface{}) {
   444  	if output != nil {
   445  		output.done <- errors.Errorf("%v", r)
   446  	}
   447  	buf := soliton.GetStack()
   448  	logutil.BgLogger().Error("projection interlock panicked", zap.String("error", fmt.Sprintf("%v", r)), zap.String("stack", string(buf)))
   449  }
   450  
   451  func readProjectionInput(inputCh <-chan *projectionInput, finishCh <-chan struct{}) *projectionInput {
   452  	select {
   453  	case <-finishCh:
   454  		return nil
   455  	case input, ok := <-inputCh:
   456  		if !ok {
   457  			return nil
   458  		}
   459  		return input
   460  	}
   461  }
   462  
   463  func readProjectionOutput(outputCh <-chan *projectionOutput, finishCh <-chan struct{}) *projectionOutput {
   464  	select {
   465  	case <-finishCh:
   466  		return nil
   467  	case output, ok := <-outputCh:
   468  		if !ok {
   469  			return nil
   470  		}
   471  		return output
   472  	}
   473  }