github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowflow/row_based_flow.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rowflow
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"sync"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/flowinfra"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/rowexec"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    23  	"github.com/cockroachdb/cockroach/pkg/util/log"
    24  	"github.com/cockroachdb/errors"
    25  )
    26  
// rowBasedFlow is the row-based (non-vectorized, see IsVectorized)
// implementation of flowinfra.Flow. It embeds a FlowBase and adds the
// bookkeeping needed to connect LOCAL streams between processors.
type rowBasedFlow struct {
	*flowinfra.FlowBase

	// localStreams maps the ID of each LOCAL stream to the receiver registered
	// for it by setupInboundStream. setupOutboundStream hands that receiver to
	// the producer and then sets the entry to nil to mark the stream as
	// connected. The map is allocated lazily.
	localStreams map[execinfrapb.StreamID]execinfra.RowReceiver
}
    32  
    33  var _ flowinfra.Flow = &rowBasedFlow{}
    34  
    35  var rowBasedFlowPool = sync.Pool{
    36  	New: func() interface{} {
    37  		return &rowBasedFlow{}
    38  	},
    39  }
    40  
    41  // NewRowBasedFlow returns a row based flow using base as its FlowBase.
    42  func NewRowBasedFlow(base *flowinfra.FlowBase) flowinfra.Flow {
    43  	rbf := rowBasedFlowPool.Get().(*rowBasedFlow)
    44  	rbf.FlowBase = base
    45  	return rbf
    46  }
    47  
    48  // Setup if part of the flowinfra.Flow interface.
    49  func (f *rowBasedFlow) Setup(
    50  	ctx context.Context, spec *execinfrapb.FlowSpec, opt flowinfra.FuseOpt,
    51  ) (context.Context, error) {
    52  	var err error
    53  	ctx, err = f.FlowBase.Setup(ctx, spec, opt)
    54  	if err != nil {
    55  		return ctx, err
    56  	}
    57  	// First step: setup the input synchronizers for all processors.
    58  	inputSyncs, err := f.setupInputSyncs(ctx, spec, opt)
    59  	if err != nil {
    60  		return ctx, err
    61  	}
    62  
    63  	// Then, populate processors.
    64  	return ctx, f.setupProcessors(ctx, spec, inputSyncs)
    65  }
    66  
    67  // setupProcessors creates processors for each spec in f.spec, fusing processors
    68  // together when possible (when an upstream processor implements RowSource, only
    69  // has one output, and that output is a simple PASS_THROUGH output), and
    70  // populates f.processors with all created processors that weren't fused to and
    71  // thus need their own goroutine.
    72  func (f *rowBasedFlow) setupProcessors(
    73  	ctx context.Context, spec *execinfrapb.FlowSpec, inputSyncs [][]execinfra.RowSource,
    74  ) error {
    75  	processors := make([]execinfra.Processor, 0, len(spec.Processors))
    76  
    77  	// Populate processors: see which processors need their own goroutine and
    78  	// which are fused with their consumer.
    79  	for i := range spec.Processors {
    80  		pspec := &spec.Processors[i]
    81  		p, err := f.makeProcessor(ctx, pspec, inputSyncs[i])
    82  		if err != nil {
    83  			return err
    84  		}
    85  
    86  		// fuse will return true if we managed to fuse p with its consumer.
    87  		fuse := func() bool {
    88  			// If the processor implements RowSource try to hook it up directly to the
    89  			// input of a later processor.
    90  			source, ok := p.(execinfra.RowSource)
    91  			if !ok {
    92  				return false
    93  			}
    94  			if len(pspec.Output) != 1 {
    95  				// The processor has more than one output, use the normal routing
    96  				// machinery.
    97  				return false
    98  			}
    99  			ospec := &pspec.Output[0]
   100  			if ospec.Type != execinfrapb.OutputRouterSpec_PASS_THROUGH {
   101  				// The output is not pass-through and thus is being sent through a
   102  				// router.
   103  				return false
   104  			}
   105  			if len(ospec.Streams) != 1 {
   106  				// The output contains more than one stream.
   107  				return false
   108  			}
   109  
   110  			for pIdx, ps := range spec.Processors {
   111  				if pIdx <= i {
   112  					// Skip processors which have already been created.
   113  					continue
   114  				}
   115  				for inIdx, in := range ps.Input {
   116  					if len(in.Streams) == 1 {
   117  						if in.Streams[0].StreamID != ospec.Streams[0].StreamID {
   118  							continue
   119  						}
   120  						// We found a consumer to fuse our proc to.
   121  						inputSyncs[pIdx][inIdx] = source
   122  						return true
   123  					}
   124  					// ps has an input with multiple streams. This can be either a
   125  					// multiplexed RowChannel (in case of some unordered synchronizers)
   126  					// or an orderedSynchronizer (for other unordered synchronizers or
   127  					// ordered synchronizers). If it's a multiplexed RowChannel,
   128  					// then its inputs run in parallel, so there's no fusing with them.
   129  					// If it's an orderedSynchronizer, then we look inside it to see if
   130  					// the processor we're trying to fuse feeds into it.
   131  					orderedSync, ok := inputSyncs[pIdx][inIdx].(*orderedSynchronizer)
   132  					if !ok {
   133  						continue
   134  					}
   135  					// See if we can find a stream attached to the processor we're
   136  					// trying to fuse.
   137  					for sIdx, sspec := range in.Streams {
   138  						input := findProcByOutputStreamID(spec, sspec.StreamID)
   139  						if input == nil {
   140  							continue
   141  						}
   142  						if input.ProcessorID != pspec.ProcessorID {
   143  							continue
   144  						}
   145  						// Fuse the processor with this orderedSynchronizer.
   146  						orderedSync.sources[sIdx].src = source
   147  						return true
   148  					}
   149  				}
   150  			}
   151  			return false
   152  		}
   153  		if !fuse() {
   154  			processors = append(processors, p)
   155  		}
   156  	}
   157  	f.SetProcessors(processors)
   158  	return nil
   159  }
   160  
   161  // findProcByOutputStreamID looks in spec for a processor that has a
   162  // pass-through output router connected to the specified stream. Returns nil if
   163  // such a processor is not found.
   164  func findProcByOutputStreamID(
   165  	spec *execinfrapb.FlowSpec, streamID execinfrapb.StreamID,
   166  ) *execinfrapb.ProcessorSpec {
   167  	for i := range spec.Processors {
   168  		pspec := &spec.Processors[i]
   169  		if len(pspec.Output) > 1 {
   170  			// We don't have any processors with more than one output. But if we
   171  			// didn't, we couldn't fuse them, so ignore.
   172  			continue
   173  		}
   174  		ospec := &pspec.Output[0]
   175  		if ospec.Type != execinfrapb.OutputRouterSpec_PASS_THROUGH {
   176  			// The output is not pass-through and thus is being sent through a
   177  			// router.
   178  			continue
   179  		}
   180  		if len(ospec.Streams) != 1 {
   181  			panic(fmt.Sprintf("pass-through router with %d streams", len(ospec.Streams)))
   182  		}
   183  		if ospec.Streams[0].StreamID == streamID {
   184  			return pspec
   185  		}
   186  	}
   187  	return nil
   188  }
   189  
   190  func (f *rowBasedFlow) makeProcessor(
   191  	ctx context.Context, ps *execinfrapb.ProcessorSpec, inputs []execinfra.RowSource,
   192  ) (execinfra.Processor, error) {
   193  	if len(ps.Output) != 1 {
   194  		return nil, errors.Errorf("only single-output processors supported")
   195  	}
   196  	var output execinfra.RowReceiver
   197  	spec := &ps.Output[0]
   198  	if spec.Type == execinfrapb.OutputRouterSpec_PASS_THROUGH {
   199  		// There is no entity that corresponds to a pass-through router - we just
   200  		// use its output stream directly.
   201  		if len(spec.Streams) != 1 {
   202  			return nil, errors.Errorf("expected one stream for passthrough router")
   203  		}
   204  		var err error
   205  		output, err = f.setupOutboundStream(spec.Streams[0])
   206  		if err != nil {
   207  			return nil, err
   208  		}
   209  	} else {
   210  		r, err := f.setupRouter(spec)
   211  		if err != nil {
   212  			return nil, err
   213  		}
   214  		output = r
   215  		f.AddStartable(r)
   216  	}
   217  
   218  	// No output router or channel is safe to push rows to, unless the row won't
   219  	// be modified later by the thing that created it. No processor creates safe
   220  	// rows, either. So, we always wrap our outputs in copyingRowReceivers. These
   221  	// outputs aren't used at all if they are processors that get fused to their
   222  	// upstreams, though, which means that copyingRowReceivers are only used on
   223  	// non-fused processors like the output routers.
   224  
   225  	output = &copyingRowReceiver{RowReceiver: output}
   226  
   227  	outputs := []execinfra.RowReceiver{output}
   228  	proc, err := rowexec.NewProcessor(
   229  		ctx,
   230  		&f.FlowCtx,
   231  		ps.ProcessorID,
   232  		&ps.Core,
   233  		&ps.Post,
   234  		inputs,
   235  		outputs,
   236  		f.GetLocalProcessors(),
   237  	)
   238  	if err != nil {
   239  		return nil, err
   240  	}
   241  
   242  	// Initialize any routers (the setupRouter case above) and outboxes.
   243  	types := proc.OutputTypes()
   244  	rowRecv := output.(*copyingRowReceiver).RowReceiver
   245  	switch o := rowRecv.(type) {
   246  	case router:
   247  		o.init(ctx, &f.FlowCtx, types)
   248  	case *flowinfra.Outbox:
   249  		o.Init(types)
   250  	}
   251  	return proc, nil
   252  }
   253  
   254  // setupInputSyncs populates a slice of input syncs, one for each Processor in
   255  // f.Spec, each containing one RowSource for each input to that Processor.
   256  func (f *rowBasedFlow) setupInputSyncs(
   257  	ctx context.Context, spec *execinfrapb.FlowSpec, opt flowinfra.FuseOpt,
   258  ) ([][]execinfra.RowSource, error) {
   259  	inputSyncs := make([][]execinfra.RowSource, len(spec.Processors))
   260  	for pIdx, ps := range spec.Processors {
   261  		for _, is := range ps.Input {
   262  			if len(is.Streams) == 0 {
   263  				return nil, errors.Errorf("input sync with no streams")
   264  			}
   265  			var sync execinfra.RowSource
   266  			if is.Type != execinfrapb.InputSyncSpec_UNORDERED &&
   267  				is.Type != execinfrapb.InputSyncSpec_ORDERED {
   268  				return nil, errors.Errorf("unsupported input sync type %s", is.Type)
   269  			}
   270  
   271  			if is.Type == execinfrapb.InputSyncSpec_UNORDERED {
   272  				if opt == flowinfra.FuseNormally || len(is.Streams) == 1 {
   273  					// Unordered synchronizer: create a RowChannel for each input.
   274  
   275  					mrc := &execinfra.RowChannel{}
   276  					mrc.InitWithNumSenders(is.ColumnTypes, len(is.Streams))
   277  					for _, s := range is.Streams {
   278  						if err := f.setupInboundStream(ctx, s, mrc); err != nil {
   279  							return nil, err
   280  						}
   281  					}
   282  					sync = mrc
   283  				}
   284  			}
   285  			if sync == nil {
   286  				// We have an ordered synchronizer or an unordered one that we really
   287  				// want to fuse because of the FuseAggressively option. We'll create a
   288  				// RowChannel for each input for now, but the inputs might be fused with
   289  				// the orderedSynchronizer later (in which case the RowChannels will be
   290  				// dropped).
   291  				streams := make([]execinfra.RowSource, len(is.Streams))
   292  				for i, s := range is.Streams {
   293  					rowChan := &execinfra.RowChannel{}
   294  					rowChan.InitWithNumSenders(is.ColumnTypes, 1 /* numSenders */)
   295  					if err := f.setupInboundStream(ctx, s, rowChan); err != nil {
   296  						return nil, err
   297  					}
   298  					streams[i] = rowChan
   299  				}
   300  				var err error
   301  				ordering := sqlbase.NoOrdering
   302  				if is.Type == execinfrapb.InputSyncSpec_ORDERED {
   303  					ordering = execinfrapb.ConvertToColumnOrdering(is.Ordering)
   304  				}
   305  				sync, err = makeOrderedSync(ordering, f.EvalCtx, streams)
   306  				if err != nil {
   307  					return nil, err
   308  				}
   309  			}
   310  			inputSyncs[pIdx] = append(inputSyncs[pIdx], sync)
   311  		}
   312  	}
   313  	return inputSyncs, nil
   314  }
   315  
   316  // setupInboundStream adds a stream to the stream map (inboundStreams or
   317  // localStreams).
   318  func (f *rowBasedFlow) setupInboundStream(
   319  	ctx context.Context, spec execinfrapb.StreamEndpointSpec, receiver execinfra.RowReceiver,
   320  ) error {
   321  	sid := spec.StreamID
   322  	switch spec.Type {
   323  	case execinfrapb.StreamEndpointSpec_SYNC_RESPONSE:
   324  		return errors.Errorf("inbound stream of type SYNC_RESPONSE")
   325  
   326  	case execinfrapb.StreamEndpointSpec_REMOTE:
   327  		if err := f.CheckInboundStreamID(sid); err != nil {
   328  			return err
   329  		}
   330  		if log.V(2) {
   331  			log.Infof(ctx, "set up inbound stream %d", sid)
   332  		}
   333  		f.AddRemoteStream(sid, flowinfra.NewInboundStreamInfo(
   334  			flowinfra.RowInboundStreamHandler{RowReceiver: receiver},
   335  			f.GetWaitGroup(),
   336  		))
   337  
   338  	case execinfrapb.StreamEndpointSpec_LOCAL:
   339  		if _, found := f.localStreams[sid]; found {
   340  			return errors.Errorf("local stream %d has multiple consumers", sid)
   341  		}
   342  		if f.localStreams == nil {
   343  			f.localStreams = make(map[execinfrapb.StreamID]execinfra.RowReceiver)
   344  		}
   345  		f.localStreams[sid] = receiver
   346  
   347  	default:
   348  		return errors.Errorf("invalid stream type %d", spec.Type)
   349  	}
   350  
   351  	return nil
   352  }
   353  
   354  // setupOutboundStream sets up an output stream; if the stream is local, the
   355  // RowChannel is looked up in the localStreams map; otherwise an outgoing
   356  // mailbox is created.
   357  func (f *rowBasedFlow) setupOutboundStream(
   358  	spec execinfrapb.StreamEndpointSpec,
   359  ) (execinfra.RowReceiver, error) {
   360  	sid := spec.StreamID
   361  	switch spec.Type {
   362  	case execinfrapb.StreamEndpointSpec_SYNC_RESPONSE:
   363  		return f.GetSyncFlowConsumer(), nil
   364  
   365  	case execinfrapb.StreamEndpointSpec_REMOTE:
   366  		outbox := flowinfra.NewOutbox(&f.FlowCtx, spec.TargetNodeID, f.ID, sid)
   367  		f.AddStartable(outbox)
   368  		return outbox, nil
   369  
   370  	case execinfrapb.StreamEndpointSpec_LOCAL:
   371  		rowChan, found := f.localStreams[sid]
   372  		if !found {
   373  			return nil, errors.Errorf("unconnected inbound stream %d", sid)
   374  		}
   375  		// Once we "connect" a stream, we set the value in the map to nil.
   376  		if rowChan == nil {
   377  			return nil, errors.Errorf("stream %d has multiple connections", sid)
   378  		}
   379  		f.localStreams[sid] = nil
   380  		return rowChan, nil
   381  	default:
   382  		return nil, errors.Errorf("invalid stream type %d", spec.Type)
   383  	}
   384  }
   385  
   386  // setupRouter initializes a router and the outbound streams.
   387  //
   388  // Pass-through routers are not supported; they should be handled separately.
   389  func (f *rowBasedFlow) setupRouter(spec *execinfrapb.OutputRouterSpec) (router, error) {
   390  	streams := make([]execinfra.RowReceiver, len(spec.Streams))
   391  	for i := range spec.Streams {
   392  		var err error
   393  		streams[i], err = f.setupOutboundStream(spec.Streams[i])
   394  		if err != nil {
   395  			return nil, err
   396  		}
   397  	}
   398  	return makeRouter(spec, streams)
   399  }
   400  
// IsVectorized is part of the flowinfra.Flow interface. It always returns
// false for a rowBasedFlow.
func (f *rowBasedFlow) IsVectorized() bool {
	return false
}
   405  
   406  // Release releases this rowBasedFlow back to the pool.
   407  func (f *rowBasedFlow) Release() {
   408  	*f = rowBasedFlow{}
   409  	rowBasedFlowPool.Put(f)
   410  }
   411  
// Cleanup is part of the flowinfra.Flow interface. It cleans up the embedded
// FlowBase and then releases this rowBasedFlow back to the pool.
func (f *rowBasedFlow) Cleanup(ctx context.Context) {
	f.FlowBase.Cleanup(ctx)
	// Release clears f.FlowBase, so it must run after the call above.
	f.Release()
}
   417  
// copyingRowReceiver wraps a RowReceiver; its Push method copies every non-nil
// row before forwarding it to the wrapped receiver.
type copyingRowReceiver struct {
	execinfra.RowReceiver
	// alloc is the allocator used by Push to copy rows.
	alloc sqlbase.EncDatumRowAlloc
}
   422  
   423  func (r *copyingRowReceiver) Push(
   424  	row sqlbase.EncDatumRow, meta *execinfrapb.ProducerMetadata,
   425  ) execinfra.ConsumerStatus {
   426  	if row != nil {
   427  		row = r.alloc.CopyRow(row)
   428  	}
   429  	return r.RowReceiver.Push(row, meta)
   430  }