github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowflow/row_based_flow.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package rowflow 12 13 import ( 14 "context" 15 "fmt" 16 "sync" 17 18 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 19 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 20 "github.com/cockroachdb/cockroach/pkg/sql/flowinfra" 21 "github.com/cockroachdb/cockroach/pkg/sql/rowexec" 22 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 23 "github.com/cockroachdb/cockroach/pkg/util/log" 24 "github.com/cockroachdb/errors" 25 ) 26 27 type rowBasedFlow struct { 28 *flowinfra.FlowBase 29 30 localStreams map[execinfrapb.StreamID]execinfra.RowReceiver 31 } 32 33 var _ flowinfra.Flow = &rowBasedFlow{} 34 35 var rowBasedFlowPool = sync.Pool{ 36 New: func() interface{} { 37 return &rowBasedFlow{} 38 }, 39 } 40 41 // NewRowBasedFlow returns a row based flow using base as its FlowBase. 42 func NewRowBasedFlow(base *flowinfra.FlowBase) flowinfra.Flow { 43 rbf := rowBasedFlowPool.Get().(*rowBasedFlow) 44 rbf.FlowBase = base 45 return rbf 46 } 47 48 // Setup if part of the flowinfra.Flow interface. 49 func (f *rowBasedFlow) Setup( 50 ctx context.Context, spec *execinfrapb.FlowSpec, opt flowinfra.FuseOpt, 51 ) (context.Context, error) { 52 var err error 53 ctx, err = f.FlowBase.Setup(ctx, spec, opt) 54 if err != nil { 55 return ctx, err 56 } 57 // First step: setup the input synchronizers for all processors. 58 inputSyncs, err := f.setupInputSyncs(ctx, spec, opt) 59 if err != nil { 60 return ctx, err 61 } 62 63 // Then, populate processors. 64 return ctx, f.setupProcessors(ctx, spec, inputSyncs) 65 } 66 67 // setupProcessors creates processors for each spec in f.spec, fusing processors 68 // together when possible (when an upstream processor implements RowSource, only 69 // has one output, and that output is a simple PASS_THROUGH output), and 70 // populates f.processors with all created processors that weren't fused to and 71 // thus need their own goroutine. 72 func (f *rowBasedFlow) setupProcessors( 73 ctx context.Context, spec *execinfrapb.FlowSpec, inputSyncs [][]execinfra.RowSource, 74 ) error { 75 processors := make([]execinfra.Processor, 0, len(spec.Processors)) 76 77 // Populate processors: see which processors need their own goroutine and 78 // which are fused with their consumer. 79 for i := range spec.Processors { 80 pspec := &spec.Processors[i] 81 p, err := f.makeProcessor(ctx, pspec, inputSyncs[i]) 82 if err != nil { 83 return err 84 } 85 86 // fuse will return true if we managed to fuse p with its consumer. 87 fuse := func() bool { 88 // If the processor implements RowSource try to hook it up directly to the 89 // input of a later processor. 90 source, ok := p.(execinfra.RowSource) 91 if !ok { 92 return false 93 } 94 if len(pspec.Output) != 1 { 95 // The processor has more than one output, use the normal routing 96 // machinery. 97 return false 98 } 99 ospec := &pspec.Output[0] 100 if ospec.Type != execinfrapb.OutputRouterSpec_PASS_THROUGH { 101 // The output is not pass-through and thus is being sent through a 102 // router. 103 return false 104 } 105 if len(ospec.Streams) != 1 { 106 // The output contains more than one stream. 107 return false 108 } 109 110 for pIdx, ps := range spec.Processors { 111 if pIdx <= i { 112 // Skip processors which have already been created. 113 continue 114 } 115 for inIdx, in := range ps.Input { 116 if len(in.Streams) == 1 { 117 if in.Streams[0].StreamID != ospec.Streams[0].StreamID { 118 continue 119 } 120 // We found a consumer to fuse our proc to. 121 inputSyncs[pIdx][inIdx] = source 122 return true 123 } 124 // ps has an input with multiple streams. This can be either a 125 // multiplexed RowChannel (in case of some unordered synchronizers) 126 // or an orderedSynchronizer (for other unordered synchronizers or 127 // ordered synchronizers). If it's a multiplexed RowChannel, 128 // then its inputs run in parallel, so there's no fusing with them. 129 // If it's an orderedSynchronizer, then we look inside it to see if 130 // the processor we're trying to fuse feeds into it. 131 orderedSync, ok := inputSyncs[pIdx][inIdx].(*orderedSynchronizer) 132 if !ok { 133 continue 134 } 135 // See if we can find a stream attached to the processor we're 136 // trying to fuse. 137 for sIdx, sspec := range in.Streams { 138 input := findProcByOutputStreamID(spec, sspec.StreamID) 139 if input == nil { 140 continue 141 } 142 if input.ProcessorID != pspec.ProcessorID { 143 continue 144 } 145 // Fuse the processor with this orderedSynchronizer. 146 orderedSync.sources[sIdx].src = source 147 return true 148 } 149 } 150 } 151 return false 152 } 153 if !fuse() { 154 processors = append(processors, p) 155 } 156 } 157 f.SetProcessors(processors) 158 return nil 159 } 160 161 // findProcByOutputStreamID looks in spec for a processor that has a 162 // pass-through output router connected to the specified stream. Returns nil if 163 // such a processor is not found. 164 func findProcByOutputStreamID( 165 spec *execinfrapb.FlowSpec, streamID execinfrapb.StreamID, 166 ) *execinfrapb.ProcessorSpec { 167 for i := range spec.Processors { 168 pspec := &spec.Processors[i] 169 if len(pspec.Output) > 1 { 170 // We don't have any processors with more than one output. But if we 171 // didn't, we couldn't fuse them, so ignore. 172 continue 173 } 174 ospec := &pspec.Output[0] 175 if ospec.Type != execinfrapb.OutputRouterSpec_PASS_THROUGH { 176 // The output is not pass-through and thus is being sent through a 177 // router. 178 continue 179 } 180 if len(ospec.Streams) != 1 { 181 panic(fmt.Sprintf("pass-through router with %d streams", len(ospec.Streams))) 182 } 183 if ospec.Streams[0].StreamID == streamID { 184 return pspec 185 } 186 } 187 return nil 188 } 189 190 func (f *rowBasedFlow) makeProcessor( 191 ctx context.Context, ps *execinfrapb.ProcessorSpec, inputs []execinfra.RowSource, 192 ) (execinfra.Processor, error) { 193 if len(ps.Output) != 1 { 194 return nil, errors.Errorf("only single-output processors supported") 195 } 196 var output execinfra.RowReceiver 197 spec := &ps.Output[0] 198 if spec.Type == execinfrapb.OutputRouterSpec_PASS_THROUGH { 199 // There is no entity that corresponds to a pass-through router - we just 200 // use its output stream directly. 201 if len(spec.Streams) != 1 { 202 return nil, errors.Errorf("expected one stream for passthrough router") 203 } 204 var err error 205 output, err = f.setupOutboundStream(spec.Streams[0]) 206 if err != nil { 207 return nil, err 208 } 209 } else { 210 r, err := f.setupRouter(spec) 211 if err != nil { 212 return nil, err 213 } 214 output = r 215 f.AddStartable(r) 216 } 217 218 // No output router or channel is safe to push rows to, unless the row won't 219 // be modified later by the thing that created it. No processor creates safe 220 // rows, either. So, we always wrap our outputs in copyingRowReceivers. These 221 // outputs aren't used at all if they are processors that get fused to their 222 // upstreams, though, which means that copyingRowReceivers are only used on 223 // non-fused processors like the output routers. 224 225 output = ©ingRowReceiver{RowReceiver: output} 226 227 outputs := []execinfra.RowReceiver{output} 228 proc, err := rowexec.NewProcessor( 229 ctx, 230 &f.FlowCtx, 231 ps.ProcessorID, 232 &ps.Core, 233 &ps.Post, 234 inputs, 235 outputs, 236 f.GetLocalProcessors(), 237 ) 238 if err != nil { 239 return nil, err 240 } 241 242 // Initialize any routers (the setupRouter case above) and outboxes. 243 types := proc.OutputTypes() 244 rowRecv := output.(*copyingRowReceiver).RowReceiver 245 switch o := rowRecv.(type) { 246 case router: 247 o.init(ctx, &f.FlowCtx, types) 248 case *flowinfra.Outbox: 249 o.Init(types) 250 } 251 return proc, nil 252 } 253 254 // setupInputSyncs populates a slice of input syncs, one for each Processor in 255 // f.Spec, each containing one RowSource for each input to that Processor. 256 func (f *rowBasedFlow) setupInputSyncs( 257 ctx context.Context, spec *execinfrapb.FlowSpec, opt flowinfra.FuseOpt, 258 ) ([][]execinfra.RowSource, error) { 259 inputSyncs := make([][]execinfra.RowSource, len(spec.Processors)) 260 for pIdx, ps := range spec.Processors { 261 for _, is := range ps.Input { 262 if len(is.Streams) == 0 { 263 return nil, errors.Errorf("input sync with no streams") 264 } 265 var sync execinfra.RowSource 266 if is.Type != execinfrapb.InputSyncSpec_UNORDERED && 267 is.Type != execinfrapb.InputSyncSpec_ORDERED { 268 return nil, errors.Errorf("unsupported input sync type %s", is.Type) 269 } 270 271 if is.Type == execinfrapb.InputSyncSpec_UNORDERED { 272 if opt == flowinfra.FuseNormally || len(is.Streams) == 1 { 273 // Unordered synchronizer: create a RowChannel for each input. 274 275 mrc := &execinfra.RowChannel{} 276 mrc.InitWithNumSenders(is.ColumnTypes, len(is.Streams)) 277 for _, s := range is.Streams { 278 if err := f.setupInboundStream(ctx, s, mrc); err != nil { 279 return nil, err 280 } 281 } 282 sync = mrc 283 } 284 } 285 if sync == nil { 286 // We have an ordered synchronizer or an unordered one that we really 287 // want to fuse because of the FuseAggressively option. We'll create a 288 // RowChannel for each input for now, but the inputs might be fused with 289 // the orderedSynchronizer later (in which case the RowChannels will be 290 // dropped). 291 streams := make([]execinfra.RowSource, len(is.Streams)) 292 for i, s := range is.Streams { 293 rowChan := &execinfra.RowChannel{} 294 rowChan.InitWithNumSenders(is.ColumnTypes, 1 /* numSenders */) 295 if err := f.setupInboundStream(ctx, s, rowChan); err != nil { 296 return nil, err 297 } 298 streams[i] = rowChan 299 } 300 var err error 301 ordering := sqlbase.NoOrdering 302 if is.Type == execinfrapb.InputSyncSpec_ORDERED { 303 ordering = execinfrapb.ConvertToColumnOrdering(is.Ordering) 304 } 305 sync, err = makeOrderedSync(ordering, f.EvalCtx, streams) 306 if err != nil { 307 return nil, err 308 } 309 } 310 inputSyncs[pIdx] = append(inputSyncs[pIdx], sync) 311 } 312 } 313 return inputSyncs, nil 314 } 315 316 // setupInboundStream adds a stream to the stream map (inboundStreams or 317 // localStreams). 318 func (f *rowBasedFlow) setupInboundStream( 319 ctx context.Context, spec execinfrapb.StreamEndpointSpec, receiver execinfra.RowReceiver, 320 ) error { 321 sid := spec.StreamID 322 switch spec.Type { 323 case execinfrapb.StreamEndpointSpec_SYNC_RESPONSE: 324 return errors.Errorf("inbound stream of type SYNC_RESPONSE") 325 326 case execinfrapb.StreamEndpointSpec_REMOTE: 327 if err := f.CheckInboundStreamID(sid); err != nil { 328 return err 329 } 330 if log.V(2) { 331 log.Infof(ctx, "set up inbound stream %d", sid) 332 } 333 f.AddRemoteStream(sid, flowinfra.NewInboundStreamInfo( 334 flowinfra.RowInboundStreamHandler{RowReceiver: receiver}, 335 f.GetWaitGroup(), 336 )) 337 338 case execinfrapb.StreamEndpointSpec_LOCAL: 339 if _, found := f.localStreams[sid]; found { 340 return errors.Errorf("local stream %d has multiple consumers", sid) 341 } 342 if f.localStreams == nil { 343 f.localStreams = make(map[execinfrapb.StreamID]execinfra.RowReceiver) 344 } 345 f.localStreams[sid] = receiver 346 347 default: 348 return errors.Errorf("invalid stream type %d", spec.Type) 349 } 350 351 return nil 352 } 353 354 // setupOutboundStream sets up an output stream; if the stream is local, the 355 // RowChannel is looked up in the localStreams map; otherwise an outgoing 356 // mailbox is created. 357 func (f *rowBasedFlow) setupOutboundStream( 358 spec execinfrapb.StreamEndpointSpec, 359 ) (execinfra.RowReceiver, error) { 360 sid := spec.StreamID 361 switch spec.Type { 362 case execinfrapb.StreamEndpointSpec_SYNC_RESPONSE: 363 return f.GetSyncFlowConsumer(), nil 364 365 case execinfrapb.StreamEndpointSpec_REMOTE: 366 outbox := flowinfra.NewOutbox(&f.FlowCtx, spec.TargetNodeID, f.ID, sid) 367 f.AddStartable(outbox) 368 return outbox, nil 369 370 case execinfrapb.StreamEndpointSpec_LOCAL: 371 rowChan, found := f.localStreams[sid] 372 if !found { 373 return nil, errors.Errorf("unconnected inbound stream %d", sid) 374 } 375 // Once we "connect" a stream, we set the value in the map to nil. 376 if rowChan == nil { 377 return nil, errors.Errorf("stream %d has multiple connections", sid) 378 } 379 f.localStreams[sid] = nil 380 return rowChan, nil 381 default: 382 return nil, errors.Errorf("invalid stream type %d", spec.Type) 383 } 384 } 385 386 // setupRouter initializes a router and the outbound streams. 387 // 388 // Pass-through routers are not supported; they should be handled separately. 389 func (f *rowBasedFlow) setupRouter(spec *execinfrapb.OutputRouterSpec) (router, error) { 390 streams := make([]execinfra.RowReceiver, len(spec.Streams)) 391 for i := range spec.Streams { 392 var err error 393 streams[i], err = f.setupOutboundStream(spec.Streams[i]) 394 if err != nil { 395 return nil, err 396 } 397 } 398 return makeRouter(spec, streams) 399 } 400 401 // IsVectorized is part of the flowinfra.Flow interface. 402 func (f *rowBasedFlow) IsVectorized() bool { 403 return false 404 } 405 406 // Release releases this rowBasedFlow back to the pool. 407 func (f *rowBasedFlow) Release() { 408 *f = rowBasedFlow{} 409 rowBasedFlowPool.Put(f) 410 } 411 412 // Cleanup is part of the flowinfra.Flow interface. 413 func (f *rowBasedFlow) Cleanup(ctx context.Context) { 414 f.FlowBase.Cleanup(ctx) 415 f.Release() 416 } 417 418 type copyingRowReceiver struct { 419 execinfra.RowReceiver 420 alloc sqlbase.EncDatumRowAlloc 421 } 422 423 func (r *copyingRowReceiver) Push( 424 row sqlbase.EncDatumRow, meta *execinfrapb.ProducerMetadata, 425 ) execinfra.ConsumerStatus { 426 if row != nil { 427 row = r.alloc.CopyRow(row) 428 } 429 return r.RowReceiver.Push(row, meta) 430 }