github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colflow/vectorized_flow.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package colflow 12 13 import ( 14 "context" 15 "fmt" 16 "math" 17 "path/filepath" 18 "strconv" 19 "strings" 20 "sync" 21 "sync/atomic" 22 23 "github.com/cockroachdb/cockroach/pkg/col/coldata" 24 "github.com/cockroachdb/cockroach/pkg/col/coldataext" 25 "github.com/cockroachdb/cockroach/pkg/rpc/nodedialer" 26 "github.com/cockroachdb/cockroach/pkg/settings" 27 "github.com/cockroachdb/cockroach/pkg/sql/colcontainer" 28 "github.com/cockroachdb/cockroach/pkg/sql/colexec" 29 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase" 30 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror" 31 "github.com/cockroachdb/cockroach/pkg/sql/colflow/colrpc" 32 "github.com/cockroachdb/cockroach/pkg/sql/colmem" 33 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 34 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 35 "github.com/cockroachdb/cockroach/pkg/sql/flowinfra" 36 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" 37 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" 38 "github.com/cockroachdb/cockroach/pkg/sql/rowexec" 39 "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" 40 "github.com/cockroachdb/cockroach/pkg/sql/types" 41 "github.com/cockroachdb/cockroach/pkg/util" 42 "github.com/cockroachdb/cockroach/pkg/util/log" 43 "github.com/cockroachdb/cockroach/pkg/util/metric" 44 "github.com/cockroachdb/cockroach/pkg/util/mon" 45 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 46 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 47 
"github.com/cockroachdb/cockroach/pkg/util/tracing" 48 "github.com/cockroachdb/cockroach/pkg/util/uuid" 49 "github.com/cockroachdb/errors" 50 "github.com/cockroachdb/logtags" 51 "github.com/marusama/semaphore" 52 opentracing "github.com/opentracing/opentracing-go" 53 ) 54 55 // countingSemaphore is a semaphore that keeps track of the semaphore count from 56 // its perspective. 57 type countingSemaphore struct { 58 semaphore.Semaphore 59 globalCount *metric.Gauge 60 count int64 61 } 62 63 func (s *countingSemaphore) Acquire(ctx context.Context, n int) error { 64 if err := s.Semaphore.Acquire(ctx, n); err != nil { 65 return err 66 } 67 atomic.AddInt64(&s.count, int64(n)) 68 s.globalCount.Inc(int64(n)) 69 return nil 70 } 71 72 func (s *countingSemaphore) TryAcquire(n int) bool { 73 success := s.Semaphore.TryAcquire(n) 74 if !success { 75 return false 76 } 77 atomic.AddInt64(&s.count, int64(n)) 78 s.globalCount.Inc(int64(n)) 79 return success 80 } 81 82 func (s *countingSemaphore) Release(n int) int { 83 atomic.AddInt64(&s.count, int64(-n)) 84 s.globalCount.Dec(int64(n)) 85 return s.Semaphore.Release(n) 86 } 87 88 type vectorizedFlow struct { 89 *flowinfra.FlowBase 90 // operatorConcurrency is set if any operators are executed in parallel. 91 operatorConcurrency bool 92 93 // countingSemaphore is a wrapper over a semaphore.Semaphore that keeps track 94 // of the number of resources held in a semaphore.Semaphore requested from the 95 // context of this flow so that these can be released unconditionally upon 96 // Cleanup. 97 countingSemaphore *countingSemaphore 98 99 // streamingMemAccounts are the memory accounts that are tracking the static 100 // memory usage of the whole vectorized flow as well as all dynamic memory of 101 // the streaming components. 102 streamingMemAccounts []*mon.BoundAccount 103 104 // monitors are the monitors (of both memory and disk usage) of the 105 // buffering components. 
106 monitors []*mon.BytesMonitor 107 // accounts are the accounts that are tracking the dynamic memory and disk 108 // usage of the buffering components. 109 accounts []*mon.BoundAccount 110 111 tempStorage struct { 112 // path is the path to this flow's temporary storage directory. 113 path string 114 createdStateMu struct { 115 syncutil.Mutex 116 // created is a protected boolean that is true when the flow's temporary 117 // storage directory has been created. 118 created bool 119 } 120 } 121 122 testingKnobs struct { 123 // onSetupFlow is a testing knob that is called before calling 124 // creator.setupFlow with the given creator. 125 onSetupFlow func(*vectorizedFlowCreator) 126 } 127 } 128 129 var _ flowinfra.Flow = &vectorizedFlow{} 130 131 var vectorizedFlowPool = sync.Pool{ 132 New: func() interface{} { 133 return &vectorizedFlow{} 134 }, 135 } 136 137 // NewVectorizedFlow creates a new vectorized flow given the flow base. 138 func NewVectorizedFlow(base *flowinfra.FlowBase) flowinfra.Flow { 139 vf := vectorizedFlowPool.Get().(*vectorizedFlow) 140 vf.FlowBase = base 141 return vf 142 } 143 144 // VectorizeTestingBatchSize is a testing cluster setting that sets the default 145 // batch size used by the vectorized execution engine. A low batch size is 146 // useful to test batch reuse. 147 var VectorizeTestingBatchSize = settings.RegisterValidatedIntSetting( 148 "sql.testing.vectorize.batch_size", 149 fmt.Sprintf("the size of a batch of rows in the vectorized engine (0=default, value must be less than %d)", coldata.MaxBatchSize), 150 0, 151 func(newBatchSize int64) error { 152 if newBatchSize > coldata.MaxBatchSize { 153 return pgerror.Newf(pgcode.InvalidParameterValue, "batch size %d may not be larger than %d", newBatchSize, coldata.MaxBatchSize) 154 } 155 return nil 156 }, 157 ) 158 159 // Setup is part of the flowinfra.Flow interface. 
160 func (f *vectorizedFlow) Setup( 161 ctx context.Context, spec *execinfrapb.FlowSpec, opt flowinfra.FuseOpt, 162 ) (context.Context, error) { 163 var err error 164 ctx, err = f.FlowBase.Setup(ctx, spec, opt) 165 if err != nil { 166 return ctx, err 167 } 168 log.VEventf(ctx, 1, "setting up vectorize flow %s", f.ID.Short()) 169 recordingStats := false 170 if sp := opentracing.SpanFromContext(ctx); sp != nil && tracing.IsRecording(sp) { 171 recordingStats = true 172 } 173 helper := &vectorizedFlowCreatorHelper{f: f.FlowBase} 174 175 testingBatchSize := int64(0) 176 if f.FlowCtx.Cfg.Settings != nil { 177 testingBatchSize = VectorizeTestingBatchSize.Get(&f.FlowCtx.Cfg.Settings.SV) 178 } 179 if testingBatchSize != 0 { 180 if err := coldata.SetBatchSizeForTests(int(testingBatchSize)); err != nil { 181 return ctx, err 182 } 183 } else { 184 coldata.ResetBatchSizeForTests() 185 } 186 187 // Create a name for this flow's temporary directory. Note that this directory 188 // is lazily created when necessary and cleaned up in Cleanup(). The directory 189 // name is the flow's ID in most cases apart from when the flow's ID is unset 190 // (in the case of local flows). In this case the directory will be prefixed 191 // with "local-flow" and a uuid is generated on the spot to provide a unique 192 // name. 193 tempDirName := f.GetID().String() 194 if f.GetID().Equal(uuid.Nil) { 195 tempDirName = "local-flow" + uuid.FastMakeV4().String() 196 } 197 f.tempStorage.path = filepath.Join(f.Cfg.TempStoragePath, tempDirName) 198 diskQueueCfg := colcontainer.DiskQueueCfg{ 199 FS: f.Cfg.TempFS, 200 Path: f.tempStorage.path, 201 OnNewDiskQueueCb: func() { 202 f.tempStorage.createdStateMu.Lock() 203 defer f.tempStorage.createdStateMu.Unlock() 204 if f.tempStorage.createdStateMu.created { 205 // The temporary storage directory has already been created. 
206 return 207 } 208 log.VEventf(ctx, 1, "flow %s spilled to disk, stack trace: %s", f.ID, util.GetSmallTrace(2)) 209 if err := f.Cfg.TempFS.MkdirAll(f.tempStorage.path); err != nil { 210 colexecerror.InternalError(errors.Errorf("unable to create temporary storage directory: %v", err)) 211 } 212 f.tempStorage.createdStateMu.created = true 213 }, 214 } 215 if err := diskQueueCfg.EnsureDefaults(); err != nil { 216 return ctx, err 217 } 218 f.countingSemaphore = &countingSemaphore{Semaphore: f.Cfg.VecFDSemaphore, globalCount: f.Cfg.Metrics.VecOpenFDs} 219 creator := newVectorizedFlowCreator( 220 helper, 221 vectorizedRemoteComponentCreator{}, 222 recordingStats, 223 f.GetWaitGroup(), 224 f.GetSyncFlowConsumer(), 225 f.GetFlowCtx().Cfg.NodeDialer, 226 f.GetID(), 227 diskQueueCfg, 228 f.countingSemaphore, 229 ) 230 if f.testingKnobs.onSetupFlow != nil { 231 f.testingKnobs.onSetupFlow(creator) 232 } 233 _, err = creator.setupFlow(ctx, f.GetFlowCtx(), spec.Processors, opt) 234 if err == nil { 235 f.operatorConcurrency = creator.operatorConcurrency 236 f.streamingMemAccounts = append(f.streamingMemAccounts, creator.streamingMemAccounts...) 237 f.monitors = append(f.monitors, creator.monitors...) 238 f.accounts = append(f.accounts, creator.accounts...) 239 log.VEventf(ctx, 1, "vectorized flow setup succeeded") 240 return ctx, nil 241 } 242 // It is (theoretically) possible that some of the memory monitoring 243 // infrastructure was created even in case of an error, and we need to clean 244 // that up. 245 for _, acc := range creator.streamingMemAccounts { 246 acc.Close(ctx) 247 } 248 for _, acc := range creator.accounts { 249 acc.Close(ctx) 250 } 251 for _, mon := range creator.monitors { 252 mon.Stop(ctx) 253 } 254 log.VEventf(ctx, 1, "failed to vectorize: %s", err) 255 return ctx, err 256 } 257 258 // IsVectorized is part of the flowinfra.Flow interface. 
259 func (f *vectorizedFlow) IsVectorized() bool { 260 return true 261 } 262 263 // ConcurrentExecution is part of the flowinfra.Flow interface. 264 func (f *vectorizedFlow) ConcurrentExecution() bool { 265 return f.operatorConcurrency || f.FlowBase.ConcurrentExecution() 266 } 267 268 // Release releases this vectorizedFlow back to the pool. 269 func (f *vectorizedFlow) Release() { 270 *f = vectorizedFlow{} 271 vectorizedFlowPool.Put(f) 272 } 273 274 // Cleanup is part of the flowinfra.Flow interface. 275 func (f *vectorizedFlow) Cleanup(ctx context.Context) { 276 // This cleans up all the memory and disk monitoring of the vectorized flow. 277 for _, acc := range f.streamingMemAccounts { 278 acc.Close(ctx) 279 } 280 for _, acc := range f.accounts { 281 acc.Close(ctx) 282 } 283 for _, mon := range f.monitors { 284 mon.Stop(ctx) 285 } 286 287 f.tempStorage.createdStateMu.Lock() 288 created := f.tempStorage.createdStateMu.created 289 f.tempStorage.createdStateMu.Unlock() 290 if created { 291 if err := f.Cfg.TempFS.RemoveAll(f.tempStorage.path); err != nil { 292 // Log error as a Warning but keep on going to close the memory 293 // infrastructure. 294 log.Warningf( 295 ctx, 296 "unable to remove flow %s's temporary directory at %s, files may be left over: %v", 297 f.GetID().Short(), 298 f.tempStorage.path, 299 err, 300 ) 301 } 302 } 303 // Release any leftover temporary storage file descriptors from this flow. 304 if unreleased := atomic.LoadInt64(&f.countingSemaphore.count); unreleased > 0 { 305 f.countingSemaphore.Release(int(unreleased)) 306 } 307 f.FlowBase.Cleanup(ctx) 308 f.Release() 309 } 310 311 // wrapWithVectorizedStatsCollector creates a new 312 // colexec.VectorizedStatsCollector that wraps op and connects the newly 313 // created wrapper with those corresponding to operators in inputs (the latter 314 // must have already been wrapped). 
315 func (s *vectorizedFlowCreator) wrapWithVectorizedStatsCollector( 316 op colexecbase.Operator, 317 inputs []colexecbase.Operator, 318 id int32, 319 idTagKey string, 320 monitors []*mon.BytesMonitor, 321 ) (*colexec.VectorizedStatsCollector, error) { 322 inputWatch := timeutil.NewStopWatch() 323 var memMonitors, diskMonitors []*mon.BytesMonitor 324 for _, m := range monitors { 325 if m.Resource() == mon.DiskResource { 326 diskMonitors = append(diskMonitors, m) 327 } else { 328 memMonitors = append(memMonitors, m) 329 } 330 } 331 vsc := colexec.NewVectorizedStatsCollector( 332 op, id, idTagKey, len(inputs) == 0, inputWatch, memMonitors, diskMonitors, 333 ) 334 for _, input := range inputs { 335 sc, ok := input.(*colexec.VectorizedStatsCollector) 336 if !ok { 337 return nil, errors.New("unexpectedly an input is not collecting stats") 338 } 339 sc.SetOutputWatch(inputWatch) 340 } 341 s.vectorizedStatsCollectorsQueue = append(s.vectorizedStatsCollectorsQueue, vsc) 342 return vsc, nil 343 } 344 345 // finishVectorizedStatsCollectors finishes the given stats collectors and 346 // outputs their stats to the trace contained in the ctx's span. 347 func finishVectorizedStatsCollectors( 348 ctx context.Context, 349 flowID execinfrapb.FlowID, 350 deterministicStats bool, 351 vectorizedStatsCollectors []*colexec.VectorizedStatsCollector, 352 ) { 353 flowIDString := flowID.String() 354 for _, vsc := range vectorizedStatsCollectors { 355 vsc.OutputStats(ctx, flowIDString, deterministicStats) 356 } 357 } 358 359 type runFn func(context.Context, context.CancelFunc) 360 361 // flowCreatorHelper contains all the logic needed to add the vectorized 362 // infrastructure to be run asynchronously as well as to perform some sanity 363 // checks. 364 type flowCreatorHelper interface { 365 // addStreamEndpoint stores information about an inbound stream. 
366 addStreamEndpoint(execinfrapb.StreamID, *colrpc.Inbox, *sync.WaitGroup) 367 // checkInboundStreamID checks that the provided stream ID has not been seen 368 // yet. 369 checkInboundStreamID(execinfrapb.StreamID) error 370 // accumulateAsyncComponent stores a component (either a router or an outbox) 371 // to be run asynchronously. 372 accumulateAsyncComponent(runFn) 373 // addMaterializer adds a materializer to the flow. 374 addMaterializer(*colexec.Materializer) 375 // getCancelFlowFn returns a flow cancellation function. 376 getCancelFlowFn() context.CancelFunc 377 } 378 379 // opDAGWithMetaSources is a helper struct that stores an operator DAG as well 380 // as the metadataSources and closers in this DAG that need to be drained and 381 // closed. 382 type opDAGWithMetaSources struct { 383 rootOperator colexecbase.Operator 384 metadataSources []execinfrapb.MetadataSource 385 toClose []colexec.IdempotentCloser 386 } 387 388 // remoteComponentCreator is an interface that abstracts the constructors for 389 // several components in a remote flow. Mostly for testing purposes. 
390 type remoteComponentCreator interface { 391 newOutbox( 392 allocator *colmem.Allocator, 393 input colexecbase.Operator, 394 typs []*types.T, 395 metadataSources []execinfrapb.MetadataSource, 396 toClose []colexec.IdempotentCloser, 397 ) (*colrpc.Outbox, error) 398 newInbox(allocator *colmem.Allocator, typs []*types.T, streamID execinfrapb.StreamID) (*colrpc.Inbox, error) 399 } 400 401 type vectorizedRemoteComponentCreator struct{} 402 403 func (vectorizedRemoteComponentCreator) newOutbox( 404 allocator *colmem.Allocator, 405 input colexecbase.Operator, 406 typs []*types.T, 407 metadataSources []execinfrapb.MetadataSource, 408 toClose []colexec.IdempotentCloser, 409 ) (*colrpc.Outbox, error) { 410 return colrpc.NewOutbox(allocator, input, typs, metadataSources, toClose) 411 } 412 413 func (vectorizedRemoteComponentCreator) newInbox( 414 allocator *colmem.Allocator, typs []*types.T, streamID execinfrapb.StreamID, 415 ) (*colrpc.Inbox, error) { 416 return colrpc.NewInbox(allocator, typs, streamID) 417 } 418 419 // vectorizedFlowCreator performs all the setup of vectorized flows. Depending 420 // on embedded flowCreatorHelper, it can either do the actual setup in order 421 // to run the flow or do the setup needed to check that the flow is supported 422 // through the vectorized engine. 423 type vectorizedFlowCreator struct { 424 flowCreatorHelper 425 remoteComponentCreator 426 427 streamIDToInputOp map[execinfrapb.StreamID]opDAGWithMetaSources 428 recordingStats bool 429 vectorizedStatsCollectorsQueue []*colexec.VectorizedStatsCollector 430 waitGroup *sync.WaitGroup 431 syncFlowConsumer execinfra.RowReceiver 432 nodeDialer *nodedialer.Dialer 433 flowID execinfrapb.FlowID 434 435 // numOutboxes counts how many exec.Outboxes have been set up on this node. 436 // It must be accessed atomically. 
437 numOutboxes int32 438 materializerAdded bool 439 440 // leaves accumulates all operators that have no further outputs on the 441 // current node, for the purposes of EXPLAIN output. 442 leaves []execinfra.OpNode 443 // operatorConcurrency is set if any operators are executed in parallel. 444 operatorConcurrency bool 445 // streamingMemAccounts contains all memory accounts of the non-buffering 446 // components in the vectorized flow. 447 streamingMemAccounts []*mon.BoundAccount 448 // monitors contains all monitors (for both memory and disk usage) of the 449 // buffering components in the vectorized flow. 450 monitors []*mon.BytesMonitor 451 // accounts contains all monitors (for both memory and disk usage) of the 452 // buffering components in the vectorized flow. 453 accounts []*mon.BoundAccount 454 455 diskQueueCfg colcontainer.DiskQueueCfg 456 fdSemaphore semaphore.Semaphore 457 } 458 459 func newVectorizedFlowCreator( 460 helper flowCreatorHelper, 461 componentCreator remoteComponentCreator, 462 recordingStats bool, 463 waitGroup *sync.WaitGroup, 464 syncFlowConsumer execinfra.RowReceiver, 465 nodeDialer *nodedialer.Dialer, 466 flowID execinfrapb.FlowID, 467 diskQueueCfg colcontainer.DiskQueueCfg, 468 fdSemaphore semaphore.Semaphore, 469 ) *vectorizedFlowCreator { 470 return &vectorizedFlowCreator{ 471 flowCreatorHelper: helper, 472 remoteComponentCreator: componentCreator, 473 streamIDToInputOp: make(map[execinfrapb.StreamID]opDAGWithMetaSources), 474 recordingStats: recordingStats, 475 vectorizedStatsCollectorsQueue: make([]*colexec.VectorizedStatsCollector, 0, 2), 476 waitGroup: waitGroup, 477 syncFlowConsumer: syncFlowConsumer, 478 nodeDialer: nodeDialer, 479 flowID: flowID, 480 diskQueueCfg: diskQueueCfg, 481 fdSemaphore: fdSemaphore, 482 } 483 } 484 485 // createBufferingUnlimitedMemMonitor instantiates an unlimited memory monitor. 
486 // These should only be used when spilling to disk and an operator is made aware 487 // of a memory usage limit separately. 488 // The receiver is updated to have a reference to the unlimited memory monitor. 489 // TODO(asubiotto): This identical to the helper function in 490 // NewColOperatorResult, meaning that we should probably find a way to refactor 491 // this. 492 func (s *vectorizedFlowCreator) createBufferingUnlimitedMemMonitor( 493 ctx context.Context, flowCtx *execinfra.FlowCtx, name string, 494 ) *mon.BytesMonitor { 495 bufferingOpUnlimitedMemMonitor := execinfra.NewMonitor( 496 ctx, flowCtx.EvalCtx.Mon, name+"-unlimited", 497 ) 498 s.monitors = append(s.monitors, bufferingOpUnlimitedMemMonitor) 499 return bufferingOpUnlimitedMemMonitor 500 } 501 502 // createDiskAccounts instantiates an unlimited disk monitor and disk accounts 503 // to be used for disk spilling infrastructure in vectorized engine. 504 // TODO(azhng): consolidate all allocation monitors/account management into one 505 // place after branch cut for 20.1. 506 func (s *vectorizedFlowCreator) createDiskAccounts( 507 ctx context.Context, flowCtx *execinfra.FlowCtx, name string, numAccounts int, 508 ) (*mon.BytesMonitor, []*mon.BoundAccount) { 509 diskMonitor := execinfra.NewMonitor(ctx, flowCtx.Cfg.DiskMonitor, name) 510 s.monitors = append(s.monitors, diskMonitor) 511 diskAccounts := make([]*mon.BoundAccount, numAccounts) 512 for i := range diskAccounts { 513 diskAcc := diskMonitor.MakeBoundAccount() 514 diskAccounts[i] = &diskAcc 515 } 516 s.accounts = append(s.accounts, diskAccounts...) 517 return diskMonitor, diskAccounts 518 } 519 520 // newStreamingMemAccount creates a new memory account bound to the monitor in 521 // flowCtx and accumulates it into streamingMemAccounts slice. 
522 func (s *vectorizedFlowCreator) newStreamingMemAccount( 523 flowCtx *execinfra.FlowCtx, 524 ) *mon.BoundAccount { 525 streamingMemAccount := flowCtx.EvalCtx.Mon.MakeBoundAccount() 526 s.streamingMemAccounts = append(s.streamingMemAccounts, &streamingMemAccount) 527 return &streamingMemAccount 528 } 529 530 // setupRemoteOutputStream sets up an Outbox that will operate according to 531 // the given StreamEndpointSpec. It will also drain all MetadataSources in the 532 // metadataSourcesQueue. 533 func (s *vectorizedFlowCreator) setupRemoteOutputStream( 534 ctx context.Context, 535 flowCtx *execinfra.FlowCtx, 536 op colexecbase.Operator, 537 outputTyps []*types.T, 538 stream *execinfrapb.StreamEndpointSpec, 539 metadataSourcesQueue []execinfrapb.MetadataSource, 540 toClose []colexec.IdempotentCloser, 541 factory coldata.ColumnFactory, 542 ) (execinfra.OpNode, error) { 543 // TODO(yuzefovich): we should collect some statistics on the outbox (e.g. 544 // number of bytes sent). 545 outbox, err := s.remoteComponentCreator.newOutbox( 546 colmem.NewAllocator(ctx, s.newStreamingMemAccount(flowCtx), factory), 547 op, outputTyps, metadataSourcesQueue, toClose, 548 ) 549 if err != nil { 550 return nil, err 551 } 552 atomic.AddInt32(&s.numOutboxes, 1) 553 run := func(ctx context.Context, cancelFn context.CancelFunc) { 554 outbox.Run(ctx, s.nodeDialer, stream.TargetNodeID, s.flowID, stream.StreamID, cancelFn) 555 currentOutboxes := atomic.AddInt32(&s.numOutboxes, -1) 556 // When the last Outbox on this node exits, we want to make sure that 557 // everything is shutdown; namely, we need to call cancelFn if: 558 // - it is the last Outbox 559 // - there is no root materializer on this node (if it were, it would take 560 // care of the cancellation itself) 561 // - cancelFn is non-nil (it can be nil in tests). 562 // Calling cancelFn will cancel the context that all infrastructure on this 563 // node is listening on, so it will shut everything down. 
564 if currentOutboxes == 0 && !s.materializerAdded && cancelFn != nil { 565 cancelFn() 566 } 567 } 568 s.accumulateAsyncComponent(run) 569 return outbox, nil 570 } 571 572 // setupRouter sets up a vectorized hash router according to the output router 573 // spec. If the outputs are local, these are added to s.streamIDToInputOp to be 574 // used as inputs in further planning. metadataSourcesQueue is passed along to 575 // any outboxes created to be drained, or stored in streamIDToInputOp for any 576 // local outputs to pass that responsibility along. In any case, 577 // metadataSourcesQueue will always be fully consumed. 578 // NOTE: This method supports only BY_HASH routers. Callers should handle 579 // PASS_THROUGH routers separately. 580 func (s *vectorizedFlowCreator) setupRouter( 581 ctx context.Context, 582 flowCtx *execinfra.FlowCtx, 583 input colexecbase.Operator, 584 outputTyps []*types.T, 585 output *execinfrapb.OutputRouterSpec, 586 metadataSourcesQueue []execinfrapb.MetadataSource, 587 toClose []colexec.IdempotentCloser, 588 factory coldata.ColumnFactory, 589 ) error { 590 if output.Type != execinfrapb.OutputRouterSpec_BY_HASH { 591 return errors.Errorf("vectorized output router type %s unsupported", output.Type) 592 } 593 594 // HashRouter memory monitor names are the concatenated output stream IDs. 
595 streamIDs := make([]string, len(output.Streams)) 596 for i, s := range output.Streams { 597 streamIDs[i] = strconv.Itoa(int(s.StreamID)) 598 } 599 mmName := "hash-router-[" + strings.Join(streamIDs, ",") + "]" 600 601 hashRouterMemMonitor := s.createBufferingUnlimitedMemMonitor(ctx, flowCtx, mmName) 602 allocators := make([]*colmem.Allocator, len(output.Streams)) 603 for i := range allocators { 604 acc := hashRouterMemMonitor.MakeBoundAccount() 605 allocators[i] = colmem.NewAllocator(ctx, &acc, factory) 606 s.accounts = append(s.accounts, &acc) 607 } 608 limit := execinfra.GetWorkMemLimit(flowCtx.Cfg) 609 if flowCtx.Cfg.TestingKnobs.ForceDiskSpill { 610 limit = 1 611 } 612 diskMon, diskAccounts := s.createDiskAccounts(ctx, flowCtx, mmName, len(output.Streams)) 613 router, outputs := colexec.NewHashRouter( 614 allocators, input, outputTyps, output.HashColumns, limit, 615 s.diskQueueCfg, s.fdSemaphore, diskAccounts, toClose, 616 ) 617 runRouter := func(ctx context.Context, _ context.CancelFunc) { 618 logtags.AddTag(ctx, "hashRouterID", mmName) 619 router.Run(ctx) 620 } 621 s.accumulateAsyncComponent(runRouter) 622 623 // Append the router to the metadata sources. 624 metadataSourcesQueue = append(metadataSourcesQueue, router) 625 626 foundLocalOutput := false 627 for i, op := range outputs { 628 stream := &output.Streams[i] 629 switch stream.Type { 630 case execinfrapb.StreamEndpointSpec_SYNC_RESPONSE: 631 return errors.Errorf("unexpected sync response output when setting up router") 632 case execinfrapb.StreamEndpointSpec_REMOTE: 633 // Note that here we pass in nil 'toClose' slice because hash 634 // router is responsible for closing all of the idempotent closers. 
635 if _, err := s.setupRemoteOutputStream( 636 ctx, flowCtx, op, outputTyps, stream, metadataSourcesQueue, nil /* toClose */, factory, 637 ); err != nil { 638 return err 639 } 640 case execinfrapb.StreamEndpointSpec_LOCAL: 641 foundLocalOutput = true 642 if s.recordingStats { 643 mons := []*mon.BytesMonitor{hashRouterMemMonitor, diskMon} 644 // Wrap local outputs with vectorized stats collectors when recording 645 // stats. This is mostly for compatibility but will provide some useful 646 // information (e.g. output stall time). 647 var err error 648 op, err = s.wrapWithVectorizedStatsCollector( 649 op, nil /* inputs */, int32(stream.StreamID), 650 execinfrapb.StreamIDTagKey, mons, 651 ) 652 if err != nil { 653 return err 654 } 655 } 656 s.streamIDToInputOp[stream.StreamID] = opDAGWithMetaSources{ 657 rootOperator: op, metadataSources: metadataSourcesQueue, toClose: toClose, 658 } 659 } 660 // Either the metadataSourcesQueue will be drained by an outbox or we 661 // created an opDAGWithMetaSources to pass along these metadataSources. We don't need to 662 // worry about metadata sources for following iterations of the loop. 663 metadataSourcesQueue = nil 664 } 665 if !foundLocalOutput { 666 // No local output means that our router is a leaf node. 667 s.leaves = append(s.leaves, router) 668 } 669 return nil 670 } 671 672 // setupInput sets up one or more input operators (local or remote) and a 673 // synchronizer to expose these separate streams as one exec.Operator which is 674 // returned. If s.recordingStats is true, these inputs and synchronizer are 675 // wrapped in stats collectors if not done so, although these stats are not 676 // exposed as of yet. Inboxes that are created are also returned as 677 // []distqlpb.MetadataSource so that any remote metadata can be read through 678 // calling DrainMeta. 
func (s *vectorizedFlowCreator) setupInput(
	ctx context.Context,
	flowCtx *execinfra.FlowCtx,
	input execinfrapb.InputSyncSpec,
	opt flowinfra.FuseOpt,
	factory coldata.ColumnFactory,
) (op colexecbase.Operator, _ []execinfrapb.MetadataSource, _ error) {
	inputStreamOps := make([]colexecbase.Operator, 0, len(input.Streams))
	metaSources := make([]execinfrapb.MetadataSource, 0, len(input.Streams))
	// Before we can safely use types we received over the wire in the
	// operators, we need to make sure they are hydrated. In row execution
	// engine it is done during the processor initialization, but operators
	// don't do that. However, all operators (apart from the colBatchScan) get
	// their types from InputSyncSpec, so this is a convenient place to do the
	// hydration so that all operators get the valid types.
	if err := execinfrapb.HydrateTypeSlice(flowCtx.EvalCtx, input.ColumnTypes); err != nil {
		return nil, nil, err
	}
	for _, inputStream := range input.Streams {
		switch inputStream.Type {
		case execinfrapb.StreamEndpointSpec_LOCAL:
			// A local input was registered in streamIDToInputOp by an earlier
			// setupOutput/setupRouter call; reuse its operator and inherit its
			// metadata sources.
			// NOTE(review): the map lookup is not checked, so a missing entry
			// yields a zero-valued opDAGWithMetaSources (nil rootOperator).
			in := s.streamIDToInputOp[inputStream.StreamID]
			inputStreamOps = append(inputStreamOps, in.rootOperator)
			metaSources = append(metaSources, in.metadataSources...)
		case execinfrapb.StreamEndpointSpec_REMOTE:
			// If the input is remote, the input operator does not exist in
			// streamIDToInputOp. Create an inbox.
			if err := s.checkInboundStreamID(inputStream.StreamID); err != nil {
				return nil, nil, err
			}
			inbox, err := s.remoteComponentCreator.newInbox(
				colmem.NewAllocator(ctx, s.newStreamingMemAccount(flowCtx), factory),
				input.ColumnTypes, inputStream.StreamID,
			)
			if err != nil {
				return nil, nil, err
			}
			s.addStreamEndpoint(inputStream.StreamID, inbox, s.waitGroup)
			metaSources = append(metaSources, inbox)
			op = inbox
			if s.recordingStats {
				op, err = s.wrapWithVectorizedStatsCollector(
					inbox, nil /* inputs */, int32(inputStream.StreamID),
					execinfrapb.StreamIDTagKey, nil, /* monitors */
				)
				if err != nil {
					return nil, nil, err
				}
			}
			inputStreamOps = append(inputStreamOps, op)
		default:
			return nil, nil, errors.Errorf("unsupported input stream type %s", inputStream.Type)
		}
	}
	// NOTE(review): this indexing panics if input.Streams is empty; callers
	// presumably always provide at least one stream - confirm.
	op = inputStreamOps[0]
	if len(inputStreamOps) > 1 {
		// Multiple streams need a synchronizer to be exposed as one operator.
		var err error
		statsInputs := inputStreamOps
		if input.Type == execinfrapb.InputSyncSpec_ORDERED {
			op, err = colexec.NewOrderedSynchronizer(
				colmem.NewAllocator(ctx, s.newStreamingMemAccount(flowCtx), factory),
				inputStreamOps, input.ColumnTypes, execinfrapb.ConvertToColumnOrdering(input.Ordering),
			)
			if err != nil {
				return nil, nil, err
			}
		} else {
			if opt == flowinfra.FuseAggressively {
				op = colexec.NewSerialUnorderedSynchronizer(inputStreamOps, input.ColumnTypes)
			} else {
				op = colexec.NewParallelUnorderedSynchronizer(inputStreamOps, input.ColumnTypes, s.waitGroup)
				s.operatorConcurrency = true
			}
			// Don't use the unordered synchronizer's inputs for stats collection
			// given that they run concurrently. The stall time will be collected
			// instead.
			statsInputs = nil
		}
		if s.recordingStats {
			// TODO(asubiotto): Once we have IDs for synchronizers, plumb them into
			// this stats collector to display stats.
			op, err = s.wrapWithVectorizedStatsCollector(
				op, statsInputs, -1 /* id */, "" /* idTagKey */, nil, /* monitors */
			)
			if err != nil {
				return nil, nil, err
			}
		}
	}
	return op, metaSources, nil
}

// setupOutput sets up any necessary infrastructure according to the output
// spec of pspec. The metadataSourcesQueue is fully consumed by either
// connecting it to a component that can drain these MetadataSources (root
// materializer or outbox) or storing it in streamIDToInputOp with the given op
// to be processed later.
// NOTE: The caller must not reuse the metadataSourcesQueue.
func (s *vectorizedFlowCreator) setupOutput(
	ctx context.Context,
	flowCtx *execinfra.FlowCtx,
	pspec *execinfrapb.ProcessorSpec,
	op colexecbase.Operator,
	opOutputTypes []*types.T,
	metadataSourcesQueue []execinfrapb.MetadataSource,
	toClose []colexec.IdempotentCloser,
	factory coldata.ColumnFactory,
) error {
	// Only the first output router spec of the processor is considered.
	output := &pspec.Output[0]
	if output.Type != execinfrapb.OutputRouterSpec_PASS_THROUGH {
		// Any non-PASS_THROUGH router is delegated to setupRouter (which itself
		// rejects everything but BY_HASH).
		return s.setupRouter(
			ctx,
			flowCtx,
			op,
			opOutputTypes,
			output,
			// NOTE(review): the queue itself (not a copy) is passed here; the
			// caller must not reuse it afterwards.
			metadataSourcesQueue,
			toClose,
			factory,
		)
	}

	if len(output.Streams) != 1 {
		return errors.Errorf("unsupported multi outputstream proc (%d streams)", len(output.Streams))
	}
	outputStream := &output.Streams[0]
	switch outputStream.Type {
	case execinfrapb.StreamEndpointSpec_LOCAL:
		// Remember the operator (with its metadata sources and closers) so that
		// setupInput of the consuming processor can pick it up by stream ID.
		s.streamIDToInputOp[outputStream.StreamID] = opDAGWithMetaSources{
			rootOperator: op, metadataSources: metadataSourcesQueue, toClose: toClose,
		}
	case execinfrapb.StreamEndpointSpec_REMOTE:
		// Set up an Outbox that receives metadataSourcesQueue (plus, when
		// recording stats, an extra metadata source appended below).
		if s.recordingStats {
			// If recording stats, we add a metadata source that will generate all
			// stats data as metadata for the stats collectors created so far.
			// The collectors are copied because the queue is truncated right
			// after and its backing array is reused by later appends.
			vscs := append([]*colexec.VectorizedStatsCollector(nil), s.vectorizedStatsCollectorsQueue...)
			s.vectorizedStatsCollectorsQueue = s.vectorizedStatsCollectorsQueue[:0]
			metadataSourcesQueue = append(
				metadataSourcesQueue,
				execinfrapb.CallbackMetadataSource{
					DrainMetaCb: func(ctx context.Context) []execinfrapb.ProducerMetadata {
						// TODO(asubiotto): Who is responsible for the recording of the
						// parent context?
						// Start a separate recording so that GetRecording will return
						// the recordings for only the child spans containing stats.
						ctx, span := tracing.ChildSpanSeparateRecording(ctx, "")
						finishVectorizedStatsCollectors(
							ctx, flowCtx.ID, flowCtx.Cfg.TestingKnobs.DeterministicStats, vscs,
						)
						return []execinfrapb.ProducerMetadata{{TraceData: tracing.GetRecording(span)}}
					},
				},
			)
		}
		outbox, err :=
			s.setupRemoteOutputStream(ctx, flowCtx, op, opOutputTypes, outputStream, metadataSourcesQueue, toClose, factory)
		if err != nil {
			return err
		}
		// An outbox is a leaf: there's nothing that sees it as an input on this
		// node.
		s.leaves = append(s.leaves, outbox)
	case execinfrapb.StreamEndpointSpec_SYNC_RESPONSE:
		if s.syncFlowConsumer == nil {
			return errors.New("syncFlowConsumer unset, unable to create materializer")
		}
		// Make the materializer, which will write to the given receiver.
		columnTypes := s.syncFlowConsumer.Types()
		if err := assertTypesMatch(columnTypes, opOutputTypes); err != nil {
			return err
		}
		var outputStatsToTrace func()
		if s.recordingStats {
			// Make a copy given that vectorizedStatsCollectorsQueue is reset and
			// appended to.
			vscq := append([]*colexec.VectorizedStatsCollector(nil), s.vectorizedStatsCollectorsQueue...)
			outputStatsToTrace = func() {
				finishVectorizedStatsCollectors(
					ctx, flowCtx.ID, flowCtx.Cfg.TestingKnobs.DeterministicStats, vscq,
				)
			}
		}
		proc, err := colexec.NewMaterializer(
			flowCtx,
			pspec.ProcessorID,
			op,
			columnTypes,
			s.syncFlowConsumer,
			metadataSourcesQueue,
			toClose,
			outputStatsToTrace,
			s.getCancelFlowFn,
		)
		if err != nil {
			return err
		}
		// The queue is truncated only after the materializer was created
		// successfully.
		s.vectorizedStatsCollectorsQueue = s.vectorizedStatsCollectorsQueue[:0]
		// A materializer is a leaf.
		s.leaves = append(s.leaves, proc)
		s.addMaterializer(proc)
		s.materializerAdded = true
	default:
		return errors.Errorf("unsupported output stream type %s", outputStream.Type)
	}
	return nil
}

func (s *vectorizedFlowCreator) setupFlow(
	ctx context.Context,
	flowCtx *execinfra.FlowCtx,
	processorSpecs []execinfrapb.ProcessorSpec,
	opt flowinfra.FuseOpt,
) (leaves []execinfra.OpNode, err error) {
	streamIDToSpecIdx := make(map[execinfrapb.StreamID]int)
	factory := coldataext.NewExtendedColumnFactory(flowCtx.NewEvalCtx())
	// queue is a queue of indices into processorSpecs, for topologically
	// ordered processing.
	queue := make([]int, 0, len(processorSpecs))
	for i := range processorSpecs {
		hasLocalInput := false
		for j := range processorSpecs[i].Input {
			input := &processorSpecs[i].Input[j]
			for k := range input.Streams {
				stream := &input.Streams[k]
				streamIDToSpecIdx[stream.StreamID] = i
				if stream.Type != execinfrapb.StreamEndpointSpec_REMOTE {
					hasLocalInput = true
				}
			}
		}
		if hasLocalInput {
			continue
		}
		// Queue all processors with either no inputs or remote inputs.
917 queue = append(queue, i) 918 } 919 920 inputs := make([]colexecbase.Operator, 0, 2) 921 for len(queue) > 0 { 922 pspec := &processorSpecs[queue[0]] 923 queue = queue[1:] 924 if len(pspec.Output) > 1 { 925 return nil, errors.Errorf("unsupported multi-output proc (%d outputs)", len(pspec.Output)) 926 } 927 928 // metadataSourcesQueue contains all the MetadataSources that need to be 929 // drained. If in a given loop iteration no component that can drain 930 // metadata from these sources is found, the metadataSourcesQueue should be 931 // added as part of one of the last unconnected inputDAGs in 932 // streamIDToInputOp. This is to avoid cycles. 933 metadataSourcesQueue := make([]execinfrapb.MetadataSource, 0, 1) 934 // toClose is similar to metadataSourcesQueue with the difference that these 935 // components do not produce metadata and should be Closed even during 936 // non-graceful termination. 937 toClose := make([]colexec.IdempotentCloser, 0, 1) 938 inputs = inputs[:0] 939 for i := range pspec.Input { 940 input, metadataSources, err := s.setupInput(ctx, flowCtx, pspec.Input[i], opt, factory) 941 if err != nil { 942 return nil, err 943 } 944 metadataSourcesQueue = append(metadataSourcesQueue, metadataSources...) 945 inputs = append(inputs, input) 946 } 947 948 args := colexec.NewColOperatorArgs{ 949 Spec: pspec, 950 Inputs: inputs, 951 StreamingMemAccount: s.newStreamingMemAccount(flowCtx), 952 ProcessorConstructor: rowexec.NewProcessor, 953 DiskQueueCfg: s.diskQueueCfg, 954 FDSemaphore: s.fdSemaphore, 955 } 956 result, err := colexec.NewColOperator(ctx, flowCtx, args) 957 // Even when err is non-nil, it is possible that the buffering memory 958 // monitor and account have been created, so we always want to accumulate 959 // them for a proper cleanup. 960 s.monitors = append(s.monitors, result.OpMonitors...) 961 s.accounts = append(s.accounts, result.OpAccounts...) 
962 if err != nil { 963 return nil, errors.Wrapf(err, "unable to vectorize execution plan") 964 } 965 if flowCtx.Cfg != nil && flowCtx.Cfg.TestingKnobs.EnableVectorizedInvariantsChecker { 966 result.Op = colexec.NewInvariantsChecker(result.Op) 967 } 968 if flowCtx.EvalCtx.SessionData.VectorizeMode == sessiondata.Vectorize201Auto && 969 !result.IsStreaming { 970 return nil, errors.Errorf("non-streaming operator encountered when vectorize=201auto") 971 } 972 // We created a streaming memory account when calling NewColOperator above, 973 // so there is definitely at least one memory account, and it doesn't 974 // matter which one we grow. 975 if err = s.streamingMemAccounts[0].Grow(ctx, int64(result.InternalMemUsage)); err != nil { 976 return nil, errors.Wrapf(err, "not enough memory to setup vectorized plan") 977 } 978 metadataSourcesQueue = append(metadataSourcesQueue, result.MetadataSources...) 979 toClose = append(toClose, result.ToClose...) 980 981 op := result.Op 982 if s.recordingStats { 983 op, err = s.wrapWithVectorizedStatsCollector( 984 op, inputs, pspec.ProcessorID, execinfrapb.ProcessorIDTagKey, result.OpMonitors, 985 ) 986 if err != nil { 987 return nil, err 988 } 989 } 990 991 if (flowCtx.EvalCtx.SessionData.VectorizeMode == sessiondata.Vectorize201Auto) && 992 pspec.Output[0].Type == execinfrapb.OutputRouterSpec_BY_HASH { 993 // colexec.HashRouter is not supported when vectorize=auto since it can 994 // buffer an unlimited number of tuples, even though it falls back to 995 // disk. vectorize=on does support this. 996 return nil, errors.Errorf("hash router encountered when vectorize=201auto") 997 } 998 if err = s.setupOutput( 999 ctx, flowCtx, pspec, op, result.ColumnTypes, metadataSourcesQueue, toClose, factory, 1000 ); err != nil { 1001 return nil, err 1002 } 1003 1004 // Now queue all outputs from this op whose inputs are already all 1005 // populated. 
1006 NEXTOUTPUT: 1007 for i := range pspec.Output { 1008 for j := range pspec.Output[i].Streams { 1009 outputStream := &pspec.Output[i].Streams[j] 1010 if outputStream.Type != execinfrapb.StreamEndpointSpec_LOCAL { 1011 continue 1012 } 1013 procIdx, ok := streamIDToSpecIdx[outputStream.StreamID] 1014 if !ok { 1015 return nil, errors.Errorf("couldn't find stream %d", outputStream.StreamID) 1016 } 1017 outputSpec := &processorSpecs[procIdx] 1018 for k := range outputSpec.Input { 1019 for l := range outputSpec.Input[k].Streams { 1020 inputStream := outputSpec.Input[k].Streams[l] 1021 if inputStream.StreamID == outputStream.StreamID { 1022 if err := assertTypesMatch(outputSpec.Input[k].ColumnTypes, result.ColumnTypes); err != nil { 1023 return nil, err 1024 } 1025 } 1026 if inputStream.Type == execinfrapb.StreamEndpointSpec_REMOTE { 1027 // Remote streams are not present in streamIDToInputOp. The 1028 // Inboxes that consume these streams are created at the same time 1029 // as the operator that needs them, so skip the creation check for 1030 // this input. 1031 continue 1032 } 1033 if _, ok := s.streamIDToInputOp[inputStream.StreamID]; !ok { 1034 continue NEXTOUTPUT 1035 } 1036 } 1037 } 1038 // We found an input op for every single stream in this output. Queue 1039 // it for processing. 1040 queue = append(queue, procIdx) 1041 } 1042 } 1043 } 1044 1045 if len(s.vectorizedStatsCollectorsQueue) > 0 { 1046 colexecerror.InternalError("not all vectorized stats collectors have been processed") 1047 } 1048 return s.leaves, nil 1049 } 1050 1051 // assertTypesMatch checks whether expected types match with actual types and 1052 // returns an error if not. 
1053 func assertTypesMatch(expected []*types.T, actual []*types.T) error { 1054 for i := range expected { 1055 if !expected[i].Identical(actual[i]) { 1056 return errors.Errorf("mismatched types at index %d: expected %v\tactual %v ", 1057 i, expected, actual, 1058 ) 1059 } 1060 } 1061 return nil 1062 } 1063 1064 type vectorizedInboundStreamHandler struct { 1065 *colrpc.Inbox 1066 } 1067 1068 var _ flowinfra.InboundStreamHandler = vectorizedInboundStreamHandler{} 1069 1070 // Run is part of the flowinfra.InboundStreamHandler interface. 1071 func (s vectorizedInboundStreamHandler) Run( 1072 ctx context.Context, 1073 stream execinfrapb.DistSQL_FlowStreamServer, 1074 _ *execinfrapb.ProducerMessage, 1075 _ *flowinfra.FlowBase, 1076 ) error { 1077 return s.RunWithStream(ctx, stream) 1078 } 1079 1080 // Timeout is part of the flowinfra.InboundStreamHandler interface. 1081 func (s vectorizedInboundStreamHandler) Timeout(err error) { 1082 s.Inbox.Timeout(err) 1083 } 1084 1085 // vectorizedFlowCreatorHelper is a flowCreatorHelper that sets up all the 1086 // vectorized infrastructure to be actually run. 
1087 type vectorizedFlowCreatorHelper struct { 1088 f *flowinfra.FlowBase 1089 } 1090 1091 var _ flowCreatorHelper = &vectorizedFlowCreatorHelper{} 1092 1093 func (r *vectorizedFlowCreatorHelper) addStreamEndpoint( 1094 streamID execinfrapb.StreamID, inbox *colrpc.Inbox, wg *sync.WaitGroup, 1095 ) { 1096 r.f.AddRemoteStream(streamID, flowinfra.NewInboundStreamInfo( 1097 vectorizedInboundStreamHandler{inbox}, 1098 wg, 1099 )) 1100 } 1101 1102 func (r *vectorizedFlowCreatorHelper) checkInboundStreamID(sid execinfrapb.StreamID) error { 1103 return r.f.CheckInboundStreamID(sid) 1104 } 1105 1106 func (r *vectorizedFlowCreatorHelper) accumulateAsyncComponent(run runFn) { 1107 r.f.AddStartable( 1108 flowinfra.StartableFn(func(ctx context.Context, wg *sync.WaitGroup, cancelFn context.CancelFunc) { 1109 if wg != nil { 1110 wg.Add(1) 1111 } 1112 go func() { 1113 run(ctx, cancelFn) 1114 if wg != nil { 1115 wg.Done() 1116 } 1117 }() 1118 })) 1119 } 1120 1121 func (r *vectorizedFlowCreatorHelper) addMaterializer(m *colexec.Materializer) { 1122 processors := make([]execinfra.Processor, 1) 1123 processors[0] = m 1124 r.f.SetProcessors(processors) 1125 } 1126 1127 func (r *vectorizedFlowCreatorHelper) getCancelFlowFn() context.CancelFunc { 1128 return r.f.GetCancelFlowFn() 1129 } 1130 1131 // noopFlowCreatorHelper is a flowCreatorHelper that only performs sanity 1132 // checks. 
1133 type noopFlowCreatorHelper struct { 1134 inboundStreams map[execinfrapb.StreamID]struct{} 1135 } 1136 1137 var _ flowCreatorHelper = &noopFlowCreatorHelper{} 1138 1139 func newNoopFlowCreatorHelper() *noopFlowCreatorHelper { 1140 return &noopFlowCreatorHelper{ 1141 inboundStreams: make(map[execinfrapb.StreamID]struct{}), 1142 } 1143 } 1144 1145 func (r *noopFlowCreatorHelper) addStreamEndpoint( 1146 streamID execinfrapb.StreamID, _ *colrpc.Inbox, _ *sync.WaitGroup, 1147 ) { 1148 r.inboundStreams[streamID] = struct{}{} 1149 } 1150 1151 func (r *noopFlowCreatorHelper) checkInboundStreamID(sid execinfrapb.StreamID) error { 1152 if _, found := r.inboundStreams[sid]; found { 1153 return errors.Errorf("inbound stream %d already exists in map", sid) 1154 } 1155 return nil 1156 } 1157 1158 func (r *noopFlowCreatorHelper) accumulateAsyncComponent(runFn) {} 1159 1160 func (r *noopFlowCreatorHelper) addMaterializer(*colexec.Materializer) {} 1161 1162 func (r *noopFlowCreatorHelper) getCancelFlowFn() context.CancelFunc { 1163 return nil 1164 } 1165 1166 // SupportsVectorized checks whether flow is supported by the vectorized engine 1167 // and returns an error if it isn't. Note that it does so by setting up the 1168 // full flow without running the components asynchronously. 1169 // It returns a list of the leaf operators of all flows for the purposes of 1170 // EXPLAIN output. 1171 // Note that passed-in output can be nil, but if it is non-nil, only Types() 1172 // method on it might be called (nothing will actually get Push()'ed into it). 
1173 func SupportsVectorized( 1174 ctx context.Context, 1175 flowCtx *execinfra.FlowCtx, 1176 processorSpecs []execinfrapb.ProcessorSpec, 1177 fuseOpt flowinfra.FuseOpt, 1178 output execinfra.RowReceiver, 1179 ) (leaves []execinfra.OpNode, err error) { 1180 if output == nil { 1181 output = &execinfra.RowChannel{} 1182 } 1183 creator := newVectorizedFlowCreator(newNoopFlowCreatorHelper(), vectorizedRemoteComponentCreator{}, false, nil, output, nil, execinfrapb.FlowID{}, colcontainer.DiskQueueCfg{}, flowCtx.Cfg.VecFDSemaphore) 1184 // We create an unlimited memory account because we're interested whether the 1185 // flow is supported via the vectorized engine in general (without paying 1186 // attention to the memory since it is node-dependent in the distributed 1187 // case). 1188 memoryMonitor := mon.MakeMonitor( 1189 "supports-vectorized", 1190 mon.MemoryResource, 1191 nil, /* curCount */ 1192 nil, /* maxHist */ 1193 -1, /* increment */ 1194 math.MaxInt64, /* noteworthy */ 1195 flowCtx.Cfg.Settings, 1196 ) 1197 memoryMonitor.Start(ctx, nil, mon.MakeStandaloneBudget(math.MaxInt64)) 1198 defer memoryMonitor.Stop(ctx) 1199 defer func() { 1200 for _, acc := range creator.streamingMemAccounts { 1201 acc.Close(ctx) 1202 } 1203 for _, acc := range creator.accounts { 1204 acc.Close(ctx) 1205 } 1206 for _, mon := range creator.monitors { 1207 mon.Stop(ctx) 1208 } 1209 }() 1210 if vecErr := colexecerror.CatchVectorizedRuntimeError(func() { 1211 leaves, err = creator.setupFlow(ctx, flowCtx, processorSpecs, fuseOpt) 1212 }); vecErr != nil { 1213 return leaves, vecErr 1214 } 1215 return leaves, err 1216 } 1217 1218 // VectorizeAlwaysException is an object that returns whether or not execution 1219 // should continue if vectorize=experimental_always and an error occurred when 1220 // setting up the vectorized flow. Consider the case in which 1221 // vectorize=experimental_always. The user must be able to unset this session 1222 // variable without getting an error. 
type VectorizeAlwaysException interface {
	// IsException reports whether this object is exempt from the rule that
	// failing to run this node in a vectorized flow must produce an error.
	// TODO(asubiotto): This is the cleanest way found so far to avoid
	// erroring out on SET statements when running with
	// vectorize = experimental_always. If a better way exists, this interface
	// should be removed.
	IsException() bool
}