github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/execinfra/processorsbase.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package execinfra 12 13 import ( 14 "context" 15 "math" 16 17 "github.com/cockroachdb/cockroach/pkg/roachpb" 18 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 19 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 20 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 21 "github.com/cockroachdb/cockroach/pkg/sql/types" 22 "github.com/cockroachdb/cockroach/pkg/util" 23 "github.com/cockroachdb/cockroach/pkg/util/log" 24 "github.com/cockroachdb/cockroach/pkg/util/mon" 25 "github.com/cockroachdb/cockroach/pkg/util/tracing" 26 "github.com/cockroachdb/errors" 27 opentracing "github.com/opentracing/opentracing-go" 28 ) 29 30 // Processor is a common interface implemented by all processors, used by the 31 // higher-level flow orchestration code. 32 type Processor interface { 33 // OutputTypes returns the column types of the results (that are to be fed 34 // through an output router). 35 OutputTypes() []*types.T 36 37 // Run is the main loop of the processor. 38 Run(context.Context) 39 } 40 41 // ProcOutputHelper is a helper type that performs filtering and projection on 42 // the output of a processor. 43 type ProcOutputHelper struct { 44 numInternalCols int 45 // output can be optionally passed in for use with EmitRow and 46 // rowexec.emitHelper. 47 // If output is nil, one can invoke ProcessRow to obtain the 48 // post-processed row directly. 49 output RowReceiver 50 RowAlloc sqlbase.EncDatumRowAlloc 51 52 filter *ExprHelper 53 // renderExprs has length > 0 if we have a rendering. Only one of renderExprs 54 // and outputCols can be set. 55 renderExprs []ExprHelper 56 // outputCols is non-nil if we have a projection. Only one of renderExprs and 57 // outputCols can be set. Note that 0-length projections are possible, in 58 // which case outputCols will be 0-length but non-nil. 59 outputCols []uint32 60 61 outputRow sqlbase.EncDatumRow 62 63 // OutputTypes is the schema of the rows produced by the processor after 64 // post-processing (i.e. the rows that are pushed through a router). 65 // 66 // If renderExprs is set, these types correspond to the types of those 67 // expressions. 68 // If outputCols is set, these types correspond to the types of 69 // those columns. 70 // If neither is set, this is the internal schema of the processor. 71 OutputTypes []*types.T 72 73 // offset is the number of rows that are suppressed. 74 offset uint64 75 // maxRowIdx is the number of rows after which we can stop (offset + limit), 76 // or MaxUint64 if there is no limit. 77 maxRowIdx uint64 78 79 rowIdx uint64 80 } 81 82 // Reset resets this ProcOutputHelper, retaining allocated memory in its slices. 83 func (h *ProcOutputHelper) Reset() { 84 *h = ProcOutputHelper{ 85 renderExprs: h.renderExprs[:0], 86 OutputTypes: h.OutputTypes[:0], 87 } 88 } 89 90 // Init sets up a ProcOutputHelper. The types describe the internal schema of 91 // the processor (as described for each processor core spec); they can be 92 // omitted if there is no filtering expression. 93 // Note that the types slice may be stored directly; the caller should not 94 // modify it. 95 func (h *ProcOutputHelper) Init( 96 post *execinfrapb.PostProcessSpec, typs []*types.T, evalCtx *tree.EvalContext, output RowReceiver, 97 ) error { 98 if !post.Projection && len(post.OutputColumns) > 0 { 99 return errors.Errorf("post-processing has projection unset but output columns set: %s", post) 100 } 101 if post.Projection && len(post.RenderExprs) > 0 { 102 return errors.Errorf("post-processing has both projection and rendering: %s", post) 103 } 104 h.output = output 105 h.numInternalCols = len(typs) 106 if post.Filter != (execinfrapb.Expression{}) { 107 h.filter = &ExprHelper{} 108 if err := h.filter.Init(post.Filter, typs, evalCtx); err != nil { 109 return err 110 } 111 } 112 if post.Projection { 113 for _, col := range post.OutputColumns { 114 if int(col) >= h.numInternalCols { 115 return errors.Errorf("invalid output column %d (only %d available)", col, h.numInternalCols) 116 } 117 } 118 h.outputCols = post.OutputColumns 119 if h.outputCols == nil { 120 // nil indicates no projection; use an empty slice. 121 h.outputCols = make([]uint32, 0) 122 } 123 nOutputCols := len(h.outputCols) 124 if cap(h.OutputTypes) >= nOutputCols { 125 h.OutputTypes = h.OutputTypes[:nOutputCols] 126 } else { 127 h.OutputTypes = make([]*types.T, nOutputCols) 128 } 129 for i, c := range h.outputCols { 130 h.OutputTypes[i] = typs[c] 131 } 132 } else if nRenders := len(post.RenderExprs); nRenders > 0 { 133 if cap(h.renderExprs) >= nRenders { 134 h.renderExprs = h.renderExprs[:nRenders] 135 } else { 136 h.renderExprs = make([]ExprHelper, nRenders) 137 } 138 if cap(h.OutputTypes) >= nRenders { 139 h.OutputTypes = h.OutputTypes[:nRenders] 140 } else { 141 h.OutputTypes = make([]*types.T, nRenders) 142 } 143 for i, expr := range post.RenderExprs { 144 h.renderExprs[i] = ExprHelper{} 145 if err := h.renderExprs[i].Init(expr, typs, evalCtx); err != nil { 146 return err 147 } 148 h.OutputTypes[i] = h.renderExprs[i].Expr.ResolvedType() 149 } 150 } else { 151 // No rendering or projection. 152 if cap(h.OutputTypes) >= len(typs) { 153 h.OutputTypes = h.OutputTypes[:len(typs)] 154 } else { 155 h.OutputTypes = make([]*types.T, len(typs)) 156 } 157 copy(h.OutputTypes, typs) 158 } 159 if h.outputCols != nil || len(h.renderExprs) > 0 { 160 // We're rendering or projecting, so allocate an output row. 161 h.outputRow = h.RowAlloc.AllocRow(len(h.OutputTypes)) 162 } 163 164 h.offset = post.Offset 165 if post.Limit == 0 || post.Limit >= math.MaxUint64-h.offset { 166 h.maxRowIdx = math.MaxUint64 167 } else { 168 h.maxRowIdx = h.offset + post.Limit 169 } 170 171 return nil 172 } 173 174 // NeededColumns calculates the set of internal processor columns that are 175 // actually used by the post-processing stage. 176 func (h *ProcOutputHelper) NeededColumns() (colIdxs util.FastIntSet) { 177 if h.outputCols == nil && len(h.renderExprs) == 0 { 178 // No projection or rendering; all columns are needed. 179 colIdxs.AddRange(0, h.numInternalCols-1) 180 return colIdxs 181 } 182 183 // Add all explicit output columns. 184 for _, c := range h.outputCols { 185 colIdxs.Add(int(c)) 186 } 187 188 for i := 0; i < h.numInternalCols; i++ { 189 // See if filter requires this column. 190 if h.filter != nil && h.filter.Vars.IndexedVarUsed(i) { 191 colIdxs.Add(i) 192 continue 193 } 194 195 // See if render expressions require this column. 196 for j := range h.renderExprs { 197 if h.renderExprs[j].Vars.IndexedVarUsed(i) { 198 colIdxs.Add(i) 199 break 200 } 201 } 202 } 203 204 return colIdxs 205 } 206 207 // EmitRow sends a row through the post-processing stage. The same row can be 208 // reused. 209 // 210 // It returns the consumer's status that was observed when pushing this row. If 211 // an error is returned, it's coming from the ProcOutputHelper's filtering or 212 // rendering processing; the output has not been closed and it's the caller's 213 // responsibility to push the error to the output. 214 // 215 // Note: check out rowexec.emitHelper() for a useful wrapper. 216 func (h *ProcOutputHelper) EmitRow( 217 ctx context.Context, row sqlbase.EncDatumRow, 218 ) (ConsumerStatus, error) { 219 if h.output == nil { 220 panic("output RowReceiver not initialized for emitting rows") 221 } 222 223 outRow, ok, err := h.ProcessRow(ctx, row) 224 if err != nil { 225 // The status doesn't matter. 226 return NeedMoreRows, err 227 } 228 if outRow == nil { 229 if ok { 230 return NeedMoreRows, nil 231 } 232 return DrainRequested, nil 233 } 234 235 if log.V(3) { 236 log.InfofDepth(ctx, 1, "pushing row %s", outRow.String(h.OutputTypes)) 237 } 238 if r := h.output.Push(outRow, nil); r != NeedMoreRows { 239 log.VEventf(ctx, 1, "no more rows required. drain requested: %t", 240 r == DrainRequested) 241 return r, nil 242 } 243 if h.rowIdx == h.maxRowIdx { 244 log.VEventf(ctx, 1, "hit row limit; asking producer to drain") 245 return DrainRequested, nil 246 } 247 status := NeedMoreRows 248 if !ok { 249 status = DrainRequested 250 } 251 return status, nil 252 } 253 254 // ProcessRow sends the invoked row through the post-processing stage and returns 255 // the post-processed row. Results from ProcessRow aren't safe past the next call 256 // to ProcessRow. 257 // 258 // The moreRowsOK retval is true if more rows can be processed, false if the 259 // limit has been reached (if there's a limit). Upon seeing a false value, the 260 // caller is expected to start draining. Note that both a row and 261 // moreRowsOK=false can be returned at the same time: the row that satisfies the 262 // limit is returned at the same time as a DrainRequested status. In that case, 263 // the caller is supposed to both deal with the row and start draining. 264 func (h *ProcOutputHelper) ProcessRow( 265 ctx context.Context, row sqlbase.EncDatumRow, 266 ) (_ sqlbase.EncDatumRow, moreRowsOK bool, _ error) { 267 if h.rowIdx >= h.maxRowIdx { 268 return nil, false, nil 269 } 270 271 if h.filter != nil { 272 // Filtering. 273 passes, err := h.filter.EvalFilter(row) 274 if err != nil { 275 return nil, false, err 276 } 277 if !passes { 278 if log.V(4) { 279 log.Infof(ctx, "filtered out row %s", row.String(h.filter.Types)) 280 } 281 return nil, true, nil 282 } 283 } 284 h.rowIdx++ 285 if h.rowIdx <= h.offset { 286 // Suppress row. 287 return nil, true, nil 288 } 289 290 if len(h.renderExprs) > 0 { 291 // Rendering. 292 for i := range h.renderExprs { 293 datum, err := h.renderExprs[i].Eval(row) 294 if err != nil { 295 return nil, false, err 296 } 297 h.outputRow[i] = sqlbase.DatumToEncDatum(h.OutputTypes[i], datum) 298 } 299 } else if h.outputCols != nil { 300 // Projection. 301 for i, col := range h.outputCols { 302 h.outputRow[i] = row[col] 303 } 304 } else { 305 // No rendering or projection. 306 return row, h.rowIdx < h.maxRowIdx, nil 307 } 308 309 // If this row satisfies the limit, the caller is told to drain. 310 return h.outputRow, h.rowIdx < h.maxRowIdx, nil 311 } 312 313 // Output returns the output of the ProcOutputHelper. 314 func (h *ProcOutputHelper) Output() RowReceiver { 315 return h.output 316 } 317 318 // Close signals to the output that there will be no more rows. 319 func (h *ProcOutputHelper) Close() { 320 h.output.ProducerDone() 321 } 322 323 // consumerClosed stops output of additional rows from ProcessRow. 324 func (h *ProcOutputHelper) consumerClosed() { 325 h.rowIdx = h.maxRowIdx 326 } 327 328 // ProcessorConstructor is a function that creates a Processor. It is 329 // abstracted away so that we could create mixed flows (i.e. a vectorized flow 330 // with wrapped processors) without bringing a dependency on sql/rowexec 331 // package into sql/colexec package. 332 type ProcessorConstructor func( 333 ctx context.Context, 334 flowCtx *FlowCtx, 335 processorID int32, 336 core *execinfrapb.ProcessorCoreUnion, 337 post *execinfrapb.PostProcessSpec, 338 inputs []RowSource, 339 outputs []RowReceiver, 340 localProcessors []LocalProcessor, 341 ) (Processor, error) 342 343 // ProcessorBase is supposed to be embedded by Processors. It provides 344 // facilities for dealing with filtering and projection (through a 345 // ProcOutputHelper) and for implementing the RowSource interface (draining, 346 // trailing metadata). 347 // 348 // If a Processor implements the RowSource interface, it's implementation is 349 // expected to look something like this: 350 // 351 // // concatProcessor concatenates rows from two sources (first returns rows 352 // // from the left, then from the right). 353 // type concatProcessor struct { 354 // ProcessorBase 355 // l, r RowSource 356 // 357 // // leftConsumed is set once we've exhausted the left input; once set, we start 358 // // consuming the right input. 359 // leftConsumed bool 360 // } 361 // 362 // func newConcatProcessor( 363 // FlowCtx *FlowCtx, l RowSource, r RowSource, post *PostProcessSpec, output RowReceiver, 364 // ) (*concatProcessor, error) { 365 // p := &concatProcessor{l: l, r: r} 366 // if err := p.init( 367 // post, l.OutputTypes(), FlowCtx, output, 368 // // We pass the inputs to the helper, to be consumed by DrainHelper() later. 369 // ProcStateOpts{ 370 // InputsToDrain: []RowSource{l, r}, 371 // // If the proc needed to return any metadata at the end other than the 372 // // tracing info, or if it needed to cleanup any resources other than those 373 // // handled by InternalClose() (say, close some memory account), it'd pass 374 // // a TrailingMetaCallback here. 375 // }, 376 // ); err != nil { 377 // return nil, err 378 // } 379 // return p, nil 380 // } 381 // 382 // // Start is part of the RowSource interface. 383 // func (p *concatProcessor) Start(ctx context.Context) context.Context { 384 // p.l.Start(ctx) 385 // p.r.Start(ctx) 386 // return p.StartInternal(ctx, concatProcName) 387 // } 388 // 389 // // Next is part of the RowSource interface. 390 // func (p *concatProcessor) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) { 391 // // Loop while we haven't produced a row or a metadata record. We loop around 392 // // in several cases, including when the filtering rejected a row coming. 393 // for p.State == StateRunning { 394 // var row sqlbase.EncDatumRow 395 // var meta *ProducerMetadata 396 // if !p.leftConsumed { 397 // row, meta = p.l.Next() 398 // } else { 399 // row, meta = p.r.Next() 400 // } 401 // 402 // if meta != nil { 403 // // If we got an error, we need to forward it along and remember that we're 404 // // draining. 405 // if meta.Err != nil { 406 // p.MoveToDraining(nil /* err */) 407 // } 408 // return nil, meta 409 // } 410 // if row == nil { 411 // if !p.leftConsumed { 412 // p.leftConsumed = true 413 // } else { 414 // // In this case we know that both inputs are consumed, so we could 415 // // transition directly to StateTrailingMeta, but implementations are 416 // // encouraged to just use MoveToDraining() for uniformity; DrainHelper() 417 // // will transition to StateTrailingMeta() quickly. 418 // p.MoveToDraining(nil /* err */) 419 // break 420 // } 421 // continue 422 // } 423 // 424 // if outRow := p.ProcessRowHelper(row); outRow != nil { 425 // return outRow, nil 426 // } 427 // } 428 // return nil, p.DrainHelper() 429 // } 430 // 431 // // ConsumerDone is part of the RowSource interface. 432 // func (p *concatProcessor) ConsumerDone() { 433 // p.MoveToDraining(nil /* err */) 434 // } 435 // 436 // // ConsumerClosed is part of the RowSource interface. 437 // func (p *concatProcessor) ConsumerClosed() { 438 // // The consumer is done, Next() will not be called again. 439 // p.InternalClose() 440 // } 441 // 442 type ProcessorBase struct { 443 self RowSource 444 445 processorID int32 446 447 Out ProcOutputHelper 448 FlowCtx *FlowCtx 449 450 // EvalCtx is used for expression evaluation. It overrides the one in flowCtx. 451 EvalCtx *tree.EvalContext 452 453 // MemMonitor is the processor's memory monitor. 454 MemMonitor *mon.BytesMonitor 455 456 // Closed is set by InternalClose(). Once set, the processor's tracing span 457 // has been closed. 458 Closed bool 459 460 // Ctx and span contain the tracing state while the processor is active 461 // (i.e. hasn't been closed). Initialized using flowCtx.Ctx (which should not be otherwise 462 // used). 463 Ctx context.Context 464 span opentracing.Span 465 // origCtx is the context from which ctx was derived. InternalClose() resets 466 // ctx to this. 467 origCtx context.Context 468 469 State procState 470 471 // FinishTrace, if set, will be called before getting the trace data from 472 // the span and adding the recording to the trailing metadata. Useful for 473 // adding any extra information (e.g. stats) that should be captured in a 474 // trace. 475 FinishTrace func() 476 477 // trailingMetaCallback, if set, will be called by moveToTrailingMeta(). The 478 // callback is expected to close all inputs, do other cleanup on the processor 479 // (including calling InternalClose()) and generate the trailing meta that 480 // needs to be returned to the consumer. As a special case, 481 // moveToTrailingMeta() handles getting the tracing information into 482 // trailingMeta, so the callback doesn't need to worry about that. 483 // 484 // If no callback is specified, InternalClose() will be called automatically. 485 // So, if no trailing metadata other than the trace needs to be returned (and 486 // other than what has otherwise been manually put in trailingMeta) and no 487 // closing other than InternalClose is needed, then no callback needs to be 488 // specified. 489 trailingMetaCallback func(context.Context) []execinfrapb.ProducerMetadata 490 // trailingMeta is scratch space where metadata is stored to be returned 491 // later. 492 trailingMeta []execinfrapb.ProducerMetadata 493 494 // inputsToDrain, if not empty, contains inputs to be drained by 495 // DrainHelper(). MoveToDraining() calls ConsumerDone() on them, 496 // InternalClose() calls ConsumerClosed() on then. 497 // 498 // ConsumerDone() is called on all inputs at once and then inputs are drained 499 // one by one (in StateDraining, inputsToDrain[0] is the one currently being 500 // drained). 501 inputsToDrain []RowSource 502 } 503 504 // Reset resets this ProcessorBase, retaining allocated memory in slices. 505 func (pb *ProcessorBase) Reset() { 506 pb.Out.Reset() 507 *pb = ProcessorBase{ 508 Out: pb.Out, 509 trailingMeta: pb.trailingMeta[:0], 510 inputsToDrain: pb.inputsToDrain[:0], 511 } 512 } 513 514 // procState represents the standard states that a processor can be in. These 515 // states are relevant when the processor is using the draining utilities in 516 // ProcessorBase. 517 type procState int 518 519 //go:generate stringer -type=procState 520 const ( 521 // StateRunning is the common state of a processor: it's producing rows for 522 // its consumer and forwarding metadata from its input. Different processors 523 // might have sub-states internally. 524 // 525 // If the consumer calls ConsumerDone or if the ProcOutputHelper.maxRowIdx is 526 // reached, then the processor will transition to StateDraining. If the input 527 // is exhausted, then the processor can transition to StateTrailingMeta 528 // directly, although most always go through StateDraining. 529 StateRunning procState = iota 530 531 // StateDraining is the state in which the processor is forwarding metadata 532 // from its input and otherwise ignoring all rows. Once the input is 533 // exhausted, the processor will transition to StateTrailingMeta. 534 // 535 // In StateDraining, processors are required to swallow 536 // ReadWithinUncertaintyIntervalErrors received from its sources. We're 537 // already draining, so we don't care about whatever data generated this 538 // uncertainty error. Besides generally seeming like a good idea, doing this 539 // allows us to offer a nice guarantee to SQL clients: a read-only query that 540 // produces at most one row, run as an implicit txn, never produces retriable 541 // errors, regardless of the size of the row being returned (in relation to 542 // the size of the result buffer on the connection). One would naively expect 543 // that to be true: either the error happens before any rows have been 544 // delivered to the client, in which case the auto-retries kick in, or, if a 545 // row has been delivered, then the query is done and so how can there be an 546 // error? What our naive friend is ignoring is that, if it weren't for this 547 // code, it'd be possible for a retriable error to sneak in after the query's 548 // limit has been satisfied but while processors are still draining. Note 549 // that uncertainty errors are not retried automatically by the leaf 550 // TxnCoordSenders (i.e. by refresh txn interceptor). 551 // 552 // Other categories of errors might be safe to ignore too; however we 553 // can't ignore all of them. Generally, we need to ensure that all the 554 // trailing metadata (e.g. LeafTxnFinalState's) make it to the gateway for 555 // successful flows. If an error is telling us that some metadata might 556 // have been dropped, we can't ignore that. 557 StateDraining 558 559 // StateTrailingMeta is the state in which the processor is outputting final 560 // metadata such as the tracing information or the LeafTxnFinalState. Once all the 561 // trailing metadata has been produced, the processor transitions to 562 // StateExhausted. 563 StateTrailingMeta 564 565 // StateExhausted is the state of a processor that has no more rows or 566 // metadata to produce. 567 StateExhausted 568 ) 569 570 // MoveToDraining switches the processor to the StateDraining. Only metadata is 571 // returned from now on. In this state, the processor is expected to drain its 572 // inputs (commonly by using DrainHelper()). 573 // 574 // If the processor has no input (ProcStateOpts.intputToDrain was not specified 575 // at init() time), then we move straight to the StateTrailingMeta. 576 // 577 // An error can be optionally passed. It will be the first piece of metadata 578 // returned by DrainHelper(). 579 func (pb *ProcessorBase) MoveToDraining(err error) { 580 if pb.State != StateRunning { 581 // Calling MoveToDraining in any state is allowed in order to facilitate the 582 // ConsumerDone() implementations that just call this unconditionally. 583 // However, calling it with an error in states other than StateRunning is 584 // not permitted. 585 if err != nil { 586 log.Fatalf(pb.Ctx, "MoveToDraining called in state %s with err: %s", 587 pb.State, err) 588 } 589 return 590 } 591 592 if err != nil { 593 pb.trailingMeta = append(pb.trailingMeta, execinfrapb.ProducerMetadata{Err: err}) 594 } 595 if len(pb.inputsToDrain) > 0 { 596 // We go to StateDraining here. DrainHelper() will transition to 597 // StateTrailingMeta when the inputs are drained (including if the inputs 598 // are already drained). 599 pb.State = StateDraining 600 for _, input := range pb.inputsToDrain { 601 input.ConsumerDone() 602 } 603 } else { 604 pb.moveToTrailingMeta() 605 } 606 } 607 608 // DrainHelper is supposed to be used in states draining and trailingMetadata. 609 // It deals with optionally draining an input and returning trailing meta. It 610 // also moves from StateDraining to StateTrailingMeta when appropriate. 611 func (pb *ProcessorBase) DrainHelper() *execinfrapb.ProducerMetadata { 612 if pb.State == StateRunning { 613 log.Fatal(pb.Ctx, "drain helper called in StateRunning") 614 } 615 616 // trailingMeta always has priority; it seems like a good idea because it 617 // causes metadata to be sent quickly after it is produced (e.g. the error 618 // passed to MoveToDraining()). 619 if len(pb.trailingMeta) > 0 { 620 return pb.popTrailingMeta() 621 } 622 623 if pb.State != StateDraining { 624 return nil 625 } 626 627 // Ignore all rows; only return meta. 628 for { 629 input := pb.inputsToDrain[0] 630 631 row, meta := input.Next() 632 if row == nil && meta == nil { 633 pb.inputsToDrain = pb.inputsToDrain[1:] 634 if len(pb.inputsToDrain) == 0 { 635 pb.moveToTrailingMeta() 636 return pb.popTrailingMeta() 637 } 638 continue 639 } 640 if meta != nil { 641 // Swallow ReadWithinUncertaintyIntervalErrors. See comments on 642 // StateDraining. 643 if err := meta.Err; err != nil { 644 // We only look for UnhandledRetryableErrors. Local reads (which would 645 // be transformed by the Root TxnCoordSender into 646 // TransactionRetryWithProtoRefreshErrors) don't have any uncertainty. 647 if ure := (*roachpb.UnhandledRetryableError)(nil); errors.As(err, &ure) { 648 uncertain := ure.PErr.Detail.GetReadWithinUncertaintyInterval() 649 if uncertain != nil { 650 continue 651 } 652 } 653 } 654 return meta 655 } 656 } 657 } 658 659 // popTrailingMeta peels off one piece of trailing metadata or advances to 660 // StateExhausted if there's no more trailing metadata. 661 func (pb *ProcessorBase) popTrailingMeta() *execinfrapb.ProducerMetadata { 662 if len(pb.trailingMeta) > 0 { 663 meta := &pb.trailingMeta[0] 664 pb.trailingMeta = pb.trailingMeta[1:] 665 return meta 666 } 667 pb.State = StateExhausted 668 return nil 669 } 670 671 // moveToTrailingMeta switches the processor to the "trailing meta" state: only 672 // trailing metadata is returned from now on. For simplicity, processors are 673 // encouraged to always use MoveToDraining() instead of this method, even when 674 // there's nothing to drain. moveToDrain() or DrainHelper() will internally call 675 // moveToTrailingMeta(). 676 // 677 // trailingMetaCallback, if any, is called; it is expected to close the 678 // processor's inputs. 679 // 680 // This method is to be called when the processor is done producing rows and 681 // draining its inputs (if it wants to drain them). 682 func (pb *ProcessorBase) moveToTrailingMeta() { 683 if pb.State == StateTrailingMeta || pb.State == StateExhausted { 684 log.Fatalf(pb.Ctx, "moveToTrailingMeta called in state: %s", pb.State) 685 } 686 687 if pb.FinishTrace != nil { 688 pb.FinishTrace() 689 } 690 691 pb.State = StateTrailingMeta 692 if pb.span != nil { 693 if trace := GetTraceData(pb.Ctx); trace != nil { 694 pb.trailingMeta = append(pb.trailingMeta, execinfrapb.ProducerMetadata{TraceData: trace}) 695 } 696 } 697 // trailingMetaCallback is called after reading the tracing data because it 698 // generally calls InternalClose, indirectly, which switches the context and 699 // the span. 700 if pb.trailingMetaCallback != nil { 701 pb.trailingMeta = append(pb.trailingMeta, pb.trailingMetaCallback(pb.Ctx)...) 702 } else { 703 pb.InternalClose() 704 } 705 } 706 707 // ProcessRowHelper is a wrapper on top of ProcOutputHelper.ProcessRow(). It 708 // takes care of handling errors and drain requests by moving the processor to 709 // StateDraining. 710 // 711 // It takes a row and returns the row after processing. The return value can be 712 // nil, in which case the caller shouldn't return anything to its consumer; it 713 // should continue processing other rows, with the awareness that the processor 714 // might have been transitioned to the draining phase. 715 func (pb *ProcessorBase) ProcessRowHelper(row sqlbase.EncDatumRow) sqlbase.EncDatumRow { 716 outRow, ok, err := pb.Out.ProcessRow(pb.Ctx, row) 717 if err != nil { 718 pb.MoveToDraining(err) 719 return nil 720 } 721 if !ok { 722 pb.MoveToDraining(nil /* err */) 723 } 724 // Note that outRow might be nil here. 725 // TODO(yuzefovich): there is a problem with this logging when MetadataTest* 726 // processors are planned - there is a mismatch between the row and the 727 // output types (rendering is added to the stage of test processors and the 728 // actual processors that are inputs to the test ones have an unset post 729 // processing; I think that we need to set the post processing on the stages 730 // of processors below the test ones). 731 //if outRow != nil && log.V(3) && pb.Ctx != nil { 732 // log.InfofDepth(pb.Ctx, 1, "pushing row %s", outRow.String(pb.Out.OutputTypes)) 733 //} 734 return outRow 735 } 736 737 // OutputTypes is part of the processor interface. 738 func (pb *ProcessorBase) OutputTypes() []*types.T { 739 return pb.Out.OutputTypes 740 } 741 742 // Run is part of the processor interface. 743 func (pb *ProcessorBase) Run(ctx context.Context) { 744 if pb.Out.output == nil { 745 panic("processor output not initialized for emitting rows") 746 } 747 ctx = pb.self.Start(ctx) 748 Run(ctx, pb.self, pb.Out.output) 749 } 750 751 // ProcStateOpts contains fields used by the ProcessorBase's family of functions 752 // that deal with draining and trailing metadata: the ProcessorBase implements 753 // generic useful functionality that needs to call back into the Processor. 754 type ProcStateOpts struct { 755 // TrailingMetaCallback, if specified, is a callback to be called by 756 // moveToTrailingMeta(). See ProcessorBase.TrailingMetaCallback. 757 TrailingMetaCallback func(context.Context) []execinfrapb.ProducerMetadata 758 // InputsToDrain, if specified, will be drained by DrainHelper(). 759 // MoveToDraining() calls ConsumerDone() on them, InternalClose() calls 760 // ConsumerClosed() on them. 761 InputsToDrain []RowSource 762 } 763 764 // Init initializes the ProcessorBase. 765 func (pb *ProcessorBase) Init( 766 self RowSource, 767 post *execinfrapb.PostProcessSpec, 768 types []*types.T, 769 flowCtx *FlowCtx, 770 processorID int32, 771 output RowReceiver, 772 memMonitor *mon.BytesMonitor, 773 opts ProcStateOpts, 774 ) error { 775 return pb.InitWithEvalCtx( 776 self, post, types, flowCtx, flowCtx.NewEvalCtx(), processorID, output, memMonitor, opts, 777 ) 778 } 779 780 // InitWithEvalCtx initializes the ProcessorBase with a given EvalContext. 781 func (pb *ProcessorBase) InitWithEvalCtx( 782 self RowSource, 783 post *execinfrapb.PostProcessSpec, 784 types []*types.T, 785 flowCtx *FlowCtx, 786 evalCtx *tree.EvalContext, 787 processorID int32, 788 output RowReceiver, 789 memMonitor *mon.BytesMonitor, 790 opts ProcStateOpts, 791 ) error { 792 pb.self = self 793 pb.FlowCtx = flowCtx 794 pb.EvalCtx = evalCtx 795 pb.processorID = processorID 796 pb.MemMonitor = memMonitor 797 pb.trailingMetaCallback = opts.TrailingMetaCallback 798 pb.inputsToDrain = opts.InputsToDrain 799 800 // Hydrate all types used in the processor. 801 if err := execinfrapb.HydrateTypeSlice(evalCtx, types); err != nil { 802 return err 803 } 804 805 return pb.Out.Init(post, types, pb.EvalCtx, output) 806 } 807 808 // AddInputToDrain adds an input to drain when moving the processor to a 809 // draining state. 810 func (pb *ProcessorBase) AddInputToDrain(input RowSource) { 811 pb.inputsToDrain = append(pb.inputsToDrain, input) 812 } 813 814 // AppendTrailingMeta appends metadata to the trailing metadata without changing 815 // the state to draining (as opposed to MoveToDraining). 816 func (pb *ProcessorBase) AppendTrailingMeta(meta execinfrapb.ProducerMetadata) { 817 pb.trailingMeta = append(pb.trailingMeta, meta) 818 } 819 820 // ProcessorSpan creates a child span for a processor (if we are doing any 821 // tracing). The returned span needs to be finished using tracing.FinishSpan. 822 func ProcessorSpan(ctx context.Context, name string) (context.Context, opentracing.Span) { 823 return tracing.ChildSpanSeparateRecording(ctx, name) 824 } 825 826 // StartInternal prepares the ProcessorBase for execution. It returns the 827 // annotated context that's also stored in pb.Ctx. 828 func (pb *ProcessorBase) StartInternal(ctx context.Context, name string) context.Context { 829 pb.origCtx = ctx 830 pb.Ctx, pb.span = ProcessorSpan(ctx, name) 831 if pb.span != nil && tracing.IsRecording(pb.span) { 832 pb.span.SetTag(execinfrapb.FlowIDTagKey, pb.FlowCtx.ID.String()) 833 pb.span.SetTag(execinfrapb.ProcessorIDTagKey, pb.processorID) 834 } 835 pb.EvalCtx.Context = pb.Ctx 836 return pb.Ctx 837 } 838 839 // InternalClose helps processors implement the RowSource interface, performing 840 // common close functionality. Returns true iff the processor was not already 841 // closed. 842 // 843 // Notably, it calls ConsumerClosed() on all the inputsToDrain. 844 // 845 // if pb.InternalClose() { 846 // // Perform processor specific close work. 847 // } 848 func (pb *ProcessorBase) InternalClose() bool { 849 closing := !pb.Closed 850 // Protection around double closing is useful for allowing ConsumerClosed() to 851 // be called on processors that have already closed themselves by moving to 852 // StateTrailingMeta. 853 if closing { 854 for _, input := range pb.inputsToDrain { 855 input.ConsumerClosed() 856 } 857 858 pb.Closed = true 859 tracing.FinishSpan(pb.span) 860 pb.span = nil 861 // Reset the context so that any incidental uses after this point do not 862 // access the finished span. 863 pb.Ctx = pb.origCtx 864 865 // This prevents Next() from returning more rows. 866 pb.Out.consumerClosed() 867 } 868 return closing 869 } 870 871 // ConsumerDone is part of the RowSource interface. 872 func (pb *ProcessorBase) ConsumerDone() { 873 pb.MoveToDraining(nil /* err */) 874 } 875 876 // NewMonitor is a utility function used by processors to create a new 877 // memory monitor with the given name and start it. The returned monitor must 878 // be closed. 879 func NewMonitor(ctx context.Context, parent *mon.BytesMonitor, name string) *mon.BytesMonitor { 880 monitor := mon.MakeMonitorInheritWithLimit(name, 0 /* limit */, parent) 881 monitor.Start(ctx, parent, mon.BoundAccount{}) 882 return &monitor 883 } 884 885 // NewLimitedMonitor is a utility function used by processors to create a new 886 // limited memory monitor with the given name and start it. The returned 887 // monitor must be closed. The limit is determined by SettingWorkMemBytes but 888 // overridden to 1 if config.TestingKnobs.ForceDiskSpill is set or 889 // config.TestingKnobs.MemoryLimitBytes if not. 890 func NewLimitedMonitor( 891 ctx context.Context, parent *mon.BytesMonitor, config *ServerConfig, name string, 892 ) *mon.BytesMonitor { 893 limit := GetWorkMemLimit(config) 894 if config.TestingKnobs.ForceDiskSpill { 895 limit = 1 896 } 897 limitedMon := mon.MakeMonitorInheritWithLimit(name, limit, parent) 898 limitedMon.Start(ctx, parent, mon.BoundAccount{}) 899 return &limitedMon 900 } 901 902 // LocalProcessor is a RowSourcedProcessor that needs to be initialized with 903 // its post processing spec and output row receiver. Most processors can accept 904 // these objects at creation time. 905 type LocalProcessor interface { 906 RowSourcedProcessor 907 // InitWithOutput initializes this processor. 908 InitWithOutput(post *execinfrapb.PostProcessSpec, output RowReceiver) error 909 // SetInput initializes this LocalProcessor with an input RowSource. Not all 910 // LocalProcessors need inputs, but this needs to be called if a 911 // LocalProcessor expects to get its data from another RowSource. 912 SetInput(ctx context.Context, input RowSource) error 913 }