github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/execinfra/base.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package execinfra 12 13 import ( 14 "context" 15 "sync" 16 "sync/atomic" 17 18 "github.com/cockroachdb/cockroach/pkg/kv" 19 "github.com/cockroachdb/cockroach/pkg/roachpb" 20 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 21 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 22 "github.com/cockroachdb/cockroach/pkg/sql/types" 23 "github.com/cockroachdb/cockroach/pkg/util/log" 24 "github.com/cockroachdb/cockroach/pkg/util/tracing" 25 "github.com/cockroachdb/cockroach/pkg/util/uuid" 26 "github.com/cockroachdb/errors" 27 "github.com/opentracing/opentracing-go" 28 ) 29 30 // RowChannelBufSize is the default buffer size of a RowChannel. 31 const RowChannelBufSize = 16 32 33 // ConsumerStatus is the type returned by RowReceiver.Push(), informing a 34 // producer of a consumer's state. 35 type ConsumerStatus uint32 36 37 //go:generate stringer -type=ConsumerStatus 38 39 const ( 40 // NeedMoreRows indicates that the consumer is still expecting more rows. 41 NeedMoreRows ConsumerStatus = iota 42 // DrainRequested indicates that the consumer will not process any more data 43 // rows, but will accept trailing metadata from the producer. 44 DrainRequested 45 // ConsumerClosed indicates that the consumer will not process any more data 46 // rows or metadata. This is also commonly returned in case the consumer has 47 // encountered an error. 48 ConsumerClosed 49 ) 50 51 // RowReceiver is any component of a flow that receives rows from another 52 // component. It can be an input synchronizer, a router, or a mailbox. 53 type RowReceiver interface { 54 // Push sends a record to the consumer of this RowReceiver. Exactly one of the 55 // row/meta must be specified (i.e. either row needs to be non-nil or meta 56 // needs to be non-Empty()). May block. 57 // 58 // The return value indicates the current status of the consumer. Depending on 59 // it, producers are expected to drain or shut down. In all cases, 60 // ProducerDone() needs to be called (after draining is done, if draining was 61 // requested). 62 // 63 // Unless specifically permitted by the underlying implementation, (see 64 // copyingRowReceiver, for example), the sender must not modify the row 65 // and the metadata after calling this function. 66 // 67 // After DrainRequested is returned, it is expected that all future calls only 68 // carry metadata (however that is not enforced and implementations should be 69 // prepared to discard non-metadata rows). If ConsumerClosed is returned, 70 // implementations have to ignore further calls to Push() (such calls are 71 // allowed because there might be multiple producers for a single RowReceiver 72 // and they might not all be aware of the last status returned). 73 // 74 // Implementations of Push() must be thread-safe. 75 Push(row sqlbase.EncDatumRow, meta *execinfrapb.ProducerMetadata) ConsumerStatus 76 77 // Types returns the types of the EncDatumRow that this RowReceiver expects 78 // to be pushed. 79 Types() []*types.T 80 81 // ProducerDone is called when the producer has pushed all the rows and 82 // metadata; it causes the RowReceiver to process all rows and clean up. 83 // 84 // ProducerDone() cannot be called concurrently with Push(), and after it 85 // is called, no other method can be called. 86 ProducerDone() 87 } 88 89 // RowSource is any component of a flow that produces rows that can be consumed 90 // by another component. 91 // 92 // Communication components generally (e.g. RowBuffer, RowChannel) implement 93 // this interface. Some processors also implement it (in addition to 94 // implementing the Processor interface) - in which case those 95 // processors can be "fused" with their consumer (i.e. run in the consumer's 96 // goroutine). 97 type RowSource interface { 98 // OutputTypes returns the schema for the rows in this source. 99 OutputTypes() []*types.T 100 101 // Start prepares the RowSource for future Next() calls and takes in the 102 // context in which these future calls should operate. Start needs to be 103 // called before Next/ConsumerDone/ConsumerClosed. 104 // 105 // RowSources that consume other RowSources are expected to Start() their 106 // inputs. 107 // 108 // Implementations are expected to hold on to the provided context. They may 109 // choose to derive and annotate it (Processors generally do). For convenience, 110 // the possibly updated context is returned. 111 Start(context.Context) context.Context 112 113 // Next returns the next record from the source. At most one of the return 114 // values will be non-empty. Both of them can be empty when the RowSource has 115 // been exhausted - no more records are coming and any further method calls 116 // will be no-ops. 117 // 118 // EncDatumRows returned by Next() are only valid until the next call to 119 // Next(), although the EncDatums inside them stay valid forever. 120 // 121 // A ProducerMetadata record may contain an error. In that case, this 122 // interface is oblivious about the semantics: implementers may continue 123 // returning different rows on future calls, or may return an empty record 124 // (thus asking the consumer to stop asking for rows). In particular, 125 // implementers are not required to only return metadata records from this 126 // point on (which means, for example, that they're not required to 127 // automatically ask every producer to drain, in case there's multiple 128 // producers). Therefore, consumers need to be aware that some rows might have 129 // been skipped in case they continue to consume rows. Usually a consumer 130 // should react to an error by calling ConsumerDone(), thus asking the 131 // RowSource to drain, and separately discard any future data rows. A consumer 132 // receiving an error should also call ConsumerDone() on any other input it 133 // has. 134 Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) 135 136 // ConsumerDone lets the source know that we will not need any more data 137 // rows. The source is expected to start draining and only send metadata 138 // rows. May be called multiple times on a RowSource, even after 139 // ConsumerClosed has been called. 140 // 141 // May block. If the consumer of the source stops consuming rows before 142 // Next indicates that there are no more rows, ConsumerDone() and/or 143 // ConsumerClosed() must be called; it is a no-op to call these methods after 144 // all the rows were consumed (i.e. after Next() returned an empty row). 145 ConsumerDone() 146 147 // ConsumerClosed informs the source that the consumer is done and will not 148 // make any more calls to Next(). Must only be called once on a given 149 // RowSource. 150 // 151 // Like ConsumerDone(), if the consumer of the source stops consuming rows 152 // before Next indicates that there are no more rows, ConsumerDone() and/or 153 // ConsumerClosed() must be called; it is a no-op to call these methods after 154 // all the rows were consumed (i.e. after Next() returned an empty row). 155 ConsumerClosed() 156 } 157 158 // RowSourcedProcessor is the union of RowSource and Processor. 159 type RowSourcedProcessor interface { 160 RowSource 161 Run(context.Context) 162 } 163 164 // Run reads records from the source and outputs them to the receiver, properly 165 // draining the source of metadata and closing both the source and receiver. 166 // 167 // src needs to have been Start()ed before calling this. 168 func Run(ctx context.Context, src RowSource, dst RowReceiver) { 169 for { 170 row, meta := src.Next() 171 // Emit the row; stop if no more rows are needed. 172 if row != nil || meta != nil { 173 switch dst.Push(row, meta) { 174 case NeedMoreRows: 175 continue 176 case DrainRequested: 177 DrainAndForwardMetadata(ctx, src, dst) 178 dst.ProducerDone() 179 return 180 case ConsumerClosed: 181 src.ConsumerClosed() 182 dst.ProducerDone() 183 return 184 } 185 } 186 // row == nil && meta == nil: the source has been fully drained. 187 dst.ProducerDone() 188 return 189 } 190 } 191 192 // Releasable is an interface for objects than can be Released back into a 193 // memory pool when finished. 194 type Releasable interface { 195 // Release allows this object to be returned to a memory pool. Objects must 196 // not be used after Release is called. 197 Release() 198 } 199 200 // DrainAndForwardMetadata calls src.ConsumerDone() (thus asking src for 201 // draining metadata) and then forwards all the metadata to dst. 202 // 203 // When this returns, src has been properly closed (regardless of the presence 204 // or absence of an error). dst, however, has not been closed; someone else must 205 // call dst.ProducerDone() when all producers have finished draining. 206 // 207 // It is OK to call DrainAndForwardMetadata() multiple times concurrently on the 208 // same dst (as RowReceiver.Push() is guaranteed to be thread safe). 209 func DrainAndForwardMetadata(ctx context.Context, src RowSource, dst RowReceiver) { 210 src.ConsumerDone() 211 for { 212 row, meta := src.Next() 213 if meta == nil { 214 if row == nil { 215 return 216 } 217 continue 218 } 219 if row != nil { 220 log.Fatalf( 221 ctx, "both row data and metadata in the same record. row: %s meta: %+v", 222 row.String(src.OutputTypes()), meta, 223 ) 224 } 225 226 switch dst.Push(nil /* row */, meta) { 227 case ConsumerClosed: 228 src.ConsumerClosed() 229 return 230 case NeedMoreRows: 231 case DrainRequested: 232 } 233 } 234 } 235 236 // GetTraceData returns the trace data. 237 func GetTraceData(ctx context.Context) []tracing.RecordedSpan { 238 if sp := opentracing.SpanFromContext(ctx); sp != nil { 239 return tracing.GetRecording(sp) 240 } 241 return nil 242 } 243 244 // SendTraceData collects the tracing information from the ctx and pushes it to 245 // dst. The ConsumerStatus returned by dst is ignored. 246 // 247 // Note that the tracing data is distinct between different processors, since 248 // each one gets its own trace "recording group". 249 func SendTraceData(ctx context.Context, dst RowReceiver) { 250 if rec := GetTraceData(ctx); rec != nil { 251 dst.Push(nil /* row */, &execinfrapb.ProducerMetadata{TraceData: rec}) 252 } 253 } 254 255 // GetLeafTxnFinalState returns the txn metadata from a transaction if 256 // it is present and the transaction is a leaf transaction. It returns 257 // nil when called on a Root. This is done as a convenience allowing 258 // DistSQL processors to be oblivious about whether they're running in 259 // a Leaf or a Root. 260 // 261 // NOTE(andrei): As of 04/2018, the txn is shared by all processors scheduled on 262 // a node, and so it's possible for multiple processors to send the same 263 // LeafTxnFinalState. The root TxnCoordSender doesn't care if it receives the same 264 // thing multiple times. 265 func GetLeafTxnFinalState(ctx context.Context, txn *kv.Txn) *roachpb.LeafTxnFinalState { 266 if txn.Type() != kv.LeafTxn { 267 return nil 268 } 269 txnMeta, err := txn.GetLeafTxnFinalState(ctx) 270 if err != nil { 271 // TODO(knz): plumb errors through the callers. 272 panic(errors.Wrap(err, "in execinfra.GetLeafTxnFinalState")) 273 } 274 275 if txnMeta.Txn.ID == uuid.Nil { 276 return nil 277 } 278 return &txnMeta 279 } 280 281 // DrainAndClose is a version of DrainAndForwardMetadata that drains multiple 282 // sources. These sources are assumed to be the only producers left for dst, so 283 // dst is closed once they're all exhausted (this is different from 284 // DrainAndForwardMetadata). 285 // 286 // If cause is specified, it is forwarded to the consumer before all the drain 287 // metadata. This is intended to have been the error, if any, that caused the 288 // draining. 289 // 290 // pushTrailingMeta is called after draining the sources and before calling 291 // dst.ProducerDone(). It gives the caller the opportunity to push some trailing 292 // metadata (e.g. tracing information and txn updates, if applicable). 293 // 294 // srcs can be nil. 295 // 296 // All errors are forwarded to the producer. 297 func DrainAndClose( 298 ctx context.Context, 299 dst RowReceiver, 300 cause error, 301 pushTrailingMeta func(context.Context), 302 srcs ...RowSource, 303 ) { 304 if cause != nil { 305 // We ignore the returned ConsumerStatus and rely on the 306 // DrainAndForwardMetadata() calls below to close srcs in all cases. 307 _ = dst.Push(nil /* row */, &execinfrapb.ProducerMetadata{Err: cause}) 308 } 309 if len(srcs) > 0 { 310 var wg sync.WaitGroup 311 for _, input := range srcs[1:] { 312 wg.Add(1) 313 go func(input RowSource) { 314 DrainAndForwardMetadata(ctx, input, dst) 315 wg.Done() 316 }(input) 317 } 318 DrainAndForwardMetadata(ctx, srcs[0], dst) 319 wg.Wait() 320 } 321 pushTrailingMeta(ctx) 322 dst.ProducerDone() 323 } 324 325 // NoMetadataRowSource is a wrapper on top of a RowSource that automatically 326 // forwards metadata to a RowReceiver. Data rows are returned through an 327 // interface similar to RowSource, except that, since metadata is taken care of, 328 // only the data rows are returned. 329 // 330 // The point of this struct is that it'd be burdensome for some row consumers to 331 // have to deal with metadata. 332 type NoMetadataRowSource struct { 333 src RowSource 334 metadataSink RowReceiver 335 } 336 337 // MakeNoMetadataRowSource builds a NoMetadataRowSource. 338 func MakeNoMetadataRowSource(src RowSource, sink RowReceiver) NoMetadataRowSource { 339 return NoMetadataRowSource{src: src, metadataSink: sink} 340 } 341 342 // NextRow is analogous to RowSource.Next. If the producer sends an error, we 343 // can't just forward it to metadataSink. We need to let the consumer know so 344 // that it's not under the impression that everything is hunky-dory and it can 345 // continue consuming rows. So, this interface returns the error. Just like with 346 // a raw RowSource, the consumer should generally call ConsumerDone() and drain. 347 func (rs *NoMetadataRowSource) NextRow() (sqlbase.EncDatumRow, error) { 348 for { 349 row, meta := rs.src.Next() 350 if meta == nil { 351 return row, nil 352 } 353 if meta.Err != nil { 354 return nil, meta.Err 355 } 356 // We forward the metadata and ignore the returned ConsumerStatus. There's 357 // no good way to use that status here; eventually the consumer of this 358 // NoMetadataRowSource will figure out the same status and act on it as soon 359 // as a non-metadata row is received. 360 _ = rs.metadataSink.Push(nil /* row */, meta) 361 } 362 } 363 364 // RowChannelMsg is the message used in the channels that implement 365 // local physical streams (i.e. the RowChannel's). 366 type RowChannelMsg struct { 367 // Only one of these fields will be set. 368 Row sqlbase.EncDatumRow 369 Meta *execinfrapb.ProducerMetadata 370 } 371 372 // rowSourceBase provides common functionality for RowSource implementations 373 // that need to track consumer status. It is intended to be used by RowSource 374 // implementations into which data is pushed by a producer async, as opposed to 375 // RowSources that pull data synchronously from their inputs, which don't need 376 // to deal with concurrent calls to ConsumerDone() / ConsumerClosed()). 377 // Things like the RowChannel falls in the first category; processors generally 378 // fall in the latter. 379 type rowSourceBase struct { 380 // ConsumerStatus is an atomic used in implementation of the 381 // RowSource.Consumer{Done,Closed} methods to signal that the consumer is 382 // done accepting rows or is no longer accepting data. 383 ConsumerStatus ConsumerStatus 384 } 385 386 // consumerDone helps processors implement RowSource.ConsumerDone. 387 func (rb *rowSourceBase) consumerDone() { 388 atomic.CompareAndSwapUint32((*uint32)(&rb.ConsumerStatus), 389 uint32(NeedMoreRows), uint32(DrainRequested)) 390 } 391 392 // consumerClosed helps processors implement RowSource.ConsumerClosed. The name 393 // is only used for debug messages. 394 func (rb *rowSourceBase) consumerClosed(name string) { 395 status := ConsumerStatus(atomic.LoadUint32((*uint32)(&rb.ConsumerStatus))) 396 if status == ConsumerClosed { 397 log.ReportOrPanic(context.Background(), nil, "%s already closed", log.Safe(name)) 398 } 399 atomic.StoreUint32((*uint32)(&rb.ConsumerStatus), uint32(ConsumerClosed)) 400 } 401 402 // RowChannel is a thin layer over a RowChannelMsg channel, which can be used to 403 // transfer rows between goroutines. 404 type RowChannel struct { 405 rowSourceBase 406 407 types []*types.T 408 409 // The channel on which rows are delivered. 410 C <-chan RowChannelMsg 411 412 // dataChan is the same channel as C. 413 dataChan chan RowChannelMsg 414 415 // numSenders is an atomic counter that keeps track of how many senders have 416 // yet to call ProducerDone(). 417 numSenders int32 418 } 419 420 var _ RowReceiver = &RowChannel{} 421 var _ RowSource = &RowChannel{} 422 423 // InitWithNumSenders initializes the RowChannel with the default buffer size. 424 // numSenders is the number of producers that will be pushing to this channel. 425 // RowChannel will not be closed until it receives numSenders calls to 426 // ProducerDone(). 427 func (rc *RowChannel) InitWithNumSenders(types []*types.T, numSenders int) { 428 rc.InitWithBufSizeAndNumSenders(types, RowChannelBufSize, numSenders) 429 } 430 431 // InitWithBufSizeAndNumSenders initializes the RowChannel with a given buffer 432 // size and number of senders. 433 func (rc *RowChannel) InitWithBufSizeAndNumSenders(types []*types.T, chanBufSize, numSenders int) { 434 rc.types = types 435 rc.dataChan = make(chan RowChannelMsg, chanBufSize) 436 rc.C = rc.dataChan 437 atomic.StoreInt32(&rc.numSenders, int32(numSenders)) 438 } 439 440 // Push is part of the RowReceiver interface. 441 func (rc *RowChannel) Push( 442 row sqlbase.EncDatumRow, meta *execinfrapb.ProducerMetadata, 443 ) ConsumerStatus { 444 consumerStatus := ConsumerStatus( 445 atomic.LoadUint32((*uint32)(&rc.ConsumerStatus))) 446 switch consumerStatus { 447 case NeedMoreRows: 448 rc.dataChan <- RowChannelMsg{Row: row, Meta: meta} 449 case DrainRequested: 450 // If we're draining, only forward metadata. 451 if meta != nil { 452 rc.dataChan <- RowChannelMsg{Meta: meta} 453 } 454 case ConsumerClosed: 455 // If the consumer is gone, swallow all the rows and the metadata. 456 } 457 return consumerStatus 458 } 459 460 // ProducerDone is part of the RowReceiver interface. 461 func (rc *RowChannel) ProducerDone() { 462 newVal := atomic.AddInt32(&rc.numSenders, -1) 463 if newVal < 0 { 464 panic("too many ProducerDone() calls") 465 } 466 if newVal == 0 { 467 close(rc.dataChan) 468 } 469 } 470 471 // OutputTypes is part of the RowSource interface. 472 func (rc *RowChannel) OutputTypes() []*types.T { 473 return rc.types 474 } 475 476 // Start is part of the RowSource interface. 477 func (rc *RowChannel) Start(ctx context.Context) context.Context { return ctx } 478 479 // Next is part of the RowSource interface. 480 func (rc *RowChannel) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) { 481 d, ok := <-rc.C 482 if !ok { 483 // No more rows. 484 return nil, nil 485 } 486 return d.Row, d.Meta 487 } 488 489 // ConsumerDone is part of the RowSource interface. 490 func (rc *RowChannel) ConsumerDone() { 491 rc.consumerDone() 492 } 493 494 // ConsumerClosed is part of the RowSource interface. 495 func (rc *RowChannel) ConsumerClosed() { 496 rc.consumerClosed("RowChannel") 497 numSenders := atomic.LoadInt32(&rc.numSenders) 498 // Drain (at most) numSenders messages in case senders are blocked trying to 499 // emit a row. 500 // Note that, if the producer is done, then it has also closed the 501 // channel this will not block. The producer might be neither blocked nor 502 // closed, though; hence the no data case. 503 for i := int32(0); i < numSenders; i++ { 504 select { 505 case <-rc.dataChan: 506 default: 507 } 508 } 509 } 510 511 // Types is part of the RowReceiver interface. 512 func (rc *RowChannel) Types() []*types.T { 513 return rc.types 514 }