github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/flowinfra/outbox.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package flowinfra 12 13 import ( 14 "context" 15 "fmt" 16 "io" 17 "sync" 18 "time" 19 20 "github.com/cockroachdb/cockroach/pkg/roachpb" 21 "github.com/cockroachdb/cockroach/pkg/rpc" 22 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 23 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 24 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 25 "github.com/cockroachdb/cockroach/pkg/sql/types" 26 "github.com/cockroachdb/cockroach/pkg/util/contextutil" 27 "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" 28 "github.com/cockroachdb/cockroach/pkg/util/log" 29 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 30 "github.com/cockroachdb/cockroach/pkg/util/tracing" 31 opentracing "github.com/opentracing/opentracing-go" 32 "google.golang.org/grpc" 33 ) 34 35 const outboxBufRows = 16 36 const outboxFlushPeriod = 100 * time.Microsecond 37 38 type flowStream interface { 39 Send(*execinfrapb.ProducerMessage) error 40 Recv() (*execinfrapb.ConsumerSignal, error) 41 } 42 43 // Outbox implements an outgoing mailbox as a RowReceiver that receives rows and 44 // sends them to a gRPC stream. Its core logic runs in a goroutine. We send rows 45 // when we accumulate outboxBufRows or every outboxFlushPeriod (whichever comes 46 // first). 47 type Outbox struct { 48 // RowChannel implements the RowReceiver interface. 49 execinfra.RowChannel 50 51 flowCtx *execinfra.FlowCtx 52 streamID execinfrapb.StreamID 53 nodeID roachpb.NodeID 54 // The rows received from the RowChannel will be forwarded on this stream once 55 // it is established. 56 stream flowStream 57 58 encoder StreamEncoder 59 // numRows is the number of rows that have been accumulated in the encoder. 60 numRows int 61 62 // flowCtxCancel is the cancellation function for this flow's ctx; context 63 // cancellation is used to stop processors on this flow. It is invoked 64 // whenever the consumer returns an error on the stream above. Set 65 // to a non-null value in start(). 66 flowCtxCancel context.CancelFunc 67 68 err error 69 70 statsCollectionEnabled bool 71 stats OutboxStats 72 } 73 74 var _ execinfra.RowReceiver = &Outbox{} 75 var _ Startable = &Outbox{} 76 77 // NewOutbox creates a new Outbox. 78 func NewOutbox( 79 flowCtx *execinfra.FlowCtx, 80 nodeID roachpb.NodeID, 81 flowID execinfrapb.FlowID, 82 streamID execinfrapb.StreamID, 83 ) *Outbox { 84 m := &Outbox{flowCtx: flowCtx, nodeID: nodeID} 85 m.encoder.SetHeaderFields(flowID, streamID) 86 m.streamID = streamID 87 return m 88 } 89 90 // NewOutboxSyncFlowStream sets up an outbox for the special "sync flow" 91 // stream. The flow context should be provided via SetFlowCtx when it is 92 // available. 93 func NewOutboxSyncFlowStream(stream execinfrapb.DistSQL_RunSyncFlowServer) *Outbox { 94 return &Outbox{stream: stream} 95 } 96 97 // SetFlowCtx sets the flow context for the Outbox. 98 func (m *Outbox) SetFlowCtx(flowCtx *execinfra.FlowCtx) { 99 m.flowCtx = flowCtx 100 } 101 102 // Init initializes the Outbox. 103 func (m *Outbox) Init(typs []*types.T) { 104 if typs == nil { 105 // We check for nil to detect uninitialized cases; but we support 0-length 106 // rows. 107 typs = make([]*types.T, 0) 108 } 109 m.RowChannel.InitWithNumSenders(typs, 1) 110 m.encoder.Init(typs) 111 } 112 113 // addRow encodes a row into rowBuf. If enough rows were accumulated, flush() is 114 // called. 115 // 116 // If an error is returned, the outbox's stream might or might not be usable; if 117 // it's not usable, it will have been set to nil. The error might be a 118 // communication error, in which case the other side of the stream should get it 119 // too, or it might be an encoding error, in which case we've forwarded it on 120 // the stream. 121 func (m *Outbox) addRow( 122 ctx context.Context, row sqlbase.EncDatumRow, meta *execinfrapb.ProducerMetadata, 123 ) error { 124 mustFlush := false 125 var encodingErr error 126 if meta != nil { 127 m.encoder.AddMetadata(ctx, *meta) 128 // If we hit an error, let's forward it ASAP. The consumer will probably 129 // close. 130 mustFlush = meta.Err != nil 131 } else { 132 encodingErr = m.encoder.AddRow(row) 133 if encodingErr != nil { 134 m.encoder.AddMetadata(ctx, execinfrapb.ProducerMetadata{Err: encodingErr}) 135 mustFlush = true 136 } 137 } 138 m.numRows++ 139 var flushErr error 140 if m.numRows >= outboxBufRows || mustFlush { 141 flushErr = m.flush(ctx) 142 } 143 if encodingErr != nil { 144 return encodingErr 145 } 146 return flushErr 147 } 148 149 // flush sends the rows accumulated so far in a ProducerMessage. Any error 150 // returned indicates that sending a message on the outbox's stream failed, and 151 // thus the stream can't be used any more. The stream is also set to nil if 152 // an error is returned. 153 func (m *Outbox) flush(ctx context.Context) error { 154 if m.numRows == 0 && m.encoder.HasHeaderBeenSent() { 155 return nil 156 } 157 msg := m.encoder.FormMessage(ctx) 158 if m.statsCollectionEnabled { 159 m.stats.BytesSent += int64(msg.Size()) 160 } 161 162 if log.V(3) { 163 log.Infof(ctx, "flushing outbox") 164 } 165 sendErr := m.stream.Send(msg) 166 for _, rpm := range msg.Data.Metadata { 167 if metricsMeta, ok := rpm.Value.(*execinfrapb.RemoteProducerMetadata_Metrics_); ok { 168 metricsMeta.Metrics.Release() 169 } 170 } 171 if sendErr != nil { 172 // Make sure the stream is not used any more. 173 m.stream = nil 174 if log.V(1) { 175 log.Errorf(ctx, "outbox flush error: %s", sendErr) 176 } 177 } else if log.V(3) { 178 log.Infof(ctx, "outbox flushed") 179 } 180 if sendErr != nil { 181 return sendErr 182 } 183 184 m.numRows = 0 185 return nil 186 } 187 188 // mainLoop reads from m.RowChannel and writes to the output stream through 189 // addRow()/flush() until the producer doesn't have any more data to send or an 190 // error happened. 191 // 192 // If the consumer asks the producer to drain, mainLoop() will relay this 193 // information and, again, wait until the producer doesn't have any more data to 194 // send (the producer is supposed to only send trailing metadata once it 195 // receives this signal). 196 // 197 // If an error is returned, it's either a communication error from the outbox's 198 // stream, or otherwise the error has already been forwarded on the stream. 199 // Depending on the specific error, the stream might or might not need to be 200 // closed. In case it doesn't, m.stream has been set to nil. 201 func (m *Outbox) mainLoop(ctx context.Context) error { 202 // No matter what happens, we need to make sure we close our RowChannel, since 203 // writers could be writing to it as soon as we are started. 204 defer m.RowChannel.ConsumerClosed() 205 206 var span opentracing.Span 207 ctx, span = execinfra.ProcessorSpan(ctx, "outbox") 208 if span != nil && tracing.IsRecording(span) { 209 m.statsCollectionEnabled = true 210 span.SetTag(execinfrapb.FlowIDTagKey, m.flowCtx.ID.String()) 211 span.SetTag(execinfrapb.StreamIDTagKey, m.streamID) 212 } 213 // spanFinished specifies whether we called tracing.FinishSpan on the span. 214 // Some code paths (e.g. stats collection) need to prematurely call 215 // FinishSpan to get trace data. 216 spanFinished := false 217 defer func() { 218 if !spanFinished { 219 tracing.FinishSpan(span) 220 } 221 }() 222 223 if m.stream == nil { 224 var conn *grpc.ClientConn 225 var err error 226 conn, err = m.flowCtx.Cfg.NodeDialer.DialNoBreaker(ctx, m.nodeID, rpc.DefaultClass) 227 if err != nil { 228 // Log any Dial errors. This does not have a verbosity check due to being 229 // a critical part of query execution: if this step doesn't work, the 230 // receiving side might end up hanging or timing out. 231 log.Infof(ctx, "outbox: connection dial error: %+v", err) 232 return err 233 } 234 client := execinfrapb.NewDistSQLClient(conn) 235 if log.V(2) { 236 log.Infof(ctx, "outbox: calling FlowStream") 237 } 238 // The context used here escapes, so it has to be a background context. 239 m.stream, err = client.FlowStream(context.TODO()) 240 if err != nil { 241 if log.V(1) { 242 log.Infof(ctx, "FlowStream error: %s", err) 243 } 244 return err 245 } 246 if log.V(2) { 247 log.Infof(ctx, "outbox: FlowStream returned") 248 } 249 } 250 251 var flushTimer timeutil.Timer 252 defer flushTimer.Stop() 253 254 draining := false 255 256 // TODO(andrei): It's unfortunate that we're spawning a goroutine for every 257 // outgoing stream, but I'm not sure what to do instead. The streams don't 258 // have a non-blocking API. We could start this goroutine only after a 259 // timeout, but that timeout would affect queries that use flows with 260 // LimitHint's (so, queries where the consumer is expected to quickly ask the 261 // producer to drain). Perhaps what we want is a way to tell when all the rows 262 // corresponding to the first KV batch have been sent and only start the 263 // goroutine if more batches are needed to satisfy the query. 264 listenToConsumerCtx, cancel := contextutil.WithCancel(ctx) 265 drainCh, err := m.listenForDrainSignalFromConsumer(listenToConsumerCtx) 266 defer cancel() 267 if err != nil { 268 return err 269 } 270 271 // Send a first message that will contain the header (i.e. the StreamID), so 272 // that the stream is properly initialized on the consumer. The consumer has 273 // a timeout in which inbound streams must be established. 274 if err := m.flush(ctx); err != nil { 275 return err 276 } 277 278 for { 279 select { 280 case msg, ok := <-m.RowChannel.C: 281 if !ok { 282 // No more data. 283 if m.statsCollectionEnabled { 284 err := m.flush(ctx) 285 if err != nil { 286 return err 287 } 288 if m.flowCtx.Cfg.TestingKnobs.DeterministicStats { 289 m.stats.BytesSent = 0 290 } 291 tracing.SetSpanStats(span, &m.stats) 292 tracing.FinishSpan(span) 293 spanFinished = true 294 if trace := execinfra.GetTraceData(ctx); trace != nil { 295 err := m.addRow(ctx, nil, &execinfrapb.ProducerMetadata{TraceData: trace}) 296 if err != nil { 297 return err 298 } 299 } 300 } 301 return m.flush(ctx) 302 } 303 if !draining || msg.Meta != nil { 304 // If we're draining, we ignore all the rows and just send metadata. 305 err := m.addRow(ctx, msg.Row, msg.Meta) 306 if err != nil { 307 return err 308 } 309 if msg.Meta != nil { 310 // Now that we have added metadata, it is safe to release it to the 311 // pool. 312 msg.Meta.Release() 313 } 314 // If the message to add was metadata, a flush was already forced. If 315 // this is our first row, restart the flushTimer. 316 if m.numRows == 1 { 317 flushTimer.Reset(outboxFlushPeriod) 318 } 319 } 320 case <-flushTimer.C: 321 flushTimer.Read = true 322 err := m.flush(ctx) 323 if err != nil { 324 return err 325 } 326 case drainSignal := <-drainCh: 327 if drainSignal.err != nil { 328 // Stop work from proceeding in this flow. This also causes FlowStream 329 // RPCs that have this node as consumer to return errors. 330 m.flowCtxCancel() 331 // The consumer either doesn't care any more (it returned from the 332 // FlowStream RPC with an error if the outbox established the stream or 333 // it canceled the client context if the consumer established the 334 // stream through a RunSyncFlow RPC), or there was a communication error 335 // and the stream is dead. In any case, the stream has been closed and 336 // the consumer will not consume more rows from this outbox. Make sure 337 // the stream is not used any more. 338 m.stream = nil 339 return drainSignal.err 340 } 341 drainCh = nil 342 if drainSignal.drainRequested { 343 // Enter draining mode. 344 draining = true 345 m.RowChannel.ConsumerDone() 346 } else { 347 // No draining required. We're done; no need to consume any more. 348 // m.RowChannel.ConsumerClosed() is called in a defer above. 349 return nil 350 } 351 } 352 } 353 } 354 355 // drainSignal is a signal received from the consumer telling the producer that 356 // it doesn't need any more rows and optionally asking the producer to drain. 357 type drainSignal struct { 358 // drainRequested, if set, means that the consumer is interested in the 359 // trailing metadata that the producer might have. If not set, the producer 360 // should close immediately (the consumer is probably gone by now). 361 drainRequested bool 362 // err, if set, is either the error that the consumer returned when closing 363 // the FlowStream RPC or a communication error. 364 err error 365 } 366 367 // listenForDrainSignalFromConsumer returns a channel that will be pinged once the 368 // consumer has closed its send-side of the stream, or has sent a drain signal. 369 // 370 // This method runs a task that will run until either the consumer closes the 371 // stream or until the caller cancels the context. The caller has to cancel the 372 // context once it no longer reads from the channel, otherwise this method might 373 // deadlock when attempting to write to the channel. 374 func (m *Outbox) listenForDrainSignalFromConsumer(ctx context.Context) (<-chan drainSignal, error) { 375 ch := make(chan drainSignal, 1) 376 377 stream := m.stream 378 if err := m.flowCtx.Cfg.Stopper.RunAsyncTask(ctx, "drain", func(ctx context.Context) { 379 sendDrainSignal := func(drainRequested bool, err error) bool { 380 select { 381 case ch <- drainSignal{drainRequested: drainRequested, err: err}: 382 return true 383 case <-ctx.Done(): 384 // Listening for consumer signals has been canceled. This generally 385 // means that the main outbox routine is no longer listening to these 386 // signals but, in the RunSyncFlow case, it may also mean that the 387 // client (the consumer) has canceled the RPC. In that case, the main 388 // routine is still listening (and this branch of the select has been 389 // randomly selected; the other was also available), so we have to 390 // notify it. Thus, we attempt sending again. 391 select { 392 case ch <- drainSignal{drainRequested: drainRequested, err: err}: 393 return true 394 default: 395 return false 396 } 397 } 398 } 399 400 for { 401 signal, err := stream.Recv() 402 if err == io.EOF { 403 sendDrainSignal(false, nil) 404 return 405 } 406 if err != nil { 407 sendDrainSignal(false, err) 408 return 409 } 410 switch { 411 case signal.DrainRequest != nil: 412 if !sendDrainSignal(true, nil) { 413 return 414 } 415 case signal.SetupFlowRequest != nil: 416 log.Fatalf(ctx, "Unexpected SetupFlowRequest. "+ 417 "This SyncFlow specific message should have been handled in RunSyncFlow.") 418 case signal.Handshake != nil: 419 log.Eventf(ctx, "Consumer sent handshake. Consuming flow scheduled: %t", 420 signal.Handshake.ConsumerScheduled) 421 } 422 } 423 }); err != nil { 424 return nil, err 425 } 426 return ch, nil 427 } 428 429 func (m *Outbox) run(ctx context.Context, wg *sync.WaitGroup) { 430 err := m.mainLoop(ctx) 431 if stream, ok := m.stream.(execinfrapb.DistSQL_FlowStreamClient); ok { 432 closeErr := stream.CloseSend() 433 if err == nil { 434 err = closeErr 435 } 436 } 437 m.err = err 438 if wg != nil { 439 wg.Done() 440 } 441 } 442 443 // Start starts the outbox. 444 func (m *Outbox) Start(ctx context.Context, wg *sync.WaitGroup, flowCtxCancel context.CancelFunc) { 445 if m.Types() == nil { 446 panic("outbox not initialized") 447 } 448 if wg != nil { 449 wg.Add(1) 450 } 451 m.flowCtxCancel = flowCtxCancel 452 go m.run(ctx, wg) 453 } 454 455 // Err returns the error (if any occurred) while Outbox was running. 456 func (m *Outbox) Err() error { 457 return m.err 458 } 459 460 const outboxTagPrefix = "outbox." 461 462 // Stats implements the SpanStats interface. 463 func (os *OutboxStats) Stats() map[string]string { 464 statsMap := make(map[string]string) 465 statsMap[outboxTagPrefix+"bytes_sent"] = humanizeutil.IBytes(os.BytesSent) 466 return statsMap 467 } 468 469 // StatsForQueryPlan implements the DistSQLSpanStats interface. 470 func (os *OutboxStats) StatsForQueryPlan() []string { 471 return []string{fmt.Sprintf("bytes sent: %s", humanizeutil.IBytes(os.BytesSent))} 472 }