github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colflow/colrpc/inbox.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package colrpc 12 13 import ( 14 "context" 15 "fmt" 16 "io" 17 "sync" 18 19 "github.com/apache/arrow/go/arrow/array" 20 "github.com/cockroachdb/cockroach/pkg/col/coldata" 21 "github.com/cockroachdb/cockroach/pkg/col/colserde" 22 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase" 23 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror" 24 "github.com/cockroachdb/cockroach/pkg/sql/colmem" 25 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 26 "github.com/cockroachdb/cockroach/pkg/sql/types" 27 "github.com/cockroachdb/cockroach/pkg/util/log" 28 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 29 "github.com/cockroachdb/logtags" 30 ) 31 32 // flowStreamServer is a utility interface used to mock out the RPC layer. 33 type flowStreamServer interface { 34 Send(*execinfrapb.ConsumerSignal) error 35 Recv() (*execinfrapb.ProducerMessage, error) 36 } 37 38 // Inbox is used to expose data from remote flows through an exec.Operator 39 // interface. FlowStream RPC handlers should call RunWithStream (which blocks 40 // until operation terminates, gracefully or unexpectedly) to pass the stream 41 // to the inbox. Next may be called before RunWithStream, it will just block 42 // until the stream is made available or its context is canceled. Note that 43 // ownership of the stream is passed from the RunWithStream goroutine to the 44 // Next goroutine. In exchange, the RunWithStream goroutine receives the first 45 // context passed into Next and listens for cancellation. Returning from 46 // RunWithStream (or more specifically, the RPC handler) will unblock Next by 47 // closing the stream. 48 type Inbox struct { 49 colexecbase.ZeroInputNode 50 typs []*types.T 51 52 converter *colserde.ArrowBatchConverter 53 serializer *colserde.RecordBatchSerializer 54 55 // streamID is used to overwrite a caller's streamID 56 // in the ctx argument of Next and DrainMeta. 57 streamID execinfrapb.StreamID 58 59 // streamCh is the channel over which the stream is passed from the stream 60 // handler to the reader goroutine. 61 streamCh chan flowStreamServer 62 // contextCh is the channel over which the reader goroutine passes the first 63 // context to the stream handler so that it can listen for context 64 // cancellation. 65 contextCh chan context.Context 66 67 // timeoutCh is the channel over which an error will be sent if the reader 68 // goroutine should exit while waiting for a stream. 69 timeoutCh chan error 70 71 // errCh is the channel that RunWithStream will block on, waiting until the 72 // Inbox does not need a stream any more. An error will only be sent on this 73 // channel in the event of a cancellation or a non-io.EOF error originating 74 // from a stream.Recv. 75 errCh chan error 76 77 // ctxInterceptorFn is a callback to expose the inbox's context 78 // right after init. To be used for unit testing. 79 ctxInterceptorFn func(context.Context) 80 81 // We need two mutexes because a single mutex is insufficient to handle 82 // concurrent calls to Next() and DrainMeta(). See comment in DrainMeta. 83 stateMu struct { 84 syncutil.Mutex 85 // initialized prevents double initialization. Should not be used by the 86 // RunWithStream goroutine. 87 initialized bool 88 // done prevents double closing. It should not be used by the RunWithStream 89 // goroutine. 90 done bool 91 // nextRunning indicates whether Next goroutine is running at the moment. 92 nextRunning bool 93 // nextExited is a condition variable on which DrainMeta might block in 94 // order to wait for Next goroutine to exit. 95 nextExited *sync.Cond 96 // nextShouldExit indicates to Next goroutine that it should exit. It must 97 // only be updated by DrainMeta goroutine. 98 nextShouldExit bool 99 // bufferedMeta buffers any metadata found in Next when reading from the 100 // stream and is returned by DrainMeta. 101 bufferedMeta []execinfrapb.ProducerMetadata 102 } 103 104 streamMu struct { 105 syncutil.Mutex 106 // stream is the RPC stream. It is set when RunWithStream is called but 107 // only the Next and DrainMeta goroutines may access it. 108 stream flowStreamServer 109 } 110 111 scratch struct { 112 data []*array.Data 113 b coldata.Batch 114 } 115 } 116 117 var _ colexecbase.Operator = &Inbox{} 118 119 // NewInbox creates a new Inbox. 120 func NewInbox( 121 allocator *colmem.Allocator, typs []*types.T, streamID execinfrapb.StreamID, 122 ) (*Inbox, error) { 123 c, err := colserde.NewArrowBatchConverter(typs) 124 if err != nil { 125 return nil, err 126 } 127 s, err := colserde.NewRecordBatchSerializer(typs) 128 if err != nil { 129 return nil, err 130 } 131 i := &Inbox{ 132 typs: typs, 133 converter: c, 134 serializer: s, 135 streamID: streamID, 136 streamCh: make(chan flowStreamServer, 1), 137 contextCh: make(chan context.Context, 1), 138 timeoutCh: make(chan error, 1), 139 errCh: make(chan error, 1), 140 } 141 i.scratch.data = make([]*array.Data, len(typs)) 142 i.scratch.b = allocator.NewMemBatch(typs) 143 i.stateMu.bufferedMeta = make([]execinfrapb.ProducerMetadata, 0) 144 i.stateMu.nextExited = sync.NewCond(&i.stateMu) 145 return i, nil 146 } 147 148 // maybeInitLocked calls Inbox.initLocked if the inbox is not initialized and 149 // returns an error if the initialization was not successful. Usually this is 150 // because the given context is canceled before the remote stream arrives. 151 // NOTE: i.stateMu *must* be held when calling this function. 152 func (i *Inbox) maybeInitLocked(ctx context.Context) error { 153 if !i.stateMu.initialized { 154 if err := i.initLocked(ctx); err != nil { 155 return err 156 } 157 i.stateMu.initialized = true 158 } 159 return nil 160 } 161 162 // initLocked initializes the Inbox for operation by blocking until 163 // RunWithStream sets the stream to read from. ctx ownership is retained until 164 // the stream arrives (to allow for unblocking the wait for a stream), at which 165 // point ownership is transferred to RunWithStream. This should only be called 166 // from the reader goroutine when it needs a stream. 167 // NOTE: i.stateMu *must* be held when calling this function because it is 168 // sufficient to protect access to i.streamMu.stream since the stream will only 169 // be accessed after the initialization. 170 func (i *Inbox) initLocked(ctx context.Context) error { 171 // Wait for the stream to be initialized. We're essentially waiting for the 172 // remote connection. 173 select { 174 case i.streamMu.stream = <-i.streamCh: 175 case err := <-i.timeoutCh: 176 i.errCh <- fmt.Errorf("%s: remote stream arrived too late", err) 177 return err 178 case <-ctx.Done(): 179 i.errCh <- fmt.Errorf("%s: Inbox while waiting for stream", ctx.Err()) 180 return ctx.Err() 181 } 182 183 if i.ctxInterceptorFn != nil { 184 i.ctxInterceptorFn(ctx) 185 } 186 i.contextCh <- ctx 187 return nil 188 } 189 190 // closeLocked closes the inbox, ensuring that any call to RunWithStream will 191 // return immediately. closeLocked is idempotent. 192 // NOTE: i.stateMu *must* be held when calling this function. 193 func (i *Inbox) closeLocked() { 194 if !i.stateMu.done { 195 i.stateMu.done = true 196 close(i.errCh) 197 } 198 } 199 200 // RunWithStream sets the Inbox's stream and waits until either streamCtx is 201 // canceled, a caller of Next cancels the first context passed into Next, or 202 // an EOF is encountered on the stream by the Next goroutine. 203 func (i *Inbox) RunWithStream(streamCtx context.Context, stream flowStreamServer) error { 204 streamCtx = logtags.AddTag(streamCtx, "streamID", i.streamID) 205 log.VEvent(streamCtx, 2, "Inbox handling stream") 206 defer log.VEvent(streamCtx, 2, "Inbox exited stream handler") 207 // Pass the stream to the reader goroutine (non-blocking) and get the context 208 // to listen for cancellation. 209 i.streamCh <- stream 210 var readerCtx context.Context 211 select { 212 case err := <-i.errCh: 213 return err 214 case readerCtx = <-i.contextCh: 215 log.VEvent(streamCtx, 2, "Inbox reader arrived") 216 case <-streamCtx.Done(): 217 return fmt.Errorf("%s: streamCtx while waiting for reader (remote client canceled)", streamCtx.Err()) 218 } 219 220 // Now wait for one of the events described in the method comment. If a 221 // cancellation is encountered, nothing special must be done to cancel the 222 // reader goroutine as returning from the handler will close the stream. 223 select { 224 case err := <-i.errCh: 225 // nil will be read from errCh when the channel is closed. 226 return err 227 case <-readerCtx.Done(): 228 // The reader canceled the stream. 229 return fmt.Errorf("%s: readerCtx in Inbox stream handler (local reader canceled)", readerCtx.Err()) 230 case <-streamCtx.Done(): 231 // The client canceled the stream. 232 return fmt.Errorf("%s: streamCtx in Inbox stream handler (remote client canceled)", streamCtx.Err()) 233 } 234 } 235 236 // Timeout sends the given error to any readers waiting for a stream to be 237 // established (i.e. RunWithStream to be called). 238 func (i *Inbox) Timeout(err error) { 239 i.timeoutCh <- err 240 } 241 242 // Init is part of the Operator interface. 243 func (i *Inbox) Init() {} 244 245 // Next returns the next batch. It will block until there is data available. 246 // The Inbox will exit when either the context passed in on the first call to 247 // Next is canceled or when DrainMeta goroutine tells it to do so. 248 func (i *Inbox) Next(ctx context.Context) coldata.Batch { 249 i.stateMu.Lock() 250 stateMuLocked := true 251 i.stateMu.nextRunning = true 252 defer func() { 253 i.stateMu.nextRunning = false 254 i.stateMu.nextExited.Signal() 255 i.stateMu.Unlock() 256 }() 257 if i.stateMu.done { 258 return coldata.ZeroBatch 259 } 260 261 ctx = logtags.AddTag(ctx, "streamID", i.streamID) 262 263 defer func() { 264 // Catch any panics that occur and close the errCh in order to not leak the 265 // goroutine listening for context cancellation. errCh must still be closed 266 // during normal termination. 267 if err := recover(); err != nil { 268 if !stateMuLocked { 269 // The panic occurred while we were Recv'ing when we were holding 270 // i.streamMu and were not holding i.stateMu. 271 i.stateMu.Lock() 272 i.streamMu.Unlock() 273 } 274 i.closeLocked() 275 colexecerror.InternalError(err) 276 } 277 }() 278 279 // NOTE: It is very important to close i.errCh only when execution terminates 280 // ungracefully or when DrainMeta has been called (which indicates a graceful 281 // termination). DrainMeta will use the stream to read any remaining metadata 282 // after Next returns a zero-length batch during normal execution. 283 if err := i.maybeInitLocked(ctx); err != nil { 284 // An error occurred while initializing the Inbox and is likely caused by 285 // the connection issues. It is expected that such an error can occur. 286 colexecerror.ExpectedError(err) 287 } 288 289 for { 290 // DrainMeta goroutine indicated to us that we should exit. We do so 291 // without closing errCh since DrainMeta still needs the stream. 292 if i.stateMu.nextShouldExit { 293 return coldata.ZeroBatch 294 } 295 296 i.stateMu.Unlock() 297 stateMuLocked = false 298 i.streamMu.Lock() 299 m, err := i.streamMu.stream.Recv() 300 i.streamMu.Unlock() 301 i.stateMu.Lock() 302 stateMuLocked = true 303 if err != nil { 304 if err == io.EOF { 305 // Done. 306 i.closeLocked() 307 return coldata.ZeroBatch 308 } 309 i.errCh <- err 310 colexecerror.ExpectedError(err) 311 } 312 if len(m.Data.Metadata) != 0 { 313 for _, rpm := range m.Data.Metadata { 314 meta, ok := execinfrapb.RemoteProducerMetaToLocalMeta(ctx, rpm) 315 if !ok { 316 continue 317 } 318 i.stateMu.bufferedMeta = append(i.stateMu.bufferedMeta, meta) 319 } 320 // Continue until we get the next batch or EOF. 321 continue 322 } 323 if len(m.Data.RawBytes) == 0 { 324 // Protect against Deserialization panics by skipping empty messages. 325 continue 326 } 327 i.scratch.data = i.scratch.data[:0] 328 if err := i.serializer.Deserialize(&i.scratch.data, m.Data.RawBytes); err != nil { 329 colexecerror.InternalError(err) 330 } 331 if err := i.converter.ArrowToBatch(i.scratch.data, i.scratch.b); err != nil { 332 colexecerror.InternalError(err) 333 } 334 return i.scratch.b 335 } 336 } 337 338 func (i *Inbox) sendDrainSignal(ctx context.Context) error { 339 log.VEvent(ctx, 2, "Inbox sending drain signal to Outbox") 340 // It is safe to Send without holding the mutex because it is legal to call 341 // Send and Recv from different goroutines. 342 if err := i.streamMu.stream.Send(&execinfrapb.ConsumerSignal{DrainRequest: &execinfrapb.DrainRequest{}}); err != nil { 343 if log.V(1) { 344 log.Warningf(ctx, "Inbox unable to send drain signal to Outbox: %+v", err) 345 } 346 return err 347 } 348 return nil 349 } 350 351 // DrainMeta is part of the MetadataGenerator interface. DrainMeta may be 352 // called concurrently with Next. 353 // Note: DrainMeta will cause Next goroutine to finish. 354 func (i *Inbox) DrainMeta(ctx context.Context) []execinfrapb.ProducerMetadata { 355 i.stateMu.Lock() 356 defer i.stateMu.Unlock() 357 allMeta := i.stateMu.bufferedMeta 358 i.stateMu.bufferedMeta = i.stateMu.bufferedMeta[:0] 359 360 if i.stateMu.done { 361 return allMeta 362 } 363 364 ctx = logtags.AddTag(ctx, "streamID", i.streamID) 365 366 // We want draining the Inbox to work regardless of whether or not we have a 367 // goroutine in Next. We essentially need to do two things: 1) Is the stream 368 // safe to use? If yes, then 2) Make sure nobody else is receiving. 369 // Unfortunately, there is no way to cancel a Recv on a stream, so we need to 370 // do this by sending the message. However, we can't unconditionally send a 371 // message since we don't know the state of the stream (is it initialized?). 372 // This leaves us with having two separate mutexes, one for the state and 373 // another one for the stream (to make sure we wait until the Next goroutine 374 // has finished Recv'ing). 375 drainSignalSent := false 376 if i.stateMu.initialized { 377 if err := i.sendDrainSignal(ctx); err != nil { 378 return allMeta 379 } 380 drainSignalSent = true 381 i.stateMu.nextShouldExit = true 382 for i.stateMu.nextRunning { 383 i.stateMu.nextExited.Wait() 384 } 385 // It is possible that Next goroutine has buffered more metadata, so we 386 // need to grab it. 387 allMeta = append(allMeta, i.stateMu.bufferedMeta...) 388 i.stateMu.bufferedMeta = i.stateMu.bufferedMeta[:0] 389 } 390 391 // Note that unlocking defer from above will execute after this defer because 392 // the unlocking one will be pushed below on the stack, so we still will have 393 // the lock when this one is executed. 394 defer i.closeLocked() 395 396 if err := i.maybeInitLocked(ctx); err != nil { 397 if log.V(1) { 398 log.Warningf(ctx, "Inbox unable to initialize stream while draining metadata: %+v", err) 399 } 400 return allMeta 401 } 402 if !drainSignalSent { 403 if err := i.sendDrainSignal(ctx); err != nil { 404 return allMeta 405 } 406 } 407 408 i.streamMu.Lock() 409 defer i.streamMu.Unlock() 410 for { 411 msg, err := i.streamMu.stream.Recv() 412 if err != nil { 413 if err == io.EOF { 414 break 415 } 416 if log.V(1) { 417 log.Warningf(ctx, "Inbox Recv connection error while draining metadata: %+v", err) 418 } 419 return allMeta 420 } 421 for _, remoteMeta := range msg.Data.Metadata { 422 meta, ok := execinfrapb.RemoteProducerMetaToLocalMeta(ctx, remoteMeta) 423 if !ok { 424 continue 425 } 426 allMeta = append(allMeta, meta) 427 } 428 } 429 430 return allMeta 431 }