github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/parallel_unordered_synchronizer.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package colexec 12 13 import ( 14 "context" 15 "sync" 16 "sync/atomic" 17 18 "github.com/cockroachdb/cockroach/pkg/col/coldata" 19 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase" 20 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror" 21 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 22 "github.com/cockroachdb/cockroach/pkg/sql/types" 23 "github.com/cockroachdb/cockroach/pkg/util/contextutil" 24 ) 25 26 // unorderedSynchronizerMsg is a light wrapper over a coldata.Batch sent over a 27 // channel so that the main goroutine can know which input this message 28 // originated from. 29 type unorderedSynchronizerMsg struct { 30 inputIdx int 31 b coldata.Batch 32 } 33 34 var _ colexecbase.Operator = &ParallelUnorderedSynchronizer{} 35 var _ execinfra.OpNode = &ParallelUnorderedSynchronizer{} 36 37 // ParallelUnorderedSynchronizer is an Operator that combines multiple Operator streams 38 // into one. 39 type ParallelUnorderedSynchronizer struct { 40 inputs []colexecbase.Operator 41 // readNextBatch is a slice of channels, where each channel corresponds to the 42 // input at the same index in inputs. It is used as a barrier for input 43 // goroutines to wait on until the Next goroutine signals that it is safe to 44 // retrieve the next batch. This is done so that inputs that are running 45 // asynchronously do not overwrite batches returned previously, given that 46 // batches must be safe for reuse until the next call to Next. 47 readNextBatch []chan struct{} 48 // numFinishedInputs is incremented atomically whenever one of the provided 49 // inputs exits from a goroutine (gracefully or otherwise). 50 numFinishedInputs uint32 51 // lastReadInputIdx is the index of the input whose batch we last returned. 52 // Used so that on the next call to Next, we can resume the input. 53 lastReadInputIdx int 54 // batches are the last batches read from the corresponding input. 55 batches []coldata.Batch 56 // nextBatch is a slice of functions each of which obtains a next batch from 57 // the corresponding to it input. 58 nextBatch []func() 59 60 initialized bool 61 done bool 62 // externalWaitGroup refers to the WaitGroup passed in externally. Since the 63 // ParallelUnorderedSynchronizer spawns goroutines, this allows callers to 64 // wait for the completion of these goroutines. 65 externalWaitGroup *sync.WaitGroup 66 // internalWaitGroup refers to the WaitGroup internally managed by the 67 // ParallelUnorderedSynchronizer. This will only ever be incremented by the 68 // ParallelUnorderedSynchronizer and decremented by the input goroutines. This 69 // allows the ParallelUnorderedSynchronizer to wait only on internal 70 // goroutines. 71 internalWaitGroup *sync.WaitGroup 72 cancelFn context.CancelFunc 73 batchCh chan *unorderedSynchronizerMsg 74 errCh chan error 75 } 76 77 // ChildCount implements the execinfra.OpNode interface. 78 func (s *ParallelUnorderedSynchronizer) ChildCount(verbose bool) int { 79 return len(s.inputs) 80 } 81 82 // Child implements the execinfra.OpNode interface. 83 func (s *ParallelUnorderedSynchronizer) Child(nth int, verbose bool) execinfra.OpNode { 84 return s.inputs[nth] 85 } 86 87 // NewParallelUnorderedSynchronizer creates a new ParallelUnorderedSynchronizer. 88 // On the first call to Next, len(inputs) goroutines are spawned to read each 89 // input asynchronously (to not be limited by a slow input). These will 90 // increment the passed-in WaitGroup and decrement when done. It is also 91 // guaranteed that these spawned goroutines will have completed on any error or 92 // zero-length batch received from Next. 93 func NewParallelUnorderedSynchronizer( 94 inputs []colexecbase.Operator, typs []*types.T, wg *sync.WaitGroup, 95 ) *ParallelUnorderedSynchronizer { 96 readNextBatch := make([]chan struct{}, len(inputs)) 97 for i := range readNextBatch { 98 // Buffer readNextBatch chans to allow for non-blocking writes. There will 99 // only be one message on the channel at a time. 100 readNextBatch[i] = make(chan struct{}, 1) 101 } 102 return &ParallelUnorderedSynchronizer{ 103 inputs: inputs, 104 readNextBatch: readNextBatch, 105 batches: make([]coldata.Batch, len(inputs)), 106 nextBatch: make([]func(), len(inputs)), 107 externalWaitGroup: wg, 108 internalWaitGroup: &sync.WaitGroup{}, 109 batchCh: make(chan *unorderedSynchronizerMsg, len(inputs)), 110 // errCh is buffered so that writers do not block. If errCh is full, the 111 // input goroutines will not push an error and exit immediately, given that 112 // the Next goroutine will read an error and panic anyway. 113 errCh: make(chan error, 1), 114 } 115 } 116 117 // Init is part of the Operator interface. 118 func (s *ParallelUnorderedSynchronizer) Init() { 119 for _, input := range s.inputs { 120 input.Init() 121 } 122 } 123 124 // init starts one goroutine per input to read from each input asynchronously 125 // and push to batchCh. Canceling the context results in all goroutines 126 // terminating, otherwise they keep on pushing batches until a zero-length batch 127 // is encountered. Once all inputs terminate, s.batchCh is closed. If an error 128 // occurs, the goroutines will make a non-blocking best effort to push that 129 // error on s.errCh, resulting in the first error pushed to be observed by the 130 // Next goroutine. Inputs are asynchronous so that the synchronizer is minimally 131 // affected by slow inputs. 132 func (s *ParallelUnorderedSynchronizer) init(ctx context.Context) { 133 ctx, s.cancelFn = contextutil.WithCancel(ctx) 134 for i, input := range s.inputs { 135 s.nextBatch[i] = func(input colexecbase.Operator, inputIdx int) func() { 136 return func() { 137 s.batches[inputIdx] = input.Next(ctx) 138 } 139 }(input, i) 140 s.externalWaitGroup.Add(1) 141 s.internalWaitGroup.Add(1) 142 // TODO(asubiotto): Most inputs are Inboxes, and these have handler 143 // goroutines just sitting around waiting for cancellation. I wonder if we 144 // could reuse those goroutines to push batches to batchCh directly. 145 go func(input colexecbase.Operator, inputIdx int) { 146 defer func() { 147 if int(atomic.AddUint32(&s.numFinishedInputs, 1)) == len(s.inputs) { 148 close(s.batchCh) 149 } 150 s.internalWaitGroup.Done() 151 s.externalWaitGroup.Done() 152 }() 153 msg := &unorderedSynchronizerMsg{ 154 inputIdx: inputIdx, 155 } 156 for { 157 if err := colexecerror.CatchVectorizedRuntimeError(s.nextBatch[inputIdx]); err != nil { 158 select { 159 // Non-blocking write to errCh, if an error is present the main 160 // goroutine will use that and cancel all inputs. 161 case s.errCh <- err: 162 default: 163 } 164 return 165 } 166 if s.batches[inputIdx].Length() == 0 { 167 return 168 } 169 msg.b = s.batches[inputIdx] 170 select { 171 case <-ctx.Done(): 172 select { 173 // Non-blocking write to errCh, if an error is present the main 174 // goroutine will use that and cancel all inputs. 175 case s.errCh <- ctx.Err(): 176 default: 177 } 178 return 179 case s.batchCh <- msg: 180 } 181 182 // Wait until Next goroutine tells us we are good to go. 183 select { 184 case <-s.readNextBatch[inputIdx]: 185 case <-ctx.Done(): 186 select { 187 // Non-blocking write to errCh, if an error is present the main 188 // goroutine will use that and cancel all inputs. 189 case s.errCh <- ctx.Err(): 190 default: 191 } 192 return 193 } 194 } 195 }(input, i) 196 } 197 s.initialized = true 198 } 199 200 // Next is part of the Operator interface. 201 func (s *ParallelUnorderedSynchronizer) Next(ctx context.Context) coldata.Batch { 202 if s.done { 203 return coldata.ZeroBatch 204 } 205 if !s.initialized { 206 s.init(ctx) 207 } else { 208 // Signal the input whose batch we returned in the last call to Next that it 209 // is safe to retrieve the next batch. Since Next has been called, we can 210 // reuse memory instead of making safe copies of batches returned. 211 s.readNextBatch[s.lastReadInputIdx] <- struct{}{} 212 } 213 select { 214 case err := <-s.errCh: 215 if err != nil { 216 // If we got an error from one of our inputs, cancel all inputs and 217 // propagate this error through a panic. 218 s.cancelFn() 219 s.internalWaitGroup.Wait() 220 colexecerror.InternalError(err) 221 } 222 case msg := <-s.batchCh: 223 if msg == nil { 224 // All inputs have exited, double check that this is indeed the case. 225 s.internalWaitGroup.Wait() 226 // Check if this was a graceful termination or not. 227 select { 228 case err := <-s.errCh: 229 if err != nil { 230 colexecerror.InternalError(err) 231 } 232 default: 233 } 234 s.done = true 235 return coldata.ZeroBatch 236 } 237 s.lastReadInputIdx = msg.inputIdx 238 return msg.b 239 } 240 return nil 241 }