github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/shuffle.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"context"
	"sync"

	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/failpoint"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/twmb/murmur3"
	"go.uber.org/zap"
)

// ShuffleInterDirc is the interlock that runs other interlocks in a parallel manner.
//  1. It fetches chunks from `DataSource`.
//  2. It splits tuples from `DataSource` into N partitions (only "split by hash" is implemented so far).
//  3. It invokes N workers in parallel, assigns each partition as input to a worker, and executes the child interlocks.
//  4. It collects the outputs from each worker, then sends them to its parent.
//
//                                 +-------------+
//                         +-------| Main Thread |
//                         |       +------+------+
//                         |              ^
//                         |              |
//                         |              +
//                         v             +++
//                  outputHolderCh       | | outputCh (1 x Concurrency)
//                         v             +++
//                         |              ^
//                         |              |
//                         |      +-------+-------+
//                         v      |               |
//                  +--------------+             +--------------+
//           +----- |    worker    |   .......   |    worker    |  worker (N Concurrency): child interlock, eg. WindowInterDirc (+SortInterDirc)
//           |      +------------+-+             +-+------------+
//           |                   ^                 ^
//           |                   |                 |
//           |                  +-+  +-+  ......  +-+
//           |                  | |  | |          | |
//           |                  ...  ...          ...  inputCh (Concurrency x 1)
//           v                  | |  | |          | |
//     inputHolderCh           +++  +++          +++
//           v                   ^    ^            ^
//           |                   |    |            |
//           |            +------o----+            |
//           |            |      +-----------------+-----+
//           |            |                              |
//           |        +---+------------+------------+----+-----------+
//           |        |              Partition Splitter              |
//           |        +--------------+-+------------+-+--------------+
//           |                                ^
//           |                                |
//           |                +---------------v-----------------+
//           +------------->  |    fetch data from DataSource   |
//                            +---------------------------------+
//
////////////////////////////////////////////////////////////////////////////////////////
type ShuffleInterDirc struct {
	baseInterlockingDirectorate
	concurrency int
	workers     []*shuffleWorker

	prepared bool
	executed bool

	splitter   partitionSplitter
	dataSource InterlockingDirectorate

	finishCh chan struct{}
	outputCh chan *shuffleOutput
}

type shuffleOutput struct {
	chk        *chunk.Chunk
	err        error
	giveBackCh chan *chunk.Chunk
}

// Open implements the InterlockingDirectorate Open interface.
func (e *ShuffleInterDirc) Open(ctx context.Context) error {
	if err := e.dataSource.Open(ctx); err != nil {
		return err
	}
	if err := e.baseInterlockingDirectorate.Open(ctx); err != nil {
		return err
	}

	e.prepared = false
	e.finishCh = make(chan struct{}, 1)
	e.outputCh = make(chan *shuffleOutput, e.concurrency)

	for _, w := range e.workers {
		w.finishCh = e.finishCh

		w.inputCh = make(chan *chunk.Chunk, 1)
		w.inputHolderCh = make(chan *chunk.Chunk, 1)
		w.outputCh = e.outputCh
		w.outputHolderCh = make(chan *chunk.Chunk, 1)

		if err := w.childInterDirc.Open(ctx); err != nil {
			return err
		}

		w.inputHolderCh <- newFirstChunk(e.dataSource)
		w.outputHolderCh <- newFirstChunk(e)
	}

	return nil
}

// Close implements the InterlockingDirectorate Close interface.
func (e *ShuffleInterDirc) Close() error {
	if !e.prepared {
		for _, w := range e.workers {
			close(w.inputHolderCh)
			close(w.inputCh)
			close(w.outputHolderCh)
		}
		close(e.outputCh)
	}
	close(e.finishCh)
	for _, w := range e.workers {
		for range w.inputCh {
		}
	}
	for range e.outputCh { // workers exit before `e.outputCh` is closed.
	}
	e.executed = false

	if e.runtimeStats != nil {
		runtimeStats := &execdetails.RuntimeStatsWithConcurrencyInfo{}
		runtimeStats.SetConcurrencyInfo(execdetails.NewConcurrencyInfo("ShuffleConcurrency", e.concurrency))
		e.ctx.GetStochastikVars().StmtCtx.RuntimeStatsDefCausl.RegisterStats(e.id, runtimeStats)
	}

	err := e.dataSource.Close()
	err1 := e.baseInterlockingDirectorate.Close()
	if err != nil {
		return errors.Trace(err)
	}
	return errors.Trace(err1)
}

// prepare4ParallelInterDirc starts the data-fetcher goroutine, one goroutine per worker,
// and the goroutine that closes `outputCh` once all workers are done.
func (e *ShuffleInterDirc) prepare4ParallelInterDirc(ctx context.Context) {
	go e.fetchDataAndSplit(ctx)

	waitGroup := &sync.WaitGroup{}
	waitGroup.Add(len(e.workers))
	for _, w := range e.workers {
		go w.run(ctx, waitGroup)
	}

	go e.waitWorkerAndCloseOutput(waitGroup)
}

// waitWorkerAndCloseOutput waits for all workers to finish, then closes `outputCh`
// so that Next can observe the end of data.
func (e *ShuffleInterDirc) waitWorkerAndCloseOutput(waitGroup *sync.WaitGroup) {
	waitGroup.Wait()
	close(e.outputCh)
}

// Next implements the InterlockingDirectorate Next interface.
func (e *ShuffleInterDirc) Next(ctx context.Context, req *chunk.Chunk) error {
	req.Reset()
	if !e.prepared {
		e.prepare4ParallelInterDirc(ctx)
		e.prepared = true
	}

	failpoint.Inject("shuffleError", func(val failpoint.Value) {
		if val.(bool) {
			failpoint.Return(errors.New("ShuffleInterDirc.Next error"))
		}
	})

	if e.executed {
		return nil
	}

	result, ok := <-e.outputCh
	if !ok {
		e.executed = true
		return nil
	}
	if result.err != nil {
		return result.err
	}
	req.SwapDeferredCausets(result.chk) // `shuffleWorker` will not send an empty `result.chk` to `e.outputCh`.
	result.giveBackCh <- result.chk

	return nil
}

// recoveryShuffleInterDirc converts a recovered panic into an error, sends it to
// the output channel and logs it together with the stack trace.
func recoveryShuffleInterDirc(output chan *shuffleOutput, r interface{}) {
	err := errors.Errorf("%v", r)
	output <- &shuffleOutput{err: errors.Errorf("%v", r)}
	logutil.BgLogger().Error("shuffle panicked", zap.Error(err), zap.Stack("stack"))
}

// fetchDataAndSplit reads chunks from `dataSource`, routes every event to a worker
// according to `splitter`, and closes the workers' input channels once the data
// source is drained.
func (e *ShuffleInterDirc) fetchDataAndSplit(ctx context.Context) {
	var (
		err           error
		workerIndices []int
	)
	results := make([]*chunk.Chunk, len(e.workers))
	chk := newFirstChunk(e.dataSource)

	defer func() {
		if r := recover(); r != nil {
			recoveryShuffleInterDirc(e.outputCh, r)
		}
		for _, w := range e.workers {
			close(w.inputCh)
		}
	}()

	for {
		err = Next(ctx, e.dataSource, chk)
		if err != nil {
			e.outputCh <- &shuffleOutput{err: err}
			return
		}
		if chk.NumEvents() == 0 {
			break
		}

		workerIndices, err = e.splitter.split(e.ctx, chk, workerIndices)
		if err != nil {
			e.outputCh <- &shuffleOutput{err: err}
			return
		}
		numEvents := chk.NumEvents()
		for i := 0; i < numEvents; i++ {
			workerIdx := workerIndices[i]
			w := e.workers[workerIdx]

			if results[workerIdx] == nil {
				select {
				case <-e.finishCh:
					return
				case results[workerIdx] = <-w.inputHolderCh:
					break
				}
			}
			results[workerIdx].AppendEvent(chk.GetEvent(i))
			if results[workerIdx].IsFull() {
				w.inputCh <- results[workerIdx]
				results[workerIdx] = nil
			}
		}
	}
	for i, w := range e.workers {
		if results[i] != nil {
			w.inputCh <- results[i]
			results[i] = nil
		}
	}
}

var _ InterlockingDirectorate = &shuffleWorker{}

// shuffleWorker is the multi-thread worker that executes the child interlocks within one "partition".
type shuffleWorker struct {
	baseInterlockingDirectorate
	childInterDirc InterlockingDirectorate

	finishCh <-chan struct{}
	executed bool

	// Workers get inputs from the data-fetcher thread by `inputCh`,
	// and output results to the main thread by `outputCh`.
	// `inputHolderCh` and `outputHolderCh` are "Chunk Holder" channels of `inputCh` and `outputCh` respectively,
	// which give the `*Chunk` back, to implement the data transport in a streaming manner.
	inputCh        chan *chunk.Chunk
	inputHolderCh  chan *chunk.Chunk
	outputCh       chan *shuffleOutput
	outputHolderCh chan *chunk.Chunk
}

// Open implements the InterlockingDirectorate Open interface.
func (e *shuffleWorker) Open(ctx context.Context) error {
	if err := e.baseInterlockingDirectorate.Open(ctx); err != nil {
		return err
	}
	e.executed = false
	return nil
}

// Close implements the InterlockingDirectorate Close interface.
func (e *shuffleWorker) Close() error {
	return errors.Trace(e.baseInterlockingDirectorate.Close())
}

// Next implements the InterlockingDirectorate Next interface.
// It is called by the `Tail` interlock within "shuffle", to fetch data from `DataSource` by `inputCh`.
func (e *shuffleWorker) Next(ctx context.Context, req *chunk.Chunk) error {
	req.Reset()
	if e.executed {
		return nil
	}
	select {
	case <-e.finishCh:
		e.executed = true
		return nil
	case result, ok := <-e.inputCh:
		if !ok || result.NumEvents() == 0 {
			e.executed = true
			return nil
		}
		req.SwapDeferredCausets(result)
		e.inputHolderCh <- result
		return nil
	}
}

// run repeatedly takes a holder chunk from `outputHolderCh`, lets the child
// interlock fill it, and sends the result to the main thread through `outputCh`.
func (e *shuffleWorker) run(ctx context.Context, waitGroup *sync.WaitGroup) {
	defer func() {
		if r := recover(); r != nil {
			recoveryShuffleInterDirc(e.outputCh, r)
		}
		waitGroup.Done()
	}()

	for {
		select {
		case <-e.finishCh:
			return
		case chk := <-e.outputHolderCh:
			if err := Next(ctx, e.childInterDirc, chk); err != nil {
				e.outputCh <- &shuffleOutput{err: err}
				return
			}

			// Should not send an empty `chk` to `e.outputCh`.
			if chk.NumEvents() == 0 {
				return
			}
			e.outputCh <- &shuffleOutput{chk: chk, giveBackCh: e.outputHolderCh}
		}
	}
}

var _ partitionSplitter = &partitionHashSplitter{}

type partitionSplitter interface {
	split(ctx stochastikctx.Context, input *chunk.Chunk, workerIndices []int) ([]int, error)
}

// partitionHashSplitter assigns events to workers by hashing the group key built from `byItems`.
type partitionHashSplitter struct {
	byItems    []memex.Expression
	numWorkers int
	hashKeys   [][]byte
}

func (s *partitionHashSplitter) split(ctx stochastikctx.Context, input *chunk.Chunk, workerIndices []int) ([]int, error) {
	var err error
	s.hashKeys, err = getGroupKey(ctx, input, s.hashKeys, s.byItems)
	if err != nil {
		return workerIndices, err
	}
	workerIndices = workerIndices[:0]
	numEvents := input.NumEvents()
	for i := 0; i < numEvents; i++ {
		workerIndices = append(workerIndices, int(murmur3.Sum32(s.hashKeys[i]))%s.numWorkers)
	}
	return workerIndices, nil
}
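// ---------------------------------------------------------------------------
// Editor's illustrative sketch, not part of the original file. It mirrors the
// partitioning scheme used by partitionHashSplitter.split above (a murmur3
// hash of the encoded group key, reduced modulo the worker count), but over
// plain, already-encoded byte keys so it can be read or tested in isolation.
// The helper name hashKeysToWorkers is hypothetical and introduced only for
// illustration; it relies solely on the murmur3 import already used here.
// ---------------------------------------------------------------------------
func hashKeysToWorkers(keys [][]byte, numWorkers int) []int {
	workerIndices := make([]int, 0, len(keys))
	for _, key := range keys {
		// Same mapping as split: hash the encoded key, then pick a worker by modulo.
		workerIndices = append(workerIndices, int(murmur3.Sum32(key))%numWorkers)
	}
	return workerIndices
}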