github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/projection.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"context"
	"fmt"
	"runtime/trace"
	"sync"
	"sync/atomic"

	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/soliton"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/memory"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"go.uber.org/zap"
)

// This file contains the implementation of the physical Projection Operator:
// https://en.wikipedia.org/wiki/Projection_(relational_algebra)
//
// NOTE:
// 1. The number of "projectionWorker" is controlled by the global stochastik
//    variable "milevadb_projection_concurrency".
// 2. The unparallel version is used when one of the following situations occurs:
//    a. "milevadb_projection_concurrency" is set to 0.
//    b. The estimated input size is smaller than "milevadb_max_chunk_size".
//    c. This projection cannot be executed vectorially.

type projectionInput struct {
	chk          *chunk.Chunk
	targetWorker *projectionWorker
}

type projectionOutput struct {
	chk  *chunk.Chunk
	done chan error
}

// ProjectionInterDirc implements the physical Projection Operator:
// https://en.wikipedia.org/wiki/Projection_(relational_algebra)
type ProjectionInterDirc struct {
	baseInterlockingDirectorate

	evaluatorSuit *memex.EvaluatorSuite

	finishCh    chan struct{}
	outputCh    chan *projectionOutput
	fetcher     projectionInputFetcher
	numWorkers  int64
	workers     []*projectionWorker
	childResult *chunk.Chunk

	// parentReqEvents indicates how many rows the parent interlock requires.
	// It is set when parallelInterDircute() is called and is used by the
	// concurrent projectionInputFetcher.
	//
	// NOTE: It should be protected by atomic operations.
	parentReqEvents int64

	memTracker *memory.Tracker
	wg         sync.WaitGroup

	calculateNoDelay bool
	prepared         bool
}

// Open implements the InterlockingDirectorate Open interface.
func (e *ProjectionInterDirc) Open(ctx context.Context) error {
	if err := e.baseInterlockingDirectorate.Open(ctx); err != nil {
		return err
	}
	return e.open(ctx)
}

func (e *ProjectionInterDirc) open(ctx context.Context) error {
	e.prepared = false
	e.parentReqEvents = int64(e.maxChunkSize)

	e.memTracker = memory.NewTracker(e.id, -1)
	e.memTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.MemTracker)

	// For now, the only reason a Projection cannot be executed vectorially is
	// that it contains "SetVar" or "GetVar" functions; in that scenario the
	// Projection cannot be executed in parallel either.
	if e.numWorkers > 0 && !e.evaluatorSuit.Vectorizable() {
		e.numWorkers = 0
	}

	if e.isUnparallelInterDirc() {
		e.childResult = newFirstChunk(e.children[0])
		e.memTracker.Consume(e.childResult.MemoryUsage())
	}

	return nil
}

// Next implements the InterlockingDirectorate Next interface.
//
// Here we explain the execution flow of the parallel projection implementation.
// There are 3 main components:
//  1. "projectionInputFetcher":   Fetch input "Chunk"s from the child.
//  2. "projectionWorker":         Do the projection work.
//  3. "ProjectionInterDirc.Next": Return results to the parent.
//
// 1. "projectionInputFetcher" gets its input and output resources from its
//    "inputCh" and "outputCh" channels. Once the input and output resources are
//    obtained, it fetches the child's result into "input.chk" and:
//    a. Dispatches this input to the worker specified in "input.targetWorker"
//    b. Dispatches this output to the main thread: "ProjectionInterDirc.Next"
//    c. Dispatches this output to the worker specified in "input.targetWorker"
//    It is finished and exits once:
//    a. There is no more input from the child.
//    b. "ProjectionInterDirc" closes the "globalFinishCh"
//
// 2. "projectionWorker" gets its input and output resources from its
//    "inputCh" and "outputCh" channels. Once the input and output resources are
//    obtained, it calculates the projection result using "input.chk" as the input
//    and "output.chk" as the output. Once the calculation is done, it:
//    a. Sends "nil" or an error to "output.done" to mark this input as finished.
//    b. Returns the "input" resource to "projectionInputFetcher.inputCh"
//    The workers are finished and exit once:
//    a. "ProjectionInterDirc" closes the "globalFinishCh"
//
// 3. "ProjectionInterDirc.Next" gets its output resources from its "outputCh" channel.
//    After receiving an output from "outputCh", it waits to receive a "nil" or an
//    error from the "output.done" channel. Once a "nil" or an error is received, it:
//    a. Returns this output to its parent
//    b. Returns the "output" resource to "projectionInputFetcher.outputCh"
//
//  +-----------+----------------------+--------------------------+
//  |           |                      |                          |
//  |  +--------+---------+   +--------+---------+       +--------+---------+
//  |  | projectionWorker |   + projectionWorker |  ...  + projectionWorker |
//  |  +------------------+   +------------------+       +------------------+
//  |      ^       ^              ^       ^                  ^       ^
//  |      |       |              |       |                  |       |
//  |   inputCh outputCh       inputCh outputCh           inputCh outputCh
//  |      ^       ^              ^       ^                  ^       ^
//  |      |       |              |       |                  |       |
//  |      |       |
//  |      |       +----------------->outputCh
//  |      |       |                      |
//  |      |       |                      v
//  |  +---+-------+------------+   +--------------------------+
//  |  | projectionInputFetcher |   | ProjectionInterDirc.Next |
//  |  +------------------------+   +------------+-------------+
//  |      ^          ^                          |
//  |      |          |                          |
//  |   inputCh    outputCh                      |
//  |      ^          ^                          |
//  |      |          |                          |
//  +------+          +--------------------------+
//
func (e *ProjectionInterDirc) Next(ctx context.Context, req *chunk.Chunk) error {
	req.GrowAndReset(e.maxChunkSize)
	if e.isUnparallelInterDirc() {
		return e.unParallelInterDircute(ctx, req)
	}
	return e.parallelInterDircute(ctx, req)
}

func (e *ProjectionInterDirc) isUnparallelInterDirc() bool {
	return e.numWorkers <= 0
}

func (e *ProjectionInterDirc) unParallelInterDircute(ctx context.Context, chk *chunk.Chunk) error {
	// Transmit the required row count to the child.
	e.childResult.SetRequiredEvents(chk.RequiredEvents(), e.maxChunkSize)
	mSize := e.childResult.MemoryUsage()
	err := Next(ctx, e.children[0], e.childResult)
	e.memTracker.Consume(e.childResult.MemoryUsage() - mSize)
	if err != nil {
		return err
	}
	if e.childResult.NumEvents() == 0 {
		return nil
	}
	err = e.evaluatorSuit.Run(e.ctx, e.childResult, chk)
	return err
}

func (e *ProjectionInterDirc) parallelInterDircute(ctx context.Context, chk *chunk.Chunk) error {
	atomic.StoreInt64(&e.parentReqEvents, int64(chk.RequiredEvents()))
	if !e.prepared {
		e.prepare(ctx)
		e.prepared = true
	}

	output, ok := <-e.outputCh
	if !ok {
		return nil
	}

	err := <-output.done
	if err != nil {
		return err
	}
	mSize := output.chk.MemoryUsage()
	chk.SwapDeferredCausets(output.chk)
	e.memTracker.Consume(output.chk.MemoryUsage() - mSize)
	e.fetcher.outputCh <- output
	return nil
}

func (e *ProjectionInterDirc) prepare(ctx context.Context) {
	e.finishCh = make(chan struct{})
	e.outputCh = make(chan *projectionOutput, e.numWorkers)

	// Initialize projectionInputFetcher.
	e.fetcher = projectionInputFetcher{
		proj:           e,
		child:          e.children[0],
		globalFinishCh: e.finishCh,
		globalOutputCh: e.outputCh,
		inputCh:        make(chan *projectionInput, e.numWorkers),
		outputCh:       make(chan *projectionOutput, e.numWorkers),
	}

	// Initialize projectionWorker.
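	// Each worker owns a pair of 1-buffered channels ("inputCh"/"outputCh"), and
	// the fetcher is primed below with one input chunk (tagged with its target
	// worker) and one output chunk (carrying a 1-buffered "done" channel) per
	// worker, so the fetcher has resources to hand out on the first round.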
	e.workers = make([]*projectionWorker, 0, e.numWorkers)
	for i := int64(0); i < e.numWorkers; i++ {
		e.workers = append(e.workers, &projectionWorker{
			proj:            e,
			sctx:            e.ctx,
			evaluatorSuit:   e.evaluatorSuit,
			globalFinishCh:  e.finishCh,
			inputGiveBackCh: e.fetcher.inputCh,
			inputCh:         make(chan *projectionInput, 1),
			outputCh:        make(chan *projectionOutput, 1),
		})

		inputChk := newFirstChunk(e.children[0])
		e.memTracker.Consume(inputChk.MemoryUsage())
		e.fetcher.inputCh <- &projectionInput{
			chk:          inputChk,
			targetWorker: e.workers[i],
		}

		outputChk := newFirstChunk(e)
		e.memTracker.Consume(outputChk.MemoryUsage())
		e.fetcher.outputCh <- &projectionOutput{
			chk:  outputChk,
			done: make(chan error, 1),
		}
	}

	e.wg.Add(1)
	go e.fetcher.run(ctx)

	for i := range e.workers {
		e.wg.Add(1)
		go e.workers[i].run(ctx)
	}
}

func (e *ProjectionInterDirc) drainInputCh(ch chan *projectionInput) {
	close(ch)
	for item := range ch {
		if item.chk != nil {
			e.memTracker.Consume(-item.chk.MemoryUsage())
		}
	}
}

func (e *ProjectionInterDirc) drainOutputCh(ch chan *projectionOutput) {
	close(ch)
	for item := range ch {
		if item.chk != nil {
			e.memTracker.Consume(-item.chk.MemoryUsage())
		}
	}
}

// Close implements the InterlockingDirectorate Close interface.
func (e *ProjectionInterDirc) Close() error {
	if e.isUnparallelInterDirc() {
		e.memTracker.Consume(-e.childResult.MemoryUsage())
		e.childResult = nil
	}
	if e.prepared {
		close(e.finishCh)
		e.wg.Wait() // Wait for the fetcher and workers to finish and exit.

		// Clear the fetcher.
		e.drainInputCh(e.fetcher.inputCh)
		e.drainOutputCh(e.fetcher.outputCh)

		// Clear the workers.
		for _, w := range e.workers {
			e.drainInputCh(w.inputCh)
			e.drainOutputCh(w.outputCh)
		}
	}
	if e.baseInterlockingDirectorate.runtimeStats != nil {
		runtimeStats := &execdetails.RuntimeStatsWithConcurrencyInfo{}
		if e.isUnparallelInterDirc() {
			runtimeStats.SetConcurrencyInfo(execdetails.NewConcurrencyInfo("Concurrency", 0))
		} else {
			runtimeStats.SetConcurrencyInfo(execdetails.NewConcurrencyInfo("Concurrency", int(e.numWorkers)))
		}
		e.ctx.GetStochastikVars().StmtCtx.RuntimeStatsDefCausl.RegisterStats(e.id, runtimeStats)
	}
	return e.baseInterlockingDirectorate.Close()
}

type projectionInputFetcher struct {
	proj           *ProjectionInterDirc
	child          InterlockingDirectorate
	globalFinishCh <-chan struct{}
	globalOutputCh chan<- *projectionOutput
	wg             sync.WaitGroup

	inputCh  chan *projectionInput
	outputCh chan *projectionOutput
}

// run gets the projectionInputFetcher's input and output resources from its
// "inputCh" and "outputCh" channels. Once the input and output resources are
// obtained, it fetches the child's result into "input.chk" and:
//  a. Dispatches this input to the worker specified in "input.targetWorker"
//  b. Dispatches this output to the main thread: "ProjectionInterDirc.Next"
//  c. Dispatches this output to the worker specified in "input.targetWorker"
//
// It is finished and exits once:
//  a. There is no more input from the child.
//  b. "ProjectionInterDirc" closes the "globalFinishCh"
"ProjectionInterDirc" close the "globalFinishCh" 344 func (f *projectionInputFetcher) run(ctx context.Context) { 345 defer trace.StartRegion(ctx, "ProjectionFetcher").End() 346 var output *projectionOutput 347 defer func() { 348 if r := recover(); r != nil { 349 recoveryProjection(output, r) 350 } 351 close(f.globalOutputCh) 352 f.proj.wg.Done() 353 }() 354 355 for { 356 input := readProjectionInput(f.inputCh, f.globalFinishCh) 357 if input == nil { 358 return 359 } 360 targetWorker := input.targetWorker 361 362 output = readProjectionOutput(f.outputCh, f.globalFinishCh) 363 if output == nil { 364 f.proj.memTracker.Consume(-input.chk.MemoryUsage()) 365 return 366 } 367 368 f.globalOutputCh <- output 369 370 requiredEvents := atomic.LoadInt64(&f.proj.parentReqEvents) 371 input.chk.SetRequiredEvents(int(requiredEvents), f.proj.maxChunkSize) 372 mSize := input.chk.MemoryUsage() 373 err := Next(ctx, f.child, input.chk) 374 f.proj.memTracker.Consume(input.chk.MemoryUsage() - mSize) 375 if err != nil || input.chk.NumEvents() == 0 { 376 output.done <- err 377 f.proj.memTracker.Consume(-input.chk.MemoryUsage()) 378 return 379 } 380 381 targetWorker.inputCh <- input 382 targetWorker.outputCh <- output 383 } 384 } 385 386 type projectionWorker struct { 387 proj *ProjectionInterDirc 388 sctx stochastikctx.Context 389 evaluatorSuit *memex.EvaluatorSuite 390 globalFinishCh <-chan struct{} 391 inputGiveBackCh chan<- *projectionInput 392 393 // channel "input" and "output" is : 394 // a. initialized by "ProjectionInterDirc.prepare" 395 // b. written by "projectionInputFetcher.run" 396 // c. read by "projectionWorker.run" 397 inputCh chan *projectionInput 398 outputCh chan *projectionOutput 399 } 400 401 // run gets projectionWorker's input and output resources from its 402 // "inputCh" and "outputCh" channel, once the input and output resources are 403 // abtained, it calculate the projection result use "input.chk" as the input 404 // and "output.chk" as the output, once the calculation is done, it: 405 // a. Sends "nil" or error to "output.done" to mark this input is finished. 406 // b. Returns the "input" resource to "projectionInputFetcher.inputCh". 407 // 408 // It is finished and exited once: 409 // a. "ProjectionInterDirc" closes the "globalFinishCh". 
func (w *projectionWorker) run(ctx context.Context) {
	defer trace.StartRegion(ctx, "ProjectionWorker").End()
	var output *projectionOutput
	defer func() {
		if r := recover(); r != nil {
			recoveryProjection(output, r)
		}
		w.proj.wg.Done()
	}()
	for {
		input := readProjectionInput(w.inputCh, w.globalFinishCh)
		if input == nil {
			return
		}

		output = readProjectionOutput(w.outputCh, w.globalFinishCh)
		if output == nil {
			return
		}

		mSize := output.chk.MemoryUsage() + input.chk.MemoryUsage()
		err := w.evaluatorSuit.Run(w.sctx, input.chk, output.chk)
		w.proj.memTracker.Consume(output.chk.MemoryUsage() + input.chk.MemoryUsage() - mSize)
		output.done <- err

		if err != nil {
			return
		}

		w.inputGiveBackCh <- input
	}
}

func recoveryProjection(output *projectionOutput, r interface{}) {
	if output != nil {
		output.done <- errors.Errorf("%v", r)
	}
	buf := soliton.GetStack()
	logutil.BgLogger().Error("projection interlock panicked", zap.String("error", fmt.Sprintf("%v", r)), zap.String("stack", string(buf)))
}

func readProjectionInput(inputCh <-chan *projectionInput, finishCh <-chan struct{}) *projectionInput {
	select {
	case <-finishCh:
		return nil
	case input, ok := <-inputCh:
		if !ok {
			return nil
		}
		return input
	}
}

func readProjectionOutput(outputCh <-chan *projectionOutput, finishCh <-chan struct{}) *projectionOutput {
	select {
	case <-finishCh:
		return nil
	case output, ok := <-outputCh:
		if !ok {
			return nil
		}
		return output
	}
}
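
// readProjectionInput and readProjectionOutput implement a common Go pattern:
// a channel receive that can be cancelled by closing a "finish" channel.
// A minimal, self-contained sketch of the same pattern (illustrative only; the
// names below are not part of this package):
//
//	func readOrCancel(ch <-chan int, finishCh <-chan struct{}) (int, bool) {
//		select {
//		case <-finishCh:
//			// Cancelled: the finish channel was closed.
//			return 0, false
//		case v, ok := <-ch:
//			// Normal receive; ok is false if ch was closed and drained.
//			return v, ok
//		}
//	}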