// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"context"
	"runtime/trace"
	"sync"
	"sync/atomic"

	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/codec"
	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/memory"
	"go.uber.org/zap"
)

// result carries one produced chunk, or an error, from a worker goroutine to
// the caller of Next. A nil chk with a nil err means "no more data".
type result struct {
	chk *chunk.Chunk
	err error
}

// outerEvent pairs one outer-side event with the outcome of the outer filter,
// so inner workers know whether to join it or treat it as a miss-match.
type outerEvent struct {
	event    *chunk.Event
	selected bool // if this event is selected by the outer side
}

// ParallelNestedLoopApplyInterDirc is the interlock for apply.
type ParallelNestedLoopApplyInterDirc struct {
	baseInterlockingDirectorate

	// outer-side fields
	cursor          int
	outerInterDirc  InterlockingDirectorate
	outerFilter     memex.CNFExprs
	outerList       *chunk.List // keeps fetched outer chunks alive; workers hold *chunk.Event pointers into them
	outerEventMutex sync.Mutex
	outer           bool // when true, events rejected by the outer filter still produce miss-match output

	// inner-side fields
	// use slices since the inner side is paralleled
	// (each slice is indexed by worker id, one element per inner worker)
	corDefCauss     [][]*memex.CorrelatedDeferredCauset
	innerFilter     []memex.CNFExprs
	innerInterDircs []InterlockingDirectorate
	innerList       []*chunk.List
	innerChunk      []*chunk.Chunk
	innerSelected   [][]bool
	innerIter       []chunk.Iterator
	outerEvent      []*chunk.Event // the outer event each worker is currently joining
	hasMatch        []bool
	hasNull         []bool
	joiners         []joiner

	// fields about concurrency control
	concurrency int
	started     uint32             // set 0 -> 1 via CAS on the first Next call; workers are started lazily
	freeChkCh   chan *chunk.Chunk  // pool of reusable result chunks, refilled by Next after consumption
	resultChkCh chan result        // workers -> Next
	outerEventCh chan outerEvent   // outerWorker -> innerWorkers; closed when the outer side is exhausted
	exit        chan struct{}      // closed by Close to stop every worker
	workerWg    sync.WaitGroup     // counts outerWorker + innerWorkers
	notifyWg    sync.WaitGroup     // counts notifyWorker
	
	// fields about cache
	cache              *applyCache
	useCache           bool
	cacheHitCounter    int64 // updated atomically by inner workers
	cacheAccessCounter int64 // updated atomically by inner workers
	cacheLock          sync.RWMutex

	memTracker *memory.Tracker // track memory usage.
}

// Open implements the InterlockingDirectorate interface.
90 func (e *ParallelNestedLoopApplyInterDirc) Open(ctx context.Context) error { 91 err := e.outerInterDirc.Open(ctx) 92 if err != nil { 93 return err 94 } 95 e.memTracker = memory.NewTracker(e.id, -1) 96 e.memTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.MemTracker) 97 98 e.outerList = chunk.NewList(retTypes(e.outerInterDirc), e.initCap, e.maxChunkSize) 99 e.outerList.GetMemTracker().SetLabel(memory.LabelForOuterList) 100 e.outerList.GetMemTracker().AttachTo(e.memTracker) 101 102 e.innerList = make([]*chunk.List, e.concurrency) 103 e.innerChunk = make([]*chunk.Chunk, e.concurrency) 104 e.innerSelected = make([][]bool, e.concurrency) 105 e.innerIter = make([]chunk.Iterator, e.concurrency) 106 e.outerEvent = make([]*chunk.Event, e.concurrency) 107 e.hasMatch = make([]bool, e.concurrency) 108 e.hasNull = make([]bool, e.concurrency) 109 for i := 0; i < e.concurrency; i++ { 110 e.innerChunk[i] = newFirstChunk(e.innerInterDircs[i]) 111 e.innerList[i] = chunk.NewList(retTypes(e.innerInterDircs[i]), e.initCap, e.maxChunkSize) 112 e.innerList[i].GetMemTracker().SetLabel(memory.LabelForInnerList) 113 e.innerList[i].GetMemTracker().AttachTo(e.memTracker) 114 } 115 116 e.freeChkCh = make(chan *chunk.Chunk, e.concurrency) 117 e.resultChkCh = make(chan result, e.concurrency+1) // innerWorkers + outerWorker 118 e.outerEventCh = make(chan outerEvent) 119 e.exit = make(chan struct{}) 120 for i := 0; i < e.concurrency; i++ { 121 e.freeChkCh <- newFirstChunk(e) 122 } 123 124 if e.useCache { 125 if e.cache, err = newApplyCache(e.ctx); err != nil { 126 return err 127 } 128 e.cache.GetMemTracker().AttachTo(e.memTracker) 129 } 130 return nil 131 } 132 133 // Next implements the InterlockingDirectorate interface. 
func (e *ParallelNestedLoopApplyInterDirc) Next(ctx context.Context, req *chunk.Chunk) (err error) {
	// Lazily start all workers exactly once, on the first call to Next.
	// CompareAndSwap guarantees a single start even under concurrent Next calls.
	if atomic.CompareAndSwapUint32(&e.started, 0, 1) {
		e.workerWg.Add(1)
		go e.outerWorker(ctx)
		for i := 0; i < e.concurrency; i++ {
			e.workerWg.Add(1)
			workID := i // capture the loop variable for the goroutine closure
			go e.innerWorker(ctx, workID)
		}
		e.notifyWg.Add(1)
		go e.notifyWorker(ctx)
	}
	result := <-e.resultChkCh
	if result.err != nil {
		return result.err
	}
	if result.chk == nil { // no more data
		req.Reset()
		return nil
	}
	req.SwapDeferredCausets(result.chk)
	// Return the drained chunk to the pool so a worker can refill it.
	e.freeChkCh <- result.chk
	return nil
}

// Close implements the InterlockingDirectorate interface.
func (e *ParallelNestedLoopApplyInterDirc) Close() error {
	e.memTracker = nil
	err := e.outerInterDirc.Close()
	if atomic.LoadUint32(&e.started) == 1 {
		// Closing exit asks every worker to quit; notifyWorker returns only
		// after workerWg drains, so waiting on notifyWg waits for everything.
		close(e.exit)
		e.notifyWg.Wait()
		e.started = 0
	}

	if e.runtimeStats != nil {
		runtimeStats := newJoinRuntimeStats()
		e.ctx.GetStochastikVars().StmtCtx.RuntimeStatsDefCausl.RegisterStats(e.id, runtimeStats)
		if e.useCache {
			// All workers have stopped by now, so reading the counters
			// without atomics is safe here.
			var hitRatio float64
			if e.cacheAccessCounter > 0 {
				hitRatio = float64(e.cacheHitCounter) / float64(e.cacheAccessCounter)
			}
			runtimeStats.setCacheInfo(true, hitRatio)
		} else {
			runtimeStats.setCacheInfo(false, 0)
		}
		runtimeStats.SetConcurrencyInfo(execdetails.NewConcurrencyInfo("Concurrency", e.concurrency))
	}
	return err
}

// notifyWorker waits for all inner/outer-workers finishing and then put an empty
// chunk into the resultCh to notify the upper interlock there is no more data.
func (e *ParallelNestedLoopApplyInterDirc) notifyWorker(ctx context.Context) {
	defer e.handleWorkerPanic(ctx, &e.notifyWg)
	e.workerWg.Wait()
	// A nil chunk with a nil error tells Next there is no more data.
	e.putResult(nil, nil)
}

// outerWorker pulls chunks from the outer child, runs the outer filter, and
// feeds the events one by one into outerEventCh for inner workers to consume.
func (e *ParallelNestedLoopApplyInterDirc) outerWorker(ctx context.Context) {
	defer trace.StartRegion(ctx, "ParallelApplyOuterWorker").End()
	defer e.handleWorkerPanic(ctx, &e.workerWg)
	var selected []bool
	var err error
	for {
		chk := newFirstChunk(e.outerInterDirc)
		if err := Next(ctx, e.outerInterDirc, chk); err != nil {
			e.putResult(nil, err)
			return
		}
		if chk.NumEvents() == 0 {
			// Outer side exhausted: closing the channel lets each inner
			// worker drain the remaining events and then exit.
			close(e.outerEventCh)
			return
		}
		// Keep the chunk alive in outerList: inner workers hold *chunk.Event
		// pointers into it after the events are sent through outerEventCh.
		e.outerList.Add(chk)
		outerIter := chunk.NewIterator4Chunk(chk)
		selected, err = memex.VectorizedFilter(e.ctx, e.outerFilter, outerIter, selected)
		if err != nil {
			e.putResult(nil, err)
			return
		}
		for i := 0; i < chk.NumEvents(); i++ {
			event := chk.GetEvent(i)
			select {
			case e.outerEventCh <- outerEvent{&event, selected[i]}:
			case <-e.exit:
				return
			}
		}
	}
}

// innerWorker repeatedly takes a free result chunk, fills it by joining outer
// events with their inner-side results, and hands it to Next via resultChkCh.
func (e *ParallelNestedLoopApplyInterDirc) innerWorker(ctx context.Context, id int) {
	defer trace.StartRegion(ctx, "ParallelApplyInnerWorker").End()
	defer e.handleWorkerPanic(ctx, &e.workerWg)
	for {
		var chk *chunk.Chunk
		select {
		case chk = <-e.freeChkCh:
		case <-e.exit:
			return
		}
		err := e.fillInnerChunk(ctx, id, chk)
		if err == nil && chk.NumEvents() == 0 { // no more data, this goroutine can exit
			return
		}
		if e.putResult(chk, err) {
			return
		}
	}
}

// putResult sends one result to Next, bailing out if the interlock is closing.
// It returns true when the caller should exit because exit has been closed.
func (e *ParallelNestedLoopApplyInterDirc) putResult(chk *chunk.Chunk, err error) (exit bool) {
	select {
	case e.resultChkCh <- result{chk, err}:
		return false
	case <-e.exit:
		return true
	}
}

// handleWorkerPanic converts a worker panic into an error result and always
// marks the worker done on the given WaitGroup.
func (e *ParallelNestedLoopApplyInterDirc) handleWorkerPanic(ctx context.Context, wg *sync.WaitGroup) {
	if r := recover(); r != nil {
		err := errors.Errorf("%v", r)
		logutil.Logger(ctx).Error("parallel nested loop join worker panicked", zap.Error(err), zap.Stack("stack"))
		// NOTE(review): this sends directly instead of using putResult, so it
		// does not select on e.exit; resultChkCh is buffered to concurrency+1
		// which presumably keeps this from blocking — verify.
		e.resultChkCh <- result{nil, err}
	}
	if wg != nil {
		wg.Done()
	}
}

// fetchAllInners reads all data from the inner causet and stores them in a List.
func (e *ParallelNestedLoopApplyInterDirc) fetchAllInners(ctx context.Context, id int) (err error) {
	var key []byte
	// Bind the current outer event's values into the correlated defcausets,
	// building the cache key from the same values when caching is enabled.
	for _, defCaus := range e.corDefCauss[id] {
		*defCaus.Data = e.outerEvent[id].GetCauset(defCaus.Index, defCaus.RetType)
		if e.useCache {
			if key, err = codec.EncodeKey(e.ctx.GetStochastikVars().StmtCtx, key, *defCaus.Data); err != nil {
				return err
			}
		}
	}
	if e.useCache { // look up the cache
		atomic.AddInt64(&e.cacheAccessCounter, 1)
		e.cacheLock.RLock()
		value, err := e.cache.Get(key)
		e.cacheLock.RUnlock()
		if err != nil {
			return err
		}
		if value != nil {
			// Cache hit: reuse the cached list directly for this outer event.
			e.innerList[id] = value
			atomic.AddInt64(&e.cacheHitCounter, 1)
			return nil
		}
	}

	err = e.innerInterDircs[id].Open(ctx)
	// Close is deferred before the error check, so it runs even when Open
	// failed — matching the error-handling convention used elsewhere here.
	defer terror.Call(e.innerInterDircs[id].Close)
	if err != nil {
		return err
	}

	if e.useCache {
		// create a new one in this case since it may be in the cache
		// (the old list may now be owned by the cache, so it must not be reset)
		e.innerList[id] = chunk.NewList(retTypes(e.innerInterDircs[id]), e.initCap, e.maxChunkSize)
	} else {
		e.innerList[id].Reset()
	}

	// Drain the inner interlock, keeping only events that pass the inner filter.
	innerIter := chunk.NewIterator4Chunk(e.innerChunk[id])
	for {
		err := Next(ctx, e.innerInterDircs[id], e.innerChunk[id])
		if err != nil {
			return err
		}
		if e.innerChunk[id].NumEvents() == 0 {
			break
		}

		e.innerSelected[id], err = memex.VectorizedFilter(e.ctx, e.innerFilter[id], innerIter, e.innerSelected[id])
		if err != nil {
			return err
		}
		for event := innerIter.Begin(); event != innerIter.End(); event = innerIter.Next() {
			if e.innerSelected[id][event.Idx()] {
				e.innerList[id].AppendEvent(event)
			}
		}
	}

	if e.useCache { // update the cache
		e.cacheLock.Lock()
		defer e.cacheLock.Unlock()
		if _, err := e.cache.Set(key, e.innerList[id]); err != nil {
			return err
		}
	}
	return nil
}

// fetchNextOuterEvent receives the next selected outer event for worker id.
// Unselected events are handled inline as miss-matches when e.outer is set.
// exit is true only when the interlock is shutting down.
func (e *ParallelNestedLoopApplyInterDirc) fetchNextOuterEvent(id int, req *chunk.Chunk) (event *chunk.Event, exit bool) {
	for {
		select {
		case outerEvent, ok := <-e.outerEventCh:
			if !ok { // no more data
				return nil, false
			}
			if !outerEvent.selected {
				if e.outer {
					e.joiners[id].onMissMatch(false, *outerEvent.event, req)
					if req.IsFull() {
						// NOTE(review): returns (nil, false) when req fills up
						// mid-miss-match; the caller treats nil as "no outer
						// event" and returns the chunk — confirm no event is
						// skipped on the next call.
						return nil, false
					}
				}
				continue // try the next outer event
			}
			return outerEvent.event, false
		case <-e.exit:
			return nil, true
		}
	}
}

// fillInnerChunk fills req with join results for worker id, pulling new outer
// events and re-fetching the inner side whenever the current iterator drains.
func (e *ParallelNestedLoopApplyInterDirc) fillInnerChunk(ctx context.Context, id int, req *chunk.Chunk) (err error) {
	req.Reset()
	for {
		if e.innerIter[id] == nil || e.innerIter[id].Current() == e.innerIter[id].End() {
			// The previous outer event is finished; emit its miss-match
			// output if it never matched any inner event.
			if e.outerEvent[id] != nil && !e.hasMatch[id] {
				e.joiners[id].onMissMatch(e.hasNull[id], *e.outerEvent[id], req)
			}
			var exit bool
			e.outerEvent[id], exit = e.fetchNextOuterEvent(id, req)
			if exit || req.IsFull() || e.outerEvent[id] == nil {
				return nil
			}

			e.hasMatch[id] = false
			e.hasNull[id] = false

			err = e.fetchAllInners(ctx, id)
			if err != nil {
				return err
			}
			e.innerIter[id] = chunk.NewIterator4List(e.innerList[id])
			e.innerIter[id].Begin()
		}

		matched, isNull, err := e.joiners[id].tryToMatchInners(*e.outerEvent[id], e.innerIter[id], req)
		e.hasMatch[id] = e.hasMatch[id] || matched
		e.hasNull[id] = e.hasNull[id] || isNull

		if err != nil || req.IsFull() {
			return err
		}
	}
}