github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/sort.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"container/heap"
	"context"
	"errors"
	"sort"

	"github.com/whtcorpsinc/failpoint"
	causetembedded "github.com/whtcorpsinc/milevadb/causet/embedded"
	"github.com/whtcorpsinc/milevadb/causet/soliton"
	"github.com/whtcorpsinc/milevadb/config"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/disk"
	"github.com/whtcorpsinc/milevadb/soliton/memory"
	"github.com/whtcorpsinc/milevadb/types"
)

// SortInterDirc represents a sorting interlock.
type SortInterDirc struct {
	baseInterlockingDirectorate

	ByItems         []*soliton.ByItems
	Idx             int
	fetched         bool
	schemaReplicant *memex.Schema

	keyExprs []memex.Expression
	keyTypes []*types.FieldType
	// keyDeferredCausets is the column index of the by items.
	keyDeferredCausets []int
	// keyCmpFuncs is used to compare each ByItem.
	keyCmpFuncs []chunk.CompareFunc
	// rowChunks is the chunks to store event values.
	rowChunks *chunk.SortedEventContainer

	memTracker  *memory.Tracker
	diskTracker *disk.Tracker

	// partitionList is the chunks to store event values for partitions. Every partition is a sorted list.
	partitionList []*chunk.SortedEventContainer

	// multiWayMerge uses multi-way merge for the disk spill.
	// For the multi-way merge algorithm, see https://en.wikipedia.org/wiki/K-way_merge_algorithm
	multiWayMerge *multiWayMerge
	// spillCausetAction saves the CausetAction for the disk spill.
	spillCausetAction *chunk.SortAndSpillDiskCausetAction
}

// Close implements the InterlockingDirectorate Close interface.
func (e *SortInterDirc) Close() error {
	for _, container := range e.partitionList {
		err := container.Close()
		if err != nil {
			return err
		}
	}
	e.partitionList = e.partitionList[:0]

	if e.rowChunks != nil {
		e.memTracker.Consume(-e.rowChunks.GetMemTracker().BytesConsumed())
		e.rowChunks = nil
	}
	e.memTracker = nil
	e.diskTracker = nil
	e.multiWayMerge = nil
	e.spillCausetAction = nil
	return e.children[0].Close()
}

// Open implements the InterlockingDirectorate Open interface.
func (e *SortInterDirc) Open(ctx context.Context) error {
	e.fetched = false
	e.Idx = 0

	// To avoid duplicated initialization for TopNInterDirc.
	if e.memTracker == nil {
		e.memTracker = memory.NewTracker(e.id, -1)
		e.memTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.MemTracker)
		e.diskTracker = memory.NewTracker(e.id, -1)
		e.diskTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.DiskTracker)
	}
	e.partitionList = e.partitionList[:0]
	return e.children[0].Open(ctx)
}
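
// A minimal sketch of how callers typically drive this interlock, assuming a
// fully constructed executor `e` and a session context `ctx` (illustrative
// only; this usage loop is not defined by this file):
//
//	if err := e.Open(ctx); err != nil {
//		return err
//	}
//	defer func() { _ = e.Close() }()
//	for {
//		chk := newFirstChunk(e)
//		if err := Next(ctx, e, chk); err != nil {
//			return err
//		}
//		if chk.NumEvents() == 0 {
//			break // all sorted events have been produced
//		}
//		// consume chk here
//	}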
// Next implements the InterlockingDirectorate Next interface.
// Sort constructs the result following these steps:
//  1. Read as many rows as possible into memory.
//  2. If the memory quota is triggered, sort these rows in memory and put them on disk as partition 1,
//     then reset the memory quota trigger and return to step 1.
//  3. If the memory quota is not triggered and the child is consumed, sort these rows in memory as partition N.
//  4. Merge-sort if the count of partitions is larger than 1. If there is only one partition in step 4,
//     it works just like the in-memory sort before.
func (e *SortInterDirc) Next(ctx context.Context, req *chunk.Chunk) error {
	req.Reset()
	if !e.fetched {
		e.initCompareFuncs()
		e.buildKeyDeferredCausets()
		err := e.fetchEventChunks(ctx)
		if err != nil {
			return err
		}
		e.fetched = true
	}

	if len(e.partitionList) == 0 {
		return nil
	}
	if len(e.partitionList) > 1 {
		if err := e.externalSorting(req); err != nil {
			return err
		}
	} else {
		for !req.IsFull() && e.Idx < e.partitionList[0].NumEvent() {
			event, err := e.partitionList[0].GetSortedEvent(e.Idx)
			if err != nil {
				return err
			}
			req.AppendEvent(event)
			e.Idx++
		}
	}
	return nil
}

type partitionPointer struct {
	event       chunk.Event
	partitionID int
	consumed    int
}

type multiWayMerge struct {
	lessEventFunction func(rowI chunk.Event, rowJ chunk.Event) bool
	elements          []partitionPointer
}

func (h *multiWayMerge) Less(i, j int) bool {
	rowI := h.elements[i].event
	rowJ := h.elements[j].event
	return h.lessEventFunction(rowI, rowJ)
}

func (h *multiWayMerge) Len() int {
	return len(h.elements)
}

func (h *multiWayMerge) Push(x interface{}) {
	// Should never be called.
}

func (h *multiWayMerge) Pop() interface{} {
	h.elements = h.elements[:len(h.elements)-1]
	return nil
}

func (h *multiWayMerge) Swap(i, j int) {
	h.elements[i], h.elements[j] = h.elements[j], h.elements[i]
}
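
// The multiWayMerge heap above is the classic k-way merge: keep one cursor per
// sorted partition, pop the smallest head, then advance that cursor and fix the
// heap. Below is a self-contained sketch of the same idea over plain int
// slices, mirroring how externalSorting below drains e.partitionList
// (illustrative only; these names are not part of this package).

type exampleMergeCursor struct {
	values []int // one sorted input run
	pos    int   // next unread index
}

type exampleMergeHeap []exampleMergeCursor

func (h exampleMergeHeap) Len() int           { return len(h) }
func (h exampleMergeHeap) Less(i, j int) bool { return h[i].values[h[i].pos] < h[j].values[h[j].pos] }
func (h exampleMergeHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }

func (h *exampleMergeHeap) Push(x interface{}) { *h = append(*h, x.(exampleMergeCursor)) }
func (h *exampleMergeHeap) Pop() interface{} {
	old := *h
	last := old[len(old)-1]
	*h = old[:len(old)-1]
	return last
}

// exampleKWayMerge merges several sorted runs into one sorted slice.
func exampleKWayMerge(runs [][]int) []int {
	h := make(exampleMergeHeap, 0, len(runs))
	total := 0
	for _, run := range runs {
		total += len(run)
		if len(run) > 0 {
			h = append(h, exampleMergeCursor{values: run})
		}
	}
	heap.Init(&h)
	out := make([]int, 0, total)
	for h.Len() > 0 {
		cur := &h[0]
		out = append(out, cur.values[cur.pos])
		cur.pos++
		if cur.pos == len(cur.values) {
			heap.Remove(&h, 0) // this run is exhausted
		} else {
			heap.Fix(&h, 0) // re-establish heap order for the advanced cursor
		}
	}
	return out
}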
func (e *SortInterDirc) externalSorting(req *chunk.Chunk) (err error) {
	if e.multiWayMerge == nil {
		e.multiWayMerge = &multiWayMerge{e.lessEvent, make([]partitionPointer, 0, len(e.partitionList))}
		for i := 0; i < len(e.partitionList); i++ {
			event, err := e.partitionList[i].GetSortedEvent(0)
			if err != nil {
				return err
			}
			e.multiWayMerge.elements = append(e.multiWayMerge.elements, partitionPointer{event: event, partitionID: i, consumed: 0})
		}
		heap.Init(e.multiWayMerge)
	}

	for !req.IsFull() && e.multiWayMerge.Len() > 0 {
		partitionPtr := e.multiWayMerge.elements[0]
		req.AppendEvent(partitionPtr.event)
		partitionPtr.consumed++
		if partitionPtr.consumed >= e.partitionList[partitionPtr.partitionID].NumEvent() {
			heap.Remove(e.multiWayMerge, 0)
			continue
		}
		partitionPtr.event, err = e.partitionList[partitionPtr.partitionID].GetSortedEvent(partitionPtr.consumed)
		if err != nil {
			return err
		}
		e.multiWayMerge.elements[0] = partitionPtr
		heap.Fix(e.multiWayMerge, 0)
	}
	return nil
}

func (e *SortInterDirc) fetchEventChunks(ctx context.Context) error {
	fields := retTypes(e)
	byItemsDesc := make([]bool, len(e.ByItems))
	for i, byItem := range e.ByItems {
		byItemsDesc[i] = byItem.Desc
	}
	e.rowChunks = chunk.NewSortedEventContainer(fields, e.maxChunkSize, byItemsDesc, e.keyDeferredCausets, e.keyCmpFuncs)
	e.rowChunks.GetMemTracker().AttachTo(e.memTracker)
	e.rowChunks.GetMemTracker().SetLabel(memory.LabelForEventChunks)
	if config.GetGlobalConfig().OOMUseTmpStorage {
		e.spillCausetAction = e.rowChunks.CausetActionSpill()
		failpoint.Inject("testSortedEventContainerSpill", func(val failpoint.Value) {
			if val.(bool) {
				e.spillCausetAction = e.rowChunks.CausetActionSpillForTest()
				defer e.spillCausetAction.WaitForTest()
			}
		})
		e.ctx.GetStochastikVars().StmtCtx.MemTracker.FallbackOldAndSetNewCausetAction(e.spillCausetAction)
		e.rowChunks.GetDiskTracker().AttachTo(e.diskTracker)
		e.rowChunks.GetDiskTracker().SetLabel(memory.LabelForEventChunks)
	}
	for {
		chk := newFirstChunk(e.children[0])
		err := Next(ctx, e.children[0], chk)
		if err != nil {
			return err
		}
		rowCount := chk.NumEvents()
		if rowCount == 0 {
			break
		}
		if err := e.rowChunks.Add(chk); err != nil {
			if errors.Is(err, chunk.ErrCannotAddBecauseSorted) {
				// The current container was sorted and spilled to disk; close it
				// out as a partition, start a fresh container, and retry the Add.
				e.partitionList = append(e.partitionList, e.rowChunks)
				e.rowChunks = chunk.NewSortedEventContainer(fields, e.maxChunkSize, byItemsDesc, e.keyDeferredCausets, e.keyCmpFuncs)
				e.rowChunks.GetMemTracker().AttachTo(e.memTracker)
				e.rowChunks.GetMemTracker().SetLabel(memory.LabelForEventChunks)
				e.rowChunks.GetDiskTracker().AttachTo(e.diskTracker)
				e.rowChunks.GetDiskTracker().SetLabel(memory.LabelForEventChunks)
				e.spillCausetAction = e.rowChunks.CausetActionSpill()
				failpoint.Inject("testSortedEventContainerSpill", func(val failpoint.Value) {
					if val.(bool) {
						e.spillCausetAction = e.rowChunks.CausetActionSpillForTest()
						defer e.spillCausetAction.WaitForTest()
					}
				})
				e.ctx.GetStochastikVars().StmtCtx.MemTracker.FallbackOldAndSetNewCausetAction(e.spillCausetAction)
				err = e.rowChunks.Add(chk)
			}
			if err != nil {
				return err
			}
		}
	}
	if e.rowChunks.NumEvent() > 0 {
		e.rowChunks.Sort()
		e.partitionList = append(e.partitionList, e.rowChunks)
	}
	return nil
}

func (e *SortInterDirc) initCompareFuncs() {
	e.keyCmpFuncs = make([]chunk.CompareFunc, len(e.ByItems))
	for i := range e.ByItems {
		keyType := e.ByItems[i].Expr.GetType()
		e.keyCmpFuncs[i] = chunk.GetCompareFunc(keyType)
	}
}

func (e *SortInterDirc) buildKeyDeferredCausets() {
	e.keyDeferredCausets = make([]int, 0, len(e.ByItems))
	for _, by := range e.ByItems {
		defCaus := by.Expr.(*memex.DeferredCauset)
		e.keyDeferredCausets = append(e.keyDeferredCausets, defCaus.Index)
	}
}

func (e *SortInterDirc) lessEvent(rowI, rowJ chunk.Event) bool {
	for i, defCausIdx := range e.keyDeferredCausets {
		cmpFunc := e.keyCmpFuncs[i]
		cmp := cmpFunc(rowI, defCausIdx, rowJ, defCausIdx)
		if e.ByItems[i].Desc {
			cmp = -cmp
		}
		if cmp < 0 {
			return true
		} else if cmp > 0 {
			return false
		}
	}
	return false
}
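
// lessEvent above compares two events key by key, flipping the sign of the
// comparison for descending items and falling through to the next key on a
// tie. The same scheme over plain int tuples, as an illustrative sketch
// (hypothetical names, not part of this package). Passing it to
// sort.Slice(rows, func(i, j int) bool { return exampleLessTuple(rows[i], rows[j], desc) })
// orders a [][]int the same way SortedEventContainer orders events.

// exampleLessTuple reports whether tuple a orders before tuple b under the
// given per-key descending flags.
func exampleLessTuple(a, b []int, desc []bool) bool {
	for k := range desc {
		cmp := 0
		if a[k] < b[k] {
			cmp = -1
		} else if a[k] > b[k] {
			cmp = 1
		}
		if desc[k] {
			cmp = -cmp // a descending key reverses the comparison
		}
		if cmp < 0 {
			return true
		}
		if cmp > 0 {
			return false
		}
		// cmp == 0: tie on this key, try the next one.
	}
	return false // fully equal tuples are not "less"
}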
// TopNInterDirc implements a Top-N algorithm. It is built from a SELECT memex with ORDER BY and LIMIT.
// Instead of sorting all the rows fetched from the causet, it keeps only the Top-N elements in a heap to reduce memory usage.
type TopNInterDirc struct {
	SortInterDirc
	limit      *causetembedded.PhysicalLimit
	totalLimit uint64

	// rowChunks is the chunks to store event values.
	rowChunks *chunk.List
	// rowPtrs stores the chunk index and event index for each event.
	rowPtrs []chunk.EventPtr

	chkHeap *topNChunkHeap
}

// topNChunkHeap implements heap.Interface.
type topNChunkHeap struct {
	*TopNInterDirc
}

// Less implements heap.Interface, but since we maintain a max heap,
// this function returns true if event i is greater than event j.
func (h *topNChunkHeap) Less(i, j int) bool {
	rowI := h.rowChunks.GetEvent(h.rowPtrs[i])
	rowJ := h.rowChunks.GetEvent(h.rowPtrs[j])
	return h.greaterEvent(rowI, rowJ)
}

func (h *topNChunkHeap) greaterEvent(rowI, rowJ chunk.Event) bool {
	for i, defCausIdx := range h.keyDeferredCausets {
		cmpFunc := h.keyCmpFuncs[i]
		cmp := cmpFunc(rowI, defCausIdx, rowJ, defCausIdx)
		if h.ByItems[i].Desc {
			cmp = -cmp
		}
		if cmp > 0 {
			return true
		} else if cmp < 0 {
			return false
		}
	}
	return false
}

func (h *topNChunkHeap) Len() int {
	return len(h.rowPtrs)
}

func (h *topNChunkHeap) Push(x interface{}) {
	// Should never be called.
}

func (h *topNChunkHeap) Pop() interface{} {
	h.rowPtrs = h.rowPtrs[:len(h.rowPtrs)-1]
	// We don't need the popped value, return nil to avoid memory allocation.
	return nil
}

func (h *topNChunkHeap) Swap(i, j int) {
	h.rowPtrs[i], h.rowPtrs[j] = h.rowPtrs[j], h.rowPtrs[i]
}

// keyDeferredCausetsLess is the less function for key columns.
func (e *TopNInterDirc) keyDeferredCausetsLess(i, j int) bool {
	rowI := e.rowChunks.GetEvent(e.rowPtrs[i])
	rowJ := e.rowChunks.GetEvent(e.rowPtrs[j])
	return e.lessEvent(rowI, rowJ)
}

func (e *TopNInterDirc) initPointers() {
	e.rowPtrs = make([]chunk.EventPtr, 0, e.rowChunks.Len())
	e.memTracker.Consume(int64(8 * e.rowChunks.Len()))
	for chkIdx := 0; chkIdx < e.rowChunks.NumChunks(); chkIdx++ {
		rowChk := e.rowChunks.GetChunk(chkIdx)
		for rowIdx := 0; rowIdx < rowChk.NumEvents(); rowIdx++ {
			e.rowPtrs = append(e.rowPtrs, chunk.EventPtr{ChkIdx: uint32(chkIdx), EventIdx: uint32(rowIdx)})
		}
	}
}

// Open implements the InterlockingDirectorate Open interface.
func (e *TopNInterDirc) Open(ctx context.Context) error {
	e.memTracker = memory.NewTracker(e.id, -1)
	e.memTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.MemTracker)

	e.fetched = false
	e.Idx = 0

	return e.children[0].Open(ctx)
}
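
// topNChunkHeap is a max-heap over the current N candidate events: the heap
// root is the worst (greatest) candidate, so a new event only has to beat the
// root to enter the result set. Below is a self-contained sketch of the same
// idea for keeping the n smallest ints (illustrative names, not part of this
// package); the in-place root replacement plus heap.Fix mirrors what
// processChildChk below does with rowPtrs[0].

type exampleIntMaxHeap []int

func (h exampleIntMaxHeap) Len() int           { return len(h) }
func (h exampleIntMaxHeap) Less(i, j int) bool { return h[i] > h[j] } // max-heap: greater sorts first
func (h exampleIntMaxHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }

func (h *exampleIntMaxHeap) Push(x interface{}) { *h = append(*h, x.(int)) }
func (h *exampleIntMaxHeap) Pop() interface{} {
	old := *h
	last := old[len(old)-1]
	*h = old[:len(old)-1]
	return last
}

// exampleTopNSmallest returns the n smallest values in ascending order.
func exampleTopNSmallest(values []int, n int) []int {
	h := make(exampleIntMaxHeap, 0, n)
	for _, v := range values {
		switch {
		case h.Len() < n:
			heap.Push(&h, v)
		case v < h[0]:
			// v beats the current worst candidate: replace the root in place
			// and restore the heap invariant.
			h[0] = v
			heap.Fix(&h, 0)
		}
	}
	sort.Ints(h) // final ordering pass, analogous to the sort.Slice in executeTopN
	return h
}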
// Next implements the InterlockingDirectorate Next interface.
func (e *TopNInterDirc) Next(ctx context.Context, req *chunk.Chunk) error {
	req.Reset()
	if !e.fetched {
		e.totalLimit = e.limit.Offset + e.limit.Count
		e.Idx = int(e.limit.Offset)
		err := e.loadChunksUntilTotalLimit(ctx)
		if err != nil {
			return err
		}
		err = e.executeTopN(ctx)
		if err != nil {
			return err
		}
		e.fetched = true
	}
	if e.Idx >= len(e.rowPtrs) {
		return nil
	}
	for !req.IsFull() && e.Idx < len(e.rowPtrs) {
		event := e.rowChunks.GetEvent(e.rowPtrs[e.Idx])
		req.AppendEvent(event)
		e.Idx++
	}
	return nil
}

func (e *TopNInterDirc) loadChunksUntilTotalLimit(ctx context.Context) error {
	e.chkHeap = &topNChunkHeap{e}
	e.rowChunks = chunk.NewList(retTypes(e), e.initCap, e.maxChunkSize)
	e.rowChunks.GetMemTracker().AttachTo(e.memTracker)
	e.rowChunks.GetMemTracker().SetLabel(memory.LabelForEventChunks)
	for uint64(e.rowChunks.Len()) < e.totalLimit {
		srcChk := newFirstChunk(e.children[0])
		// Adjust the required rows by the total limit.
		srcChk.SetRequiredEvents(int(e.totalLimit-uint64(e.rowChunks.Len())), e.maxChunkSize)
		err := Next(ctx, e.children[0], srcChk)
		if err != nil {
			return err
		}
		if srcChk.NumEvents() == 0 {
			break
		}
		e.rowChunks.Add(srcChk)
	}
	e.initPointers()
	e.initCompareFuncs()
	e.buildKeyDeferredCausets()
	return nil
}

const topNCompactionFactor = 4

func (e *TopNInterDirc) executeTopN(ctx context.Context) error {
	heap.Init(e.chkHeap)
	for uint64(len(e.rowPtrs)) > e.totalLimit {
		// The number of rows we loaded may exceed the total limit; remove the greatest rows by Pop.
		heap.Pop(e.chkHeap)
	}
	childEventChk := newFirstChunk(e.children[0])
	for {
		err := Next(ctx, e.children[0], childEventChk)
		if err != nil {
			return err
		}
		if childEventChk.NumEvents() == 0 {
			break
		}
		err = e.processChildChk(childEventChk)
		if err != nil {
			return err
		}
		if e.rowChunks.Len() > len(e.rowPtrs)*topNCompactionFactor {
			err = e.doCompaction()
			if err != nil {
				return err
			}
		}
	}
	sort.Slice(e.rowPtrs, e.keyDeferredCausetsLess)
	return nil
}

func (e *TopNInterDirc) processChildChk(childEventChk *chunk.Chunk) error {
	for i := 0; i < childEventChk.NumEvents(); i++ {
		heapMaxPtr := e.rowPtrs[0]
		var heapMax, next chunk.Event
		heapMax = e.rowChunks.GetEvent(heapMaxPtr)
		next = childEventChk.GetEvent(i)
		if e.chkHeap.greaterEvent(heapMax, next) {
			// Evict the heap max, keep the next event.
			e.rowPtrs[0] = e.rowChunks.AppendEvent(childEventChk.GetEvent(i))
			heap.Fix(e.chkHeap, 0)
		}
	}
	return nil
}
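
// A worked example of the compaction trigger in executeTopN, assuming LIMIT 100
// (so len(e.rowPtrs) stays at 100): rowChunks keeps every event ever appended,
// including evicted ones, so once it holds more than
// 100 * topNCompactionFactor = 400 events, doCompaction below copies the 100
// live events into a fresh list and drops the rest, reclaiming roughly three
// quarters of the event memory.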
// doCompaction rebuilds the chunks and event pointers to release memory.
// Without compaction, in an extreme case where the child data is already sorted ascending
// but we want a descending top N, we would keep all data in memory.
// If the data is distributed randomly, however, this function is called about log(n) times.
func (e *TopNInterDirc) doCompaction() error {
	newEventChunks := chunk.NewList(retTypes(e), e.initCap, e.maxChunkSize)
	newEventPtrs := make([]chunk.EventPtr, 0, e.rowChunks.Len())
	for _, rowPtr := range e.rowPtrs {
		newEventPtr := newEventChunks.AppendEvent(e.rowChunks.GetEvent(rowPtr))
		newEventPtrs = append(newEventPtrs, newEventPtr)
	}
	newEventChunks.GetMemTracker().SetLabel(memory.LabelForEventChunks)
	e.memTracker.ReplaceChild(e.rowChunks.GetMemTracker(), newEventChunks.GetMemTracker())
	e.rowChunks = newEventChunks

	e.memTracker.Consume(int64(-8 * len(e.rowPtrs)))
	e.memTracker.Consume(int64(8 * len(newEventPtrs)))
	e.rowPtrs = newEventPtrs
	return nil
}
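
// doCompaction works because rowChunks is append-only while rowPtrs tracks the
// live subset; compaction copies the live events into a fresh list and rebuilds
// the pointers. The same pattern over a plain int arena, as an illustrative
// sketch (hypothetical names, not part of this package):

// exampleCompact copies only the live slots of an append-only arena into a new
// arena and returns the rebuilt live indexes.
func exampleCompact(arena []int, live []int) (newArena []int, newLive []int) {
	newArena = make([]int, 0, len(live))
	newLive = make([]int, 0, len(live))
	for _, idx := range live {
		newLive = append(newLive, len(newArena)) // new position of this live value
		newArena = append(newArena, arena[idx])
	}
	return newArena, newLive
}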