github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/sort.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"container/heap"
	"context"
	"errors"
	"sort"

	"github.com/whtcorpsinc/failpoint"
	causetembedded "github.com/whtcorpsinc/milevadb/causet/embedded"
	"github.com/whtcorpsinc/milevadb/causet/soliton"
	"github.com/whtcorpsinc/milevadb/config"
	"github.com/whtcorpsinc/milevadb/memex"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/disk"
	"github.com/whtcorpsinc/milevadb/soliton/memory"
	"github.com/whtcorpsinc/milevadb/types"
)

// SortInterDirc represents a sorting interlock.
type SortInterDirc struct {
	baseInterlockingDirectorate

	ByItems         []*soliton.ByItems
	Idx             int
	fetched         bool
	schemaReplicant *memex.Schema

	keyExprs []memex.Expression
	keyTypes []*types.FieldType
	// keyDeferredCausets are the defCausumn indexes of the by items.
	keyDeferredCausets []int
	// keyCmpFuncs is used to compare each ByItem.
	keyCmpFuncs []chunk.CompareFunc
	// rowChunks is the chunks used to causetstore event values.
	rowChunks *chunk.SortedEventContainer

	memTracker  *memory.Tracker
	diskTracker *disk.Tracker

	// partitionList is the chunks used to causetstore event values for partitions. Every partition is a sorted list.
	partitionList []*chunk.SortedEventContainer

	// multiWayMerge merges the sorted partitions spilled to disk.
	// For the multi-way merge algorithm, see https://en.wikipedia.org/wiki/K-way_merge_algorithm
	multiWayMerge *multiWayMerge
	// spillCausetAction saves the CausetAction used to spill to disk.
	spillCausetAction *chunk.SortAndSpillDiskCausetAction
}

// Close implements the InterlockingDirectorate Close interface.
func (e *SortInterDirc) Close() error {
	for _, container := range e.partitionList {
		err := container.Close()
		if err != nil {
			return err
		}
	}
	e.partitionList = e.partitionList[:0]

	if e.rowChunks != nil {
		e.memTracker.Consume(-e.rowChunks.GetMemTracker().BytesConsumed())
		e.rowChunks = nil
	}
	e.memTracker = nil
	e.diskTracker = nil
	e.multiWayMerge = nil
	e.spillCausetAction = nil
	return e.children[0].Close()
}

// Open implements the InterlockingDirectorate Open interface.
func (e *SortInterDirc) Open(ctx context.Context) error {
	e.fetched = false
	e.Idx = 0

	// To avoid duplicated initialization for TopNInterDirc.
	if e.memTracker == nil {
		e.memTracker = memory.NewTracker(e.id, -1)
		e.memTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.MemTracker)
		e.diskTracker = memory.NewTracker(e.id, -1)
		e.diskTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.DiskTracker)
	}
	e.partitionList = e.partitionList[:0]
	return e.children[0].Open(ctx)
}

// Next implements the InterlockingDirectorate Next interface.
// Sort constructs the result following these steps:
// 1. Read as many rows as possible into memory.
// 2. If the memory quota is triggered, sort these rows in memory, spill them to disk as partition 1, then reset
//    the memory quota trigger and return to step 1.
// 3. If the memory quota is not triggered and the child is consumed, sort these rows in memory as partition N.
// 4. Merge sort if the count of partitions is larger than 1. If there is only one partition, this works
//    just like the previous in-memory sort.
func (e *SortInterDirc) Next(ctx context.Context, req *chunk.Chunk) error {
	req.Reset()
	if !e.fetched {
		e.initCompareFuncs()
		e.buildKeyDeferredCausets()
		err := e.fetchEventChunks(ctx)
		if err != nil {
			return err
		}
		e.fetched = true
	}

	if len(e.partitionList) == 0 {
		return nil
	}
	if len(e.partitionList) > 1 {
		if err := e.externalSorting(req); err != nil {
			return err
		}
	} else {
		for !req.IsFull() && e.Idx < e.partitionList[0].NumEvent() {
			event, err := e.partitionList[0].GetSortedEvent(e.Idx)
			if err != nil {
				return err
			}
			req.AppendEvent(event)
			e.Idx++
		}
	}
	return nil
}

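// partitionPointer tracks the next event to consume from one sorted partition
// during the multi-way merge.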
type partitionPointer struct {
	event       chunk.Event
	partitionID int
	consumed    int
}

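// multiWayMerge is a min-heap over the head events of all sorted partitions.
// It implements heap.Interface.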
type multiWayMerge struct {
	lessEventFunction func(rowI chunk.Event, rowJ chunk.Event) bool
	elements          []partitionPointer
}

func (h *multiWayMerge) Less(i, j int) bool {
	rowI := h.elements[i].event
	rowJ := h.elements[j].event
	return h.lessEventFunction(rowI, rowJ)
}

func (h *multiWayMerge) Len() int {
	return len(h.elements)
}

func (h *multiWayMerge) Push(x interface{}) {
	// Should never be called.
}

func (h *multiWayMerge) Pop() interface{} {
	h.elements = h.elements[:len(h.elements)-1]
	return nil
}

func (h *multiWayMerge) Swap(i, j int) {
	h.elements[i], h.elements[j] = h.elements[j], h.elements[i]
}

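// mergeSortedInts is a minimal, self-contained sketch (not used by this
// interlock) of the k-way merge pattern that multiWayMerge and
// externalSorting implement below: keep the head of every sorted partition
// in a min-heap, repeatedly emit the smallest head, and advance that
// partition's cursor. The intHead, intPartitionHeap, and mergeSortedInts
// names are hypothetical helpers for illustration only.
type intHead struct{ val, part, idx int }

type intPartitionHeap struct {
	elems []intHead
	parts [][]int
}

func (h *intPartitionHeap) Len() int           { return len(h.elems) }
func (h *intPartitionHeap) Less(i, j int) bool { return h.elems[i].val < h.elems[j].val }
func (h *intPartitionHeap) Swap(i, j int)      { h.elems[i], h.elems[j] = h.elems[j], h.elems[i] }
func (h *intPartitionHeap) Push(x interface{}) {} // never called, as in multiWayMerge
func (h *intPartitionHeap) Pop() interface{} {
	h.elems = h.elems[:len(h.elems)-1]
	return nil
}

func mergeSortedInts(parts [][]int) []int {
	h := &intPartitionHeap{parts: parts}
	for p, part := range parts {
		if len(part) > 0 {
			h.elems = append(h.elems, intHead{val: part[0], part: p, idx: 0})
		}
	}
	heap.Init(h)
	var out []int
	for h.Len() > 0 {
		top := h.elems[0]
		out = append(out, top.val)
		top.idx++
		if top.idx >= len(parts[top.part]) {
			heap.Remove(h, 0) // this partition is exhausted, drop it
			continue
		}
		top.val = parts[top.part][top.idx]
		h.elems[0] = top
		heap.Fix(h, 0) // sift the advanced head back into place
	}
	return out
}

// externalSorting merges the sorted partitions into req via the multi-way
// merge heap, initializing the heap with the first event of every partition
// on the first call.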
func (e *SortInterDirc) externalSorting(req *chunk.Chunk) (err error) {
	if e.multiWayMerge == nil {
		e.multiWayMerge = &multiWayMerge{e.lessEvent, make([]partitionPointer, 0, len(e.partitionList))}
		for i := 0; i < len(e.partitionList); i++ {
			event, err := e.partitionList[i].GetSortedEvent(0)
			if err != nil {
				return err
			}
			e.multiWayMerge.elements = append(e.multiWayMerge.elements, partitionPointer{event: event, partitionID: i, consumed: 0})
		}
		heap.Init(e.multiWayMerge)
	}

	for !req.IsFull() && e.multiWayMerge.Len() > 0 {
		partitionPtr := e.multiWayMerge.elements[0]
		req.AppendEvent(partitionPtr.event)
		partitionPtr.consumed++
		if partitionPtr.consumed >= e.partitionList[partitionPtr.partitionID].NumEvent() {
			heap.Remove(e.multiWayMerge, 0)
			continue
		}
		partitionPtr.event, err = e.partitionList[partitionPtr.partitionID].
			GetSortedEvent(partitionPtr.consumed)
		if err != nil {
			return err
		}
		e.multiWayMerge.elements[0] = partitionPtr
		heap.Fix(e.multiWayMerge, 0)
	}
	return nil
}

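// fetchEventChunks drains the child into SortedEventContainers. When the
// memory quota triggers a spill, the current container becomes a sorted
// partition and a fresh container is started; otherwise all events end up
// in a single in-memory partition.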
func (e *SortInterDirc) fetchEventChunks(ctx context.Context) error {
	fields := retTypes(e)
	byItemsDesc := make([]bool, len(e.ByItems))
	for i, byItem := range e.ByItems {
		byItemsDesc[i] = byItem.Desc
	}
	e.rowChunks = chunk.NewSortedEventContainer(fields, e.maxChunkSize, byItemsDesc, e.keyDeferredCausets, e.keyCmpFuncs)
	e.rowChunks.GetMemTracker().AttachTo(e.memTracker)
	e.rowChunks.GetMemTracker().SetLabel(memory.LabelForEventChunks)
	if config.GetGlobalConfig().OOMUseTmpStorage {
		e.spillCausetAction = e.rowChunks.CausetActionSpill()
		failpoint.Inject("testSortedEventContainerSpill", func(val failpoint.Value) {
			if val.(bool) {
				e.spillCausetAction = e.rowChunks.CausetActionSpillForTest()
				defer e.spillCausetAction.WaitForTest()
			}
		})
		e.ctx.GetStochastikVars().StmtCtx.MemTracker.FallbackOldAndSetNewCausetAction(e.spillCausetAction)
		e.rowChunks.GetDiskTracker().AttachTo(e.diskTracker)
		e.rowChunks.GetDiskTracker().SetLabel(memory.LabelForEventChunks)
	}
	for {
		chk := newFirstChunk(e.children[0])
		err := Next(ctx, e.children[0], chk)
		if err != nil {
			return err
		}
		rowCount := chk.NumEvents()
		if rowCount == 0 {
			break
		}
		if err := e.rowChunks.Add(chk); err != nil {
			if errors.Is(err, chunk.ErrCannotAddBecauseSorted) {
				e.partitionList = append(e.partitionList, e.rowChunks)
				e.rowChunks = chunk.NewSortedEventContainer(fields, e.maxChunkSize, byItemsDesc, e.keyDeferredCausets, e.keyCmpFuncs)
				e.rowChunks.GetMemTracker().AttachTo(e.memTracker)
				e.rowChunks.GetMemTracker().SetLabel(memory.LabelForEventChunks)
				e.rowChunks.GetDiskTracker().AttachTo(e.diskTracker)
				e.rowChunks.GetDiskTracker().SetLabel(memory.LabelForEventChunks)
				e.spillCausetAction = e.rowChunks.CausetActionSpill()
				failpoint.Inject("testSortedEventContainerSpill", func(val failpoint.Value) {
					if val.(bool) {
						e.spillCausetAction = e.rowChunks.CausetActionSpillForTest()
						defer e.spillCausetAction.WaitForTest()
					}
				})
				e.ctx.GetStochastikVars().StmtCtx.MemTracker.FallbackOldAndSetNewCausetAction(e.spillCausetAction)
				err = e.rowChunks.Add(chk)
			}
			if err != nil {
				return err
			}
		}
	}
	if e.rowChunks.NumEvent() > 0 {
		e.rowChunks.Sort()
		e.partitionList = append(e.partitionList, e.rowChunks)
	}
	return nil
}

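// initCompareFuncs caches one chunk.CompareFunc per ByItem, keyed by the
// expression's field type.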
func (e *SortInterDirc) initCompareFuncs() {
	e.keyCmpFuncs = make([]chunk.CompareFunc, len(e.ByItems))
	for i := range e.ByItems {
		keyType := e.ByItems[i].Expr.GetType()
		e.keyCmpFuncs[i] = chunk.GetCompareFunc(keyType)
	}
}

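// buildKeyDeferredCausets resolves each ByItem expression to the index of
// the defCausumn it sorts on.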
func (e *SortInterDirc) buildKeyDeferredCausets() {
	e.keyDeferredCausets = make([]int, 0, len(e.ByItems))
	for _, by := range e.ByItems {
		defCaus := by.Expr.(*memex.DeferredCauset)
		e.keyDeferredCausets = append(e.keyDeferredCausets, defCaus.Index)
	}
}

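// lessEvent reports whether rowI sorts before rowJ, comparing the key
// defCausumns in order and flipping the sign for descending items.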
func (e *SortInterDirc) lessEvent(rowI, rowJ chunk.Event) bool {
	for i, defCausIdx := range e.keyDeferredCausets {
		cmpFunc := e.keyCmpFuncs[i]
		cmp := cmpFunc(rowI, defCausIdx, rowJ, defCausIdx)
		if e.ByItems[i].Desc {
			cmp = -cmp
		}
		if cmp < 0 {
			return true
		} else if cmp > 0 {
			return false
		}
	}
	return false
}

// TopNInterDirc implements a Top-N algorithm and is built from a SELECT memex with ORDER BY and LIMIT.
// Instead of sorting all the rows fetched from the causet, it keeps only the Top-N elements in a heap to reduce memory usage.
type TopNInterDirc struct {
	SortInterDirc
	limit      *causetembedded.PhysicalLimit
	totalLimit uint64

	// rowChunks is the chunks used to causetstore event values.
	rowChunks *chunk.List
	// rowPtrs causetstores the chunk index and event index for each event.
	rowPtrs []chunk.EventPtr

	chkHeap *topNChunkHeap
}

// topNChunkHeap implements heap.Interface.
type topNChunkHeap struct {
	*TopNInterDirc
}

// Less implements heap.Interface. Since we maintain a max heap,
// this function returns true if event i is greater than event j.
func (h *topNChunkHeap) Less(i, j int) bool {
	rowI := h.rowChunks.GetEvent(h.rowPtrs[i])
	rowJ := h.rowChunks.GetEvent(h.rowPtrs[j])
	return h.greaterEvent(rowI, rowJ)
}

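// greaterEvent is the mirror of lessEvent: it reports whether rowI sorts
// after rowJ under the ORDER BY, which is what makes this a max heap.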
func (h *topNChunkHeap) greaterEvent(rowI, rowJ chunk.Event) bool {
	for i, defCausIdx := range h.keyDeferredCausets {
		cmpFunc := h.keyCmpFuncs[i]
		cmp := cmpFunc(rowI, defCausIdx, rowJ, defCausIdx)
		if h.ByItems[i].Desc {
			cmp = -cmp
		}
		if cmp > 0 {
			return true
		} else if cmp < 0 {
			return false
		}
	}
	return false
}

func (h *topNChunkHeap) Len() int {
	return len(h.rowPtrs)
}

func (h *topNChunkHeap) Push(x interface{}) {
	// Should never be called.
}

func (h *topNChunkHeap) Pop() interface{} {
	h.rowPtrs = h.rowPtrs[:len(h.rowPtrs)-1]
	// We don't need the popped value, return nil to avoid memory allocation.
	return nil
}

func (h *topNChunkHeap) Swap(i, j int) {
	h.rowPtrs[i], h.rowPtrs[j] = h.rowPtrs[j], h.rowPtrs[i]
}

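// topNInts is a minimal, self-contained sketch (not used by this interlock)
// of the Top-N technique implemented below: keep the N smallest values in a
// max-heap, and whenever a new value beats the heap max, replace the max and
// fix the heap. The intMaxHeap and topNInts names are hypothetical helpers
// for illustration only.
type intMaxHeap []int

func (h intMaxHeap) Len() int            { return len(h) }
func (h intMaxHeap) Less(i, j int) bool  { return h[i] > h[j] } // max heap: greater sorts first
func (h intMaxHeap) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *intMaxHeap) Push(x interface{}) { *h = append(*h, x.(int)) }
func (h *intMaxHeap) Pop() interface{} {
	old := *h
	x := old[len(old)-1]
	*h = old[:len(old)-1]
	return x
}

func topNInts(vals []int, n int) []int {
	h := make(intMaxHeap, 0, n)
	for _, v := range vals {
		switch {
		case h.Len() < n:
			heap.Push(&h, v)
		case v < h[0]:
			h[0] = v        // evict the current max, keep the smaller value
			heap.Fix(&h, 0) // like processChildChk below
		}
	}
	sort.Ints(h) // final ordering pass, like executeTopN's sort.Slice
	return h
}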
// keyDeferredCausetsLess is the less function for key defCausumns.
func (e *TopNInterDirc) keyDeferredCausetsLess(i, j int) bool {
	rowI := e.rowChunks.GetEvent(e.rowPtrs[i])
	rowJ := e.rowChunks.GetEvent(e.rowPtrs[j])
	return e.lessEvent(rowI, rowJ)
}

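// initPointers builds one chunk.EventPtr per event in rowChunks. Each
// EventPtr is 8 bytes (two uint32 indexes), which is what the memory
// tracker accounts for below.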
func (e *TopNInterDirc) initPointers() {
	e.rowPtrs = make([]chunk.EventPtr, 0, e.rowChunks.Len())
	e.memTracker.Consume(int64(8 * e.rowChunks.Len()))
	for chkIdx := 0; chkIdx < e.rowChunks.NumChunks(); chkIdx++ {
		rowChk := e.rowChunks.GetChunk(chkIdx)
		for rowIdx := 0; rowIdx < rowChk.NumEvents(); rowIdx++ {
			e.rowPtrs = append(e.rowPtrs, chunk.EventPtr{ChkIdx: uint32(chkIdx), EventIdx: uint32(rowIdx)})
		}
	}
}

// Open implements the InterlockingDirectorate Open interface.
func (e *TopNInterDirc) Open(ctx context.Context) error {
	e.memTracker = memory.NewTracker(e.id, -1)
	e.memTracker.AttachTo(e.ctx.GetStochastikVars().StmtCtx.MemTracker)

	e.fetched = false
	e.Idx = 0

	return e.children[0].Open(ctx)
}

// Next implements the InterlockingDirectorate Next interface.
func (e *TopNInterDirc) Next(ctx context.Context, req *chunk.Chunk) error {
	req.Reset()
	if !e.fetched {
		e.totalLimit = e.limit.Offset + e.limit.Count
		e.Idx = int(e.limit.Offset)
		err := e.loadChunksUntilTotalLimit(ctx)
		if err != nil {
			return err
		}
		err = e.executeTopN(ctx)
		if err != nil {
			return err
		}
		e.fetched = true
	}
	if e.Idx >= len(e.rowPtrs) {
		return nil
	}
	for !req.IsFull() && e.Idx < len(e.rowPtrs) {
		event := e.rowChunks.GetEvent(e.rowPtrs[e.Idx])
		req.AppendEvent(event)
		e.Idx++
	}
	return nil
}

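// loadChunksUntilTotalLimit reads events from the child until rowChunks
// holds at least totalLimit (offset + count) events or the child is
// exhausted. These events seed the max heap used by executeTopN.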
func (e *TopNInterDirc) loadChunksUntilTotalLimit(ctx context.Context) error {
	e.chkHeap = &topNChunkHeap{e}
	e.rowChunks = chunk.NewList(retTypes(e), e.initCap, e.maxChunkSize)
	e.rowChunks.GetMemTracker().AttachTo(e.memTracker)
	e.rowChunks.GetMemTracker().SetLabel(memory.LabelForEventChunks)
	for uint64(e.rowChunks.Len()) < e.totalLimit {
		srcChk := newFirstChunk(e.children[0])
		// adjust required rows by total limit
		srcChk.SetRequiredEvents(int(e.totalLimit-uint64(e.rowChunks.Len())), e.maxChunkSize)
		err := Next(ctx, e.children[0], srcChk)
		if err != nil {
			return err
		}
		if srcChk.NumEvents() == 0 {
			break
		}
		e.rowChunks.Add(srcChk)
	}
	e.initPointers()
	e.initCompareFuncs()
	e.buildKeyDeferredCausets()
	return nil
}

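// topNCompactionFactor bounds how much rowChunks may grow relative to the
// live event pointers before doCompaction rebuilds it.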
const topNCompactionFactor = 4

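// executeTopN trims the preloaded events down to totalLimit with heap pops,
// then streams the rest of the child, keeping only events that beat the
// heap max, and finally sorts the surviving pointers.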
func (e *TopNInterDirc) executeTopN(ctx context.Context) error {
	heap.Init(e.chkHeap)
	for uint64(len(e.rowPtrs)) > e.totalLimit {
		// The number of rows we loaded may exceed the total limit, so remove the greatest rows by Pop.
		heap.Pop(e.chkHeap)
	}
	childEventChk := newFirstChunk(e.children[0])
	for {
		err := Next(ctx, e.children[0], childEventChk)
		if err != nil {
			return err
		}
		if childEventChk.NumEvents() == 0 {
			break
		}
		err = e.processChildChk(childEventChk)
		if err != nil {
			return err
		}
		if e.rowChunks.Len() > len(e.rowPtrs)*topNCompactionFactor {
			err = e.doCompaction()
			if err != nil {
				return err
			}
		}
	}
	sort.Slice(e.rowPtrs, e.keyDeferredCausetsLess)
	return nil
}

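// processChildChk compares each incoming event against the heap max; a
// smaller event replaces the max in place, so the heap always holds the
// current Top-N.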
func (e *TopNInterDirc) processChildChk(childEventChk *chunk.Chunk) error {
	for i := 0; i < childEventChk.NumEvents(); i++ {
		heapMaxPtr := e.rowPtrs[0]
		heapMax := e.rowChunks.GetEvent(heapMaxPtr)
		next := childEventChk.GetEvent(i)
		if e.chkHeap.greaterEvent(heapMax, next) {
			// Evict the heap max, keep the next event.
			e.rowPtrs[0] = e.rowChunks.AppendEvent(next)
			heap.Fix(e.chkHeap, 0)
		}
	}
	return nil
}

// doCompaction rebuilds the chunks and event pointers to release memory.
// Without compaction, in an extreme case where the child data is already sorted ascending
// but we want a descending top N, we would keep all data in memory.
// If the data is distributed randomly, this function is called O(log n) times.
func (e *TopNInterDirc) doCompaction() error {
	newEventChunks := chunk.NewList(retTypes(e), e.initCap, e.maxChunkSize)
	newEventPtrs := make([]chunk.EventPtr, 0, e.rowChunks.Len())
	for _, rowPtr := range e.rowPtrs {
		newEventPtr := newEventChunks.AppendEvent(e.rowChunks.GetEvent(rowPtr))
		newEventPtrs = append(newEventPtrs, newEventPtr)
	}
	newEventChunks.GetMemTracker().SetLabel(memory.LabelForEventChunks)
	e.memTracker.ReplaceChild(e.rowChunks.GetMemTracker(), newEventChunks.GetMemTracker())
	e.rowChunks = newEventChunks

	e.memTracker.Consume(int64(-8 * len(e.rowPtrs)))
	e.memTracker.Consume(int64(8 * len(newEventPtrs)))
	e.rowPtrs = newEventPtrs
	return nil
}