github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/db/merge/utils.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package merge
    16  
    17  import (
    18  	"container/heap"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog"
    21  )
    22  
    23  // min heap item
    24  type mItem[T any] struct {
    25  	row   int
    26  	entry T
    27  }
    28  
    29  type itemSet[T any] []*mItem[T]
    30  
    31  func (is itemSet[T]) Len() int { return len(is) }
    32  
    33  func (is itemSet[T]) Less(i, j int) bool {
    34  	// max heap
    35  	return is[i].row > is[j].row
    36  }
    37  
    38  func (is itemSet[T]) Swap(i, j int) {
    39  	is[i], is[j] = is[j], is[i]
    40  }
    41  
    42  func (is *itemSet[T]) Push(x any) {
    43  	item := x.(*mItem[T])
    44  	*is = append(*is, item)
    45  }
    46  
    47  func (is *itemSet[T]) Pop() any {
    48  	old := *is
    49  	n := len(old)
    50  	item := old[n-1]
    51  	old[n-1] = nil // avoid memory leak
    52  	*is = old[0 : n-1]
    53  	return item
    54  }
    55  
    56  func (is *itemSet[T]) Clear() {
    57  	old := *is
    58  	*is = old[:0]
    59  }
    60  
    61  // heapBuilder founds out blocks to be merged via maintaining a min heap
    62  type heapBuilder[T any] struct {
    63  	items itemSet[T]
    64  }
    65  
    66  func (h *heapBuilder[T]) reset() {
    67  	h.items.Clear()
    68  }
    69  
    70  func (h *heapBuilder[T]) pushWithCap(item *mItem[T], cap int) {
    71  	heap.Push(&h.items, item)
    72  	for h.items.Len() > cap {
    73  		heap.Pop(&h.items)
    74  	}
    75  }
    76  
    77  // copy out the items in the heap
    78  func (h *heapBuilder[T]) finish() []T {
    79  	ret := make([]T, h.items.Len())
    80  	for i, item := range h.items {
    81  		ret[i] = item.entry
    82  	}
    83  	return ret
    84  }
    85  
    86  func estimateMergeConsume(mobjs []*catalog.ObjectEntry) (origSize, estSize, compSize int) {
    87  	if len(mobjs) == 0 {
    88  		return
    89  	}
    90  	rows, merged := 0, 0
    91  	for _, m := range mobjs {
    92  		rows += m.GetRows()
    93  		merged += m.GetRemainingRows()
    94  		origSize += m.GetOriginSize()
    95  		compSize += m.GetCompSize()
    96  	}
    97  	// by test exprience, full 8192 rows batch will expand to (6~8)x memory comsupation.
    98  	// the ExpansionRate will be moderated by the actual row number after applying deletes
    99  	factor := float64(merged) / float64(rows)
   100  	rate := float64(constMergeExpansionRate) * factor
   101  	if rate < 2 {
   102  		rate = 2
   103  	}
   104  	estSize = int(float64(origSize) * rate)
   105  
   106  	// compSize is estimated after applying deletes
   107  	compSize = int(float64(compSize) * factor)
   108  	return
   109  }