github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/db/merge/utils.go (about) 1 // Copyright 2023 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package merge 16 17 import ( 18 "container/heap" 19 20 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog" 21 ) 22 23 // min heap item 24 type mItem[T any] struct { 25 row int 26 entry T 27 } 28 29 type itemSet[T any] []*mItem[T] 30 31 func (is itemSet[T]) Len() int { return len(is) } 32 33 func (is itemSet[T]) Less(i, j int) bool { 34 // max heap 35 return is[i].row > is[j].row 36 } 37 38 func (is itemSet[T]) Swap(i, j int) { 39 is[i], is[j] = is[j], is[i] 40 } 41 42 func (is *itemSet[T]) Push(x any) { 43 item := x.(*mItem[T]) 44 *is = append(*is, item) 45 } 46 47 func (is *itemSet[T]) Pop() any { 48 old := *is 49 n := len(old) 50 item := old[n-1] 51 old[n-1] = nil // avoid memory leak 52 *is = old[0 : n-1] 53 return item 54 } 55 56 func (is *itemSet[T]) Clear() { 57 old := *is 58 *is = old[:0] 59 } 60 61 // heapBuilder founds out blocks to be merged via maintaining a min heap 62 type heapBuilder[T any] struct { 63 items itemSet[T] 64 } 65 66 func (h *heapBuilder[T]) reset() { 67 h.items.Clear() 68 } 69 70 func (h *heapBuilder[T]) pushWithCap(item *mItem[T], cap int) { 71 heap.Push(&h.items, item) 72 for h.items.Len() > cap { 73 heap.Pop(&h.items) 74 } 75 } 76 77 // copy out the items in the heap 78 func (h *heapBuilder[T]) finish() []T { 79 ret := make([]T, h.items.Len()) 80 for i, item := range h.items { 81 ret[i] = item.entry 82 } 83 return ret 84 } 85 86 func estimateMergeConsume(mobjs []*catalog.ObjectEntry) (origSize, estSize, compSize int) { 87 if len(mobjs) == 0 { 88 return 89 } 90 rows, merged := 0, 0 91 for _, m := range mobjs { 92 rows += m.GetRows() 93 merged += m.GetRemainingRows() 94 origSize += m.GetOriginSize() 95 compSize += m.GetCompSize() 96 } 97 // by test exprience, full 8192 rows batch will expand to (6~8)x memory comsupation. 98 // the ExpansionRate will be moderated by the actual row number after applying deletes 99 factor := float64(merged) / float64(rows) 100 rate := float64(constMergeExpansionRate) * factor 101 if rate < 2 { 102 rate = 2 103 } 104 estSize = int(float64(origSize) * rate) 105 106 // compSize is estimated after applying deletes 107 compSize = int(float64(compSize) * factor) 108 return 109 }