github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/db/merge/policyBasic.go (about) 1 // Copyright 2023 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package merge 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "sort" 22 "sync" 23 24 "github.com/matrixorigin/matrixone/pkg/logutil" 25 "github.com/matrixorigin/matrixone/pkg/pb/api" 26 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog" 27 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common" 28 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/txnif" 29 ) 30 31 var ( 32 _ Policy = (*basic)(nil) 33 defaultBasicConfig = &BasicPolicyConfig{ 34 MergeMaxOneRun: common.DefaultMaxMergeObjN, 35 MaxOsizeMergedObj: common.DefaultMaxOsizeObjMB * common.Const1MBytes, 36 ObjectMinOsize: common.DefaultMinOsizeQualifiedMB * common.Const1MBytes, 37 MinCNMergeSize: common.DefaultMinCNMergeSize * common.Const1MBytes, 38 } 39 ) 40 41 /// TODO(aptend): codes related storing and fetching configs are too annoying! 42 43 type BasicPolicyConfig struct { 44 name string 45 MergeMaxOneRun int 46 ObjectMinOsize uint32 47 MaxOsizeMergedObj uint32 48 MinCNMergeSize uint64 49 FromUser bool 50 MergeHints []api.MergeHint 51 } 52 53 func (c *BasicPolicyConfig) String() string { 54 return fmt.Sprintf( 55 "minOsizeObj:%v, maxOneRun:%v, maxOsizeMergedObj: %v, offloadToCNSize:%v, hints: %v", 56 common.HumanReadableBytes(int(c.ObjectMinOsize)), 57 c.MergeMaxOneRun, 58 common.HumanReadableBytes(int(c.MaxOsizeMergedObj)), 59 common.HumanReadableBytes(int(c.MinCNMergeSize)), 60 c.MergeHints, 61 ) 62 } 63 64 type customConfigProvider struct { 65 sync.Mutex 66 configs map[uint64]*BasicPolicyConfig // works like a cache 67 } 68 69 func newCustomConfigProvider() *customConfigProvider { 70 return &customConfigProvider{ 71 configs: make(map[uint64]*BasicPolicyConfig), 72 } 73 } 74 75 func (o *customConfigProvider) GetConfig(tbl *catalog.TableEntry) *BasicPolicyConfig { 76 o.Lock() 77 defer o.Unlock() 78 p, ok := o.configs[tbl.ID] 79 if !ok { 80 // load from an atomic value 81 extra := tbl.GetLastestSchemaLocked().Extra 82 if extra.MaxObjOnerun != 0 || extra.MinOsizeQuailifed != 0 { 83 // compatible with old version 84 cnSize := extra.MinCnMergeSize 85 if cnSize == 0 { 86 cnSize = common.DefaultMinCNMergeSize * common.Const1MBytes 87 } 88 // if the values are smaller than default, it map old rows -> bytes size 89 minOsize := extra.MinOsizeQuailifed 90 if v := uint32(80 * 8192); minOsize < v { 91 minOsize = v 92 } 93 maxOsize := extra.MaxOsizeMergedObj 94 if v := uint32(500 * 8192); maxOsize < v { 95 maxOsize = v 96 } 97 98 p = &BasicPolicyConfig{ 99 ObjectMinOsize: minOsize, 100 MergeMaxOneRun: int(extra.MaxObjOnerun), 101 MaxOsizeMergedObj: maxOsize, 102 MinCNMergeSize: cnSize, 103 FromUser: true, 104 MergeHints: extra.Hints, 105 } 106 o.configs[tbl.ID] = p 107 } else { 108 p = defaultBasicConfig 109 o.configs[tbl.ID] = p 110 } 111 } 112 return p 113 } 114 115 func (o *customConfigProvider) InvalidCache(tbl *catalog.TableEntry) { 116 o.Lock() 117 defer o.Unlock() 118 delete(o.configs, tbl.ID) 119 } 120 121 func (o *customConfigProvider) SetCache(tbl *catalog.TableEntry, cfg *BasicPolicyConfig) { 122 o.Lock() 123 defer o.Unlock() 124 o.configs[tbl.ID] = cfg 125 } 126 127 func (o *customConfigProvider) String() string { 128 o.Lock() 129 defer o.Unlock() 130 keys := make([]uint64, 0, len(o.configs)) 131 for k := range o.configs { 132 keys = append(keys, k) 133 } 134 sort.Slice(keys, func(i, j int) bool { 135 return keys[i] < keys[j] 136 }) 137 buf := bytes.Buffer{} 138 buf.WriteString("customConfigProvider: ") 139 for _, k := range keys { 140 c := o.configs[k] 141 buf.WriteString(fmt.Sprintf("%d-%v:%v,%v | ", k, c.name, c.ObjectMinOsize, c.MergeMaxOneRun)) 142 } 143 return buf.String() 144 } 145 146 func (o *customConfigProvider) ResetConfig() { 147 o.Lock() 148 defer o.Unlock() 149 o.configs = make(map[uint64]*BasicPolicyConfig) 150 } 151 152 type basic struct { 153 id uint64 154 schema *catalog.Schema 155 hist *common.MergeHistory 156 objHeap *heapBuilder[*catalog.ObjectEntry] 157 guessType common.WorkloadKind 158 accBuf []int 159 160 config *BasicPolicyConfig 161 configProvider *customConfigProvider 162 } 163 164 func NewBasicPolicy() Policy { 165 return &basic{ 166 objHeap: &heapBuilder[*catalog.ObjectEntry]{ 167 items: make(itemSet[*catalog.ObjectEntry], 0, 32), 168 }, 169 accBuf: make([]int, 1, 32), 170 configProvider: newCustomConfigProvider(), 171 } 172 } 173 174 // impl Policy for Basic 175 func (o *basic) OnObject(obj *catalog.ObjectEntry) { 176 rowsLeftOnObj := obj.GetRemainingRows() 177 osize := obj.GetOriginSize() 178 179 iscandidate := func() bool { 180 // objext with a lot of holes 181 if rowsLeftOnObj < obj.GetRows()/2 { 182 return true 183 } 184 if osize < int(o.config.ObjectMinOsize) { 185 return true 186 } 187 // skip big object as an insurance 188 if osize > 110*common.Const1MBytes { 189 return false 190 } 191 192 return false 193 } 194 195 if iscandidate() { 196 o.objHeap.pushWithCap(&mItem[*catalog.ObjectEntry]{ 197 row: rowsLeftOnObj, 198 entry: obj, 199 }, o.config.MergeMaxOneRun) 200 } 201 } 202 203 func (o *basic) SetConfig(tbl *catalog.TableEntry, f func() txnif.AsyncTxn, c any) { 204 txn := f() 205 if tbl == nil || txn == nil { 206 return 207 } 208 db, err := txn.GetDatabaseByID(tbl.GetDB().ID) 209 if err != nil { 210 return 211 } 212 tblHandle, err := db.GetRelationByID(tbl.ID) 213 if err != nil { 214 return 215 } 216 cfg := c.(*BasicPolicyConfig) 217 ctx := context.Background() 218 tblHandle.AlterTable( 219 ctx, 220 NewUpdatePolicyReq(cfg), 221 ) 222 logutil.Infof("mergeblocks set %v-%v config: %v", tbl.ID, tbl.GetLastestSchemaLocked().Name, cfg) 223 txn.Commit(ctx) 224 o.configProvider.InvalidCache(tbl) 225 } 226 227 func (o *basic) GetConfig(tbl *catalog.TableEntry) any { 228 r := o.configProvider.GetConfig(tbl) 229 if r == nil { 230 r = &BasicPolicyConfig{ 231 ObjectMinOsize: common.RuntimeOsizeRowsQualified.Load(), 232 MaxOsizeMergedObj: common.RuntimeMaxObjOsize.Load(), 233 MergeMaxOneRun: int(common.RuntimeMaxMergeObjN.Load()), 234 MinCNMergeSize: common.RuntimeMinCNMergeSize.Load(), 235 } 236 } 237 return r 238 } 239 240 func (o *basic) Revise(cpu, mem int64) ([]*catalog.ObjectEntry, TaskHostKind) { 241 objs := o.objHeap.finish() 242 sort.Slice(objs, func(i, j int) bool { 243 return objs[i].GetRemainingRows() < objs[j].GetRemainingRows() 244 }) 245 246 isStandalone := common.IsStandaloneBoost.Load() 247 mergeOnDNIfStandalone := !common.ShouldStandaloneCNTakeOver.Load() 248 249 dnobjs := o.controlMem(objs, mem) 250 dnobjs = o.optimize(dnobjs) 251 252 dnosize, _, _ := estimateMergeConsume(dnobjs) 253 254 schedDN := func() ([]*catalog.ObjectEntry, TaskHostKind) { 255 if cpu > 85 { 256 if dnosize > 25*common.Const1MBytes { 257 logutil.Infof("mergeblocks skip big merge for high level cpu usage, %d", cpu) 258 return nil, TaskHostDN 259 } 260 } 261 return dnobjs, TaskHostDN 262 } 263 264 schedCN := func() ([]*catalog.ObjectEntry, TaskHostKind) { 265 cnobjs := o.controlMem(objs, int64(common.RuntimeCNMergeMemControl.Load())) 266 cnobjs = o.optimize(cnobjs) 267 return cnobjs, TaskHostCN 268 } 269 270 if isStandalone && mergeOnDNIfStandalone { 271 return schedDN() 272 } 273 274 // CNs come into the picture in two cases: 275 // 1.cluster deployed 276 // 2.standalone deployed but it's asked to merge on cn 277 if common.RuntimeCNTakeOverAll.Load() || dnosize > int(common.RuntimeMinCNMergeSize.Load()) { 278 return schedCN() 279 } 280 281 // CNs don't take over the task, leave it on dn. 282 return schedDN() 283 } 284 285 func (o *basic) ConfigString() string { 286 r := o.configProvider.String() 287 return r 288 } 289 290 func (o *basic) optimize(objs []*catalog.ObjectEntry) []*catalog.ObjectEntry { 291 // objs are sorted by remaining rows 292 o.accBuf = o.accBuf[:1] 293 for i, obj := range objs { 294 o.accBuf = append(o.accBuf, o.accBuf[i]+obj.GetRemainingRows()) 295 } 296 acc := o.accBuf 297 298 isBigGap := func(small, big int) bool { 299 if big < int(o.schema.BlockMaxRows) { 300 return false 301 } 302 return big-small > 3*small 303 } 304 305 var i int 306 // skip merging objects with big row count gaps, 3x and more 307 for i = len(acc) - 1; i > 1 && isBigGap(acc[i-1], acc[i]); i-- { 308 } 309 310 readyToMergeRows := acc[i] 311 312 // avoid frequent small object merge 313 if readyToMergeRows < int(o.schema.BlockMaxRows) && 314 !o.hist.IsLastBefore(constSmallMergeGap) && 315 i < o.config.MergeMaxOneRun { 316 return nil 317 } 318 319 objs = objs[:i] 320 321 return objs 322 } 323 324 func (o *basic) controlMem(objs []*catalog.ObjectEntry, mem int64) []*catalog.ObjectEntry { 325 if mem > constMaxMemCap { 326 mem = constMaxMemCap 327 } 328 329 needPopout := func(ss []*catalog.ObjectEntry) bool { 330 osize, esize, _ := estimateMergeConsume(ss) 331 if esize > int(2*mem/3) { 332 return true 333 } 334 335 if len(ss) <= 2 { 336 return false 337 } 338 // make object averaged size 339 return osize > int(o.config.MaxOsizeMergedObj) 340 } 341 for needPopout(objs) { 342 objs = objs[:len(objs)-1] 343 } 344 345 return objs 346 } 347 348 func (o *basic) ResetForTable(entry *catalog.TableEntry) { 349 o.id = entry.ID 350 o.schema = entry.GetLastestSchemaLocked() 351 o.hist = entry.Stats.GetLastMerge() 352 o.guessType = entry.Stats.GetWorkloadGuess() 353 o.objHeap.reset() 354 355 o.config = o.configProvider.GetConfig(entry) 356 }