github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/db/merge/executor.go (about)

// Copyright 2023 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package merge

import (
	"bytes"
	"context"
	"fmt"
	"math"
	"sync"
	"sync/atomic"

	"github.com/matrixorigin/matrixone/pkg/objectio"
	"github.com/matrixorigin/matrixone/pkg/pb/api"
	v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2"

	"github.com/KimMachineGun/automemlimit/memlimit"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/db/dbutils"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/txnif"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tables/jobs"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tasks"
	"github.com/shirou/gopsutil/v3/cpu"
	"github.com/shirou/gopsutil/v3/mem"
)

// activeTaskStats tracks, per task id, the block count and estimated
// memory consumption of an in-flight merge task.
type activeTaskStats map[uint64]struct {
	blk      int
	estBytes int
}

// MergeExecutor consults available resources to decide whether to merge or not.
type MergeExecutor struct {
	tableName           string
	rt                  *dbutils.Runtime
	cnSched             CNMergeScheduler
	memAvail            int
	memSpare            int // 10% of total memory or container memory limit
	cpuPercent          float64
	activeMergeBlkCount int32
	activeEstimateBytes int64
	taskConsume         struct {
		sync.Mutex
		o map[objectio.ObjectId]struct{}
		m activeTaskStats
	}
}

func NewMergeExecutor(rt *dbutils.Runtime, sched CNMergeScheduler) *MergeExecutor {
	return &MergeExecutor{
		rt:      rt,
		cnSched: sched,
	}
}

// setSpareMem reserves memory that merges must never consume: the larger of
// 10% of total memory and the gap between host memory and the container's
// cgroup memory limit.
func (e *MergeExecutor) setSpareMem(total uint64) {
	containerMLimit, err := memlimit.FromCgroup()
	logutil.Infof("[Mergeblocks] container memory limit %v, host mem %v, err %v",
		common.HumanReadableBytes(int(containerMLimit)),
		common.HumanReadableBytes(int(total)),
		err)
	tenth := int(float64(total) * 0.1)
	limitdiff := 0
	if containerMLimit > 0 {
		limitdiff = int(total - containerMLimit)
	}
	if limitdiff > tenth {
		e.memSpare = limitdiff
	} else {
		e.memSpare = tenth
	}
}

// RefreshMemInfo samples available memory and CPU utilization; the spare
// reservation is computed once, on the first successful sample.
func (e *MergeExecutor) RefreshMemInfo() {
	if stats, err := mem.VirtualMemory(); err == nil {
		e.memAvail = int(stats.Available)
		if e.memSpare == 0 {
			e.setSpareMem(stats.Total)
		}
	}
	if percents, err := cpu.Percent(0, false); err == nil {
		e.cpuPercent = percents[0]
	}
}

// PrintStats logs the merge budget, but only when merges are in flight or
// available memory is running low.
func (e *MergeExecutor) PrintStats() {
	cnt := atomic.LoadInt32(&e.activeMergeBlkCount)
	if cnt == 0 && e.MemAvailBytes() > 512*common.Const1MBytes {
		return
	}

	logutil.Infof(
		"Mergeblocks avail mem: %v(%v reserved), active merging size: %v, active merging blk cnt: %d",
		common.HumanReadableBytes(e.memAvail),
		common.HumanReadableBytes(e.memSpare),
		common.HumanReadableBytes(int(atomic.LoadInt64(&e.activeEstimateBytes))), cnt,
	)
}
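// spareMemSketch is a hedged, self-contained sketch (not part of the original
// file) of the reservation rule setSpareMem implements: reserve the larger of
// 10% of host memory and the gap between host memory and the container's
// cgroup limit. On a 64GiB host capped at 16GiB by its container, it reserves
// 48GiB, so the budget tracks memory the process can actually use.
func spareMemSketch(total, containerLimit uint64) int {
	tenth := int(float64(total) * 0.1) // default reservation: 10% of host memory
	limitdiff := 0
	if containerLimit > 0 && containerLimit < total {
		// memory the host reports but the container is not allowed to touch;
		// the extra bound guards the uint64 subtraction against underflow
		limitdiff = int(total - containerLimit)
	}
	if limitdiff > tenth {
		return limitdiff
	}
	return tenth
}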
// AddActiveTask records a newly scheduled merge task so that its estimated
// memory usage and block count are charged against the merge budget.
func (e *MergeExecutor) AddActiveTask(taskId uint64, blkn, esize int) {
	atomic.AddInt64(&e.activeEstimateBytes, int64(esize))
	atomic.AddInt32(&e.activeMergeBlkCount, int32(blkn))
	e.taskConsume.Lock()
	if e.taskConsume.m == nil {
		e.taskConsume.m = make(activeTaskStats)
	}
	e.taskConsume.m[taskId] = struct {
		blk      int
		estBytes int
	}{blkn, esize}
	e.taskConsume.Unlock()
}

// OnExecDone is the task observer callback: it releases the resources
// charged by AddActiveTask once the merge task finishes.
func (e *MergeExecutor) OnExecDone(v any) {
	task := v.(tasks.MScopedTask)

	e.taskConsume.Lock()
	stat := e.taskConsume.m[task.ID()]
	delete(e.taskConsume.m, task.ID())
	e.taskConsume.Unlock()

	atomic.AddInt32(&e.activeMergeBlkCount, -int32(stat.blk))
	atomic.AddInt64(&e.activeEstimateBytes, -int64(stat.estBytes))
}

// ExecuteFor asks the policy to revise merge candidates under the current CPU
// and memory budget, then dispatches the merge either to CN via the
// CNMergeScheduler or to a local DN transaction task.
func (e *MergeExecutor) ExecuteFor(entry *catalog.TableEntry, policy Policy) {
	e.tableName = fmt.Sprintf("%v-%v", entry.ID, entry.GetLastestSchema().Name)

	mobjs, kind := policy.Revise(e.CPUPercent(), int64(e.MemAvailBytes()))
	if len(mobjs) < 2 {
		return
	}

	if ActiveCNObj.CheckOverlapOnCNActive(mobjs) {
		return
	}

	osize, esize, _ := estimateMergeConsume(mobjs)
	blkCnt := 0
	for _, obj := range mobjs {
		blkCnt += obj.BlockCnt()
	}
	if kind == TaskHostCN {
		stats := make([][]byte, 0, len(mobjs))
		cids := make([]common.ID, 0, len(mobjs))
		for _, obj := range mobjs {
			stat := obj.GetObjectStats()
			stats = append(stats, stat.Clone().Marshal())
			cids = append(cids, *obj.AsCommonID())
		}
		if e.rt.Scheduler.CheckAsyncScopes(cids) != nil {
			return
		}
		schema := entry.GetLastestSchema()
		cntask := &api.MergeTaskEntry{
			AccountId:         schema.AcInfo.TenantID,
			UserId:            schema.AcInfo.UserID,
			RoleId:            schema.AcInfo.RoleID,
			TblId:             entry.ID,
			DbId:              entry.GetDB().GetID(),
			TableName:         entry.GetLastestSchema().Name,
			DbName:            entry.GetDB().GetName(),
			ToMergeObjs:       stats,
			EstimatedMemUsage: uint64(esize),
		}
		if err := e.cnSched.SendMergeTask(context.TODO(), cntask); err == nil {
			ActiveCNObj.AddActiveCNObj(mobjs)
			logMergeTask(e.tableName, math.MaxUint64, mobjs, blkCnt, osize, esize)
		} else {
			logutil.Warnf("mergeblocks send to cn error: %v", err)
			return
		}
	} else {
		scopes := make([]common.ID, len(mobjs))
		for i, obj := range mobjs {
			scopes[i] = *obj.AsCommonID()
		}

		factory := func(ctx *tasks.Context, txn txnif.AsyncTxn) (tasks.Task, error) {
			return jobs.NewMergeObjectsTask(ctx, txn, mobjs, e.rt, common.DefaultMaxOsizeObjMB*common.Const1MBytes)
		}
		task, err := e.rt.Scheduler.ScheduleMultiScopedTxnTask(nil, tasks.DataCompactionTask, scopes, factory)
		if err != nil {
			if err != tasks.ErrScheduleScopeConflict {
				logutil.Infof("[Mergeblocks] Schedule error info=%v", err)
			}
			return
		}
		e.AddActiveTask(task.ID(), blkCnt, esize)
		task.AddObserver(e)
		logMergeTask(e.tableName, task.ID(), mobjs, blkCnt, osize, esize)
	}

	entry.Stats.AddMerge(osize, len(mobjs), blkCnt)
}
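// Hedged accounting sketch (hypothetical ids and sizes, not from the original
// file): ExecuteFor charges a DN-hosted task via AddActiveTask and registers
// the executor as the task's observer, so OnExecDone refunds the charge when
// the task completes. In between, MemAvailBytes reflects the in-flight
// estimate:
//
//	e.AddActiveTask(42, 8, 64*common.Const1MBytes) // task 42: 8 blocks, ~64MB
//	avail := e.MemAvailBytes()                     // 64MB charged against the budget
//	// ... merge runs; the scheduler later calls e.OnExecDone(task) ...
//	avail = e.MemAvailBytes()                      // charge released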
// MemAvailBytes returns the memory budget left for new merges: available
// memory minus the spare reservation and the estimated usage of in-flight
// merges, floored at zero.
func (e *MergeExecutor) MemAvailBytes() int {
	merging := int(atomic.LoadInt64(&e.activeEstimateBytes))
	avail := e.memAvail - e.memSpare - merging
	if avail < 0 {
		avail = 0
	}
	return avail
}

// CPUPercent returns the most recently sampled CPU utilization.
func (e *MergeExecutor) CPUPercent() int64 {
	return int64(e.cpuPercent)
}

// logMergeTask logs a scheduled merge, tagging it "CN" (taskId ==
// math.MaxUint64) or "t<id>" for DN-hosted tasks, and bumps the matching
// metrics counters.
func logMergeTask(name string, taskId uint64, merges []*catalog.ObjectEntry, blkn, osize, esize int) {
	rows := 0
	infoBuf := &bytes.Buffer{}
	for _, obj := range merges {
		r := obj.GetRemainingRows()
		rows += r
		infoBuf.WriteString(fmt.Sprintf(" %d(%s)", r, common.ShortObjId(obj.ID)))
	}
	platform := fmt.Sprintf("t%d", taskId)
	if taskId == math.MaxUint64 {
		platform = "CN"
		v2.TaskCNMergeScheduledByCounter.Inc()
		v2.TaskCNMergedSizeCounter.Add(float64(osize))
	} else {
		v2.TaskDNMergeScheduledByCounter.Inc()
		v2.TaskDNMergedSizeCounter.Add(float64(osize))
	}
	logutil.Infof(
		"[Mergeblocks] Scheduled %v [%v|on%d,bn%d|%s,%s], merged(%v): %s", name,
		platform, len(merges), blkn,
		common.HumanReadableBytes(osize), common.HumanReadableBytes(esize),
		rows,
		infoBuf.String(),
	)
}
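// Hedged end-to-end sketch (hypothetical driver; rt, cnSched, tables, and
// policy are assumptions, not part of this file): a caller is expected to
// refresh resource readings before each scheduling pass so that policy.Revise
// sees current CPU and memory headroom:
//
//	exec := NewMergeExecutor(rt, cnSched)
//	exec.RefreshMemInfo() // sample memory and CPU via gopsutil
//	for _, tbl := range tables {
//		exec.ExecuteFor(tbl, policy)
//	}
//	exec.PrintStats() // logs only when merges are active or memory is tight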