github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/db/merge/mod.go (about) 1 // Copyright 2023 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package merge 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "strconv" 22 "sync" 23 "sync/atomic" 24 "time" 25 26 "github.com/matrixorigin/matrixone/pkg/common/moerr" 27 "github.com/matrixorigin/matrixone/pkg/fileservice" 28 "github.com/matrixorigin/matrixone/pkg/objectio" 29 "github.com/matrixorigin/matrixone/pkg/pb/api" 30 taskpb "github.com/matrixorigin/matrixone/pkg/pb/task" 31 "github.com/matrixorigin/matrixone/pkg/taskservice" 32 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog" 33 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common" 34 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/txnif" 35 ) 36 37 var StopMerge atomic.Bool 38 39 type CNMergeScheduler interface { 40 SendMergeTask(ctx context.Context, task *api.MergeTaskEntry) error 41 } 42 43 func NewTaskServiceGetter(getter taskservice.Getter) CNMergeScheduler { 44 return &taskServiceGetter{ 45 Getter: getter, 46 } 47 } 48 49 type taskServiceGetter struct { 50 taskservice.Getter 51 } 52 53 func (tsg *taskServiceGetter) SendMergeTask(ctx context.Context, task *api.MergeTaskEntry) error { 54 ts, ok := tsg.Getter() 55 if !ok { 56 return taskservice.ErrNotReady 57 } 58 taskIDPrefix := "Merge:" + task.TableName 59 asyncTask, err := ts.QueryAsyncTask(ctx, 60 taskservice.WithTaskMetadataId(taskservice.LIKE, taskIDPrefix+"%"), 61 taskservice.WithTaskStatusCond(taskpb.TaskStatus_Created, taskpb.TaskStatus_Running)) 62 if err != nil { 63 return err 64 } 65 if len(asyncTask) != 0 { 66 return moerr.NewInternalError(ctx, fmt.Sprintf("table %q is merging", task.TableName)) 67 } 68 b, err := task.Marshal() 69 if err != nil { 70 return err 71 } 72 return ts.CreateAsyncTask(ctx, 73 taskpb.TaskMetadata{ 74 ID: taskIDPrefix + ":" + strconv.FormatInt(time.Now().Unix(), 10), 75 Executor: taskpb.TaskCode_MergeObject, 76 Context: b, 77 Options: taskpb.TaskOptions{Resource: &taskpb.Resource{Memory: task.EstimatedMemUsage}}, 78 }) 79 } 80 81 type TaskHostKind int 82 83 const ( 84 TaskHostCN TaskHostKind = iota 85 TaskHostDN 86 ) 87 88 type activeEntry struct { 89 tid uint64 90 insertAt time.Time 91 } 92 93 var ActiveCNObj ActiveCNObjMap = ActiveCNObjMap{ 94 o: make(map[objectio.ObjectId]activeEntry), 95 } 96 97 type ActiveCNObjMap struct { 98 sync.Mutex 99 o map[objectio.ObjectId]activeEntry 100 } 101 102 func (e *ActiveCNObjMap) Prune(id uint64, ago time.Duration) { 103 e.Lock() 104 defer e.Unlock() 105 now := time.Now() 106 if ago == 0 { 107 for k, v := range e.o { 108 if v.tid == id { 109 delete(e.o, k) 110 } 111 } 112 return 113 } 114 115 if id == 0 && ago > 1*time.Second { 116 for k, v := range e.o { 117 if now.Sub(v.insertAt) > ago { 118 delete(e.o, k) 119 } 120 } 121 return 122 } 123 for k, v := range e.o { 124 if v.tid == id && now.Sub(v.insertAt) > ago { 125 delete(e.o, k) 126 } 127 } 128 } 129 130 func (e *ActiveCNObjMap) String() string { 131 e.Lock() 132 defer e.Unlock() 133 134 b := &bytes.Buffer{} 135 now := time.Now() 136 for k, v := range e.o { 137 b.WriteString(fmt.Sprintf(" id: %v, table: %v, insertAt: %s ago\n", 138 k.String(), v.tid, now.Sub(v.insertAt).String())) 139 } 140 return b.String() 141 } 142 143 func (e *ActiveCNObjMap) AddActiveCNObj(entries []*catalog.ObjectEntry) { 144 e.Lock() 145 for _, entry := range entries { 146 e.o[entry.ID] = activeEntry{ 147 entry.GetTable().ID, 148 time.Now(), 149 } 150 } 151 e.Unlock() 152 } 153 154 func (e *ActiveCNObjMap) RemoveActiveCNObj(ids []objectio.ObjectId) { 155 e.Lock() 156 defer e.Unlock() 157 for _, id := range ids { 158 delete(e.o, id) 159 } 160 } 161 162 func (e *ActiveCNObjMap) CheckOverlapOnCNActive(entries []*catalog.ObjectEntry) bool { 163 e.Lock() 164 defer e.Unlock() 165 for _, entry := range entries { 166 if _, ok := e.o[entry.ID]; ok { 167 return true 168 } 169 } 170 return false 171 } 172 173 func CleanUpUselessFiles(entry *api.MergeCommitEntry, fs fileservice.FileService) { 174 if entry == nil { 175 return 176 } 177 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) 178 defer cancel() 179 if len(entry.BookingLoc) != 0 { 180 loc := objectio.Location(entry.BookingLoc) 181 _ = fs.Delete(ctx, loc.Name().String()) 182 } 183 if len(entry.CreatedObjs) != 0 { 184 for _, obj := range entry.CreatedObjs { 185 if len(obj) == 0 { 186 continue 187 } 188 s := objectio.ObjectStats(obj) 189 _ = fs.Delete(ctx, s.ObjectName().String()) 190 } 191 } 192 } 193 194 const ( 195 constMergeMinBlks = 5 196 constMergeExpansionRate = 6 197 constMaxMemCap = 4 * constMergeExpansionRate * common.Const1GBytes // max orginal memory for a object 198 constSmallMergeGap = 3 * time.Minute 199 ) 200 201 type Policy interface { 202 OnObject(obj *catalog.ObjectEntry) 203 Revise(cpu, mem int64) ([]*catalog.ObjectEntry, TaskHostKind) 204 ResetForTable(*catalog.TableEntry) 205 SetConfig(*catalog.TableEntry, func() txnif.AsyncTxn, any) 206 GetConfig(*catalog.TableEntry) any 207 } 208 209 func NewUpdatePolicyReq(c *BasicPolicyConfig) *api.AlterTableReq { 210 return &api.AlterTableReq{ 211 Kind: api.AlterKind_UpdatePolicy, 212 Operation: &api.AlterTableReq_UpdatePolicy{ 213 UpdatePolicy: &api.AlterTablePolicy{ 214 MinOsizeQuailifed: uint32(c.ObjectMinOsize), 215 MaxObjOnerun: uint32(c.MergeMaxOneRun), 216 MaxOsizeMergedObj: uint32(c.MaxOsizeMergedObj), 217 MinCnMergeSize: uint64(c.MinCNMergeSize), 218 Hints: c.MergeHints, 219 }, 220 }, 221 } 222 }