github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/tables/jobs/mergeobjects.go

// Copyright 2021 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package jobs

import (
	"context"
	"fmt"
	"strings"

	pkgcatalog "github.com/matrixorigin/matrixone/pkg/catalog"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/container/batch"
	"github.com/matrixorigin/matrixone/pkg/container/nulls"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/objectio"
	"github.com/matrixorigin/matrixone/pkg/pb/api"
	"github.com/matrixorigin/matrixone/pkg/perfcounter"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/blockio"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/db/dbutils"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/handle"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/txnif"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tables/txnentries"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tasks"
)

type mergeObjectsTask struct {
	*tasks.BaseTask
	txn               txnif.AsyncTxn
	rt                *dbutils.Runtime
	mergedObjs        []*catalog.ObjectEntry
	mergedObjsHandle  []handle.Object
	mergedBlkCnt      []int
	totalMergedBlkCnt int
	createdBObjs      []*catalog.ObjectEntry
	commitEntry       *api.MergeCommitEntry
	rel               handle.Relation
	did, tid          uint64

	doTransfer bool

	blkCnt     []int
	nMergedBlk []int
	schema     *catalog.Schema
	idxs       []int
	attrs      []string

	targetObjSize uint32
}

func NewMergeObjectsTask(
	ctx *tasks.Context,
	txn txnif.AsyncTxn,
	mergedObjs []*catalog.ObjectEntry,
	rt *dbutils.Runtime,
	targetObjSize uint32) (task *mergeObjectsTask, err error) {
	if len(mergedObjs) == 0 {
		panic("empty mergedObjs")
	}
	task = &mergeObjectsTask{
		txn:          txn,
		rt:           rt,
		mergedObjs:   mergedObjs,
		createdBObjs: make([]*catalog.ObjectEntry, 0),
		mergedBlkCnt: make([]int, len(mergedObjs)),
		nMergedBlk:   make([]int, len(mergedObjs)),
		blkCnt:       make([]int, len(mergedObjs)),

		targetObjSize: targetObjSize,
	}
	for i, obj := range mergedObjs {
		task.mergedBlkCnt[i] = task.totalMergedBlkCnt
		task.blkCnt[i] = obj.BlockCnt()
		task.totalMergedBlkCnt += task.blkCnt[i]
	}

	task.did = mergedObjs[0].GetTable().GetDB().ID
	database, err := txn.GetDatabaseByID(task.did)
	if err != nil {
		return
	}
	task.tid = mergedObjs[0].GetTable().ID
	task.rel, err = database.GetRelationByID(task.tid)
	if err != nil {
		return
	}
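	// Pin a transaction-scoped handle for every object being merged; later
	// column reads in PrepareData/LoadNextBatch go through these handles.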
	for _, meta := range mergedObjs {
		obj, err := task.rel.GetObject(&meta.ID)
		if err != nil {
			return nil, err
		}
		task.mergedObjsHandle = append(task.mergedObjsHandle, obj)
	}
	task.schema = task.rel.Schema().(*catalog.Schema)
	task.doTransfer = !strings.Contains(task.schema.Comment, pkgcatalog.MO_COMMENT_NO_DEL_HINT)
	task.idxs = make([]int, 0, len(task.schema.ColDefs)-1)
	task.attrs = make([]string, 0, len(task.schema.ColDefs)-1)
	for _, def := range task.schema.ColDefs {
		if def.IsPhyAddr() {
			continue
		}
		task.idxs = append(task.idxs, def.Idx)
		task.attrs = append(task.attrs, def.Name)
	}
	task.BaseTask = tasks.NewBaseTask(task, tasks.DataCompactionTask, ctx)
	return
}

func (task *mergeObjectsTask) GetObjectCnt() int {
	return len(task.mergedObjs)
}

func (task *mergeObjectsTask) GetBlkCnts() []int {
	return task.blkCnt
}

func (task *mergeObjectsTask) GetAccBlkCnts() []int {
	return task.mergedBlkCnt
}

func (task *mergeObjectsTask) GetBlockMaxRows() uint32 {
	return task.schema.BlockMaxRows
}

func (task *mergeObjectsTask) GetObjectMaxBlocks() uint16 {
	return task.schema.ObjectMaxBlocks
}

func (task *mergeObjectsTask) GetTargetObjSize() uint32 {
	return task.targetObjSize
}

func (task *mergeObjectsTask) GetSortKeyPos() int {
	sortKeyPos := -1
	if task.schema.HasSortKey() {
		sortKeyPos = task.schema.GetSingleSortKeyIdx()
	}
	return sortKeyPos
}

func (task *mergeObjectsTask) GetSortKeyType() types.Type {
	if task.schema.HasSortKey() {
		return task.schema.GetSingleSortKeyType()
	}
	return types.Type{}
}

// impl DisposableVecPool
func (task *mergeObjectsTask) GetVector(typ *types.Type) (*vector.Vector, func()) {
	v := task.rt.VectorPool.Transient.GetVector(typ)
	return v.GetDownstreamVector(), v.Close
}

func (task *mergeObjectsTask) GetMPool() *mpool.MPool {
	return task.rt.VectorPool.Transient.GetMPool()
}

func (task *mergeObjectsTask) HostHintName() string { return "DN" }

func (task *mergeObjectsTask) PrepareData(ctx context.Context) ([]*batch.Batch, []*nulls.Nulls, func(), error) {
	var err error
	views := make([]*containers.BlockView, task.totalMergedBlkCnt)
	releaseF := func() {
		for _, view := range views {
			if view != nil {
				view.Close()
			}
		}
	}
	defer func() {
		if err != nil {
			releaseF()
		}
	}()
	schema := task.rel.Schema().(*catalog.Schema)
	idxs := make([]int, 0, len(schema.ColDefs)-1)
	attrs := make([]string, 0, len(schema.ColDefs)-1)
	for _, def := range schema.ColDefs {
		if def.IsPhyAddr() {
			continue
		}
		idxs = append(idxs, def.Idx)
		attrs = append(attrs, def.Name)
	}
	for i, obj := range task.mergedObjsHandle {

		maxBlockOffset := task.totalMergedBlkCnt
		if i != len(task.mergedObjs)-1 {
			maxBlockOffset = task.mergedBlkCnt[i+1]
		}
		minBlockOffset := task.mergedBlkCnt[i]

		for j := 0; j < maxBlockOffset-minBlockOffset; j++ {
			if views[minBlockOffset+j], err = obj.GetColumnDataByIds(ctx, uint16(j), idxs, common.MergeAllocator); err != nil {
				return nil, nil, nil, err
			}
		}
	}

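	// Wrap every loaded block view in a batch that shares the underlying
	// vectors (no copy) and collect the per-block delete masks for the merger.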
	batches := make([]*batch.Batch, 0, task.totalMergedBlkCnt)
	dels := make([]*nulls.Nulls, 0, task.totalMergedBlkCnt)
	for _, view := range views {
		batch := batch.New(true, attrs)
		if len(attrs) != len(view.Columns) {
			panic(fmt.Sprintf("mismatch %v, %v, %v", attrs, len(attrs), len(view.Columns)))
		}
		for i, col := range view.Columns {
			batch.Vecs[i] = col.GetData().GetDownstreamVector()
		}
		batch.SetRowCount(view.Columns[0].Length())
		batches = append(batches, batch)
		dels = append(dels, view.DeleteMask)
	}

	return batches, dels, releaseF, nil
}

func (task *mergeObjectsTask) LoadNextBatch(ctx context.Context, objIdx uint32) (*batch.Batch, *nulls.Nulls, func(), error) {
	if objIdx >= uint32(len(task.mergedObjs)) {
		panic("invalid objIdx")
	}
	if task.nMergedBlk[objIdx] >= task.blkCnt[objIdx] {
		return nil, nil, nil, mergesort.ErrNoMoreBlocks
	}
	var err error
	var view *containers.BlockView
	releaseF := func() {
		if view != nil {
			view.Close()
		}
	}
	defer func() {
		if err != nil {
			releaseF()
		}
	}()

	obj := task.mergedObjsHandle[objIdx]
	view, err = obj.GetColumnDataByIds(ctx, uint16(task.nMergedBlk[objIdx]), task.idxs, common.MergeAllocator)
	if err != nil {
		return nil, nil, nil, err
	}
	if len(task.attrs) != len(view.Columns) {
		panic(fmt.Sprintf("mismatch %v, %v, %v", task.attrs, len(task.attrs), len(view.Columns)))
	}
	task.nMergedBlk[objIdx]++

	bat := batch.New(true, task.attrs)
	for i, col := range view.Columns {
		bat.Vecs[i] = col.GetData().GetDownstreamVector()
	}
	bat.SetRowCount(view.Columns[0].Length())
	return bat, view.DeleteMask, releaseF, nil
}

func (task *mergeObjectsTask) GetCommitEntry() *api.MergeCommitEntry {
	if task.commitEntry == nil {
		return task.prepareCommitEntry()
	}
	return task.commitEntry
}

func (task *mergeObjectsTask) prepareCommitEntry() *api.MergeCommitEntry {
	schema := task.rel.Schema().(*catalog.Schema)
	commitEntry := &api.MergeCommitEntry{}
	commitEntry.DbId = task.did
	commitEntry.TblId = task.tid
	commitEntry.TableName = schema.Name
	commitEntry.StartTs = task.txn.GetStartTS().ToTimestamp()
	for _, o := range task.mergedObjs {
		obj := o.GetObjectStats()
		commitEntry.MergedObjs = append(commitEntry.MergedObjs, obj.Clone().Marshal())
	}
	task.commitEntry = commitEntry
	// leave mapping to ReadMergeAndWrite
	return commitEntry
}

func (task *mergeObjectsTask) PrepareNewWriter() *blockio.BlockWriter {
	schema := task.rel.Schema().(*catalog.Schema)
	seqnums := make([]uint16, 0, len(schema.ColDefs)-1)
	for _, def := range schema.ColDefs {
		if def.IsPhyAddr() {
			continue
		}
		seqnums = append(seqnums, def.SeqNum)
	}
	sortkeyIsPK := false
	sortkeyPos := -1

	if schema.HasPK() {
		sortkeyPos = schema.GetSingleSortKeyIdx()
		sortkeyIsPK = true
	} else if schema.HasSortKey() {
		sortkeyPos = schema.GetSingleSortKeyIdx()
	}

	return mergesort.GetNewWriter(task.rt.Fs.Service, schema.Version, seqnums, sortkeyPos, sortkeyIsPK)
}

func (task *mergeObjectsTask) DoTransfer() bool {
	return task.doTransfer
}

func (task *mergeObjectsTask) Execute(ctx context.Context) (err error) {
	phaseDesc := ""
	defer func() {
		if err != nil {
			logutil.Error("[DoneWithErr] Mergeblocks", common.OperationField(task.Name()),
				common.AnyField("error", err),
				common.AnyField("phase", phaseDesc),
			)
		}
	}()

	schema := task.rel.Schema().(*catalog.Schema)
	sortkeyPos := -1
	if schema.HasSortKey() {
		sortkeyPos = schema.GetSingleSortKeyIdx()
	}

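	// Phase 1 sorts/merges the source data and writes the new objects;
	// phase 2 installs the resulting commit entry into the catalog within
	// this transaction.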
	phaseDesc = "1-DoMergeAndWrite"
	if err = mergesort.DoMergeAndWrite(ctx, sortkeyPos, int(schema.BlockMaxRows), task); err != nil {
		return err
	}

	phaseDesc = "2-HandleMergeEntryInTxn"
	if task.createdBObjs, err = HandleMergeEntryInTxn(task.txn, task.commitEntry, task.rt); err != nil {
		return err
	}

	perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) {
		counter.TAE.Object.MergeBlocks.Add(1)
	})
	return nil
}

func HandleMergeEntryInTxn(txn txnif.AsyncTxn, entry *api.MergeCommitEntry, rt *dbutils.Runtime) ([]*catalog.ObjectEntry, error) {
	database, err := txn.GetDatabaseByID(entry.DbId)
	if err != nil {
		return nil, err
	}
	rel, err := database.GetRelationByID(entry.TblId)
	if err != nil {
		return nil, err
	}

	mergedObjs := make([]*catalog.ObjectEntry, 0, len(entry.MergedObjs))
	createdObjs := make([]*catalog.ObjectEntry, 0, len(entry.CreatedObjs))
	ids := make([]*common.ID, 0, len(entry.MergedObjs)*2)

	// drop merged blocks and objects
	for _, item := range entry.MergedObjs {
		drop := objectio.ObjectStats(item)
		objID := drop.ObjectName().ObjectId()
		obj, err := rel.GetObject(objID)
		if err != nil {
			return nil, err
		}
		mergedObjs = append(mergedObjs, obj.GetMeta().(*catalog.ObjectEntry))
		if err = rel.SoftDeleteObject(objID); err != nil {
			return nil, err
		}
	}

	// construct new objects
	for _, stats := range entry.CreatedObjs {
		stats := objectio.ObjectStats(stats)
		objID := stats.ObjectName().ObjectId()
		obj, err := rel.CreateNonAppendableObject(false, new(objectio.CreateObjOpt).WithId(objID))
		if err != nil {
			return nil, err
		}
		createdObjs = append(createdObjs, obj.GetMeta().(*catalog.ObjectEntry))
		// set stats and sorted property
		if err = obj.UpdateStats(stats); err != nil {
			return nil, err
		}
		objEntry := obj.GetMeta().(*catalog.ObjectEntry)
		objEntry.SetSorted()
	}

	txnEntry, err := txnentries.NewMergeObjectsEntry(
		txn,
		rel,
		mergedObjs,
		createdObjs,
		entry.Booking,
		rt,
	)
	if err != nil {
		return nil, err
	}

	if err = txn.LogTxnEntry(entry.DbId, entry.TblId, txnEntry, ids); err != nil {
		return nil, err
	}

	return createdObjs, nil
}

func (task *mergeObjectsTask) GetTotalSize() uint32 {
	totalSize := uint32(0)
	for _, obj := range task.mergedObjs {
		totalSize += uint32(obj.GetOriginSize())
	}
	return totalSize
}

func (task *mergeObjectsTask) GetTotalRowCnt() uint32 {
	totalRowCnt := 0
	for _, obj := range task.mergedObjs {
		totalRowCnt += obj.GetRows()
	}
	return uint32(totalRowCnt)
}

// for UT
func (task *mergeObjectsTask) GetCreatedObjects() []*catalog.ObjectEntry {
	return task.createdBObjs
}
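// Reader-side sketch (illustrative, not part of the original file): the merge
// pipeline is expected to drain one source object at a time, calling
// LoadNextBatch until it reports mergesort.ErrNoMoreBlocks. Loop shape only;
// error handling and the actual merge step are elided, and the consumer shown
// here is an assumption rather than the exact mergesort internals:
//
//	for objIdx := uint32(0); objIdx < uint32(task.GetObjectCnt()); objIdx++ {
//		for {
//			bat, dels, release, err := task.LoadNextBatch(ctx, objIdx)
//			if err == mergesort.ErrNoMoreBlocks {
//				break
//			}
//			// ... feed bat and dels to the merger, then free the block view
//			_ = bat
//			_ = dels
//			release()
//		}
//	}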