github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/tae/tables/jobs/mergeblocks.go

// Copyright 2021 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package jobs

import (
	"context"
	"fmt"
	"time"
	"unsafe"

	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/dataio/blockio"

	"github.com/RoaringBitmap/roaring"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/handle"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/txnif"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/model"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tables/txnentries"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tasks"
	"go.uber.org/zap/zapcore"
)

// CompactSegmentTaskFactory merges the non-appendable blocks of an appendable
// segment into a new non-appendable segment.
var CompactSegmentTaskFactory = func(mergedBlks []*catalog.BlockEntry, scheduler tasks.TaskScheduler) tasks.TxnTaskFactory {
	return func(ctx *tasks.Context, txn txnif.AsyncTxn) (tasks.Task, error) {
		mergedSegs := make([]*catalog.SegmentEntry, 1)
		mergedSegs[0] = mergedBlks[0].GetSegment()
		return NewMergeBlocksTask(ctx, txn, mergedBlks, mergedSegs, nil, scheduler)
	}
}

var MergeBlocksIntoSegmentTaskFctory = func(mergedBlks []*catalog.BlockEntry, toSegEntry *catalog.SegmentEntry, scheduler tasks.TaskScheduler) tasks.TxnTaskFactory {
	if toSegEntry == nil {
		panic(tasks.ErrBadTaskRequestPara)
	}
	return func(ctx *tasks.Context, txn txnif.AsyncTxn) (tasks.Task, error) {
		return NewMergeBlocksTask(ctx, txn, mergedBlks, nil, toSegEntry, scheduler)
	}
}

type mergeBlocksTask struct {
	*tasks.BaseTask
	txn         txnif.AsyncTxn
	toSegEntry  *catalog.SegmentEntry
	createdSegs []*catalog.SegmentEntry
	mergedSegs  []*catalog.SegmentEntry
	mergedBlks  []*catalog.BlockEntry
	createdBlks []*catalog.BlockEntry
	compacted   []handle.Block
	rel         handle.Relation
	scheduler   tasks.TaskScheduler
	scopes      []common.ID
	deletes     []*roaring.Bitmap
}

func NewMergeBlocksTask(ctx *tasks.Context, txn txnif.AsyncTxn, mergedBlks []*catalog.BlockEntry, mergedSegs []*catalog.SegmentEntry, toSegEntry *catalog.SegmentEntry, scheduler tasks.TaskScheduler) (task *mergeBlocksTask, err error) {
	task = &mergeBlocksTask{
		txn:         txn,
		mergedBlks:  mergedBlks,
		mergedSegs:  mergedSegs,
		createdBlks: make([]*catalog.BlockEntry, 0),
		compacted:   make([]handle.Block, 0),
		scheduler:   scheduler,
		toSegEntry:  toSegEntry,
	}
	dbId := mergedBlks[0].GetSegment().GetTable().GetDB().ID
	database, err := txn.GetDatabaseByID(dbId)
	if err != nil {
		return
	}
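	// Resolve the relation that owns the merged blocks, then collect a block
	// handle and a scope (common.ID) for every block that will be compacted.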
	relId := mergedBlks[0].GetSegment().GetTable().ID
	task.rel, err = database.GetRelationByID(relId)
	if err != nil {
		return
	}
	for _, meta := range mergedBlks {
		seg, err := task.rel.GetSegment(meta.GetSegment().GetID())
		if err != nil {
			return nil, err
		}
		blk, err := seg.GetBlock(meta.GetID())
		if err != nil {
			return nil, err
		}
		task.compacted = append(task.compacted, blk)
		task.scopes = append(task.scopes, *meta.AsCommonID())
	}
	task.BaseTask = tasks.NewBaseTask(task, tasks.DataCompactionTask, ctx)
	return
}

func (task *mergeBlocksTask) Scopes() []common.ID { return task.scopes }

func (task *mergeBlocksTask) mergeColumn(
	vecs []containers.Vector,
	sortedIdx *[]uint32,
	isPrimary bool,
	fromLayout,
	toLayout []uint32,
	sort bool) (column []containers.Vector, mapping []uint32) {
	if len(vecs) == 0 {
		return
	}
	if sort {
		if isPrimary {
			column, mapping = mergesort.MergeSortedColumn(vecs, sortedIdx, fromLayout, toLayout)
		} else {
			column = mergesort.ShuffleColumn(vecs, *sortedIdx, fromLayout, toLayout)
		}
	} else {
		column, mapping = task.mergeColumnWithOutSort(vecs, fromLayout, toLayout)
	}
	for _, vec := range vecs {
		vec.Close()
	}
	return
}

func (task *mergeBlocksTask) mergeColumnWithOutSort(column []containers.Vector, fromLayout, toLayout []uint32) (ret []containers.Vector, mapping []uint32) {
	totalLength := uint32(0)
	for _, i := range toLayout {
		totalLength += i
	}
	mapping = make([]uint32, totalLength)
	for i := range mapping {
		mapping[i] = uint32(i)
	}
	ret = mergesort.Reshape(column, fromLayout, toLayout)
	return
}

func (task *mergeBlocksTask) MarshalLogObject(enc zapcore.ObjectEncoder) (err error) {
	blks := ""
	for _, blk := range task.mergedBlks {
		blks = fmt.Sprintf("%s%d,", blks, blk.GetID())
	}
	enc.AddString("from-blks", blks)
	segs := ""
	for _, seg := range task.mergedSegs {
		segs = fmt.Sprintf("%s%d,", segs, seg.GetID())
	}
	enc.AddString("from-segs", segs)

	toblks := ""
	for _, blk := range task.createdBlks {
		toblks = fmt.Sprintf("%s%d,", toblks, blk.GetID())
	}
	if toblks != "" {
		enc.AddString("to-blks", toblks)
	}

	tosegs := ""
	for _, seg := range task.createdSegs {
		tosegs = fmt.Sprintf("%s%d,", tosegs, seg.GetID())
	}
	if tosegs != "" {
		enc.AddString("to-segs", tosegs)
	}
	return
}

func (task *mergeBlocksTask) Execute() (err error) {
	logutil.Info("[Start] Mergeblocks", common.OperationField(task.Name()),
		common.OperandField(task))
	now := time.Now()
	var toSegEntry handle.Segment
	if task.toSegEntry == nil {
		if toSegEntry, err = task.rel.CreateNonAppendableSegment(false); err != nil {
			return err
		}
		task.toSegEntry = toSegEntry.GetMeta().(*catalog.SegmentEntry)
		task.toSegEntry.SetSorted()
		task.createdSegs = append(task.createdSegs, task.toSegEntry)
	} else {
		if toSegEntry, err = task.rel.GetSegment(task.toSegEntry.GetID()); err != nil {
			return
		}
	}

	schema := task.mergedBlks[0].GetSchema()
	var view *model.ColumnView
	sortVecs := make([]containers.Vector, 0)
	rows := make([]uint32, 0)
	skipBlks := make([]int, 0)
	length := 0
	fromAddr := make([]uint32, 0, len(task.compacted))
	ids := make([]*common.ID, 0, len(task.compacted))
	task.deletes = make([]*roaring.Bitmap, len(task.compacted))

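	// Choose the sort column (the defined sort key, or the physical address
	// column when there is none) and read it from every compacted block,
	// dropping deleted rows. Blocks left empty after deletes are recorded in
	// skipBlks and excluded; fromAddr records each surviving block's starting
	// row offset in the concatenated column.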
	// Prepare sort key resources
	// If there's no sort key, use the physical address key
	var sortColDef *catalog.ColDef
	if schema.HasSortKey() {
		sortColDef = schema.GetSingleSortKey()
	} else {
		sortColDef = schema.PhyAddrKey
	}
	logutil.Infof("Mergeblocks on sort column %s\n", sortColDef.Name)
	for i, block := range task.compacted {
		if view, err = block.GetColumnDataById(sortColDef.Idx, nil); err != nil {
			return
		}
		defer view.Close()
		task.deletes[i] = view.DeleteMask
		view.ApplyDeletes()
		vec := view.Orphan()
		defer vec.Close()
		if vec.Length() == 0 {
			skipBlks = append(skipBlks, i)
			continue
		}
		sortVecs = append(sortVecs, vec)
		rows = append(rows, uint32(vec.Length()))
		fromAddr = append(fromAddr, uint32(length))
		length += vec.Length()
		ids = append(ids, block.Fingerprint())
	}

	to := make([]uint32, 0)
	maxrow := schema.BlockMaxRows
	totalRows := length
	for totalRows > 0 {
		if totalRows > int(maxrow) {
			to = append(to, maxrow)
			totalRows -= int(maxrow)
		} else {
			to = append(to, uint32(totalRows))
			break
		}
	}

	// merge sort the sort key
	node, err := common.DefaultAllocator.Alloc(length * 4)
	if err != nil {
		panic(err)
	}
	// buf is reinterpreted below as a []uint32 of len `length`; the allocation
	// is length*4 bytes, so the reinterpreted slice stays within bounds.
	buf := node[:length]
	defer common.DefaultAllocator.Free(node)
	sortedIdx := *(*[]uint32)(unsafe.Pointer(&buf))
	vecs, mapping := task.mergeColumn(sortVecs, &sortedIdx, true, rows, to, schema.HasSortKey())
	// logutil.Infof("mapping is %v", mapping)
	// logutil.Infof("sortedIdx is %v", sortedIdx)
	length = 0
	var blk handle.Block
	toAddr := make([]uint32, 0, len(vecs))
	// index meta for every created block
	// Prepare new block placeholder
	// Build and flush block index if sort key is defined
	// Flush the sort key if it correlates to only one column
	batchs := make([]*containers.Batch, 0)
	blockHandles := make([]handle.Block, 0)
	for _, vec := range vecs {
		toAddr = append(toAddr, uint32(length))
		length += vec.Length()
		blk, err = toSegEntry.CreateNonAppendableBlock()
		if err != nil {
			return err
		}
		task.createdBlks = append(task.createdBlks, blk.GetMeta().(*catalog.BlockEntry))
		blockHandles = append(blockHandles, blk)
		batch := containers.NewBatch()
		batchs = append(batchs, batch)
		vec.Close()
	}

	// Build and flush block index if sort key is defined
	// Flush the sort key if it correlates to only one column

	for _, def := range schema.ColDefs {
		if def.IsPhyAddr() {
			continue
		}
		// Skip:
		// the PhyAddr column was processed before;
		// if there is only a single sort key, it was processed before
		vecs = vecs[:0]
		for _, block := range task.compacted {
			if view, err = block.GetColumnDataById(def.Idx, nil); err != nil {
				return
			}
			defer view.Close()
			view.ApplyDeletes()
			vec := view.Orphan()
			if vec.Length() == 0 {
				continue
			}
			defer vec.Close()
			vecs = append(vecs, vec)
		}
		vecs, _ := task.mergeColumn(vecs, &sortedIdx, false, rows, to, schema.HasSortKey())
		for i := range vecs {
			defer vecs[i].Close()
		}
		for i, vec := range vecs {
			batchs[i].AddVector(def.Name, vec)
		}
	}

	phyAddr := schema.PhyAddrKey
	name := blockio.EncodeObjectName()
	writer := blockio.NewWriter(context.Background(), task.mergedBlks[0].GetBlockData().GetFs(), name)
	pkIdx := -1
	if schema.HasPK() {
		pkIdx = schema.GetSingleSortKeyIdx()
	}
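	// Flush every merged batch as a block of the new object and build a column
	// index for each column except the physical address column; the primary key
	// column, if defined, is flagged when its index is built.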
	for _, bat := range batchs {
		block, err := writer.WriteBlock(bat)
		if err != nil {
			return err
		}
		for idx, vec := range bat.Vecs {
			if phyAddr.Idx == idx {
				continue
			}
			isPk := idx == pkIdx
			_, err = BuildColumnIndex(writer.GetWriter(), block, schema.ColDefs[idx], vec, isPk, isPk)
			if err != nil {
				return err
			}
		}
	}
	blocks, err := writer.Sync()
	if err != nil {
		return err
	}
	var metaLoc string
	for i, block := range blocks {
		metaLoc, err = blockio.EncodeMetaLocWithObject(block.GetExtent(), uint32(batchs[i].Length()), blocks)
		if err != nil {
			return
		}
		err = blockHandles[i].UpdateMetaLoc(metaLoc)
		if err != nil {
			return
		}
	}
	for _, blk := range task.createdBlks {
		if err = blk.GetBlockData().Init(); err != nil {
			return err
		}
	}

	for _, compacted := range task.compacted {
		seg := compacted.GetSegment()
		if err = seg.SoftDeleteBlock(compacted.Fingerprint().BlockID); err != nil {
			return err
		}
	}
	for _, entry := range task.mergedSegs {
		if err = task.rel.SoftDeleteSegment(entry.GetID()); err != nil {
			return err
		}
	}

	table := task.toSegEntry.GetTable()
	txnEntry := txnentries.NewMergeBlocksEntry(
		task.txn,
		task.rel,
		task.mergedSegs,
		task.createdSegs,
		task.mergedBlks,
		task.createdBlks,
		mapping,
		fromAddr,
		toAddr,
		task.deletes,
		skipBlks,
		task.scheduler)
	if err = task.txn.LogTxnEntry(table.GetDB().ID, table.ID, txnEntry, ids); err != nil {
		return err
	}

	logutil.Info("[Done] Mergeblocks",
		common.AnyField("txn-start-ts", task.txn.GetStartTS().ToString()),
		common.OperationField(task.Name()),
		common.OperandField(task),
		common.DurationField(time.Since(now)))
	return err
}
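// Usage sketch (illustrative only, not part of the original file): a caller that
// has collected the catalog entries of the blocks to merge and holds a task
// scheduler would build one of the factories above and invoke it with a task
// context and transaction supplied by the engine. The variables mergedBlks,
// scheduler, ctx and txn below are assumed to be provided by that caller; only
// the factory signatures are taken from this file.
//
//	factory := CompactSegmentTaskFactory(mergedBlks, scheduler)
//	task, err := factory(ctx, txn)
//	// the returned tasks.Task is then handed to the scheduler / transaction
//	// machinery to be executed.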