github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/tables/jobs/flushTableTail.go

// Copyright 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package jobs

import (
	"context"
	"fmt"
	"strings"
	"time"

	pkgcatalog "github.com/matrixorigin/matrixone/pkg/catalog"
	"github.com/matrixorigin/matrixone/pkg/common/bitmap"
	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/container/batch"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/objectio"
	"github.com/matrixorigin/matrixone/pkg/pb/api"
	"github.com/matrixorigin/matrixone/pkg/util/fault"
	v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/blockio"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/db/dbutils"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/handle"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/txnif"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tables/txnentries"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tasks"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
)

type TestFlushBailoutPos1 struct{}
type TestFlushBailoutPos2 struct{}

var FlushTableTailTaskFactory = func(
	metas []*catalog.ObjectEntry, rt *dbutils.Runtime, endTs types.TS, /* end of dirty range */
) tasks.TxnTaskFactory {
	return func(ctx *tasks.Context, txn txnif.AsyncTxn) (tasks.Task, error) {
		return NewFlushTableTailTask(ctx, txn, metas, rt, endTs)
	}
}

type flushTableTailTask struct {
	*tasks.BaseTask
	txn        txnif.AsyncTxn
	rt         *dbutils.Runtime
	dirtyEndTs types.TS

	scopes []common.ID
	schema *catalog.Schema

	rel  handle.Relation
	dbid uint64

	// record the row mapping from deleted blocks to created blocks
	transMappings *api.BlkTransferBooking
	doTransfer    bool

	aObjMetas         []*catalog.ObjectEntry
	delSrcMetas       []*catalog.ObjectEntry
	aObjHandles       []handle.Object
	delSrcHandles     []handle.Object
	createdObjHandles handle.Object

	dirtyLen                 int
	createdMergedObjectName  string
	createdDeletesObjectName string

	mergeRowsCnt, aObjDeletesCnt, nObjDeletesCnt int
}

// A note about the flush start timestamp
//
// Because the last **committed** time, not the newest allocated time,
// is used in NewFlushTableTailTask, some committing appends prepared
// between the committed-time and the aobj-freeze-time may be ignored
// during the data collection stage of flushing,
// which leads to a transfer-row-not-found problem.
//
// The proposed solution is to add a check function in NewFlushTableTailTask
// to figure out if there exists an AppendNode with a bigger prepared time
// than the flush-start-ts, and if so, retry the flush task.
//
// Two questions:
//
//  1. What about deletes prepared in that special time range?
//     Never mind, deletes will be transferred when committing the flush task.
//  2. Is it guaranteed that the check function is able to see all possible AppendNodes?
//     Probably not, because getting the appender and attaching an AppendNode are not an atomic group of operations.
//     Imagine:
//
//	            freeze   check
//	committed   x1 |       |            x2
//	prepared       |       |     o2
//	preparing   i2 |       |
//
// - x1 is the last committed time.
// - getting the appender (i2 in the graph) happens before the freezing
// - attaching the AppendNode successfully (o2 in the graph) happens after the check
// - the commit finishes at x2
//
// So in order for the check function to work, a dedicated lock is added
// on the ablock to ensure that NO AppendNode will be attached to the ablock
// after the very moment when the ablock is frozen.
//
// In the first version proposal, the check in NewFlushTableTailTask is omitted,
// because the existing PrepareCompact in the ablock already handles that case.
// If the last AppendNode in an ablock is not committed, PrepareCompact will
// return false to reschedule the task. However, committing an AppendNode doesn't
// guarantee that its commit ts has been updated. It's still possible to get an
// old start ts which is not able to collect all appends in the ablock.
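//
// Illustratively, the retry check described above (CheckFlushTaskRetry below)
// boils down to something like the following sketch; this is only a sketch of
// the idea, not the actual implementation, which lives in the object data module:
//
//	for each AppendNode attached to the frozen aobject:
//	    if node.PreparedTS > flushStartTS {
//	        return true // ask the caller to reschedule the flush task
//	    }
//	return false
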
func NewFlushTableTailTask(
	ctx *tasks.Context,
	txn txnif.AsyncTxn,
	objs []*catalog.ObjectEntry,
	rt *dbutils.Runtime,
	dirtyEndTs types.TS,
) (task *flushTableTailTask, err error) {
	task = &flushTableTailTask{
		txn:        txn,
		rt:         rt,
		dirtyEndTs: dirtyEndTs,
	}
	meta := objs[0]
	dbId := meta.GetTable().GetDB().ID
	task.dbid = dbId
	database, err := txn.UnsafeGetDatabase(dbId)
	if err != nil {
		return
	}
	tableId := meta.GetTable().ID
	rel, err := database.UnsafeGetRelation(tableId)
	task.rel = rel
	if err != nil {
		return
	}
	task.schema = rel.Schema().(*catalog.Schema)

	for _, obj := range objs {
		task.scopes = append(task.scopes, *obj.AsCommonID())
		var hdl handle.Object
		hdl, err = rel.GetObject(&obj.ID)
		if err != nil {
			return
		}
		if hdl.IsAppendable() && !obj.HasDropCommitted() {
			task.aObjMetas = append(task.aObjMetas, obj)
			task.aObjHandles = append(task.aObjHandles, hdl)
			if obj.GetObjectData().CheckFlushTaskRetry(txn.GetStartTS()) {
				logutil.Infof("[FlushTabletail] obj %v needs retry", obj.ID.String())
				return nil, txnif.ErrTxnNeedRetry
			}
		} else {
			task.delSrcMetas = append(task.delSrcMetas, obj)
			task.delSrcHandles = append(task.delSrcHandles, hdl)
		}
	}

	task.doTransfer = !strings.Contains(task.schema.Comment, pkgcatalog.MO_COMMENT_NO_DEL_HINT)
	if task.doTransfer {
		task.transMappings = mergesort.NewBlkTransferBooking(len(task.aObjHandles))
	}

	task.BaseTask = tasks.NewBaseTask(task, tasks.DataCompactionTask, ctx)

	tblEntry := rel.GetMeta().(*catalog.TableEntry)
	tblEntry.Stats.RLock()
	defer tblEntry.Stats.RUnlock()
	task.dirtyLen = len(tblEntry.DeletedDirties)
	for _, obj := range tblEntry.DeletedDirties {
		task.scopes = append(task.scopes, *obj.AsCommonID())
		var hdl handle.Object
		hdl, err = rel.GetObject(&obj.ID)
		if err != nil {
			return
		}
		task.delSrcMetas = append(task.delSrcMetas, obj)
		task.delSrcHandles = append(task.delSrcHandles, hdl)
	}
	return
}

// impl DisposableVecPool
func (task *flushTableTailTask) GetVector(typ *types.Type) (*vector.Vector, func()) {
	v := task.rt.VectorPool.Transient.GetVector(typ)
	return v.GetDownstreamVector(), v.Close
}

func (task *flushTableTailTask) GetMPool() *mpool.MPool {
	return task.rt.VectorPool.Transient.GetMPool()
}

// Scopes is used for conflict checking in the scheduler. For the ScopedTask interface.
func (task *flushTableTailTask) Scopes() []common.ID { return task.scopes }

// Name is for the ScopedTask interface.
func (task *flushTableTailTask) Name() string {
	return fmt.Sprintf("[%d]FT-%d-%s", task.ID(), task.rel.ID(), task.schema.Name)
}

func (task *flushTableTailTask) MarshalLogObject(enc zapcore.ObjectEncoder) (err error) {
	enc.AddString("endTs", task.dirtyEndTs.ToString())
	objs := ""
	for _, obj := range task.aObjMetas {
		objs = fmt.Sprintf("%s%s,", objs, obj.ID.ShortStringEx())
	}
	enc.AddString("a-objs", objs)
	// delsrc := ""
	// for _, del := range task.delSrcMetas {
	// 	delsrc = fmt.Sprintf("%s%s,", delsrc, del.ID.ShortStringEx())
	// }
	// enc.AddString("deletes-src", delsrc)
	enc.AddInt("delete-obj-ndv", len(task.delSrcMetas))

	toObjs := ""
	if task.createdObjHandles != nil {
		id := task.createdObjHandles.GetID()
		toObjs = fmt.Sprintf("%s%s,", toObjs, id.ShortStringEx())
	}
	if toObjs != "" {
		enc.AddString("to-objs", toObjs)
	}
	return
}

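// Execute runs the flush in the phases marked by phaseDesc below: it schedules
// io tasks to flush each aobject (data and deletes) for snapshot read, collects
// deletes from the delete-source objects and schedules an io task to flush them,
// synchronously merges the aobjects into a new non-appendable object, waits for
// the scheduled io tasks, and finally logs a FlushTableTailEntry via LogTxnEntry
// so that the remaining work (such as transferring deletes) is handled when the
// txn commits.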
func (task *flushTableTailTask) Execute(ctx context.Context) (err error) {
	logutil.Info("[Start]", common.OperationField(task.Name()), common.OperandField(task),
		common.OperandField(len(task.aObjHandles)+len(task.delSrcHandles)))

	phaseDesc := ""
	defer func() {
		if err != nil {
			logutil.Error("[DoneWithErr]", common.OperationField(task.Name()),
				common.AnyField("error", err),
				common.AnyField("phase", phaseDesc),
			)
		}
	}()
	now := time.Now()

	/////////////////////
	//// phase separator
	///////////////////

	phaseDesc = "1-flushing appendable blocks for snapshot"
	snapshotSubtasks, err := task.flushAObjsForSnapshot(ctx)
	if err != nil {
		return
	}
	defer func() {
		releaseFlushObjTasks(snapshotSubtasks, err)
	}()

	/////////////////////
	//// phase separator
	///////////////////

	phaseDesc = "1-write all deletes from naobjs"
	// just collect deletes, do not soft delete them; leave that to the merge task.
	deleteTask, emptyMap, err := task.flushAllDeletesFromDelSrc(ctx)
	if err != nil {
		return
	}
	defer func() {
		relaseFlushDelTask(deleteTask, err)
	}()
	/////////////////////
	//// phase separator
	///////////////////

	phaseDesc = "1-merge aobjects"
	// merge aobjects; no need to wait because it is a synchronous procedure,
	// which is why it is put after flushAObjsForSnapshot and flushAllDeletesFromDelSrc
	if err = task.mergeAObjs(ctx); err != nil {
		return
	}

	if v := ctx.Value(TestFlushBailoutPos1{}); v != nil {
		err = moerr.NewInternalErrorNoCtx("test merge bail out")
		return
	}

	/////////////////////
	//// phase separator
	///////////////////
	phaseDesc = "1-waiting flushing appendable blocks for snapshot"
	// wait for the flush tasks
	if err = task.waitFlushAObjForSnapshot(ctx, snapshotSubtasks); err != nil {
		return
	}

	/////////////////////
	//// phase separator
	///////////////////

	phaseDesc = "1-wait flushing all deletes from naobjs"
	if err = task.waitFlushAllDeletesFromDelSrc(ctx, deleteTask, emptyMap); err != nil {
		return
	}

	phaseDesc = "1-wait LogTxnEntry"
	txnEntry, err := txnentries.NewFlushTableTailEntry(
		task.txn,
		task.ID(),
		task.transMappings,
		task.rel.GetMeta().(*catalog.TableEntry),
		task.aObjMetas,
		task.delSrcMetas,
		task.aObjHandles,
		task.delSrcHandles,
		task.createdObjHandles,
		task.createdDeletesObjectName,
		task.createdMergedObjectName,
		task.dirtyLen,
		task.rt,
		task.dirtyEndTs,
	)
	if err != nil {
		return err
	}
	if err = task.txn.LogTxnEntry(
		task.dbid,
		task.rel.ID(),
		txnEntry,
		nil,
	); err != nil {
		return
	}
	/////////////////////

	duration := time.Since(now)
	logutil.Info("[End]", common.OperationField(task.Name()),
		common.AnyField("txn-start-ts", task.txn.GetStartTS().ToString()),
		zap.Int("aobj-deletes", task.aObjDeletesCnt),
		zap.Int("aobj-merge-rows", task.mergeRowsCnt),
		zap.Int("nobj-deletes", task.nObjDeletesCnt),
		common.DurationField(duration),
		common.OperandField(task))

	v2.TaskFlushTableTailDurationHistogram.Observe(duration.Seconds())

	sleep, name, exist := fault.TriggerFault("slow_flush")
	if exist && name == task.schema.Name {
		time.Sleep(time.Duration(sleep) * time.Second)
	}
	return
}

// prepareAObjSortedData reads the data from appendable blocks and sorts it if a sort key exists
func (task *flushTableTailTask) prepareAObjSortedData(
	ctx context.Context, objIdx int, idxs []int, sortKeyPos int,
) (bat *containers.Batch, empty bool, err error) {
	if len(idxs) <= 0 {
		logutil.Infof("[FlushTabletail] no mergeable columns")
		return nil, true, nil
	}
	obj := task.aObjHandles[objIdx]

	views, err := obj.GetColumnDataByIds(ctx, 0, idxs, common.MergeAllocator)
	if err != nil {
		return
	}
	bat = containers.NewBatch()
	rowCntBeforeApplyDelete := views.Columns[0].Length()
	deletes := views.DeleteMask
	views.ApplyDeletes()
	defer views.Close()
	for i, colidx := range idxs {
		colview := views.Columns[i]
		if colview == nil {
			empty = true
			return
		}
		vec := colview.Orphan()
		if vec.Length() == 0 {
			empty = true
			vec.Close()
			bat.Close()
			return
		}
		bat.AddVector(task.schema.ColDefs[colidx].Name, vec.TryConvertConst())
	}

	if deletes != nil {
		task.aObjDeletesCnt += deletes.GetCardinality()
	}

	var sortMapping []int64
	if sortKeyPos >= 0 {
		if objIdx == 0 {
			logutil.Infof("flushtabletail sort obj on %s", bat.Attrs[sortKeyPos])
		}
		sortMapping, err = mergesort.SortBlockColumns(bat.Vecs, sortKeyPos, task.rt.VectorPool.Transient)
		if err != nil {
			return
		}
	}
	if task.doTransfer {
		mergesort.AddSortPhaseMapping(task.transMappings, objIdx, rowCntBeforeApplyDelete, deletes, sortMapping)
	}
	return
}

// mergeAObjs merges the data from the appendable blocks, writes the merged data
// to new blocks, and records the row mapping in the blkTransferBooking struct
func (task *flushTableTailTask) mergeAObjs(ctx context.Context) (err error) {
	if len(task.aObjMetas) == 0 {
		return nil
	}

	// prepare the column idxs and the sort key to read sorted batches
	schema := task.schema
	seqnums := make([]uint16, 0, len(schema.ColDefs))
	readColIdxs := make([]int, 0, len(schema.ColDefs))
	sortKeyIdx := -1
	sortKeyPos := -1
	if schema.HasSortKey() {
		sortKeyIdx = schema.GetSingleSortKeyIdx()
	}
	for i, def := range schema.ColDefs {
		if def.IsPhyAddr() {
			continue
		}
		readColIdxs = append(readColIdxs, def.Idx)
		if def.Idx == sortKeyIdx {
			sortKeyPos = i
		}
		seqnums = append(seqnums, def.SeqNum)
	}

	// read from aobjects
	readedBats := make([]*containers.Batch, 0, len(task.aObjHandles))
	for _, block := range task.aObjHandles {
		err = block.Prefetch(readColIdxs)
		if err != nil {
			return
		}
	}
	for i := range task.aObjHandles {
		bat, empty, err := task.prepareAObjSortedData(ctx, i, readColIdxs, sortKeyPos)
		if err != nil {
			return err
		}
		if empty {
			continue
		}
		readedBats = append(readedBats, bat)
	}
	defer func() {
		for _, bat := range readedBats {
			bat.Close()
		}
	}()

	if len(readedBats) == 0 {
		// just soft delete all Objects
		for _, obj := range task.aObjHandles {
			tbl := obj.GetRelation()
			if err = tbl.SoftDeleteObject(obj.GetID()); err != nil {
				return err
			}
		}
		if task.doTransfer {
			mergesort.CleanTransMapping(task.transMappings)
		}
		return nil
	}

	// prepare merge
	// fromLayout describes the layout of the input batches, which is a list of batch lengths
	fromLayout := make([]uint32, 0, len(readedBats))
	// toLayout describes the layout of the output batches, i.e. [8192, 8192, 8192, 4242]
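	// As an illustrative example (numbers assumed, not taken from real data):
	// with BlockMaxRows = 8192 and four source aobjects holding 9000, 9000, 9000
	// and 1818 rows, fromLayout is [9000, 9000, 9000, 1818] (28818 rows in total)
	// and the loop below re-cuts them into toLayout = [8192, 8192, 8192, 4242].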
	toLayout := make([]uint32, 0, len(readedBats))
	totalRowCnt := 0
	if sortKeyPos < 0 {
		// no pk, just pick the first column to reshape
		sortKeyPos = 0
	}
	for _, bat := range readedBats {
		vec := bat.Vecs[sortKeyPos]
		fromLayout = append(fromLayout, uint32(vec.Length()))
		totalRowCnt += vec.Length()
	}
	task.mergeRowsCnt = totalRowCnt
	rowsLeft := totalRowCnt
	for rowsLeft > 0 {
		if rowsLeft > int(schema.BlockMaxRows) {
			toLayout = append(toLayout, schema.BlockMaxRows)
			rowsLeft -= int(schema.BlockMaxRows)
		} else {
			toLayout = append(toLayout, uint32(rowsLeft))
			break
		}
	}

	// do the first sort
	var writtenBatches []*batch.Batch
	var releaseF func()
	var mapping []uint32
	if schema.HasSortKey() {
		writtenBatches, releaseF, mapping, err = mergesort.MergeAObj(ctx, task, readedBats, sortKeyPos, schema.BlockMaxRows, len(toLayout))
		if err != nil {
			return
		}
	} else {
		cnBatches := make([]*batch.Batch, len(readedBats))
		for i := range readedBats {
			cnBatches[i] = containers.ToCNBatch(readedBats[i])
		}
		writtenBatches, releaseF = mergesort.ReshapeBatches(cnBatches, fromLayout, toLayout, task)
	}
	defer releaseF()
	if task.doTransfer {
		mergesort.UpdateMappingAfterMerge(task.transMappings, mapping, toLayout)
	}

	// write!
	// create a new object to hold the merged blocks
	if task.createdObjHandles, err = task.rel.CreateNonAppendableObject(false, nil); err != nil {
		return
	}
	toObjectEntry := task.createdObjHandles.GetMeta().(*catalog.ObjectEntry)
	toObjectEntry.SetSorted()
	name := objectio.BuildObjectNameWithObjectID(&toObjectEntry.ID)
	writer, err := blockio.NewBlockWriterNew(task.rt.Fs.Service, name, schema.Version, seqnums)
	if err != nil {
		return err
	}
	if schema.HasPK() {
		pkIdx := schema.GetSingleSortKeyIdx()
		writer.SetPrimaryKey(uint16(pkIdx))
	} else if schema.HasSortKey() {
		writer.SetSortKey(uint16(schema.GetSingleSortKeyIdx()))
	}
	for _, bat := range writtenBatches {
		_, err = writer.WriteBatch(bat)
		if err != nil {
			return err
		}
	}
	_, _, err = writer.Sync(ctx)
	if err != nil {
		return err
	}
	task.createdMergedObjectName = name.String()

	// update stats for the created object
	err = task.createdObjHandles.UpdateStats(writer.Stats())
	if err != nil {
		return
	}
	err = task.createdObjHandles.GetMeta().(*catalog.ObjectEntry).GetObjectData().Init()
	if err != nil {
		return
	}

	// soft delete all aobjs
	for _, obj := range task.aObjHandles {
		tbl := obj.GetRelation()
		if err = tbl.SoftDeleteObject(obj.GetID()); err != nil {
			return err
		}
	}

	return nil
}

// flushAObjsForSnapshot schedules io tasks to flush aobjects for snapshot read.
// This function will not release any data held by the io tasks.
func (task *flushTableTailTask) flushAObjsForSnapshot(ctx context.Context) (subtasks []*flushObjTask, err error) {
	defer func() {
		if err != nil {
			releaseFlushObjTasks(subtasks, err)
		}
	}()
	subtasks = make([]*flushObjTask, len(task.aObjMetas))
	// fire flush tasks
	for i, obj := range task.aObjMetas {
		var data, deletes *containers.Batch
		var dataVer *containers.BatchWithVersion
		objData := obj.GetObjectData()
		if dataVer, err = objData.CollectAppendInRange(
			types.TS{}, task.txn.GetStartTS(), true, common.MergeAllocator,
		); err != nil {
			return
		}
		data = dataVer.Batch
		if data == nil || data.Length() == 0 {
			// the new appendable block might have no data when we flush the table, just skip it
			// in the previous impl, the runner only passed non-empty objs to NewCompactBlackTask
			continue
		}
		// do not close data; leave that to the wait phase
		if deletes, _, err = objData.CollectDeleteInRange(
			ctx, types.TS{}, task.txn.GetStartTS(), true, common.MergeAllocator,
		); err != nil {
			return
		}
		if deletes != nil {
			// make sure every batch in the deltaloc object is sorted by rowid
			_, err := mergesort.SortBlockColumns(deletes.Vecs, 0, task.rt.VectorPool.Transient)
			if err != nil {
				return nil, err
			}
		}

		aobjectTask := NewFlushObjTask(
			tasks.WaitableCtx,
			dataVer.Version,
			dataVer.Seqnums,
			objData.GetFs(),
			obj,
			data,
			deletes,
			true,
		)
		if err = task.rt.Scheduler.Schedule(aobjectTask); err != nil {
			return
		}
		subtasks[i] = aobjectTask
	}
	return
}

// waitFlushAObjForSnapshot waits for all io tasks flushing aobjects for snapshot read and updates locations
func (task *flushTableTailTask) waitFlushAObjForSnapshot(ctx context.Context, subtasks []*flushObjTask) (err error) {
	ictx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()
	for i, subtask := range subtasks {
		if subtask == nil {
			continue
		}
		if err = subtask.WaitDone(ictx); err != nil {
			return
		}
		if err = task.aObjHandles[i].UpdateStats(subtask.stat); err != nil {
			return
		}
		if subtask.delta == nil {
			continue
		}
		deltaLoc := blockio.EncodeLocation(
			subtask.name,
			subtask.blocks[1].GetExtent(),
			uint32(subtask.delta.Length()),
			subtask.blocks[1].GetID())

		if err = task.aObjHandles[i].UpdateDeltaLoc(0, deltaLoc); err != nil {
			return err
		}
	}
	return nil
}

// flushAllDeletesFromDelSrc collects all deletes from the source objs and flushes them into one obj
func (task *flushTableTailTask) flushAllDeletesFromDelSrc(ctx context.Context) (subtask *flushDeletesTask, emtpyDelObjIdx []*bitmap.Bitmap, err error) {
	var bufferBatch *containers.Batch
	defer func() {
		if err != nil && bufferBatch != nil {
			bufferBatch.Close()
		}
	}()
	emtpyDelObjIdx = make([]*bitmap.Bitmap, len(task.delSrcMetas))
	for i, obj := range task.delSrcMetas {
		objData := obj.GetObjectData()
		var deletes *containers.Batch
		emptyDelObjs := &bitmap.Bitmap{}
		emptyDelObjs.InitWithSize(int64(obj.BlockCnt()))
		for j := 0; j < obj.BlockCnt(); j++ {
			found, _ := objData.HasDeleteIntentsPreparedInByBlock(uint16(j), types.TS{}, task.txn.GetStartTS())
			if !found {
				emptyDelObjs.Add(uint64(j))
				continue
			}
			if deletes, err = objData.CollectDeleteInRangeByBlock(
				ctx, uint16(j), types.TS{}, task.txn.GetStartTS(), true, common.MergeAllocator,
			); err != nil {
				return
			}
			if deletes == nil || deletes.Length() == 0 {
				emptyDelObjs.Add(uint64(j))
				continue
			}
			if bufferBatch == nil {
				bufferBatch = makeDeletesTempBatch(deletes, task.rt.VectorPool.Transient)
			}
			task.nObjDeletesCnt += deletes.Length()
			// deletes is closed by Extend
			bufferBatch.Extend(deletes)
		}
		emtpyDelObjIdx[i] = emptyDelObjs
	}
	if bufferBatch != nil {
		// make sure every batch in the deltaloc object is sorted by rowid
		_, err = mergesort.SortBlockColumns(bufferBatch.Vecs, 0, task.rt.VectorPool.Transient)
		if err != nil {
			return
		}
		subtask = NewFlushDeletesTask(tasks.WaitableCtx, task.rt.Fs, bufferBatch)
		if err = task.rt.Scheduler.Schedule(subtask); err != nil {
			return
		}
	}
	return
}

// waitFlushAllDeletesFromDelSrc waits for the io task flushing deletes from the source objs and updates
// locations, skipping the blocks recorded in emtpyDelObjIdx
func (task *flushTableTailTask) waitFlushAllDeletesFromDelSrc(ctx context.Context, subtask *flushDeletesTask, emtpyDelObjIdx []*bitmap.Bitmap) (err error) {
	if subtask == nil {
		return
	}
	ictx, cancel := context.WithTimeout(ctx, 6*time.Minute)
	defer cancel()
	if err = subtask.WaitDone(ictx); err != nil {
		return err
	}
	task.createdDeletesObjectName = subtask.name.String()
	deltaLoc := blockio.EncodeLocation(
		subtask.name,
		subtask.blocks[0].GetExtent(),
		uint32(subtask.delta.Length()),
		subtask.blocks[0].GetID())

	v2.TaskFlushDeletesCountHistogram.Observe(float64(task.nObjDeletesCnt))
	v2.TaskFlushDeletesSizeHistogram.Observe(float64(deltaLoc.Extent().End()))
	logutil.Infof("[FlushTabletail] task %d update %s for approximate %d objs", task.ID(), deltaLoc, len(task.delSrcHandles))
	for i, hdl := range task.delSrcHandles {
		for j := 0; j < hdl.GetMeta().(*catalog.ObjectEntry).BlockCnt(); j++ {
			if emtpyDelObjIdx[i] != nil && emtpyDelObjIdx[i].Contains(uint64(j)) {
				continue
			}
			if err = hdl.UpdateDeltaLoc(uint16(j), deltaLoc); err != nil {
				return err
			}
		}
	}
	return
}

func makeDeletesTempBatch(template *containers.Batch, pool *containers.VectorPool) *containers.Batch {
	bat := containers.NewBatchWithCapacity(len(template.Attrs))
	for i, name := range template.Attrs {
		bat.AddVector(name, pool.GetVector(template.Vecs[i].GetType()))
	}
	return bat
}

func relaseFlushDelTask(task *flushDeletesTask, err error) {
	if err != nil && task != nil {
		logutil.Infof("[FlushTabletail] release flush del task bat because of err %v", err)
		ictx, cancel := context.WithTimeout(
			context.Background(),
			10*time.Second, /*6*time.Minute,*/
		)
		defer cancel()
		task.WaitDone(ictx)
	}
	if task != nil && task.delta != nil {
		task.delta.Close()
	}
}

func releaseFlushObjTasks(subtasks []*flushObjTask, err error) {
	if err != nil {
		logutil.Infof("[FlushTabletail] release flush aobj bat because of err %v", err)
		// add a timeout to avoid WaitDone blocking the whole process
		ictx, cancel := context.WithTimeout(
			context.Background(),
			10*time.Second, /*6*time.Minute,*/
		)
		defer cancel()
		for _, subtask := range subtasks {
			if subtask != nil {
				// wait done; otherwise the data might be released before flush and cause a data race
				subtask.WaitDone(ictx)
			}
		}
	}
	for _, subtask := range subtasks {
		if subtask != nil && subtask.data != nil {
			subtask.data.Close()
		}
		if subtask != nil && subtask.delta != nil {
			subtask.delta.Close()
		}
	}
}

// For unit test
func (task *flushTableTailTask) GetCreatedObjects() handle.Object {
	return task.createdObjHandles
}
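
// Usage sketch (illustrative only, not part of this file's API surface): a
// caller such as the flush scheduler is assumed to already hold the dirty
// object metas, a *dbutils.Runtime and a transaction, and would wire the
// factory up roughly like this:
//
//	factory := FlushTableTailTaskFactory(dirtyObjs, rt, dirtyEndTs)
//	task, err := factory(tasks.WaitableCtx, txn)
//	if err != nil { // e.g. txnif.ErrTxnNeedRetry from NewFlushTableTailTask
//	    return err
//	}
//	if err = rt.Scheduler.Schedule(task); err != nil {
//	    return err
//	}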