github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/tables/jobs/flushTableTail.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  // http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package jobs
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"strings"
    21  	"time"
    22  
    23  	pkgcatalog "github.com/matrixorigin/matrixone/pkg/catalog"
    24  	"github.com/matrixorigin/matrixone/pkg/common/bitmap"
    25  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    26  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    27  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    28  	"github.com/matrixorigin/matrixone/pkg/container/types"
    29  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    30  	"github.com/matrixorigin/matrixone/pkg/logutil"
    31  	"github.com/matrixorigin/matrixone/pkg/objectio"
    32  	"github.com/matrixorigin/matrixone/pkg/pb/api"
    33  	"github.com/matrixorigin/matrixone/pkg/util/fault"
    34  	v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2"
    35  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/blockio"
    36  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog"
    37  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    38  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
    39  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/db/dbutils"
    40  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/handle"
    41  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/txnif"
    42  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort"
    43  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tables/txnentries"
    44  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tasks"
    45  	"go.uber.org/zap"
    46  	"go.uber.org/zap/zapcore"
    47  )
    48  
    49  type TestFlushBailoutPos1 struct{}
    50  type TestFlushBailoutPos2 struct{}
    51  
    52  var FlushTableTailTaskFactory = func(
    53  	metas []*catalog.ObjectEntry, rt *dbutils.Runtime, endTs types.TS, /* end of dirty range*/
    54  ) tasks.TxnTaskFactory {
    55  	return func(ctx *tasks.Context, txn txnif.AsyncTxn) (tasks.Task, error) {
    56  		return NewFlushTableTailTask(ctx, txn, metas, rt, endTs)
    57  	}
    58  }
    59  
    60  type flushTableTailTask struct {
    61  	*tasks.BaseTask
    62  	txn        txnif.AsyncTxn
    63  	rt         *dbutils.Runtime
    64  	dirtyEndTs types.TS
    65  
    66  	scopes []common.ID
    67  	schema *catalog.Schema
    68  
    69  	rel  handle.Relation
    70  	dbid uint64
    71  
    72  	// record the row mapping from deleted blocks to created blocks
    73  	transMappings *api.BlkTransferBooking
    74  	doTransfer    bool
    75  
    76  	aObjMetas         []*catalog.ObjectEntry
    77  	delSrcMetas       []*catalog.ObjectEntry
    78  	aObjHandles       []handle.Object
    79  	delSrcHandles     []handle.Object
    80  	createdObjHandles handle.Object
    81  
    82  	dirtyLen                 int
    83  	createdMergedObjectName  string
    84  	createdDeletesObjectName string
    85  
    86  	mergeRowsCnt, aObjDeletesCnt, nObjDeletesCnt int
    87  }
    88  
    89  // A note about flush start timestamp
    90  //
    91  // As the last **committed** time, not the newest allcated time,
    92  // is used in NewFlushTableTailTask, there will be a situation that
    93  // some commiting appends prepared between committed-time and aobj-freeze-time
    94  // are ignored during the data collection stage of flushing,
    95  // which leads to transfer-row-not-found problem.
    96  //
    97  // The proposed solution is to add a check function in NewFlushTableTailTask
    98  // to figure out if there exist an AppendNode with a bigger prepared time
    99  // than flush-start-ts, and if so, retry the flush task
   100  //
   101  // Two question:
   102  //
   103  // 1. How about deletes prepared in that special time range?
   104  //    Never mind, deletes will be transfered when committing the flush task
   105  // 2. Is it guaranteed that the check function is able to see all possible AppendNodes?
   106  //    Probably no, because getting appender and attaching AppendNode are not atomic group opertions.
   107  //    Imagine:
   108  //
   109  //                freeze  check
   110  // committed  x1     |     |     x2
   111  // prepared          |     |  o2
   112  // preparing    i2   |     |
   113  //
   114  // - x1 is the last committed time.
   115  // - getting appender(i2 in graph) is before the freezing
   116  // - attaching AppendNode successfully (o2 in graph) after the check
   117  // - finishing commit at x2
   118  //
   119  // So in order for the check function to work, a dedicated lock is added
   120  // on ablock to ensure that NO AppendNode will be attatched to ablock
   121  // after the very moment when the ablock is freezed.
   122  //
   123  // In the first version proposal, the check in NewFlushTableTailTask is omitted,
   124  // because the existing PrepareCompact in ablock already handles that thing.
   125  // If the last AppendNode in an ablock is not committed, PrepareCompact will
   126  // return false to reschedule the task. However, commiting AppendNode doesn't
   127  // guarantee that the committs has been updated. It's still possible to get a
   128  // old startts which is not able to collect all appends in the ablock.
   129  
   130  func NewFlushTableTailTask(
   131  	ctx *tasks.Context,
   132  	txn txnif.AsyncTxn,
   133  	objs []*catalog.ObjectEntry,
   134  	rt *dbutils.Runtime,
   135  	dirtyEndTs types.TS,
   136  ) (task *flushTableTailTask, err error) {
   137  	task = &flushTableTailTask{
   138  		txn:        txn,
   139  		rt:         rt,
   140  		dirtyEndTs: dirtyEndTs,
   141  	}
   142  	meta := objs[0]
   143  	dbId := meta.GetTable().GetDB().ID
   144  	task.dbid = dbId
   145  	database, err := txn.UnsafeGetDatabase(dbId)
   146  	if err != nil {
   147  		return
   148  	}
   149  	tableId := meta.GetTable().ID
   150  	rel, err := database.UnsafeGetRelation(tableId)
   151  	task.rel = rel
   152  	if err != nil {
   153  		return
   154  	}
   155  	task.schema = rel.Schema().(*catalog.Schema)
   156  
   157  	for _, obj := range objs {
   158  		task.scopes = append(task.scopes, *obj.AsCommonID())
   159  		var hdl handle.Object
   160  		hdl, err = rel.GetObject(&obj.ID)
   161  		if err != nil {
   162  			return
   163  		}
   164  		if hdl.IsAppendable() && !obj.HasDropCommitted() {
   165  			task.aObjMetas = append(task.aObjMetas, obj)
   166  			task.aObjHandles = append(task.aObjHandles, hdl)
   167  			if obj.GetObjectData().CheckFlushTaskRetry(txn.GetStartTS()) {
   168  				logutil.Infof("[FlushTabletail] obj %v needs retry", obj.ID.String())
   169  				return nil, txnif.ErrTxnNeedRetry
   170  			}
   171  		} else {
   172  			task.delSrcMetas = append(task.delSrcMetas, obj)
   173  			task.delSrcHandles = append(task.delSrcHandles, hdl)
   174  		}
   175  	}
   176  
   177  	task.doTransfer = !strings.Contains(task.schema.Comment, pkgcatalog.MO_COMMENT_NO_DEL_HINT)
   178  	if task.doTransfer {
   179  		task.transMappings = mergesort.NewBlkTransferBooking(len(task.aObjHandles))
   180  	}
   181  
   182  	task.BaseTask = tasks.NewBaseTask(task, tasks.DataCompactionTask, ctx)
   183  
   184  	tblEntry := rel.GetMeta().(*catalog.TableEntry)
   185  	tblEntry.Stats.RLock()
   186  	defer tblEntry.Stats.RUnlock()
   187  	task.dirtyLen = len(tblEntry.DeletedDirties)
   188  	for _, obj := range tblEntry.DeletedDirties {
   189  		task.scopes = append(task.scopes, *obj.AsCommonID())
   190  		var hdl handle.Object
   191  		hdl, err = rel.GetObject(&obj.ID)
   192  		if err != nil {
   193  			return
   194  		}
   195  		task.delSrcMetas = append(task.delSrcMetas, obj)
   196  		task.delSrcHandles = append(task.delSrcHandles, hdl)
   197  	}
   198  	return
   199  }
   200  
   201  // impl DisposableVecPool
   202  func (task *flushTableTailTask) GetVector(typ *types.Type) (*vector.Vector, func()) {
   203  	v := task.rt.VectorPool.Transient.GetVector(typ)
   204  	return v.GetDownstreamVector(), v.Close
   205  }
   206  
   207  func (task *flushTableTailTask) GetMPool() *mpool.MPool {
   208  	return task.rt.VectorPool.Transient.GetMPool()
   209  }
   210  
   211  // Scopes is used in conflict checking in scheduler. For ScopedTask interface
   212  func (task *flushTableTailTask) Scopes() []common.ID { return task.scopes }
   213  
   214  // Name is for ScopedTask interface
   215  func (task *flushTableTailTask) Name() string {
   216  	return fmt.Sprintf("[%d]FT-%d-%s", task.ID(), task.rel.ID(), task.schema.Name)
   217  }
   218  
   219  func (task *flushTableTailTask) MarshalLogObject(enc zapcore.ObjectEncoder) (err error) {
   220  	enc.AddString("endTs", task.dirtyEndTs.ToString())
   221  	objs := ""
   222  	for _, obj := range task.aObjMetas {
   223  		objs = fmt.Sprintf("%s%s,", objs, obj.ID.ShortStringEx())
   224  	}
   225  	enc.AddString("a-objs", objs)
   226  	// delsrc := ""
   227  	// for _, del := range task.delSrcMetas {
   228  	// 	delsrc = fmt.Sprintf("%s%s,", delsrc, del.ID.ShortStringEx())
   229  	// }
   230  	// enc.AddString("deletes-src", delsrc)
   231  	enc.AddInt("delete-obj-ndv", len(task.delSrcMetas))
   232  
   233  	toObjs := ""
   234  	if task.createdObjHandles != nil {
   235  		id := task.createdObjHandles.GetID()
   236  		toObjs = fmt.Sprintf("%s%s,", toObjs, id.ShortStringEx())
   237  	}
   238  	if toObjs != "" {
   239  		enc.AddString("to-objs", toObjs)
   240  	}
   241  	return
   242  }
   243  
   244  func (task *flushTableTailTask) Execute(ctx context.Context) (err error) {
   245  	logutil.Info("[Start]", common.OperationField(task.Name()), common.OperandField(task),
   246  		common.OperandField(len(task.aObjHandles)+len(task.delSrcHandles)))
   247  
   248  	phaseDesc := ""
   249  	defer func() {
   250  		if err != nil {
   251  			logutil.Error("[DoneWithErr]", common.OperationField(task.Name()),
   252  				common.AnyField("error", err),
   253  				common.AnyField("phase", phaseDesc),
   254  			)
   255  		}
   256  	}()
   257  	now := time.Now()
   258  
   259  	/////////////////////
   260  	//// phase seperator
   261  	///////////////////
   262  
   263  	phaseDesc = "1-flushing appendable blocks for snapshot"
   264  	snapshotSubtasks, err := task.flushAObjsForSnapshot(ctx)
   265  	if err != nil {
   266  		return
   267  	}
   268  	defer func() {
   269  		releaseFlushObjTasks(snapshotSubtasks, err)
   270  	}()
   271  
   272  	/////////////////////
   273  	//// phase seperator
   274  	///////////////////
   275  
   276  	phaseDesc = "1-write all deletes from naobjs"
   277  	// just collect deletes, do not soft delete it, leave that to merge task.
   278  	deleteTask, emptyMap, err := task.flushAllDeletesFromDelSrc(ctx)
   279  	if err != nil {
   280  		return
   281  	}
   282  	defer func() {
   283  		relaseFlushDelTask(deleteTask, err)
   284  	}()
   285  	/////////////////////
   286  	//// phase seperator
   287  	///////////////////
   288  
   289  	phaseDesc = "1-merge aobjects"
   290  	// merge aobjects, no need to wait, it is a sync procedure, that is why put it
   291  	// after flushAObjsForSnapshot and flushAllDeletesFromNObjs
   292  	if err = task.mergeAObjs(ctx); err != nil {
   293  		return
   294  	}
   295  
   296  	if v := ctx.Value(TestFlushBailoutPos1{}); v != nil {
   297  		err = moerr.NewInternalErrorNoCtx("test merge bail out")
   298  		return
   299  	}
   300  
   301  	/////////////////////
   302  	//// phase seperator
   303  	///////////////////
   304  	phaseDesc = "1-waiting flushing appendable blocks for snapshot"
   305  	// wait flush tasks
   306  	if err = task.waitFlushAObjForSnapshot(ctx, snapshotSubtasks); err != nil {
   307  		return
   308  	}
   309  
   310  	/////////////////////
   311  	//// phase seperator
   312  	///////////////////
   313  
   314  	phaseDesc = "1-wait flushing all deletes from naobjs"
   315  	if err = task.waitFlushAllDeletesFromDelSrc(ctx, deleteTask, emptyMap); err != nil {
   316  		return
   317  	}
   318  
   319  	phaseDesc = "1-wait LogTxnEntry"
   320  	txnEntry, err := txnentries.NewFlushTableTailEntry(
   321  		task.txn,
   322  		task.ID(),
   323  		task.transMappings,
   324  		task.rel.GetMeta().(*catalog.TableEntry),
   325  		task.aObjMetas,
   326  		task.delSrcMetas,
   327  		task.aObjHandles,
   328  		task.delSrcHandles,
   329  		task.createdObjHandles,
   330  		task.createdDeletesObjectName,
   331  		task.createdMergedObjectName,
   332  		task.dirtyLen,
   333  		task.rt,
   334  		task.dirtyEndTs,
   335  	)
   336  	if err != nil {
   337  		return err
   338  	}
   339  	if err = task.txn.LogTxnEntry(
   340  		task.dbid,
   341  		task.rel.ID(),
   342  		txnEntry,
   343  		nil,
   344  	); err != nil {
   345  		return
   346  	}
   347  	/////////////////////
   348  
   349  	duration := time.Since(now)
   350  	logutil.Info("[End]", common.OperationField(task.Name()),
   351  		common.AnyField("txn-start-ts", task.txn.GetStartTS().ToString()),
   352  		zap.Int("aobj-deletes", task.aObjDeletesCnt),
   353  		zap.Int("aobj-merge-rows", task.mergeRowsCnt),
   354  		zap.Int("nobj-deletes", task.nObjDeletesCnt),
   355  		common.DurationField(duration),
   356  		common.OperandField(task))
   357  
   358  	v2.TaskFlushTableTailDurationHistogram.Observe(duration.Seconds())
   359  
   360  	sleep, name, exist := fault.TriggerFault("slow_flush")
   361  	if exist && name == task.schema.Name {
   362  		time.Sleep(time.Duration(sleep) * time.Second)
   363  	}
   364  	return
   365  }
   366  
   367  // prepareAObjSortedData read the data from appendable blocks, sort them if sort key exists
   368  func (task *flushTableTailTask) prepareAObjSortedData(
   369  	ctx context.Context, objIdx int, idxs []int, sortKeyPos int,
   370  ) (bat *containers.Batch, empty bool, err error) {
   371  	if len(idxs) <= 0 {
   372  		logutil.Infof("[FlushTabletail] no mergeable columns")
   373  		return nil, true, nil
   374  	}
   375  	obj := task.aObjHandles[objIdx]
   376  
   377  	views, err := obj.GetColumnDataByIds(ctx, 0, idxs, common.MergeAllocator)
   378  	if err != nil {
   379  		return
   380  	}
   381  	bat = containers.NewBatch()
   382  	rowCntBeforeApplyDelete := views.Columns[0].Length()
   383  	deletes := views.DeleteMask
   384  	views.ApplyDeletes()
   385  	defer views.Close()
   386  	for i, colidx := range idxs {
   387  		colview := views.Columns[i]
   388  		if colview == nil {
   389  			empty = true
   390  			return
   391  		}
   392  		vec := colview.Orphan()
   393  		if vec.Length() == 0 {
   394  			empty = true
   395  			vec.Close()
   396  			bat.Close()
   397  			return
   398  		}
   399  		bat.AddVector(task.schema.ColDefs[colidx].Name, vec.TryConvertConst())
   400  	}
   401  
   402  	if deletes != nil {
   403  		task.aObjDeletesCnt += deletes.GetCardinality()
   404  	}
   405  
   406  	var sortMapping []int64
   407  	if sortKeyPos >= 0 {
   408  		if objIdx == 0 {
   409  			logutil.Infof("flushtabletail sort obj on %s", bat.Attrs[sortKeyPos])
   410  		}
   411  		sortMapping, err = mergesort.SortBlockColumns(bat.Vecs, sortKeyPos, task.rt.VectorPool.Transient)
   412  		if err != nil {
   413  			return
   414  		}
   415  	}
   416  	if task.doTransfer {
   417  		mergesort.AddSortPhaseMapping(task.transMappings, objIdx, rowCntBeforeApplyDelete, deletes, sortMapping)
   418  	}
   419  	return
   420  }
   421  
   422  // mergeAObjs merge the data from appendable blocks, and write the merged data to new block,
   423  // recording row mapping in blkTransferBooking struct
   424  func (task *flushTableTailTask) mergeAObjs(ctx context.Context) (err error) {
   425  	if len(task.aObjMetas) == 0 {
   426  		return nil
   427  	}
   428  
   429  	// prepare columns idx and sortKey to read sorted batch
   430  	schema := task.schema
   431  	seqnums := make([]uint16, 0, len(schema.ColDefs))
   432  	readColIdxs := make([]int, 0, len(schema.ColDefs))
   433  	sortKeyIdx := -1
   434  	sortKeyPos := -1
   435  	if schema.HasSortKey() {
   436  		sortKeyIdx = schema.GetSingleSortKeyIdx()
   437  	}
   438  	for i, def := range schema.ColDefs {
   439  		if def.IsPhyAddr() {
   440  			continue
   441  		}
   442  		readColIdxs = append(readColIdxs, def.Idx)
   443  		if def.Idx == sortKeyIdx {
   444  			sortKeyPos = i
   445  		}
   446  		seqnums = append(seqnums, def.SeqNum)
   447  	}
   448  
   449  	// read from aobjects
   450  	readedBats := make([]*containers.Batch, 0, len(task.aObjHandles))
   451  	for _, block := range task.aObjHandles {
   452  		err = block.Prefetch(readColIdxs)
   453  		if err != nil {
   454  			return
   455  		}
   456  	}
   457  	for i := range task.aObjHandles {
   458  		bat, empty, err := task.prepareAObjSortedData(ctx, i, readColIdxs, sortKeyPos)
   459  		if err != nil {
   460  			return err
   461  		}
   462  		if empty {
   463  			continue
   464  		}
   465  		readedBats = append(readedBats, bat)
   466  	}
   467  	defer func() {
   468  		for _, bat := range readedBats {
   469  			bat.Close()
   470  		}
   471  	}()
   472  
   473  	if len(readedBats) == 0 {
   474  		// just soft delete all Objects
   475  		for _, obj := range task.aObjHandles {
   476  			tbl := obj.GetRelation()
   477  			if err = tbl.SoftDeleteObject(obj.GetID()); err != nil {
   478  				return err
   479  			}
   480  		}
   481  		if task.doTransfer {
   482  			mergesort.CleanTransMapping(task.transMappings)
   483  		}
   484  		return nil
   485  	}
   486  
   487  	// prepare merge
   488  	// fromLayout describes the layout of the input batch, which is a list of batch length
   489  	fromLayout := make([]uint32, 0, len(readedBats))
   490  	// toLayout describes the layout of the output batch, i.e. [8192, 8192, 8192, 4242]
   491  	toLayout := make([]uint32, 0, len(readedBats))
   492  	totalRowCnt := 0
   493  	if sortKeyPos < 0 {
   494  		// no pk, just pick the first column to reshape
   495  		sortKeyPos = 0
   496  	}
   497  	for _, bat := range readedBats {
   498  		vec := bat.Vecs[sortKeyPos]
   499  		fromLayout = append(fromLayout, uint32(vec.Length()))
   500  		totalRowCnt += vec.Length()
   501  	}
   502  	task.mergeRowsCnt = totalRowCnt
   503  	rowsLeft := totalRowCnt
   504  	for rowsLeft > 0 {
   505  		if rowsLeft > int(schema.BlockMaxRows) {
   506  			toLayout = append(toLayout, schema.BlockMaxRows)
   507  			rowsLeft -= int(schema.BlockMaxRows)
   508  		} else {
   509  			toLayout = append(toLayout, uint32(rowsLeft))
   510  			break
   511  		}
   512  	}
   513  
   514  	// do first sort
   515  	var writtenBatches []*batch.Batch
   516  	var releaseF func()
   517  	var mapping []uint32
   518  	if schema.HasSortKey() {
   519  		writtenBatches, releaseF, mapping, err = mergesort.MergeAObj(ctx, task, readedBats, sortKeyPos, schema.BlockMaxRows, len(toLayout))
   520  		if err != nil {
   521  			return
   522  		}
   523  	} else {
   524  		cnBatches := make([]*batch.Batch, len(readedBats))
   525  		for i := range readedBats {
   526  			cnBatches[i] = containers.ToCNBatch(readedBats[i])
   527  		}
   528  		writtenBatches, releaseF = mergesort.ReshapeBatches(cnBatches, fromLayout, toLayout, task)
   529  	}
   530  	defer releaseF()
   531  	if task.doTransfer {
   532  		mergesort.UpdateMappingAfterMerge(task.transMappings, mapping, toLayout)
   533  	}
   534  
   535  	// write!
   536  	// create new object to hold merged blocks
   537  	if task.createdObjHandles, err = task.rel.CreateNonAppendableObject(false, nil); err != nil {
   538  		return
   539  	}
   540  	toObjectEntry := task.createdObjHandles.GetMeta().(*catalog.ObjectEntry)
   541  	toObjectEntry.SetSorted()
   542  	name := objectio.BuildObjectNameWithObjectID(&toObjectEntry.ID)
   543  	writer, err := blockio.NewBlockWriterNew(task.rt.Fs.Service, name, schema.Version, seqnums)
   544  	if err != nil {
   545  		return err
   546  	}
   547  	if schema.HasPK() {
   548  		pkIdx := schema.GetSingleSortKeyIdx()
   549  		writer.SetPrimaryKey(uint16(pkIdx))
   550  	} else if schema.HasSortKey() {
   551  		writer.SetSortKey(uint16(schema.GetSingleSortKeyIdx()))
   552  	}
   553  	for _, bat := range writtenBatches {
   554  		_, err = writer.WriteBatch(bat)
   555  		if err != nil {
   556  			return err
   557  		}
   558  	}
   559  	_, _, err = writer.Sync(ctx)
   560  	if err != nil {
   561  		return err
   562  	}
   563  	task.createdMergedObjectName = name.String()
   564  
   565  	// update new status for created blocks
   566  	err = task.createdObjHandles.UpdateStats(writer.Stats())
   567  	if err != nil {
   568  		return
   569  	}
   570  	err = task.createdObjHandles.GetMeta().(*catalog.ObjectEntry).GetObjectData().Init()
   571  	if err != nil {
   572  		return
   573  	}
   574  
   575  	// soft delete all aobjs
   576  	for _, obj := range task.aObjHandles {
   577  		tbl := obj.GetRelation()
   578  		if err = tbl.SoftDeleteObject(obj.GetID()); err != nil {
   579  			return err
   580  		}
   581  	}
   582  
   583  	return nil
   584  }
   585  
   586  // flushAObjsForSnapshot schedule io task to flush aobjects for snapshot read. this function will not release any data in io task
   587  func (task *flushTableTailTask) flushAObjsForSnapshot(ctx context.Context) (subtasks []*flushObjTask, err error) {
   588  	defer func() {
   589  		if err != nil {
   590  			releaseFlushObjTasks(subtasks, err)
   591  		}
   592  	}()
   593  	subtasks = make([]*flushObjTask, len(task.aObjMetas))
   594  	// fire flush task
   595  	for i, obj := range task.aObjMetas {
   596  		var data, deletes *containers.Batch
   597  		var dataVer *containers.BatchWithVersion
   598  		objData := obj.GetObjectData()
   599  		if dataVer, err = objData.CollectAppendInRange(
   600  			types.TS{}, task.txn.GetStartTS(), true, common.MergeAllocator,
   601  		); err != nil {
   602  			return
   603  		}
   604  		data = dataVer.Batch
   605  		if data == nil || data.Length() == 0 {
   606  			// the new appendable block might has no data when we flush the table, just skip it
   607  			// In previous impl, runner will only pass non-empty obj to NewCompactBlackTask
   608  			continue
   609  		}
   610  		// do not close data, leave that to wait phase
   611  		if deletes, _, err = objData.CollectDeleteInRange(
   612  			ctx, types.TS{}, task.txn.GetStartTS(), true, common.MergeAllocator,
   613  		); err != nil {
   614  			return
   615  		}
   616  		if deletes != nil {
   617  			// make sure every batch in deltaloc object is sorted by rowid
   618  			_, err := mergesort.SortBlockColumns(deletes.Vecs, 0, task.rt.VectorPool.Transient)
   619  			if err != nil {
   620  				return nil, err
   621  			}
   622  		}
   623  
   624  		aobjectTask := NewFlushObjTask(
   625  			tasks.WaitableCtx,
   626  			dataVer.Version,
   627  			dataVer.Seqnums,
   628  			objData.GetFs(),
   629  			obj,
   630  			data,
   631  			deletes,
   632  			true,
   633  		)
   634  		if err = task.rt.Scheduler.Schedule(aobjectTask); err != nil {
   635  			return
   636  		}
   637  		subtasks[i] = aobjectTask
   638  	}
   639  	return
   640  }
   641  
   642  // waitFlushAObjForSnapshot waits all io tasks about flushing aobject for snapshot read, update locations
   643  func (task *flushTableTailTask) waitFlushAObjForSnapshot(ctx context.Context, subtasks []*flushObjTask) (err error) {
   644  	ictx, cancel := context.WithTimeout(ctx, 6*time.Minute)
   645  	defer cancel()
   646  	for i, subtask := range subtasks {
   647  		if subtask == nil {
   648  			continue
   649  		}
   650  		if err = subtask.WaitDone(ictx); err != nil {
   651  			return
   652  		}
   653  		if err = task.aObjHandles[i].UpdateStats(subtask.stat); err != nil {
   654  			return
   655  		}
   656  		if subtask.delta == nil {
   657  			continue
   658  		}
   659  		deltaLoc := blockio.EncodeLocation(
   660  			subtask.name,
   661  			subtask.blocks[1].GetExtent(),
   662  			uint32(subtask.delta.Length()),
   663  			subtask.blocks[1].GetID())
   664  
   665  		if err = task.aObjHandles[i].UpdateDeltaLoc(0, deltaLoc); err != nil {
   666  			return err
   667  		}
   668  	}
   669  	return nil
   670  }
   671  
   672  // flushAllDeletesFromDelSrc collects all deletes from objs and flush them into one obj
   673  func (task *flushTableTailTask) flushAllDeletesFromDelSrc(ctx context.Context) (subtask *flushDeletesTask, emtpyDelObjIdx []*bitmap.Bitmap, err error) {
   674  	var bufferBatch *containers.Batch
   675  	defer func() {
   676  		if err != nil && bufferBatch != nil {
   677  			bufferBatch.Close()
   678  		}
   679  	}()
   680  	emtpyDelObjIdx = make([]*bitmap.Bitmap, len(task.delSrcMetas))
   681  	for i, obj := range task.delSrcMetas {
   682  		objData := obj.GetObjectData()
   683  		var deletes *containers.Batch
   684  		emptyDelObjs := &bitmap.Bitmap{}
   685  		emptyDelObjs.InitWithSize(int64(obj.BlockCnt()))
   686  		for j := 0; j < obj.BlockCnt(); j++ {
   687  			found, _ := objData.HasDeleteIntentsPreparedInByBlock(uint16(j), types.TS{}, task.txn.GetStartTS())
   688  			if !found {
   689  				emptyDelObjs.Add(uint64(j))
   690  				continue
   691  			}
   692  			if deletes, err = objData.CollectDeleteInRangeByBlock(
   693  				ctx, uint16(j), types.TS{}, task.txn.GetStartTS(), true, common.MergeAllocator,
   694  			); err != nil {
   695  				return
   696  			}
   697  			if deletes == nil || deletes.Length() == 0 {
   698  				emptyDelObjs.Add(uint64(j))
   699  				continue
   700  			}
   701  			if bufferBatch == nil {
   702  				bufferBatch = makeDeletesTempBatch(deletes, task.rt.VectorPool.Transient)
   703  			}
   704  			task.nObjDeletesCnt += deletes.Length()
   705  			// deletes is closed by Extend
   706  			bufferBatch.Extend(deletes)
   707  		}
   708  		emtpyDelObjIdx[i] = emptyDelObjs
   709  	}
   710  	if bufferBatch != nil {
   711  		// make sure every batch in deltaloc object is sorted by rowid
   712  		_, err = mergesort.SortBlockColumns(bufferBatch.Vecs, 0, task.rt.VectorPool.Transient)
   713  		if err != nil {
   714  			return
   715  		}
   716  		subtask = NewFlushDeletesTask(tasks.WaitableCtx, task.rt.Fs, bufferBatch)
   717  		if err = task.rt.Scheduler.Schedule(subtask); err != nil {
   718  			return
   719  		}
   720  	}
   721  	return
   722  }
   723  
   724  // waitFlushAllDeletesFromDelSrc waits all io tasks about flushing deletes from objs, update locations but skip those in emtpyDelObjIdx
   725  func (task *flushTableTailTask) waitFlushAllDeletesFromDelSrc(ctx context.Context, subtask *flushDeletesTask, emtpyDelObjIdx []*bitmap.Bitmap) (err error) {
   726  	if subtask == nil {
   727  		return
   728  	}
   729  	ictx, cancel := context.WithTimeout(ctx, 6*time.Minute)
   730  	defer cancel()
   731  	if err = subtask.WaitDone(ictx); err != nil {
   732  		return err
   733  	}
   734  	task.createdDeletesObjectName = subtask.name.String()
   735  	deltaLoc := blockio.EncodeLocation(
   736  		subtask.name,
   737  		subtask.blocks[0].GetExtent(),
   738  		uint32(subtask.delta.Length()),
   739  		subtask.blocks[0].GetID())
   740  
   741  	v2.TaskFlushDeletesCountHistogram.Observe(float64(task.nObjDeletesCnt))
   742  	v2.TaskFlushDeletesSizeHistogram.Observe(float64(deltaLoc.Extent().End()))
   743  	logutil.Infof("[FlushTabletail] task %d update %s for approximate %d objs", task.ID(), deltaLoc, len(task.delSrcHandles))
   744  	for i, hdl := range task.delSrcHandles {
   745  		for j := 0; j < hdl.GetMeta().(*catalog.ObjectEntry).BlockCnt(); j++ {
   746  			if emtpyDelObjIdx[i] != nil && emtpyDelObjIdx[i].Contains(uint64(j)) {
   747  				continue
   748  			}
   749  			if err = hdl.UpdateDeltaLoc(uint16(j), deltaLoc); err != nil {
   750  				return err
   751  			}
   752  
   753  		}
   754  	}
   755  	return
   756  }
   757  
   758  func makeDeletesTempBatch(template *containers.Batch, pool *containers.VectorPool) *containers.Batch {
   759  	bat := containers.NewBatchWithCapacity(len(template.Attrs))
   760  	for i, name := range template.Attrs {
   761  		bat.AddVector(name, pool.GetVector(template.Vecs[i].GetType()))
   762  	}
   763  	return bat
   764  }
   765  
   766  func relaseFlushDelTask(task *flushDeletesTask, err error) {
   767  	if err != nil && task != nil {
   768  		logutil.Infof("[FlushTabletail] release flush del task bat because of err %v", err)
   769  		ictx, cancel := context.WithTimeout(
   770  			context.Background(),
   771  			10*time.Second, /*6*time.Minute,*/
   772  		)
   773  		defer cancel()
   774  		task.WaitDone(ictx)
   775  	}
   776  	if task != nil && task.delta != nil {
   777  		task.delta.Close()
   778  	}
   779  }
   780  
   781  func releaseFlushObjTasks(subtasks []*flushObjTask, err error) {
   782  	if err != nil {
   783  		logutil.Infof("[FlushTabletail] release flush aobj bat because of err %v", err)
   784  		// add a timeout to avoid WaitDone block the whole process
   785  		ictx, cancel := context.WithTimeout(
   786  			context.Background(),
   787  			10*time.Second, /*6*time.Minute,*/
   788  		)
   789  		defer cancel()
   790  		for _, subtask := range subtasks {
   791  			if subtask != nil {
   792  				// wait done, otherwise the data might be released before flush, and cause data race
   793  				subtask.WaitDone(ictx)
   794  			}
   795  		}
   796  	}
   797  	for _, subtask := range subtasks {
   798  		if subtask != nil && subtask.data != nil {
   799  			subtask.data.Close()
   800  		}
   801  		if subtask != nil && subtask.delta != nil {
   802  			subtask.delta.Close()
   803  		}
   804  	}
   805  }
   806  
   807  // For unit test
   808  func (task *flushTableTailTask) GetCreatedObjects() handle.Object {
   809  	return task.createdObjHandles
   810  }