github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/tables/jobs/mergeobjects.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package jobs
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"strings"
    21  
    22  	pkgcatalog "github.com/matrixorigin/matrixone/pkg/catalog"
    23  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    24  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    25  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    26  	"github.com/matrixorigin/matrixone/pkg/container/types"
    27  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    28  	"github.com/matrixorigin/matrixone/pkg/logutil"
    29  	"github.com/matrixorigin/matrixone/pkg/objectio"
    30  	"github.com/matrixorigin/matrixone/pkg/pb/api"
    31  	"github.com/matrixorigin/matrixone/pkg/perfcounter"
    32  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/blockio"
    33  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog"
    34  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    35  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
    36  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/db/dbutils"
    37  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/handle"
    38  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/txnif"
    39  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort"
    40  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tables/txnentries"
    41  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tasks"
    42  )
    43  
    44  type mergeObjectsTask struct {
    45  	*tasks.BaseTask
    46  	txn               txnif.AsyncTxn
    47  	rt                *dbutils.Runtime
    48  	mergedObjs        []*catalog.ObjectEntry
    49  	mergedObjsHandle  []handle.Object
    50  	mergedBlkCnt      []int
    51  	totalMergedBlkCnt int
    52  	createdBObjs      []*catalog.ObjectEntry
    53  	commitEntry       *api.MergeCommitEntry
    54  	rel               handle.Relation
    55  	did, tid          uint64
    56  
    57  	doTransfer bool
    58  
    59  	blkCnt     []int
    60  	nMergedBlk []int
    61  	schema     *catalog.Schema
    62  	idxs       []int
    63  	attrs      []string
    64  
    65  	targetObjSize uint32
    66  }
    67  
    68  func NewMergeObjectsTask(
    69  	ctx *tasks.Context,
    70  	txn txnif.AsyncTxn,
    71  	mergedObjs []*catalog.ObjectEntry,
    72  	rt *dbutils.Runtime,
    73  	targetObjSize uint32) (task *mergeObjectsTask, err error) {
    74  	if len(mergedObjs) == 0 {
    75  		panic("empty mergedObjs")
    76  	}
    77  	task = &mergeObjectsTask{
    78  		txn:          txn,
    79  		rt:           rt,
    80  		mergedObjs:   mergedObjs,
    81  		createdBObjs: make([]*catalog.ObjectEntry, 0),
    82  		mergedBlkCnt: make([]int, len(mergedObjs)),
    83  		nMergedBlk:   make([]int, len(mergedObjs)),
    84  		blkCnt:       make([]int, len(mergedObjs)),
    85  
    86  		targetObjSize: targetObjSize,
    87  	}
    88  	for i, obj := range mergedObjs {
    89  		task.mergedBlkCnt[i] = task.totalMergedBlkCnt
    90  		task.blkCnt[i] = obj.BlockCnt()
    91  		task.totalMergedBlkCnt += task.blkCnt[i]
    92  	}
    93  
    94  	task.did = mergedObjs[0].GetTable().GetDB().ID
    95  	database, err := txn.GetDatabaseByID(task.did)
    96  	if err != nil {
    97  		return
    98  	}
    99  	task.tid = mergedObjs[0].GetTable().ID
   100  	task.rel, err = database.GetRelationByID(task.tid)
   101  	if err != nil {
   102  		return
   103  	}
   104  	for _, meta := range mergedObjs {
   105  		obj, err := task.rel.GetObject(&meta.ID)
   106  		if err != nil {
   107  			return nil, err
   108  		}
   109  		task.mergedObjsHandle = append(task.mergedObjsHandle, obj)
   110  	}
   111  	task.schema = task.rel.Schema().(*catalog.Schema)
   112  	task.doTransfer = !strings.Contains(task.schema.Comment, pkgcatalog.MO_COMMENT_NO_DEL_HINT)
   113  	task.idxs = make([]int, 0, len(task.schema.ColDefs)-1)
   114  	task.attrs = make([]string, 0, len(task.schema.ColDefs)-1)
   115  	for _, def := range task.schema.ColDefs {
   116  		if def.IsPhyAddr() {
   117  			continue
   118  		}
   119  		task.idxs = append(task.idxs, def.Idx)
   120  		task.attrs = append(task.attrs, def.Name)
   121  	}
   122  	task.BaseTask = tasks.NewBaseTask(task, tasks.DataCompactionTask, ctx)
   123  	return
   124  }
   125  
   126  func (task *mergeObjectsTask) GetObjectCnt() int {
   127  	return len(task.mergedObjs)
   128  }
   129  
   130  func (task *mergeObjectsTask) GetBlkCnts() []int {
   131  	return task.blkCnt
   132  }
   133  
   134  func (task *mergeObjectsTask) GetAccBlkCnts() []int {
   135  	return task.mergedBlkCnt
   136  }
   137  
   138  func (task *mergeObjectsTask) GetBlockMaxRows() uint32 {
   139  	return task.schema.BlockMaxRows
   140  }
   141  
   142  func (task *mergeObjectsTask) GetObjectMaxBlocks() uint16 {
   143  	return task.schema.ObjectMaxBlocks
   144  }
   145  
   146  func (task *mergeObjectsTask) GetTargetObjSize() uint32 {
   147  	return task.targetObjSize
   148  }
   149  
   150  func (task *mergeObjectsTask) GetSortKeyPos() int {
   151  	sortKeyPos := -1
   152  	if task.schema.HasSortKey() {
   153  		sortKeyPos = task.schema.GetSingleSortKeyIdx()
   154  	}
   155  	return sortKeyPos
   156  }
   157  
   158  func (task *mergeObjectsTask) GetSortKeyType() types.Type {
   159  	if task.schema.HasSortKey() {
   160  		return task.schema.GetSingleSortKeyType()
   161  	}
   162  	return types.Type{}
   163  }
   164  
   165  // impl DisposableVecPool
   166  func (task *mergeObjectsTask) GetVector(typ *types.Type) (*vector.Vector, func()) {
   167  	v := task.rt.VectorPool.Transient.GetVector(typ)
   168  	return v.GetDownstreamVector(), v.Close
   169  }
   170  
   171  func (task *mergeObjectsTask) GetMPool() *mpool.MPool {
   172  	return task.rt.VectorPool.Transient.GetMPool()
   173  }
   174  
   175  func (task *mergeObjectsTask) HostHintName() string { return "DN" }
   176  
   177  func (task *mergeObjectsTask) PrepareData(ctx context.Context) ([]*batch.Batch, []*nulls.Nulls, func(), error) {
   178  	var err error
   179  	views := make([]*containers.BlockView, task.totalMergedBlkCnt)
   180  	releaseF := func() {
   181  		for _, view := range views {
   182  			if view != nil {
   183  				view.Close()
   184  			}
   185  		}
   186  	}
   187  	defer func() {
   188  		if err != nil {
   189  			releaseF()
   190  		}
   191  	}()
   192  	schema := task.rel.Schema().(*catalog.Schema)
   193  	idxs := make([]int, 0, len(schema.ColDefs)-1)
   194  	attrs := make([]string, 0, len(schema.ColDefs)-1)
   195  	for _, def := range schema.ColDefs {
   196  		if def.IsPhyAddr() {
   197  			continue
   198  		}
   199  		idxs = append(idxs, def.Idx)
   200  		attrs = append(attrs, def.Name)
   201  	}
   202  	for i, obj := range task.mergedObjsHandle {
   203  
   204  		maxBlockOffset := task.totalMergedBlkCnt
   205  		if i != len(task.mergedObjs)-1 {
   206  			maxBlockOffset = task.mergedBlkCnt[i+1]
   207  		}
   208  		minBlockOffset := task.mergedBlkCnt[i]
   209  
   210  		for j := 0; j < maxBlockOffset-minBlockOffset; j++ {
   211  			if views[minBlockOffset+j], err = obj.GetColumnDataByIds(ctx, uint16(j), idxs, common.MergeAllocator); err != nil {
   212  				return nil, nil, nil, err
   213  			}
   214  		}
   215  	}
   216  
   217  	batches := make([]*batch.Batch, 0, task.totalMergedBlkCnt)
   218  	dels := make([]*nulls.Nulls, 0, task.totalMergedBlkCnt)
   219  	for _, view := range views {
   220  		batch := batch.New(true, attrs)
   221  		if len(attrs) != len(view.Columns) {
   222  			panic(fmt.Sprintf("mismatch %v, %v, %v", attrs, len(attrs), len(view.Columns)))
   223  		}
   224  		for i, col := range view.Columns {
   225  			batch.Vecs[i] = col.GetData().GetDownstreamVector()
   226  		}
   227  		batch.SetRowCount(view.Columns[0].Length())
   228  		batches = append(batches, batch)
   229  		dels = append(dels, view.DeleteMask)
   230  	}
   231  
   232  	return batches, dels, releaseF, nil
   233  }
   234  
   235  func (task *mergeObjectsTask) LoadNextBatch(ctx context.Context, objIdx uint32) (*batch.Batch, *nulls.Nulls, func(), error) {
   236  	if objIdx >= uint32(len(task.mergedObjs)) {
   237  		panic("invalid objIdx")
   238  	}
   239  	if task.nMergedBlk[objIdx] >= task.blkCnt[objIdx] {
   240  		return nil, nil, nil, mergesort.ErrNoMoreBlocks
   241  	}
   242  	var err error
   243  	var view *containers.BlockView
   244  	releaseF := func() {
   245  		if view != nil {
   246  			view.Close()
   247  		}
   248  	}
   249  	defer func() {
   250  		if err != nil {
   251  			releaseF()
   252  		}
   253  	}()
   254  
   255  	obj := task.mergedObjsHandle[objIdx]
   256  	view, err = obj.GetColumnDataByIds(ctx, uint16(task.nMergedBlk[objIdx]), task.idxs, common.MergeAllocator)
   257  	if err != nil {
   258  		return nil, nil, nil, err
   259  	}
   260  	if len(task.attrs) != len(view.Columns) {
   261  		panic(fmt.Sprintf("mismatch %v, %v, %v", task.attrs, len(task.attrs), len(view.Columns)))
   262  	}
   263  	task.nMergedBlk[objIdx]++
   264  
   265  	bat := batch.New(true, task.attrs)
   266  	for i, col := range view.Columns {
   267  		bat.Vecs[i] = col.GetData().GetDownstreamVector()
   268  	}
   269  	bat.SetRowCount(view.Columns[0].Length())
   270  	return bat, view.DeleteMask, releaseF, nil
   271  }
   272  
   273  func (task *mergeObjectsTask) GetCommitEntry() *api.MergeCommitEntry {
   274  	if task.commitEntry == nil {
   275  		return task.prepareCommitEntry()
   276  	}
   277  	return task.commitEntry
   278  }
   279  
   280  func (task *mergeObjectsTask) prepareCommitEntry() *api.MergeCommitEntry {
   281  	schema := task.rel.Schema().(*catalog.Schema)
   282  	commitEntry := &api.MergeCommitEntry{}
   283  	commitEntry.DbId = task.did
   284  	commitEntry.TblId = task.tid
   285  	commitEntry.TableName = schema.Name
   286  	commitEntry.StartTs = task.txn.GetStartTS().ToTimestamp()
   287  	for _, o := range task.mergedObjs {
   288  		obj := o.GetObjectStats()
   289  		commitEntry.MergedObjs = append(commitEntry.MergedObjs, obj.Clone().Marshal())
   290  	}
   291  	task.commitEntry = commitEntry
   292  	// leave mapping to ReadMergeAndWrite
   293  	return commitEntry
   294  }
   295  
   296  func (task *mergeObjectsTask) PrepareNewWriter() *blockio.BlockWriter {
   297  	schema := task.rel.Schema().(*catalog.Schema)
   298  	seqnums := make([]uint16, 0, len(schema.ColDefs)-1)
   299  	for _, def := range schema.ColDefs {
   300  		if def.IsPhyAddr() {
   301  			continue
   302  		}
   303  		seqnums = append(seqnums, def.SeqNum)
   304  	}
   305  	sortkeyIsPK := false
   306  	sortkeyPos := -1
   307  
   308  	if schema.HasPK() {
   309  		sortkeyPos = schema.GetSingleSortKeyIdx()
   310  		sortkeyIsPK = true
   311  	} else if schema.HasSortKey() {
   312  		sortkeyPos = schema.GetSingleSortKeyIdx()
   313  	}
   314  
   315  	return mergesort.GetNewWriter(task.rt.Fs.Service, schema.Version, seqnums, sortkeyPos, sortkeyIsPK)
   316  }
   317  
   318  func (task *mergeObjectsTask) DoTransfer() bool {
   319  	return task.doTransfer
   320  }
   321  
   322  func (task *mergeObjectsTask) Execute(ctx context.Context) (err error) {
   323  	phaseDesc := ""
   324  	defer func() {
   325  		if err != nil {
   326  			logutil.Error("[DoneWithErr] Mergeblocks", common.OperationField(task.Name()),
   327  				common.AnyField("error", err),
   328  				common.AnyField("phase", phaseDesc),
   329  			)
   330  		}
   331  	}()
   332  
   333  	schema := task.rel.Schema().(*catalog.Schema)
   334  	sortkeyPos := -1
   335  	if schema.HasSortKey() {
   336  		sortkeyPos = schema.GetSingleSortKeyIdx()
   337  	}
   338  	phaseDesc = "1-DoMergeAndWrite"
   339  	if err = mergesort.DoMergeAndWrite(ctx, sortkeyPos, int(schema.BlockMaxRows), task); err != nil {
   340  		return err
   341  	}
   342  
   343  	phaseDesc = "2-HandleMergeEntryInTxn"
   344  	if task.createdBObjs, err = HandleMergeEntryInTxn(task.txn, task.commitEntry, task.rt); err != nil {
   345  		return err
   346  	}
   347  
   348  	perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) {
   349  		counter.TAE.Object.MergeBlocks.Add(1)
   350  	})
   351  	return nil
   352  }
   353  
   354  func HandleMergeEntryInTxn(txn txnif.AsyncTxn, entry *api.MergeCommitEntry, rt *dbutils.Runtime) ([]*catalog.ObjectEntry, error) {
   355  	database, err := txn.GetDatabaseByID(entry.DbId)
   356  	if err != nil {
   357  		return nil, err
   358  	}
   359  	rel, err := database.GetRelationByID(entry.TblId)
   360  	if err != nil {
   361  		return nil, err
   362  	}
   363  
   364  	mergedObjs := make([]*catalog.ObjectEntry, 0, len(entry.MergedObjs))
   365  	createdObjs := make([]*catalog.ObjectEntry, 0, len(entry.CreatedObjs))
   366  	ids := make([]*common.ID, 0, len(entry.MergedObjs)*2)
   367  
   368  	// drop merged blocks and objects
   369  	for _, item := range entry.MergedObjs {
   370  		drop := objectio.ObjectStats(item)
   371  		objID := drop.ObjectName().ObjectId()
   372  		obj, err := rel.GetObject(objID)
   373  		if err != nil {
   374  			return nil, err
   375  		}
   376  		mergedObjs = append(mergedObjs, obj.GetMeta().(*catalog.ObjectEntry))
   377  		if err = rel.SoftDeleteObject(objID); err != nil {
   378  			return nil, err
   379  		}
   380  	}
   381  
   382  	// construct new object,
   383  	for _, stats := range entry.CreatedObjs {
   384  		stats := objectio.ObjectStats(stats)
   385  		objID := stats.ObjectName().ObjectId()
   386  		obj, err := rel.CreateNonAppendableObject(false, new(objectio.CreateObjOpt).WithId(objID))
   387  		if err != nil {
   388  			return nil, err
   389  		}
   390  		createdObjs = append(createdObjs, obj.GetMeta().(*catalog.ObjectEntry))
   391  		// set stats and sorted property
   392  		if err = obj.UpdateStats(stats); err != nil {
   393  			return nil, err
   394  		}
   395  		objEntry := obj.GetMeta().(*catalog.ObjectEntry)
   396  		objEntry.SetSorted()
   397  	}
   398  
   399  	txnEntry, err := txnentries.NewMergeObjectsEntry(
   400  		txn,
   401  		rel,
   402  		mergedObjs,
   403  		createdObjs,
   404  		entry.Booking,
   405  		rt,
   406  	)
   407  	if err != nil {
   408  		return nil, err
   409  	}
   410  
   411  	if err = txn.LogTxnEntry(entry.DbId, entry.TblId, txnEntry, ids); err != nil {
   412  		return nil, err
   413  	}
   414  
   415  	return createdObjs, nil
   416  }
   417  
   418  func (task *mergeObjectsTask) GetTotalSize() uint32 {
   419  	totalSize := uint32(0)
   420  	for _, obj := range task.mergedObjs {
   421  		totalSize += uint32(obj.GetOriginSize())
   422  	}
   423  	return totalSize
   424  }
   425  
   426  func (task *mergeObjectsTask) GetTotalRowCnt() uint32 {
   427  	totalRowCnt := 0
   428  	for _, obj := range task.mergedObjs {
   429  		totalRowCnt += obj.GetRows()
   430  	}
   431  	return uint32(totalRowCnt)
   432  }
   433  
   434  // for UT
   435  func (task *mergeObjectsTask) GetCreatedObjects() []*catalog.ObjectEntry {
   436  	return task.createdBObjs
   437  }