github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/mergesort/task.go

// Copyright 2021 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mergesort

import (
	"context"
	"fmt"
	"time"

	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/container/batch"
	"github.com/matrixorigin/matrixone/pkg/container/nulls"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/objectio"
	"github.com/matrixorigin/matrixone/pkg/pb/api"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/blockio"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
	"go.uber.org/zap"
)

var ErrNoMoreBlocks = moerr.NewInternalErrorNoCtx("no more blocks")

// DisposableVecPool bridges the gap between the vector pools in CN and TN.
type DisposableVecPool interface {
	GetVector(*types.Type) (ret *vector.Vector, release func())
	GetMPool() *mpool.MPool
}
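
// Illustrative sketch (not part of the original file): a minimal
// DisposableVecPool backed directly by an mpool, with no real pooling.
// The type name is hypothetical; it only shows how the two methods fit together.
//
//	type simpleVecPool struct{ mp *mpool.MPool }
//
//	func (p *simpleVecPool) GetVector(t *types.Type) (*vector.Vector, func()) {
//		v := vector.NewVec(*t)
//		return v, func() { v.Free(p.mp) }
//	}
//
//	func (p *simpleVecPool) GetMPool() *mpool.MPool { return p.mp }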

type MergeTaskHost interface {
	DisposableVecPool
	HostHintName() string
	PrepareData(context.Context) ([]*batch.Batch, []*nulls.Nulls, func(), error)
	GetCommitEntry() *api.MergeCommitEntry
	PrepareNewWriter() *blockio.BlockWriter
	DoTransfer() bool
	GetObjectCnt() int
	GetBlkCnts() []int
	GetAccBlkCnts() []int
	GetSortKeyType() types.Type
	LoadNextBatch(ctx context.Context, objIdx uint32) (*batch.Batch, *nulls.Nulls, func(), error)
	GetTotalSize() uint32
	GetTotalRowCnt() uint32
	GetBlockMaxRows() uint32
	GetObjectMaxBlocks() uint16
	GetTargetObjSize() uint32
}
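
// Note (illustrative summary, not part of the original file): DoMergeAndWrite
// below drives a MergeTaskHost in one of two ways. With a sort key, mergeObjs
// (defined elsewhere in this package) performs the sorted merge of the input
// objects; without one, PrepareData loads every batch up front and the rows are
// only reshaped into the target block layout before being written out.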

func initTransferMapping(e *api.MergeCommitEntry, blkcnt int) {
	e.Booking = NewBlkTransferBooking(blkcnt)
}

// getSimilarBatch returns an empty batch with the same attributes and vector
// types as bat, plus a release function for the pooled vectors. If capacity is
// positive, each vector is pre-extended to hold that many rows.
func getSimilarBatch(bat *batch.Batch, capacity int, vpool DisposableVecPool) (*batch.Batch, func()) {
	newBat := batch.NewWithSize(len(bat.Vecs))
	newBat.Attrs = bat.Attrs
	rfs := make([]func(), len(bat.Vecs))
	releaseF := func() {
		for _, f := range rfs {
			f()
		}
	}
	for i := range bat.Vecs {
		vec, release := vpool.GetVector(bat.Vecs[i].GetType())
		if capacity > 0 {
			vec.PreExtend(capacity, vpool.GetMPool())
		}
		newBat.Vecs[i] = vec
		rfs[i] = release
	}
	return newBat, releaseF
}
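
// Usage sketch (illustrative, not part of the original file): `src` is an
// existing batch and `pool` is any DisposableVecPool implementation.
//
//	out, release := getSimilarBatch(src, 8192, pool)
//	defer release()
//	// out shares src's schema but holds no rows yet; append into it freely.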

func GetNewWriter(
	fs fileservice.FileService,
	ver uint32, seqnums []uint16,
	sortkeyPos int, sortkeyIsPK bool,
) *blockio.BlockWriter {
	name := objectio.BuildObjectNameWithObjectID(objectio.NewObjectid())
	writer, err := blockio.NewBlockWriterNew(fs, name, ver, seqnums)
	if err != nil {
		panic(err) // it is impossible
	}
	// has sortkey
	if sortkeyPos >= 0 {
		if sortkeyIsPK {
			writer.SetPrimaryKey(uint16(sortkeyPos))
		} else { // cluster by
			writer.SetSortKey(uint16(sortkeyPos))
		}
	}
	return writer
}
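
// Usage sketch (illustrative, not part of the original file): build a writer
// for an object whose sort key is column 0 and doubles as the primary key;
// `fs`, `ver` and `seqnums` are assumed to be supplied by the caller.
//
//	writer := GetNewWriter(fs, ver, seqnums, 0 /*sortkeyPos*/, true /*sortkeyIsPK*/)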

func DoMergeAndWrite(
	ctx context.Context,
	sortkeyPos int,
	blkMaxRow int,
	mergehost MergeTaskHost,
) (err error) {
	now := time.Now()
	/* out args, keep the transfer information */
	commitEntry := mergehost.GetCommitEntry()
	fromObjsDesc := ""
	for _, o := range commitEntry.MergedObjs {
		obj := objectio.ObjectStats(o)
		fromObjsDesc = fmt.Sprintf("%s%s,", fromObjsDesc, common.ShortObjId(*obj.ObjectName().ObjectId()))
	}
	tableDesc := fmt.Sprintf("%v-%v", commitEntry.TblId, commitEntry.TableName)
	logutil.Info("[Start] Mergeblocks",
		zap.String("table", tableDesc),
		zap.String("on", mergehost.HostHintName()),
		zap.String("txn-start-ts", commitEntry.StartTs.DebugString()),
		zap.String("from-objs", fromObjsDesc),
	)
	phaseDesc := "prepare data"
	defer func() {
		if err != nil {
			logutil.Error("[DoneWithErr] Mergeblocks",
				zap.String("table", tableDesc),
				zap.Error(err),
				zap.String("phase", phaseDesc),
			)
		}
	}()

	hasSortKey := sortkeyPos >= 0
	if !hasSortKey {
		sortkeyPos = 0 // no sort key, use the first column to do the reshape
	}

	if hasSortKey {
		if err := mergeObjs(ctx, mergehost, sortkeyPos); err != nil {
			return err
		}

		toObjsDesc := ""
		for _, o := range commitEntry.CreatedObjs {
			obj := objectio.ObjectStats(o)
			toObjsDesc += fmt.Sprintf("%s(%v)Rows(%v),",
				common.ShortObjId(*obj.ObjectName().ObjectId()),
				obj.BlkCnt(),
				obj.Rows())
		}

		logutil.Info("[Done] Mergeblocks",
			zap.String("table", tableDesc),
			zap.String("on", mergehost.HostHintName()),
			zap.String("txn-start-ts", commitEntry.StartTs.DebugString()),
			zap.String("to-objs", toObjsDesc),
			common.DurationField(time.Since(now)))
		return
	}

	// batches are read from disk; dels are read from disk and memory.
	//
	// batches[i] is the i-th non-appendable block to be merged and
	// it has no rowid.
	batches, dels, release, err := mergehost.PrepareData(ctx)
	if err != nil {
		return err
	}
	defer release()

	if mergehost.DoTransfer() {
		initTransferMapping(commitEntry, len(batches))
	}

	fromLayout := make([]uint32, len(batches))
	totalRowCount := 0

	mpool := mergehost.GetMPool()
	// iterate over all blocks to collect basic info, shrinking them if needed
	for i := range batches {
		rowCntBeforeApplyDelete := batches[i].RowCount()
		del := dels[i]
		if del != nil && del.Count() > 0 {
			// dup the batch before applying deletes; the old one will be freed in releaseF
			newb, err := batches[i].Dup(mpool)
			if err != nil {
				return err
			}
			defer newb.Clean(mpool) // whoever creates a new batch should clean it
			batches[i] = newb
			batches[i].Shrink(del.ToI64Arrary(), true)
			// skip empty batch
			if batches[i].RowCount() == 0 {
				continue
			}
		}
		if mergehost.DoTransfer() {
			AddSortPhaseMapping(commitEntry.Booking, i, rowCntBeforeApplyDelete, del, nil)
		}
		fromLayout[i] = uint32(batches[i].RowCount())
		totalRowCount += batches[i].RowCount()
	}

	if totalRowCount == 0 {
		logutil.Info("[Done] Mergeblocks due to all deleted",
			zap.String("table", tableDesc),
			zap.String("txn-start-ts", commitEntry.StartTs.DebugString()))
		if mergehost.DoTransfer() {
			CleanTransMapping(commitEntry.Booking)
		}
		return
	}

	// -------------------------- phase 1
	phaseDesc = "reshape, one column"
	toLayout := arrangeToLayout(totalRowCount, blkMaxRow)

	retBatches, releaseF := ReshapeBatches(batches, fromLayout, toLayout, mergehost)
	defer releaseF()
	if mergehost.DoTransfer() {
		UpdateMappingAfterMerge(commitEntry.Booking, nil, toLayout)
	}

	// -------------------------- phase 2
	phaseDesc = "new writer to write down"
	writer := mergehost.PrepareNewWriter()
	for _, bat := range retBatches {
		_, err = writer.WriteBatch(bat)
		if err != nil {
			return err
		}
	}

	if _, _, err = writer.Sync(ctx); err != nil {
		return err
	}

	// no tombstone actually
	cobjstats := writer.GetObjectStats()[:objectio.SchemaTombstone]
	for _, cobj := range cobjstats {
		commitEntry.CreatedObjs = append(commitEntry.CreatedObjs, cobj.Clone().Marshal())
	}
	cobj := fmt.Sprintf("%s(%v)Rows(%v)",
		common.ShortObjId(*cobjstats[0].ObjectName().ObjectId()),
		cobjstats[0].BlkCnt(),
		cobjstats[0].Rows())
	logutil.Info("[Done] Mergeblocks",
		zap.String("table", tableDesc),
		zap.String("on", mergehost.HostHintName()),
		zap.String("txn-start-ts", commitEntry.StartTs.DebugString()),
		zap.String("to-objs", cobj),
		common.DurationField(time.Since(now)))

	return nil
}

// layout: [blkMaxRow, blkMaxRow, ..., blkMaxRow, totalRowCount - blkMaxRow*N]
func arrangeToLayout(totalRowCount int, blkMaxRow int) []uint32 {
	toLayout := make([]uint32, 0, totalRowCount/blkMaxRow)
	unconsumed := totalRowCount
	for unconsumed > 0 {
		if unconsumed > blkMaxRow {
			toLayout = append(toLayout, uint32(blkMaxRow))
			unconsumed -= blkMaxRow
		} else {
			toLayout = append(toLayout, uint32(unconsumed))
			unconsumed = 0
		}
	}
	return toLayout
}
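
// Worked example (illustrative, not part of the original file):
//
//	arrangeToLayout(20000, 8192) // -> []uint32{8192, 8192, 3616}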

// not defined in api.go to avoid import cycle

func NewBlkTransferBooking(size int) *api.BlkTransferBooking {
	mappings := make([]api.BlkTransMap, size)
	for i := 0; i < size; i++ {
		mappings[i] = api.BlkTransMap{
			M: make(map[int32]api.TransDestPos),
		}
	}
	return &api.BlkTransferBooking{
		Mappings: mappings,
	}
}

func CleanTransMapping(b *api.BlkTransferBooking) {
	for i := 0; i < len(b.Mappings); i++ {
		b.Mappings[i] = api.BlkTransMap{
			M: make(map[int32]api.TransDestPos),
		}
	}
}

func AddSortPhaseMapping(b *api.BlkTransferBooking, idx int, originRowCnt int, deletes *nulls.Nulls, mapping []int64) {
	// TODO: remove panic check
	if mapping != nil {
		deletecnt := 0
		if deletes != nil {
			deletecnt = deletes.GetCardinality()
		}
		if len(mapping) != originRowCnt-deletecnt {
			panic(fmt.Sprintf("mapping length %d != originRowCnt %d - deletes %s", len(mapping), originRowCnt, deletes))
		}
		// mapping: sortedVec[i] = originalVec[mapping[i]]
		// transpose it: transposed[mapping[i]] = i, i.e. row j of the original
		// vector ends up at row transposed[j] of the sorted vector.
		// e.g. originalVec [9 4 8 5 2 6 0 7 3 1] sorts to [0 1 2 3 4 5 6 7 8 9]
		// with mapping [6 9 4 8 1 3 5 7 2 0] and transposed mapping [9 4 8 5 2 6 0 7 3 1]
		// TODO: use a more efficient way to transpose, in place
		transposedMapping := make([]int64, len(mapping))
		for sortedPos, originalPos := range mapping {
			transposedMapping[originalPos] = int64(sortedPos)
		}
		mapping = transposedMapping
	}
	posInVecApplyDeletes := 0
	targetMapping := b.Mappings[idx].M
	for origRow := 0; origRow < originRowCnt; origRow++ {
		if deletes != nil && deletes.Contains(uint64(origRow)) {
			// this row has been deleted, skip its mapping
			continue
		}
		if mapping == nil {
			// no sort phase, the mapping is 1:1, just use posInVecApplyDeletes
			targetMapping[int32(origRow)] = api.TransDestPos{BlkIdx: -1, RowIdx: int32(posInVecApplyDeletes)}
		} else {
			targetMapping[int32(origRow)] = api.TransDestPos{BlkIdx: -1, RowIdx: int32(mapping[posInVecApplyDeletes])}
		}
		posInVecApplyDeletes++
	}
}
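
// Worked example (illustrative, not part of the original file): with
// originRowCnt = 4, deletes = {1} and mapping = [2 0 1] (positions counted
// after deletes are applied), the transposed mapping is [1 2 0] and the
// booking for this block becomes {0: (-1,1), 2: (-1,2), 3: (-1,0)}. BlkIdx
// stays -1 until UpdateMappingAfterMerge fills in the destination block.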

func UpdateMappingAfterMerge(b *api.BlkTransferBooking, mapping, toLayout []uint32) {
	bisectHaystack := make([]uint32, 0, len(toLayout)+1)
	bisectHaystack = append(bisectHaystack, 0)
	for _, x := range toLayout {
		bisectHaystack = append(bisectHaystack, bisectHaystack[len(bisectHaystack)-1]+x)
	}

	// given toLayout and a needle, find the needle's block index and its row index within that block.
	// For example, with toLayout [8192, 8192, 1024]: needle = 0 -> (0, 0); needle = 8192 -> (1, 0); needle = 8193 -> (1, 1)
	bisectPinpoint := func(needle uint32) (int, uint32) {
		i, j := 0, len(bisectHaystack)
		for i < j {
			m := (i + j) / 2
			if bisectHaystack[m] > needle {
				j = m
			} else {
				i = m + 1
			}
		}
		// bisectHaystack[i] is the first number > needle, so the needle falls into the (i-1)-th block
		blkIdx := i - 1
		rows := needle - bisectHaystack[blkIdx]
		return blkIdx, rows
	}

	var totalHandledRows int32

	for _, mcontainer := range b.Mappings {
		m := mcontainer.M
		var curTotal int32   // index in the flattened source array
		var destTotal uint32 // index in the flattened merged array
		for srcRow := range m {
			curTotal = totalHandledRows + m[srcRow].RowIdx
			if mapping == nil {
				destTotal = uint32(curTotal)
			} else {
				destTotal = mapping[curTotal]
			}
			destBlkIdx, destRowIdx := bisectPinpoint(destTotal)
			m[srcRow] = api.TransDestPos{BlkIdx: int32(destBlkIdx), RowIdx: int32(destRowIdx)}
		}
		totalHandledRows += int32(len(m))
	}
}
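
// Worked example (illustrative, not part of the original file): two source
// blocks booked after the sort phase as {0: (-1,0), 1: (-1,2), 2: (-1,1)} and
// {0: (-1,0), 1: (-1,1)}, with mapping = nil and toLayout = [3, 2]. The
// flattened positions 0..4 bisect into (0,0) (0,1) (0,2) (1,0) (1,1), so the
// bookings become {0: (0,0), 1: (0,2), 2: (0,1)} and {0: (1,0), 1: (1,1)}.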