github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/mergesort/task.go

// Copyright 2021 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mergesort

import (
	"context"
	"fmt"
	"time"

	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/container/batch"
	"github.com/matrixorigin/matrixone/pkg/container/nulls"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/objectio"
	"github.com/matrixorigin/matrixone/pkg/pb/api"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/blockio"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
	"go.uber.org/zap"
)

var ErrNoMoreBlocks = moerr.NewInternalErrorNoCtx("no more blocks")

// DisposableVecPool bridges the gap between the vector pools in cn and tn
type DisposableVecPool interface {
	GetVector(*types.Type) (ret *vector.Vector, release func())
	GetMPool() *mpool.MPool
}

type MergeTaskHost interface {
	DisposableVecPool
	HostHintName() string
	PrepareData(context.Context) ([]*batch.Batch, []*nulls.Nulls, func(), error)
	GetCommitEntry() *api.MergeCommitEntry
	PrepareNewWriter() *blockio.BlockWriter
	DoTransfer() bool
	GetObjectCnt() int
	GetBlkCnts() []int
	GetAccBlkCnts() []int
	GetSortKeyType() types.Type
	LoadNextBatch(ctx context.Context, objIdx uint32) (*batch.Batch, *nulls.Nulls, func(), error)
	GetTotalSize() uint32
	GetTotalRowCnt() uint32
	GetBlockMaxRows() uint32
	GetObjectMaxBlocks() uint16
	GetTargetObjSize() uint32
}

func initTransferMapping(e *api.MergeCommitEntry, blkcnt int) {
	e.Booking = NewBlkTransferBooking(blkcnt)
}

// getSimilarBatch builds a new batch with the same schema as bat, fetching its
// vectors from vpool and pre-extending each one to capacity when capacity > 0.
// The returned function releases every fetched vector back to the pool.
func getSimilarBatch(bat *batch.Batch, capacity int, vpool DisposableVecPool) (*batch.Batch, func()) {
	newBat := batch.NewWithSize(len(bat.Vecs))
	newBat.Attrs = bat.Attrs
	rfs := make([]func(), len(bat.Vecs))
	releaseF := func() {
		for _, f := range rfs {
			f()
		}
	}
	for i := range bat.Vecs {
		vec, release := vpool.GetVector(bat.Vecs[i].GetType())
		if capacity > 0 {
			vec.PreExtend(capacity, vpool.GetMPool())
		}
		newBat.Vecs[i] = vec
		rfs[i] = release
	}
	return newBat, releaseF
}

func GetNewWriter(
	fs fileservice.FileService,
	ver uint32, seqnums []uint16,
	sortkeyPos int, sortkeyIsPK bool,
) *blockio.BlockWriter {
	name := objectio.BuildObjectNameWithObjectID(objectio.NewObjectid())
	writer, err := blockio.NewBlockWriterNew(fs, name, ver, seqnums)
	if err != nil {
		panic(err) // should never happen
	}
	// has sort key
	if sortkeyPos >= 0 {
		if sortkeyIsPK {
			writer.SetPrimaryKey(uint16(sortkeyPos))
		} else { // cluster by
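// newPKWriterSketch is a minimal, hypothetical usage sketch of GetNewWriter
// and is not referenced by the merge path: it assumes the caller already
// holds a file service, an object schema version, and the column sequence
// numbers, and asks for a writer whose sort key is the primary key at
// column 0. Passing sortkeyPos < 0 instead yields a writer with no sort key.
func newPKWriterSketch(fs fileservice.FileService, ver uint32, seqnums []uint16) *blockio.BlockWriter {
	return GetNewWriter(fs, ver, seqnums, 0 /*sortkeyPos*/, true /*sortkeyIsPK*/)
}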
			writer.SetSortKey(uint16(sortkeyPos))
		}
	}
	return writer
}

func DoMergeAndWrite(
	ctx context.Context,
	sortkeyPos int,
	blkMaxRow int,
	mergehost MergeTaskHost,
) (err error) {
	now := time.Now()
	/* out args, keep the transfer information */
	commitEntry := mergehost.GetCommitEntry()
	fromObjsDesc := ""
	for _, o := range commitEntry.MergedObjs {
		obj := objectio.ObjectStats(o)
		fromObjsDesc = fmt.Sprintf("%s%s,", fromObjsDesc, common.ShortObjId(*obj.ObjectName().ObjectId()))
	}
	tableDesc := fmt.Sprintf("%v-%v", commitEntry.TblId, commitEntry.TableName)
	logutil.Info("[Start] Mergeblocks",
		zap.String("table", tableDesc),
		zap.String("on", mergehost.HostHintName()),
		zap.String("txn-start-ts", commitEntry.StartTs.DebugString()),
		zap.String("from-objs", fromObjsDesc),
	)
	phaseDesc := "prepare data"
	defer func() {
		if err != nil {
			logutil.Error("[DoneWithErr] Mergeblocks",
				zap.String("table", tableDesc),
				zap.Error(err),
				zap.String("phase", phaseDesc),
			)
		}
	}()

	hasSortKey := sortkeyPos >= 0
	if !hasSortKey {
		sortkeyPos = 0 // no sort key, use the first column to do the reshape
	}

	if hasSortKey {
		if err := mergeObjs(ctx, mergehost, sortkeyPos); err != nil {
			return err
		}

		toObjsDesc := ""
		for _, o := range commitEntry.CreatedObjs {
			obj := objectio.ObjectStats(o)
			toObjsDesc += fmt.Sprintf("%s(%v)Rows(%v),",
				common.ShortObjId(*obj.ObjectName().ObjectId()),
				obj.BlkCnt(),
				obj.Rows())
		}

		logutil.Info("[Done] Mergeblocks",
			zap.String("table", tableDesc),
			zap.String("on", mergehost.HostHintName()),
			zap.String("txn-start-ts", commitEntry.StartTs.DebugString()),
			zap.String("to-objs", toObjsDesc),
			common.DurationField(time.Since(now)))
		return
	}

	// batches are read from disk; dels are read from disk and memory
	//
	// batches[i] is the i-th non-appendable block to be merged and
	// it has no rowid
	batches, dels, release, err := mergehost.PrepareData(ctx)
	if err != nil {
		return err
	}
	defer release()

	if mergehost.DoTransfer() {
		initTransferMapping(commitEntry, len(batches))
	}

	fromLayout := make([]uint32, len(batches))
	totalRowCount := 0

	mpool := mergehost.GetMPool()
	// iterate over all blocks to collect basic info, shrinking them if needed
	for i := range batches {
		rowCntBeforeApplyDelete := batches[i].RowCount()
		del := dels[i]
		if del != nil && del.Count() > 0 {
			// duplicate the batch before applying deletes;
			// the old batch will be freed in releaseF
			newb, err := batches[i].Dup(mpool)
			if err != nil {
				return err
			}
			defer newb.Clean(mpool) // whoever creates a new vector should clean it
			batches[i] = newb
			batches[i].Shrink(del.ToI64Arrary(), true)
			// skip the batch if all of its rows are deleted
			if batches[i].RowCount() == 0 {
				continue
			}
		}
		if mergehost.DoTransfer() {
			AddSortPhaseMapping(commitEntry.Booking, i, rowCntBeforeApplyDelete, del, nil)
		}
		fromLayout[i] = uint32(batches[i].RowCount())
		totalRowCount += batches[i].RowCount()
	}

	if totalRowCount == 0 {
		logutil.Info("[Done] Mergeblocks due to all deleted",
			zap.String("table", tableDesc),
			zap.String("txn-start-ts", commitEntry.StartTs.DebugString()))
		if mergehost.DoTransfer() {
			CleanTransMapping(commitEntry.Booking)
		}
		return
	}

	// -------------------------- phase 1
	phaseDesc = "reshape, one column"
	toLayout := arrangeToLayout(totalRowCount, blkMaxRow)

	retBatches, releaseF := ReshapeBatches(batches, fromLayout, toLayout, mergehost)
	defer releaseF()
	if mergehost.DoTransfer() {
		UpdateMappingAfterMerge(commitEntry.Booking, nil, toLayout)
	}

	// -------------------------- phase 2
	phaseDesc = "new writer to write down"
	writer := mergehost.PrepareNewWriter()
	for _, bat := range retBatches {
		_, err = writer.WriteBatch(bat)
		if err != nil {
			return err
		}
	}

	if _, _, err = writer.Sync(ctx); err != nil {
		return err
	}

	// no tombstone actually
	cobjstats := writer.GetObjectStats()[:objectio.SchemaTombstone]
	for _, cobj := range cobjstats {
		commitEntry.CreatedObjs = append(commitEntry.CreatedObjs, cobj.Clone().Marshal())
	}
	cobj := fmt.Sprintf("%s(%v)Rows(%v)",
		common.ShortObjId(*cobjstats[0].ObjectName().ObjectId()),
		cobjstats[0].BlkCnt(),
		cobjstats[0].Rows())
	logutil.Info("[Done] Mergeblocks",
		zap.String("table", tableDesc),
		zap.String("on", mergehost.HostHintName()),
		zap.String("txn-start-ts", commitEntry.StartTs.DebugString()),
		zap.String("to-objs", cobj),
		common.DurationField(time.Since(now)))

	return nil
}

// layout: [blkMaxRow, blkMaxRow, ..., blkMaxRow, totalRowCount - blkMaxRow*N]
func arrangeToLayout(totalRowCount int, blkMaxRow int) []uint32 {
	toLayout := make([]uint32, 0, totalRowCount/blkMaxRow)
	unconsumed := totalRowCount
	for unconsumed > 0 {
		if unconsumed > blkMaxRow {
			toLayout = append(toLayout, uint32(blkMaxRow))
			unconsumed -= blkMaxRow
		} else {
			toLayout = append(toLayout, uint32(unconsumed))
			unconsumed = 0
		}
	}
	return toLayout
}
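// arrangeToLayoutSketch is a worked example of arrangeToLayout above and is
// not referenced by the merge path: 20000 rows with blkMaxRow = 8192 are
// split into full blocks plus one remainder block.
func arrangeToLayoutSketch() []uint32 {
	// 20000 = 8192 + 8192 + 3616, so the result is [8192, 8192, 3616]
	return arrangeToLayout(20000, 8192)
}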
// not defined in api.go to avoid an import cycle

func NewBlkTransferBooking(size int) *api.BlkTransferBooking {
	mappings := make([]api.BlkTransMap, size)
	for i := 0; i < size; i++ {
		mappings[i] = api.BlkTransMap{
			M: make(map[int32]api.TransDestPos),
		}
	}
	return &api.BlkTransferBooking{
		Mappings: mappings,
	}
}

func CleanTransMapping(b *api.BlkTransferBooking) {
	for i := 0; i < len(b.Mappings); i++ {
		b.Mappings[i] = api.BlkTransMap{
			M: make(map[int32]api.TransDestPos),
		}
	}
}

func AddSortPhaseMapping(b *api.BlkTransferBooking, idx int, originRowCnt int, deletes *nulls.Nulls, mapping []int64) {
	// TODO: remove the panic check
	if mapping != nil {
		deletecnt := 0
		if deletes != nil {
			deletecnt = deletes.GetCardinality()
		}
		if len(mapping) != originRowCnt-deletecnt {
			panic(fmt.Sprintf("mapping length %d != originRowCnt %d - deletes %s", len(mapping), originRowCnt, deletes))
		}
		// mapping: sortedVec[i] = originalVec[mapping[i]].
		// transpose it so that transposedMapping[originalPos] answers where
		// original row originalPos landed after sorting:
		// transposedMapping[mapping[i]] = i,
		// e.g. mapping [6 9 4 8 1 3 5 7 2 0] transposes to [9 4 8 5 2 6 0 7 3 1]
		// TODO: use a more efficient way to transpose, in place
		transposedMapping := make([]int64, len(mapping))
		for sortedPos, originalPos := range mapping {
			transposedMapping[originalPos] = int64(sortedPos)
		}
		mapping = transposedMapping
	}
	posInVecApplyDeletes := 0
	targetMapping := b.Mappings[idx].M
	for origRow := 0; origRow < originRowCnt; origRow++ {
		if deletes != nil && deletes.Contains(uint64(origRow)) {
			// this row has been deleted, skip its mapping
			continue
		}
		if mapping == nil {
			// no sort phase, the mapping is 1:1, just use posInVecApplyDeletes
			targetMapping[int32(origRow)] = api.TransDestPos{BlkIdx: -1, RowIdx: int32(posInVecApplyDeletes)}
		} else {
			targetMapping[int32(origRow)] = api.TransDestPos{BlkIdx: -1, RowIdx: int32(mapping[posInVecApplyDeletes])}
		}
		posInVecApplyDeletes++
	}
}
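// transposeMappingSketch restates the transposition inside AddSortPhaseMapping
// as a standalone, hypothetical helper; it is not referenced by the merge
// path. With mapping defined by sortedVec[i] = originalVec[mapping[i]], the
// result answers "where did original row i land after sorting?", e.g.
// [6 9 4 8 1 3 5 7 2 0] transposes to [9 4 8 5 2 6 0 7 3 1].
func transposeMappingSketch(mapping []int64) []int64 {
	transposed := make([]int64, len(mapping))
	for sortedPos, originalPos := range mapping {
		// original row originalPos ended up at sortedPos in the sorted vector
		transposed[originalPos] = int64(sortedPos)
	}
	return transposed
}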
func UpdateMappingAfterMerge(b *api.BlkTransferBooking, mapping, toLayout []uint32) {
	bisectHaystack := make([]uint32, 0, len(toLayout)+1)
	bisectHaystack = append(bisectHaystack, 0)
	for _, x := range toLayout {
		bisectHaystack = append(bisectHaystack, bisectHaystack[len(bisectHaystack)-1]+x)
	}

	// given toLayout and a needle, find its corresponding block index and row index in the block.
	// For example, toLayout [8192, 8192, 1024]: needle = 0 -> (0, 0); needle = 8192 -> (1, 0); needle = 8193 -> (1, 1)
	bisectPinpoint := func(needle uint32) (int, uint32) {
		i, j := 0, len(bisectHaystack)
		for i < j {
			m := (i + j) / 2
			if bisectHaystack[m] > needle {
				j = m
			} else {
				i = m + 1
			}
		}
		// bisectHaystack[i] is the first number > needle, so the needle falls into the (i-1)-th block
		blkIdx := i - 1
		rows := needle - bisectHaystack[blkIdx]
		return blkIdx, rows
	}

	var totalHandledRows int32

	for _, mcontainer := range b.Mappings {
		m := mcontainer.M
		var curTotal int32   // index in the flattened src array
		var destTotal uint32 // index in the flattened merged array
		for srcRow := range m {
			curTotal = totalHandledRows + m[srcRow].RowIdx
			if mapping == nil {
				destTotal = uint32(curTotal)
			} else {
				destTotal = mapping[curTotal]
			}
			destBlkIdx, destRowIdx := bisectPinpoint(destTotal)
			m[srcRow] = api.TransDestPos{BlkIdx: int32(destBlkIdx), RowIdx: int32(destRowIdx)}
		}
		totalHandledRows += int32(len(m))
	}
}
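// pinpointSketch is a self-contained, hypothetical restatement of the bisect
// logic in UpdateMappingAfterMerge; it is not referenced by the merge path.
// Given toLayout and a flattened row offset, it returns the destination block
// index and the row index inside that block. With toLayout = [8192, 8192, 1024]:
// needle 0 -> (0, 0), needle 8192 -> (1, 0), needle 8193 -> (1, 1).
func pinpointSketch(toLayout []uint32, needle uint32) (int, uint32) {
	// prefix sums of the layout: [0, 8192, 16384, 17408] for the example above
	haystack := make([]uint32, 0, len(toLayout)+1)
	haystack = append(haystack, 0)
	for _, x := range toLayout {
		haystack = append(haystack, haystack[len(haystack)-1]+x)
	}
	// binary search for the first prefix sum strictly greater than needle
	i, j := 0, len(haystack)
	for i < j {
		m := (i + j) / 2
		if haystack[m] > needle {
			j = m
		} else {
			i = m + 1
		}
	}
	// the needle falls into block i-1, at offset needle - haystack[i-1]
	return i - 1, needle - haystack[i-1]
}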