github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/disttae/reader.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package disttae 16 17 import ( 18 "context" 19 "sort" 20 "time" 21 22 "github.com/matrixorigin/matrixone/pkg/catalog" 23 "github.com/matrixorigin/matrixone/pkg/common/moerr" 24 "github.com/matrixorigin/matrixone/pkg/common/mpool" 25 "github.com/matrixorigin/matrixone/pkg/container/batch" 26 "github.com/matrixorigin/matrixone/pkg/container/types" 27 "github.com/matrixorigin/matrixone/pkg/container/vector" 28 "github.com/matrixorigin/matrixone/pkg/fileservice" 29 "github.com/matrixorigin/matrixone/pkg/logutil" 30 "github.com/matrixorigin/matrixone/pkg/objectio" 31 "github.com/matrixorigin/matrixone/pkg/pb/plan" 32 "github.com/matrixorigin/matrixone/pkg/pb/timestamp" 33 "github.com/matrixorigin/matrixone/pkg/perfcounter" 34 plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan" 35 "github.com/matrixorigin/matrixone/pkg/testutil" 36 "github.com/matrixorigin/matrixone/pkg/txn/trace" 37 v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2" 38 "github.com/matrixorigin/matrixone/pkg/vm/engine" 39 "github.com/matrixorigin/matrixone/pkg/vm/engine/disttae/logtailreplay" 40 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/blockio" 41 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/index" 42 "github.com/matrixorigin/matrixone/pkg/vm/process" 43 "go.uber.org/zap" 44 ) 45 46 // ----------------------------------------------------------------- 47 // ------------------------ withFilterMixin ------------------------ 48 // ----------------------------------------------------------------- 49 50 func (mixin *withFilterMixin) reset() { 51 mixin.filterState.evaluated = false 52 mixin.filterState.filter = nil 53 mixin.columns.pkPos = -1 54 mixin.columns.indexOfFirstSortedColumn = -1 55 mixin.columns.seqnums = nil 56 mixin.columns.colTypes = nil 57 mixin.sels = nil 58 } 59 60 // when the reader.Read is called for a new block, it will always 61 // call tryUpdate to update the seqnums 62 // NOTE: here we assume the tryUpdate is always called with the same cols 63 // for all blocks and it will only be updated once 64 func (mixin *withFilterMixin) tryUpdateColumns(cols []string) { 65 if len(cols) == len(mixin.columns.seqnums) { 66 return 67 } 68 if len(mixin.columns.seqnums) != 0 { 69 panic(moerr.NewInternalErrorNoCtx("withFilterMixin tryUpdate called with different cols")) 70 } 71 72 // record the column selectivity 73 chit, ctotal := len(cols), len(mixin.tableDef.Cols) 74 v2.TaskSelColumnTotal.Add(float64(ctotal)) 75 v2.TaskSelColumnHit.Add(float64(ctotal - chit)) 76 blockio.RecordColumnSelectivity(chit, ctotal) 77 78 mixin.columns.seqnums = make([]uint16, len(cols)) 79 mixin.columns.colTypes = make([]types.Type, len(cols)) 80 // mixin.columns.colNulls = make([]bool, len(cols)) 81 mixin.columns.pkPos = -1 82 mixin.columns.indexOfFirstSortedColumn = -1 83 compPKName2Pos := make(map[string]struct{}) 84 positions := make(map[string]int) 85 if mixin.tableDef.Pkey != nil && mixin.tableDef.Pkey.CompPkeyCol != nil { 86 pk := mixin.tableDef.Pkey 87 for _, name := range pk.Names { 88 compPKName2Pos[name] = struct{}{} 89 } 90 } 91 for i, column := range cols { 92 if column == catalog.Row_ID { 93 mixin.columns.seqnums[i] = objectio.SEQNUM_ROWID 94 mixin.columns.colTypes[i] = objectio.RowidType 95 } else { 96 if plan2.GetSortOrderByName(mixin.tableDef, column) == 0 { 97 mixin.columns.indexOfFirstSortedColumn = i 98 } 99 colIdx := mixin.tableDef.Name2ColIndex[column] 100 colDef := mixin.tableDef.Cols[colIdx] 101 mixin.columns.seqnums[i] = uint16(colDef.Seqnum) 102 103 if _, ok := compPKName2Pos[column]; ok { 104 positions[column] = i 105 } 106 107 if mixin.tableDef.Pkey != nil && mixin.tableDef.Pkey.PkeyColName == column { 108 // primary key is in the cols 109 mixin.columns.pkPos = i 110 } 111 mixin.columns.colTypes[i] = types.T(colDef.Typ.Id).ToType() 112 // if colDef.Default != nil { 113 // mixin.columns.colNulls[i] = colDef.Default.NullAbility 114 // } 115 } 116 } 117 if len(positions) != 0 { 118 for _, name := range mixin.tableDef.Pkey.Names { 119 if pos, ok := positions[name]; !ok { 120 break 121 } else { 122 mixin.columns.compPKPositions = append(mixin.columns.compPKPositions, uint16(pos)) 123 } 124 } 125 } 126 } 127 128 func (mixin *withFilterMixin) getReadFilter(proc *process.Process, blkCnt int) ( 129 filter blockio.ReadFilter, 130 ) { 131 if mixin.filterState.evaluated { 132 filter = mixin.filterState.filter 133 return 134 } 135 pk := mixin.tableDef.Pkey 136 if pk == nil { 137 mixin.filterState.evaluated = true 138 mixin.filterState.filter = nil 139 return 140 } 141 if pk.CompPkeyCol == nil { 142 return mixin.getNonCompositPKFilter(proc, blkCnt) 143 } 144 return mixin.getCompositPKFilter(proc, blkCnt) 145 } 146 147 func (mixin *withFilterMixin) getCompositPKFilter(proc *process.Process, blkCnt int) ( 148 filter blockio.ReadFilter, 149 ) { 150 // if no primary key is included in the columns or no filter expr is given, 151 // no filter is needed 152 if len(mixin.columns.compPKPositions) == 0 || mixin.filterState.expr == nil { 153 mixin.filterState.evaluated = true 154 mixin.filterState.filter = nil 155 return 156 } 157 158 // evaluate 159 pkNames := mixin.tableDef.Pkey.Names 160 pkVals := make([]*plan.Literal, len(pkNames)) 161 ok, hasNull := getCompositPKVals(mixin.filterState.expr, pkNames, pkVals, proc) 162 163 if !ok || pkVals[0] == nil { 164 mixin.filterState.evaluated = true 165 mixin.filterState.filter = nil 166 mixin.filterState.hasNull = hasNull 167 return 168 } 169 cnt := getValidCompositePKCnt(pkVals) 170 pkVals = pkVals[:cnt] 171 172 filterFuncs := make([]func(*vector.Vector, []int32, *[]int32), len(pkVals)) 173 for i := range filterFuncs { 174 filterFuncs[i] = getCompositeFilterFuncByExpr(pkVals[i], i == 0) 175 } 176 177 filter = func(vecs []*vector.Vector) []int32 { 178 var ( 179 inputSels []int32 180 ) 181 for i := range filterFuncs { 182 vec := vecs[i] 183 mixin.sels = mixin.sels[:0] 184 filterFuncs[i](vec, inputSels, &mixin.sels) 185 if len(mixin.sels) == 0 { 186 break 187 } 188 inputSels = mixin.sels 189 } 190 // logutil.Debugf("%s: %d/%d", mixin.tableDef.Name, len(res), vecs[0].Length()) 191 192 return mixin.sels 193 } 194 195 mixin.filterState.evaluated = true 196 mixin.filterState.filter = filter 197 mixin.filterState.seqnums = make([]uint16, 0, len(mixin.columns.compPKPositions)) 198 mixin.filterState.colTypes = make([]types.Type, 0, len(mixin.columns.compPKPositions)) 199 for _, pos := range mixin.columns.compPKPositions { 200 mixin.filterState.seqnums = append(mixin.filterState.seqnums, mixin.columns.seqnums[pos]) 201 mixin.filterState.colTypes = append(mixin.filterState.colTypes, mixin.columns.colTypes[pos]) 202 } 203 // records how many blks one reader needs to read when having filter 204 objectio.BlkReadStats.BlksByReaderStats.Record(1, blkCnt) 205 return 206 } 207 208 func (mixin *withFilterMixin) getNonCompositPKFilter(proc *process.Process, blkCnt int) blockio.ReadFilter { 209 // if no primary key is included in the columns or no filter expr is given, 210 // no filter is needed 211 if mixin.columns.pkPos == -1 || mixin.filterState.expr == nil { 212 mixin.filterState.evaluated = true 213 mixin.filterState.filter = nil 214 return nil 215 } 216 217 // evaluate the search function for the filter 218 // if the search function is not found, no filter is needed 219 // primary key must be used by the expr in one of the following patterns: 220 // A: $pk = const_value 221 // B: const_value = $pk 222 // C: {A|B} and {A|B} 223 // D: {A|B|C} [and {A|B|C}]* 224 // for other patterns, no filter is needed 225 ok, hasNull, searchFunc := getNonCompositePKSearchFuncByExpr( 226 mixin.filterState.expr, 227 mixin.tableDef.Pkey.PkeyColName, 228 proc, 229 ) 230 if !ok || searchFunc == nil { 231 mixin.filterState.evaluated = true 232 mixin.filterState.filter = nil 233 mixin.filterState.hasNull = hasNull 234 return nil 235 } 236 237 // here we will select the primary key column from the vectors, and 238 // use the search function to find the offset of the primary key. 239 // it returns the offset of the primary key in the pk vector. 240 // if the primary key is not found, it returns empty slice 241 mixin.filterState.evaluated = true 242 mixin.filterState.filter = searchFunc 243 mixin.filterState.seqnums = []uint16{mixin.columns.seqnums[mixin.columns.pkPos]} 244 mixin.filterState.colTypes = mixin.columns.colTypes[mixin.columns.pkPos : mixin.columns.pkPos+1] 245 246 // records how many blks one reader needs to read when having filter 247 objectio.BlkReadStats.BlksByReaderStats.Record(1, blkCnt) 248 return searchFunc 249 } 250 251 // ----------------------------------------------------------------- 252 // ------------------------ emptyReader ---------------------------- 253 // ----------------------------------------------------------------- 254 255 func (r *emptyReader) SetFilterZM(objectio.ZoneMap) { 256 } 257 258 func (r *emptyReader) GetOrderBy() []*plan.OrderBySpec { 259 return nil 260 } 261 262 func (r *emptyReader) SetOrderBy([]*plan.OrderBySpec) { 263 } 264 265 func (r *emptyReader) Close() error { 266 return nil 267 } 268 269 func (r *emptyReader) Read(_ context.Context, _ []string, 270 _ *plan.Expr, _ *mpool.MPool, _ engine.VectorPool) (*batch.Batch, error) { 271 return nil, nil 272 } 273 274 // ----------------------------------------------------------------- 275 // ------------------------ blockReader ---------------------------- 276 // ----------------------------------------------------------------- 277 278 func newBlockReader( 279 ctx context.Context, 280 tableDef *plan.TableDef, 281 ts timestamp.Timestamp, 282 blks []*objectio.BlockInfo, 283 filterExpr *plan.Expr, 284 fs fileservice.FileService, 285 proc *process.Process, 286 ) *blockReader { 287 for _, blk := range blks { 288 trace.GetService().TxnReadBlock( 289 proc.TxnOperator, 290 tableDef.TblId, 291 blk.BlockID[:]) 292 } 293 r := &blockReader{ 294 withFilterMixin: withFilterMixin{ 295 ctx: ctx, 296 fs: fs, 297 ts: ts, 298 proc: proc, 299 tableDef: tableDef, 300 }, 301 blks: blks, 302 } 303 r.filterState.expr = filterExpr 304 return r 305 } 306 307 func (r *blockReader) Close() error { 308 r.withFilterMixin.reset() 309 r.blks = nil 310 r.buffer = nil 311 return nil 312 } 313 314 func (r *blockReader) SetFilterZM(zm objectio.ZoneMap) { 315 if !r.filterZM.IsInited() { 316 r.filterZM = zm.Clone() 317 return 318 } 319 if r.desc && r.filterZM.CompareMax(zm) < 0 { 320 r.filterZM = zm.Clone() 321 return 322 } 323 if !r.desc && r.filterZM.CompareMin(zm) > 0 { 324 r.filterZM = zm.Clone() 325 return 326 } 327 } 328 329 func (r *blockReader) GetOrderBy() []*plan.OrderBySpec { 330 return r.OrderBy 331 } 332 333 func (r *blockReader) SetOrderBy(orderby []*plan.OrderBySpec) { 334 r.OrderBy = orderby 335 } 336 337 func (r *blockReader) needReadBlkByZM(i int) bool { 338 zm := r.blockZMS[i] 339 if !r.filterZM.IsInited() || !zm.IsInited() { 340 return true 341 } 342 if r.desc { 343 return r.filterZM.CompareMax(zm) <= 0 344 } else { 345 return r.filterZM.CompareMin(zm) >= 0 346 } 347 } 348 349 func (r *blockReader) getBlockZMs() { 350 orderByCol, _ := r.OrderBy[0].Expr.Expr.(*plan.Expr_Col) 351 orderByColIDX := int(r.tableDef.Cols[int(orderByCol.Col.ColPos)].Seqnum) 352 353 r.blockZMS = make([]index.ZM, len(r.blks)) 354 var objDataMeta objectio.ObjectDataMeta 355 var location objectio.Location 356 for i := range r.blks { 357 location = r.blks[i].MetaLocation() 358 if !objectio.IsSameObjectLocVsMeta(location, objDataMeta) { 359 objMeta, err := objectio.FastLoadObjectMeta(r.ctx, &location, false, r.fs) 360 if err != nil { 361 panic("load object meta error when ordered scan!") 362 } 363 objDataMeta = objMeta.MustDataMeta() 364 } 365 blkMeta := objDataMeta.GetBlockMeta(uint32(location.ID())) 366 r.blockZMS[i] = blkMeta.ColumnMeta(uint16(orderByColIDX)).ZoneMap() 367 } 368 } 369 370 func (r *blockReader) sortBlockList() { 371 helper := make([]*blockSortHelper, len(r.blks)) 372 for i := range r.blks { 373 helper[i] = &blockSortHelper{} 374 helper[i].blk = r.blks[i] 375 helper[i].zm = r.blockZMS[i] 376 } 377 if r.desc { 378 sort.Slice(helper, func(i, j int) bool { 379 zm1 := helper[i].zm 380 if !zm1.IsInited() { 381 return true 382 } 383 zm2 := helper[j].zm 384 if !zm2.IsInited() { 385 return false 386 } 387 return zm1.CompareMax(zm2) > 0 388 }) 389 } else { 390 sort.Slice(helper, func(i, j int) bool { 391 zm1 := helper[i].zm 392 if !zm1.IsInited() { 393 return true 394 } 395 zm2 := helper[j].zm 396 if !zm2.IsInited() { 397 return false 398 } 399 return zm1.CompareMin(zm2) < 0 400 }) 401 } 402 403 for i := range helper { 404 r.blks[i] = helper[i].blk 405 r.blockZMS[i] = helper[i].zm 406 } 407 } 408 409 func (r *blockReader) deleteFirstNBlocks(n int) { 410 r.blks = r.blks[n:] 411 if len(r.OrderBy) > 0 { 412 r.blockZMS = r.blockZMS[n:] 413 } 414 } 415 416 func (r *blockReader) Read( 417 ctx context.Context, 418 cols []string, 419 _ *plan.Expr, 420 mp *mpool.MPool, 421 vp engine.VectorPool, 422 ) (bat *batch.Batch, err error) { 423 start := time.Now() 424 defer func() { 425 v2.TxnBlockReaderDurationHistogram.Observe(time.Since(start).Seconds()) 426 }() 427 428 // for ordered scan, sort blocklist by zonemap info, and then filter by zonemap 429 if len(r.OrderBy) > 0 { 430 if !r.sorted { 431 r.desc = r.OrderBy[0].Flag&plan.OrderBySpec_DESC != 0 432 r.getBlockZMs() 433 r.sortBlockList() 434 r.sorted = true 435 } 436 i := 0 437 for i < len(r.blks) { 438 if r.needReadBlkByZM(i) { 439 break 440 } 441 i++ 442 } 443 r.deleteFirstNBlocks(i) 444 } 445 // if the block list is empty, return nil 446 if len(r.blks) == 0 { 447 return nil, nil 448 } 449 450 // move to the next block at the end of this call 451 defer func() { 452 r.deleteFirstNBlocks(1) 453 r.buffer = r.buffer[:0] 454 r.currentStep++ 455 }() 456 457 // get the current block to be read 458 blockInfo := r.blks[0] 459 460 // try to update the columns 461 // the columns is only updated once for all blocks 462 r.tryUpdateColumns(cols) 463 464 // get the block read filter 465 filter := r.getReadFilter(r.proc, len(r.blks)) 466 467 // if any null expr is found in the primary key (composite primary keys), quick return 468 if r.filterState.hasNull { 469 return nil, nil 470 } 471 472 if !r.dontPrefetch { 473 //prefetch some objects 474 for len(r.steps) > 0 && r.steps[0] == r.currentStep { 475 // always true for now, will optimize this in the future 476 prefetchFile := r.scanType == SMALL || r.scanType == LARGE || r.scanType == NORMAL 477 if filter != nil && blockInfo.Sorted { 478 err = blockio.BlockPrefetch(r.filterState.seqnums, r.fs, [][]*objectio.BlockInfo{r.infos[0]}, prefetchFile) 479 } else { 480 err = blockio.BlockPrefetch(r.columns.seqnums, r.fs, [][]*objectio.BlockInfo{r.infos[0]}, prefetchFile) 481 } 482 if err != nil { 483 return nil, err 484 } 485 r.infos = r.infos[1:] 486 r.steps = r.steps[1:] 487 } 488 } 489 490 statsCtx, numRead, numHit := r.ctx, int64(0), int64(0) 491 if filter != nil { 492 // try to store the blkReadStats CounterSet into ctx, so that 493 // it can record the mem cache hit stats when call MemCache.Read() later soon. 494 statsCtx, numRead, numHit = r.prepareGatherStats() 495 } 496 497 // read the block 498 var policy fileservice.Policy 499 if r.scanType == LARGE || r.scanType == NORMAL { 500 policy = fileservice.SkipMemoryCacheWrites 501 } 502 bat, err = blockio.BlockRead( 503 statsCtx, blockInfo, r.buffer, r.columns.seqnums, r.columns.colTypes, r.ts, 504 r.filterState.seqnums, 505 r.filterState.colTypes, 506 filter, 507 r.fs, mp, vp, policy, 508 ) 509 if err != nil { 510 return nil, err 511 } 512 513 if filter != nil { 514 // we collect mem cache hit related statistics info for blk read here 515 r.gatherStats(numRead, numHit) 516 } 517 518 bat.SetAttributes(cols) 519 520 if blockInfo.Sorted && r.columns.indexOfFirstSortedColumn != -1 { 521 bat.GetVector(int32(r.columns.indexOfFirstSortedColumn)).SetSorted(true) 522 } 523 524 if logutil.GetSkip1Logger().Core().Enabled(zap.DebugLevel) { 525 logutil.Debug(testutil.OperatorCatchBatch("block reader", bat)) 526 } 527 return bat, nil 528 } 529 530 func (r *blockReader) prepareGatherStats() (context.Context, int64, int64) { 531 ctx := perfcounter.WithCounterSet(r.ctx, objectio.BlkReadStats.CounterSet) 532 return ctx, objectio.BlkReadStats.CounterSet.FileService.Cache.Read.Load(), 533 objectio.BlkReadStats.CounterSet.FileService.Cache.Hit.Load() 534 } 535 536 func (r *blockReader) gatherStats(lastNumRead, lastNumHit int64) { 537 numRead := objectio.BlkReadStats.CounterSet.FileService.Cache.Read.Load() 538 numHit := objectio.BlkReadStats.CounterSet.FileService.Cache.Hit.Load() 539 540 curNumRead := numRead - lastNumRead 541 curNumHit := numHit - lastNumHit 542 543 if curNumRead > curNumHit { 544 objectio.BlkReadStats.BlkCacheHitStats.Record(0, 1) 545 } else { 546 objectio.BlkReadStats.BlkCacheHitStats.Record(1, 1) 547 } 548 549 objectio.BlkReadStats.EntryCacheHitStats.Record(int(curNumHit), int(curNumRead)) 550 } 551 552 // ----------------------------------------------------------------- 553 // ---------------------- blockMergeReader ------------------------- 554 // ----------------------------------------------------------------- 555 556 func newBlockMergeReader( 557 ctx context.Context, 558 txnTable *txnTable, 559 pkVal []byte, 560 ts timestamp.Timestamp, 561 dirtyBlks []*objectio.BlockInfo, 562 filterExpr *plan.Expr, 563 fs fileservice.FileService, 564 proc *process.Process, 565 ) *blockMergeReader { 566 r := &blockMergeReader{ 567 table: txnTable, 568 blockReader: newBlockReader( 569 ctx, 570 txnTable.GetTableDef(ctx), 571 ts, 572 dirtyBlks, 573 filterExpr, 574 fs, 575 proc, 576 ), 577 pkVal: pkVal, 578 deletaLocs: make(map[string][]objectio.Location), 579 } 580 return r 581 } 582 583 func (r *blockMergeReader) Close() error { 584 r.table = nil 585 return r.blockReader.Close() 586 } 587 588 func (r *blockMergeReader) prefetchDeletes() error { 589 //load delta locations for r.blocks. 590 r.table.getTxn().blockId_tn_delete_metaLoc_batch.RLock() 591 defer r.table.getTxn().blockId_tn_delete_metaLoc_batch.RUnlock() 592 593 if !r.loaded { 594 for _, info := range r.blks { 595 bats, ok := r.table.getTxn().blockId_tn_delete_metaLoc_batch.data[info.BlockID] 596 597 if !ok { 598 return nil 599 } 600 for _, bat := range bats { 601 vs := vector.MustStrCol(bat.GetVector(0)) 602 for _, deltaLoc := range vs { 603 location, err := blockio.EncodeLocationFromString(deltaLoc) 604 if err != nil { 605 return err 606 } 607 r.deletaLocs[location.Name().String()] = 608 append(r.deletaLocs[location.Name().String()], location) 609 } 610 } 611 } 612 613 // Get Single Col pk index 614 for idx, colDef := range r.tableDef.Cols { 615 if colDef.Name == r.tableDef.Pkey.PkeyColName { 616 r.pkidx = idx 617 break 618 } 619 } 620 r.loaded = true 621 } 622 623 //prefetch the deletes 624 for name, locs := range r.deletaLocs { 625 pref, err := blockio.BuildPrefetchParams(r.fs, locs[0]) 626 if err != nil { 627 return err 628 } 629 for _, loc := range locs { 630 //rowid + pk 631 pref.AddBlockWithType([]uint16{0, uint16(r.pkidx)}, []uint16{loc.ID()}, uint16(objectio.SchemaTombstone)) 632 633 } 634 delete(r.deletaLocs, name) 635 return blockio.PrefetchWithMerged(pref) 636 } 637 return nil 638 } 639 640 func (r *blockMergeReader) loadDeletes(ctx context.Context, cols []string) error { 641 if len(r.blks) == 0 { 642 return nil 643 } 644 info := r.blks[0] 645 646 r.tryUpdateColumns(cols) 647 // load deletes from txn.blockId_dn_delete_metaLoc_batch 648 err := r.table.LoadDeletesForBlock(info.BlockID, &r.buffer) 649 if err != nil { 650 return err 651 } 652 653 // load deletes from partition state for the specified block 654 filter := r.getReadFilter(r.proc, len(r.blks)) 655 656 state, err := r.table.getPartitionState(ctx) 657 if err != nil { 658 return err 659 } 660 ts := types.TimestampToTS(r.ts) 661 662 if filter != nil && info.Sorted && len(r.pkVal) > 0 { 663 iter := state.NewPrimaryKeyDelIter( 664 ts, 665 logtailreplay.Prefix(r.pkVal), 666 info.BlockID, 667 ) 668 for iter.Next() { 669 entry := iter.Entry() 670 if !entry.Deleted { 671 continue 672 } 673 _, offset := entry.RowID.Decode() 674 r.buffer = append(r.buffer, int64(offset)) 675 } 676 iter.Close() 677 } else { 678 iter := state.NewRowsIter(ts, &info.BlockID, true) 679 currlen := len(r.buffer) 680 for iter.Next() { 681 entry := iter.Entry() 682 _, offset := entry.RowID.Decode() 683 r.buffer = append(r.buffer, int64(offset)) 684 } 685 v2.TaskLoadMemDeletesPerBlockHistogram.Observe(float64(len(r.buffer) - currlen)) 686 iter.Close() 687 } 688 689 //TODO:: if r.table.writes is a map , the time complexity could be O(1) 690 //load deletes from txn.writes for the specified block 691 r.table.getTxn().forEachTableWrites( 692 r.table.db.databaseId, 693 r.table.tableId, 694 r.table.getTxn().GetSnapshotWriteOffset(), func(entry Entry) { 695 if entry.isGeneratedByTruncate() { 696 return 697 } 698 if (entry.typ == DELETE || entry.typ == DELETE_TXN) && entry.fileName == "" { 699 vs := vector.MustFixedCol[types.Rowid](entry.bat.GetVector(0)) 700 for _, v := range vs { 701 id, offset := v.Decode() 702 if id == info.BlockID { 703 r.buffer = append(r.buffer, int64(offset)) 704 } 705 } 706 } 707 }) 708 //load deletes from txn.deletedBlocks. 709 txn := r.table.getTxn() 710 txn.deletedBlocks.getDeletedOffsetsByBlock(&info.BlockID, &r.buffer) 711 return nil 712 } 713 714 func (r *blockMergeReader) Read( 715 ctx context.Context, 716 cols []string, 717 expr *plan.Expr, 718 mp *mpool.MPool, 719 vp engine.VectorPool, 720 ) (*batch.Batch, error) { 721 start := time.Now() 722 defer func() { 723 v2.TxnBlockMergeReaderDurationHistogram.Observe(time.Since(start).Seconds()) 724 }() 725 726 //prefetch deletes for r.blks 727 if err := r.prefetchDeletes(); err != nil { 728 return nil, err 729 } 730 //load deletes for the specified block 731 if err := r.loadDeletes(ctx, cols); err != nil { 732 return nil, err 733 } 734 return r.blockReader.Read(ctx, cols, expr, mp, vp) 735 } 736 737 // ----------------------------------------------------------------- 738 // ------------------------ mergeReader ---------------------------- 739 // ----------------------------------------------------------------- 740 741 func NewMergeReader(readers []engine.Reader) *mergeReader { 742 return &mergeReader{ 743 rds: readers, 744 } 745 } 746 747 func (r *mergeReader) SetFilterZM(zm objectio.ZoneMap) { 748 for i := range r.rds { 749 r.rds[i].SetFilterZM(zm) 750 } 751 } 752 753 func (r *mergeReader) GetOrderBy() []*plan.OrderBySpec { 754 for i := range r.rds { 755 if r.rds[i].GetOrderBy() != nil { 756 return r.rds[i].GetOrderBy() 757 } 758 } 759 return nil 760 } 761 762 func (r *mergeReader) SetOrderBy(orderby []*plan.OrderBySpec) { 763 for i := range r.rds { 764 r.rds[i].SetOrderBy(orderby) 765 } 766 } 767 768 func (r *mergeReader) Close() error { 769 return nil 770 } 771 772 func (r *mergeReader) Read( 773 ctx context.Context, 774 cols []string, 775 expr *plan.Expr, 776 mp *mpool.MPool, 777 vp engine.VectorPool, 778 ) (*batch.Batch, error) { 779 start := time.Now() 780 defer func() { 781 v2.TxnMergeReaderDurationHistogram.Observe(time.Since(start).Seconds()) 782 }() 783 784 if len(r.rds) == 0 { 785 return nil, nil 786 } 787 for len(r.rds) > 0 { 788 bat, err := r.rds[0].Read(ctx, cols, expr, mp, vp) 789 if err != nil { 790 for _, rd := range r.rds { 791 rd.Close() 792 } 793 return nil, err 794 } 795 if bat == nil { 796 r.rds = r.rds[1:] 797 } 798 if bat != nil { 799 if logutil.GetSkip1Logger().Core().Enabled(zap.DebugLevel) { 800 logutil.Debug(testutil.OperatorCatchBatch("merge reader", bat)) 801 } 802 return bat, nil 803 } 804 } 805 return nil, nil 806 }