github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/tae/tables/ablk.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tables 16 17 import ( 18 "bytes" 19 "time" 20 21 "sync/atomic" 22 23 "github.com/RoaringBitmap/roaring" 24 "github.com/matrixorigin/matrixone/pkg/common/moerr" 25 "github.com/matrixorigin/matrixone/pkg/container/types" 26 "github.com/matrixorigin/matrixone/pkg/logutil" 27 "github.com/matrixorigin/matrixone/pkg/objectio" 28 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/buffer/base" 29 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog" 30 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common" 31 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/compute" 32 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers" 33 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/data" 34 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/handle" 35 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/iface/txnif" 36 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/model" 37 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tables/updates" 38 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tasks" 39 ) 40 41 type ablock struct { 42 *baseBlock 43 frozen atomic.Bool 44 } 45 46 func newABlock( 47 meta *catalog.BlockEntry, 48 fs *objectio.ObjectFS, 49 bufMgr base.INodeManager, 50 scheduler tasks.TaskScheduler) *ablock { 51 blk := &ablock{} 52 blk.baseBlock = newBaseBlock(blk, meta, bufMgr, fs, scheduler) 53 blk.mvcc.SetAppendListener(blk.OnApplyAppend) 54 blk.mvcc.SetDeletesListener(blk.OnApplyDelete) 55 if blk.meta.HasDropCommitted() { 56 pnode := newPersistedNode(blk.baseBlock) 57 node := NewNode(pnode) 58 node.Ref() 59 blk.node.Store(node) 60 } else { 61 mnode := newMemoryNode(blk.baseBlock) 62 node := NewNode(mnode) 63 node.Ref() 64 blk.node.Store(node) 65 } 66 return blk 67 } 68 69 func (blk *ablock) OnApplyAppend(n txnif.AppendNode) (err error) { 70 blk.meta.GetSegment().GetTable().AddRows(uint64(n.GetMaxRow() - 71 n.GetStartRow())) 72 return 73 } 74 75 func (blk *ablock) OnApplyDelete( 76 deleted uint64, 77 gen common.RowGen, 78 ts types.TS) (err error) { 79 blk.meta.GetSegment().GetTable().RemoveRows(deleted) 80 return 81 } 82 83 func (blk *ablock) FreezeAppend() { 84 blk.frozen.Store(true) 85 } 86 87 func (blk *ablock) IsAppendFrozen() bool { 88 return blk.frozen.Load() 89 } 90 91 func (blk *ablock) IsAppendable() bool { 92 if blk.IsAppendFrozen() { 93 return false 94 } 95 node := blk.PinNode() 96 defer node.Unref() 97 if node.IsPersisted() { 98 return false 99 } 100 return node.Rows() < blk.meta.GetSchema().BlockMaxRows 101 } 102 103 func (blk *ablock) PrepareCompact() bool { 104 if blk.RefCount() > 0 { 105 return false 106 } 107 blk.FreezeAppend() 108 if !blk.meta.PrepareCompact() { 109 return false 110 } 111 return blk.RefCount() == 0 112 } 113 114 func (blk *ablock) Pin() *common.PinnedItem[*ablock] { 115 blk.Ref() 116 return &common.PinnedItem[*ablock]{ 117 Val: blk, 118 } 119 } 120 121 func (blk *ablock) GetColumnDataByNames( 122 txn txnif.AsyncTxn, 123 attrs []string, 124 buffers []*bytes.Buffer) (view *model.BlockView, err error) { 125 colIdxes := make([]int, len(attrs)) 126 for i, attr := range attrs { 127 colIdxes[i] = blk.meta.GetSchema().GetColIdx(attr) 128 } 129 return blk.GetColumnDataByIds(txn, colIdxes, buffers) 130 } 131 132 func (blk *ablock) GetColumnDataByName( 133 txn txnif.AsyncTxn, 134 attr string, 135 buffer *bytes.Buffer) (view *model.ColumnView, err error) { 136 colIdx := blk.meta.GetSchema().GetColIdx(attr) 137 return blk.GetColumnDataById(txn, colIdx, buffer) 138 } 139 140 func (blk *ablock) GetColumnDataByIds( 141 txn txnif.AsyncTxn, 142 colIdxes []int, 143 buffers []*bytes.Buffer) (view *model.BlockView, err error) { 144 return blk.resolveColumnDatas( 145 txn.GetStartTS(), 146 colIdxes, 147 buffers, 148 false) 149 } 150 151 func (blk *ablock) GetColumnDataById( 152 txn txnif.AsyncTxn, 153 colIdx int, 154 buffer *bytes.Buffer) (view *model.ColumnView, err error) { 155 return blk.resolveColumnData( 156 txn.GetStartTS(), 157 colIdx, 158 buffer, 159 false) 160 } 161 162 func (blk *ablock) resolveColumnDatas( 163 ts types.TS, 164 colIdxes []int, 165 buffers []*bytes.Buffer, 166 skipDeletes bool) (view *model.BlockView, err error) { 167 node := blk.PinNode() 168 defer node.Unref() 169 170 if !node.IsPersisted() { 171 return blk.resolveInMemoryColumnDatas( 172 node.MustMNode(), 173 ts, 174 colIdxes, 175 buffers, 176 skipDeletes) 177 } else { 178 return blk.ResolvePersistedColumnDatas( 179 node.MustPNode(), 180 ts, 181 colIdxes, 182 buffers, 183 skipDeletes, 184 ) 185 } 186 } 187 188 func (blk *ablock) resolveColumnData( 189 ts types.TS, 190 colIdx int, 191 buffer *bytes.Buffer, 192 skipDeletes bool) (view *model.ColumnView, err error) { 193 node := blk.PinNode() 194 defer node.Unref() 195 196 if !node.IsPersisted() { 197 return blk.resolveInMemoryColumnData( 198 node.MustMNode(), 199 ts, 200 colIdx, 201 buffer, 202 skipDeletes) 203 } else { 204 return blk.ResolvePersistedColumnData( 205 node.MustPNode(), 206 ts, 207 colIdx, 208 buffer, 209 skipDeletes, 210 ) 211 } 212 } 213 214 // Note: With PinNode Context 215 func (blk *ablock) resolveInMemoryColumnDatas( 216 mnode *memoryNode, 217 ts types.TS, 218 colIdxes []int, 219 buffers []*bytes.Buffer, 220 skipDeletes bool) (view *model.BlockView, err error) { 221 blk.RLock() 222 defer blk.RUnlock() 223 maxRow, visible, deSels, err := blk.mvcc.GetVisibleRowLocked(ts) 224 if !visible || err != nil { 225 // blk.RUnlock() 226 return 227 } 228 229 data, err := mnode.GetDataWindow(0, maxRow) 230 if err != nil { 231 return 232 } 233 view = model.NewBlockView(ts) 234 for _, colIdx := range colIdxes { 235 view.SetData(colIdx, data.Vecs[colIdx]) 236 } 237 if skipDeletes { 238 // blk.RUnlock() 239 return 240 } 241 242 err = blk.FillInMemoryDeletesLocked(view.BaseView, blk.RWMutex) 243 // blk.RUnlock() 244 if err != nil { 245 return 246 } 247 if deSels != nil && !deSels.IsEmpty() { 248 if view.DeleteMask != nil { 249 view.DeleteMask.Or(deSels) 250 } else { 251 view.DeleteMask = deSels 252 } 253 } 254 return 255 } 256 257 // Note: With PinNode Context 258 func (blk *ablock) resolveInMemoryColumnData( 259 mnode *memoryNode, 260 ts types.TS, 261 colIdx int, 262 buffer *bytes.Buffer, 263 skipDeletes bool) (view *model.ColumnView, err error) { 264 blk.RLock() 265 defer blk.RUnlock() 266 maxRow, visible, deSels, err := blk.mvcc.GetVisibleRowLocked(ts) 267 if !visible || err != nil { 268 // blk.RUnlock() 269 return 270 } 271 272 view = model.NewColumnView(ts, colIdx) 273 var data containers.Vector 274 data, err = mnode.GetColumnDataWindow( 275 0, 276 maxRow, 277 colIdx, 278 buffer) 279 if err != nil { 280 // blk.RUnlock() 281 return 282 } 283 view.SetData(data) 284 if skipDeletes { 285 // blk.RUnlock() 286 return 287 } 288 289 err = blk.FillInMemoryDeletesLocked(view.BaseView, blk.RWMutex) 290 // blk.RUnlock() 291 if err != nil { 292 return 293 } 294 if deSels != nil && !deSels.IsEmpty() { 295 if view.DeleteMask != nil { 296 view.DeleteMask.Or(deSels) 297 } else { 298 view.DeleteMask = deSels 299 } 300 } 301 302 return 303 } 304 305 func (blk *ablock) GetValue( 306 txn txnif.AsyncTxn, 307 row, col int) (v any, err error) { 308 ts := txn.GetStartTS() 309 node := blk.PinNode() 310 defer node.Unref() 311 if !node.IsPersisted() { 312 return blk.getInMemoryValue(node.MustMNode(), ts, row, col) 313 } else { 314 return blk.getPersistedValue( 315 node.MustPNode(), 316 ts, 317 row, 318 col, 319 true) 320 } 321 } 322 323 // With PinNode Context 324 func (blk *ablock) getInMemoryValue( 325 mnode *memoryNode, 326 ts types.TS, 327 row, col int) (v any, err error) { 328 blk.RLock() 329 deleted, err := blk.mvcc.IsDeletedLocked(uint32(row), ts, blk.RWMutex) 330 blk.RUnlock() 331 if err != nil { 332 return 333 } 334 if deleted { 335 err = moerr.NewNotFoundNoCtx() 336 return 337 } 338 view, err := blk.resolveInMemoryColumnData(mnode, ts, col, nil, true) 339 if err != nil { 340 return 341 } 342 defer view.Close() 343 v = view.GetValue(row) 344 return 345 } 346 347 func (blk *ablock) GetByFilter( 348 txn txnif.AsyncTxn, 349 filter *handle.Filter) (offset uint32, err error) { 350 if filter.Op != handle.FilterEq { 351 panic("logic error") 352 } 353 if blk.meta.GetSchema().SortKey == nil { 354 _, _, offset = model.DecodePhyAddrKeyFromValue(filter.Val) 355 return 356 } 357 ts := txn.GetStartTS() 358 359 node := blk.PinNode() 360 defer node.Unref() 361 if !node.IsPersisted() { 362 return blk.getInMemoryRowByFilter(node.MustMNode(), ts, filter) 363 } else { 364 return blk.getPersistedRowByFilter(node.MustPNode(), ts, filter) 365 } 366 } 367 368 func (blk *ablock) getPersistedRowByFilter( 369 pnode *persistedNode, 370 ts types.TS, 371 filter *handle.Filter) (row uint32, err error) { 372 ok, err := pnode.ContainsKey(filter.Val) 373 if err != nil { 374 return 375 } 376 if !ok { 377 err = moerr.NewNotFoundNoCtx() 378 return 379 } 380 sortKey, err := blk.LoadPersistedColumnData( 381 blk.meta.GetSchema().GetSingleSortKeyIdx(), 382 nil, 383 ) 384 if err != nil { 385 return 386 } 387 defer sortKey.Close() 388 rows := make([]uint32, 0) 389 err = sortKey.Foreach(func(v any, offset int) error { 390 if compute.CompareGeneric(v, filter.Val, sortKey.GetType()) == 0 { 391 row := uint32(offset) 392 rows = append(rows, row) 393 return nil 394 } 395 return nil 396 }, nil) 397 if err != nil && !moerr.IsMoErrCode(err, moerr.OkExpectedDup) { 398 return 399 } 400 if len(rows) == 0 { 401 err = moerr.NewNotFoundNoCtx() 402 return 403 } 404 405 // Load persisted commit ts 406 commitTSVec, err := blk.LoadPersistedCommitTS() 407 if err != nil { 408 return 409 } 410 defer commitTSVec.Close() 411 412 // Load persisted deletes 413 view := model.NewColumnView(ts, 0) 414 if err = blk.FillPersistedDeletes(view.BaseView); err != nil { 415 return 416 } 417 418 exist := false 419 var deleted bool 420 for _, offset := range rows { 421 commitTS := commitTSVec.Get(int(offset)).(types.TS) 422 if commitTS.Greater(ts) { 423 break 424 } 425 deleted = view.IsDeleted(int(offset)) 426 if !deleted { 427 exist = true 428 row = offset 429 break 430 } 431 } 432 if !exist { 433 err = moerr.NewNotFoundNoCtx() 434 } 435 return 436 } 437 438 // With PinNode Context 439 func (blk *ablock) getInMemoryRowByFilter( 440 mnode *memoryNode, 441 ts types.TS, 442 filter *handle.Filter) (row uint32, err error) { 443 blk.RLock() 444 defer blk.RUnlock() 445 rows, err := mnode.GetRowsByKey(filter.Val) 446 if err != nil && !moerr.IsMoErrCode(err, moerr.ErrNotFound) { 447 return 448 } 449 450 waitFn := func(n *updates.AppendNode) { 451 txn := n.Txn 452 if txn != nil { 453 blk.RUnlock() 454 txn.GetTxnState(true) 455 blk.RLock() 456 } 457 } 458 if anyWaitable := blk.mvcc.CollectUncommittedANodesPreparedBefore( 459 ts, 460 waitFn); anyWaitable { 461 rows, err = mnode.GetRowsByKey(filter.Val) 462 if err != nil { 463 return 464 } 465 } 466 467 for i := len(rows) - 1; i >= 0; i-- { 468 row = rows[i] 469 appendnode := blk.mvcc.GetAppendNodeByRow(row) 470 needWait, txn := appendnode.NeedWaitCommitting(ts) 471 if needWait { 472 blk.RUnlock() 473 txn.GetTxnState(true) 474 blk.RLock() 475 } 476 if appendnode.IsAborted() || !appendnode.IsVisible(ts) { 477 continue 478 } 479 var deleted bool 480 deleted, err = blk.mvcc.IsDeletedLocked(row, ts, blk.mvcc.RWMutex) 481 if err != nil { 482 return 483 } 484 if !deleted { 485 return 486 } 487 } 488 return 0, moerr.NewNotFoundNoCtx() 489 } 490 491 func (blk *ablock) checkConflictAndDupClosure( 492 dedupTS types.TS, 493 conflictTS types.TS, 494 dupRow *uint32, 495 rowmask *roaring.Bitmap) func(row uint32) error { 496 return func(row uint32) (err error) { 497 if rowmask != nil && rowmask.Contains(row) { 498 return nil 499 } 500 appendnode := blk.mvcc.GetAppendNodeByRow(row) 501 needWait, txn := appendnode.NeedWaitCommitting(dedupTS) 502 if needWait { 503 blk.mvcc.RUnlock() 504 txn.GetTxnState(true) 505 blk.mvcc.RLock() 506 } 507 if err = appendnode.CheckConflict(conflictTS); err != nil { 508 return 509 } 510 if appendnode.IsAborted() || !appendnode.IsVisible(dedupTS) { 511 return nil 512 } 513 deleteNode := blk.mvcc.GetDeleteNodeByRow(row) 514 if deleteNode == nil { 515 *dupRow = row 516 return moerr.GetOkExpectedDup() 517 } 518 needWait, txn = deleteNode.NeedWaitCommitting(dedupTS) 519 if needWait { 520 blk.mvcc.RUnlock() 521 txn.GetTxnState(true) 522 blk.mvcc.RLock() 523 } 524 if err = deleteNode.CheckConflict(conflictTS); err != nil { 525 return 526 } 527 if deleteNode.IsAborted() || !deleteNode.IsVisible(dedupTS) { 528 return moerr.GetOkExpectedDup() 529 } 530 return nil 531 } 532 } 533 534 func (blk *ablock) inMemoryBatchDedup( 535 mnode *memoryNode, 536 dedupTS types.TS, 537 conflictTS types.TS, 538 keys containers.Vector, 539 rowmask *roaring.Bitmap) (err error) { 540 var dupRow uint32 541 blk.RLock() 542 defer blk.RUnlock() 543 _, err = mnode.BatchDedup( 544 keys, 545 blk.checkConflictAndDupClosure(dedupTS, conflictTS, &dupRow, rowmask)) 546 547 // definitely no duplicate 548 if err == nil || !moerr.IsMoErrCode(err, moerr.OkExpectedDup) { 549 return 550 } 551 552 def := blk.meta.GetSchema().GetSingleSortKey() 553 v := mnode.GetValueByRow(int(dupRow), def.Idx) 554 entry := common.TypeStringValue(keys.GetType(), v) 555 return moerr.NewDuplicateEntryNoCtx(entry, def.Name) 556 } 557 558 func (blk *ablock) dedupClosure( 559 vec containers.Vector, 560 ts types.TS, 561 mask *roaring.Bitmap, 562 def *catalog.ColDef) func(any, int) error { 563 return func(v1 any, _ int) (err error) { 564 return vec.Foreach(func(v2 any, row int) error { 565 if mask != nil && mask.ContainsInt(row) { 566 return nil 567 } 568 if compute.CompareGeneric(v1, v2, vec.GetType()) == 0 { 569 commitTSVec, err := blk.LoadPersistedCommitTS() 570 if err != nil { 571 return err 572 } 573 defer commitTSVec.Close() 574 commiTs := commitTSVec.Get(row).(types.TS) 575 if commiTs.Greater(ts) { 576 return txnif.ErrTxnWWConflict 577 } 578 entry := common.TypeStringValue(vec.GetType(), v1) 579 return moerr.NewDuplicateEntryNoCtx(entry, def.Name) 580 } 581 return nil 582 }, nil) 583 } 584 } 585 586 func (blk *ablock) BatchDedup( 587 txn txnif.AsyncTxn, 588 keys containers.Vector, 589 rowmask *roaring.Bitmap, 590 precommit bool) (err error) { 591 defer func() { 592 if moerr.IsMoErrCode(err, moerr.ErrDuplicateEntry) { 593 logutil.Infof("BatchDedup BLK-%d: %v", blk.meta.ID, err) 594 } 595 }() 596 dedupTS := txn.GetStartTS() 597 if precommit { 598 dedupTS = txn.GetPrepareTS() 599 } 600 node := blk.PinNode() 601 defer node.Unref() 602 if !node.IsPersisted() { 603 return blk.inMemoryBatchDedup(node.MustMNode(), dedupTS, txn.GetStartTS(), keys, rowmask) 604 } else { 605 return blk.PersistedBatchDedup( 606 node.MustPNode(), 607 dedupTS, 608 keys, 609 rowmask, 610 blk.dedupClosure) 611 } 612 } 613 614 func (blk *ablock) persistedCollectAppendInRange( 615 pnode *persistedNode, 616 start, end types.TS, 617 withAborted bool) (bat *containers.Batch, err error) { 618 // FIXME: we'll gc mvcc after being persisted. refactor it later 619 blk.RLock() 620 minRow, maxRow, commitTSVec, abortVec, abortedMap := 621 blk.mvcc.CollectAppendLocked(start, end) 622 blk.RUnlock() 623 if bat, err = pnode.GetDataWindow(minRow, maxRow); err != nil { 624 return 625 } 626 bat.AddVector(catalog.AttrCommitTs, commitTSVec) 627 if withAborted { 628 bat.AddVector(catalog.AttrAborted, abortVec) 629 } else { 630 bat.Deletes = abortedMap 631 bat.Compact() 632 } 633 return 634 } 635 636 func (blk *ablock) inMemoryCollectAppendInRange( 637 mnode *memoryNode, 638 start, end types.TS, 639 withAborted bool) (bat *containers.Batch, err error) { 640 blk.RLock() 641 minRow, maxRow, commitTSVec, abortVec, abortedMap := 642 blk.mvcc.CollectAppendLocked(start, end) 643 if bat, err = mnode.GetDataWindow(minRow, maxRow); err != nil { 644 blk.RUnlock() 645 return 646 } 647 blk.RUnlock() 648 bat.AddVector(catalog.AttrCommitTs, commitTSVec) 649 if withAborted { 650 bat.AddVector(catalog.AttrAborted, abortVec) 651 } else { 652 bat.Deletes = abortedMap 653 bat.Compact() 654 } 655 return 656 } 657 658 func (blk *ablock) CollectAppendInRange( 659 start, end types.TS, 660 withAborted bool) (*containers.Batch, error) { 661 node := blk.PinNode() 662 defer node.Unref() 663 if !node.IsPersisted() { 664 return blk.inMemoryCollectAppendInRange( 665 node.MustMNode(), 666 start, 667 end, 668 withAborted) 669 } else { 670 return blk.persistedCollectAppendInRange( 671 node.MustPNode(), 672 start, 673 end, 674 withAborted) 675 } 676 } 677 678 func (blk *ablock) estimateRawScore() (score int, dropped bool) { 679 if blk.meta.HasDropCommitted() { 680 dropped = true 681 return 682 } 683 blk.meta.RLock() 684 atLeastOneCommitted := blk.meta.HasCommittedNode() 685 blk.meta.RUnlock() 686 if !atLeastOneCommitted { 687 score = 1 688 return 689 } 690 691 rows := blk.Rows() 692 if rows == int(blk.meta.GetSchema().BlockMaxRows) { 693 score = 100 694 return 695 } 696 697 if blk.mvcc.GetChangeNodeCnt() == 0 && rows == 0 { 698 score = 0 699 } else { 700 score = 1 701 } 702 703 if score > 0 { 704 if _, terminated := blk.meta.GetTerminationTS(); terminated { 705 score = 100 706 } 707 } 708 return 709 } 710 711 func (blk *ablock) RunCalibration() (score int) { 712 score, _ = blk.estimateRawScore() 713 return 714 } 715 716 func (blk *ablock) EstimateScore(ttl time.Duration, force bool) int { 717 return blk.adjustScore(blk.estimateRawScore, ttl, force) 718 } 719 720 func (blk *ablock) OnReplayAppend(node txnif.AppendNode) (err error) { 721 an := node.(*updates.AppendNode) 722 blk.mvcc.OnReplayAppendNode(an) 723 return 724 } 725 726 func (blk *ablock) OnReplayAppendPayload(bat *containers.Batch) (err error) { 727 appender, err := blk.MakeAppender() 728 if err != nil { 729 return 730 } 731 _, err = appender.ReplayAppend(bat, nil) 732 return 733 } 734 735 func (blk *ablock) MakeAppender() (appender data.BlockAppender, err error) { 736 if blk == nil { 737 err = moerr.GetOkExpectedEOB() 738 return 739 } 740 appender = newAppender(blk) 741 return 742 } 743 744 func (blk *ablock) Init() (err error) { return }