github.com/matrixorigin/matrixone@v0.7.0/pkg/txn/storage/memorystorage/memtable/table.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package memtable 16 17 import ( 18 "database/sql" 19 "errors" 20 "fmt" 21 "io" 22 "sync" 23 "sync/atomic" 24 "time" 25 26 "github.com/matrixorigin/matrixone/pkg/common/moerr" 27 "github.com/matrixorigin/matrixone/pkg/txn/storage/memorystorage/memorytable" 28 "github.com/tidwall/btree" 29 ) 30 31 type Table[ 32 K memorytable.Ordered[K], 33 V any, 34 R Row[K, V], 35 ] struct { 36 sync.Mutex 37 state atomic.Pointer[tableState[K, V]] 38 } 39 40 type tableState[ 41 K memorytable.Ordered[K], 42 V any, 43 ] struct { 44 rows *btree.BTreeG[*PhysicalRow[K, V]] 45 indexes *btree.BTreeG[*IndexEntry[K, V]] 46 reverseIndexes *btree.BTreeG[*ReverseIndexEntry[K, V]] 47 writes *btree.BTreeG[*WriteEntry[K, V]] 48 uniqueIndexes *btree.BTreeG[*IndexEntry[K, V]] 49 reverseUniqueIndexes *btree.BTreeG[*ReverseIndexEntry[K, V]] 50 } 51 52 func (t *tableState[K, V]) Copy() *tableState[K, V] { 53 return &tableState[K, V]{ 54 rows: t.rows.Copy(), 55 indexes: t.indexes.Copy(), 56 reverseIndexes: t.reverseIndexes.Copy(), 57 writes: t.writes.Copy(), 58 uniqueIndexes: t.uniqueIndexes.Copy(), 59 reverseUniqueIndexes: t.reverseUniqueIndexes.Copy(), 60 } 61 } 62 63 type Row[K any, V any] interface { 64 Key() K 65 Value() V 66 Indexes() []Tuple 67 UniqueIndexes() []Tuple 68 } 69 70 type IndexEntry[ 71 K memorytable.Ordered[K], 72 V any, 73 ] struct { 74 Index Tuple 75 Key K 76 VersionID int64 77 } 78 79 type ReverseIndexEntry[ 80 K memorytable.Ordered[K], 81 V any, 82 ] struct { 83 Key K 84 VersionID int64 85 Index Tuple 86 } 87 88 type WriteEntry[ 89 K memorytable.Ordered[K], 90 V any, 91 ] struct { 92 Transaction *Transaction 93 Key *K 94 95 VersionID int64 96 } 97 98 func NewTable[ 99 K memorytable.Ordered[K], 100 V any, 101 R Row[K, V], 102 ]() *Table[K, V, R] { 103 ret := &Table[K, V, R]{} 104 state := &tableState[K, V]{ 105 rows: btree.NewBTreeG(comparePhysicalRow[K, V]), 106 indexes: btree.NewBTreeG(compareIndexEntry[K, V]), 107 reverseIndexes: btree.NewBTreeG(compareReverseIndexEntry[K, V]), 108 writes: btree.NewBTreeG(compareWriteEntry[K, V]), 109 uniqueIndexes: btree.NewBTreeG(compareIndexEntry[K, V]), 110 reverseUniqueIndexes: btree.NewBTreeG(compareReverseIndexEntry[K, V]), 111 } 112 ret.state.Store(state) 113 return ret 114 } 115 116 func comparePhysicalRow[ 117 K memorytable.Ordered[K], 118 V any, 119 ](a, b *PhysicalRow[K, V]) bool { 120 return a.Key.Less(b.Key) 121 } 122 123 func compareIndexEntry[ 124 K memorytable.Ordered[K], 125 V any, 126 ](a, b *IndexEntry[K, V]) bool { 127 if a.Index.Less(b.Index) { 128 return true 129 } 130 if b.Index.Less(a.Index) { 131 return false 132 } 133 if a.Key.Less(b.Key) { 134 return true 135 } 136 if b.Key.Less(a.Key) { 137 return false 138 } 139 return a.VersionID < b.VersionID 140 } 141 142 func compareReverseIndexEntry[ 143 K memorytable.Ordered[K], 144 V any, 145 ](a, b *ReverseIndexEntry[K, V]) bool { 146 if a.Key.Less(b.Key) { 147 return true 148 } 149 if b.Key.Less(a.Key) { 150 return false 151 } 152 if a.VersionID < b.VersionID { 153 return true 154 } 155 if b.VersionID < a.VersionID { 156 return false 157 } 158 return a.Index.Less(b.Index) 159 } 160 161 func compareWriteEntry[ 162 K memorytable.Ordered[K], 163 V any, 164 ](a, b *WriteEntry[K, V]) bool { 165 if a.Transaction.ID < b.Transaction.ID { 166 return true 167 } 168 if a.Transaction.ID > b.Transaction.ID { 169 return false 170 } 171 if a.Key != nil && b.Key != nil { 172 if (*a.Key).Less(*b.Key) { 173 return true 174 } 175 if (*b.Key).Less(*a.Key) { 176 return false 177 } 178 } 179 return a.Key == nil && b.Key != nil 180 } 181 182 func (t *Table[K, V, R]) Insert( 183 tx *Transaction, 184 row R, 185 ) error { 186 key := row.Key() 187 188 return t.update(func(state *tableState[K, V]) error { 189 physicalRow := getOrSetRowByKey(state.rows, key) 190 191 if err := validate(physicalRow, tx); err != nil { 192 return err 193 } 194 195 for i := len(physicalRow.Versions) - 1; i >= 0; i-- { 196 version := physicalRow.Versions[i] 197 if version.Visible(tx.Time, tx.ID, tx.IsolationPolicy.Read) { 198 return moerr.NewDuplicateNoCtx() 199 } 200 } 201 202 value := row.Value() 203 physicalRow, version, err := physicalRow.Insert( 204 tx.Time, tx, value, 205 ) 206 if err != nil { 207 return err 208 } 209 210 // index entry 211 if err := setIndexes(tx, t, state, key, version, row); err != nil { 212 return err 213 } 214 215 // write entry 216 tx.committers[t] = struct{}{} 217 state.writes.Set(&WriteEntry[K, V]{ 218 Transaction: tx, 219 Key: &key, 220 VersionID: version.ID, 221 }) 222 223 // row entry 224 state.rows.Set(physicalRow) 225 226 tx.Time.Tick() 227 return nil 228 }) 229 230 } 231 232 func (t *Table[K, V, R]) Update( 233 tx *Transaction, 234 row R, 235 ) error { 236 key := row.Key() 237 238 return t.update(func(state *tableState[K, V]) error { 239 physicalRow := getOrSetRowByKey(state.rows, key) 240 241 value := row.Value() 242 physicalRow, version, err := physicalRow.Update( 243 tx.Time, tx, value, 244 ) 245 if err != nil { 246 return err 247 } 248 249 // index entry 250 if err := setIndexes(tx, t, state, key, version, row); err != nil { 251 return err 252 } 253 254 // write entry 255 tx.committers[t] = struct{}{} 256 state.writes.Set(&WriteEntry[K, V]{ 257 Transaction: tx, 258 Key: &key, 259 VersionID: version.ID, 260 }) 261 262 // row entry 263 state.rows.Set(physicalRow) 264 265 tx.Time.Tick() 266 return nil 267 }) 268 } 269 270 func (t *Table[K, V, R]) Delete( 271 tx *Transaction, 272 key K, 273 ) error { 274 275 return t.update(func(state *tableState[K, V]) error { 276 physicalRow := getRowByKey(state.rows, key) 277 if physicalRow == nil { 278 return nil 279 } 280 281 physicalRow, version, err := physicalRow.Delete(tx.Time, tx) 282 if err != nil { 283 return err 284 } 285 286 // write entry 287 tx.committers[t] = struct{}{} 288 state.writes.Set(&WriteEntry[K, V]{ 289 Transaction: tx, 290 Key: &key, 291 VersionID: version.ID, 292 }) 293 294 // row entry 295 state.rows.Set(physicalRow) 296 297 tx.Time.Tick() 298 return nil 299 }) 300 301 } 302 303 func (t *Table[K, V, R]) Upsert( 304 tx *Transaction, 305 row R, 306 ) error { 307 key := row.Key() 308 309 return t.update(func(state *tableState[K, V]) error { 310 physicalRow := getOrSetRowByKey(state.rows, key) 311 312 value := row.Value() 313 updatedPhysicalRow, version, err := physicalRow.Update( 314 tx.Time, tx, value, 315 ) 316 if err != nil { 317 318 if errors.Is(err, sql.ErrNoRows) { 319 // insert 320 if err := validate(physicalRow, tx); err != nil { 321 return err 322 } 323 324 for i := len(physicalRow.Versions) - 1; i >= 0; i-- { 325 version := physicalRow.Versions[i] 326 if version.Visible(tx.Time, tx.ID, tx.IsolationPolicy.Read) { 327 return moerr.NewDuplicateNoCtx() 328 } 329 } 330 331 value := row.Value() 332 physicalRow, version, err = physicalRow.Insert( 333 tx.Time, tx, value, 334 ) 335 if err != nil { 336 return err 337 } 338 339 } else { 340 return err 341 } 342 } else { 343 physicalRow = updatedPhysicalRow 344 } 345 346 // index entry 347 if err := setIndexes(tx, t, state, key, version, row); err != nil { 348 return err 349 } 350 351 // write entry 352 tx.committers[t] = struct{}{} 353 state.writes.Set(&WriteEntry[K, V]{ 354 Transaction: tx, 355 Key: &key, 356 VersionID: version.ID, 357 }) 358 359 // row entry 360 state.rows.Set(physicalRow) 361 362 tx.Time.Tick() 363 return nil 364 }) 365 } 366 367 func setIndexes[ 368 K memorytable.Ordered[K], 369 V any, 370 R Row[K, V], 371 ]( 372 tx *Transaction, 373 table *Table[K, V, R], 374 state *tableState[K, V], 375 key K, 376 version *Version[V], 377 row R, 378 ) error { 379 380 // index entries 381 for _, index := range row.Indexes() { 382 state.indexes.Set(&IndexEntry[K, V]{ 383 Index: index, 384 Key: key, 385 VersionID: version.ID, 386 }) 387 state.reverseIndexes.Set(&ReverseIndexEntry[K, V]{ 388 Key: key, 389 VersionID: version.ID, 390 Index: index, 391 }) 392 } 393 394 // unique index entries 395 uniqueIndexes := row.UniqueIndexes() 396 for _, index := range uniqueIndexes { 397 iter := table.newIndexIter( 398 state.uniqueIndexes.Copy().Iter(), 399 state.rows, 400 tx, 401 index, 402 append(index, Min), 403 ) 404 for ok := iter.First(); ok; ok = iter.Next() { 405 return moerr.NewDuplicateNoCtx() 406 } 407 state.uniqueIndexes.Set(&IndexEntry[K, V]{ 408 Index: index, 409 Key: key, 410 VersionID: version.ID, 411 }) 412 state.reverseUniqueIndexes.Set(&ReverseIndexEntry[K, V]{ 413 Key: key, 414 VersionID: version.ID, 415 Index: index, 416 }) 417 } 418 419 return nil 420 } 421 422 func (t *Table[K, V, R]) Get( 423 tx *Transaction, 424 key K, 425 ) ( 426 value V, 427 err error, 428 ) { 429 state := t.state.Load() 430 physicalRow := getRowByKey(state.rows, key) 431 if physicalRow == nil { 432 err = sql.ErrNoRows 433 return 434 } 435 value, err = physicalRow.Read(tx.Time, tx) 436 if err != nil { 437 return 438 } 439 return 440 } 441 442 func getRowByKey[ 443 K memorytable.Ordered[K], 444 V any, 445 ]( 446 tree *btree.BTreeG[*PhysicalRow[K, V]], 447 key K, 448 ) *PhysicalRow[K, V] { 449 pivot := &PhysicalRow[K, V]{ 450 Key: key, 451 } 452 row, _ := tree.Get(pivot) 453 if row == nil { 454 return nil 455 } 456 return row 457 } 458 459 func getOrSetRowByKey[ 460 K memorytable.Ordered[K], 461 V any, 462 ]( 463 tree *btree.BTreeG[*PhysicalRow[K, V]], 464 key K, 465 ) *PhysicalRow[K, V] { 466 pivot := &PhysicalRow[K, V]{ 467 Key: key, 468 } 469 if row, _ := tree.Get(pivot); row != nil { 470 return row 471 } 472 pivot.LastUpdate = time.Now() 473 tree.Set(pivot) 474 return pivot 475 } 476 477 func (t *Table[K, V, R]) Index(tx *Transaction, index Tuple) (entries []*IndexEntry[K, V], err error) { 478 iter := t.NewIndexIter( 479 tx, 480 index, 481 index, 482 ) 483 defer iter.Close() 484 for ok := iter.First(); ok; ok = iter.Next() { 485 entry := iter.Item() 486 entries = append(entries, entry) 487 } 488 return 489 } 490 491 func (t *Table[K, V, R]) CommitTx(tx *Transaction) error { 492 return t.update(func(state *tableState[K, V]) error { 493 iter := state.writes.Copy().Iter() 494 defer iter.Release() 495 pivot := &WriteEntry[K, V]{ 496 Transaction: tx, 497 } 498 for ok := iter.Seek(pivot); ok; ok = iter.Next() { 499 entry := iter.Item() 500 if entry.Transaction != tx { 501 break 502 } 503 504 key := *entry.Key 505 physicalRow := getRowByKey(state.rows, key) 506 if err := validate(physicalRow, tx); err != nil { 507 return err 508 } 509 510 physicalRow = physicalRow.clone() 511 for i, version := range physicalRow.Versions { 512 if version.ID != entry.VersionID { 513 continue 514 } 515 516 // set born time and lock time to commit time 517 if version.LockTx == tx { 518 version.LockTime = tx.CommitTime 519 } 520 if version.BornTx == tx { 521 version.BornTime = tx.CommitTime 522 } 523 524 // check unique index 525 reverseIter := state.reverseUniqueIndexes.Copy().Iter() 526 pivot := &ReverseIndexEntry[K, V]{ 527 Key: key, 528 VersionID: version.ID, 529 Index: Tuple{Min}, 530 } 531 for ok := reverseIter.Seek(pivot); ok; ok = reverseIter.Next() { 532 entry := reverseIter.Item() 533 if key.Less(entry.Key) { 534 break 535 } 536 if entry.VersionID != version.ID { 537 break 538 } 539 iter := t.newIndexIter( 540 state.uniqueIndexes.Copy().Iter(), 541 state.rows, 542 tx, 543 entry.Index, 544 append(entry.Index, Min), 545 ) 546 for ok := iter.First(); ok; ok = iter.Next() { 547 index := iter.Item() 548 if index.Key.Less(entry.Key) || 549 entry.Key.Less(index.Key) || 550 index.VersionID != entry.VersionID { 551 return moerr.NewDuplicateNoCtx() 552 } 553 } 554 } 555 556 physicalRow.Versions[i] = version 557 } 558 state.rows.Set(physicalRow) 559 560 // delete write entry 561 state.writes.Delete(entry) 562 563 } 564 return nil 565 }) 566 567 } 568 569 func (t *Table[K, V, R]) FilterVersions(filterFunc func(K, []Version[V]) ([]Version[V], error)) error { 570 return t.update(func(state *tableState[K, V]) error { 571 rowsIter := state.rows.Copy().Iter() 572 defer rowsIter.Release() 573 for ok := rowsIter.First(); ok; ok = rowsIter.Next() { 574 physicalRow := rowsIter.Item() 575 key := physicalRow.Key 576 577 newVersions, err := filterFunc(key, physicalRow.Versions) 578 if err != nil { 579 return err 580 } 581 582 if len(newVersions) == 0 { 583 // delete 584 state.rows.Delete(physicalRow) 585 } else { 586 // update 587 physicalRow = physicalRow.clone() 588 physicalRow.Versions = newVersions 589 state.rows.Set(physicalRow) 590 } 591 592 newVersionIDSet := make(map[int64]bool) 593 for _, v := range newVersions { 594 newVersionIDSet[v.ID] = true 595 } 596 597 // remove indexes 598 iter := state.reverseIndexes.Copy().Iter() 599 for ok := iter.Seek(&ReverseIndexEntry[K, V]{ 600 Key: key, 601 VersionID: 0, 602 }); ok; ok = iter.Next() { 603 entry := iter.Item() 604 if key.Less(entry.Key) { 605 break 606 } 607 if newVersionIDSet[entry.VersionID] { 608 continue 609 } 610 state.indexes.Delete(&IndexEntry[K, V]{ 611 Index: entry.Index, 612 Key: entry.Key, 613 VersionID: entry.VersionID, 614 }) 615 state.reverseIndexes.Delete(entry) 616 } 617 618 // remove unique indexes 619 iter = state.reverseUniqueIndexes.Copy().Iter() 620 for ok := iter.Seek(&ReverseIndexEntry[K, V]{ 621 Key: key, 622 VersionID: 0, 623 }); ok; ok = iter.Next() { 624 entry := iter.Item() 625 if key.Less(entry.Key) { 626 break 627 } 628 if newVersionIDSet[entry.VersionID] { 629 continue 630 } 631 state.uniqueIndexes.Delete(&IndexEntry[K, V]{ 632 Index: entry.Index, 633 Key: entry.Key, 634 VersionID: entry.VersionID, 635 }) 636 state.reverseUniqueIndexes.Delete(entry) 637 } 638 } 639 640 return nil 641 }) 642 } 643 644 func (t *Table[K, V, R]) AbortTx(tx *Transaction) error { 645 return t.update(func(state *tableState[K, V]) error { 646 iter := state.writes.Copy().Iter() 647 defer iter.Release() 648 pivot := &WriteEntry[K, V]{ 649 Transaction: tx, 650 } 651 for ok := iter.Seek(pivot); ok; ok = iter.Next() { 652 entry := iter.Item() 653 if entry.Transaction != tx { 654 break 655 } 656 state.writes.Delete(entry) 657 } 658 return nil 659 }) 660 } 661 662 func (t *Table[K, V, R]) update( 663 fn func(state *tableState[K, V]) error, 664 ) error { 665 t.Lock() 666 defer t.Unlock() 667 state := t.state.Load() 668 newState := state.Copy() 669 if err := fn(newState); err != nil { 670 return err 671 } 672 t.state.Store(newState) 673 return nil 674 } 675 676 func validate[ 677 K memorytable.Ordered[K], 678 V any, 679 ]( 680 physicalRow *PhysicalRow[K, V], 681 tx *Transaction, 682 ) error { 683 684 for i := len(physicalRow.Versions) - 1; i >= 0; i-- { 685 version := physicalRow.Versions[i] 686 687 // locked by another committed tx after tx begin 688 if version.LockTx != nil && 689 version.LockTx.State.Load() == Committed && 690 version.LockTx.ID != tx.ID && 691 version.LockTime.After(tx.BeginTime) { 692 //err = moerr.NewPrimaryKeyDuplicated(physicalRow.Key) 693 return moerr.NewDuplicateNoCtx() 694 } 695 696 // born in another committed tx after tx begin 697 if version.BornTx.State.Load() == Committed && 698 version.BornTx.ID != tx.ID && 699 version.BornTime.After(tx.BeginTime) { 700 //err = moerr.NewPrimaryKeyDuplicated(physicalRow.Key) 701 return moerr.NewDuplicateNoCtx() 702 } 703 704 } 705 706 return nil 707 } 708 709 func (t *Table[K, V, R]) Dump(out io.Writer) { 710 iter := t.state.Load().rows.Copy().Iter() 711 for ok := iter.First(); ok; ok = iter.Next() { 712 item := iter.Item() 713 fmt.Fprintf(out, "key: %+v\n", item.Key) 714 for _, version := range item.Versions { 715 fmt.Fprintf(out, "\tversion: %+v\n", version) 716 } 717 } 718 }