github.com/matrixorigin/matrixone@v0.7.0/pkg/txn/storage/memorystorage/memtable/table.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package memtable
    16  
    17  import (
    18  	"database/sql"
    19  	"errors"
    20  	"fmt"
    21  	"io"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    27  	"github.com/matrixorigin/matrixone/pkg/txn/storage/memorystorage/memorytable"
    28  	"github.com/tidwall/btree"
    29  )
    30  
// Table is a generic in-memory table that stores values of type V under
// keys of type K; rows are supplied by callers as values of type R.
//
// The embedded mutex is taken by update while a modified copy of the state
// is being built. The current state is published through an atomic pointer,
// which Get and Dump load without taking the mutex.
type Table[
	K memorytable.Ordered[K],
	V any,
	R Row[K, V],
] struct {
	sync.Mutex
	// state points to the currently published set of trees.
	state atomic.Pointer[tableState[K, V]]
}
    39  
// tableState groups the B-trees that together hold a Table's data.
type tableState[
	K memorytable.Ordered[K],
	V any,
] struct {
	// rows holds one PhysicalRow per key, ordered by key.
	rows                 *btree.BTreeG[*PhysicalRow[K, V]]
	// indexes holds non-unique index entries ordered by (Index, Key, VersionID).
	indexes              *btree.BTreeG[*IndexEntry[K, V]]
	// reverseIndexes holds the same entries as indexes ordered by
	// (Key, VersionID, Index), so the entries of one key can be scanned.
	reverseIndexes       *btree.BTreeG[*ReverseIndexEntry[K, V]]
	// writes holds one entry per (transaction, key) written and not yet
	// removed by CommitTx or AbortTx.
	writes               *btree.BTreeG[*WriteEntry[K, V]]
	// uniqueIndexes holds unique index entries ordered like indexes.
	uniqueIndexes        *btree.BTreeG[*IndexEntry[K, V]]
	// reverseUniqueIndexes holds the unique entries ordered like reverseIndexes.
	reverseUniqueIndexes *btree.BTreeG[*ReverseIndexEntry[K, V]]
}
    51  
    52  func (t *tableState[K, V]) Copy() *tableState[K, V] {
    53  	return &tableState[K, V]{
    54  		rows:                 t.rows.Copy(),
    55  		indexes:              t.indexes.Copy(),
    56  		reverseIndexes:       t.reverseIndexes.Copy(),
    57  		writes:               t.writes.Copy(),
    58  		uniqueIndexes:        t.uniqueIndexes.Copy(),
    59  		reverseUniqueIndexes: t.reverseUniqueIndexes.Copy(),
    60  	}
    61  }
    62  
// Row is what Insert, Update and Upsert accept: it supplies the key, the
// value and the index tuples to record for one logical row.
type Row[K any, V any] interface {
	// Key returns the key the row is stored under.
	Key() K
	// Value returns the value stored as a new version of the row.
	Value() V
	// Indexes returns the tuples recorded in the non-unique index.
	Indexes() []Tuple
	// UniqueIndexes returns the tuples recorded in the unique index;
	// setIndexes reports a duplicate error if its lookup for a tuple
	// yields any entry.
	UniqueIndexes() []Tuple
}
    69  
// IndexEntry associates an index tuple with the key and the version ID it
// was recorded for. Entries are ordered by compareIndexEntry.
type IndexEntry[
	K memorytable.Ordered[K],
	V any,
] struct {
	// Index is the indexed tuple.
	Index     Tuple
	// Key is the key of the row the tuple belongs to.
	Key       K
	// VersionID is the ID of the row version the tuple was recorded for.
	VersionID int64
}
    78  
// ReverseIndexEntry carries the same data as IndexEntry but is ordered by
// compareReverseIndexEntry (key first), which lets CommitTx and
// FilterVersions scan all index tuples recorded for one key.
type ReverseIndexEntry[
	K memorytable.Ordered[K],
	V any,
] struct {
	// Key is the key of the row the tuple belongs to.
	Key       K
	// VersionID is the ID of the row version the tuple was recorded for.
	VersionID int64
	// Index is the indexed tuple.
	Index     Tuple
}
    87  
// WriteEntry records that a transaction wrote a version of a key.
// Entries are ordered by compareWriteEntry.
type WriteEntry[
	K memorytable.Ordered[K],
	V any,
] struct {
	// Transaction is the writing transaction.
	Transaction *Transaction
	// Key points to the written key. CommitTx and AbortTx seek with a nil
	// Key, which compareWriteEntry orders before any non-nil key of the
	// same transaction.
	Key         *K

	// VersionID is the ID of the version produced by the write.
	VersionID int64
}
    97  
    98  func NewTable[
    99  	K memorytable.Ordered[K],
   100  	V any,
   101  	R Row[K, V],
   102  ]() *Table[K, V, R] {
   103  	ret := &Table[K, V, R]{}
   104  	state := &tableState[K, V]{
   105  		rows:                 btree.NewBTreeG(comparePhysicalRow[K, V]),
   106  		indexes:              btree.NewBTreeG(compareIndexEntry[K, V]),
   107  		reverseIndexes:       btree.NewBTreeG(compareReverseIndexEntry[K, V]),
   108  		writes:               btree.NewBTreeG(compareWriteEntry[K, V]),
   109  		uniqueIndexes:        btree.NewBTreeG(compareIndexEntry[K, V]),
   110  		reverseUniqueIndexes: btree.NewBTreeG(compareReverseIndexEntry[K, V]),
   111  	}
   112  	ret.state.Store(state)
   113  	return ret
   114  }
   115  
   116  func comparePhysicalRow[
   117  	K memorytable.Ordered[K],
   118  	V any,
   119  ](a, b *PhysicalRow[K, V]) bool {
   120  	return a.Key.Less(b.Key)
   121  }
   122  
   123  func compareIndexEntry[
   124  	K memorytable.Ordered[K],
   125  	V any,
   126  ](a, b *IndexEntry[K, V]) bool {
   127  	if a.Index.Less(b.Index) {
   128  		return true
   129  	}
   130  	if b.Index.Less(a.Index) {
   131  		return false
   132  	}
   133  	if a.Key.Less(b.Key) {
   134  		return true
   135  	}
   136  	if b.Key.Less(a.Key) {
   137  		return false
   138  	}
   139  	return a.VersionID < b.VersionID
   140  }
   141  
   142  func compareReverseIndexEntry[
   143  	K memorytable.Ordered[K],
   144  	V any,
   145  ](a, b *ReverseIndexEntry[K, V]) bool {
   146  	if a.Key.Less(b.Key) {
   147  		return true
   148  	}
   149  	if b.Key.Less(a.Key) {
   150  		return false
   151  	}
   152  	if a.VersionID < b.VersionID {
   153  		return true
   154  	}
   155  	if b.VersionID < a.VersionID {
   156  		return false
   157  	}
   158  	return a.Index.Less(b.Index)
   159  }
   160  
   161  func compareWriteEntry[
   162  	K memorytable.Ordered[K],
   163  	V any,
   164  ](a, b *WriteEntry[K, V]) bool {
   165  	if a.Transaction.ID < b.Transaction.ID {
   166  		return true
   167  	}
   168  	if a.Transaction.ID > b.Transaction.ID {
   169  		return false
   170  	}
   171  	if a.Key != nil && b.Key != nil {
   172  		if (*a.Key).Less(*b.Key) {
   173  			return true
   174  		}
   175  		if (*b.Key).Less(*a.Key) {
   176  			return false
   177  		}
   178  	}
   179  	return a.Key == nil && b.Key != nil
   180  }
   181  
   182  func (t *Table[K, V, R]) Insert(
   183  	tx *Transaction,
   184  	row R,
   185  ) error {
   186  	key := row.Key()
   187  
   188  	return t.update(func(state *tableState[K, V]) error {
   189  		physicalRow := getOrSetRowByKey(state.rows, key)
   190  
   191  		if err := validate(physicalRow, tx); err != nil {
   192  			return err
   193  		}
   194  
   195  		for i := len(physicalRow.Versions) - 1; i >= 0; i-- {
   196  			version := physicalRow.Versions[i]
   197  			if version.Visible(tx.Time, tx.ID, tx.IsolationPolicy.Read) {
   198  				return moerr.NewDuplicateNoCtx()
   199  			}
   200  		}
   201  
   202  		value := row.Value()
   203  		physicalRow, version, err := physicalRow.Insert(
   204  			tx.Time, tx, value,
   205  		)
   206  		if err != nil {
   207  			return err
   208  		}
   209  
   210  		// index entry
   211  		if err := setIndexes(tx, t, state, key, version, row); err != nil {
   212  			return err
   213  		}
   214  
   215  		// write entry
   216  		tx.committers[t] = struct{}{}
   217  		state.writes.Set(&WriteEntry[K, V]{
   218  			Transaction: tx,
   219  			Key:         &key,
   220  			VersionID:   version.ID,
   221  		})
   222  
   223  		// row entry
   224  		state.rows.Set(physicalRow)
   225  
   226  		tx.Time.Tick()
   227  		return nil
   228  	})
   229  
   230  }
   231  
   232  func (t *Table[K, V, R]) Update(
   233  	tx *Transaction,
   234  	row R,
   235  ) error {
   236  	key := row.Key()
   237  
   238  	return t.update(func(state *tableState[K, V]) error {
   239  		physicalRow := getOrSetRowByKey(state.rows, key)
   240  
   241  		value := row.Value()
   242  		physicalRow, version, err := physicalRow.Update(
   243  			tx.Time, tx, value,
   244  		)
   245  		if err != nil {
   246  			return err
   247  		}
   248  
   249  		// index entry
   250  		if err := setIndexes(tx, t, state, key, version, row); err != nil {
   251  			return err
   252  		}
   253  
   254  		// write entry
   255  		tx.committers[t] = struct{}{}
   256  		state.writes.Set(&WriteEntry[K, V]{
   257  			Transaction: tx,
   258  			Key:         &key,
   259  			VersionID:   version.ID,
   260  		})
   261  
   262  		// row entry
   263  		state.rows.Set(physicalRow)
   264  
   265  		tx.Time.Tick()
   266  		return nil
   267  	})
   268  }
   269  
   270  func (t *Table[K, V, R]) Delete(
   271  	tx *Transaction,
   272  	key K,
   273  ) error {
   274  
   275  	return t.update(func(state *tableState[K, V]) error {
   276  		physicalRow := getRowByKey(state.rows, key)
   277  		if physicalRow == nil {
   278  			return nil
   279  		}
   280  
   281  		physicalRow, version, err := physicalRow.Delete(tx.Time, tx)
   282  		if err != nil {
   283  			return err
   284  		}
   285  
   286  		// write entry
   287  		tx.committers[t] = struct{}{}
   288  		state.writes.Set(&WriteEntry[K, V]{
   289  			Transaction: tx,
   290  			Key:         &key,
   291  			VersionID:   version.ID,
   292  		})
   293  
   294  		// row entry
   295  		state.rows.Set(physicalRow)
   296  
   297  		tx.Time.Tick()
   298  		return nil
   299  	})
   300  
   301  }
   302  
   303  func (t *Table[K, V, R]) Upsert(
   304  	tx *Transaction,
   305  	row R,
   306  ) error {
   307  	key := row.Key()
   308  
   309  	return t.update(func(state *tableState[K, V]) error {
   310  		physicalRow := getOrSetRowByKey(state.rows, key)
   311  
   312  		value := row.Value()
   313  		updatedPhysicalRow, version, err := physicalRow.Update(
   314  			tx.Time, tx, value,
   315  		)
   316  		if err != nil {
   317  
   318  			if errors.Is(err, sql.ErrNoRows) {
   319  				// insert
   320  				if err := validate(physicalRow, tx); err != nil {
   321  					return err
   322  				}
   323  
   324  				for i := len(physicalRow.Versions) - 1; i >= 0; i-- {
   325  					version := physicalRow.Versions[i]
   326  					if version.Visible(tx.Time, tx.ID, tx.IsolationPolicy.Read) {
   327  						return moerr.NewDuplicateNoCtx()
   328  					}
   329  				}
   330  
   331  				value := row.Value()
   332  				physicalRow, version, err = physicalRow.Insert(
   333  					tx.Time, tx, value,
   334  				)
   335  				if err != nil {
   336  					return err
   337  				}
   338  
   339  			} else {
   340  				return err
   341  			}
   342  		} else {
   343  			physicalRow = updatedPhysicalRow
   344  		}
   345  
   346  		// index entry
   347  		if err := setIndexes(tx, t, state, key, version, row); err != nil {
   348  			return err
   349  		}
   350  
   351  		// write entry
   352  		tx.committers[t] = struct{}{}
   353  		state.writes.Set(&WriteEntry[K, V]{
   354  			Transaction: tx,
   355  			Key:         &key,
   356  			VersionID:   version.ID,
   357  		})
   358  
   359  		// row entry
   360  		state.rows.Set(physicalRow)
   361  
   362  		tx.Time.Tick()
   363  		return nil
   364  	})
   365  }
   366  
   367  func setIndexes[
   368  	K memorytable.Ordered[K],
   369  	V any,
   370  	R Row[K, V],
   371  ](
   372  	tx *Transaction,
   373  	table *Table[K, V, R],
   374  	state *tableState[K, V],
   375  	key K,
   376  	version *Version[V],
   377  	row R,
   378  ) error {
   379  
   380  	// index entries
   381  	for _, index := range row.Indexes() {
   382  		state.indexes.Set(&IndexEntry[K, V]{
   383  			Index:     index,
   384  			Key:       key,
   385  			VersionID: version.ID,
   386  		})
   387  		state.reverseIndexes.Set(&ReverseIndexEntry[K, V]{
   388  			Key:       key,
   389  			VersionID: version.ID,
   390  			Index:     index,
   391  		})
   392  	}
   393  
   394  	// unique index entries
   395  	uniqueIndexes := row.UniqueIndexes()
   396  	for _, index := range uniqueIndexes {
   397  		iter := table.newIndexIter(
   398  			state.uniqueIndexes.Copy().Iter(),
   399  			state.rows,
   400  			tx,
   401  			index,
   402  			append(index, Min),
   403  		)
   404  		for ok := iter.First(); ok; ok = iter.Next() {
   405  			return moerr.NewDuplicateNoCtx()
   406  		}
   407  		state.uniqueIndexes.Set(&IndexEntry[K, V]{
   408  			Index:     index,
   409  			Key:       key,
   410  			VersionID: version.ID,
   411  		})
   412  		state.reverseUniqueIndexes.Set(&ReverseIndexEntry[K, V]{
   413  			Key:       key,
   414  			VersionID: version.ID,
   415  			Index:     index,
   416  		})
   417  	}
   418  
   419  	return nil
   420  }
   421  
   422  func (t *Table[K, V, R]) Get(
   423  	tx *Transaction,
   424  	key K,
   425  ) (
   426  	value V,
   427  	err error,
   428  ) {
   429  	state := t.state.Load()
   430  	physicalRow := getRowByKey(state.rows, key)
   431  	if physicalRow == nil {
   432  		err = sql.ErrNoRows
   433  		return
   434  	}
   435  	value, err = physicalRow.Read(tx.Time, tx)
   436  	if err != nil {
   437  		return
   438  	}
   439  	return
   440  }
   441  
   442  func getRowByKey[
   443  	K memorytable.Ordered[K],
   444  	V any,
   445  ](
   446  	tree *btree.BTreeG[*PhysicalRow[K, V]],
   447  	key K,
   448  ) *PhysicalRow[K, V] {
   449  	pivot := &PhysicalRow[K, V]{
   450  		Key: key,
   451  	}
   452  	row, _ := tree.Get(pivot)
   453  	if row == nil {
   454  		return nil
   455  	}
   456  	return row
   457  }
   458  
   459  func getOrSetRowByKey[
   460  	K memorytable.Ordered[K],
   461  	V any,
   462  ](
   463  	tree *btree.BTreeG[*PhysicalRow[K, V]],
   464  	key K,
   465  ) *PhysicalRow[K, V] {
   466  	pivot := &PhysicalRow[K, V]{
   467  		Key: key,
   468  	}
   469  	if row, _ := tree.Get(pivot); row != nil {
   470  		return row
   471  	}
   472  	pivot.LastUpdate = time.Now()
   473  	tree.Set(pivot)
   474  	return pivot
   475  }
   476  
   477  func (t *Table[K, V, R]) Index(tx *Transaction, index Tuple) (entries []*IndexEntry[K, V], err error) {
   478  	iter := t.NewIndexIter(
   479  		tx,
   480  		index,
   481  		index,
   482  	)
   483  	defer iter.Close()
   484  	for ok := iter.First(); ok; ok = iter.Next() {
   485  		entry := iter.Item()
   486  		entries = append(entries, entry)
   487  	}
   488  	return
   489  }
   490  
   491  func (t *Table[K, V, R]) CommitTx(tx *Transaction) error {
   492  	return t.update(func(state *tableState[K, V]) error {
   493  		iter := state.writes.Copy().Iter()
   494  		defer iter.Release()
   495  		pivot := &WriteEntry[K, V]{
   496  			Transaction: tx,
   497  		}
   498  		for ok := iter.Seek(pivot); ok; ok = iter.Next() {
   499  			entry := iter.Item()
   500  			if entry.Transaction != tx {
   501  				break
   502  			}
   503  
   504  			key := *entry.Key
   505  			physicalRow := getRowByKey(state.rows, key)
   506  			if err := validate(physicalRow, tx); err != nil {
   507  				return err
   508  			}
   509  
   510  			physicalRow = physicalRow.clone()
   511  			for i, version := range physicalRow.Versions {
   512  				if version.ID != entry.VersionID {
   513  					continue
   514  				}
   515  
   516  				// set born time and lock time to commit time
   517  				if version.LockTx == tx {
   518  					version.LockTime = tx.CommitTime
   519  				}
   520  				if version.BornTx == tx {
   521  					version.BornTime = tx.CommitTime
   522  				}
   523  
   524  				// check unique index
   525  				reverseIter := state.reverseUniqueIndexes.Copy().Iter()
   526  				pivot := &ReverseIndexEntry[K, V]{
   527  					Key:       key,
   528  					VersionID: version.ID,
   529  					Index:     Tuple{Min},
   530  				}
   531  				for ok := reverseIter.Seek(pivot); ok; ok = reverseIter.Next() {
   532  					entry := reverseIter.Item()
   533  					if key.Less(entry.Key) {
   534  						break
   535  					}
   536  					if entry.VersionID != version.ID {
   537  						break
   538  					}
   539  					iter := t.newIndexIter(
   540  						state.uniqueIndexes.Copy().Iter(),
   541  						state.rows,
   542  						tx,
   543  						entry.Index,
   544  						append(entry.Index, Min),
   545  					)
   546  					for ok := iter.First(); ok; ok = iter.Next() {
   547  						index := iter.Item()
   548  						if index.Key.Less(entry.Key) ||
   549  							entry.Key.Less(index.Key) ||
   550  							index.VersionID != entry.VersionID {
   551  							return moerr.NewDuplicateNoCtx()
   552  						}
   553  					}
   554  				}
   555  
   556  				physicalRow.Versions[i] = version
   557  			}
   558  			state.rows.Set(physicalRow)
   559  
   560  			// delete write entry
   561  			state.writes.Delete(entry)
   562  
   563  		}
   564  		return nil
   565  	})
   566  
   567  }
   568  
   569  func (t *Table[K, V, R]) FilterVersions(filterFunc func(K, []Version[V]) ([]Version[V], error)) error {
   570  	return t.update(func(state *tableState[K, V]) error {
   571  		rowsIter := state.rows.Copy().Iter()
   572  		defer rowsIter.Release()
   573  		for ok := rowsIter.First(); ok; ok = rowsIter.Next() {
   574  			physicalRow := rowsIter.Item()
   575  			key := physicalRow.Key
   576  
   577  			newVersions, err := filterFunc(key, physicalRow.Versions)
   578  			if err != nil {
   579  				return err
   580  			}
   581  
   582  			if len(newVersions) == 0 {
   583  				// delete
   584  				state.rows.Delete(physicalRow)
   585  			} else {
   586  				// update
   587  				physicalRow = physicalRow.clone()
   588  				physicalRow.Versions = newVersions
   589  				state.rows.Set(physicalRow)
   590  			}
   591  
   592  			newVersionIDSet := make(map[int64]bool)
   593  			for _, v := range newVersions {
   594  				newVersionIDSet[v.ID] = true
   595  			}
   596  
   597  			// remove indexes
   598  			iter := state.reverseIndexes.Copy().Iter()
   599  			for ok := iter.Seek(&ReverseIndexEntry[K, V]{
   600  				Key:       key,
   601  				VersionID: 0,
   602  			}); ok; ok = iter.Next() {
   603  				entry := iter.Item()
   604  				if key.Less(entry.Key) {
   605  					break
   606  				}
   607  				if newVersionIDSet[entry.VersionID] {
   608  					continue
   609  				}
   610  				state.indexes.Delete(&IndexEntry[K, V]{
   611  					Index:     entry.Index,
   612  					Key:       entry.Key,
   613  					VersionID: entry.VersionID,
   614  				})
   615  				state.reverseIndexes.Delete(entry)
   616  			}
   617  
   618  			// remove unique indexes
   619  			iter = state.reverseUniqueIndexes.Copy().Iter()
   620  			for ok := iter.Seek(&ReverseIndexEntry[K, V]{
   621  				Key:       key,
   622  				VersionID: 0,
   623  			}); ok; ok = iter.Next() {
   624  				entry := iter.Item()
   625  				if key.Less(entry.Key) {
   626  					break
   627  				}
   628  				if newVersionIDSet[entry.VersionID] {
   629  					continue
   630  				}
   631  				state.uniqueIndexes.Delete(&IndexEntry[K, V]{
   632  					Index:     entry.Index,
   633  					Key:       entry.Key,
   634  					VersionID: entry.VersionID,
   635  				})
   636  				state.reverseUniqueIndexes.Delete(entry)
   637  			}
   638  		}
   639  
   640  		return nil
   641  	})
   642  }
   643  
   644  func (t *Table[K, V, R]) AbortTx(tx *Transaction) error {
   645  	return t.update(func(state *tableState[K, V]) error {
   646  		iter := state.writes.Copy().Iter()
   647  		defer iter.Release()
   648  		pivot := &WriteEntry[K, V]{
   649  			Transaction: tx,
   650  		}
   651  		for ok := iter.Seek(pivot); ok; ok = iter.Next() {
   652  			entry := iter.Item()
   653  			if entry.Transaction != tx {
   654  				break
   655  			}
   656  			state.writes.Delete(entry)
   657  		}
   658  		return nil
   659  	})
   660  }
   661  
   662  func (t *Table[K, V, R]) update(
   663  	fn func(state *tableState[K, V]) error,
   664  ) error {
   665  	t.Lock()
   666  	defer t.Unlock()
   667  	state := t.state.Load()
   668  	newState := state.Copy()
   669  	if err := fn(newState); err != nil {
   670  		return err
   671  	}
   672  	t.state.Store(newState)
   673  	return nil
   674  }
   675  
   676  func validate[
   677  	K memorytable.Ordered[K],
   678  	V any,
   679  ](
   680  	physicalRow *PhysicalRow[K, V],
   681  	tx *Transaction,
   682  ) error {
   683  
   684  	for i := len(physicalRow.Versions) - 1; i >= 0; i-- {
   685  		version := physicalRow.Versions[i]
   686  
   687  		// locked by another committed tx after tx begin
   688  		if version.LockTx != nil &&
   689  			version.LockTx.State.Load() == Committed &&
   690  			version.LockTx.ID != tx.ID &&
   691  			version.LockTime.After(tx.BeginTime) {
   692  			//err = moerr.NewPrimaryKeyDuplicated(physicalRow.Key)
   693  			return moerr.NewDuplicateNoCtx()
   694  		}
   695  
   696  		// born in another committed tx after tx begin
   697  		if version.BornTx.State.Load() == Committed &&
   698  			version.BornTx.ID != tx.ID &&
   699  			version.BornTime.After(tx.BeginTime) {
   700  			//err = moerr.NewPrimaryKeyDuplicated(physicalRow.Key)
   701  			return moerr.NewDuplicateNoCtx()
   702  		}
   703  
   704  	}
   705  
   706  	return nil
   707  }
   708  
   709  func (t *Table[K, V, R]) Dump(out io.Writer) {
   710  	iter := t.state.Load().rows.Copy().Iter()
   711  	for ok := iter.First(); ok; ok = iter.Next() {
   712  		item := iter.Item()
   713  		fmt.Fprintf(out, "key: %+v\n", item.Key)
   714  		for _, version := range item.Versions {
   715  			fmt.Fprintf(out, "\tversion: %+v\n", version)
   716  		}
   717  	}
   718  }