github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/disttae/engine.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package disttae
    16  
    17  import (
    18  	"bytes"
    19  	"container/heap"
    20  	"context"
    21  	"math"
    22  	"runtime"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/matrixorigin/matrixone/pkg/catalog"
    27  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    28  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    29  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    30  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    31  	"github.com/matrixorigin/matrixone/pkg/pb/timestamp"
    32  	"github.com/matrixorigin/matrixone/pkg/txn/client"
    33  	"github.com/matrixorigin/matrixone/pkg/txn/storage/memorystorage/memorytable"
    34  	"github.com/matrixorigin/matrixone/pkg/util/errutil"
    35  	"github.com/matrixorigin/matrixone/pkg/vm/engine"
    36  	"github.com/matrixorigin/matrixone/pkg/vm/engine/disttae/cache"
    37  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    38  )
    39  
    40  var _ engine.Engine = new(Engine)
    41  
    42  func New(
    43  	ctx context.Context,
    44  	mp *mpool.MPool,
    45  	fs fileservice.FileService,
    46  	cli client.TxnClient,
    47  	idGen IDGenerator,
    48  	getClusterDetails engine.GetClusterDetailsFunc,
    49  ) *Engine {
    50  	cluster, err := getClusterDetails()
    51  	if err != nil {
    52  		panic(err)
    53  	}
    54  	db := newDB(cluster.DNStores)
    55  	catalogCache := cache.NewCatalog()
    56  	if err := db.init(ctx, mp, catalogCache); err != nil {
    57  		panic(err)
    58  	}
    59  	e := &Engine{
    60  		db:                db,
    61  		mp:                mp,
    62  		fs:                fs,
    63  		cli:               cli,
    64  		idGen:             idGen,
    65  		catalog:           catalogCache,
    66  		txnHeap:           &transactionHeap{},
    67  		getClusterDetails: getClusterDetails,
    68  		txns:              make(map[string]*Transaction),
    69  	}
    70  	go e.gc(ctx)
    71  	return e
    72  }
    73  
    74  func (e *Engine) Create(ctx context.Context, name string, op client.TxnOperator) error {
    75  	txn := e.getTransaction(op)
    76  	if txn == nil {
    77  		return moerr.NewTxnClosedNoCtx(op.Txn().ID)
    78  	}
    79  	sql := getSql(ctx)
    80  	accountId, userId, roleId := getAccessInfo(ctx)
    81  	databaseId, err := txn.allocateID(ctx)
    82  	if err != nil {
    83  		return err
    84  	}
    85  	bat, err := genCreateDatabaseTuple(sql, accountId, userId, roleId,
    86  		name, databaseId, e.mp)
    87  	if err != nil {
    88  		return err
    89  	}
    90  	// non-io operations do not need to pass context
    91  	if err := txn.WriteBatch(INSERT, catalog.MO_CATALOG_ID, catalog.MO_DATABASE_ID,
    92  		catalog.MO_CATALOG, catalog.MO_DATABASE, bat, txn.dnStores[0], -1); err != nil {
    93  		return err
    94  	}
    95  	txn.databaseMap.Store(genDatabaseKey(ctx, name), &database{
    96  		txn:          txn,
    97  		db:           e.db,
    98  		fs:           e.fs,
    99  		databaseId:   databaseId,
   100  		databaseName: name,
   101  	})
   102  	return nil
   103  }
   104  
   105  func (e *Engine) Database(ctx context.Context, name string,
   106  	op client.TxnOperator) (engine.Database, error) {
   107  	txn := e.getTransaction(op)
   108  	if txn == nil {
   109  		return nil, moerr.NewTxnClosedNoCtx(op.Txn().ID)
   110  	}
   111  	if v, ok := txn.databaseMap.Load(genDatabaseKey(ctx, name)); ok {
   112  		return v.(*database), nil
   113  	}
   114  	if name == catalog.MO_CATALOG {
   115  		db := &database{
   116  			txn:          txn,
   117  			db:           e.db,
   118  			fs:           e.fs,
   119  			databaseId:   catalog.MO_CATALOG_ID,
   120  			databaseName: name,
   121  		}
   122  		return db, nil
   123  	}
   124  	key := &cache.DatabaseItem{
   125  		Name:      name,
   126  		AccountId: getAccountId(ctx),
   127  		Ts:        txn.meta.SnapshotTS,
   128  	}
   129  	if ok := e.catalog.GetDatabase(key); !ok {
   130  		return nil, moerr.GetOkExpectedEOB()
   131  	}
   132  	return &database{
   133  		txn:          txn,
   134  		db:           e.db,
   135  		fs:           e.fs,
   136  		databaseName: name,
   137  		databaseId:   key.Id,
   138  	}, nil
   139  }
   140  
   141  func (e *Engine) Databases(ctx context.Context, op client.TxnOperator) ([]string, error) {
   142  	var dbs []string
   143  
   144  	txn := e.getTransaction(op)
   145  	if txn == nil {
   146  		return nil, moerr.NewTxnClosed(ctx, op.Txn().ID)
   147  	}
   148  	accountId := getAccountId(ctx)
   149  	txn.databaseMap.Range(func(k, _ any) bool {
   150  		key := k.(databaseKey)
   151  		if key.accountId == accountId {
   152  			dbs = append(dbs, key.name)
   153  		}
   154  		return true
   155  	})
   156  	dbs = append(dbs, e.catalog.Databases(getAccountId(ctx), txn.meta.SnapshotTS)...)
   157  	return dbs, nil
   158  }
   159  
   160  func (e *Engine) GetNameById(ctx context.Context, op client.TxnOperator, tableId uint64) (dbName string, tblName string, err error) {
   161  	txn := e.getTransaction(op)
   162  	if txn == nil {
   163  		return "", "", moerr.NewTxnClosed(ctx, op.Txn().ID)
   164  	}
   165  	accountId := getAccountId(ctx)
   166  	var db engine.Database
   167  	noRepCtx := errutil.ContextWithNoReport(ctx, true)
   168  	txn.databaseMap.Range(func(k, _ any) bool {
   169  		key := k.(databaseKey)
   170  		dbName = key.name
   171  		if key.accountId == accountId {
   172  			db, err = e.Database(noRepCtx, key.name, op)
   173  			if err != nil {
   174  				return false
   175  			}
   176  			distDb := db.(*database)
   177  			tblName = distDb.getTableNameById(ctx, key.id)
   178  			if tblName != "" {
   179  				return false
   180  			}
   181  		}
   182  		return true
   183  	})
   184  
   185  	if tblName == "" {
   186  		dbNames := e.catalog.Databases(accountId, txn.meta.SnapshotTS)
   187  		for _, dbName := range dbNames {
   188  			db, err = e.Database(noRepCtx, dbName, op)
   189  			if err != nil {
   190  				return "", "", err
   191  			}
   192  			distDb := db.(*database)
   193  			tableName, rel, _ := distDb.getRelationById(noRepCtx, tableId)
   194  			if rel != nil {
   195  				tblName = tableName
   196  				break
   197  			}
   198  		}
   199  	}
   200  
   201  	if tblName == "" {
   202  		return "", "", moerr.NewInternalError(ctx, "can not find table name by id %d", tableId)
   203  	}
   204  
   205  	return
   206  }
   207  
   208  func (e *Engine) GetRelationById(ctx context.Context, op client.TxnOperator, tableId uint64) (dbName, tableName string, rel engine.Relation, err error) {
   209  	txn := e.getTransaction(op)
   210  	if txn == nil {
   211  		return "", "", nil, moerr.NewTxnClosed(ctx, op.Txn().ID)
   212  	}
   213  	accountId := getAccountId(ctx)
   214  	var db engine.Database
   215  	noRepCtx := errutil.ContextWithNoReport(ctx, true)
   216  	txn.databaseMap.Range(func(k, _ any) bool {
   217  		key := k.(databaseKey)
   218  		dbName = key.name
   219  		if key.accountId == accountId {
   220  			db, err = e.Database(noRepCtx, key.name, op)
   221  			if err != nil {
   222  				return false
   223  			}
   224  			distDb := db.(*database)
   225  			tableName, rel, err = distDb.getRelationById(noRepCtx, tableId)
   226  			if rel != nil {
   227  				return false
   228  			}
   229  		}
   230  		return true
   231  	})
   232  
   233  	if rel == nil {
   234  		dbNames := e.catalog.Databases(accountId, txn.meta.SnapshotTS)
   235  		for _, dbName := range dbNames {
   236  			db, err = e.Database(noRepCtx, dbName, op)
   237  			if err != nil {
   238  				return "", "", nil, err
   239  			}
   240  			distDb := db.(*database)
   241  			tableName, rel, err = distDb.getRelationById(noRepCtx, tableId)
   242  			if rel != nil {
   243  				break
   244  			}
   245  		}
   246  	}
   247  
   248  	if rel == nil {
   249  		return "", "", nil, moerr.NewInternalError(ctx, "can not find table by id %d", tableId)
   250  	}
   251  	return
   252  }
   253  
   254  func (e *Engine) Delete(ctx context.Context, name string, op client.TxnOperator) error {
   255  	var db *database
   256  
   257  	txn := e.getTransaction(op)
   258  	if txn == nil {
   259  		return moerr.NewTxnClosedNoCtx(op.Txn().ID)
   260  	}
   261  	key := genDatabaseKey(ctx, name)
   262  	if _, ok := txn.databaseMap.Load(key); ok {
   263  		txn.databaseMap.Delete(key)
   264  		return nil
   265  	} else {
   266  		key := &cache.DatabaseItem{
   267  			Name:      name,
   268  			AccountId: getAccountId(ctx),
   269  			Ts:        txn.meta.SnapshotTS,
   270  		}
   271  		if ok := e.catalog.GetDatabase(key); !ok {
   272  			return moerr.GetOkExpectedEOB()
   273  		}
   274  		db = &database{
   275  			txn:          txn,
   276  			db:           e.db,
   277  			fs:           e.fs,
   278  			databaseName: name,
   279  			databaseId:   key.Id,
   280  		}
   281  	}
   282  	rels, err := db.Relations(ctx)
   283  	if err != nil {
   284  		return err
   285  	}
   286  	for _, relName := range rels {
   287  		if err := db.Delete(ctx, relName); err != nil {
   288  			return err
   289  		}
   290  	}
   291  	bat, err := genDropDatabaseTuple(db.databaseId, name, e.mp)
   292  	if err != nil {
   293  		return err
   294  	}
   295  	// non-io operations do not need to pass context
   296  	if err := txn.WriteBatch(DELETE, catalog.MO_CATALOG_ID, catalog.MO_DATABASE_ID,
   297  		catalog.MO_CATALOG, catalog.MO_DATABASE, bat, txn.dnStores[0], -1); err != nil {
   298  		return err
   299  	}
   300  	return nil
   301  }
   302  
   303  // hasConflict used to detect if a transaction on a cn is in conflict,
   304  // currently an empty implementation, assuming all transactions on a cn are conflict free
   305  func (e *Engine) hasConflict(txn *Transaction) bool {
   306  	return false
   307  }
   308  
   309  // hasDuplicate used to detect if a transaction on a cn has duplicate.
   310  func (e *Engine) hasDuplicate(ctx context.Context, txn *Transaction) bool {
   311  	for i := range txn.writes {
   312  		for _, e := range txn.writes[i] {
   313  			if e.typ == DELETE {
   314  				continue
   315  			}
   316  			if e.bat.Length() == 0 {
   317  				continue
   318  			}
   319  			key := genTableKey(ctx, e.tableName, e.databaseId)
   320  			v, ok := txn.tableMap.Load(key)
   321  			if !ok {
   322  				continue
   323  			}
   324  			tbl := v.(*table)
   325  			if tbl.meta == nil {
   326  				continue
   327  			}
   328  			if tbl.primaryIdx == -1 {
   329  				continue
   330  			}
   331  		}
   332  	}
   333  	return false
   334  }
   335  
   336  func (e *Engine) New(ctx context.Context, op client.TxnOperator) error {
   337  	cluster, err := e.getClusterDetails()
   338  	if err != nil {
   339  		return err
   340  	}
   341  	proc := process.New(
   342  		ctx,
   343  		e.mp,
   344  		e.cli,
   345  		op,
   346  		e.fs,
   347  		e.getClusterDetails,
   348  	)
   349  	workspace := memorytable.NewTable[RowID, *workspaceRow, *workspaceRow]()
   350  	workspace.DisableHistory()
   351  	txn := &Transaction{
   352  		op:          op,
   353  		proc:        proc,
   354  		db:          e.db,
   355  		readOnly:    true,
   356  		meta:        op.Txn(),
   357  		idGen:       e.idGen,
   358  		rowId:       [2]uint64{math.MaxUint64, 0},
   359  		workspace:   workspace,
   360  		dnStores:    cluster.DNStores,
   361  		fileMap:     make(map[string]uint64),
   362  		tableMap:    new(sync.Map),
   363  		databaseMap: new(sync.Map),
   364  		createMap:   new(sync.Map),
   365  		catalog:     e.catalog,
   366  	}
   367  	txn.writes = append(txn.writes, make([]Entry, 0, 1))
   368  	e.newTransaction(op, txn)
   369  	// update catalog's cache
   370  	table := &table{
   371  		db: &database{
   372  			fs: e.fs,
   373  			txn: &Transaction{
   374  				catalog: e.catalog,
   375  			},
   376  			databaseId: catalog.MO_CATALOG_ID,
   377  		},
   378  	}
   379  	table.tableId = catalog.MO_DATABASE_ID
   380  	table.tableName = catalog.MO_DATABASE
   381  	if err := e.db.Update(ctx, txn.dnStores[:1], table, op, catalog.MO_TABLES_REL_ID_IDX,
   382  		catalog.MO_CATALOG_ID, catalog.MO_DATABASE_ID, txn.meta.SnapshotTS); err != nil {
   383  		e.delTransaction(txn)
   384  		return err
   385  	}
   386  	table.tableId = catalog.MO_TABLES_ID
   387  	table.tableName = catalog.MO_TABLES
   388  	if err := e.db.Update(ctx, txn.dnStores[:1], table, op, catalog.MO_TABLES_REL_ID_IDX,
   389  		catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID, txn.meta.SnapshotTS); err != nil {
   390  		e.delTransaction(txn)
   391  		return err
   392  	}
   393  	table.tableId = catalog.MO_COLUMNS_ID
   394  	table.tableName = catalog.MO_COLUMNS
   395  	if err := e.db.Update(ctx, txn.dnStores[:1], table, op, catalog.MO_TABLES_REL_ID_IDX,
   396  		catalog.MO_CATALOG_ID, catalog.MO_COLUMNS_ID, txn.meta.SnapshotTS); err != nil {
   397  		e.delTransaction(txn)
   398  		return err
   399  	}
   400  	return nil
   401  }
   402  
   403  func (e *Engine) Commit(ctx context.Context, op client.TxnOperator) error {
   404  	txn := e.getTransaction(op)
   405  	if txn == nil {
   406  		return moerr.NewTxnClosedNoCtx(op.Txn().ID)
   407  	}
   408  	defer e.delTransaction(txn)
   409  	if txn.readOnly {
   410  		return nil
   411  	}
   412  	if e.hasConflict(txn) {
   413  		return moerr.NewTxnWriteConflictNoCtx("write conflict")
   414  	}
   415  	if e.hasDuplicate(ctx, txn) {
   416  		return moerr.NewDuplicateNoCtx()
   417  	}
   418  	reqs, err := genWriteReqs(txn.writes)
   419  	if err != nil {
   420  		return err
   421  	}
   422  	_, err = op.Write(ctx, reqs)
   423  	return err
   424  }
   425  
   426  func (e *Engine) Rollback(ctx context.Context, op client.TxnOperator) error {
   427  	txn := e.getTransaction(op)
   428  	if txn == nil {
   429  		return nil // compatible with existing logic
   430  		//	return moerr.NewTxnClosed()
   431  	}
   432  	defer e.delTransaction(txn)
   433  	return nil
   434  }
   435  
   436  func (e *Engine) Nodes() (engine.Nodes, error) {
   437  	clusterDetails, err := e.getClusterDetails()
   438  	if err != nil {
   439  		return nil, err
   440  	}
   441  
   442  	var nodes engine.Nodes
   443  	for _, store := range clusterDetails.CNStores {
   444  		nodes = append(nodes, engine.Node{
   445  			Mcpu: runtime.NumCPU(),
   446  			Id:   store.UUID,
   447  			Addr: store.ServiceAddress,
   448  		})
   449  	}
   450  	return nodes, nil
   451  }
   452  
   453  func (e *Engine) Hints() (h engine.Hints) {
   454  	h.CommitOrRollbackTimeout = time.Minute * 5
   455  	return
   456  }
   457  
   458  func (e *Engine) NewBlockReader(ctx context.Context, num int, ts timestamp.Timestamp,
   459  	expr *plan.Expr, ranges [][]byte, tblDef *plan.TableDef) ([]engine.Reader, error) {
   460  	rds := make([]engine.Reader, num)
   461  	blks := make([]BlockMeta, len(ranges))
   462  	for i := range ranges {
   463  		blks[i] = blockUnmarshal(ranges[i])
   464  		blks[i].Info.EntryState = false
   465  	}
   466  	if len(ranges) < num {
   467  		for i := range ranges {
   468  			rds[i] = &blockReader{
   469  				fs:         e.fs,
   470  				tableDef:   tblDef,
   471  				primaryIdx: -1,
   472  				expr:       expr,
   473  				ts:         ts,
   474  				ctx:        ctx,
   475  				blks:       []BlockMeta{blks[i]},
   476  			}
   477  		}
   478  		for j := len(ranges); j < num; j++ {
   479  			rds[j] = &emptyReader{}
   480  		}
   481  		return rds, nil
   482  	}
   483  	step := len(ranges) / num
   484  	if step < 1 {
   485  		step = 1
   486  	}
   487  	for i := 0; i < num; i++ {
   488  		if i == num-1 {
   489  			rds[i] = &blockReader{
   490  				fs:         e.fs,
   491  				tableDef:   tblDef,
   492  				primaryIdx: -1,
   493  				expr:       expr,
   494  				ts:         ts,
   495  				ctx:        ctx,
   496  				blks:       blks[i*step:],
   497  			}
   498  		} else {
   499  			rds[i] = &blockReader{
   500  				fs:         e.fs,
   501  				tableDef:   tblDef,
   502  				primaryIdx: -1,
   503  				expr:       expr,
   504  				ts:         ts,
   505  				ctx:        ctx,
   506  				blks:       blks[i*step : (i+1)*step],
   507  			}
   508  		}
   509  	}
   510  	return rds, nil
   511  }
   512  
   513  func (e *Engine) newTransaction(op client.TxnOperator, txn *Transaction) {
   514  	e.Lock()
   515  	defer e.Unlock()
   516  	heap.Push(e.txnHeap, txn)
   517  	e.txns[string(op.Txn().ID)] = txn
   518  }
   519  
   520  func (e *Engine) getTransaction(op client.TxnOperator) *Transaction {
   521  	e.RLock()
   522  	defer e.RUnlock()
   523  	return e.txns[string(op.Txn().ID)]
   524  }
   525  
   526  func (e *Engine) delTransaction(txn *Transaction) {
   527  	for i := range txn.writes {
   528  		for j := range txn.writes[i] {
   529  			txn.writes[i][j].bat.Clean(e.mp)
   530  		}
   531  	}
   532  	txn.tableMap = nil
   533  	txn.createMap = nil
   534  	txn.databaseMap = nil
   535  	e.Lock()
   536  	defer e.Unlock()
   537  	for i, tmp := range *e.txnHeap {
   538  		if bytes.Equal(txn.meta.ID, tmp.meta.ID) {
   539  			heap.Remove(e.txnHeap, i)
   540  			break
   541  		}
   542  	}
   543  	delete(e.txns, string(txn.meta.ID))
   544  }
   545  
   546  func (e *Engine) gc(ctx context.Context) {
   547  	var ps []Partitions
   548  	var ts timestamp.Timestamp
   549  
   550  	for {
   551  		select {
   552  		case <-ctx.Done():
   553  			return
   554  		case <-time.After(GcCycle):
   555  			e.RLock()
   556  			if len(*e.txnHeap) == 0 {
   557  				e.RUnlock()
   558  				continue
   559  			}
   560  			ts = (*e.txnHeap)[0].meta.SnapshotTS
   561  			e.RUnlock()
   562  			e.db.Lock()
   563  			for k := range e.db.partitions {
   564  				ps = append(ps, e.db.partitions[k])
   565  			}
   566  			e.db.Unlock()
   567  			for i := range ps {
   568  				for j := range ps[i] {
   569  					select {
   570  					case <-ps[i][j].lock:
   571  					case <-ctx.Done():
   572  						return
   573  					}
   574  					ps[i][j].GC(ts)
   575  					ps[i][j].lock <- struct{}{}
   576  				}
   577  			}
   578  		}
   579  	}
   580  }