github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/disttae/db.go

// Copyright 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package disttae

import (
	"context"
	"strconv"
	"strings"
	"sync"

	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/disttae/cache"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/disttae/logtailreplay"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/db/checkpoint"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/logtail"

	"github.com/matrixorigin/matrixone/pkg/catalog"
	"github.com/matrixorigin/matrixone/pkg/container/batch"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
	"github.com/matrixorigin/matrixone/pkg/pb/timestamp"
)

// init is used to insert some data that will not be synchronized by logtail.
func (e *Engine) init(ctx context.Context) error {
	e.Lock()
	defer e.Unlock()
	m := e.mp

	e.catalog = cache.NewCatalog()
	e.partitions = make(map[[2]uint64]*logtailreplay.Partition)

	var packer *types.Packer
	put := e.packerPool.Get(&packer)
	defer put.Put()

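	// Pre-create the partitions for the three system catalog tables
	// (mo_database, mo_tables, mo_columns) that live in mo_catalog.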
	{
		e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_DATABASE_ID}] = logtailreplay.NewPartition()
	}

	{
		e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID}] = logtailreplay.NewPartition()
	}

	{
		e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_COLUMNS_ID}] = logtailreplay.NewPartition()
	}

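	// Seed the mo_database partition and the catalog cache with the tuple
	// that describes the mo_catalog database itself.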
	{ // mo_catalog
		part := e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_DATABASE_ID}]
		bat, err := genCreateDatabaseTuple("", 0, 0, 0, catalog.MO_CATALOG, catalog.MO_CATALOG_ID, "", m)
		if err != nil {
			return err
		}
		ibat, err := genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done := part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF, packer)
		done()
		e.catalog.InsertDatabase(bat)
		bat.Clean(m)
	}

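	// For each of the three catalog tables (mo_database, mo_tables, mo_columns),
	// insert its row into the mo_tables partition and its column rows into the
	// mo_columns partition, mirroring both into the catalog cache.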
	{ // mo_database
		part := e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID}]
		cols, err := genColumns(0, catalog.MO_DATABASE, catalog.MO_CATALOG, catalog.MO_DATABASE_ID,
			catalog.MO_CATALOG_ID, catalog.MoDatabaseTableDefs)
		if err != nil {
			return err
		}
		tbl := new(txnTable)
		tbl.relKind = catalog.SystemOrdinaryRel
		bat, err := genCreateTableTuple(tbl, "", 0, 0, 0,
			catalog.MO_DATABASE, catalog.MO_DATABASE_ID,
			catalog.MO_CATALOG_ID, catalog.MO_CATALOG, types.Rowid{}, false, m)
		if err != nil {
			return err
		}
		ibat, err := genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done := part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF+catalog.MO_TABLES_REL_ID_IDX, packer)
		done()
		e.catalog.InsertTable(bat)
		bat.Clean(m)

		part = e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_COLUMNS_ID}]
		bat = batch.NewWithSize(len(catalog.MoColumnsSchema))
		bat.Attrs = append(bat.Attrs, catalog.MoColumnsSchema...)
		bat.SetRowCount(len(cols))
		for _, col := range cols {
			bat0, err := genCreateColumnTuple(col, types.Rowid{}, false, m)
			if err != nil {
				return err
			}
			if bat.Vecs[0] == nil {
				for i, vec := range bat0.Vecs {
					bat.Vecs[i] = vector.NewVec(*vec.GetType())
				}
			}
			for i, vec := range bat0.Vecs {
				if err := bat.Vecs[i].UnionOne(vec, 0, m); err != nil {
					bat.Clean(m)
					bat0.Clean(m)
					return err
				}
			}
			bat0.Clean(m)
		}
		ibat, err = genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done = part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF+catalog.MO_COLUMNS_ATT_UNIQ_NAME_IDX, packer)
		done()
		e.catalog.InsertColumns(bat)
		bat.Clean(m)
	}

	{ // mo_tables
		part := e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID}]
		cols, err := genColumns(0, catalog.MO_TABLES, catalog.MO_CATALOG, catalog.MO_TABLES_ID,
			catalog.MO_CATALOG_ID, catalog.MoTablesTableDefs)
		if err != nil {
			return err
		}
		tbl := new(txnTable)
		tbl.relKind = catalog.SystemOrdinaryRel
		bat, err := genCreateTableTuple(tbl, "", 0, 0, 0, catalog.MO_TABLES, catalog.MO_TABLES_ID,
			catalog.MO_CATALOG_ID, catalog.MO_CATALOG, types.Rowid{}, false, m)
		if err != nil {
			return err
		}
		ibat, err := genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done := part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF+catalog.MO_TABLES_REL_ID_IDX, packer)
		done()
		e.catalog.InsertTable(bat)
		bat.Clean(m)

		part = e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_COLUMNS_ID}]
		bat = batch.NewWithSize(len(catalog.MoColumnsSchema))
		bat.Attrs = append(bat.Attrs, catalog.MoColumnsSchema...)
		bat.SetRowCount(len(cols))
		for _, col := range cols {
			bat0, err := genCreateColumnTuple(col, types.Rowid{}, false, m)
			if err != nil {
				return err
			}
			if bat.Vecs[0] == nil {
				for i, vec := range bat0.Vecs {
					bat.Vecs[i] = vector.NewVec(*vec.GetType())
				}
			}
			for i, vec := range bat0.Vecs {
				if err := bat.Vecs[i].UnionOne(vec, 0, m); err != nil {
					bat.Clean(m)
					bat0.Clean(m)
					return err
				}
			}
			bat0.Clean(m)
		}
		ibat, err = genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done = part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF+catalog.MO_COLUMNS_ATT_UNIQ_NAME_IDX, packer)
		done()
		e.catalog.InsertColumns(bat)
		bat.Clean(m)
	}

	{ // mo_columns
		part := e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID}]
		cols, err := genColumns(0, catalog.MO_COLUMNS, catalog.MO_CATALOG, catalog.MO_COLUMNS_ID,
			catalog.MO_CATALOG_ID, catalog.MoColumnsTableDefs)
		if err != nil {
			return err
		}
		tbl := new(txnTable)
		tbl.relKind = catalog.SystemOrdinaryRel
		bat, err := genCreateTableTuple(tbl, "", 0, 0, 0, catalog.MO_COLUMNS, catalog.MO_COLUMNS_ID,
			catalog.MO_CATALOG_ID, catalog.MO_CATALOG, types.Rowid{}, false, m)
		if err != nil {
			return err
		}
		ibat, err := genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done := part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF+catalog.MO_TABLES_REL_ID_IDX, packer)
		done()
		e.catalog.InsertTable(bat)
		bat.Clean(m)

		part = e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_COLUMNS_ID}]
		bat = batch.NewWithSize(len(catalog.MoColumnsSchema))
		bat.Attrs = append(bat.Attrs, catalog.MoColumnsSchema...)
		bat.SetRowCount(len(cols))
		for _, col := range cols {
			bat0, err := genCreateColumnTuple(col, types.Rowid{}, false, m)
			if err != nil {
				return err
			}
			if bat.Vecs[0] == nil {
				for i, vec := range bat0.Vecs {
					bat.Vecs[i] = vector.NewVec(*vec.GetType())
				}
			}
			for i, vec := range bat0.Vecs {
				if err := bat.Vecs[i].UnionOne(vec, 0, m); err != nil {
					bat.Clean(m)
					bat0.Clean(m)
					return err
				}
			}
			bat0.Clean(m)
		}
		ibat, err = genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done = part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF+catalog.MO_COLUMNS_ATT_UNIQ_NAME_IDX, packer)
		done()
		e.catalog.InsertColumns(bat)
		bat.Clean(m)
	}

	return nil
}

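// getLatestCatalogCache returns the catalog cache that tracks the latest
// (non-snapshot) catalog state.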
func (e *Engine) getLatestCatalogCache() *cache.CatalogCache {
	return e.catalog
}

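// loadSnapCkpForTable loads the checkpoint entries located by loc for one
// catalog table and replays them into snapCatalog, so that the snapshot
// catalog cache reflects the table state captured by that checkpoint.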
func (e *Engine) loadSnapCkpForTable(
	ctx context.Context,
	snapCatalog *cache.CatalogCache,
	loc string,
	tid uint64,
	tblName string,
	did uint64,
	dbName string,
	pkSeqNum int,
) error {
	entries, closeCBs, err := logtail.LoadCheckpointEntries(
		ctx,
		loc,
		tid,
		tblName,
		did,
		dbName,
		e.mp,
		e.fs)
	if err != nil {
		return err
	}
	defer func() {
		for _, cb := range closeCBs {
			cb()
		}
	}()
	for _, entry := range entries {
		if err = consumeEntry(ctx, pkSeqNum, e, snapCatalog, nil, entry); err != nil {
			return err
		}
	}
	return nil
}

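// getOrCreateSnapCatalogCache returns a catalog cache that can serve reads at
// timestamp ts. The latest catalog cache is reused if it already covers ts;
// otherwise a new snapshot catalog is built by replaying mo_database,
// mo_tables and mo_columns from the checkpoints covering ts, and is cached
// for later reuse.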
func (e *Engine) getOrCreateSnapCatalogCache(
	ctx context.Context,
	ts types.TS) (*cache.CatalogCache, error) {
	if e.catalog.CanServe(ts) {
		return e.catalog, nil
	}
	e.snapCatalog.Lock()
	defer e.snapCatalog.Unlock()
	for _, snap := range e.snapCatalog.snaps {
		if snap.CanServe(ts) {
			return snap, nil
		}
	}
	snapCata := cache.NewCatalog()
	//TODO:: insert mo_tables, mo_columns, mo_database and mo_catalog into snapCata,
	//       refer to engine.init.
	ckps, err := checkpoint.ListSnapshotCheckpoint(ctx, e.fs, ts, 0, nil)
	if ckps == nil {
		return nil, moerr.NewInternalErrorNoCtx("No checkpoints for snapshot read")
	}
	if err != nil {
		return nil, err
	}
	//Note that the checkpoints must contain zero or one global checkpoint
	//followed by zero or more contiguous incremental checkpoints.
	start := types.MaxTs()
	end := types.TS{}
	for _, ckp := range ckps {
		locs := make([]string, 0)
		locs = append(locs, ckp.GetLocation().String())
		locs = append(locs, strconv.Itoa(int(ckp.GetVersion())))
		locations := strings.Join(locs, ";")
		//FIXME::pkSeqNum == 0?
		if err := e.loadSnapCkpForTable(
			ctx,
			snapCata,
			locations,
			catalog.MO_DATABASE_ID,
			catalog.MO_DATABASE,
			catalog.MO_CATALOG_ID,
			catalog.MO_CATALOG,
			0); err != nil {
			return nil, err
		}
		if err := e.loadSnapCkpForTable(
			ctx,
			snapCata,
			locations,
			catalog.MO_TABLES_ID,
			catalog.MO_TABLES,
			catalog.MO_CATALOG_ID,
			catalog.MO_CATALOG, 0); err != nil {
			return nil, err
		}
		if err := e.loadSnapCkpForTable(
			ctx,
			snapCata,
			locations,
			catalog.MO_COLUMNS_ID,
			catalog.MO_COLUMNS,
			catalog.MO_CATALOG_ID,
			catalog.MO_CATALOG,
			0); err != nil {
			return nil, err
		}
		//update the start and end of snapCata.
		if ckp.GetType() == checkpoint.ET_Global {
			start = ckp.GetEnd()
		}
		if ckp.GetType() == checkpoint.ET_Incremental {
			ckpstart := ckp.GetStart()
			if ckpstart.Less(&start) {
				start = ckpstart
			}
			ckpend := ckp.GetEnd()
			if ckpend.Greater(&end) {
				end = ckpend
			}
		}
	}
	if end.IsEmpty() {
		//only one global checkpoint.
		end = start
	}
	if ts.Greater(&end) || ts.Less(&start) {
		return nil, moerr.NewInternalErrorNoCtx("Invalid checkpoints for snapshot read")
	}
	snapCata.UpdateDuration(start, end)
	e.snapCatalog.snaps = append(e.snapCatalog.snaps, snapCata)
	return snapCata, nil
}

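// getOrCreateSnapPart returns a partition that can serve a snapshot read of
// tbl at timestamp ts. It reuses a cached snapshot partition when possible;
// otherwise it builds one by replaying the table's checkpoints. If there are
// no usable checkpoints, or ts is newer than the checkpointed range, the
// latest partition (kept up to date via logtail) is used instead.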
func (e *Engine) getOrCreateSnapPart(
	ctx context.Context,
	tbl *txnTable,
	ts types.TS) (*logtailreplay.Partition, error) {
	//check whether a cached snapshot partition can be reused.
	e.mu.Lock()
	tblSnaps, ok := e.mu.snapParts[[2]uint64{tbl.db.databaseId, tbl.tableId}]
	if !ok {
		e.mu.snapParts[[2]uint64{tbl.db.databaseId, tbl.tableId}] = &struct {
			sync.Mutex
			snaps []*logtailreplay.Partition
		}{}
		tblSnaps = e.mu.snapParts[[2]uint64{tbl.db.databaseId, tbl.tableId}]
	}
	e.mu.Unlock()

	tblSnaps.Lock()
	defer tblSnaps.Unlock()
	for _, snap := range tblSnaps.snaps {
		if snap.CanServe(ts) {
			return snap, nil
		}
	}

	//create a new snapshot partition and apply checkpoints to it.
	snap := logtailreplay.NewPartition()
	//TODO::if tableId is mo_tables, mo_columns or mo_database,
	//      we should init the partition, refer to engine.init.
	ckps, err := checkpoint.ListSnapshotCheckpoint(ctx, e.fs, ts, tbl.tableId, nil)
	if err != nil {
		return nil, err
	}
	snap.ConsumeSnapCkps(ctx, ckps, func(
		checkpoint *checkpoint.CheckpointEntry,
		state *logtailreplay.PartitionState) error {
		locs := make([]string, 0)
		locs = append(locs, checkpoint.GetLocation().String())
		locs = append(locs, strconv.Itoa(int(checkpoint.GetVersion())))
		locations := strings.Join(locs, ";")
		entries, closeCBs, err := logtail.LoadCheckpointEntries(
			ctx,
			locations,
			tbl.tableId,
			tbl.tableName,
			tbl.db.databaseId,
			tbl.db.databaseName,
			e.mp,
			e.fs)
		if err != nil {
			return err
		}
		defer func() {
			for _, cb := range closeCBs {
				cb()
			}
		}()
		for _, entry := range entries {
			if err = consumeEntry(
				ctx,
				tbl.primarySeqnum,
				e,
				nil,
				state,
				entry); err != nil {
				return err
			}
		}
		return nil
	})
	if snap.CanServe(ts) {
		tblSnaps.snaps = append(tblSnaps.snaps, snap)
		return snap, nil
	}

	start, end := snap.GetDuration()
	//if there are no checkpoints or ts > snap.end, use the latest partition.
	if snap.IsEmpty() || ts.Greater(&end) {
		err := tbl.updateLogtail(ctx)
		if err != nil {
			return nil, err
		}
		return e.getOrCreateLatestPart(tbl.db.databaseId, tbl.tableId), nil
	}
	if ts.Less(&start) {
		return nil, moerr.NewInternalErrorNoCtx(
			"No valid checkpoints for snapshot read, maybe the snapshot is too old, "+
				"snapshot:%s, start:%s, end:%s",
			ts.ToTimestamp().DebugString(),
			start.ToTimestamp().DebugString(),
			end.ToTimestamp().DebugString())
	}
	panic("impossible path")
}

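// getOrCreateLatestPart returns the latest partition for the given table,
// creating an empty one if it does not exist yet.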
func (e *Engine) getOrCreateLatestPart(
	databaseId,
	tableId uint64) *logtailreplay.Partition {
	e.Lock()
	defer e.Unlock()
	partition, ok := e.partitions[[2]uint64{databaseId, tableId}]
	if !ok { // create a new table
		partition = logtailreplay.NewPartition()
		e.partitions[[2]uint64{databaseId, tableId}] = partition
	}
	return partition
}

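// lazyLoadLatestCkp makes sure the latest partition of tbl has consumed its
// checkpoints, loading and replaying the checkpoint entries into the
// partition state and the catalog cache on first use.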
func (e *Engine) lazyLoadLatestCkp(
	ctx context.Context,
	tbl *txnTable) (*logtailreplay.Partition, error) {
	part := e.getOrCreateLatestPart(tbl.db.databaseId, tbl.tableId)
	cache := e.getLatestCatalogCache()

	if err := part.ConsumeCheckpoints(
		ctx,
		func(checkpoint string, state *logtailreplay.PartitionState) error {
			entries, closeCBs, err := logtail.LoadCheckpointEntries(
				ctx,
				checkpoint,
				tbl.tableId,
				tbl.tableName,
				tbl.db.databaseId,
				tbl.db.databaseName,
				tbl.getTxn().engine.mp,
				tbl.getTxn().engine.fs)
			if err != nil {
				return err
			}
			defer func() {
				for _, cb := range closeCBs {
					cb()
				}
			}()
			for _, entry := range entries {
				if err = consumeEntry(ctx, tbl.primarySeqnum, e, cache, state, entry); err != nil {
					return err
				}
			}
			return nil
		},
	); err != nil {
		return nil, err
	}

	return part, nil
}

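// UpdateOfPush subscribes the table through the logtail push client; the
// ts parameter is not used here.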
func (e *Engine) UpdateOfPush(
	ctx context.Context,
	databaseId,
	tableId uint64, ts timestamp.Timestamp) error {
	return e.pClient.TryToSubscribeTable(ctx, databaseId, tableId)
}