github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/disttae/txn.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package disttae
    16  
    17  import (
    18  	"context"
    19  	"database/sql"
    20  	"errors"
    21  	"math"
    22  	"strings"
    23  	"time"
    24  
    25  	"github.com/matrixorigin/matrixone/pkg/catalog"
    26  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    27  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    28  	"github.com/matrixorigin/matrixone/pkg/container/types"
    29  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    30  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    31  	"github.com/matrixorigin/matrixone/pkg/objectio"
    32  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    33  	"github.com/matrixorigin/matrixone/pkg/pb/timestamp"
    34  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    35  	plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan"
    36  	"github.com/matrixorigin/matrixone/pkg/sql/util"
    37  	"github.com/matrixorigin/matrixone/pkg/txn/storage/memorystorage/memorytable"
    38  	"github.com/matrixorigin/matrixone/pkg/txn/storage/memorystorage/memtable"
    39  	"github.com/matrixorigin/matrixone/pkg/util/errutil"
    40  	"github.com/matrixorigin/matrixone/pkg/vm/engine"
    41  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    42  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    43  )
    44  
    45  /*
    46  func (txn *Transaction) getTableList(ctx context.Context, databaseId uint64) ([]string, error) {
    47  	rows, err := txn.getRows(ctx, "", catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID, txn.dnStores[:1],
    48  		catalog.MoTablesTableDefs, []string{
    49  			catalog.MoTablesSchema[catalog.MO_TABLES_REL_NAME_IDX],
    50  			catalog.MoTablesSchema[catalog.MO_TABLES_RELDATABASE_ID_IDX],
    51  			catalog.MoTablesSchema[catalog.MO_TABLES_ACCOUNT_ID_IDX],
    52  		},
    53  		genTableListExpr(ctx, getAccountId(ctx), databaseId))
    54  	if err != nil {
    55  		return nil, err
    56  	}
    57  	tableList := make([]string, len(rows))
    58  	for i := range rows {
    59  		tableList[i] = string(rows[i][0].([]byte))
    60  	}
    61  	return tableList, nil
    62  }
    63  
    64  func (txn *Transaction) getTableInfo(ctx context.Context, databaseId uint64,
    65  	name string) (*table, []engine.TableDef, error) {
    66  	accountId := getAccountId(ctx)
    67  	key := genTableIndexKey(name, databaseId, accountId)
    68  	rows, err := txn.getRowsByIndex(catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID, "",
    69  		txn.dnStores[:1], catalog.MoTablesSchema, key,
    70  		genTableInfoExpr(ctx, accountId, databaseId, name))
    71  	if err != nil {
    72  		return nil, nil, err
    73  	}
    74  	if len(rows) != 1 {
    75  		return nil, nil, moerr.NewDuplicate(ctx)
    76  	}
    77  	row := rows[0]
    78  	//	row, err := txn.getRow(ctx, catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID,
    79  	//		txn.dnStores[:1], catalog.MoTablesTableDefs, catalog.MoTablesSchema,
    80  	//		genTableInfoExpr(accountId, databaseId, name))
    81  	//	if err != nil {
    82  	//		return nil, nil, err
    83  	//	}
    84  	tbl := new(table)
    85  	tbl.primaryIdx = -1
    86  	tbl.tableId = row[catalog.MO_TABLES_REL_ID_IDX].(uint64)
    87  	tbl.viewdef = string(row[catalog.MO_TABLES_VIEWDEF_IDX].([]byte))
    88  	tbl.relKind = string(row[catalog.MO_TABLES_RELKIND_IDX].([]byte))
    89  	tbl.comment = string(row[catalog.MO_TABLES_REL_COMMENT_IDX].([]byte))
    90  	tbl.partition = string(row[catalog.MO_TABLES_PARTITIONED_IDX].([]byte))
    91  	tbl.createSql = string(row[catalog.MO_TABLES_REL_CREATESQL_IDX].([]byte))
    92  	tbl.constraint = row[catalog.MO_TABLES_CONSTRAINT_IDX].([]byte)
    93  	//	rows, err := txn.getRows(ctx, "", catalog.MO_CATALOG_ID, catalog.MO_COLUMNS_ID,
    94  	//		txn.dnStores[:1], catalog.MoColumnsTableDefs, catalog.MoColumnsSchema,
    95  	//		genColumnInfoExpr(accountId, databaseId, tbl.tableId))
    96  	//	if err != nil {
    97  	//		return nil, nil, err
    98  	//	}
    99  	rows, err = txn.getRowsByIndex(catalog.MO_CATALOG_ID, catalog.MO_COLUMNS_ID, "",
   100  		txn.dnStores[:1], catalog.MoColumnsSchema, genColumnIndexKey(tbl.tableId),
   101  		genColumnInfoExpr(ctx, accountId, databaseId, tbl.tableId))
   102  	if err != nil {
   103  		return nil, nil, err
   104  	}
   105  	cols := getColumnsFromRows(rows)
   106  	defs := make([]engine.TableDef, 0, len(cols))
   107  	defs = append(defs, genTableDefOfComment(string(row[catalog.MO_TABLES_REL_COMMENT_IDX].([]byte))))
   108  	for i, col := range cols {
   109  		if col.constraintType == catalog.SystemColPKConstraint {
   110  			tbl.primaryIdx = i
   111  		}
   112  		if col.isClusterBy == 1 {
   113  			tbl.clusterByIdx = i
   114  		}
   115  		defs = append(defs, genTableDefOfColumn(col))
   116  	}
   117  	return tbl, defs, nil
   118  }
   119  
   120  func (txn *Transaction) getTableId(ctx context.Context, databaseId uint64,
   121  	name string) (uint64, error) {
   122  	accountId := getAccountId(ctx)
   123  	row, err := txn.getRow(ctx, catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID,
   124  		txn.dnStores[:1],
   125  		catalog.MoTablesTableDefs, []string{
   126  			catalog.MoTablesSchema[catalog.MO_TABLES_REL_ID_IDX],
   127  			catalog.MoTablesSchema[catalog.MO_TABLES_REL_NAME_IDX],
   128  			catalog.MoTablesSchema[catalog.MO_TABLES_RELDATABASE_ID_IDX],
   129  			catalog.MoTablesSchema[catalog.MO_TABLES_ACCOUNT_ID_IDX],
   130  		},
   131  		genTableIdExpr(ctx, accountId, databaseId, name))
   132  	if err != nil {
   133  		return 0, err
   134  	}
   135  	return row[0].(uint64), nil
   136  }
   137  
   138  func (txn *Transaction) getDatabaseList(ctx context.Context) ([]string, error) {
   139  	rows, err := txn.getRows(ctx, "", catalog.MO_CATALOG_ID, catalog.MO_DATABASE_ID,
   140  		txn.dnStores[:1],
   141  		catalog.MoDatabaseTableDefs, []string{
   142  			catalog.MoDatabaseSchema[catalog.MO_DATABASE_DAT_NAME_IDX],
   143  			catalog.MoDatabaseSchema[catalog.MO_DATABASE_ACCOUNT_ID_IDX],
   144  		},
   145  		genDatabaseListExpr(ctx, getAccountId(ctx)))
   146  	if err != nil {
   147  		return nil, err
   148  	}
   149  	databaseList := make([]string, len(rows))
   150  	for i := range rows {
   151  		databaseList[i] = string(rows[i][0].([]byte))
   152  	}
   153  	return databaseList, nil
   154  }
   155  
   156  func (txn *Transaction) getDatabaseId(ctx context.Context, name string) (uint64, error) {
   157  	accountId := getAccountId(ctx)
   158  	key := genDatabaseIndexKey(name, accountId)
   159  	rows, err := txn.getRowsByIndex(catalog.MO_CATALOG_ID, catalog.MO_DATABASE_ID, "",
   160  		txn.dnStores[:1], []string{
   161  			catalog.MoDatabaseSchema[catalog.MO_DATABASE_DAT_ID_IDX],
   162  			catalog.MoDatabaseSchema[catalog.MO_DATABASE_DAT_NAME_IDX],
   163  			catalog.MoDatabaseSchema[catalog.MO_DATABASE_ACCOUNT_ID_IDX],
   164  		}, key, genDatabaseIdExpr(ctx, accountId, name))
   165  	if err != nil {
   166  		return 0, err
   167  	}
   168  	if len(rows) != 1 {
   169  		return 0, moerr.NewDuplicate(ctx)
   170  	}
   171  	//	row, err := txn.getRow(ctx, catalog.MO_CATALOG_ID, catalog.MO_DATABASE_ID, txn.dnStores[:1],
   172  	//		catalog.MoDatabaseTableDefs, []string{
   173  	//			catalog.MoDatabaseSchema[catalog.MO_DATABASE_DAT_ID_IDX],
   174  	//			catalog.MoDatabaseSchema[catalog.MO_DATABASE_DAT_NAME_IDX],
   175  	//			catalog.MoDatabaseSchema[catalog.MO_DATABASE_ACCOUNT_ID_IDX],
   176  	//		},
   177  	//		genDatabaseIdExpr(accountId, name))
   178  	//	if err != nil {
   179  	//		return 0, err
   180  	//	}
   181  	return rows[0][0].(uint64), nil
   182  }
   183  */
   184  
   185  func (txn *Transaction) getTableMeta(ctx context.Context, databaseId uint64,
   186  	name string, needUpdated bool, columnLength int, prefetch bool) (*tableMeta, error) {
   187  	blocks := make([][]BlockMeta, len(txn.dnStores))
   188  	if needUpdated {
   189  		for i, dnStore := range txn.dnStores {
   190  			rows, err := txn.getRows(ctx, name, databaseId, 0,
   191  				[]DNStore{dnStore}, catalog.MoTableMetaDefs, catalog.MoTableMetaSchema, nil)
   192  			if moerr.IsMoErrCode(err, moerr.OkExpectedEOB) {
   193  				continue
   194  			}
   195  			if err != nil {
   196  				return nil, err
   197  			}
   198  			blocks[i], err = genBlockMetas(ctx, rows, columnLength, txn.proc.FileService,
   199  				txn.proc.GetMPool(), prefetch)
   200  			if err != nil {
   201  				return nil, moerr.NewInternalError(ctx, "disttae: getTableMeta err: %v, table: %v", err.Error(), name)
   202  			}
   203  		}
   204  	}
   205  	return &tableMeta{
   206  		tableName: name,
   207  		blocks:    blocks,
   208  		defs:      catalog.MoTableMetaDefs,
   209  	}, nil
   210  }
   211  
// ReadOnly reports whether the transaction has performed no writes so far
// (the flag is cleared by WriteBatch/WriteFile).
func (txn *Transaction) ReadOnly() bool {
	return txn.readOnly
}
   216  
   217  // use for solving halloween problem
   218  func (txn *Transaction) IncStatementId() {
   219  	txn.statementId++
   220  	txn.writes = append(txn.writes, make([]Entry, 0, 1))
   221  }
   222  
   223  // Write used to write data to the transaction buffer
   224  // insert/delete/update all use this api
   225  func (txn *Transaction) WriteBatch(
   226  	typ int,
   227  	databaseId uint64,
   228  	tableId uint64,
   229  	databaseName string,
   230  	tableName string,
   231  	bat *batch.Batch,
   232  	dnStore DNStore,
   233  	primaryIdx int, // pass -1 to indicate no primary key or disable primary key checking
   234  ) error {
   235  	txn.readOnly = false
   236  	bat.Cnt = 1
   237  	if typ == INSERT {
   238  		len := bat.Length()
   239  		vec := vector.New(types.New(types.T_Rowid, 0, 0, 0))
   240  		for i := 0; i < len; i++ {
   241  			if err := vec.Append(txn.genRowId(), false,
   242  				txn.proc.Mp()); err != nil {
   243  				return err
   244  			}
   245  		}
   246  		bat.Vecs = append([]*vector.Vector{vec}, bat.Vecs...)
   247  		bat.Attrs = append([]string{catalog.Row_ID}, bat.Attrs...)
   248  	}
   249  	txn.Lock()
   250  	txn.writes[txn.statementId] = append(txn.writes[txn.statementId], Entry{
   251  		typ:          typ,
   252  		bat:          bat,
   253  		tableId:      tableId,
   254  		databaseId:   databaseId,
   255  		tableName:    tableName,
   256  		databaseName: databaseName,
   257  		dnStore:      dnStore,
   258  	})
   259  	txn.Unlock()
   260  
   261  	if err := txn.checkPrimaryKey(typ, primaryIdx, bat, tableName, tableId); err != nil {
   262  		return err
   263  	}
   264  
   265  	return nil
   266  }
   267  
// checkPrimaryKey maintains the in-memory workspace index for the batch being
// written and rejects duplicate primary-key values within this transaction.
// typ is the write kind (INSERT/DELETE); primaryIdx is the primary-key column
// position in the caller's original batch (column 0 of bat is the row id
// added by WriteBatch, hence the +1 below). Returns a DuplicateEntry error
// when an INSERT repeats a primary key already present in the workspace.
func (txn *Transaction) checkPrimaryKey(
	typ int,
	primaryIdx int,
	bat *batch.Batch,
	tableName string,
	tableId uint64,
) error {

	// no primary key
	if primaryIdx < 0 {
		return nil
	}

	//TODO ignore these buggy auto incr tables for now
	if strings.Contains(tableName, "%!%mo_increment") {
		return nil
	}

	// Each call runs inside its own short-lived memorytable transaction,
	// stamped with the next local timestamp and committed at the end.
	t := txn.nextLocalTS()
	tx := memorytable.NewTransaction(t)
	iter := memorytable.NewBatchIter(bat)
	for {
		tuple := iter()
		if len(tuple) == 0 {
			// iterator exhausted
			break
		}

		// Column 0 of every write batch is the row id (see WriteBatch).
		rowID := RowID(tuple[0].Value.(types.Rowid))

		switch typ {

		case INSERT:
			var indexes []memtable.Tuple

			idx := primaryIdx + 1 // skip the first row id column
			primaryKey := memtable.ToOrdered(tuple[idx].Value)
			// Composite index key: (tag, table id, primary key).
			index := memtable.Tuple{
				index_TableID_PrimaryKey,
				memtable.ToOrdered(tableId),
				primaryKey,
			}

			// check primary key
			entries, err := txn.workspace.Index(tx, index)
			if err != nil {
				return err
			}
			if len(entries) > 0 {
				return moerr.NewDuplicateEntry(
					txn.proc.Ctx,
					common.TypeStringValue(bat.Vecs[idx].Typ, tuple[idx].Value),
					bat.Attrs[idx],
				)
			}

			// add primary key
			indexes = append(indexes, index)

			row := &workspaceRow{
				rowID:   rowID,
				tableID: tableId,
				indexes: indexes,
			}
			err = txn.workspace.Insert(tx, row)
			if err != nil {
				return err
			}

		case DELETE:
			// Deleting a row that was never inserted in this transaction is
			// fine; sql.ErrNoRows is expected and ignored.
			err := txn.workspace.Delete(tx, rowID)
			if err != nil && !errors.Is(err, sql.ErrNoRows) {
				return err
			}

		}
	}
	if err := tx.Commit(t); err != nil {
		return err
	}

	return nil
}
   350  
   351  func (txn *Transaction) nextLocalTS() timestamp.Timestamp {
   352  	txn.localTS = txn.localTS.Next()
   353  	return txn.localTS
   354  }
   355  
// WriteFile used to add a s3 file information to the transaction buffer
// insert/delete/update all use this api
//
// The s3 object name is recorded alongside the batch so commit can reference
// the already-written file instead of shipping the data again.
// NOTE(review): unlike WriteBatch, this append to txn.writes is not guarded
// by txn.Lock(); confirm callers serialize access to the transaction.
func (txn *Transaction) WriteFile(typ int, databaseId, tableId uint64,
	databaseName, tableName string, fileName string, bat *batch.Batch, dnStore DNStore) error {
	// any write makes the transaction non-read-only
	txn.readOnly = false
	txn.writes[txn.statementId] = append(txn.writes[txn.statementId], Entry{
		typ:          typ,
		tableId:      tableId,
		databaseId:   databaseId,
		tableName:    tableName,
		databaseName: databaseName,
		fileName:     fileName,
		bat:          bat,
		dnStore:      dnStore,
	})
	return nil
}
   373  
   374  // getRow used to get a row of table based on a condition
   375  /*
   376  func (txn *Transaction) getRow(ctx context.Context, databaseId uint64, tableId uint64,
   377  	dnList []DNStore, defs []engine.TableDef, columns []string, expr *plan.Expr) ([]any, error) {
   378  	bats, err := txn.readTable(ctx, "", databaseId, tableId, defs, dnList, columns, expr)
   379  	if err != nil {
   380  		return nil, err
   381  	}
   382  	if len(bats) == 0 {
   383  		return nil, moerr.GetOkExpectedEOB()
   384  	}
   385  	rows := make([][]any, 0, len(bats))
   386  	for _, bat := range bats {
   387  		if bat.Length() > 0 {
   388  			rows = append(rows, catalog.GenRows(bat)...)
   389  		}
   390  		bat.Clean(txn.proc.Mp())
   391  	}
   392  	if len(rows) == 0 {
   393  		return nil, moerr.GetOkExpectedEOB()
   394  	}
   395  	if len(rows) != 1 {
   396  		return nil, moerr.NewInvalidInput(ctx, "table is not unique")
   397  	}
   398  	return rows[0], nil
   399  }
   400  */
   401  
   402  // getRows used to get rows of table
   403  func (txn *Transaction) getRows(ctx context.Context, name string, databaseId uint64, tableId uint64,
   404  	dnList []DNStore, defs []engine.TableDef, columns []string, expr *plan.Expr) ([][]any, error) {
   405  	bats, err := txn.readTable(ctx, name, databaseId, tableId, defs, dnList, columns, expr)
   406  	if err != nil {
   407  		return nil, err
   408  	}
   409  	if len(bats) == 0 {
   410  		return nil, moerr.GetOkExpectedEOB()
   411  	}
   412  	rows := make([][]any, 0, len(bats))
   413  	for _, bat := range bats {
   414  		if bat.Length() > 0 {
   415  			rows = append(rows, catalog.GenRows(bat)...)
   416  		}
   417  		bat.Clean(txn.proc.Mp())
   418  	}
   419  	return rows, nil
   420  }
   421  
   422  /*
   423  func (txn *Transaction) getRowsByIndex(databaseId, tableId uint64, name string,
   424  	dnList []DNStore, columns []string, index memtable.Tuple, expr *plan.Expr) ([][]any, error) {
   425  	var rows [][]any
   426  
   427  	deletes := make(map[types.Rowid]uint8)
   428  	if len(name) == 0 {
   429  		for i := range txn.writes {
   430  			for _, entry := range txn.writes[i] {
   431  				if !(entry.databaseId == databaseId &&
   432  					entry.tableId == tableId) {
   433  					continue
   434  				}
   435  				if entry.typ == DELETE {
   436  					if entry.bat.GetVector(0).GetType().Oid == types.T_Rowid {
   437  						vs := vector.MustTCols[types.Rowid](entry.bat.GetVector(0))
   438  						for _, v := range vs {
   439  							deletes[v] = 0
   440  						}
   441  					}
   442  				}
   443  				if entry.typ == INSERT {
   444  					length := entry.bat.Length()
   445  					flags := make([]uint8, length)
   446  					for i := range flags {
   447  						flags[i]++
   448  					}
   449  					mp := make(map[string]int)
   450  					for _, col := range columns {
   451  						mp[col] = 0
   452  					}
   453  					for i, attr := range entry.bat.Attrs {
   454  						if _, ok := mp[attr]; ok {
   455  							mp[attr] = i
   456  						}
   457  					}
   458  					bat := batch.NewWithSize(len(columns))
   459  					for i := range bat.Vecs {
   460  						vec := entry.bat.Vecs[mp[columns[i]]]
   461  						bat.Vecs[i] = vector.New(vec.GetType())
   462  						if err := vector.UnionBatch(bat.Vecs[i], vec, 0, length,
   463  							flags[:length], txn.proc.Mp()); err != nil {
   464  							return nil, err
   465  						}
   466  					}
   467  					bat.SetZs(entry.bat.Length(), txn.proc.Mp())
   468  					if expr != nil {
   469  						vec, err := colexec.EvalExpr(bat, txn.proc, expr)
   470  						if err != nil {
   471  							return nil, err
   472  						}
   473  						bs := vector.GetColumn[bool](vec)
   474  						if vec.IsScalar() {
   475  							if !bs[0] {
   476  								bat.Shrink(nil)
   477  							}
   478  						} else {
   479  							sels := txn.proc.Mp().GetSels()
   480  							for i, b := range bs {
   481  								if b {
   482  									sels = append(sels, int64(i))
   483  								}
   484  							}
   485  							bat.Shrink(sels)
   486  							txn.proc.Mp().PutSels(sels)
   487  						}
   488  						vec.Free(txn.proc.Mp())
   489  					}
   490  					rows = append(rows, catalog.GenRows(bat)...)
   491  					bat.Clean(txn.proc.Mp())
   492  				}
   493  			}
   494  		}
   495  	}
   496  	accessed := make(map[string]uint8)
   497  	for _, dn := range dnList {
   498  		accessed[dn.GetUUID()] = 0
   499  	}
   500  	parts := txn.db.getPartitions(databaseId, tableId)
   501  	for i, dn := range txn.dnStores {
   502  		if _, ok := accessed[dn.GetUUID()]; !ok {
   503  			continue
   504  		}
   505  		tuples, err := parts[i].GetRowsByIndex(txn.meta.SnapshotTS, index, columns, deletes)
   506  		if err == nil {
   507  			rows = append(rows, tuples...)
   508  		}
   509  	}
   510  	if len(rows) == 0 {
   511  		return nil, moerr.GetOkExpectedEOB()
   512  	}
   513  	return rows, nil
   514  }
   515  */
   516  
// readTable used to get tuples of table based on a condition
// only used to read data from catalog, for which the execution is currently single-core
//
// name selects the partition source: empty means a regular catalog table
// (db.getPartitions), otherwise a meta table (db.getMetaPartitions). Only the
// DN stores listed in dnList are read. When expr is non-nil each batch is
// filtered in place afterwards.
func (txn *Transaction) readTable(ctx context.Context, name string, databaseId uint64, tableId uint64,
	defs []engine.TableDef, dnList []DNStore, columns []string, expr *plan.Expr) ([]*batch.Batch, error) {
	var parts Partitions
	/*
		var writes [][]Entry
		// consider halloween problem
		if int64(txn.statementId)-1 > 0 {
			writes = txn.writes[:txn.statementId-1]
		}
	*/
	// Collect this transaction's own uncommitted writes for the target table
	// so the reader can merge them with committed state.
	writes := make([]Entry, 0, len(txn.writes))
	if len(name) == 0 { // meta table not need this
		for i := range txn.writes {
			for _, entry := range txn.writes[i] {
				if entry.databaseId == databaseId &&
					entry.tableId == tableId {
					writes = append(writes, entry)
				}
			}
		}
	}
	bats := make([]*batch.Batch, 0, 1)
	// Restrict reads to the DN stores the caller asked for.
	accessed := make(map[string]uint8)
	for _, dn := range dnList {
		accessed[dn.GetUUID()] = 0
	}
	if len(name) == 0 {
		parts = txn.db.getPartitions(databaseId, tableId)
	} else {
		parts = txn.db.getMetaPartitions(name)
	}
	for i, dn := range txn.dnStores {
		if _, ok := accessed[dn.GetUUID()]; !ok {
			continue
		}
		rds, err := parts[i].NewReader(ctx, 1, nil, defs, nil, nil, nil,
			txn.meta.SnapshotTS, nil, writes)
		if err != nil {
			return nil, err
		}
		// Drain every reader; a nil batch signals end of stream.
		for _, rd := range rds {
			for {
				bat, err := rd.Read(ctx, columns, expr, txn.proc.Mp())
				if err != nil {
					return nil, err
				}
				if bat != nil {
					bats = append(bats, bat)
				} else {
					break
				}
			}
		}
	}
	if expr == nil {
		return bats, nil
	}
	// Evaluate the filter on each batch and shrink it to the selected rows.
	for i, bat := range bats {
		vec, err := colexec.EvalExpr(bat, txn.proc, expr)
		if err != nil {
			return nil, err
		}
		bs := vector.GetColumn[bool](vec)
		if vec.IsScalar() {
			// a scalar false filters out the whole batch
			if !bs[0] {
				bat.Shrink(nil)
			}
		} else {
			sels := txn.proc.Mp().GetSels()
			for i, b := range bs {
				if b {
					sels = append(sels, int64(i))
				}
			}
			bat.Shrink(sels)
			txn.proc.Mp().PutSels(sels)
		}
		vec.Free(txn.proc.Mp())
		bats[i] = bat
	}
	return bats, nil
}
   601  
// deleteBatch applies a delete batch (whose first vector holds row ids) to
// this transaction's state: it removes matching rows from the workspace index
// and from pending write batches for the table, then returns bat shrunk to
// the row ids that matched no uncommitted insert — i.e. the deletes that must
// still be shipped to the DN.
func (txn *Transaction) deleteBatch(bat *batch.Batch,
	databaseId, tableId uint64) *batch.Batch {

	// tx for workspace operations
	t := txn.nextLocalTS()
	tx := memorytable.NewTransaction(t)
	defer func() {
		if err := tx.Commit(t); err != nil {
			panic(err)
		}
	}()

	// mp maps each row id to delete to the number of pending-insert rows it
	// matched (0 means it must go to the DN).
	mp := make(map[types.Rowid]uint8)
	rowids := vector.MustTCols[types.Rowid](bat.GetVector(0))
	for _, rowid := range rowids {
		mp[rowid] = 0
		// update workspace
		// a row never inserted in this transaction is expected to be absent
		err := txn.workspace.Delete(tx, RowID(rowid))
		if err != nil && !errors.Is(err, sql.ErrNoRows) {
			panic(err)
		}
	}

	// Shrink every pending write batch for this table, dropping rows whose
	// row id is being deleted.
	sels := txn.proc.Mp().GetSels()
	for i := range txn.writes {
		for j, e := range txn.writes[i] {
			sels = sels[:0]
			if e.tableId == tableId && e.databaseId == databaseId {
				vs := vector.MustTCols[types.Rowid](e.bat.GetVector(0))
				for k, v := range vs {
					if _, ok := mp[v]; !ok {
						sels = append(sels, int64(k))
					} else {
						mp[v]++
					}
				}
				// only shrink when at least one row was removed
				if len(sels) != len(vs) {
					txn.writes[i][j].bat.Shrink(sels)
				}
			}
		}
	}
	// Keep only the row ids that matched no pending insert.
	sels = sels[:0]
	for k, rowid := range rowids {
		if mp[rowid] == 0 {
			sels = append(sels, int64(k))
		}
	}
	bat.Shrink(sels)
	txn.proc.Mp().PutSels(sels)
	return bat
}
   654  
   655  func (txn *Transaction) allocateID(ctx context.Context) (uint64, error) {
   656  	ctx, cancel := context.WithTimeout(ctx, time.Minute)
   657  	defer cancel()
   658  	return txn.idGen.AllocateID(ctx)
   659  }
   660  
// genRowId returns a fresh row id for this transaction by bumping the second
// word of the rowId counter and reinterpreting the counter's bytes as a
// types.Rowid.
func (txn *Transaction) genRowId() types.Rowid {
	txn.rowId[1]++
	return types.DecodeFixed[types.Rowid](types.EncodeSlice(txn.rowId[:]))
}
   665  
// Len implements sort.Interface for container/heap.
func (h transactionHeap) Len() int {
	return len(h)
}
   669  
// Less orders transactions by snapshot timestamp, so the heap root is the
// transaction with the earliest snapshot.
func (h transactionHeap) Less(i, j int) bool {
	return h[i].meta.SnapshotTS.Less(h[j].meta.SnapshotTS)
}
   673  
// Swap implements sort.Interface for container/heap.
func (h transactionHeap) Swap(i, j int) {
	h[i], h[j] = h[j], h[i]
}
   677  
// Push appends a *Transaction, as required by container/heap.Interface.
func (h *transactionHeap) Push(x any) {
	*h = append(*h, x.(*Transaction))
}
   681  
   682  func (h *transactionHeap) Pop() any {
   683  	old := *h
   684  	n := len(old)
   685  	x := old[n-1]
   686  	*h = old[0 : n-1]
   687  	return x
   688  }
   689  
   690  // needRead determine if a block needs to be read
   691  func needRead(ctx context.Context, expr *plan.Expr, blkInfo BlockMeta, tableDef *plan.TableDef, columnMap map[int]int, columns []int, maxCol int, proc *process.Process) bool {
   692  	var err error
   693  	if expr == nil {
   694  		return true
   695  	}
   696  	notReportErrCtx := errutil.ContextWithNoReport(ctx, true)
   697  
   698  	// if expr match no columns, just eval expr
   699  	if len(columns) == 0 {
   700  		bat := batch.NewWithSize(0)
   701  		defer bat.Clean(proc.Mp())
   702  		ifNeed, err := plan2.EvalFilterExpr(notReportErrCtx, expr, bat, proc)
   703  		if err != nil {
   704  			return true
   705  		}
   706  		return ifNeed
   707  	}
   708  
   709  	// get min max data from Meta
   710  	datas, dataTypes, err := getZonemapDataFromMeta(ctx, columns, blkInfo, tableDef)
   711  	if err != nil || datas == nil {
   712  		return true
   713  	}
   714  
   715  	// use all min/max data to build []vectors.
   716  	buildVectors := plan2.BuildVectorsByData(datas, dataTypes, proc.Mp())
   717  	bat := batch.NewWithSize(maxCol + 1)
   718  	defer bat.Clean(proc.Mp())
   719  	for k, v := range columnMap {
   720  		for i, realIdx := range columns {
   721  			if realIdx == v {
   722  				bat.SetVector(int32(k), buildVectors[i])
   723  				break
   724  			}
   725  		}
   726  	}
   727  	bat.SetZs(buildVectors[0].Length(), proc.Mp())
   728  
   729  	ifNeed, err := plan2.EvalFilterExpr(notReportErrCtx, expr, bat, proc)
   730  	if err != nil {
   731  		return true
   732  	}
   733  	return ifNeed
   734  
   735  }
   736  
// get row count of block
func blockRows(meta BlockMeta) int64 {
	return meta.Rows
}
   741  
// blockMarshal serializes a BlockMeta via types.Encode.
// NOTE(review): the encode error is silently dropped; a failure yields nil.
func blockMarshal(meta BlockMeta) []byte {
	data, _ := types.Encode(meta)
	return data
}
   746  
// blockUnmarshal decodes bytes produced by blockMarshal back into a
// BlockMeta.
// NOTE(review): the decode error is ignored; malformed input yields a
// zero-valued BlockMeta.
func blockUnmarshal(data []byte) BlockMeta {
	var meta BlockMeta

	types.Decode(data, &meta)
	return meta
}
   753  
// write a block to s3
//
// Steps: (1) write the batch to a freshly named object via the object writer,
// (2) attach per-column bloom-filter and zonemap indexes when available,
// (3) finish the object and return its block descriptors.
func blockWrite(ctx context.Context, bat *batch.Batch, fs fileservice.FileService) ([]objectio.BlockObject, error) {
	// 1. write bat
	accountId, _, _ := getAccessInfo(ctx)
	s3FileName, err := getNewBlockName(accountId)
	if err != nil {
		return nil, err
	}
	writer, err := objectio.NewObjectWriter(s3FileName, fs)
	if err != nil {
		return nil, err
	}
	fd, err := writer.Write(bat)
	if err != nil {
		return nil, err
	}

	// 2. write index (index and zonemap)
	for i, vec := range bat.Vecs {
		bloomFilter, zoneMap, err := getIndexDataFromVec(uint16(i), vec)
		if err != nil {
			return nil, err
		}
		// either index may be nil for a column; skip what is absent
		if bloomFilter != nil {
			err = writer.WriteIndex(fd, bloomFilter)
			if err != nil {
				return nil, err
			}
		}
		if zoneMap != nil {
			err = writer.WriteIndex(fd, zoneMap)
			if err != nil {
				return nil, err
			}
		}
	}

	// 3. get return
	return writer.WriteEnd(ctx)
}
   794  
// needSyncDnStores returns the indexes of the DN stores that must be
// contacted for a statement. With a single store, or when no usable
// single-column primary key / filter is available, it falls back to all
// stores; otherwise it derives the store set from the primary-key range or
// hash implied by expr.
func needSyncDnStores(ctx context.Context, expr *plan.Expr, tableDef *plan.TableDef,
	priKeys []*engine.Attribute, dnStores []DNStore, proc *process.Process) []int {
	var pk *engine.Attribute

	// fullList is the conservative answer: every store index.
	fullList := func() []int {
		dnList := make([]int, len(dnStores))
		for i := range dnStores {
			dnList[i] = i
		}
		return dnList
	}
	if len(dnStores) == 1 {
		return []int{0}
	}
	// Pick the first non-composite primary-key column; composite keys are
	// skipped because a single column cannot determine the target store.
	for _, key := range priKeys {
		isCPkey := util.JudgeIsCompositePrimaryKeyColumn(key.Name)
		if isCPkey {
			continue
		}
		pk = key
		break
	}
	// have no PrimaryKey, return all the list
	if expr == nil || pk == nil || tableDef == nil {
		return fullList()
	}
	if pk.Type.IsIntOrUint() {
		canComputeRange, intPkRange := computeRangeByIntPk(expr, pk.Name, "")
		if !canComputeRange {
			return fullList()
		}
		if intPkRange.isRange {
			r := intPkRange.ranges
			// unbounded or oversized ranges cannot be enumerated; read all
			if r[0] == math.MinInt64 || r[1] == math.MaxInt64 || r[1]-r[0] > MAX_RANGE_SIZE {
				return fullList()
			}
			// expand the bounded range into an explicit item list
			intPkRange.isRange = false
			for i := intPkRange.ranges[0]; i <= intPkRange.ranges[1]; i++ {
				intPkRange.items = append(intPkRange.items, i)
			}
		}
		return getListByItems(dnStores, intPkRange.items)
	}
	// Non-integer key: hash the single computed value to pick one store.
	canComputeRange, hashVal := computeRangeByNonIntPk(ctx, expr, pk.Name, proc)
	if !canComputeRange {
		return fullList()
	}
	listLen := uint64(len(dnStores))
	idx := hashVal % listLen
	return []int{int(idx)}
}