github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/model/schema_storage.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package model
    15  
    16  import (
    17  	"fmt"
    18  
    19  	"github.com/pingcap/log"
    20  	"github.com/pingcap/tidb/pkg/parser/model"
    21  	"github.com/pingcap/tidb/pkg/parser/mysql"
    22  	"github.com/pingcap/tidb/pkg/parser/types"
    23  	"github.com/pingcap/tidb/pkg/table/tables"
    24  	datumTypes "github.com/pingcap/tidb/pkg/types"
    25  	"github.com/pingcap/tidb/pkg/util/rowcodec"
    26  	"go.uber.org/zap"
    27  )
    28  
    29  const (
    30  	// HandleIndexPKIsHandle is the TableInfo.HandleIndexID value meaning the primary key itself is the row handle.
    31  	HandleIndexPKIsHandle = -1
    32  	// HandleIndexTableIneligible is the TableInfo.HandleIndexID value meaning no handle index was found for the table.
    33  	HandleIndexTableIneligible = -2
    34  )
    35  
    36  // TableInfo provides meta data describing a DB table.
    37  type TableInfo struct {
    38  	*model.TableInfo
    39  	SchemaID int64
    40  	// NOTICE: We probably store the logical ID inside TableName,
    41  	// not the physical ID.
    42  	// For normal table, there is only one ID, which is the physical ID.
    43  	// AKA TIDB_TABLE_ID.
    44  	// For partitioned table, there are two kinds of ID:
    45  	// 1. TIDB_PARTITION_ID is the physical ID of the partition.
    46  	// 2. TIDB_TABLE_ID is the logical ID of the table.
    47  	// In general, we always use the physical ID to represent a table, but we
    48  	// record the logical ID from the DDL event(job.BinlogInfo.TableInfo).
    49  	// So be careful when using the TableInfo.
    50  	TableName TableName
    51  	// Version records the TSO at which this table info was created.
    52  	Version uint64
    53  	// ColumnID -> offset in model.TableInfo.Columns
    54  	columnsOffset map[int64]int
    55  	// IndexID -> offset in model.TableInfo.Indices
    56  	indicesOffset map[int64]int
    57  	// Column name -> ColumnID (CDC-visible columns only)
    58  	nameToColID map[string]int64
    59  
    60  	hasUniqueColumn bool // set when a handle PK column or an index accepted by IsIndexUnique exists
    61  
    62  	// ColumnID -> offset in RowChangedEvents.Columns (CDC-visible columns only).
    63  	RowColumnsOffset map[int64]int
    64  
    65  	ColumnsFlag map[int64]*ColumnFlagType // ColumnID -> column flags, filled by initColumnsFlag
    66  
    67  	// The mounter will choose this index to output delete events.
    68  	// Special values:
    69  	// HandleIndexPKIsHandle(-1) : pk is handle
    70  	// HandleIndexTableIneligible(-2) : the table is not eligible
    71  	HandleIndexID int64
    72  
    73  	// IndexColumnsOffset stores the offsets of the columns in row changed events for
    74  	// each unique index and the primary key.
    75  	// The reason why we need this is that the Indices in TableInfo
    76  	// do not contain the PK if the table is created by a statement like:
    77  	// create table t (a int primary key, b int unique key);
    78  	// Every element in the first dimension is an index, and the second dimension holds the column offsets,
    79  	// for example:
    80  	// table has 3 columns: a, b, c
    81  	// pk: a
    82  	// index1: a, b
    83  	// index2: a, c
    84  	// IndexColumnsOffset: [[0], [0, 1], [0, 2]]
    85  	IndexColumnsOffset [][]int
    86  
    87  	// The following 3 fields should only be used to decode datums from the raw value bytes; do not use them for anything else.
    88  	// rowColInfos extends the model.ColumnInfo with some extra information.
    89  	// It has the same length and order as model.TableInfo.Columns.
    90  	rowColInfos    []rowcodec.ColInfo
    91  	rowColFieldTps map[int64]*types.FieldType
    92  	// only for new row format decoder
    93  	handleColID []int64
    94  
    95  	// number of virtual columns, i.e. columns for which IsColCDCVisible returns false
    96  	virtualColumnCount int
    97  	// rowColInfosWithoutVirtualCols is the same as rowColInfos, but without virtual columns
    98  	rowColInfosWithoutVirtualCols *[]rowcodec.ColInfo
    99  }
   100  
   101  // WrapTableInfo creates a TableInfo from a timodel.TableInfo
   102  func WrapTableInfo(schemaID int64, schemaName string, version uint64, info *model.TableInfo) *TableInfo {
   103  	ti := &TableInfo{
   104  		TableInfo: info,
   105  		SchemaID:  schemaID,
   106  		TableName: TableName{
   107  			Schema:      schemaName,
   108  			Table:       info.Name.O,
   109  			TableID:     info.ID,
   110  			IsPartition: info.GetPartitionInfo() != nil,
   111  		},
   112  		hasUniqueColumn:  false,
   113  		Version:          version,
   114  		columnsOffset:    make(map[int64]int, len(info.Columns)),
   115  		indicesOffset:    make(map[int64]int, len(info.Indices)),
   116  		nameToColID:      make(map[string]int64, len(info.Columns)),
   117  		RowColumnsOffset: make(map[int64]int, len(info.Columns)),
   118  		ColumnsFlag:      make(map[int64]*ColumnFlagType, len(info.Columns)),
   119  		handleColID:      []int64{-1},
   120  		HandleIndexID:    HandleIndexTableIneligible,
   121  		rowColInfos:      make([]rowcodec.ColInfo, len(info.Columns)),
   122  		rowColFieldTps:   make(map[int64]*types.FieldType, len(info.Columns)),
   123  	}
   124  
   125  	rowColumnsCurrentOffset := 0
   126  
   127  	ti.virtualColumnCount = 0
   128  	for i, col := range ti.Columns {
   129  		ti.columnsOffset[col.ID] = i
   130  		pkIsHandle := false
   131  		if IsColCDCVisible(col) {
   132  			ti.nameToColID[col.Name.O] = col.ID
   133  			ti.RowColumnsOffset[col.ID] = rowColumnsCurrentOffset
   134  			rowColumnsCurrentOffset++
   135  			pkIsHandle = (ti.PKIsHandle && mysql.HasPriKeyFlag(col.GetFlag())) || col.ID == model.ExtraHandleID
   136  			if pkIsHandle {
   137  				// pk is handle
   138  				ti.handleColID = []int64{col.ID}
   139  				ti.HandleIndexID = HandleIndexPKIsHandle
   140  				ti.hasUniqueColumn = true
   141  				ti.IndexColumnsOffset = append(ti.IndexColumnsOffset, []int{ti.RowColumnsOffset[col.ID]})
   142  			} else if ti.IsCommonHandle {
   143  				ti.HandleIndexID = HandleIndexPKIsHandle
   144  				ti.handleColID = ti.handleColID[:0]
   145  				pkIdx := tables.FindPrimaryIndex(info)
   146  				for _, pkCol := range pkIdx.Columns {
   147  					id := info.Columns[pkCol.Offset].ID
   148  					ti.handleColID = append(ti.handleColID, id)
   149  				}
   150  			}
   151  		} else {
   152  			ti.virtualColumnCount += 1
   153  		}
   154  		ti.rowColInfos[i] = rowcodec.ColInfo{
   155  			ID:            col.ID,
   156  			IsPKHandle:    pkIsHandle,
   157  			Ft:            col.FieldType.Clone(),
   158  			VirtualGenCol: col.IsGenerated(),
   159  		}
   160  		ti.rowColFieldTps[col.ID] = ti.rowColInfos[i].Ft
   161  	}
   162  
   163  	for i, idx := range ti.Indices {
   164  		ti.indicesOffset[idx.ID] = i
   165  		if ti.IsIndexUnique(idx) {
   166  			ti.hasUniqueColumn = true
   167  		}
   168  		if idx.Primary || idx.Unique {
   169  			indexColOffset := make([]int, 0, len(idx.Columns))
   170  			for _, idxCol := range idx.Columns {
   171  				colInfo := ti.Columns[idxCol.Offset]
   172  				if IsColCDCVisible(colInfo) {
   173  					indexColOffset = append(indexColOffset, ti.RowColumnsOffset[colInfo.ID])
   174  				}
   175  			}
   176  			if len(indexColOffset) > 0 {
   177  				ti.IndexColumnsOffset = append(ti.IndexColumnsOffset, indexColOffset)
   178  			}
   179  		}
   180  	}
   181  
   182  	ti.initRowColInfosWithoutVirtualCols()
   183  	ti.findHandleIndex()
   184  	ti.initColumnsFlag()
   185  	return ti
   186  }
   187  
   188  func (ti *TableInfo) initRowColInfosWithoutVirtualCols() {
   189  	if ti.virtualColumnCount == 0 {
   190  		ti.rowColInfosWithoutVirtualCols = &ti.rowColInfos
   191  		return
   192  	}
   193  	colInfos := make([]rowcodec.ColInfo, 0, len(ti.rowColInfos)-ti.virtualColumnCount)
   194  	for i, col := range ti.Columns {
   195  		if IsColCDCVisible(col) {
   196  			colInfos = append(colInfos, ti.rowColInfos[i])
   197  		}
   198  	}
   199  	if len(colInfos) != len(ti.rowColInfos)-ti.virtualColumnCount {
   200  		log.Panic("invalid rowColInfosWithoutVirtualCols",
   201  			zap.Int("len(colInfos)", len(colInfos)),
   202  			zap.Int("len(ti.rowColInfos)", len(ti.rowColInfos)),
   203  			zap.Int("ti.virtualColumnCount", ti.virtualColumnCount))
   204  	}
   205  	ti.rowColInfosWithoutVirtualCols = &colInfos
   206  }
   207  
   208  func (ti *TableInfo) findHandleIndex() {
   209  	if ti.HandleIndexID == HandleIndexPKIsHandle {
   210  		// pk is handle
   211  		return
   212  	}
   213  	handleIndexOffset := -1
   214  	for i, idx := range ti.Indices {
   215  		if !ti.IsIndexUnique(idx) {
   216  			continue
   217  		}
   218  		if idx.Primary {
   219  			handleIndexOffset = i
   220  			break
   221  		}
   222  		if handleIndexOffset < 0 {
   223  			handleIndexOffset = i
   224  		} else {
   225  			if len(ti.Indices[handleIndexOffset].Columns) > len(ti.Indices[i].Columns) ||
   226  				(len(ti.Indices[handleIndexOffset].Columns) == len(ti.Indices[i].Columns) &&
   227  					ti.Indices[handleIndexOffset].ID > ti.Indices[i].ID) {
   228  				handleIndexOffset = i
   229  			}
   230  		}
   231  	}
   232  	if handleIndexOffset >= 0 {
   233  		ti.HandleIndexID = ti.Indices[handleIndexOffset].ID
   234  	}
   235  }
   236  
   237  func (ti *TableInfo) initColumnsFlag() {
   238  	for _, colInfo := range ti.Columns {
   239  		var flag ColumnFlagType
   240  		if colInfo.GetCharset() == "binary" {
   241  			flag.SetIsBinary()
   242  		}
   243  		if colInfo.IsGenerated() {
   244  			flag.SetIsGeneratedColumn()
   245  		}
   246  		if mysql.HasPriKeyFlag(colInfo.GetFlag()) {
   247  			flag.SetIsPrimaryKey()
   248  			if ti.HandleIndexID == HandleIndexPKIsHandle {
   249  				flag.SetIsHandleKey()
   250  			}
   251  		}
   252  		if mysql.HasUniKeyFlag(colInfo.GetFlag()) {
   253  			flag.SetIsUniqueKey()
   254  		}
   255  		if !mysql.HasNotNullFlag(colInfo.GetFlag()) {
   256  			flag.SetIsNullable()
   257  		}
   258  		if mysql.HasMultipleKeyFlag(colInfo.GetFlag()) {
   259  			flag.SetIsMultipleKey()
   260  		}
   261  		if mysql.HasUnsignedFlag(colInfo.GetFlag()) {
   262  			flag.SetIsUnsigned()
   263  		}
   264  		ti.ColumnsFlag[colInfo.ID] = &flag
   265  	}
   266  
   267  	// In TiDB, just as in MySQL, only the first column of an index can be marked as "multiple key" or "unique key",
   268  	// and only the first column of a unique index may be marked as "unique key".
   269  	// See https://dev.mysql.com/doc/refman/5.7/en/show-columns.html.
   270  	// Yet if an index has multiple columns, we would like to easily determine that all those columns are indexed,
   271  	// which is crucial for the completeness of the information we pass to the downstream.
   272  	// Therefore, instead of using the MySQL standard,
   273  	// we made our own decision to mark all columns in an index with the appropriate flag(s).
   274  	for _, idxInfo := range ti.Indices {
   275  		for _, idxCol := range idxInfo.Columns {
   276  			colInfo := ti.Columns[idxCol.Offset]
   277  			flag := ti.ColumnsFlag[colInfo.ID]
   278  			if idxInfo.Primary {
   279  				flag.SetIsPrimaryKey()
   280  			} else if idxInfo.Unique {
   281  				flag.SetIsUniqueKey()
   282  			}
   283  			if len(idxInfo.Columns) > 1 {
   284  				flag.SetIsMultipleKey()
   285  			}
   286  			if idxInfo.ID == ti.HandleIndexID && ti.HandleIndexID >= 0 {
   287  				flag.SetIsHandleKey()
   288  			}
   289  			ti.ColumnsFlag[colInfo.ID] = flag
   290  		}
   291  	}
   292  }
   293  
   294  // GetColumnInfo returns the column info by ID
   295  func (ti *TableInfo) GetColumnInfo(colID int64) (info *model.ColumnInfo, exist bool) {
   296  	colOffset, exist := ti.columnsOffset[colID]
   297  	if !exist {
   298  		return nil, false
   299  	}
   300  	return ti.Columns[colOffset], true
   301  }
   302  
   303  // ForceGetColumnInfo return the column info by ID
   304  // Caller must ensure `colID` exists
   305  func (ti *TableInfo) ForceGetColumnInfo(colID int64) *model.ColumnInfo {
   306  	colInfo, ok := ti.GetColumnInfo(colID)
   307  	if !ok {
   308  		log.Panic("invalid column id", zap.Int64("columnID", colID))
   309  	}
   310  	return colInfo
   311  }
   312  
   313  // ForceGetColumnFlagType return the column flag type by ID
   314  // Caller must ensure `colID` exists
   315  func (ti *TableInfo) ForceGetColumnFlagType(colID int64) *ColumnFlagType {
   316  	flag, ok := ti.ColumnsFlag[colID]
   317  	if !ok {
   318  		log.Panic("invalid column id", zap.Int64("columnID", colID))
   319  	}
   320  	return flag
   321  }
   322  
   323  // ForceGetColumnName return the column name by ID
   324  // Caller must ensure `colID` exists
   325  func (ti *TableInfo) ForceGetColumnName(colID int64) string {
   326  	return ti.ForceGetColumnInfo(colID).Name.O
   327  }
   328  
   329  // ForceGetColumnIDByName return column ID by column name
   330  // Caller must ensure `colID` exists
   331  func (ti *TableInfo) ForceGetColumnIDByName(name string) int64 {
   332  	colID, ok := ti.nameToColID[name]
   333  	if !ok {
   334  		log.Panic("invalid column name", zap.String("column", name))
   335  	}
   336  	return colID
   337  }
   338  
   339  // GetSchemaName returns the schema name of the table
   340  func (ti *TableInfo) GetSchemaName() string {
   341  	return ti.TableName.Schema
   342  }
   343  
   344  // GetTableName returns the table name of the table
   345  func (ti *TableInfo) GetTableName() string {
   346  	return ti.TableName.Table
   347  }
   348  
   349  // GetSchemaNamePtr returns the pointer to the schema name of the table
   350  func (ti *TableInfo) GetSchemaNamePtr() *string {
   351  	return &ti.TableName.Schema
   352  }
   353  
   354  // GetTableNamePtr returns the pointer to the table name of the table
   355  func (ti *TableInfo) GetTableNamePtr() *string {
   356  	return &ti.TableName.Table
   357  }
   358  
   359  // IsPartitionTable returns whether the table is partition table
   360  func (ti *TableInfo) IsPartitionTable() bool {
   361  	return ti.TableName.IsPartition
   362  }
   363  
   364  func (ti *TableInfo) String() string {
   365  	return fmt.Sprintf("TableInfo, ID: %d, Name:%s, ColNum: %d, IdxNum: %d, PKIsHandle: %t", ti.ID, ti.TableName, len(ti.Columns), len(ti.Indices), ti.PKIsHandle)
   366  }
   367  
   368  // GetRowColInfos returns all column infos for rowcodec
   369  func (ti *TableInfo) GetRowColInfos() ([]int64, map[int64]*types.FieldType, []rowcodec.ColInfo) {
   370  	return ti.handleColID, ti.rowColFieldTps, ti.rowColInfos
   371  }
   372  
   373  // GetColInfosForRowChangedEvent return column infos for non-virtual columns
   374  // The column order in the result is the same as the order in its corresponding RowChangedEvent
   375  func (ti *TableInfo) GetColInfosForRowChangedEvent() []rowcodec.ColInfo {
   376  	return *ti.rowColInfosWithoutVirtualCols
   377  }
   378  
   379  // IsColCDCVisible returns whether the col is visible for CDC
   380  func IsColCDCVisible(col *model.ColumnInfo) bool {
   381  	// this column is a virtual generated column
   382  	if col.IsGenerated() && !col.GeneratedStored {
   383  		return false
   384  	}
   385  	return true
   386  }
   387  
   388  // HasUniqueColumn returns whether the table has a unique column
   389  func (ti *TableInfo) HasUniqueColumn() bool {
   390  	return ti.hasUniqueColumn
   391  }
   392  
   393  // HasVirtualColumns returns whether the table has virtual columns
   394  func (ti *TableInfo) HasVirtualColumns() bool {
   395  	return ti.virtualColumnCount > 0
   396  }
   397  
   398  // IsEligible returns whether the table is a eligible table
   399  func (ti *TableInfo) IsEligible(forceReplicate bool) bool {
   400  	// Sequence is not supported yet, TiCDC needs to filter all sequence tables.
   401  	// See https://github.com/pingcap/tiflow/issues/4559
   402  	if ti.IsSequence() {
   403  		return false
   404  	}
   405  	if forceReplicate {
   406  		return true
   407  	}
   408  	if ti.IsView() {
   409  		return true
   410  	}
   411  	return ti.HasUniqueColumn()
   412  }
   413  
   414  // IsIndexUnique returns whether the index is unique
   415  func (ti *TableInfo) IsIndexUnique(indexInfo *model.IndexInfo) bool {
   416  	if indexInfo.Primary {
   417  		return true
   418  	}
   419  	if indexInfo.Unique {
   420  		for _, col := range indexInfo.Columns {
   421  			colInfo := ti.Columns[col.Offset]
   422  			if !mysql.HasNotNullFlag(colInfo.GetFlag()) {
   423  				return false
   424  			}
   425  			// this column is a virtual generated column
   426  			if colInfo.IsGenerated() && !colInfo.GeneratedStored {
   427  				return false
   428  			}
   429  		}
   430  		return true
   431  	}
   432  	return false
   433  }
   434  
   435  // Clone clones the TableInfo
   436  func (ti *TableInfo) Clone() *TableInfo {
   437  	return WrapTableInfo(ti.SchemaID, ti.TableName.Schema, ti.Version, ti.TableInfo.Clone())
   438  }
   439  
   440  // GetIndex return the corresponding index by the given name.
   441  func (ti *TableInfo) GetIndex(name string) *model.IndexInfo {
   442  	for _, index := range ti.Indices {
   443  		if index != nil && index.Name.O == name {
   444  			return index
   445  		}
   446  	}
   447  	return nil
   448  }
   449  
   450  // IndexByName returns the index columns and offsets of the corresponding index by name
   451  func (ti *TableInfo) IndexByName(name string) ([]string, []int, bool) {
   452  	index := ti.GetIndex(name)
   453  	if index == nil {
   454  		return nil, nil, false
   455  	}
   456  	names := make([]string, 0, len(index.Columns))
   457  	offset := make([]int, 0, len(index.Columns))
   458  	for _, col := range index.Columns {
   459  		names = append(names, col.Name.O)
   460  		offset = append(offset, col.Offset)
   461  	}
   462  	return names, offset, true
   463  }
   464  
   465  // OffsetsByNames returns the column offsets of the corresponding columns by names
   466  // If any column does not exist, return false
   467  func (ti *TableInfo) OffsetsByNames(names []string) ([]int, bool) {
   468  	// todo: optimize it
   469  	columnOffsets := make(map[string]int, len(ti.Columns))
   470  	for _, col := range ti.Columns {
   471  		if col != nil {
   472  			columnOffsets[col.Name.O] = col.Offset
   473  		}
   474  	}
   475  
   476  	result := make([]int, 0, len(names))
   477  	for _, col := range names {
   478  		offset, ok := columnOffsets[col]
   479  		if !ok {
   480  			return nil, false
   481  		}
   482  		result = append(result, offset)
   483  	}
   484  
   485  	return result, true
   486  }
   487  
   488  // GetPrimaryKeyColumnNames returns the primary key column names
   489  func (ti *TableInfo) GetPrimaryKeyColumnNames() []string {
   490  	var result []string
   491  	if ti.PKIsHandle {
   492  		result = append(result, ti.GetPkColInfo().Name.O)
   493  		return result
   494  	}
   495  
   496  	indexInfo := ti.GetPrimaryKey()
   497  	if indexInfo != nil {
   498  		for _, col := range indexInfo.Columns {
   499  			result = append(result, col.Name.O)
   500  		}
   501  	}
   502  	return result
   503  }
   504  
   505  // GetColumnDefaultValue returns the default definition of a column.
   506  func GetColumnDefaultValue(col *model.ColumnInfo) interface{} {
   507  	defaultValue := col.GetDefaultValue()
   508  	if defaultValue == nil {
   509  		defaultValue = col.GetOriginDefaultValue()
   510  	}
   511  	defaultDatum := datumTypes.NewDatum(defaultValue)
   512  	return defaultDatum.GetValue()
   513  }