github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/model/schema_storage.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package model 15 16 import ( 17 "fmt" 18 19 "github.com/pingcap/log" 20 "github.com/pingcap/tidb/pkg/parser/model" 21 "github.com/pingcap/tidb/pkg/parser/mysql" 22 "github.com/pingcap/tidb/pkg/parser/types" 23 "github.com/pingcap/tidb/pkg/table/tables" 24 datumTypes "github.com/pingcap/tidb/pkg/types" 25 "github.com/pingcap/tidb/pkg/util/rowcodec" 26 "go.uber.org/zap" 27 ) 28 29 const ( 30 // HandleIndexPKIsHandle represents that the handle index is the pk and the pk is the handle 31 HandleIndexPKIsHandle = -1 32 // HandleIndexTableIneligible represents that the table is ineligible 33 HandleIndexTableIneligible = -2 34 ) 35 36 // TableInfo provides meta data describing a DB table. 37 type TableInfo struct { 38 *model.TableInfo 39 SchemaID int64 40 // NOTICE: We probably store the logical ID inside TableName, 41 // not the physical ID. 42 // For normal table, there is only one ID, which is the physical ID. 43 // AKA TIDB_TABLE_ID. 44 // For partitioned table, there are two kinds of ID: 45 // 1. TIDB_PARTITION_ID is the physical ID of the partition. 46 // 2. TIDB_TABLE_ID is the logical ID of the table. 47 // In general, we always use the physical ID to represent a table, but we 48 // record the logical ID from the DDL event(job.BinlogInfo.TableInfo). 49 // So be careful when using the TableInfo. 50 TableName TableName 51 // Version record the tso of create the table info. 52 Version uint64 53 // ColumnID -> offset in model.TableInfo.Columns 54 columnsOffset map[int64]int 55 // ColumnID -> offset in model.TableInfo.Indices 56 indicesOffset map[int64]int 57 // Column name -> ColumnID 58 nameToColID map[string]int64 59 60 hasUniqueColumn bool 61 62 // ColumnID -> offset in RowChangedEvents.Columns. 63 RowColumnsOffset map[int64]int 64 65 ColumnsFlag map[int64]*ColumnFlagType 66 67 // the mounter will choose this index to output delete events 68 // special value: 69 // HandleIndexPKIsHandle(-1) : pk is handle 70 // HandleIndexTableIneligible(-2) : the table is not eligible 71 HandleIndexID int64 72 73 // IndexColumnsOffset store the offset of the columns in row changed events for 74 // unique index and primary key 75 // The reason why we need this is that the Indexes in TableInfo 76 // will not contain the PK if it is create in statement like: 77 // create table t (a int primary key, b int unique key); 78 // Every element in first dimension is a index, and the second dimension is the columns offset 79 // for example: 80 // table has 3 columns: a, b, c 81 // pk: a 82 // index1: a, b 83 // index2: a, c 84 // indexColumnsOffset: [[0], [0, 1], [0, 2]] 85 IndexColumnsOffset [][]int 86 87 // The following 3 fields, should only be used to decode datum from the raw value bytes, do not abuse those field. 88 // rowColInfos extend the model.ColumnInfo with some extra information 89 // it's the same length and order with the model.TableInfo.Columns 90 rowColInfos []rowcodec.ColInfo 91 rowColFieldTps map[int64]*types.FieldType 92 // only for new row format decoder 93 handleColID []int64 94 95 // number of virtual columns 96 virtualColumnCount int 97 // rowColInfosWithoutVirtualCols is the same as rowColInfos, but without virtual columns 98 rowColInfosWithoutVirtualCols *[]rowcodec.ColInfo 99 } 100 101 // WrapTableInfo creates a TableInfo from a timodel.TableInfo 102 func WrapTableInfo(schemaID int64, schemaName string, version uint64, info *model.TableInfo) *TableInfo { 103 ti := &TableInfo{ 104 TableInfo: info, 105 SchemaID: schemaID, 106 TableName: TableName{ 107 Schema: schemaName, 108 Table: info.Name.O, 109 TableID: info.ID, 110 IsPartition: info.GetPartitionInfo() != nil, 111 }, 112 hasUniqueColumn: false, 113 Version: version, 114 columnsOffset: make(map[int64]int, len(info.Columns)), 115 indicesOffset: make(map[int64]int, len(info.Indices)), 116 nameToColID: make(map[string]int64, len(info.Columns)), 117 RowColumnsOffset: make(map[int64]int, len(info.Columns)), 118 ColumnsFlag: make(map[int64]*ColumnFlagType, len(info.Columns)), 119 handleColID: []int64{-1}, 120 HandleIndexID: HandleIndexTableIneligible, 121 rowColInfos: make([]rowcodec.ColInfo, len(info.Columns)), 122 rowColFieldTps: make(map[int64]*types.FieldType, len(info.Columns)), 123 } 124 125 rowColumnsCurrentOffset := 0 126 127 ti.virtualColumnCount = 0 128 for i, col := range ti.Columns { 129 ti.columnsOffset[col.ID] = i 130 pkIsHandle := false 131 if IsColCDCVisible(col) { 132 ti.nameToColID[col.Name.O] = col.ID 133 ti.RowColumnsOffset[col.ID] = rowColumnsCurrentOffset 134 rowColumnsCurrentOffset++ 135 pkIsHandle = (ti.PKIsHandle && mysql.HasPriKeyFlag(col.GetFlag())) || col.ID == model.ExtraHandleID 136 if pkIsHandle { 137 // pk is handle 138 ti.handleColID = []int64{col.ID} 139 ti.HandleIndexID = HandleIndexPKIsHandle 140 ti.hasUniqueColumn = true 141 ti.IndexColumnsOffset = append(ti.IndexColumnsOffset, []int{ti.RowColumnsOffset[col.ID]}) 142 } else if ti.IsCommonHandle { 143 ti.HandleIndexID = HandleIndexPKIsHandle 144 ti.handleColID = ti.handleColID[:0] 145 pkIdx := tables.FindPrimaryIndex(info) 146 for _, pkCol := range pkIdx.Columns { 147 id := info.Columns[pkCol.Offset].ID 148 ti.handleColID = append(ti.handleColID, id) 149 } 150 } 151 } else { 152 ti.virtualColumnCount += 1 153 } 154 ti.rowColInfos[i] = rowcodec.ColInfo{ 155 ID: col.ID, 156 IsPKHandle: pkIsHandle, 157 Ft: col.FieldType.Clone(), 158 VirtualGenCol: col.IsGenerated(), 159 } 160 ti.rowColFieldTps[col.ID] = ti.rowColInfos[i].Ft 161 } 162 163 for i, idx := range ti.Indices { 164 ti.indicesOffset[idx.ID] = i 165 if ti.IsIndexUnique(idx) { 166 ti.hasUniqueColumn = true 167 } 168 if idx.Primary || idx.Unique { 169 indexColOffset := make([]int, 0, len(idx.Columns)) 170 for _, idxCol := range idx.Columns { 171 colInfo := ti.Columns[idxCol.Offset] 172 if IsColCDCVisible(colInfo) { 173 indexColOffset = append(indexColOffset, ti.RowColumnsOffset[colInfo.ID]) 174 } 175 } 176 if len(indexColOffset) > 0 { 177 ti.IndexColumnsOffset = append(ti.IndexColumnsOffset, indexColOffset) 178 } 179 } 180 } 181 182 ti.initRowColInfosWithoutVirtualCols() 183 ti.findHandleIndex() 184 ti.initColumnsFlag() 185 return ti 186 } 187 188 func (ti *TableInfo) initRowColInfosWithoutVirtualCols() { 189 if ti.virtualColumnCount == 0 { 190 ti.rowColInfosWithoutVirtualCols = &ti.rowColInfos 191 return 192 } 193 colInfos := make([]rowcodec.ColInfo, 0, len(ti.rowColInfos)-ti.virtualColumnCount) 194 for i, col := range ti.Columns { 195 if IsColCDCVisible(col) { 196 colInfos = append(colInfos, ti.rowColInfos[i]) 197 } 198 } 199 if len(colInfos) != len(ti.rowColInfos)-ti.virtualColumnCount { 200 log.Panic("invalid rowColInfosWithoutVirtualCols", 201 zap.Int("len(colInfos)", len(colInfos)), 202 zap.Int("len(ti.rowColInfos)", len(ti.rowColInfos)), 203 zap.Int("ti.virtualColumnCount", ti.virtualColumnCount)) 204 } 205 ti.rowColInfosWithoutVirtualCols = &colInfos 206 } 207 208 func (ti *TableInfo) findHandleIndex() { 209 if ti.HandleIndexID == HandleIndexPKIsHandle { 210 // pk is handle 211 return 212 } 213 handleIndexOffset := -1 214 for i, idx := range ti.Indices { 215 if !ti.IsIndexUnique(idx) { 216 continue 217 } 218 if idx.Primary { 219 handleIndexOffset = i 220 break 221 } 222 if handleIndexOffset < 0 { 223 handleIndexOffset = i 224 } else { 225 if len(ti.Indices[handleIndexOffset].Columns) > len(ti.Indices[i].Columns) || 226 (len(ti.Indices[handleIndexOffset].Columns) == len(ti.Indices[i].Columns) && 227 ti.Indices[handleIndexOffset].ID > ti.Indices[i].ID) { 228 handleIndexOffset = i 229 } 230 } 231 } 232 if handleIndexOffset >= 0 { 233 ti.HandleIndexID = ti.Indices[handleIndexOffset].ID 234 } 235 } 236 237 func (ti *TableInfo) initColumnsFlag() { 238 for _, colInfo := range ti.Columns { 239 var flag ColumnFlagType 240 if colInfo.GetCharset() == "binary" { 241 flag.SetIsBinary() 242 } 243 if colInfo.IsGenerated() { 244 flag.SetIsGeneratedColumn() 245 } 246 if mysql.HasPriKeyFlag(colInfo.GetFlag()) { 247 flag.SetIsPrimaryKey() 248 if ti.HandleIndexID == HandleIndexPKIsHandle { 249 flag.SetIsHandleKey() 250 } 251 } 252 if mysql.HasUniKeyFlag(colInfo.GetFlag()) { 253 flag.SetIsUniqueKey() 254 } 255 if !mysql.HasNotNullFlag(colInfo.GetFlag()) { 256 flag.SetIsNullable() 257 } 258 if mysql.HasMultipleKeyFlag(colInfo.GetFlag()) { 259 flag.SetIsMultipleKey() 260 } 261 if mysql.HasUnsignedFlag(colInfo.GetFlag()) { 262 flag.SetIsUnsigned() 263 } 264 ti.ColumnsFlag[colInfo.ID] = &flag 265 } 266 267 // In TiDB, just as in MySQL, only the first column of an index can be marked as "multiple key" or "unique key", 268 // and only the first column of a unique index may be marked as "unique key". 269 // See https://dev.mysql.com/doc/refman/5.7/en/show-columns.html. 270 // Yet if an index has multiple columns, we would like to easily determine that all those columns are indexed, 271 // which is crucial for the completeness of the information we pass to the downstream. 272 // Therefore, instead of using the MySQL standard, 273 // we made our own decision to mark all columns in an index with the appropriate flag(s). 274 for _, idxInfo := range ti.Indices { 275 for _, idxCol := range idxInfo.Columns { 276 colInfo := ti.Columns[idxCol.Offset] 277 flag := ti.ColumnsFlag[colInfo.ID] 278 if idxInfo.Primary { 279 flag.SetIsPrimaryKey() 280 } else if idxInfo.Unique { 281 flag.SetIsUniqueKey() 282 } 283 if len(idxInfo.Columns) > 1 { 284 flag.SetIsMultipleKey() 285 } 286 if idxInfo.ID == ti.HandleIndexID && ti.HandleIndexID >= 0 { 287 flag.SetIsHandleKey() 288 } 289 ti.ColumnsFlag[colInfo.ID] = flag 290 } 291 } 292 } 293 294 // GetColumnInfo returns the column info by ID 295 func (ti *TableInfo) GetColumnInfo(colID int64) (info *model.ColumnInfo, exist bool) { 296 colOffset, exist := ti.columnsOffset[colID] 297 if !exist { 298 return nil, false 299 } 300 return ti.Columns[colOffset], true 301 } 302 303 // ForceGetColumnInfo return the column info by ID 304 // Caller must ensure `colID` exists 305 func (ti *TableInfo) ForceGetColumnInfo(colID int64) *model.ColumnInfo { 306 colInfo, ok := ti.GetColumnInfo(colID) 307 if !ok { 308 log.Panic("invalid column id", zap.Int64("columnID", colID)) 309 } 310 return colInfo 311 } 312 313 // ForceGetColumnFlagType return the column flag type by ID 314 // Caller must ensure `colID` exists 315 func (ti *TableInfo) ForceGetColumnFlagType(colID int64) *ColumnFlagType { 316 flag, ok := ti.ColumnsFlag[colID] 317 if !ok { 318 log.Panic("invalid column id", zap.Int64("columnID", colID)) 319 } 320 return flag 321 } 322 323 // ForceGetColumnName return the column name by ID 324 // Caller must ensure `colID` exists 325 func (ti *TableInfo) ForceGetColumnName(colID int64) string { 326 return ti.ForceGetColumnInfo(colID).Name.O 327 } 328 329 // ForceGetColumnIDByName return column ID by column name 330 // Caller must ensure `colID` exists 331 func (ti *TableInfo) ForceGetColumnIDByName(name string) int64 { 332 colID, ok := ti.nameToColID[name] 333 if !ok { 334 log.Panic("invalid column name", zap.String("column", name)) 335 } 336 return colID 337 } 338 339 // GetSchemaName returns the schema name of the table 340 func (ti *TableInfo) GetSchemaName() string { 341 return ti.TableName.Schema 342 } 343 344 // GetTableName returns the table name of the table 345 func (ti *TableInfo) GetTableName() string { 346 return ti.TableName.Table 347 } 348 349 // GetSchemaNamePtr returns the pointer to the schema name of the table 350 func (ti *TableInfo) GetSchemaNamePtr() *string { 351 return &ti.TableName.Schema 352 } 353 354 // GetTableNamePtr returns the pointer to the table name of the table 355 func (ti *TableInfo) GetTableNamePtr() *string { 356 return &ti.TableName.Table 357 } 358 359 // IsPartitionTable returns whether the table is partition table 360 func (ti *TableInfo) IsPartitionTable() bool { 361 return ti.TableName.IsPartition 362 } 363 364 func (ti *TableInfo) String() string { 365 return fmt.Sprintf("TableInfo, ID: %d, Name:%s, ColNum: %d, IdxNum: %d, PKIsHandle: %t", ti.ID, ti.TableName, len(ti.Columns), len(ti.Indices), ti.PKIsHandle) 366 } 367 368 // GetRowColInfos returns all column infos for rowcodec 369 func (ti *TableInfo) GetRowColInfos() ([]int64, map[int64]*types.FieldType, []rowcodec.ColInfo) { 370 return ti.handleColID, ti.rowColFieldTps, ti.rowColInfos 371 } 372 373 // GetColInfosForRowChangedEvent return column infos for non-virtual columns 374 // The column order in the result is the same as the order in its corresponding RowChangedEvent 375 func (ti *TableInfo) GetColInfosForRowChangedEvent() []rowcodec.ColInfo { 376 return *ti.rowColInfosWithoutVirtualCols 377 } 378 379 // IsColCDCVisible returns whether the col is visible for CDC 380 func IsColCDCVisible(col *model.ColumnInfo) bool { 381 // this column is a virtual generated column 382 if col.IsGenerated() && !col.GeneratedStored { 383 return false 384 } 385 return true 386 } 387 388 // HasUniqueColumn returns whether the table has a unique column 389 func (ti *TableInfo) HasUniqueColumn() bool { 390 return ti.hasUniqueColumn 391 } 392 393 // HasVirtualColumns returns whether the table has virtual columns 394 func (ti *TableInfo) HasVirtualColumns() bool { 395 return ti.virtualColumnCount > 0 396 } 397 398 // IsEligible returns whether the table is a eligible table 399 func (ti *TableInfo) IsEligible(forceReplicate bool) bool { 400 // Sequence is not supported yet, TiCDC needs to filter all sequence tables. 401 // See https://github.com/pingcap/tiflow/issues/4559 402 if ti.IsSequence() { 403 return false 404 } 405 if forceReplicate { 406 return true 407 } 408 if ti.IsView() { 409 return true 410 } 411 return ti.HasUniqueColumn() 412 } 413 414 // IsIndexUnique returns whether the index is unique 415 func (ti *TableInfo) IsIndexUnique(indexInfo *model.IndexInfo) bool { 416 if indexInfo.Primary { 417 return true 418 } 419 if indexInfo.Unique { 420 for _, col := range indexInfo.Columns { 421 colInfo := ti.Columns[col.Offset] 422 if !mysql.HasNotNullFlag(colInfo.GetFlag()) { 423 return false 424 } 425 // this column is a virtual generated column 426 if colInfo.IsGenerated() && !colInfo.GeneratedStored { 427 return false 428 } 429 } 430 return true 431 } 432 return false 433 } 434 435 // Clone clones the TableInfo 436 func (ti *TableInfo) Clone() *TableInfo { 437 return WrapTableInfo(ti.SchemaID, ti.TableName.Schema, ti.Version, ti.TableInfo.Clone()) 438 } 439 440 // GetIndex return the corresponding index by the given name. 441 func (ti *TableInfo) GetIndex(name string) *model.IndexInfo { 442 for _, index := range ti.Indices { 443 if index != nil && index.Name.O == name { 444 return index 445 } 446 } 447 return nil 448 } 449 450 // IndexByName returns the index columns and offsets of the corresponding index by name 451 func (ti *TableInfo) IndexByName(name string) ([]string, []int, bool) { 452 index := ti.GetIndex(name) 453 if index == nil { 454 return nil, nil, false 455 } 456 names := make([]string, 0, len(index.Columns)) 457 offset := make([]int, 0, len(index.Columns)) 458 for _, col := range index.Columns { 459 names = append(names, col.Name.O) 460 offset = append(offset, col.Offset) 461 } 462 return names, offset, true 463 } 464 465 // OffsetsByNames returns the column offsets of the corresponding columns by names 466 // If any column does not exist, return false 467 func (ti *TableInfo) OffsetsByNames(names []string) ([]int, bool) { 468 // todo: optimize it 469 columnOffsets := make(map[string]int, len(ti.Columns)) 470 for _, col := range ti.Columns { 471 if col != nil { 472 columnOffsets[col.Name.O] = col.Offset 473 } 474 } 475 476 result := make([]int, 0, len(names)) 477 for _, col := range names { 478 offset, ok := columnOffsets[col] 479 if !ok { 480 return nil, false 481 } 482 result = append(result, offset) 483 } 484 485 return result, true 486 } 487 488 // GetPrimaryKeyColumnNames returns the primary key column names 489 func (ti *TableInfo) GetPrimaryKeyColumnNames() []string { 490 var result []string 491 if ti.PKIsHandle { 492 result = append(result, ti.GetPkColInfo().Name.O) 493 return result 494 } 495 496 indexInfo := ti.GetPrimaryKey() 497 if indexInfo != nil { 498 for _, col := range indexInfo.Columns { 499 result = append(result, col.Name.O) 500 } 501 } 502 return result 503 } 504 505 // GetColumnDefaultValue returns the default definition of a column. 506 func GetColumnDefaultValue(col *model.ColumnInfo) interface{} { 507 defaultValue := col.GetDefaultValue() 508 if defaultValue == nil { 509 defaultValue = col.GetOriginDefaultValue() 510 } 511 defaultDatum := datumTypes.NewDatum(defaultValue) 512 return defaultDatum.GetValue() 513 }