// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package model

import (
	"fmt"
	"sort"
	"strconv"
	"strings"
	"sync/atomic"
	"unsafe"

	"github.com/pingcap/log"
	"github.com/pingcap/tidb/pkg/kv"
	"github.com/pingcap/tidb/pkg/parser/model"
	"github.com/pingcap/tidb/pkg/parser/mysql"
	"github.com/pingcap/tidb/pkg/util/rowcodec"
	"github.com/pingcap/tiflow/pkg/errors"
	"github.com/pingcap/tiflow/pkg/integrity"
	"github.com/pingcap/tiflow/pkg/quotes"
	"github.com/pingcap/tiflow/pkg/sink"
	"github.com/pingcap/tiflow/pkg/util"
	"go.uber.org/zap"
)

//go:generate msgp

// MessageType is the type of message, which is used by MqSink and RedoLog.
type MessageType int

const (
	// MessageTypeUnknown is unknown type of message key
	MessageTypeUnknown MessageType = iota
	// MessageTypeRow is row type of message key
	MessageTypeRow
	// MessageTypeDDL is ddl type of message key
	MessageTypeDDL
	// MessageTypeResolved is resolved type of message key
	MessageTypeResolved
)

const (
	// the RowChangedEvent order in the same transaction
	//
	// The values start at 1 and increase in the order delete < update < insert;
	// getDMLOrder returns them so that txnRows.Less can compare events by type.
	typeDelete = iota + 1
	typeUpdate
	typeInsert
)

// ColumnFlagType is for encapsulating the flag operations for different flags.
61 type ColumnFlagType util.Flag 62 63 const ( 64 // BinaryFlag means the column charset is binary 65 BinaryFlag ColumnFlagType = 1 << ColumnFlagType(iota) 66 // HandleKeyFlag means the column is selected as the handle key 67 // The handleKey is chosen by the following rules in the order: 68 // 1. if the table has primary key, it's the handle key. 69 // 2. If the table has not null unique key, it's the handle key. 70 // 3. If the table has no primary key and no not null unique key, it has no handleKey. 71 HandleKeyFlag 72 // GeneratedColumnFlag means the column is a generated column 73 GeneratedColumnFlag 74 // PrimaryKeyFlag means the column is primary key 75 PrimaryKeyFlag 76 // UniqueKeyFlag means the column is unique key 77 UniqueKeyFlag 78 // MultipleKeyFlag means the column is multiple key 79 MultipleKeyFlag 80 // NullableFlag means the column is nullable 81 NullableFlag 82 // UnsignedFlag means the column stores an unsigned integer 83 UnsignedFlag 84 ) 85 86 // SetIsBinary sets BinaryFlag 87 func (b *ColumnFlagType) SetIsBinary() { 88 (*util.Flag)(b).Add(util.Flag(BinaryFlag)) 89 } 90 91 // UnsetIsBinary unsets BinaryFlag 92 func (b *ColumnFlagType) UnsetIsBinary() { 93 (*util.Flag)(b).Remove(util.Flag(BinaryFlag)) 94 } 95 96 // IsBinary shows whether BinaryFlag is set 97 func (b *ColumnFlagType) IsBinary() bool { 98 return (*util.Flag)(b).HasAll(util.Flag(BinaryFlag)) 99 } 100 101 // SetIsHandleKey sets HandleKey 102 func (b *ColumnFlagType) SetIsHandleKey() { 103 (*util.Flag)(b).Add(util.Flag(HandleKeyFlag)) 104 } 105 106 // UnsetIsHandleKey unsets HandleKey 107 func (b *ColumnFlagType) UnsetIsHandleKey() { 108 (*util.Flag)(b).Remove(util.Flag(HandleKeyFlag)) 109 } 110 111 // IsHandleKey shows whether HandleKey is set 112 func (b *ColumnFlagType) IsHandleKey() bool { 113 return (*util.Flag)(b).HasAll(util.Flag(HandleKeyFlag)) 114 } 115 116 // SetIsGeneratedColumn sets GeneratedColumn 117 func (b *ColumnFlagType) SetIsGeneratedColumn() { 118 
(*util.Flag)(b).Add(util.Flag(GeneratedColumnFlag)) 119 } 120 121 // UnsetIsGeneratedColumn unsets GeneratedColumn 122 func (b *ColumnFlagType) UnsetIsGeneratedColumn() { 123 (*util.Flag)(b).Remove(util.Flag(GeneratedColumnFlag)) 124 } 125 126 // IsGeneratedColumn shows whether GeneratedColumn is set 127 func (b *ColumnFlagType) IsGeneratedColumn() bool { 128 return (*util.Flag)(b).HasAll(util.Flag(GeneratedColumnFlag)) 129 } 130 131 // SetIsPrimaryKey sets PrimaryKeyFlag 132 func (b *ColumnFlagType) SetIsPrimaryKey() { 133 (*util.Flag)(b).Add(util.Flag(PrimaryKeyFlag)) 134 } 135 136 // UnsetIsPrimaryKey unsets PrimaryKeyFlag 137 func (b *ColumnFlagType) UnsetIsPrimaryKey() { 138 (*util.Flag)(b).Remove(util.Flag(PrimaryKeyFlag)) 139 } 140 141 // IsPrimaryKey shows whether PrimaryKeyFlag is set 142 func (b *ColumnFlagType) IsPrimaryKey() bool { 143 return (*util.Flag)(b).HasAll(util.Flag(PrimaryKeyFlag)) 144 } 145 146 // SetIsUniqueKey sets UniqueKeyFlag 147 func (b *ColumnFlagType) SetIsUniqueKey() { 148 (*util.Flag)(b).Add(util.Flag(UniqueKeyFlag)) 149 } 150 151 // UnsetIsUniqueKey unsets UniqueKeyFlag 152 func (b *ColumnFlagType) UnsetIsUniqueKey() { 153 (*util.Flag)(b).Remove(util.Flag(UniqueKeyFlag)) 154 } 155 156 // IsUniqueKey shows whether UniqueKeyFlag is set 157 func (b *ColumnFlagType) IsUniqueKey() bool { 158 return (*util.Flag)(b).HasAll(util.Flag(UniqueKeyFlag)) 159 } 160 161 // IsMultipleKey shows whether MultipleKeyFlag is set 162 func (b *ColumnFlagType) IsMultipleKey() bool { 163 return (*util.Flag)(b).HasAll(util.Flag(MultipleKeyFlag)) 164 } 165 166 // SetIsMultipleKey sets MultipleKeyFlag 167 func (b *ColumnFlagType) SetIsMultipleKey() { 168 (*util.Flag)(b).Add(util.Flag(MultipleKeyFlag)) 169 } 170 171 // UnsetIsMultipleKey unsets MultipleKeyFlag 172 func (b *ColumnFlagType) UnsetIsMultipleKey() { 173 (*util.Flag)(b).Remove(util.Flag(MultipleKeyFlag)) 174 } 175 176 // IsNullable shows whether NullableFlag is set 177 func (b *ColumnFlagType) 
IsNullable() bool { 178 return (*util.Flag)(b).HasAll(util.Flag(NullableFlag)) 179 } 180 181 // SetIsNullable sets NullableFlag 182 func (b *ColumnFlagType) SetIsNullable() { 183 (*util.Flag)(b).Add(util.Flag(NullableFlag)) 184 } 185 186 // UnsetIsNullable unsets NullableFlag 187 func (b *ColumnFlagType) UnsetIsNullable() { 188 (*util.Flag)(b).Remove(util.Flag(NullableFlag)) 189 } 190 191 // IsUnsigned shows whether UnsignedFlag is set 192 func (b *ColumnFlagType) IsUnsigned() bool { 193 return (*util.Flag)(b).HasAll(util.Flag(UnsignedFlag)) 194 } 195 196 // SetIsUnsigned sets UnsignedFlag 197 func (b *ColumnFlagType) SetIsUnsigned() { 198 (*util.Flag)(b).Add(util.Flag(UnsignedFlag)) 199 } 200 201 // UnsetIsUnsigned unsets UnsignedFlag 202 func (b *ColumnFlagType) UnsetIsUnsigned() { 203 (*util.Flag)(b).Remove(util.Flag(UnsignedFlag)) 204 } 205 206 // TableName represents name of a table, includes table name and schema name. 207 type TableName struct { 208 Schema string `toml:"db-name" msg:"db-name"` 209 Table string `toml:"tbl-name" msg:"tbl-name"` 210 TableID int64 `toml:"tbl-id" msg:"tbl-id"` 211 IsPartition bool `toml:"is-partition" msg:"is-partition"` 212 } 213 214 // String implements fmt.Stringer interface. 215 func (t TableName) String() string { 216 return fmt.Sprintf("%s.%s", t.Schema, t.Table) 217 } 218 219 // QuoteString returns quoted full table name 220 func (t TableName) QuoteString() string { 221 return quotes.QuoteSchema(t.Schema, t.Table) 222 } 223 224 // GetSchema returns schema name. 225 func (t *TableName) GetSchema() string { 226 return t.Schema 227 } 228 229 // GetTable returns table name. 230 func (t *TableName) GetTable() string { 231 return t.Table 232 } 233 234 // GetTableID returns table ID. 
235 func (t *TableName) GetTableID() int64 { 236 return t.TableID 237 } 238 239 // RedoLogType is the type of log 240 type RedoLogType int 241 242 const ( 243 // RedoLogTypeUnknown is unknown type of log 244 RedoLogTypeUnknown RedoLogType = iota 245 // RedoLogTypeRow is row type of log 246 RedoLogTypeRow 247 // RedoLogTypeDDL is ddl type of log 248 RedoLogTypeDDL 249 ) 250 251 // RedoLog defines the persistent structure of redo log 252 // since MsgPack do not support types that are defined in another package, 253 // more info https://github.com/tinylib/msgp/issues/158, https://github.com/tinylib/msgp/issues/149 254 // so define a RedoColumn, RedoDDLEvent instead of using the Column, DDLEvent 255 type RedoLog struct { 256 RedoRow RedoRowChangedEvent `msg:"row"` 257 RedoDDL RedoDDLEvent `msg:"ddl"` 258 Type RedoLogType `msg:"type"` 259 } 260 261 // GetCommitTs returns the commit ts of the redo log. 262 func (r *RedoLog) GetCommitTs() Ts { 263 switch r.Type { 264 case RedoLogTypeRow: 265 return r.RedoRow.Row.CommitTs 266 case RedoLogTypeDDL: 267 return r.RedoDDL.DDL.CommitTs 268 default: 269 log.Panic("invalid redo log type", zap.Any("type", r.Type)) 270 } 271 return 0 272 } 273 274 // TrySplitAndSortUpdateEvent redo log do nothing 275 func (r *RedoLog) TrySplitAndSortUpdateEvent(_ string) error { 276 return nil 277 } 278 279 // RedoRowChangedEvent represents the DML event used in RedoLog 280 type RedoRowChangedEvent struct { 281 Row *RowChangedEventInRedoLog `msg:"row"` 282 Columns []RedoColumn `msg:"columns"` 283 PreColumns []RedoColumn `msg:"pre-columns"` 284 } 285 286 // RedoDDLEvent represents DDL event used in redo log persistent 287 type RedoDDLEvent struct { 288 DDL *DDLEvent `msg:"ddl"` 289 Type byte `msg:"type"` 290 TableName TableName `msg:"table-name"` 291 } 292 293 // ToRedoLog converts row changed event to redo log 294 func (r *RowChangedEvent) ToRedoLog() *RedoLog { 295 rowInRedoLog := &RowChangedEventInRedoLog{ 296 StartTs: r.StartTs, 297 CommitTs: 
r.CommitTs, 298 Table: &TableName{ 299 Schema: r.TableInfo.GetSchemaName(), 300 Table: r.TableInfo.GetTableName(), 301 TableID: r.PhysicalTableID, 302 IsPartition: r.TableInfo.IsPartitionTable(), 303 }, 304 Columns: r.GetColumns(), 305 PreColumns: r.GetPreColumns(), 306 IndexColumns: r.TableInfo.IndexColumnsOffset, 307 } 308 return &RedoLog{ 309 RedoRow: RedoRowChangedEvent{Row: rowInRedoLog}, 310 Type: RedoLogTypeRow, 311 } 312 } 313 314 // ToRedoLog converts ddl event to redo log 315 func (d *DDLEvent) ToRedoLog() *RedoLog { 316 return &RedoLog{ 317 RedoDDL: RedoDDLEvent{DDL: d}, 318 Type: RedoLogTypeDDL, 319 } 320 } 321 322 // RowChangedEvent represents a row changed event 323 // 324 //msgp:ignore RowChangedEvent 325 type RowChangedEvent struct { 326 StartTs uint64 327 CommitTs uint64 328 329 RowID int64 // Deprecated. It is empty when the RowID comes from clustered index table. 330 331 PhysicalTableID int64 332 333 // NOTICE: We probably store the logical ID inside TableInfo's TableName, 334 // not the physical ID. 335 // For normal table, there is only one ID, which is the physical ID. 336 // AKA TIDB_TABLE_ID. 337 // For partitioned table, there are two kinds of ID: 338 // 1. TIDB_PARTITION_ID is the physical ID of the partition. 339 // 2. TIDB_TABLE_ID is the logical ID of the table. 340 // In general, we always use the physical ID to represent a table, but we 341 // record the logical ID from the DDL event(job.BinlogInfo.TableInfo). 342 // So be careful when using the TableInfo. 343 TableInfo *TableInfo 344 345 Columns []*ColumnData 346 PreColumns []*ColumnData 347 348 // Checksum for the event, only not nil if the upstream TiDB enable the row level checksum 349 // and TiCDC set the integrity check level to the correctness. 350 Checksum *integrity.Checksum 351 352 // ApproximateDataSize is the approximate size of protobuf binary 353 // representation of this event. 
354 ApproximateDataSize int64 355 356 // SplitTxn marks this RowChangedEvent as the first line of a new txn. 357 SplitTxn bool 358 // ReplicatingTs is ts when a table starts replicating events to downstream. 359 ReplicatingTs Ts 360 // HandleKey is the key of the row changed event. 361 // It can be used to identify the row changed event. 362 // It can be one of three : common_handle, int_handle or _tidb_rowid based on the table definitions 363 // 1. primary key is the clustered index, and key is not int type, then we use `CommonHandle` 364 // 2. primary key is int type(including different types of int, such as bigint, TINYINT), then we use IntHandle 365 // 3. when the table doesn't have the primary key and clustered index, 366 // tidb will make a hidden column called "_tidb_rowid" as the handle. 367 // due to the type of "_tidb_rowid" is int, so we also use IntHandle to represent. 368 HandleKey kv.Handle 369 } 370 371 // RowChangedEventInRedoLog is used to store RowChangedEvent in redo log v2 format 372 type RowChangedEventInRedoLog struct { 373 StartTs uint64 `msg:"start-ts"` 374 CommitTs uint64 `msg:"commit-ts"` 375 376 // Table contains the table name and table ID. 377 // NOTICE: We store the physical table ID here, not the logical table ID. 
378 Table *TableName `msg:"table"` 379 380 Columns []*Column `msg:"columns"` 381 PreColumns []*Column `msg:"pre-columns"` 382 IndexColumns [][]int `msg:"index-columns"` 383 } 384 385 // ToRowChangedEvent converts RowChangedEventInRedoLog to RowChangedEvent 386 func (r *RowChangedEventInRedoLog) ToRowChangedEvent() *RowChangedEvent { 387 cols := r.Columns 388 if cols == nil { 389 cols = r.PreColumns 390 } 391 tableInfo := BuildTableInfo( 392 r.Table.Schema, 393 r.Table.Table, 394 cols, 395 r.IndexColumns) 396 tableInfo.TableName.TableID = r.Table.TableID 397 tableInfo.TableName.IsPartition = r.Table.IsPartition 398 row := &RowChangedEvent{ 399 StartTs: r.StartTs, 400 CommitTs: r.CommitTs, 401 PhysicalTableID: r.Table.TableID, 402 TableInfo: tableInfo, 403 Columns: Columns2ColumnDatas(r.Columns, tableInfo), 404 PreColumns: Columns2ColumnDatas(r.PreColumns, tableInfo), 405 } 406 return row 407 } 408 409 // txnRows represents a set of events that belong to the same transaction. 410 type txnRows []*RowChangedEvent 411 412 // Len is the number of elements in the collection. 413 func (e txnRows) Len() int { 414 return len(e) 415 } 416 417 // Less sort the events base on the order of event type delete<update<insert 418 func (e txnRows) Less(i, j int) bool { 419 return getDMLOrder(e[i]) < getDMLOrder(e[j]) 420 } 421 422 // getDMLOrder returns the order of the dml types: delete<update<insert 423 func getDMLOrder(event *RowChangedEvent) int { 424 if event.IsDelete() { 425 return typeDelete 426 } else if event.IsUpdate() { 427 return typeUpdate 428 } 429 return typeInsert 430 } 431 432 func (e txnRows) Swap(i, j int) { 433 e[i], e[j] = e[j], e[i] 434 } 435 436 // GetCommitTs returns the commit timestamp of this event. 
437 func (r *RowChangedEvent) GetCommitTs() uint64 { 438 return r.CommitTs 439 } 440 441 // TrySplitAndSortUpdateEvent do nothing 442 func (r *RowChangedEvent) TrySplitAndSortUpdateEvent(_ string) error { 443 return nil 444 } 445 446 // IsDelete returns true if the row is a delete event 447 func (r *RowChangedEvent) IsDelete() bool { 448 return len(r.PreColumns) != 0 && len(r.Columns) == 0 449 } 450 451 // IsInsert returns true if the row is an insert event 452 func (r *RowChangedEvent) IsInsert() bool { 453 return len(r.PreColumns) == 0 && len(r.Columns) != 0 454 } 455 456 // IsUpdate returns true if the row is an update event 457 func (r *RowChangedEvent) IsUpdate() bool { 458 return len(r.PreColumns) != 0 && len(r.Columns) != 0 459 } 460 461 func columnData2Column(col *ColumnData, tableInfo *TableInfo) *Column { 462 colID := col.ColumnID 463 offset, ok := tableInfo.columnsOffset[colID] 464 if !ok { 465 log.Panic("invalid column id", 466 zap.Int64("columnID", colID), 467 zap.Any("tableInfo", tableInfo)) 468 } 469 colInfo := tableInfo.Columns[offset] 470 return &Column{ 471 Name: colInfo.Name.O, 472 Type: colInfo.GetType(), 473 Charset: colInfo.GetCharset(), 474 Collation: colInfo.GetCollate(), 475 Flag: *tableInfo.ColumnsFlag[colID], 476 Value: col.Value, 477 Default: GetColumnDefaultValue(colInfo), 478 } 479 } 480 481 func columnDatas2Columns(cols []*ColumnData, tableInfo *TableInfo) []*Column { 482 if cols == nil { 483 return nil 484 } 485 columns := make([]*Column, len(cols)) 486 for i, colData := range cols { 487 if colData == nil { 488 log.Warn("meet nil column data, should not happened in production env", 489 zap.Any("cols", cols), 490 zap.Any("tableInfo", tableInfo)) 491 continue 492 } 493 columns[i] = columnData2Column(colData, tableInfo) 494 } 495 return columns 496 } 497 498 // GetColumns returns the columns of the event 499 func (r *RowChangedEvent) GetColumns() []*Column { 500 return columnDatas2Columns(r.Columns, r.TableInfo) 501 } 502 503 // 
GetPreColumns returns the pre columns of the event 504 func (r *RowChangedEvent) GetPreColumns() []*Column { 505 return columnDatas2Columns(r.PreColumns, r.TableInfo) 506 } 507 508 // PrimaryKeyColumnNames return all primary key's name 509 func (r *RowChangedEvent) PrimaryKeyColumnNames() []string { 510 var result []string 511 512 var cols []*ColumnData 513 if r.IsDelete() { 514 cols = r.PreColumns 515 } else { 516 cols = r.Columns 517 } 518 519 result = make([]string, 0) 520 tableInfo := r.TableInfo 521 for _, col := range cols { 522 if col != nil && tableInfo.ForceGetColumnFlagType(col.ColumnID).IsPrimaryKey() { 523 result = append(result, tableInfo.ForceGetColumnName(col.ColumnID)) 524 } 525 } 526 return result 527 } 528 529 // GetHandleKeyColumnValues returns all handle key's column values 530 func (r *RowChangedEvent) GetHandleKeyColumnValues() []string { 531 var result []string 532 533 var cols []*ColumnData 534 if r.IsDelete() { 535 cols = r.PreColumns 536 } else { 537 cols = r.Columns 538 } 539 540 result = make([]string, 0) 541 tableInfo := r.TableInfo 542 for _, col := range cols { 543 if col != nil && tableInfo.ForceGetColumnFlagType(col.ColumnID).IsHandleKey() { 544 result = append(result, ColumnValueString(col.Value)) 545 } 546 } 547 return result 548 } 549 550 // HandleKeyColInfos returns the column(s) and colInfo(s) corresponding to the handle key(s) 551 func (r *RowChangedEvent) HandleKeyColInfos() ([]*Column, []rowcodec.ColInfo) { 552 pkeyCols := make([]*Column, 0) 553 pkeyColInfos := make([]rowcodec.ColInfo, 0) 554 555 var cols []*ColumnData 556 if r.IsDelete() { 557 cols = r.PreColumns 558 } else { 559 cols = r.Columns 560 } 561 562 tableInfo := r.TableInfo 563 colInfos := tableInfo.GetColInfosForRowChangedEvent() 564 for i, col := range cols { 565 if col != nil && tableInfo.ForceGetColumnFlagType(col.ColumnID).IsHandleKey() { 566 pkeyCols = append(pkeyCols, columnData2Column(col, tableInfo)) 567 pkeyColInfos = append(pkeyColInfos, colInfos[i]) 
568 } 569 } 570 571 // It is okay not to have handle keys, so the empty array is an acceptable result 572 return pkeyCols, pkeyColInfos 573 } 574 575 // ApproximateBytes returns approximate bytes in memory consumed by the event. 576 func (r *RowChangedEvent) ApproximateBytes() int { 577 const sizeOfRowEvent = int(unsafe.Sizeof(*r)) 578 579 size := 0 580 // Size of cols 581 for i := range r.Columns { 582 size += r.Columns[i].ApproximateBytes 583 } 584 // Size of pre cols 585 for i := range r.PreColumns { 586 if r.PreColumns[i] != nil { 587 size += r.PreColumns[i].ApproximateBytes 588 } 589 } 590 // Size of an empty row event 591 size += sizeOfRowEvent 592 return size 593 } 594 595 // Columns2ColumnDatas convert `Column`s to `ColumnData`s 596 func Columns2ColumnDatas(cols []*Column, tableInfo *TableInfo) []*ColumnData { 597 if cols == nil { 598 return nil 599 } 600 columns := make([]*ColumnData, len(cols)) 601 for i, col := range cols { 602 if col == nil { 603 continue 604 } 605 colID := tableInfo.ForceGetColumnIDByName(col.Name) 606 columns[i] = &ColumnData{ 607 ColumnID: colID, 608 Value: col.Value, 609 } 610 } 611 return columns 612 } 613 614 // Column represents a column value and its schema info 615 type Column struct { 616 Name string `msg:"name"` 617 Type byte `msg:"type"` 618 Charset string `msg:"charset"` 619 Collation string `msg:"collation"` 620 Flag ColumnFlagType `msg:"-"` 621 Value interface{} `msg:"-"` 622 Default interface{} `msg:"-"` 623 624 // ApproximateBytes is approximate bytes consumed by the column. 625 ApproximateBytes int `msg:"-"` 626 } 627 628 // ColumnData represents a column value in row changed event 629 type ColumnData struct { 630 // ColumnID may be just a mock id, because we don't store it in redo log. 631 // So after restore from redo log, we need to give every a column a mock id. 
632 // The only guarantee is that the column id is unique in a RowChangedEvent 633 ColumnID int64 `json:"column_id" msg:"column_id"` 634 Value interface{} `json:"value" msg:"-"` 635 636 // ApproximateBytes is approximate bytes consumed by the column. 637 ApproximateBytes int `json:"-" msg:"-"` 638 } 639 640 // RedoColumn stores Column change 641 type RedoColumn struct { 642 // Fields from Column and can't be marshaled directly in Column. 643 Value interface{} `msg:"column"` 644 // msgp transforms empty byte slice into nil, PTAL msgp#247. 645 ValueIsEmptyBytes bool `msg:"value-is-empty-bytes"` 646 Flag uint64 `msg:"flag"` 647 } 648 649 // ColumnIDAllocator represents the interface to allocate column id for tableInfo 650 type ColumnIDAllocator interface { 651 // GetColumnID return the column id according to the column name 652 GetColumnID(name string) int64 653 } 654 655 // IncrementalColumnIDAllocator allocates column id in an incremental way. 656 // At most of the time, it is the default implementation when you don't care the column id's concrete value. 
657 // 658 //msgp:ignore IncrementalColumnIDAllocator 659 type IncrementalColumnIDAllocator struct { 660 nextColID int64 661 } 662 663 // NewIncrementalColumnIDAllocator creates a new IncrementalColumnIDAllocator 664 func NewIncrementalColumnIDAllocator() *IncrementalColumnIDAllocator { 665 return &IncrementalColumnIDAllocator{ 666 nextColID: 100, // 100 is an arbitrary number 667 } 668 } 669 670 // GetColumnID return the next mock column id 671 func (d *IncrementalColumnIDAllocator) GetColumnID(name string) int64 { 672 result := d.nextColID 673 d.nextColID += 1 674 return result 675 } 676 677 // NameBasedColumnIDAllocator allocates column id using an prefined map from column name to id 678 // 679 //msgp:ignore NameBasedColumnIDAllocator 680 type NameBasedColumnIDAllocator struct { 681 nameToIDMap map[string]int64 682 } 683 684 // NewNameBasedColumnIDAllocator creates a new NameBasedColumnIDAllocator 685 func NewNameBasedColumnIDAllocator(nameToIDMap map[string]int64) *NameBasedColumnIDAllocator { 686 return &NameBasedColumnIDAllocator{ 687 nameToIDMap: nameToIDMap, 688 } 689 } 690 691 // GetColumnID return the column id of the name 692 func (n *NameBasedColumnIDAllocator) GetColumnID(name string) int64 { 693 colID, ok := n.nameToIDMap[name] 694 if !ok { 695 log.Panic("column not found", 696 zap.String("name", name), 697 zap.Any("nameToIDMap", n.nameToIDMap)) 698 } 699 return colID 700 } 701 702 // BuildTableInfo builds a table info from given information. 703 // Note that some fields of the result TableInfo may just be mocked. 704 // The only guarantee is that we can use the result to reconstrut the information in `Column`. 705 // The main use cases of this function it to build TableInfo from redo log and in tests. 
706 func BuildTableInfo(schemaName, tableName string, columns []*Column, indexColumns [][]int) *TableInfo { 707 tidbTableInfo := BuildTiDBTableInfo(tableName, columns, indexColumns) 708 return WrapTableInfo(100 /* not used */, schemaName, 1000 /* not used */, tidbTableInfo) 709 } 710 711 // BuildTableInfoWithPKNames4Test builds a table info from given information. 712 func BuildTableInfoWithPKNames4Test(schemaName, tableName string, columns []*Column, pkNames map[string]struct{}) *TableInfo { 713 if len(pkNames) == 0 { 714 return BuildTableInfo(schemaName, tableName, columns, nil) 715 } 716 indexColumns := make([][]int, 1) 717 indexColumns[0] = make([]int, 0) 718 for i, col := range columns { 719 if _, ok := pkNames[col.Name]; ok { 720 indexColumns[0] = append(indexColumns[0], i) 721 col.Flag.SetIsHandleKey() 722 col.Flag.SetIsPrimaryKey() 723 } 724 } 725 if len(indexColumns[0]) != len(pkNames) { 726 log.Panic("cannot find all pks", 727 zap.Any("indexColumns", indexColumns), 728 zap.Any("pkNames", pkNames)) 729 } 730 return BuildTableInfo(schemaName, tableName, columns, indexColumns) 731 } 732 733 // AddExtraColumnInfo is used to add some extra column info to the table info. 734 // Just use it in test. 
func AddExtraColumnInfo(tableInfo *model.TableInfo, extraColInfos []rowcodec.ColInfo) {
	// The i-th extra column info is applied to the i-th column of tableInfo;
	// the index is not bounds-checked, so tableInfo must have at least
	// len(extraColInfos) columns.
	for i, colInfo := range extraColInfos {
		tableInfo.Columns[i].SetElems(colInfo.Ft.GetElems())
		tableInfo.Columns[i].SetFlen(colInfo.Ft.GetFlen())
	}
}

// GetHandleAndUniqueIndexOffsets4Test is used to get the offsets of handle columns and other unique index columns in test
// Each non-handle unique column becomes its own single-column entry; all
// handle columns together form one entry, appended last when there is any.
func GetHandleAndUniqueIndexOffsets4Test(cols []*Column) [][]int {
	result := make([][]int, 0)
	handleColumns := make([]int, 0)
	for i, col := range cols {
		if col.Flag.IsHandleKey() {
			handleColumns = append(handleColumns, i)
		} else if col.Flag.IsUniqueKey() {
			// When there is a unique key which is not handle key,
			// we cannot get the accurate index info for this key.
			// So just be aggressive to make each unique column a unique index
			// to make sure there is no write conflict when syncing data in tests.
			result = append(result, []int{i})
		}
	}
	if len(handleColumns) != 0 {
		result = append(result, handleColumns)
	}
	return result
}

// BuildTiDBTableInfoWithoutVirtualColumns build a TableInfo without virtual columns from the source table info
// It works on a clone of source: columns for which IsColCDCVisible reports
// false are dropped, the remaining (cloned) columns get consecutive offsets,
// and index column offsets are rewritten to match.
func BuildTiDBTableInfoWithoutVirtualColumns(source *model.TableInfo) *model.TableInfo {
	ret := source.Clone()
	ret.Columns = make([]*model.ColumnInfo, 0, len(source.Columns))
	rowColumnsCurrentOffset := 0
	// columnsOffset maps the original column name to its new offset.
	columnsOffset := make(map[string]int, len(source.Columns))
	for _, srcCol := range source.Columns {
		if !IsColCDCVisible(srcCol) {
			continue
		}
		colInfo := srcCol.Clone()
		colInfo.Offset = rowColumnsCurrentOffset
		ret.Columns = append(ret.Columns, colInfo)
		columnsOffset[colInfo.Name.O] = rowColumnsCurrentOffset
		rowColumnsCurrentOffset += 1
	}
	// Keep all the index info even if it contains virtual columns for simplicity
	// An index column that was dropped above is not in columnsOffset, so the
	// map lookup gives it offset 0.
	for _, indexInfo := range ret.Indices {
		for _, col := range indexInfo.Columns {
			col.Offset = columnsOffset[col.Name.O]
		}
	}

	return ret
}

// BuildTiDBTableInfo is a simple wrapper over BuildTiDBTableInfoImpl which create a default ColumnIDAllocator.
func BuildTiDBTableInfo(tableName string, columns []*Column, indexColumns [][]int) *model.TableInfo {
	return BuildTiDBTableInfoImpl(tableName, columns, indexColumns, NewIncrementalColumnIDAllocator())
}

// BuildTiDBTableInfoImpl builds a TiDB TableInfo from given information.
// Note the result TableInfo may not be same as the original TableInfo in tidb.
// The only guarantee is that you can restore the `Name`, `Type`, `Charset`, `Collation`
// and `Flag` field of `Column` using the result TableInfo.
// The precondition required for calling this function:
//  1. There must be at least one handle key in `columns`;
//  2. The handle key must either be a primary key or a non null unique key;
//  3. The index that is selected as the handle must be provided in `indexColumns`;
//
// Each entry of indexColumns is a list of offsets into columns. The function
// panics through log.Panic when the flags of the columns and the given indexes
// are inconsistent with each other.
func BuildTiDBTableInfoImpl(
	tableName string,
	columns []*Column,
	indexColumns [][]int,
	columnIDAllocator ColumnIDAllocator,
) *model.TableInfo {
	ret := &model.TableInfo{}
	ret.Name = model.NewCIStr(tableName)

	// First pass: build one ColumnInfo per input column.
	hasPrimaryKeyColumn := false
	for i, col := range columns {
		columnInfo := &model.ColumnInfo{
			Offset: i,
			State:  model.StatePublic,
		}
		if col == nil {
			// actually, col should never be nil according to `datum2Column` and `WrapTableInfo` in prod env
			// we mock it as generated column just for test
			columnInfo.Name = model.NewCIStr("omitted")
			columnInfo.GeneratedExprString = "pass_generated_check"
			columnInfo.GeneratedStored = false
			ret.Columns = append(ret.Columns, columnInfo)
			continue
		}
		// add a mock id to identify columns inside cdc
		columnInfo.ID = columnIDAllocator.GetColumnID(col.Name)
		columnInfo.Name = model.NewCIStr(col.Name)
		columnInfo.SetType(col.Type)

		if col.Collation != "" {
			columnInfo.SetCollate(col.Collation)
		} else {
			// collation is not stored, give it a default value
			columnInfo.SetCollate(mysql.UTF8MB4DefaultCollation)
		}

		// inverse initColumnsFlag
		flag := col.Flag
		if col.Charset != "" {
			columnInfo.SetCharset(col.Charset)
		} else if flag.IsBinary() {
			columnInfo.SetCharset("binary")
		} else {
			// charset is not stored, give it a default value
			columnInfo.SetCharset(mysql.UTF8MB4Charset)
		}
		if flag.IsGeneratedColumn() {
			// we do not use this field, so we set it to any non-empty string
			columnInfo.GeneratedExprString = "pass_generated_check"
			columnInfo.GeneratedStored = true
		}
		if flag.IsPrimaryKey() {
			columnInfo.AddFlag(mysql.PriKeyFlag)
			hasPrimaryKeyColumn = true
			if !flag.IsHandleKey() {
				log.Panic("Primary key must be handle key",
					zap.String("table", tableName),
					zap.Any("columns", columns),
					zap.Any("indexColumns", indexColumns))
			}
			// just set it for test compatibility,
			// actually we cannot deduce the value of IsCommonHandle from the provided args.
			ret.IsCommonHandle = true
		}
		if flag.IsUniqueKey() {
			columnInfo.AddFlag(mysql.UniqueKeyFlag)
		}
		if flag.IsHandleKey() {
			if !flag.IsPrimaryKey() && !flag.IsUniqueKey() {
				log.Panic("Handle key must either be primary key or unique key",
					zap.String("table", tableName),
					zap.Any("columns", columns),
					zap.Any("indexColumns", indexColumns))
			}
		}
		if !flag.IsNullable() {
			columnInfo.AddFlag(mysql.NotNullFlag)
		}
		if flag.IsMultipleKey() {
			columnInfo.AddFlag(mysql.MultipleKeyFlag)
		}
		if flag.IsUnsigned() {
			columnInfo.AddFlag(mysql.UnsignedFlag)
		}
		ret.Columns = append(ret.Columns, columnInfo)
	}

	// Second pass: build one IndexInfo per entry of indexColumns.
	hasPrimaryKeyIndex := false
	hasHandleIndex := false
	// TiCDC handles columns according to the following rules:
	// 1. If a primary key (PK) exists, it is chosen.
	// 2. If there is no PK, TiCDC looks for a not null unique key (UK) with the least number of columns and the smallest index ID.
	// So we assign the smallest index id to the index which is selected as handle to mock this behavior.
	minIndexID := int64(1)
	nextMockIndexID := minIndexID + 1
	for i, colOffsets := range indexColumns {
		indexInfo := &model.IndexInfo{
			Name:  model.NewCIStr(fmt.Sprintf("idx_%d", i)),
			State: model.StatePublic,
		}
		// NOTE(review): colOffsets[0] is read without a length check, so an empty
		// entry in indexColumns would panic here — confirm callers never pass one.
		firstCol := columns[colOffsets[0]]
		if firstCol == nil {
			// when the referenced column is nil, we already have a handle index
			// so we can skip this index.
			// only happens for DELETE event and old value feature is disabled
			continue
		}
		if firstCol.Flag.IsPrimaryKey() {
			indexInfo.Unique = true
		}
		if firstCol.Flag.IsUniqueKey() {
			indexInfo.Unique = true
		}

		isPrimary := true
		isAllColumnsHandle := true
		for _, offset := range colOffsets {
			col := columns[offset]
			// When only all columns in the index are primary key, then the index is primary key.
			if col == nil || !col.Flag.IsPrimaryKey() {
				isPrimary = false
			}
			if col == nil || !col.Flag.IsHandleKey() {
				isAllColumnsHandle = false
			}

			tiCol := ret.Columns[offset]
			indexCol := &model.IndexColumn{}
			indexCol.Name = tiCol.Name
			indexCol.Offset = offset
			indexInfo.Columns = append(indexInfo.Columns, indexCol)
			// Reassigned on every iteration; the value after the last column is
			// the one that sticks.
			indexInfo.Primary = isPrimary
		}
		hasPrimaryKeyIndex = hasPrimaryKeyIndex || isPrimary
		if isAllColumnsHandle {
			// If there is no primary index, only one index will contain columns which are all handles.
			// If there is a primary index, the primary index must be the handle.
			// And there may be another index which is a subset of the primary index. So we skip this check.
			if hasHandleIndex && !hasPrimaryKeyColumn {
				log.Panic("Multiple handle index found",
					zap.String("table", tableName),
					zap.Any("colOffsets", colOffsets),
					zap.String("indexName", indexInfo.Name.O),
					zap.Any("columns", columns),
					zap.Any("indexColumns", indexColumns))
			}
			hasHandleIndex = true
		}
		// If there is no primary column, we need allocate the min index id to the one selected as handle.
		// In other cases, we don't care the concrete value of index id.
		if isAllColumnsHandle && !hasPrimaryKeyColumn {
			indexInfo.ID = minIndexID
		} else {
			indexInfo.ID = nextMockIndexID
			nextMockIndexID += 1
		}

		// TODO: revert the "all column set index related flag" to "only the
		// first column set index related flag" if needed

		ret.Indices = append(ret.Indices, indexInfo)
	}
	if hasPrimaryKeyColumn != hasPrimaryKeyIndex {
		log.Panic("Primary key column and primary key index is not consistent",
			zap.String("table", tableName),
			zap.Any("columns", columns),
			zap.Any("indexColumns", indexColumns),
			zap.Bool("hasPrimaryKeyColumn", hasPrimaryKeyColumn),
			zap.Bool("hasPrimaryKeyIndex", hasPrimaryKeyIndex))
	}
	return ret
}

// ColumnValueString returns the string representation of the column value
// nil becomes "null", booleans become "1" or "0", integers are printed in
// base 10, floats with strconv.FormatFloat in 'f' format at the shortest
// precision, strings and byte slices are used as is, and anything else is
// formatted with "%v".
func ColumnValueString(c interface{}) string {
	var data string
	switch v := c.(type) {
	case nil:
		data = "null"
	case bool:
		if v {
			data = "1"
		} else {
			data = "0"
		}
	case int:
		data = strconv.FormatInt(int64(v), 10)
	case int8:
		data = strconv.FormatInt(int64(v), 10)
	case int16:
		data = strconv.FormatInt(int64(v), 10)
	case int32:
		data = strconv.FormatInt(int64(v), 10)
	case int64:
		data = strconv.FormatInt(v, 10)
	case uint8:
		data = strconv.FormatUint(uint64(v), 10)
	case uint16:
		data = strconv.FormatUint(uint64(v), 10)
	case uint32:
		data = strconv.FormatUint(uint64(v), 10)
	case uint64:
		data = strconv.FormatUint(v, 10)
	case float32:
		data = strconv.FormatFloat(float64(v), 'f', -1, 32)
	case float64:
		data = strconv.FormatFloat(v, 'f', -1, 64)
	case string:
		data = v
	case []byte:
		data = string(v)
	default:
		data = fmt.Sprintf("%v", v)
	}
	return data
}

// DDLEvent stores DDL event
// Only StartTs, CommitTs and Query carry a msgp field name; every other field
// is tagged `msg:"-"`.
type DDLEvent struct {
	StartTs      uint64           `msg:"start-ts"`
	CommitTs     uint64           `msg:"commit-ts"`
	Query        string           `msg:"query"`
	TableInfo    *TableInfo       `msg:"-"`
	PreTableInfo *TableInfo       `msg:"-"`
	Type         model.ActionType `msg:"-"`
	Done         atomic.Bool      `msg:"-"`
	Charset      string           `msg:"-"`
	Collate      string           `msg:"-"`
	IsBootstrap  bool             `msg:"-"`
	// BDRRole is the role of the TiDB cluster, it is used to determine whether
	// the DDL is executed by the primary cluster.
	BDRRole string        `msg:"-"`
	SQLMode mysql.SQLMode `msg:"-"`
}

// FromJob fills the values with DDLEvent from DDL job
// It calls FromJobWithArgs with empty old and new schema names.
func (d *DDLEvent) FromJob(job *model.Job, preTableInfo *TableInfo, tableInfo *TableInfo) {
	d.FromJobWithArgs(job, preTableInfo, tableInfo, "", "")
}

// FromJobWithArgs fills the values with DDLEvent from DDL job
func (d *DDLEvent) FromJobWithArgs(
	job *model.Job,
	preTableInfo, tableInfo *TableInfo,
	oldSchemaName, newSchemaName string,
) {
	d.StartTs = job.StartTS
	d.CommitTs = job.BinlogInfo.FinishedTS
	d.Type = job.Type
	d.PreTableInfo = preTableInfo
	d.TableInfo = tableInfo
	d.Charset = job.Charset
	d.Collate = job.Collate
	d.BDRRole = job.BDRRole
	d.SQLMode = job.SQLMode
	switch d.Type {
	// The query for "DROP TABLE" and "DROP VIEW" statements need
	// to be rebuilt. The reason is elaborated as follows:
	// for a DDL statement like "DROP TABLE test1.table1, test2.table2",
	// two DDL jobs will be generated.
These two jobs can be differentiated 1062 // from job.BinlogInfo.TableInfo whereas the job.Query are identical. 1063 case model.ActionDropTable: 1064 d.Query = fmt.Sprintf("DROP TABLE `%s`.`%s`", 1065 d.TableInfo.TableName.Schema, d.TableInfo.TableName.Table) 1066 case model.ActionDropView: 1067 d.Query = fmt.Sprintf("DROP VIEW `%s`.`%s`", 1068 d.TableInfo.TableName.Schema, d.TableInfo.TableName.Table) 1069 case model.ActionRenameTables: 1070 oldTableName := preTableInfo.Name.O 1071 newTableName := tableInfo.Name.O 1072 d.Query = fmt.Sprintf("RENAME TABLE `%s`.`%s` TO `%s`.`%s`", 1073 oldSchemaName, oldTableName, newSchemaName, newTableName) 1074 // Note that type is ActionRenameTable, not ActionRenameTables. 1075 d.Type = model.ActionRenameTable 1076 case model.ActionExchangeTablePartition: 1077 // Parse idx of partition name from query. 1078 upperQuery := strings.ToUpper(job.Query) 1079 idx1 := strings.Index(upperQuery, "EXCHANGE PARTITION") + len("EXCHANGE PARTITION") 1080 idx2 := strings.Index(upperQuery, "WITH TABLE") 1081 1082 // Note that partition name should be parsed from original query, not the upperQuery. 1083 partName := strings.TrimSpace(job.Query[idx1:idx2]) 1084 // The tableInfo is the partition table, preTableInfo is non partition table. 1085 d.Query = fmt.Sprintf("ALTER TABLE `%s`.`%s` EXCHANGE PARTITION `%s` WITH TABLE `%s`.`%s`", 1086 tableInfo.TableName.Schema, tableInfo.TableName.Table, partName, 1087 preTableInfo.TableName.Schema, preTableInfo.TableName.Table) 1088 1089 if strings.HasSuffix(upperQuery, "WITHOUT VALIDATION") { 1090 d.Query += " WITHOUT VALIDATION" 1091 } 1092 default: 1093 d.Query = job.Query 1094 } 1095 } 1096 1097 // NewBootstrapDDLEvent returns a bootstrap DDL event. 1098 // We set Bootstrap DDL event's startTs and commitTs to 0. 1099 // Because it is generated by the TiCDC, not from the upstream TiDB. 1100 // And they ere useless for a bootstrap DDL event. 
1101 func NewBootstrapDDLEvent(tableInfo *TableInfo) *DDLEvent { 1102 return &DDLEvent{ 1103 StartTs: 0, 1104 CommitTs: 0, 1105 TableInfo: tableInfo, 1106 IsBootstrap: true, 1107 } 1108 } 1109 1110 // SingleTableTxn represents a transaction which includes many row events in a single table 1111 // 1112 //msgp:ignore SingleTableTxn 1113 type SingleTableTxn struct { 1114 PhysicalTableID int64 1115 TableInfo *TableInfo 1116 // TableInfoVersion is the version of the table info, it is used to generate data path 1117 // in storage sink. Generally, TableInfoVersion equals to `SingleTableTxn.TableInfo.Version`. 1118 // Besides, if one table is just scheduled to a new processor, the TableInfoVersion should be 1119 // greater than or equal to the startTs of table sink. 1120 TableInfoVersion uint64 1121 1122 StartTs uint64 1123 CommitTs uint64 1124 Rows []*RowChangedEvent 1125 } 1126 1127 // GetCommitTs returns the commit timestamp of the transaction. 1128 func (t *SingleTableTxn) GetCommitTs() uint64 { 1129 return t.CommitTs 1130 } 1131 1132 // GetPhysicalTableID returns the physical table id of the table in the transaction 1133 func (t *SingleTableTxn) GetPhysicalTableID() int64 { 1134 return t.PhysicalTableID 1135 } 1136 1137 // TrySplitAndSortUpdateEvent split update events if unique key is updated 1138 func (t *SingleTableTxn) TrySplitAndSortUpdateEvent(scheme string) error { 1139 if !t.shouldSplitUpdateEvent(scheme) { 1140 return nil 1141 } 1142 newRows, err := trySplitAndSortUpdateEvent(t.Rows) 1143 if err != nil { 1144 return errors.Trace(err) 1145 } 1146 t.Rows = newRows 1147 return nil 1148 } 1149 1150 // Whether split a single update event into delete and insert events? 1151 // 1152 // For the MySQL Sink, we don't split any update event. 1153 // This may cause error like "duplicate entry" when sink to the downstream. 
1154 // This kind of error will cause the changefeed to restart, 1155 // and then the related update rows will be splitted to insert and delete at puller side. 1156 // 1157 // For the Kafka and Storage sink, always split a single unique key changed update event, since: 1158 // 1. Avro and CSV does not output the previous column values for the update event, so it would 1159 // cause consumer missing data if the unique key changed event is not split. 1160 // 2. Index-Value Dispatcher cannot work correctly if the unique key changed event isn't split. 1161 func (t *SingleTableTxn) shouldSplitUpdateEvent(sinkScheme string) bool { 1162 return !sink.IsMySQLCompatibleScheme(sinkScheme) 1163 } 1164 1165 // trySplitAndSortUpdateEvent try to split update events if unique key is updated 1166 // returns true if some updated events is split 1167 func trySplitAndSortUpdateEvent( 1168 events []*RowChangedEvent, 1169 ) ([]*RowChangedEvent, error) { 1170 rowChangedEvents := make([]*RowChangedEvent, 0, len(events)) 1171 split := false 1172 for _, e := range events { 1173 if e == nil { 1174 log.Warn("skip emit nil event", 1175 zap.Any("event", e)) 1176 continue 1177 } 1178 1179 colLen := len(e.Columns) 1180 preColLen := len(e.PreColumns) 1181 // Some transactions could generate empty row change event, such as 1182 // begin; insert into t (id) values (1); delete from t where id=1; commit; 1183 // Just ignore these row changed events. 1184 if colLen == 0 && preColLen == 0 { 1185 log.Warn("skip emit empty row event", 1186 zap.Any("event", e)) 1187 continue 1188 } 1189 1190 // This indicates that it is an update event. if the pk or uk is updated, 1191 // we need to split it into two events (delete and insert). 
1192 if e.IsUpdate() && ShouldSplitUpdateEvent(e) { 1193 deleteEvent, insertEvent, err := SplitUpdateEvent(e) 1194 if err != nil { 1195 return nil, errors.Trace(err) 1196 } 1197 split = true 1198 rowChangedEvents = append(rowChangedEvents, deleteEvent, insertEvent) 1199 } else { 1200 rowChangedEvents = append(rowChangedEvents, e) 1201 } 1202 } 1203 // some updated events is split, need to sort 1204 if split { 1205 sort.Sort(txnRows(rowChangedEvents)) 1206 } 1207 return rowChangedEvents, nil 1208 } 1209 1210 func isNonEmptyUniqueOrHandleCol(col *ColumnData, tableInfo *TableInfo) bool { 1211 if col != nil { 1212 colFlag := tableInfo.ForceGetColumnFlagType(col.ColumnID) 1213 return colFlag.IsUniqueKey() || colFlag.IsHandleKey() 1214 } 1215 return false 1216 } 1217 1218 // ShouldSplitUpdateEvent determines if the split event is needed to align the old format based on 1219 // whether the handle key column or unique key has been modified. 1220 // If is modified, we need to use splitUpdateEvent to split the update event into a delete and an insert event. 1221 func ShouldSplitUpdateEvent(updateEvent *RowChangedEvent) bool { 1222 // nil event will never be split. 1223 if updateEvent == nil { 1224 return false 1225 } 1226 1227 tableInfo := updateEvent.TableInfo 1228 for i := range updateEvent.Columns { 1229 col := updateEvent.Columns[i] 1230 preCol := updateEvent.PreColumns[i] 1231 if isNonEmptyUniqueOrHandleCol(col, tableInfo) && isNonEmptyUniqueOrHandleCol(preCol, tableInfo) { 1232 colValueString := ColumnValueString(col.Value) 1233 preColValueString := ColumnValueString(preCol.Value) 1234 // If one unique key columns is updated, we need to split the event row. 1235 if colValueString != preColValueString { 1236 return true 1237 } 1238 } 1239 } 1240 return false 1241 } 1242 1243 // SplitUpdateEvent splits an update event into a delete and an insert event. 
1244 func SplitUpdateEvent( 1245 updateEvent *RowChangedEvent, 1246 ) (*RowChangedEvent, *RowChangedEvent, error) { 1247 if updateEvent == nil { 1248 return nil, nil, errors.New("nil event cannot be split") 1249 } 1250 1251 // If there is an update to handle key columns, 1252 // we need to split the event into two events to be compatible with the old format. 1253 // NOTICE: Here we don't need a full deep copy because 1254 // our two events need Columns and PreColumns respectively, 1255 // so it won't have an impact and no more full deep copy wastes memory. 1256 deleteEvent := *updateEvent 1257 deleteEvent.Columns = nil 1258 1259 insertEvent := *updateEvent 1260 // NOTICE: clean up pre cols for insert event. 1261 insertEvent.PreColumns = nil 1262 1263 return &deleteEvent, &insertEvent, nil 1264 } 1265 1266 // Append adds a row changed event into SingleTableTxn 1267 func (t *SingleTableTxn) Append(row *RowChangedEvent) { 1268 if row.StartTs != t.StartTs || row.CommitTs != t.CommitTs || row.PhysicalTableID != t.GetPhysicalTableID() { 1269 log.Panic("unexpected row change event", 1270 zap.Uint64("startTs", t.StartTs), 1271 zap.Uint64("commitTs", t.CommitTs), 1272 zap.Any("table", t.GetPhysicalTableID()), 1273 zap.Any("row", row)) 1274 } 1275 t.Rows = append(t.Rows, row) 1276 } 1277 1278 // TopicPartitionKey contains the topic and partition key of the message. 1279 type TopicPartitionKey struct { 1280 Topic string 1281 Partition int32 1282 PartitionKey string 1283 TotalPartition int32 1284 }