github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/pkg/shardddl/optimism/lock.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package optimism 15 16 import ( 17 "bytes" 18 "context" 19 "encoding/json" 20 "fmt" 21 "sync" 22 23 "github.com/pingcap/errors" 24 "github.com/pingcap/tidb/pkg/parser" 25 "github.com/pingcap/tidb/pkg/parser/ast" 26 "github.com/pingcap/tidb/pkg/parser/model" 27 "github.com/pingcap/tidb/pkg/util/dbutil" 28 "github.com/pingcap/tidb/pkg/util/schemacmp" 29 "github.com/pingcap/tiflow/dm/master/metrics" 30 "github.com/pingcap/tiflow/dm/pkg/conn" 31 "github.com/pingcap/tiflow/dm/pkg/cputil" 32 "github.com/pingcap/tiflow/dm/pkg/log" 33 "github.com/pingcap/tiflow/dm/pkg/terror" 34 clientv3 "go.etcd.io/etcd/client/v3" 35 "go.uber.org/zap" 36 ) 37 38 // DropColumnStage represents whether drop column done for a sharding table. 39 type DropColumnStage int 40 41 const ( 42 // DropNotDone represents master haven't received done for the col. 43 DropNotDone DropColumnStage = iota 44 // DropPartiallyDone represents master receive done for the col. 45 DropPartiallyDone 46 // DropDone represents master receive done and ddl for the col(executed in downstream). 47 DropDone 48 ) 49 50 type tableType int 51 52 const ( 53 // normalTables represents upstream table info record in checkpoint. 54 normalTables tableType = iota 55 // conflictTables represents upstream table info after executing conflict DDL. 56 conflictTables 57 // finalTables combines normalTables and conflcitTables, 58 // which represents all upstream table infos after executing all conflict DDLs. 59 finalTables 60 ) 61 62 // Lock represents the shard DDL lock in memory. 63 // This information does not need to be persistent, and can be re-constructed from the shard DDL info. 64 type Lock struct { 65 mu sync.RWMutex 66 67 cli *clientv3.Client 68 69 ID string // lock's ID 70 Task string // lock's corresponding task name 71 72 DownSchema string // downstream schema name 73 DownTable string // downstream table name 74 75 // first prevTable when a lock created 76 // only use when fetchTableInfo return an error. 77 initTable schemacmp.Table 78 // per-table's table info, 79 // upstream source ID -> upstream schema name -> upstream table name -> table info. 80 // if all of them are the same, then we call the lock `synced`. 81 tables map[string]map[string]map[string]schemacmp.Table 82 // conflictTables is used for conflict DDL coordination 83 // upstream source ID -> upstream schema name -> upstream table name -> table info. 84 conflictTables map[string]map[string]map[string]schemacmp.Table 85 // finalTables combine tables and conflcitTables 86 // it represents final state of all tables 87 // upstream source ID -> upstream schema name -> upstream table name -> table info. 88 finalTables map[string]map[string]map[string]schemacmp.Table 89 90 synced bool 91 92 // whether DDLs operations have done (execute the shard DDL) to the downstream. 93 // if all of them have done and have the same schema, then we call the lock `resolved`. 94 // in optimistic mode, one table should only send a new table info (and DDLs) after the old one has done, 95 // so we can set `done` to `false` when received a table info (and need the table to done some DDLs), 96 // and mark `done` to `true` after received the done status of the DDLs operation. 97 done map[string]map[string]map[string]bool 98 99 // upstream source ID -> upstream schema name -> upstream table name -> info version. 100 versions map[string]map[string]map[string]int64 101 102 // record the partially dropped columns 103 // column name -> source -> upSchema -> upTable -> int 104 columns map[string]map[string]map[string]map[string]DropColumnStage 105 106 downstreamMeta *DownstreamMeta 107 } 108 109 // NewLock creates a new Lock instance. 110 func NewLock(cli *clientv3.Client, id, task, downSchema, downTable string, initTable schemacmp.Table, tts []TargetTable, downstreamMeta *DownstreamMeta) *Lock { 111 l := &Lock{ 112 cli: cli, 113 ID: id, 114 Task: task, 115 DownSchema: downSchema, 116 DownTable: downTable, 117 initTable: initTable, 118 tables: make(map[string]map[string]map[string]schemacmp.Table), 119 conflictTables: make(map[string]map[string]map[string]schemacmp.Table), 120 finalTables: make(map[string]map[string]map[string]schemacmp.Table), 121 done: make(map[string]map[string]map[string]bool), 122 synced: true, 123 versions: make(map[string]map[string]map[string]int64), 124 columns: make(map[string]map[string]map[string]map[string]DropColumnStage), 125 downstreamMeta: downstreamMeta, 126 } 127 l.addTables(tts) 128 metrics.ReportDDLPending(task, metrics.DDLPendingNone, metrics.DDLPendingSynced) 129 return l 130 } 131 132 // FetchTableInfos fetch all table infos for a lock. 133 func (l *Lock) FetchTableInfos(task, source, schema, table string) (*model.TableInfo, error) { 134 if l.downstreamMeta == nil { 135 return nil, terror.ErrMasterOptimisticDownstreamMetaNotFound.Generate(task) 136 } 137 138 db, err := conn.GetDownstreamDB(l.downstreamMeta.dbConfig) 139 if err != nil { 140 return nil, err 141 } 142 defer db.Close() 143 144 ctx, cancel := context.WithTimeout(context.Background(), dbutil.DefaultTimeout) 145 defer cancel() 146 147 query := `SELECT table_info FROM ` + dbutil.TableName(l.downstreamMeta.meta, cputil.SyncerCheckpoint(task)) + ` WHERE id = ? AND cp_schema = ? AND cp_table = ?` 148 row := db.DB.QueryRowContext(ctx, query, source, schema, table) 149 if row.Err() != nil { 150 return nil, terror.ErrDBExecuteFailed.Delegate(row.Err(), query) 151 } 152 var tiBytes []byte 153 if err := row.Scan(&tiBytes); err != nil { 154 return nil, terror.ErrDBExecuteFailed.Delegate(err, query) 155 } 156 var ti *model.TableInfo 157 if bytes.Equal(tiBytes, []byte("null")) { 158 log.L().Warn("null table info", zap.String("query", query), zap.String("source", source), zap.String("schema", schema), zap.String("table", table)) 159 return nil, terror.ErrMasterOptimisticDownstreamMetaNotFound.Generate(task) 160 } 161 if err := json.Unmarshal(tiBytes, &ti); err != nil { 162 return nil, err 163 } 164 return ti, nil 165 } 166 167 // TrySync tries to sync the lock, re-entrant. 168 // new upstream sources may join when the DDL lock is in syncing, 169 // so we need to merge these new sources. 170 // NOTE: now, any error returned, we treat it as conflict detected. 171 // NOTE: now, DDLs (not empty) returned when resolved the conflict, but in fact these DDLs should not be replicated to the downstream. 172 // NOTE: now, `TrySync` can detect and resolve conflicts in both of the following modes: 173 // - non-intrusive: update the schema of non-conflict tables to match the conflict tables. 174 // data from conflict tables are non-intrusive. 175 // - intrusive: revert the schema of the conflict tables to match the non-conflict tables. 176 // data from conflict tables are intrusive. 177 // 178 // TODO: but both of these modes are difficult to be implemented in DM-worker now, try to do that later. 179 // for non-intrusive, a broadcast mechanism needed to notify conflict tables after the conflict has resolved, or even a block mechanism needed. 180 // for intrusive, a DML prune or transform mechanism needed for two different schemas (before and after the conflict resolved). 181 func (l *Lock) TrySync(info Info, tts []TargetTable) (newDDLs []string, cols []string, err error) { 182 var ( 183 callerSource = info.Source 184 callerSchema = info.UpSchema 185 callerTable = info.UpTable 186 ddls = info.DDLs 187 emptyDDLs = []string{} 188 emptyCols = []string{} 189 newTIs = info.TableInfosAfter 190 infoVersion = info.Version 191 ignoreConflict = info.IgnoreConflict 192 oldSynced = l.synced 193 ) 194 l.mu.Lock() 195 defer func() { 196 _, remain := l.syncStatus() 197 l.synced = remain == 0 198 if oldSynced != l.synced { 199 if oldSynced { 200 metrics.ReportDDLPending(l.Task, metrics.DDLPendingSynced, metrics.DDLPendingUnSynced) 201 } else { 202 metrics.ReportDDLPending(l.Task, metrics.DDLPendingUnSynced, metrics.DDLPendingSynced) 203 } 204 } 205 if len(newDDLs) > 0 || (err != nil && (terror.ErrShardDDLOptimismNeedSkipAndRedirect.Equal(err) || 206 terror.ErrShardDDLOptimismTrySyncFail.Equal(err))) { 207 // revert the `done` status if need to wait for the new operation to be done. 208 // Now, we wait for the new operation to be done if any DDLs returned. 209 l.tryRevertDone(callerSource, callerSchema, callerTable) 210 } 211 l.mu.Unlock() 212 }() 213 214 // should not happen 215 if len(ddls) != len(newTIs) || len(newTIs) == 0 { 216 return emptyDDLs, emptyCols, terror.ErrMasterInconsistentOptimisticDDLsAndInfo.Generate(len(ddls), len(newTIs)) 217 } 218 // should not happen 219 if info.TableInfoBefore == nil { 220 return emptyDDLs, emptyCols, terror.ErrMasterOptimisticTableInfoBeforeNotExist.Generate(ddls) 221 } 222 223 defer func() { 224 if err == nil && len(cols) > 0 { 225 err = l.AddDroppedColumns(callerSource, callerSchema, callerTable, cols) 226 } 227 // only update table info if no error or ignore conflict or conflict DDL 228 if err != nil { 229 var revertInfo schemacmp.Table 230 switch { 231 case ignoreConflict: 232 // forcely set schema for --ignore-conflict 233 revertInfo = schemacmp.Encode(newTIs[len(newTIs)-1]) 234 case terror.ErrShardDDLOptimismNeedSkipAndRedirect.Equal(err): 235 return 236 default: 237 revertInfo = schemacmp.Encode(info.TableInfoBefore) 238 } 239 l.tables[callerSource][callerSchema][callerTable] = revertInfo 240 l.finalTables[callerSource][callerSchema][callerTable] = revertInfo 241 l.removeConflictTable(callerSource, callerSchema, callerTable) 242 } 243 }() 244 245 // handle the case where <callerSource, callerSchema, callerTable> 246 // is not in old source tables and current new source tables. 247 // duplicate append is not a problem. 248 tts = append(tts, newTargetTable(l.Task, callerSource, l.DownSchema, l.DownTable, 249 map[string]map[string]struct{}{callerSchema: {callerTable: struct{}{}}})) 250 // add any new source tables. 251 l.addTables(tts) 252 if val, ok := l.versions[callerSource][callerSchema][callerTable]; !ok || val < infoVersion { 253 l.versions[callerSource][callerSchema][callerTable] = infoVersion 254 } 255 256 newDDLs = []string{} 257 cols = []string{} 258 prevTable := schemacmp.Encode(info.TableInfoBefore) 259 // join and compare every new table info 260 for idx, ti := range newTIs { 261 postTable := schemacmp.Encode(ti) 262 schemaChanged, conflictStage := l.trySyncForOneDDL(callerSource, callerSchema, callerTable, prevTable, postTable) 263 264 switch conflictStage { 265 case ConflictDetected: 266 return emptyDDLs, emptyCols, terror.ErrShardDDLOptimismTrySyncFail.Generate(l.ID, fmt.Sprintf("there will be conflicts if DDLs %s are applied to the downstream. old table info: %s, new table info: %s", ddls[idx], prevTable, postTable)) 267 case ConflictNone: 268 if col, err := l.checkAddDropColumn(callerSource, callerSchema, callerTable, ddls[idx], prevTable, postTable, cols); err != nil { 269 return emptyDDLs, emptyCols, err 270 } else if len(col) != 0 { 271 cols = append(cols, col) 272 } 273 case ConflictSkipWaitRedirect: 274 return newDDLs, cols, terror.ErrShardDDLOptimismNeedSkipAndRedirect.Generate(l.ID, ddls[idx]) 275 case ConflictResolved: 276 log.L().Info("all conflict DDL resolved", zap.String("DDL", ddls[idx]), zap.String("callerSource", callerSource), 277 zap.String("callerSchema", callerSchema), zap.String("callerTable", callerTable)) 278 } 279 280 if schemaChanged { 281 newDDLs = append(newDDLs, ddls[idx]) 282 } 283 prevTable = postTable 284 } 285 return newDDLs, cols, nil 286 } 287 288 // TryRemoveTable tries to remove a table in the lock. 289 // it returns whether the table has been removed. 290 // TODO: it does NOT try to rebuild the joined schema after the table removed now. 291 // try to support this if needed later. 292 // NOTE: if no table exists in the lock after removed the table, 293 // it's the caller's responsibility to decide whether remove the lock or not. 294 func (l *Lock) TryRemoveTable(source, schema, table string) []string { 295 l.mu.Lock() 296 defer l.mu.Unlock() 297 298 if _, ok := l.tables[source]; !ok { 299 return nil 300 } 301 if _, ok := l.tables[source][schema]; !ok { 302 return nil 303 } 304 305 ti, ok := l.tables[source][schema][table] 306 if !ok { 307 return nil 308 } 309 310 // delete drop columns 311 dropColumns := make([]string, 0) 312 for col, sourceColumns := range l.columns { 313 if schemaColumns, ok := sourceColumns[source]; ok { 314 if tableColumn, ok := schemaColumns[schema]; ok { 315 if _, ok := tableColumn[table]; ok { 316 dropColumns = append(dropColumns, col) 317 delete(tableColumn, table) 318 if len(tableColumn) == 0 { 319 delete(schemaColumns, schema) 320 } 321 } 322 } 323 } 324 } 325 326 delete(l.tables[source][schema], table) 327 delete(l.finalTables[source][schema], table) 328 l.removeConflictTable(source, schema, table) 329 _, remain := l.syncStatus() 330 l.synced = remain == 0 331 delete(l.done[source][schema], table) 332 delete(l.versions[source][schema], table) 333 log.L().Info("table removed from the lock", zap.String("lock", l.ID), 334 zap.String("source", source), zap.String("schema", schema), zap.String("table", table), 335 zap.Stringer("table info", ti)) 336 return dropColumns 337 } 338 339 // TryRemoveTable tries to remove tables in the lock by sources. 340 // return drop columns for later use. 341 func (l *Lock) TryRemoveTableBySources(sources []string) []string { 342 l.mu.Lock() 343 defer l.mu.Unlock() 344 345 // record drop columns for sources 346 dropColumns := make([]string, 0) 347 for col, sourceColumns := range l.columns { 348 for _, source := range sources { 349 if _, ok := sourceColumns[source]; ok { 350 dropColumns = append(dropColumns, col) 351 break 352 } 353 } 354 } 355 356 for _, source := range sources { 357 if _, ok := l.tables[source]; !ok { 358 continue 359 } 360 361 delete(l.tables, source) 362 delete(l.finalTables, source) 363 delete(l.conflictTables, source) 364 _, remain := l.syncStatus() 365 l.synced = remain == 0 366 delete(l.done, source) 367 delete(l.versions, source) 368 for _, sourceColumns := range l.columns { 369 delete(sourceColumns, source) 370 } 371 log.L().Info("tables removed from the lock", zap.String("lock", l.ID), zap.String("source", source)) 372 } 373 return dropColumns 374 } 375 376 // HasTables check whether a lock has tables. 377 func (l *Lock) HasTables() bool { 378 l.mu.Lock() 379 defer l.mu.Unlock() 380 381 for _, schemas := range l.tables { 382 for _, tables := range schemas { 383 for range tables { 384 return true 385 } 386 } 387 } 388 return false 389 } 390 391 // UpdateTableAfterUnlock updates table's schema info after unlock exec action. 392 func (l *Lock) UpdateTableAfterUnlock(info Info) { 393 l.mu.Lock() 394 defer l.mu.Unlock() 395 var ok bool 396 if _, ok = l.tables[info.Source]; !ok { 397 l.tables[info.Source] = make(map[string]map[string]schemacmp.Table) 398 } 399 if _, ok = l.tables[info.Source][info.UpSchema]; !ok { 400 l.tables[info.Source][info.UpSchema] = make(map[string]schemacmp.Table) 401 } 402 l.tables[info.Source][info.UpSchema][info.UpTable] = schemacmp.Encode(info.TableInfosAfter[len(info.TableInfosAfter)-1]) 403 } 404 405 // IsSynced returns whether the lock has synced. 406 // In the optimistic mode, we call it `synced` if table info of all tables are the same, 407 // and we define `remain` as the table count which have different table info with the joined one, 408 // e.g. for `ADD COLUMN`, it's the table count which have not added the column, 409 // for `DROP COLUMN`, it's the table count which have dropped the column. 410 func (l *Lock) IsSynced() (bool, int) { 411 l.mu.RLock() 412 defer l.mu.RUnlock() 413 _, remain := l.syncStatus() 414 return remain == 0, remain 415 } 416 417 // Ready returns the source tables' sync status (whether they are ready). 418 // we define `ready` if the table's info is the same with the joined one, 419 // e.g for `ADD COLUMN`, it's true if it has added the column, 420 // for `DROP COLUMN`, it's true if it has not dropped the column. 421 func (l *Lock) Ready() map[string]map[string]map[string]bool { 422 l.mu.RLock() 423 defer l.mu.RUnlock() 424 ready, _ := l.syncStatus() 425 return ready 426 } 427 428 // Joined returns the joined table info. 429 func (l *Lock) Joined() (schemacmp.Table, error) { 430 l.mu.RLock() 431 defer l.mu.RUnlock() 432 return l.joinNormalTables() 433 } 434 435 // TryMarkDone tries to mark the operation of the source table as done. 436 // it returns whether marked done. 437 // NOTE: this method can always mark a existing table as done, 438 // so the caller of this method should ensure that the table has done the DDLs operation. 439 // NOTE: a done table may revert to not-done if new table schema received and new DDLs operation need to be done. 440 func (l *Lock) TryMarkDone(source, schema, table string) bool { 441 l.mu.Lock() 442 defer l.mu.Unlock() 443 444 if _, ok := l.done[source]; !ok { 445 return false 446 } 447 if _, ok := l.done[source][schema]; !ok { 448 return false 449 } 450 if _, ok := l.done[source][schema][table]; !ok { 451 return false 452 } 453 454 // always mark it as `true` now. 455 l.done[source][schema][table] = true 456 return true 457 } 458 459 // IsDone returns whether the operation of the source table has done. 460 func (l *Lock) IsDone(source, schema, table string) bool { 461 l.mu.RLock() 462 defer l.mu.RUnlock() 463 464 if _, ok := l.done[source]; !ok { 465 return false 466 } 467 if _, ok := l.done[source][schema]; !ok { 468 return false 469 } 470 if _, ok := l.done[source][schema][table]; !ok { 471 return false 472 } 473 return l.done[source][schema][table] 474 } 475 476 // IsResolved returns whether the lock has resolved. 477 // return true if all tables have the same schema and all DDLs operations have done. 478 func (l *Lock) IsResolved() bool { 479 l.mu.RLock() 480 defer l.mu.RUnlock() 481 482 // whether all tables have the same schema. 483 if _, remain := l.syncStatus(); remain != 0 { 484 return false 485 } 486 487 // whether all tables have done DDLs operations. 488 for _, schemaTables := range l.done { 489 for _, tables := range schemaTables { 490 for _, done := range tables { 491 if !done { 492 return false 493 } 494 } 495 } 496 } 497 return true 498 } 499 500 // syncedStatus returns the current tables' sync status (<Ready, remain>). 501 func (l *Lock) syncStatus() (map[string]map[string]map[string]bool, int) { 502 ready := make(map[string]map[string]map[string]bool) 503 remain := 0 504 joined, joinedErr := l.joinFinalTables() 505 for source, schemaTables := range l.finalTables { 506 if _, ok := ready[source]; !ok { 507 ready[source] = make(map[string]map[string]bool) 508 } 509 for schema, tables := range schemaTables { 510 if _, ok := ready[source][schema]; !ok { 511 ready[source][schema] = make(map[string]bool) 512 } 513 for table, ti := range tables { 514 if joinedErr == nil { 515 if cmp, err := joined.Compare(ti); err == nil && cmp == 0 { 516 ready[source][schema][table] = true 517 continue 518 } 519 } 520 ready[source][schema][table] = false 521 remain++ 522 } 523 } 524 } 525 return ready, remain 526 } 527 528 // tryRevertDone tries to revert the done status when the table's schema changed. 529 func (l *Lock) tryRevertDone(source, schema, table string) { 530 if _, ok := l.done[source]; !ok { 531 return 532 } 533 if _, ok := l.done[source][schema]; !ok { 534 return 535 } 536 if _, ok := l.done[source][schema][table]; !ok { 537 return 538 } 539 l.done[source][schema][table] = false 540 } 541 542 // AddTable create a table in lock. 543 func (l *Lock) AddTable(source, schema, table string, needLock bool) { 544 if needLock { 545 l.mu.Lock() 546 defer l.mu.Unlock() 547 } 548 if _, ok := l.tables[source]; !ok { 549 l.tables[source] = make(map[string]map[string]schemacmp.Table) 550 l.finalTables[source] = make(map[string]map[string]schemacmp.Table) 551 l.done[source] = make(map[string]map[string]bool) 552 l.versions[source] = make(map[string]map[string]int64) 553 } 554 if _, ok := l.tables[source][schema]; !ok { 555 l.tables[source][schema] = make(map[string]schemacmp.Table) 556 l.finalTables[source][schema] = make(map[string]schemacmp.Table) 557 l.done[source][schema] = make(map[string]bool) 558 l.versions[source][schema] = make(map[string]int64) 559 } 560 if _, ok := l.tables[source][schema][table]; !ok { 561 ti, err := l.FetchTableInfos(l.Task, source, schema, table) 562 if err != nil { 563 log.L().Error("source table info not found, use init table info instead", zap.String("task", l.Task), zap.String("source", source), zap.String("schema", schema), zap.String("table", table), log.ShortError(err)) 564 l.tables[source][schema][table] = l.initTable 565 l.finalTables[source][schema][table] = l.initTable 566 } else { 567 t := schemacmp.Encode(ti) 568 log.L().Debug("get source table info", zap.String("task", l.Task), zap.String("source", source), zap.String("schema", schema), zap.String("table", table), zap.Stringer("info", t)) 569 l.tables[source][schema][table] = t 570 l.finalTables[source][schema][table] = t 571 } 572 l.done[source][schema][table] = false 573 l.versions[source][schema][table] = 0 574 log.L().Info("table added to the lock", zap.String("lock", l.ID), 575 zap.String("source", source), zap.String("schema", schema), zap.String("table", table), 576 zap.Stringer("table info", l.initTable)) 577 } 578 } 579 580 // addTables adds any not-existing tables into the lock. 581 // For a new table, try to fetch table info from downstream. 582 func (l *Lock) addTables(tts []TargetTable) { 583 for _, tt := range tts { 584 for schema, tables := range tt.UpTables { 585 for table := range tables { 586 l.AddTable(tt.Source, schema, table, false) 587 } 588 } 589 } 590 } 591 592 // GetVersion return version of info in lock. 593 func (l *Lock) GetVersion(source string, schema string, table string) int64 { 594 l.mu.RLock() 595 defer l.mu.RUnlock() 596 597 return l.versions[source][schema][table] 598 } 599 600 // IsDroppedColumn checks whether this column is a partially dropped column for this lock. 601 func (l *Lock) IsDroppedColumn(source, upSchema, upTable, col string) bool { 602 if _, ok := l.columns[col]; !ok { 603 return false 604 } 605 if _, ok := l.columns[col][source]; !ok { 606 return false 607 } 608 if _, ok := l.columns[col][source][upSchema]; !ok { 609 return false 610 } 611 if _, ok := l.columns[col][source][upSchema][upTable]; !ok { 612 return false 613 } 614 return true 615 } 616 617 // AddDroppedColumns adds a dropped column name in both etcd and lock's column map. 618 func (l *Lock) AddDroppedColumns(source, schema, table string, cols []string) error { 619 newCols := make([]string, 0, len(cols)) 620 for _, col := range cols { 621 if !l.IsDroppedColumn(source, schema, table, col) { 622 newCols = append(newCols, col) 623 } 624 } 625 log.L().Info("add partially dropped columns", zap.Strings("columns", newCols), zap.String("source", source), zap.String("schema", schema), zap.String("table", table)) 626 627 if len(newCols) > 0 { 628 _, _, err := PutDroppedColumns(l.cli, l.ID, source, schema, table, newCols, DropNotDone) 629 if err != nil { 630 return err 631 } 632 } 633 634 for _, col := range newCols { 635 if _, ok := l.columns[col]; !ok { 636 l.columns[col] = make(map[string]map[string]map[string]DropColumnStage) 637 } 638 if _, ok := l.columns[col][source]; !ok { 639 l.columns[col][source] = make(map[string]map[string]DropColumnStage) 640 } 641 if _, ok := l.columns[col][source][schema]; !ok { 642 l.columns[col][source][schema] = make(map[string]DropColumnStage) 643 } 644 l.columns[col][source][schema][table] = DropNotDone 645 } 646 return nil 647 } 648 649 // DeleteColumnsByOp deletes the partially dropped columns that extracted from operation. 650 // We can not remove columns from the partially dropped columns map unless: 651 // this column is dropped in the downstream database, 652 // all the upstream source done the delete column operation 653 // that is to say, columns all done. 654 func (l *Lock) DeleteColumnsByOp(op Operation) error { 655 l.mu.Lock() 656 defer l.mu.Unlock() 657 658 doneCols := make(map[string]struct{}, len(op.DDLs)) 659 for _, ddl := range op.DDLs { 660 col, err := GetColumnName(l.ID, ddl, ast.AlterTableDropColumn) 661 if err != nil { 662 return err 663 } 664 if len(col) > 0 { 665 doneCols[col] = struct{}{} 666 } 667 } 668 669 colsToDelete := make([]string, 0, len(op.Cols)) 670 for _, col := range op.Cols { 671 done := DropPartiallyDone 672 if l.IsDroppedColumn(op.Source, op.UpSchema, op.UpTable, col) { 673 if _, ok := doneCols[col]; ok { 674 done = DropDone 675 } 676 // mark col PartiallyDone/Done 677 _, _, err := PutDroppedColumns(l.cli, op.ID, op.Source, op.UpSchema, op.UpTable, []string{col}, done) 678 if err != nil { 679 log.L().Error("cannot put drop column to etcd", log.ShortError(err)) 680 return err 681 } 682 l.columns[col][op.Source][op.UpSchema][op.UpTable] = done 683 } 684 685 allDone := true 686 dropDone := false 687 OUTER: 688 for _, schemaCols := range l.columns[col] { 689 for _, tableCols := range schemaCols { 690 for _, done := range tableCols { 691 if done == DropDone { 692 dropDone = true 693 } 694 if done == DropNotDone { 695 allDone = false 696 break OUTER 697 } 698 } 699 } 700 } 701 if allDone && dropDone { 702 colsToDelete = append(colsToDelete, col) 703 } 704 } 705 706 if len(colsToDelete) > 0 { 707 log.L().Info("delete partially dropped columns", 708 zap.String("lockID", l.ID), zap.Strings("columns", colsToDelete)) 709 710 _, _, err := DeleteDroppedColumns(l.cli, op.ID, colsToDelete...) 711 if err != nil { 712 return err 713 } 714 715 for _, col := range colsToDelete { 716 delete(l.columns, col) 717 } 718 } 719 720 return nil 721 } 722 723 // AddDifferentFieldLenColumns checks whether dm adds columns with different field lengths. 724 func AddDifferentFieldLenColumns(lockID, ddl string, oldJoined, newJoined schemacmp.Table) (string, error) { 725 col, err := GetColumnName(lockID, ddl, ast.AlterTableAddColumns) 726 if err != nil { 727 return col, err 728 } 729 if len(col) > 0 { 730 oldJoinedCols := schemacmp.DecodeColumnFieldTypes(oldJoined) 731 newJoinedCols := schemacmp.DecodeColumnFieldTypes(newJoined) 732 oldCol, ok1 := oldJoinedCols[col] 733 newCol, ok2 := newJoinedCols[col] 734 if ok1 && ok2 && newCol.GetFlen() != oldCol.GetFlen() { 735 return col, terror.ErrShardDDLOptimismAddNotFullyDroppedColumn.Generate( 736 lockID, fmt.Sprintf("add columns with different field lengths. "+ 737 "ddl: %s, origLen: %d, newLen: %d", ddl, oldCol.GetFlen(), newCol.GetFlen())) 738 } 739 } 740 return col, nil 741 } 742 743 // GetColumnName checks whether dm adds/drops a column, and return this column's name. 744 func GetColumnName(lockID, ddl string, tp ast.AlterTableType) (string, error) { 745 if stmt, err := parser.New().ParseOneStmt(ddl, "", ""); err != nil { 746 return "", terror.ErrShardDDLOptimismAddNotFullyDroppedColumn.Delegate( 747 err, lockID, fmt.Sprintf("fail to parse ddl %s", ddl)) 748 } else if v, ok := stmt.(*ast.AlterTableStmt); ok && len(v.Specs) > 0 { 749 spec := v.Specs[0] 750 if spec.Tp == tp { 751 switch spec.Tp { 752 case ast.AlterTableAddColumns: 753 if len(spec.NewColumns) > 0 { 754 return spec.NewColumns[0].Name.Name.O, nil 755 } 756 case ast.AlterTableDropColumn: 757 if spec.OldColumnName != nil { 758 return spec.OldColumnName.Name.O, nil 759 } 760 } 761 } 762 } 763 return "", nil 764 } 765 766 func contains(s []string, e string) bool { 767 for _, a := range s { 768 if a == e { 769 return true 770 } 771 } 772 return false 773 } 774 775 // checkAddDropColumn check for ALTER TABLE ADD/DROP COLUMN statement 776 // FOR ADD COLUMN, check whether add column with a different field or add a dropped column 777 // FOR DROP COLUMN, return the droped column. 778 func (l *Lock) checkAddDropColumn(source, schema, table string, ddl string, prevTable, postTable schemacmp.Table, newDropColumns []string) (string, error) { 779 currTable := l.tables[source][schema][table] 780 defer func() { 781 l.tables[source][schema][table] = currTable 782 }() 783 784 l.tables[source][schema][table] = prevTable 785 oldJoined, err := l.joinNormalTables() 786 if err != nil { 787 // nolint:nilerr 788 return "", nil 789 } 790 791 l.tables[source][schema][table] = postTable 792 newJoined, err := l.joinNormalTables() 793 if err != nil { 794 // nolint:nilerr 795 return "", nil 796 } 797 798 cmp, err := oldJoined.Compare(newJoined) 799 if err != nil { 800 // nolint:nilerr 801 return "", nil 802 } 803 804 if cmp <= 0 { 805 if col, err2 := AddDifferentFieldLenColumns(l.ID, ddl, oldJoined, newJoined); err2 != nil { 806 // check for add column with a larger field len 807 return "", err2 808 } else if _, err2 = AddDifferentFieldLenColumns(l.ID, ddl, postTable, newJoined); err2 != nil { 809 // check for add column with a smaller field len 810 return "", err2 811 } else if len(col) > 0 && (l.IsDroppedColumn(source, schema, table, col) || contains(newDropColumns, col)) { 812 return "", terror.ErrShardDDLOptimismAddNotFullyDroppedColumn.Generate(l.ID, fmt.Sprintf("add column %s that wasn't fully dropped in downstream. ddl: %s", col, ddl)) 813 } 814 } 815 816 if cmp >= 0 { 817 if col, err2 := GetColumnName(l.ID, ddl, ast.AlterTableDropColumn); err2 != nil { 818 return "", err2 819 } else if len(col) > 0 { 820 return col, nil 821 } 822 } 823 return "", nil 824 } 825 826 // trySyncForOneDDL try sync for a DDL operation. 827 // e.g. `ALTER TABLE ADD COLUMN a, RENAME b TO c, DROP COLUMN d' will call this func three times. 828 // return whether joined table is changed and whether there is a conflict. 829 func (l *Lock) trySyncForOneDDL(source, schema, table string, prevTable, postTable schemacmp.Table) (schemaChanged bool, conflictStage ConflictStage) { 830 // we only support resolve one conflict DDL per table, 831 // so reset conflict table after receive new table info. 832 l.removeConflictTable(source, schema, table) 833 l.finalTables[source][schema][table] = l.tables[source][schema][table] 834 835 // For idempotent DDL 836 // this often happens when an info TrySync twice, e.g. worker restart/resume task 837 idempotent := false 838 if cmp, err := prevTable.Compare(l.tables[source][schema][table]); err != nil || cmp != 0 { 839 if cmp, err := postTable.Compare(l.tables[source][schema][table]); err == nil && cmp == 0 { 840 idempotent = true 841 } 842 log.L().Warn("prev-table not equal table saved in master", zap.Stringer("master-table", l.tables[source][schema][table]), zap.Stringer("prev-table", prevTable)) 843 l.tables[source][schema][table] = prevTable 844 l.finalTables[source][schema][table] = prevTable 845 } 846 847 tableCmp, tableErr := prevTable.Compare(postTable) 848 849 // Normal DDL 850 if tableErr == nil { 851 log.L().Info("receive a normal DDL", zap.String("source", source), zap.String("schema", schema), zap.String("table", table), zap.Stringer("prevTable", prevTable), zap.Stringer("postTable", postTable)) 852 oldJoined, oldErr := l.joinNormalTables() 853 854 l.tables[source][schema][table] = postTable 855 l.finalTables[source][schema][table] = postTable 856 857 newJoined, newErr := l.joinNormalTables() 858 // normal DDL can be sync if no error 859 if newErr == nil { 860 // if a normal DDL let all final tables become no conflict 861 // return ConflictNone 862 if len(l.conflictTables) > 0 && l.noConflictForFinalTables() { 863 log.L().Info("all conflict resolved for the DDL", zap.String("source", source), zap.String("schema", schema), zap.String("table", table), zap.Stringer("prevTable", prevTable), zap.Stringer("postTable", postTable)) 864 err := l.redirectForConflictTables(source, schema, table) 865 if err != nil { 866 log.L().Error("failed to put redirect operation for conflict tables", log.ShortError(err)) 867 return false, ConflictDetected 868 } 869 l.resolveTables() 870 return true, ConflictNone 871 } 872 873 if oldErr != nil { 874 return true, ConflictNone 875 } 876 joinedCmp, joinedErr := oldJoined.Compare(newJoined) 877 // special case: if the DDL does not affect the schema at all, assume it is 878 // idempotent and just execute the DDL directly. 879 // this often happens when executing `CREATE TABLE` statement 880 cmp, err2 := postTable.Compare(oldJoined) 881 882 // return schema changed in 3 cases 883 // oldJoined != newJoined 884 // postTable == oldJoined (CREATE TABLE) 885 // prevTable < postTable 886 // prevTable == postTable(Partition/Sequence) 887 return (joinedErr != nil || joinedCmp != 0) || (err2 == nil && cmp == 0) || tableCmp <= 0, ConflictNone 888 } 889 } 890 891 log.L().Info("found conflict for DDL", zap.String("source", source), zap.String("schema", schema), zap.String("table", table), zap.Stringer("prevTable", prevTable), zap.Stringer("postTable", postTable), log.ShortError(tableErr)) 892 893 if idempotent || l.noConflictWithOneNormalTable(source, schema, table, prevTable, postTable) { 894 log.L().Info("directly return conflict DDL", zap.Bool("idempotent", idempotent), zap.String("source", source), zap.String("schema", schema), zap.String("table", table), zap.Stringer("prevTable", prevTable), zap.Stringer("postTable", postTable)) 895 l.tables[source][schema][table] = postTable 896 l.finalTables[source][schema][table] = postTable 897 return true, ConflictNone 898 } 899 900 // meet conflict DDL 901 // revert tables and update conflictTables and finalTables 902 l.tables[source][schema][table] = prevTable 903 l.addConflictTable(source, schema, table, postTable) 904 l.finalTables[source][schema][table] = postTable 905 906 // if any conflict happened between conflict DDLs, return error 907 // e.g. tb1: "ALTER TABLE RENAME a TO b", tb2: "ALTER TABLE RENAME c TO d" 908 if !l.noConflictForConflictTables() { 909 log.L().Error("conflict happened with other conflict tables", zap.String("source", source), zap.String("schema", schema), zap.String("table", table), zap.Stringer("prevTable", prevTable), zap.Stringer("postTable", postTable)) 910 return false, ConflictDetected 911 } 912 913 if l.noConflictForFinalTables() { 914 log.L().Info("all conflict resolved for the DDL", zap.String("source", source), zap.String("schema", schema), zap.String("table", table), zap.Stringer("prevTable", prevTable), zap.Stringer("postTable", postTable)) 915 err := l.redirectForConflictTables(source, schema, table) 916 if err != nil { 917 log.L().Error("failed to put redirect operation for conflict tables", log.ShortError(err)) 918 return false, ConflictDetected 919 } 920 l.resolveTables() 921 922 return true, ConflictNone 923 } 924 log.L().Info("conflict hasn't been resolved", zap.String("source", source), zap.String("schema", schema), zap.String("table", table), zap.Stringer("prevTable", prevTable), zap.Stringer("postTable", postTable)) 925 return false, ConflictSkipWaitRedirect 926 } 927 928 // joinTables join tables by tableType. 929 func (l *Lock) joinTables(tp tableType) (schemacmp.Table, error) { 930 var ( 931 joined schemacmp.Table 932 allTables map[string]map[string]map[string]schemacmp.Table 933 firstTable = true 934 ) 935 936 switch tp { 937 case conflictTables: 938 allTables = l.conflictTables 939 case finalTables: 940 allTables = l.finalTables 941 default: 942 allTables = l.tables 943 } 944 945 for source, schemaTables := range allTables { 946 for schema, tables := range schemaTables { 947 for table, ti := range tables { 948 if firstTable { 949 joined = ti 950 firstTable = false 951 continue 952 } 953 954 newJoined, err := joined.Join(ti) 955 if err != nil { 956 return newJoined, errors.Errorf("failed to join tables with %s.%s.%s, joined: %s, table: %s, root cause: %s", source, schema, table, joined.String(), ti.String(), err.Error()) 957 } 958 joined = newJoined 959 } 960 } 961 } 962 963 return joined, nil 964 } 965 966 // Compare(joined,prev_tbx) == error 967 // For a conflict DDL make table become part of larger and another part of smaller, 968 // this function make sure all tables that need to be judged become part of smaller. 969 // e.g. `ALTER TABLE RENAME a TO b`, this function check whether all tables do not contain `a`. 970 // Prove: 971 // 972 // Compare(joined,prev_tbk) == error 973 // 974 // => Joined ⊇ prev_tbk-{a}+{b} && Joined ⊅ prev_tbk 975 // => a ∉ Joined. 976 func (l *Lock) allTableSmaller(tp tableType) bool { 977 var ( 978 joined schemacmp.Table 979 err error 980 ) 981 switch tp { 982 case conflictTables: 983 joined, err = l.joinConflictTables() 984 default: 985 joined, err = l.joinFinalTables() 986 } 987 988 if err != nil { 989 return false 990 } 991 992 for source, schemaTables := range l.conflictTables { 993 for schema, tables := range schemaTables { 994 for table := range tables { 995 ti := l.tables[source][schema][table] 996 997 if _, err = joined.Compare(ti); err == nil { 998 return false 999 } 1000 } 1001 } 1002 } 1003 return true 1004 } 1005 1006 // Compare(Join(prev_tbx,tabley),post_tbx)>=0 1007 // For a conflict DDL make table become part of larger and another part of smaller, 1008 // this function make sure all the tables that need to be judged become part of larger. 1009 // e.g `ALTER TABLE RENAME a TO b`, this function check whether all tables contain `b`. 1010 // Prove: 1011 // 1012 // Compare(Join(prev_tbx,tabley),post_tbx)>=0 1013 // 1014 // => Compare(Join(prev_tbk,tabley),prev_tbk-{a}+{b})>=0 1015 // => Join(prev_tbk,tabley) ⊇ prev_tbk-{a}+{b} 1016 // => b ∈ tabley. 1017 func (l *Lock) allTableLarger(tp tableType) bool { 1018 var judgeTables map[string]map[string]map[string]schemacmp.Table 1019 1020 switch tp { 1021 case normalTables: 1022 judgeTables = l.tables 1023 case conflictTables: 1024 judgeTables = l.conflictTables 1025 default: 1026 judgeTables = l.finalTables 1027 } 1028 1029 for source, schemaTables := range l.conflictTables { 1030 for schema, tables := range schemaTables { 1031 for table, conflictTi := range tables { 1032 // for every conflict table's prev_table 1033 ti := l.tables[source][schema][table] 1034 1035 // for every judge table 1036 for _, sTables := range judgeTables { 1037 for _, ts := range sTables { 1038 for _, finalTi := range ts { 1039 joined, err := ti.Join(finalTi) 1040 if err != nil { 1041 // modify column 1042 joined = finalTi 1043 } 1044 if cmp, err := joined.Compare(conflictTi); err != nil || cmp < 0 { 1045 return false 1046 } 1047 } 1048 } 1049 } 1050 } 1051 } 1052 } 1053 return true 1054 } 1055 1056 func (l *Lock) joinNormalTables() (schemacmp.Table, error) { 1057 return l.joinTables(normalTables) 1058 } 1059 1060 func (l *Lock) joinFinalTables() (schemacmp.Table, error) { 1061 return l.joinTables(finalTables) 1062 } 1063 1064 func (l *Lock) joinConflictTables() (schemacmp.Table, error) { 1065 return l.joinTables(conflictTables) 1066 } 1067 1068 func (l *Lock) allConflictTableSmaller() bool { 1069 return l.allTableSmaller(conflictTables) 1070 } 1071 1072 func (l *Lock) allFinalTableSmaller() bool { 1073 return l.allTableSmaller(finalTables) 1074 } 1075 1076 func (l *Lock) allConflictTableLarger() bool { 1077 return l.allTableLarger(conflictTables) 1078 } 1079 1080 func (l *Lock) allFinalTableLarger() bool { 1081 return l.allTableLarger(finalTables) 1082 } 1083 1084 // jude a conflict ddl is no conflict with at least one normal table. 1085 func (l *Lock) noConflictWithOneNormalTable(callerSource, callerSchema, callerTable string, prevTable, postTable schemacmp.Table) bool { 1086 for source, schemaTables := range l.tables { 1087 for schema, tables := range schemaTables { 1088 for table, ti := range tables { 1089 if source == callerSource && schema == callerSchema && table == callerTable { 1090 continue 1091 } 1092 1093 // judge joined no error 1094 joined, err := postTable.Join(ti) 1095 if err != nil { 1096 continue 1097 } 1098 1099 // judge this normal table is smaller(same as allTableSmaller) 1100 if _, err = joined.Compare(prevTable); err == nil { 1101 continue 1102 } 1103 1104 // judge this normal table is larger(same as allTableLarger) 1105 if joined, err = prevTable.Join(ti); err != nil { 1106 joined = ti 1107 } 1108 if cmp, err := joined.Compare(postTable); err != nil || cmp < 0 { 1109 continue 1110 } 1111 1112 return true 1113 } 1114 } 1115 } 1116 return false 1117 } 1118 1119 // judge whether all conflict tables has no conflict. 1120 func (l *Lock) noConflictForConflictTables() bool { 1121 if _, err := l.joinConflictTables(); err != nil { 1122 return false 1123 } 1124 if !l.allConflictTableSmaller() { 1125 return false 1126 } 1127 if !l.allConflictTableLarger() { 1128 return false 1129 } 1130 return true 1131 } 1132 1133 // judge whether all final tables has no conflict. 1134 func (l *Lock) noConflictForFinalTables() bool { 1135 if _, err := l.joinFinalTables(); err != nil { 1136 return false 1137 } 1138 if !l.allFinalTableSmaller() { 1139 return false 1140 } 1141 if !l.allFinalTableLarger() { 1142 return false 1143 } 1144 return true 1145 } 1146 1147 func (l *Lock) addConflictTable(source, schema, table string, ti schemacmp.Table) { 1148 if _, ok := l.conflictTables[source]; !ok { 1149 l.conflictTables[source] = make(map[string]map[string]schemacmp.Table) 1150 } 1151 if _, ok := l.conflictTables[source][schema]; !ok { 1152 l.conflictTables[source][schema] = make(map[string]schemacmp.Table) 1153 } 1154 l.conflictTables[source][schema][table] = ti 1155 } 1156 1157 func (l *Lock) removeConflictTable(source, schema, table string) { 1158 if _, ok := l.conflictTables[source]; !ok { 1159 return 1160 } 1161 if _, ok := l.conflictTables[source][schema]; !ok { 1162 return 1163 } 1164 delete(l.conflictTables[source][schema], table) 1165 if len(l.conflictTables[source][schema]) == 0 { 1166 delete(l.conflictTables[source], schema) 1167 } 1168 if len(l.conflictTables[source]) == 0 { 1169 delete(l.conflictTables, source) 1170 } 1171 } 1172 1173 // resolveTables reset conflictTables and copy tables from final tables. 1174 func (l *Lock) resolveTables() { 1175 l.conflictTables = make(map[string]map[string]map[string]schemacmp.Table) 1176 for source, schemaTables := range l.finalTables { 1177 for schema, tables := range schemaTables { 1178 for table, ti := range tables { 1179 l.tables[source][schema][table] = ti 1180 } 1181 } 1182 } 1183 } 1184 1185 // redirectForConflictTables put redirect Ops for all conflict tables. 1186 func (l *Lock) redirectForConflictTables(callerSource, callerSchema, callerTable string) error { 1187 for source, schemaTables := range l.conflictTables { 1188 for schema, tables := range schemaTables { 1189 for table := range tables { 1190 if source == callerSource && schema == callerSchema && table == callerTable { 1191 // no redirect for caller table 1192 continue 1193 } 1194 op := NewOperation(l.ID, l.Task, source, schema, table, nil, ConflictResolved, "", false, nil) 1195 // TODO(GMHDBJD): put these operation in one transaction 1196 rev, succ, err := PutOperation(l.cli, false, op, 0) 1197 if err != nil { 1198 return err 1199 } 1200 log.L().Info("put redirect operation for conflict table", zap.String("lock", l.ID), 1201 zap.Stringer("operation", op), zap.Bool("succeed", !succ), zap.Int64("revision", rev)) 1202 } 1203 } 1204 } 1205 return nil 1206 }