github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/scheduler/internal/v3/replication/replication_manager_test.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package replication 15 16 import ( 17 "context" 18 "math" 19 "testing" 20 "time" 21 22 "github.com/pingcap/tiflow/cdc/model" 23 "github.com/pingcap/tiflow/cdc/processor/tablepb" 24 "github.com/pingcap/tiflow/cdc/redo/common" 25 "github.com/pingcap/tiflow/cdc/scheduler/schedulepb" 26 "github.com/pingcap/tiflow/pkg/spanz" 27 "github.com/pingcap/tiflow/pkg/util" 28 "github.com/stretchr/testify/require" 29 ) 30 31 func TestReplicationManagerHandleAddTableTask(t *testing.T) { 32 t.Parallel() 33 34 r := NewReplicationManager(10, model.ChangeFeedID{}) 35 addTableCh := make(chan int, 1) 36 // Absent -> Prepare 37 msgs, err := r.HandleTasks([]*ScheduleTask{{ 38 AddTable: &AddTable{ 39 Span: spanz.TableIDToComparableSpan(1), CaptureID: "1", CheckpointTs: 1, 40 }, 41 Accept: func() { 42 addTableCh <- 1 43 close(addTableCh) 44 }, 45 }}) 46 require.Nil(t, err) 47 require.Len(t, msgs, 1) 48 require.EqualValues(t, &schedulepb.Message{ 49 To: "1", 50 MsgType: schedulepb.MsgDispatchTableRequest, 51 DispatchTableRequest: &schedulepb.DispatchTableRequest{ 52 Request: &schedulepb.DispatchTableRequest_AddTable{ 53 AddTable: &schedulepb.AddTableRequest{ 54 Span: spanz.TableIDToComparableSpan(1), 55 IsSecondary: true, 56 Checkpoint: tablepb.Checkpoint{ 57 CheckpointTs: 1, 58 ResolvedTs: 1, 59 }, 60 }, 61 }, 62 }, 63 }, msgs[0]) 64 require.NotNil(t, r.runningTasks.Has(spanz.TableIDToComparableSpan(1))) 65 require.Equal(t, 1, <-addTableCh) 66 67 // Ignore if add the table again. 68 msgs, err = r.HandleTasks([]*ScheduleTask{{ 69 AddTable: &AddTable{Span: spanz.TableIDToComparableSpan(1), CaptureID: "1"}, 70 Accept: func() { t.Fatalf("must not accept") }, 71 }}) 72 require.Nil(t, err) 73 require.Len(t, msgs, 0) 74 75 // Prepare -> Commit. 76 msgs, err = r.HandleMessage([]*schedulepb.Message{{ 77 From: "1", 78 MsgType: schedulepb.MsgDispatchTableResponse, 79 DispatchTableResponse: &schedulepb.DispatchTableResponse{ 80 Response: &schedulepb.DispatchTableResponse_AddTable{ 81 AddTable: &schedulepb.AddTableResponse{ 82 Status: &tablepb.TableStatus{ 83 Span: spanz.TableIDToComparableSpan(1), 84 State: tablepb.TableStatePrepared, 85 }, 86 }, 87 }, 88 }, 89 }}) 90 require.Nil(t, err) 91 require.Len(t, msgs, 1) 92 require.EqualValues(t, &schedulepb.Message{ 93 To: "1", 94 MsgType: schedulepb.MsgDispatchTableRequest, 95 DispatchTableRequest: &schedulepb.DispatchTableRequest{ 96 Request: &schedulepb.DispatchTableRequest_AddTable{ 97 AddTable: &schedulepb.AddTableRequest{ 98 Span: spanz.TableIDToComparableSpan(1), 99 IsSecondary: false, 100 Checkpoint: tablepb.Checkpoint{ 101 CheckpointTs: 1, 102 ResolvedTs: 1, 103 }, 104 }, 105 }, 106 }, 107 }, msgs[0]) 108 require.Equal( 109 t, ReplicationSetStateCommit, r.spans.GetV(spanz.TableIDToComparableSpan(1)).State) 110 require.Equal(t, "1", r.spans.GetV(spanz.TableIDToComparableSpan(1)).Primary) 111 require.False(t, r.spans.GetV(spanz.TableIDToComparableSpan(1)).hasRole(RoleSecondary)) 112 113 // Commit -> Replicating through heartbeat response. 114 msgs, err = r.HandleMessage([]*schedulepb.Message{{ 115 From: "1", 116 MsgType: schedulepb.MsgHeartbeatResponse, 117 HeartbeatResponse: &schedulepb.HeartbeatResponse{ 118 Tables: []tablepb.TableStatus{{ 119 Span: spanz.TableIDToComparableSpan(1), 120 State: tablepb.TableStateReplicating, 121 }}, 122 }, 123 }}) 124 require.Nil(t, err) 125 require.Len(t, msgs, 0) 126 require.Equal( 127 t, ReplicationSetStateReplicating, r.spans.GetV(spanz.TableIDToComparableSpan(1)).State) 128 require.Equal(t, "1", r.spans.GetV(spanz.TableIDToComparableSpan(1)).Primary) 129 require.False(t, r.spans.GetV(spanz.TableIDToComparableSpan(1)).hasRole(RoleSecondary)) 130 131 // Handle task again to clear runningTasks 132 msgs, err = r.HandleTasks(nil) 133 require.Nil(t, err) 134 require.Len(t, msgs, 0) 135 require.Nil(t, r.runningTasks.GetV(spanz.TableIDToComparableSpan(1))) 136 } 137 138 func TestReplicationManagerRemoveTable(t *testing.T) { 139 t.Parallel() 140 141 r := NewReplicationManager(10, model.ChangeFeedID{}) 142 removeTableCh := make(chan int, 1) 143 144 // Ignore remove table if there is no such table. 145 msgs, err := r.HandleTasks([]*ScheduleTask{{ 146 RemoveTable: &RemoveTable{Span: spanz.TableIDToComparableSpan(1), CaptureID: "1"}, 147 Accept: func() { t.Fatal("must not accept") }, 148 }}) 149 require.Nil(t, err) 150 require.Len(t, msgs, 0) 151 152 // Add the table. 153 span := spanz.TableIDToComparableSpan(1) 154 tbl, err := NewReplicationSet(span, 0, map[string]*tablepb.TableStatus{ 155 "1": {Span: span, State: tablepb.TableStateReplicating}, 156 }, model.ChangeFeedID{}) 157 require.Nil(t, err) 158 require.Equal(t, ReplicationSetStateReplicating, tbl.State) 159 r.spans.ReplaceOrInsert(spanz.TableIDToComparableSpan(1), tbl) 160 161 // Remove the table. 162 msgs, err = r.HandleTasks([]*ScheduleTask{{ 163 RemoveTable: &RemoveTable{Span: spanz.TableIDToComparableSpan(1), CaptureID: "1"}, 164 Accept: func() { 165 removeTableCh <- 1 166 close(removeTableCh) 167 }, 168 }}) 169 require.Nil(t, err) 170 require.Len(t, msgs, 1) 171 require.EqualValues(t, &schedulepb.Message{ 172 To: "1", 173 MsgType: schedulepb.MsgDispatchTableRequest, 174 DispatchTableRequest: &schedulepb.DispatchTableRequest{ 175 Request: &schedulepb.DispatchTableRequest_RemoveTable{ 176 RemoveTable: &schedulepb.RemoveTableRequest{Span: span}, 177 }, 178 }, 179 }, msgs[0]) 180 require.NotNil(t, r.runningTasks.Has(spanz.TableIDToComparableSpan(1))) 181 require.Equal(t, 1, <-removeTableCh) 182 183 // Ignore if remove table again. 184 msgs, err = r.HandleTasks([]*ScheduleTask{{ 185 RemoveTable: &RemoveTable{Span: spanz.TableIDToComparableSpan(1), CaptureID: "1"}, 186 Accept: func() { t.Fatalf("must not accept") }, 187 }}) 188 require.Nil(t, err) 189 require.Len(t, msgs, 0) 190 191 // Removing is in-progress through remove table response. 192 msgs, err = r.HandleMessage([]*schedulepb.Message{{ 193 From: "1", 194 MsgType: schedulepb.MsgDispatchTableResponse, 195 DispatchTableResponse: &schedulepb.DispatchTableResponse{ 196 Response: &schedulepb.DispatchTableResponse_RemoveTable{ 197 RemoveTable: &schedulepb.RemoveTableResponse{ 198 Status: &tablepb.TableStatus{ 199 Span: span, 200 State: tablepb.TableStateStopping, 201 }, 202 }, 203 }, 204 }, 205 }}) 206 require.Nil(t, err) 207 require.Len(t, msgs, 0) 208 209 // Removed through heartbeat response. 210 msgs, err = r.HandleMessage([]*schedulepb.Message{{ 211 From: "1", 212 MsgType: schedulepb.MsgHeartbeatResponse, 213 HeartbeatResponse: &schedulepb.HeartbeatResponse{ 214 Tables: []tablepb.TableStatus{{ 215 Span: span, 216 State: tablepb.TableStateStopped, 217 }}, 218 }, 219 }}) 220 require.Nil(t, err) 221 require.Len(t, msgs, 0) 222 require.Nil(t, r.spans.GetV(spanz.TableIDToComparableSpan(1))) 223 224 // Handle task again to clear runningTasks 225 msgs, err = r.HandleTasks(nil) 226 require.Nil(t, err) 227 require.Len(t, msgs, 0) 228 require.Nil(t, r.runningTasks.GetV(spanz.TableIDToComparableSpan(1))) 229 } 230 231 func TestReplicationManagerMoveTable(t *testing.T) { 232 t.Parallel() 233 234 r := NewReplicationManager(10, model.ChangeFeedID{}) 235 moveTableCh := make(chan int, 1) 236 237 source := "1" 238 dest := "2" 239 240 // Ignore move table if it's not exist. 241 msgs, err := r.HandleTasks([]*ScheduleTask{{ 242 MoveTable: &MoveTable{Span: spanz.TableIDToComparableSpan(1), DestCapture: dest}, 243 Accept: func() { t.Fatal("must not accept") }, 244 }}) 245 require.Nil(t, err) 246 require.Len(t, msgs, 0) 247 248 // Add the table. 249 span := spanz.TableIDToComparableSpan(1) 250 tbl, err := NewReplicationSet(span, 0, map[string]*tablepb.TableStatus{ 251 source: {Span: span, State: tablepb.TableStateReplicating}, 252 }, model.ChangeFeedID{}) 253 require.Nil(t, err) 254 require.Equal(t, ReplicationSetStateReplicating, tbl.State) 255 r.spans.ReplaceOrInsert(spanz.TableIDToComparableSpan(1), tbl) 256 257 // Replicating -> Prepare 258 msgs, err = r.HandleTasks([]*ScheduleTask{{ 259 MoveTable: &MoveTable{Span: spanz.TableIDToComparableSpan(1), DestCapture: dest}, 260 Accept: func() { 261 moveTableCh <- 1 262 close(moveTableCh) 263 }, 264 }}) 265 require.Nil(t, err) 266 require.Len(t, msgs, 1) 267 require.EqualValues(t, &schedulepb.Message{ 268 To: dest, 269 MsgType: schedulepb.MsgDispatchTableRequest, 270 DispatchTableRequest: &schedulepb.DispatchTableRequest{ 271 Request: &schedulepb.DispatchTableRequest_AddTable{ 272 AddTable: &schedulepb.AddTableRequest{ 273 Span: span, 274 IsSecondary: true, 275 }, 276 }, 277 }, 278 }, msgs[0]) 279 require.NotNil(t, r.runningTasks.Has(spanz.TableIDToComparableSpan(1))) 280 require.Equal(t, 1, <-moveTableCh) 281 282 // Ignore if move table again. 283 msgs, err = r.HandleTasks([]*ScheduleTask{{ 284 MoveTable: &MoveTable{Span: spanz.TableIDToComparableSpan(1), DestCapture: dest}, 285 Accept: func() { 286 moveTableCh <- 1 287 close(moveTableCh) 288 }, 289 }}) 290 require.Nil(t, err) 291 require.Len(t, msgs, 0) 292 293 // Prepare -> Commit. 294 msgs, err = r.HandleMessage([]*schedulepb.Message{{ 295 From: dest, 296 MsgType: schedulepb.MsgDispatchTableResponse, 297 DispatchTableResponse: &schedulepb.DispatchTableResponse{ 298 Response: &schedulepb.DispatchTableResponse_AddTable{ 299 AddTable: &schedulepb.AddTableResponse{ 300 Status: &tablepb.TableStatus{ 301 Span: span, 302 State: tablepb.TableStatePrepared, 303 }, 304 }, 305 }, 306 }, 307 }}) 308 require.Nil(t, err) 309 require.Len(t, msgs, 1) 310 require.EqualValues(t, &schedulepb.Message{ 311 To: source, 312 MsgType: schedulepb.MsgDispatchTableRequest, 313 DispatchTableRequest: &schedulepb.DispatchTableRequest{ 314 Request: &schedulepb.DispatchTableRequest_RemoveTable{ 315 RemoveTable: &schedulepb.RemoveTableRequest{Span: span}, 316 }, 317 }, 318 }, msgs[0]) 319 320 // Source is removed, 321 // updates it's table status through heartbeat response. 322 msgs, err = r.HandleMessage([]*schedulepb.Message{{ 323 From: source, 324 MsgType: schedulepb.MsgHeartbeatResponse, 325 HeartbeatResponse: &schedulepb.HeartbeatResponse{ 326 Tables: []tablepb.TableStatus{{ 327 Span: span, 328 State: tablepb.TableStateStopped, 329 }}, 330 }, 331 }}) 332 require.Nil(t, err) 333 require.Len(t, msgs, 1) 334 require.EqualValues(t, &schedulepb.Message{ 335 To: dest, 336 MsgType: schedulepb.MsgDispatchTableRequest, 337 DispatchTableRequest: &schedulepb.DispatchTableRequest{ 338 Request: &schedulepb.DispatchTableRequest_AddTable{ 339 AddTable: &schedulepb.AddTableRequest{ 340 Span: span, 341 IsSecondary: false, 342 }, 343 }, 344 }, 345 }, msgs[0]) 346 347 // Commit -> Replicating 348 msgs, err = r.HandleMessage([]*schedulepb.Message{{ 349 From: dest, 350 MsgType: schedulepb.MsgDispatchTableResponse, 351 DispatchTableResponse: &schedulepb.DispatchTableResponse{ 352 Response: &schedulepb.DispatchTableResponse_AddTable{ 353 AddTable: &schedulepb.AddTableResponse{ 354 Status: &tablepb.TableStatus{ 355 Span: span, 356 State: tablepb.TableStateReplicating, 357 }, 358 }, 359 }, 360 }, 361 }}) 362 require.Nil(t, err) 363 require.Len(t, msgs, 0) 364 require.Equal( 365 t, ReplicationSetStateReplicating, r.spans.GetV(spanz.TableIDToComparableSpan(1)).State) 366 require.Equal(t, dest, r.spans.GetV(spanz.TableIDToComparableSpan(1)).Primary) 367 368 // Handle task again to clear runningTasks 369 msgs, err = r.HandleTasks(nil) 370 require.Nil(t, err) 371 require.Len(t, msgs, 0) 372 require.Nil(t, r.runningTasks.GetV(spanz.TableIDToComparableSpan(1))) 373 } 374 375 func TestReplicationManagerBurstBalance(t *testing.T) { 376 t.Parallel() 377 378 r := NewReplicationManager(1, model.ChangeFeedID{}) 379 balanceTableCh := make(chan int, 1) 380 381 // Burst balance is not limited by maxTaskConcurrency. 382 msgs, err := r.HandleTasks([]*ScheduleTask{{ 383 AddTable: &AddTable{ 384 Span: spanz.TableIDToComparableSpan(1), CaptureID: "0", CheckpointTs: 1, 385 }, 386 }, { 387 BurstBalance: &BurstBalance{ 388 AddTables: []AddTable{{ 389 Span: spanz.TableIDToComparableSpan(1), CaptureID: "1", CheckpointTs: 1, 390 }, { 391 Span: spanz.TableIDToComparableSpan(2), CaptureID: "2", CheckpointTs: 1, 392 }, { 393 Span: spanz.TableIDToComparableSpan(3), CaptureID: "3", CheckpointTs: 1, 394 }}, 395 }, 396 Accept: func() { 397 balanceTableCh <- 1 398 }, 399 }}) 400 require.Nil(t, err) 401 require.Equal(t, 1, <-balanceTableCh) 402 require.Len(t, msgs, 3) 403 for tableID, captureID := range map[model.TableID]model.CaptureID{ 404 1: "0", 2: "2", 3: "3", 405 } { 406 require.Contains(t, msgs, &schedulepb.Message{ 407 To: captureID, 408 MsgType: schedulepb.MsgDispatchTableRequest, 409 DispatchTableRequest: &schedulepb.DispatchTableRequest{ 410 Request: &schedulepb.DispatchTableRequest_AddTable{ 411 AddTable: &schedulepb.AddTableRequest{ 412 Span: spanz.TableIDToComparableSpan(tableID), 413 IsSecondary: true, 414 Checkpoint: tablepb.Checkpoint{ 415 CheckpointTs: 1, 416 ResolvedTs: 1, 417 }, 418 }, 419 }, 420 }, 421 }, msgs) 422 require.True(t, r.spans.Has(spanz.TableIDToComparableSpan(tableID))) 423 require.True(t, r.runningTasks.Has(spanz.TableIDToComparableSpan(tableID))) 424 } 425 426 // Add a new table. 427 span := spanz.TableIDToComparableSpan(5) 428 table5, err := NewReplicationSet(span, 0, map[string]*tablepb.TableStatus{ 429 "5": {Span: span, State: tablepb.TableStateReplicating}, 430 }, model.ChangeFeedID{}) 431 require.Nil(t, err) 432 r.spans.ReplaceOrInsert(span, table5) 433 434 // More burst balance is still allowed. 435 msgs, err = r.HandleTasks([]*ScheduleTask{{ 436 BurstBalance: &BurstBalance{ 437 AddTables: []AddTable{{ 438 Span: spanz.TableIDToComparableSpan(4), CaptureID: "4", CheckpointTs: 2, 439 }, { 440 Span: spanz.TableIDToComparableSpan(1), CaptureID: "0", CheckpointTs: 2, 441 }}, 442 RemoveTables: []RemoveTable{{ 443 Span: spanz.TableIDToComparableSpan(5), CaptureID: "5", 444 }, { 445 Span: spanz.TableIDToComparableSpan(1), CaptureID: "0", 446 }}, 447 }, 448 Accept: func() { 449 balanceTableCh <- 1 450 }, 451 }}) 452 require.Nil(t, err) 453 require.Equal(t, 1, <-balanceTableCh) 454 require.Len(t, msgs, 2) 455 require.Contains(t, msgs, &schedulepb.Message{ 456 To: "4", 457 MsgType: schedulepb.MsgDispatchTableRequest, 458 DispatchTableRequest: &schedulepb.DispatchTableRequest{ 459 Request: &schedulepb.DispatchTableRequest_AddTable{ 460 AddTable: &schedulepb.AddTableRequest{ 461 Span: spanz.TableIDToComparableSpan(4), 462 IsSecondary: true, 463 Checkpoint: tablepb.Checkpoint{ 464 CheckpointTs: 2, 465 ResolvedTs: 2, 466 }, 467 }, 468 }, 469 }, 470 }, msgs) 471 require.Contains(t, msgs, &schedulepb.Message{ 472 To: "5", 473 MsgType: schedulepb.MsgDispatchTableRequest, 474 DispatchTableRequest: &schedulepb.DispatchTableRequest{ 475 Request: &schedulepb.DispatchTableRequest_RemoveTable{ 476 RemoveTable: &schedulepb.RemoveTableRequest{ 477 Span: spanz.TableIDToComparableSpan(5), 478 }, 479 }, 480 }, 481 }, msgs) 482 } 483 484 func TestReplicationManagerBurstBalanceMoveTables(t *testing.T) { 485 t.Parallel() 486 487 r := NewReplicationManager(1, model.ChangeFeedID{}) 488 balanceTableCh := make(chan int, 1) 489 490 var err error 491 // Two tables in "1". 492 span := spanz.TableIDToComparableSpan(1) 493 table, err := NewReplicationSet(span, 0, map[string]*tablepb.TableStatus{ 494 "1": {Span: span, State: tablepb.TableStateReplicating}, 495 }, model.ChangeFeedID{}) 496 require.Nil(t, err) 497 r.spans.ReplaceOrInsert(span, table) 498 span2 := spanz.TableIDToComparableSpan(2) 499 table2, err := NewReplicationSet(span2, 0, map[string]*tablepb.TableStatus{ 500 "1": { 501 Span: span2, State: tablepb.TableStateReplicating, 502 Checkpoint: tablepb.Checkpoint{CheckpointTs: 1, ResolvedTs: 1}, 503 }, 504 }, model.ChangeFeedID{}) 505 require.Nil(t, err) 506 r.spans.ReplaceOrInsert(span2, table2) 507 508 msgs, err := r.HandleTasks([]*ScheduleTask{{ 509 BurstBalance: &BurstBalance{ 510 MoveTables: []MoveTable{{ 511 Span: spanz.TableIDToComparableSpan(2), DestCapture: "2", 512 }}, 513 }, 514 Accept: func() { 515 balanceTableCh <- 1 516 }, 517 }}) 518 require.Nil(t, err) 519 require.Equal(t, 1, <-balanceTableCh) 520 require.Len(t, msgs, 1) 521 require.Contains(t, msgs, &schedulepb.Message{ 522 To: "2", 523 MsgType: schedulepb.MsgDispatchTableRequest, 524 DispatchTableRequest: &schedulepb.DispatchTableRequest{ 525 Request: &schedulepb.DispatchTableRequest_AddTable{ 526 AddTable: &schedulepb.AddTableRequest{ 527 Span: span2, 528 IsSecondary: true, 529 Checkpoint: tablepb.Checkpoint{ 530 CheckpointTs: 1, 531 ResolvedTs: 1, 532 }, 533 }, 534 }, 535 }, 536 }, msgs) 537 require.True(t, r.spans.Has(span2)) 538 require.True(t, r.runningTasks.Has(spanz.TableIDToComparableSpan(2))) 539 } 540 541 func TestReplicationManagerMaxTaskConcurrency(t *testing.T) { 542 t.Parallel() 543 544 r := NewReplicationManager(1, model.ChangeFeedID{}) 545 addTableCh := make(chan int, 1) 546 547 msgs, err := r.HandleTasks([]*ScheduleTask{{ 548 AddTable: &AddTable{Span: spanz.TableIDToComparableSpan(1), CaptureID: "1"}, 549 Accept: func() { 550 addTableCh <- 1 551 close(addTableCh) 552 }, 553 }}) 554 require.Nil(t, err) 555 require.Len(t, msgs, 1) 556 require.EqualValues(t, &schedulepb.Message{ 557 To: "1", 558 MsgType: schedulepb.MsgDispatchTableRequest, 559 DispatchTableRequest: &schedulepb.DispatchTableRequest{ 560 Request: &schedulepb.DispatchTableRequest_AddTable{ 561 AddTable: &schedulepb.AddTableRequest{ 562 Span: spanz.TableIDToComparableSpan(1), 563 IsSecondary: true, 564 }, 565 }, 566 }, 567 }, msgs[0]) 568 require.NotNil(t, r.runningTasks.Has(spanz.TableIDToComparableSpan(1))) 569 require.Equal(t, 1, <-addTableCh) 570 571 // No more tasks allowed. 572 msgs, err = r.HandleTasks([]*ScheduleTask{{ 573 AddTable: &AddTable{Span: spanz.TableIDToComparableSpan(2), CaptureID: "1"}, 574 Accept: func() { 575 t.Fatal("must not accept") 576 }, 577 }}) 578 require.Nil(t, err) 579 require.Len(t, msgs, 0) 580 } 581 582 type mockRedoMetaManager struct { 583 util.Runnable 584 585 checkpointTs model.Ts 586 resolvedTs model.Ts 587 enable bool 588 } 589 590 func (m *mockRedoMetaManager) UpdateMeta(checkpointTs, resolvedTs model.Ts) { 591 } 592 593 func (m *mockRedoMetaManager) GetFlushedMeta() common.LogMeta { 594 return common.LogMeta{ 595 CheckpointTs: m.checkpointTs, 596 ResolvedTs: m.resolvedTs, 597 } 598 } 599 600 func (m *mockRedoMetaManager) Cleanup(ctx context.Context) error { 601 return nil 602 } 603 604 func (m *mockRedoMetaManager) Enabled() bool { 605 return m.enable 606 } 607 608 func (m *mockRedoMetaManager) Running() bool { 609 return true 610 } 611 612 func TestReplicationManagerAdvanceCheckpoint(t *testing.T) { 613 t.Parallel() 614 r := NewReplicationManager(1, model.ChangeFeedID{}) 615 span := spanz.TableIDToComparableSpan(1) 616 rs, err := NewReplicationSet(span, model.Ts(10), 617 map[model.CaptureID]*tablepb.TableStatus{ 618 "1": { 619 Span: spanz.TableIDToComparableSpan(1), 620 State: tablepb.TableStateReplicating, 621 Checkpoint: tablepb.Checkpoint{ 622 CheckpointTs: model.Ts(10), 623 ResolvedTs: model.Ts(20), 624 LastSyncedTs: model.Ts(15), 625 }, 626 Stats: tablepb.Stats{ 627 StageCheckpoints: map[string]tablepb.Checkpoint{ 628 "puller-egress": { 629 ResolvedTs: model.Ts(30), 630 }, 631 }, 632 }, 633 }, 634 }, model.ChangeFeedID{}) 635 require.NoError(t, err) 636 r.spans.ReplaceOrInsert(span, rs) 637 638 span2 := spanz.TableIDToComparableSpan(2) 639 rs, err = NewReplicationSet(span2, model.Ts(15), 640 map[model.CaptureID]*tablepb.TableStatus{ 641 "2": { 642 Span: spanz.TableIDToComparableSpan(2), 643 State: tablepb.TableStateReplicating, 644 Checkpoint: tablepb.Checkpoint{ 645 CheckpointTs: model.Ts(15), 646 ResolvedTs: model.Ts(30), 647 LastSyncedTs: model.Ts(20), 648 }, 649 Stats: tablepb.Stats{ 650 StageCheckpoints: map[string]tablepb.Checkpoint{ 651 "puller-egress": { 652 ResolvedTs: model.Ts(40), 653 }, 654 }, 655 }, 656 }, 657 }, model.ChangeFeedID{}) 658 require.NoError(t, err) 659 r.spans.ReplaceOrInsert(span2, rs) 660 661 redoMetaManager := &mockRedoMetaManager{enable: false} 662 663 // no tables are replicating, resolvedTs should be advanced to globalBarrierTs and checkpoint 664 // should be advanced to minTableBarrierTs. 665 currentTables := &TableRanges{} 666 watermark := r.AdvanceCheckpoint( 667 currentTables, time.Now(), schedulepb.NewBarrierWithMinTs(5), redoMetaManager) 668 require.Equal(t, model.Ts(5), watermark.CheckpointTs) 669 require.Equal(t, model.Ts(5), watermark.ResolvedTs) 670 require.Equal(t, model.Ts(0), watermark.LastSyncedTs) 671 require.Equal(t, model.Ts(math.MaxUint64), watermark.PullerResolvedTs) 672 673 // all tables are replicating 674 currentTables.UpdateTables([]model.TableID{1, 2}) 675 watermark = r.AdvanceCheckpoint( 676 currentTables, time.Now(), schedulepb.NewBarrierWithMinTs(30), redoMetaManager) 677 require.Equal(t, model.Ts(10), watermark.CheckpointTs) 678 require.Equal(t, model.Ts(20), watermark.ResolvedTs) 679 require.Equal(t, model.Ts(20), watermark.LastSyncedTs) 680 require.Equal(t, model.Ts(30), watermark.PullerResolvedTs) 681 682 // some table not exist yet. 683 currentTables.UpdateTables([]model.TableID{1, 2, 3}) 684 watermark = r.AdvanceCheckpoint( 685 currentTables, time.Now(), schedulepb.NewBarrierWithMinTs(30), redoMetaManager) 686 require.Equal(t, checkpointCannotProceed, watermark.CheckpointTs) 687 require.Equal(t, checkpointCannotProceed, watermark.ResolvedTs) 688 require.Equal(t, checkpointCannotProceed, watermark.LastSyncedTs) 689 require.Equal(t, checkpointCannotProceed, watermark.PullerResolvedTs) 690 691 span3 := spanz.TableIDToComparableSpan(3) 692 rs, err = NewReplicationSet(span3, model.Ts(5), 693 map[model.CaptureID]*tablepb.TableStatus{ 694 "1": { 695 Span: spanz.TableIDToComparableSpan(3), 696 State: tablepb.TableStateReplicating, 697 Checkpoint: tablepb.Checkpoint{ 698 CheckpointTs: model.Ts(5), 699 ResolvedTs: model.Ts(40), 700 LastSyncedTs: model.Ts(30), 701 }, 702 Stats: tablepb.Stats{ 703 StageCheckpoints: map[string]tablepb.Checkpoint{ 704 "puller-egress": { 705 ResolvedTs: model.Ts(50), 706 }, 707 }, 708 }, 709 }, 710 "2": { 711 Span: spanz.TableIDToComparableSpan(3), 712 State: tablepb.TableStatePreparing, 713 Checkpoint: tablepb.Checkpoint{ 714 CheckpointTs: model.Ts(5), 715 ResolvedTs: model.Ts(40), 716 LastSyncedTs: model.Ts(32), 717 }, 718 Stats: tablepb.Stats{ 719 StageCheckpoints: map[string]tablepb.Checkpoint{ 720 "puller-egress": { 721 ResolvedTs: model.Ts(50), 722 }, 723 }, 724 }, 725 }, 726 }, model.ChangeFeedID{}) 727 require.NoError(t, err) 728 r.spans.ReplaceOrInsert(span3, rs) 729 watermark = r.AdvanceCheckpoint( 730 currentTables, time.Now(), schedulepb.NewBarrierWithMinTs(30), redoMetaManager) 731 require.Equal(t, model.Ts(5), watermark.CheckpointTs) 732 require.Equal(t, model.Ts(20), watermark.ResolvedTs) 733 require.Equal(t, model.Ts(32), watermark.LastSyncedTs) 734 require.Equal(t, model.Ts(30), watermark.PullerResolvedTs) 735 736 currentTables.UpdateTables([]model.TableID{1, 2, 3, 4}) 737 span4 := spanz.TableIDToComparableSpan(4) 738 rs, err = NewReplicationSet(span4, model.Ts(3), 739 map[model.CaptureID]*tablepb.TableStatus{ 740 "1": { 741 Span: spanz.TableIDToComparableSpan(4), 742 State: tablepb.TableStatePrepared, 743 Checkpoint: tablepb.Checkpoint{ 744 CheckpointTs: model.Ts(3), 745 ResolvedTs: model.Ts(10), 746 LastSyncedTs: model.Ts(5), 747 }, 748 Stats: tablepb.Stats{ 749 StageCheckpoints: map[string]tablepb.Checkpoint{ 750 "puller-egress": { 751 ResolvedTs: model.Ts(12), 752 }, 753 }, 754 }, 755 }, 756 }, model.ChangeFeedID{}) 757 require.NoError(t, err) 758 r.spans.ReplaceOrInsert(span4, rs) 759 watermark = r.AdvanceCheckpoint( 760 currentTables, time.Now(), schedulepb.NewBarrierWithMinTs(30), redoMetaManager) 761 require.Equal(t, model.Ts(3), watermark.CheckpointTs) 762 require.Equal(t, model.Ts(10), watermark.ResolvedTs) 763 require.Equal(t, model.Ts(32), watermark.LastSyncedTs) 764 require.Equal(t, model.Ts(12), watermark.PullerResolvedTs) 765 766 // Split table 5 into 2 spans. 767 currentTables.UpdateTables([]model.TableID{1, 2, 3, 4, 5}) 768 span5_1 := spanz.TableIDToComparableSpan(5) 769 span5_1.EndKey = append(span5_1.StartKey, 0) 770 span5_2 := spanz.TableIDToComparableSpan(5) 771 span5_2.StartKey = append(span5_2.StartKey, 0) 772 for _, span := range []tablepb.Span{span5_1, span5_2} { 773 rs, err = NewReplicationSet(span, model.Ts(3), 774 map[model.CaptureID]*tablepb.TableStatus{ 775 "1": { 776 Span: span, 777 State: tablepb.TableStatePrepared, 778 Checkpoint: tablepb.Checkpoint{ 779 CheckpointTs: model.Ts(3), 780 ResolvedTs: model.Ts(10), 781 LastSyncedTs: model.Ts(8), 782 }, 783 Stats: tablepb.Stats{ 784 StageCheckpoints: map[string]tablepb.Checkpoint{ 785 "puller-egress": { 786 ResolvedTs: model.Ts(11), 787 }, 788 }, 789 }, 790 }, 791 }, model.ChangeFeedID{}) 792 require.NoError(t, err) 793 r.spans.ReplaceOrInsert(span, rs) 794 } 795 watermark = r.AdvanceCheckpoint( 796 currentTables, time.Now(), schedulepb.NewBarrierWithMinTs(30), redoMetaManager) 797 require.Equal(t, model.Ts(3), watermark.CheckpointTs) 798 require.Equal(t, model.Ts(10), watermark.ResolvedTs) 799 require.Equal(t, model.Ts(32), watermark.LastSyncedTs) 800 require.Equal(t, model.Ts(11), watermark.PullerResolvedTs) 801 802 // The start span is missing 803 rs5_1, _ := r.spans.Delete(span5_1) 804 watermark = r.AdvanceCheckpoint( 805 currentTables, time.Now(), schedulepb.NewBarrierWithMinTs(30), redoMetaManager) 806 require.Equal(t, checkpointCannotProceed, watermark.CheckpointTs) 807 require.Equal(t, checkpointCannotProceed, watermark.ResolvedTs) 808 require.Equal(t, checkpointCannotProceed, watermark.LastSyncedTs) 809 require.Equal(t, checkpointCannotProceed, watermark.PullerResolvedTs) 810 811 // The end span is missing 812 r.spans.ReplaceOrInsert(span5_1, rs5_1) 813 r.spans.Delete(span5_2) 814 watermark = r.AdvanceCheckpoint( 815 currentTables, time.Now(), schedulepb.NewBarrierWithMinTs(30), redoMetaManager) 816 require.Equal(t, checkpointCannotProceed, watermark.CheckpointTs) 817 require.Equal(t, checkpointCannotProceed, watermark.ResolvedTs) 818 require.Equal(t, checkpointCannotProceed, watermark.LastSyncedTs) 819 require.Equal(t, checkpointCannotProceed, watermark.PullerResolvedTs) 820 821 // redo is enabled 822 currentTables.UpdateTables([]model.TableID{4}) 823 spanRedo := spanz.TableIDToComparableSpan(4) 824 rs, err = NewReplicationSet(spanRedo, model.Ts(3), 825 map[model.CaptureID]*tablepb.TableStatus{ 826 "1": { 827 Span: spanz.TableIDToComparableSpan(4), 828 State: tablepb.TableStatePrepared, 829 Checkpoint: tablepb.Checkpoint{ 830 CheckpointTs: model.Ts(10), 831 ResolvedTs: model.Ts(15), 832 LastSyncedTs: model.Ts(12), 833 }, 834 Stats: tablepb.Stats{ 835 StageCheckpoints: map[string]tablepb.Checkpoint{ 836 "puller-egress": { 837 ResolvedTs: model.Ts(16), 838 }, 839 }, 840 }, 841 }, 842 }, model.ChangeFeedID{}) 843 require.NoError(t, err) 844 r.spans.ReplaceOrInsert(spanRedo, rs) 845 barrier := schedulepb.NewBarrierWithMinTs(30) 846 redoMetaManager.enable = true 847 redoMetaManager.resolvedTs = 9 848 watermark = r.AdvanceCheckpoint( 849 currentTables, time.Now(), barrier, redoMetaManager) 850 require.Equal(t, model.Ts(9), watermark.ResolvedTs) 851 require.Equal(t, model.Ts(9), watermark.CheckpointTs) 852 require.Equal(t, model.Ts(12), watermark.LastSyncedTs) 853 require.Equal(t, model.Ts(16), watermark.PullerResolvedTs) 854 require.Equal(t, model.Ts(9), barrier.GetGlobalBarrierTs()) 855 } 856 857 func TestReplicationManagerAdvanceCheckpointWithRedoEnabled(t *testing.T) { 858 t.Parallel() 859 r := NewReplicationManager(1, model.ChangeFeedID{}) 860 span := spanz.TableIDToComparableSpan(1) 861 rs, err := NewReplicationSet(span, model.Ts(10), 862 map[model.CaptureID]*tablepb.TableStatus{ 863 "1": { 864 Span: spanz.TableIDToComparableSpan(1), 865 State: tablepb.TableStateReplicating, 866 Checkpoint: tablepb.Checkpoint{ 867 CheckpointTs: model.Ts(10), 868 ResolvedTs: model.Ts(20), 869 LastSyncedTs: model.Ts(12), 870 }, 871 Stats: tablepb.Stats{ 872 StageCheckpoints: map[string]tablepb.Checkpoint{ 873 "puller-egress": { 874 ResolvedTs: model.Ts(26), 875 }, 876 }, 877 }, 878 }, 879 }, model.ChangeFeedID{}) 880 require.NoError(t, err) 881 r.spans.ReplaceOrInsert(span, rs) 882 883 span2 := spanz.TableIDToComparableSpan(2) 884 rs, err = NewReplicationSet(span2, model.Ts(15), 885 map[model.CaptureID]*tablepb.TableStatus{ 886 "2": { 887 Span: spanz.TableIDToComparableSpan(2), 888 State: tablepb.TableStateReplicating, 889 Checkpoint: tablepb.Checkpoint{ 890 CheckpointTs: model.Ts(15), 891 ResolvedTs: model.Ts(30), 892 LastSyncedTs: model.Ts(18), 893 }, 894 Stats: tablepb.Stats{ 895 StageCheckpoints: map[string]tablepb.Checkpoint{ 896 "puller-egress": { 897 ResolvedTs: model.Ts(39), 898 }, 899 }, 900 }, 901 }, 902 }, model.ChangeFeedID{}) 903 require.NoError(t, err) 904 r.spans.ReplaceOrInsert(span2, rs) 905 906 redoMetaManager := &mockRedoMetaManager{enable: true, resolvedTs: 25} 907 908 // some table not exist yet with redo is enabled. 909 currentTables := &TableRanges{} 910 currentTables.UpdateTables([]model.TableID{1, 2, 3}) 911 barrier := schedulepb.NewBarrierWithMinTs(30) 912 watermark := r.AdvanceCheckpoint( 913 currentTables, 914 time.Now(), barrier, redoMetaManager) 915 require.Equal(t, checkpointCannotProceed, watermark.CheckpointTs) 916 require.Equal(t, checkpointCannotProceed, watermark.ResolvedTs) 917 require.Equal(t, checkpointCannotProceed, watermark.LastSyncedTs) 918 require.Equal(t, checkpointCannotProceed, watermark.PullerResolvedTs) 919 require.Equal(t, uint64(25), barrier.Barrier.GetGlobalBarrierTs()) 920 } 921 922 func TestReplicationManagerHandleCaptureChanges(t *testing.T) { 923 t.Parallel() 924 925 r := NewReplicationManager(1, model.ChangeFeedID{}) 926 init := map[model.CaptureID][]tablepb.TableStatus{ 927 "1": {{Span: spanz.TableIDToComparableSpan(1), State: tablepb.TableStateReplicating}}, 928 "2": {{Span: spanz.TableIDToComparableSpan(2), State: tablepb.TableStateReplicating}}, 929 "3": { 930 {Span: spanz.TableIDToComparableSpan(3), State: tablepb.TableStateReplicating}, 931 {Span: spanz.TableIDToComparableSpan(2), State: tablepb.TableStatePreparing}, 932 }, 933 "4": {{Span: spanz.TableIDToComparableSpan(4), State: tablepb.TableStateStopping}}, 934 "5": {{Span: spanz.TableIDToComparableSpan(5), State: tablepb.TableStateStopped}}, 935 } 936 msgs, err := r.HandleCaptureChanges(init, nil, 0) 937 require.Nil(t, err) 938 require.Len(t, msgs, 0) 939 require.Equal(t, r.spans.Len(), 5) 940 require.Equal( 941 t, ReplicationSetStateReplicating, r.spans.GetV(spanz.TableIDToComparableSpan(1)).State) 942 require.Equal( 943 t, ReplicationSetStatePrepare, r.spans.GetV(spanz.TableIDToComparableSpan(2)).State) 944 require.Equal( 945 t, ReplicationSetStateReplicating, r.spans.GetV(spanz.TableIDToComparableSpan(3)).State) 946 require.Equal( 947 t, ReplicationSetStateRemoving, r.spans.GetV(spanz.TableIDToComparableSpan(4)).State) 948 require.Equal( 949 t, ReplicationSetStateAbsent, r.spans.GetV(spanz.TableIDToComparableSpan(5)).State) 950 951 removed := map[string][]tablepb.TableStatus{ 952 "1": {{Span: spanz.TableIDToComparableSpan(1), State: tablepb.TableStateReplicating}}, 953 } 954 msgs, err = r.HandleCaptureChanges(nil, removed, 0) 955 require.Nil(t, err) 956 require.Len(t, msgs, 0) 957 require.Equal(t, r.spans.Len(), 5) 958 require.Equal( 959 t, ReplicationSetStateAbsent, r.spans.GetV(spanz.TableIDToComparableSpan(1)).State) 960 require.Equal( 961 t, ReplicationSetStatePrepare, r.spans.GetV(spanz.TableIDToComparableSpan(2)).State) 962 require.Equal( 963 t, ReplicationSetStateReplicating, r.spans.GetV(spanz.TableIDToComparableSpan(3)).State) 964 require.Equal( 965 t, ReplicationSetStateRemoving, r.spans.GetV(spanz.TableIDToComparableSpan(4)).State) 966 require.Equal( 967 t, ReplicationSetStateAbsent, r.spans.GetV(spanz.TableIDToComparableSpan(5)).State) 968 } 969 970 func TestReplicationManagerHandleCaptureChangesDuringAddTable(t *testing.T) { 971 t.Parallel() 972 973 r := NewReplicationManager(1, model.ChangeFeedID{}) 974 addTableCh := make(chan int, 1) 975 976 msgs, err := r.HandleTasks([]*ScheduleTask{{ 977 AddTable: &AddTable{Span: spanz.TableIDToComparableSpan(1), CaptureID: "1"}, 978 Accept: func() { 979 addTableCh <- 1 980 }, 981 }}) 982 require.Nil(t, err) 983 require.Len(t, msgs, 1) 984 require.NotNil(t, r.runningTasks.Has(spanz.TableIDToComparableSpan(1))) 985 require.Equal(t, 1, <-addTableCh) 986 987 removed := map[string][]tablepb.TableStatus{ 988 "1": {{Span: spanz.TableIDToComparableSpan(1), State: tablepb.TableStatePreparing}}, 989 } 990 msgs, err = r.HandleCaptureChanges(nil, removed, 0) 991 require.Nil(t, err) 992 require.Len(t, msgs, 0) 993 require.Equal(t, r.spans.Len(), 1) 994 require.Equal( 995 t, ReplicationSetStateAbsent, r.spans.GetV(spanz.TableIDToComparableSpan(1)).State) 996 require.Nil(t, r.runningTasks.GetV(spanz.TableIDToComparableSpan(1))) 997 998 // New task must be accepted. 999 msgs, err = r.HandleTasks([]*ScheduleTask{{ 1000 AddTable: &AddTable{Span: spanz.TableIDToComparableSpan(1), CaptureID: "1"}, 1001 Accept: func() { 1002 addTableCh <- 1 1003 }, 1004 }}) 1005 require.Nil(t, err) 1006 require.Len(t, msgs, 1) 1007 require.NotNil(t, r.runningTasks.Has(spanz.TableIDToComparableSpan(1))) 1008 require.Equal(t, 1, <-addTableCh) 1009 } 1010 1011 func TestLogSlowTableInfo(t *testing.T) { 1012 t.Parallel() 1013 r := NewReplicationManager(1, model.ChangeFeedID{}) 1014 r.spans.ReplaceOrInsert(spanz.TableIDToComparableSpan(1), &ReplicationSet{ 1015 Span: spanz.TableIDToComparableSpan(1), 1016 Checkpoint: tablepb.Checkpoint{CheckpointTs: 1}, 1017 State: ReplicationSetStateReplicating, 1018 }) 1019 r.spans.ReplaceOrInsert(spanz.TableIDToComparableSpan(2), &ReplicationSet{ 1020 Span: spanz.TableIDToComparableSpan(2), 1021 Checkpoint: tablepb.Checkpoint{CheckpointTs: 2}, 1022 State: ReplicationSetStatePrepare, 1023 }) 1024 r.spans.ReplaceOrInsert(spanz.TableIDToComparableSpan(3), &ReplicationSet{ 1025 Span: spanz.TableIDToComparableSpan(3), 1026 Checkpoint: tablepb.Checkpoint{CheckpointTs: 3}, 1027 State: ReplicationSetStatePrepare, 1028 }) 1029 r.logSlowTableInfo(time.Now()) 1030 // make sure all tables are will be pop out from heal after logged 1031 require.Equal(t, r.slowTableHeap.Len(), 0) 1032 r.spans.ReplaceOrInsert(spanz.TableIDToComparableSpan(4), &ReplicationSet{ 1033 Span: spanz.TableIDToComparableSpan(4), 1034 Checkpoint: tablepb.Checkpoint{CheckpointTs: 4}, 1035 State: ReplicationSetStatePrepare, 1036 }) 1037 r.spans.ReplaceOrInsert(spanz.TableIDToComparableSpan(5), &ReplicationSet{ 1038 Span: spanz.TableIDToComparableSpan(5), 1039 Checkpoint: tablepb.Checkpoint{CheckpointTs: 5}, 1040 State: ReplicationSetStatePrepare, 1041 }) 1042 r.spans.ReplaceOrInsert(spanz.TableIDToComparableSpan(6), &ReplicationSet{ 1043 Span: spanz.TableIDToComparableSpan(6), 1044 Checkpoint: tablepb.Checkpoint{CheckpointTs: 6}, 1045 State: ReplicationSetStatePrepare, 1046 }) 1047 r.spans.ReplaceOrInsert(spanz.TableIDToComparableSpan(7), &ReplicationSet{ 1048 Span: spanz.TableIDToComparableSpan(7), 1049 Checkpoint: tablepb.Checkpoint{CheckpointTs: 7}, 1050 State: ReplicationSetStatePrepare, 1051 }) 1052 r.spans.ReplaceOrInsert(spanz.TableIDToComparableSpan(8), &ReplicationSet{ 1053 Span: spanz.TableIDToComparableSpan(8), 1054 Checkpoint: tablepb.Checkpoint{CheckpointTs: 8}, 1055 State: ReplicationSetStatePrepare, 1056 }) 1057 r.spans.ReplaceOrInsert(spanz.TableIDToComparableSpan(9), &ReplicationSet{ 1058 Span: spanz.TableIDToComparableSpan(9), 1059 Checkpoint: tablepb.Checkpoint{CheckpointTs: 9}, 1060 State: ReplicationSetStatePrepare, 1061 }) 1062 r.spans.ReplaceOrInsert(spanz.TableIDToComparableSpan(1), &ReplicationSet{ 1063 Span: spanz.TableIDToComparableSpan(10), 1064 Checkpoint: tablepb.Checkpoint{CheckpointTs: 10}, 1065 State: ReplicationSetStatePrepare, 1066 }) 1067 r.logSlowTableInfo(time.Now()) 1068 // make sure the slowTableHeap's capacity will not extend 1069 require.Equal(t, cap(r.slowTableHeap), 8) 1070 }