github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/owner_test.go

// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package cdc

import (
	"bytes"
	"context"
	"fmt"
	"net/url"
	"sync"
	"sync/atomic"
	"time"

	"github.com/google/uuid"
	"github.com/pingcap/check"
	"github.com/pingcap/errors"
	timodel "github.com/pingcap/parser/model"
	"github.com/pingcap/parser/mysql"
	"github.com/pingcap/parser/types"
	"github.com/pingcap/ticdc/cdc/entry"
	"github.com/pingcap/ticdc/cdc/kv"
	"github.com/pingcap/ticdc/cdc/model"
	"github.com/pingcap/ticdc/pkg/config"
	cdcContext "github.com/pingcap/ticdc/pkg/context"
	cerror "github.com/pingcap/ticdc/pkg/errors"
	"github.com/pingcap/ticdc/pkg/etcd"
	"github.com/pingcap/ticdc/pkg/filter"
	"github.com/pingcap/ticdc/pkg/security"
	"github.com/pingcap/ticdc/pkg/util"
	"github.com/pingcap/ticdc/pkg/util/testleak"
	"github.com/pingcap/tidb/meta"
	"github.com/pingcap/tidb/store/mockstore"
	"github.com/pingcap/tidb/store/tikv/oracle"
	pd "github.com/tikv/pd/client"
	"go.etcd.io/etcd/clientv3"
	"go.etcd.io/etcd/clientv3/concurrency"
	"go.etcd.io/etcd/embed"
	"golang.org/x/sync/errgroup"
)

const TiKVGCLifeTime = 10 * 60 * time.Second // 10 min

type ownerSuite struct {
	e         *embed.Etcd
	clientURL *url.URL
	client    kv.CDCEtcdClient
	ctx       context.Context
	cancel    context.CancelFunc
	errg      *errgroup.Group
}

var _ = check.Suite(&ownerSuite{})

func (s *ownerSuite) SetUpTest(c *check.C) {
	dir := c.MkDir()
	var err error
	s.clientURL, s.e, err = etcd.SetupEmbedEtcd(dir)
	c.Assert(err, check.IsNil)
	client, err := clientv3.New(clientv3.Config{
		Endpoints:   []string{s.clientURL.String()},
		DialTimeout: 3 * time.Second,
	})
	c.Assert(err, check.IsNil)
	s.client = kv.NewCDCEtcdClient(context.TODO(), client)
	s.ctx, s.cancel = context.WithCancel(context.Background())
	s.errg = util.HandleErrWithErrGroup(s.ctx, s.e.Err(), func(e error) { c.Log(e) })
}

func (s *ownerSuite) TearDownTest(c *check.C) {
	s.e.Close()
	s.cancel()
	err := s.errg.Wait()
	if err != nil {
		c.Errorf("Error group error: %s", err)
	}
	s.client.Close() //nolint:errcheck
}

type mockPDClient struct {
	pd.Client
	invokeCounter      int
	mockSafePointLost  bool
	mockPDFailure      bool
	mockTiKVGCLifeTime bool
}

func (m *mockPDClient) GetTS(ctx context.Context) (int64, int64, error) {
	if m.mockPDFailure {
		return 0, 0, errors.New("injected PD failure")
	}
	if m.mockSafePointLost {
		return 0, 0, nil
	}
	return oracle.GetPhysical(time.Now()), 0, nil
}
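// Note: the UpdateServiceGCSafePoint mock below simulates three PD behaviors exercised by the
// tests in this file. mockSafePointLost returns a fixed safepoint (1000) that is already ahead
// of a stale changefeed's checkpoint, mockPDFailure returns an injected error, and
// mockTiKVGCLifeTime returns a safepoint lagging the current time by TiKVGCLifeTime,
// i.e. a tikv_gc_life_time larger than the owner's gc-ttl. oracle.GoTimeToTS converts a
// wall-clock time into a TSO.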
func (m *mockPDClient) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) {
	m.invokeCounter++

	if m.mockSafePointLost {
		return 1000, nil
	}
	if m.mockPDFailure {
		return 0, errors.New("injected PD failure")
	}
	if m.mockTiKVGCLifeTime {
		Ts := oracle.GoTimeToTS(time.Now().Add(-TiKVGCLifeTime))
		return Ts, nil
	}
	return safePoint, nil
}

type mockSink struct {
	AsyncSink
	checkpointTs model.Ts

	checkpointMu    sync.Mutex
	checkpointError error
}

func (m *mockSink) EmitCheckpointTs(ctx cdcContext.Context, ts uint64) error {
	m.checkpointMu.Lock()
	defer m.checkpointMu.Unlock()
	atomic.StoreUint64(&m.checkpointTs, ts)
	return m.checkpointError
}

func (m *mockSink) Close(ctx context.Context) error {
	return nil
}

func (m *mockSink) Barrier(ctx context.Context) error {
	return nil
}

// Test whether the owner can tolerate a sink-caused error and won't be killed;
// the specific changefeed should also be set to stopped.
func (s *ownerSuite) TestOwnerCalcResolvedTs(c *check.C) {
	defer testleak.AfterTest(c)()
	mockPDCli := &mockPDClient{}

	sink := &mockSink{checkpointError: cerror.ErrKafkaSendMessage}
	changeFeeds := map[model.ChangeFeedID]*changeFeed{
		"test_change_feed_1": {
			info:    &model.ChangeFeedInfo{State: model.StateNormal},
			etcdCli: s.client,
			status: &model.ChangeFeedStatus{
				CheckpointTs: 0,
			},
			targetTs:      2000,
			ddlResolvedTs: 2000,
			ddlState:      model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {
					CheckPointTs: 2333,
					ResolvedTs:   2333,
				},
				"capture_2": {
					CheckPointTs: 2333,
					ResolvedTs:   2333,
				},
			},
			sink: sink,
		},
	}

	session, err := concurrency.NewSession(s.client.Client.Unwrap(),
		concurrency.WithTTL(config.GetDefaultServerConfig().CaptureSessionTTL))
	c.Assert(err, check.IsNil)
	mockOwner := Owner{
		session:                 session,
		pdClient:                mockPDCli,
		etcdClient:              s.client,
		lastFlushChangefeeds:    time.Now(),
		flushChangefeedInterval: 1 * time.Hour,
		changeFeeds:             changeFeeds,
		cfRWriter:               s.client,
		stoppedFeeds:            make(map[model.ChangeFeedID]*model.ChangeFeedStatus),
		minGCSafePointCache:     minGCSafePointCacheEntry{},
	}

	err = mockOwner.calcResolvedTs(s.ctx)
	c.Assert(err, check.IsNil)

	err = mockOwner.handleDDL(s.ctx)
	c.Assert(err, check.IsNil)

	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockOwner.stoppedFeeds["test_change_feed_1"], check.NotNil)

	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 1)

	s.TearDownTest(c)
}

func (s *ownerSuite) TestOwnerFlushChangeFeedInfos(c *check.C) {
	defer testleak.AfterTest(c)()
	session, err := concurrency.NewSession(s.client.Client.Unwrap(),
		concurrency.WithTTL(config.GetDefaultServerConfig().CaptureSessionTTL))
	c.Assert(err, check.IsNil)
	mockPDCli := &mockPDClient{}
	mockOwner := Owner{
		session:               session,
		etcdClient:            s.client,
		pdClient:              mockPDCli,
		gcSafepointLastUpdate: time.Now(),
	}

	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 1)
	s.TearDownTest(c)
}
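// In the test below gcTTL is 6 seconds and every UpdateServiceGCSafePoint call fails with an
// injected PD error. The first flush happens roughly 3s after gcSafepointLastUpdate, still
// within gcTTL, so the error is tolerated; after sleeping another 6s the safepoint has gone
// un-updated for longer than gcTTL and the flush is expected to fail with
// ErrUpdateServiceSafepointFailed.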
func (s *ownerSuite) TestOwnerFlushChangeFeedInfosFailed(c *check.C) {
	defer testleak.AfterTest(c)()
	mockPDCli := &mockPDClient{
		mockPDFailure: true,
	}

	changeFeeds := map[model.ChangeFeedID]*changeFeed{
		"test_change_feed_1": {
			info: &model.ChangeFeedInfo{State: model.StateNormal},
			status: &model.ChangeFeedStatus{
				CheckpointTs: 100,
			},
			targetTs: 2000,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
		},
	}

	session, err := concurrency.NewSession(s.client.Client.Unwrap(),
		concurrency.WithTTL(config.GetDefaultServerConfig().CaptureSessionTTL))
	c.Assert(err, check.IsNil)
	mockOwner := Owner{
		session:                 session,
		pdClient:                mockPDCli,
		etcdClient:              s.client,
		lastFlushChangefeeds:    time.Now(),
		flushChangefeedInterval: 1 * time.Hour,
		gcSafepointLastUpdate:   time.Now(),
		gcTTL:                   6, // 6 seconds
		changeFeeds:             changeFeeds,
	}

	time.Sleep(3 * time.Second)
	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 1)

	time.Sleep(6 * time.Second)
	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.ErrorMatches, ".*CDC:ErrUpdateServiceSafepointFailed.*")
	c.Assert(mockPDCli.invokeCounter, check.Equals, 2)

	s.TearDownTest(c)
}

// Test whether it is possible to successfully create a changefeed
// with a startTs less than currentTs - gcTTL when tikv_gc_life_time is greater than gc-ttl.
func (s *ownerSuite) TestTiKVGCLifeTimeLargeThanGCTTL(c *check.C) {
	defer testleak.AfterTest(c)()
	mockPDCli := &mockPDClient{}
	mockPDCli.mockTiKVGCLifeTime = true

	changeFeeds := map[model.ChangeFeedID]*changeFeed{
		"test_change_feed_1": {
			info:    &model.ChangeFeedInfo{State: model.StateNormal},
			etcdCli: s.client,
			status: &model.ChangeFeedStatus{
				CheckpointTs: oracle.GoTimeToTS(time.Now().Add(-6 * time.Second)),
			},
			targetTs: 2000,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
		},
	}

	session, err := concurrency.NewSession(s.client.Client.Unwrap(),
		concurrency.WithTTL(config.GetDefaultServerConfig().CaptureSessionTTL))
	c.Assert(err, check.IsNil)

	mockOwner := Owner{
		session:                 session,
		pdClient:                mockPDCli,
		etcdClient:              s.client,
		lastFlushChangefeeds:    time.Now(),
		flushChangefeedInterval: 1 * time.Hour,
		// gcSafepointLastUpdate: time.Now(),
		gcTTL:               6, // 6 seconds
		changeFeeds:         changeFeeds,
		cfRWriter:           s.client,
		stoppedFeeds:        make(map[model.ChangeFeedID]*model.ChangeFeedStatus),
		minGCSafePointCache: minGCSafePointCacheEntry{},
	}

	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 1)

	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockOwner.stoppedFeeds["test_change_feed_1"], check.IsNil)
	c.Assert(mockOwner.changeFeeds["test_change_feed_1"].info.State, check.Equals, model.StateNormal)

	time.Sleep(7 * time.Second) // wait for the gcTTL to pass
	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 2)

	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockOwner.stoppedFeeds["test_change_feed_1"], check.IsNil)

	s.TearDownTest(c)
}
// Test whether the owner handles the stagnant task correctly, so that it can't block the update of gcSafePoint.
// If a changefeed is put into the stop queue due to stagnation, it can no longer affect the update of gcSafePoint.
// So we just need to test whether the stagnant changefeed is put into the stop queue.
func (s *ownerSuite) TestOwnerHandleStaleChangeFeed(c *check.C) {
	defer testleak.AfterTest(c)()
	mockPDCli := &mockPDClient{}
	changeFeeds := map[model.ChangeFeedID]*changeFeed{
		"test_change_feed_1": {
			info:    &model.ChangeFeedInfo{State: model.StateNormal},
			etcdCli: s.client,
			status: &model.ChangeFeedStatus{
				CheckpointTs: 1000,
			},
			targetTs: 2000,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
		},
		"test_change_feed_2": {
			info:    &model.ChangeFeedInfo{State: model.StateNormal},
			etcdCli: s.client,
			status: &model.ChangeFeedStatus{
				CheckpointTs: oracle.EncodeTSO(oracle.GetPhysical(time.Now())),
			},
			targetTs: 2000,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
		},
	}

	session, err := concurrency.NewSession(s.client.Client.Unwrap(),
		concurrency.WithTTL(config.GetDefaultServerConfig().CaptureSessionTTL))
	c.Assert(err, check.IsNil)

	mockOwner := Owner{
		session:                 session,
		pdClient:                mockPDCli,
		etcdClient:              s.client,
		lastFlushChangefeeds:    time.Now(),
		flushChangefeedInterval: 1 * time.Hour,
		gcSafepointLastUpdate:   time.Now().Add(-4 * time.Second),
		gcTTL:                   6, // 6 seconds
		changeFeeds:             changeFeeds,
		cfRWriter:               s.client,
		stoppedFeeds:            make(map[model.ChangeFeedID]*model.ChangeFeedStatus),
		minGCSafePointCache:     minGCSafePointCacheEntry{},
	}

	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 1)
	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)

	time.Sleep(2 * time.Second)
	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 2)
	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)

	c.Assert(mockOwner.stoppedFeeds["test_change_feed_1"], check.NotNil)
	c.Assert(mockOwner.changeFeeds["test_change_feed_2"].info.State, check.Equals, model.StateNormal)

	time.Sleep(6 * time.Second)
	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 3)
	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)

	time.Sleep(2 * time.Second)
	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 4)
	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockOwner.stoppedFeeds["test_change_feed_2"], check.NotNil)

	s.TearDownTest(c)
}
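// In the test below mockSafePointLost makes PD report a service GC safepoint of 1000, which is
// ahead of test_change_feed_1's checkpoint (100) but behind test_change_feed_2's (1100); the
// owner is therefore expected to stop only the first changefeed and leave the second one in the
// normal state.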
func (s *ownerSuite) TestOwnerUploadGCSafePointOutdated(c *check.C) {
	defer testleak.AfterTest(c)()
	mockPDCli := &mockPDClient{
		mockSafePointLost: true,
	}
	changeFeeds := map[model.ChangeFeedID]*changeFeed{
		"test_change_feed_1": {
			info:    &model.ChangeFeedInfo{State: model.StateNormal},
			etcdCli: s.client,
			status: &model.ChangeFeedStatus{
				CheckpointTs: 100,
			},
			targetTs: 2000,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
		},
		"test_change_feed_2": {
			info:    &model.ChangeFeedInfo{State: model.StateNormal},
			etcdCli: s.client,
			status: &model.ChangeFeedStatus{
				CheckpointTs: 1100,
			},
			targetTs: 2000,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
		},
	}

	session, err := concurrency.NewSession(s.client.Client.Unwrap(),
		concurrency.WithTTL(config.GetDefaultServerConfig().CaptureSessionTTL))
	c.Assert(err, check.IsNil)

	mockOwner := Owner{
		pdClient:                mockPDCli,
		session:                 session,
		etcdClient:              s.client,
		lastFlushChangefeeds:    time.Now(),
		flushChangefeedInterval: 1 * time.Hour,
		changeFeeds:             changeFeeds,
		cfRWriter:               s.client,
		stoppedFeeds:            make(map[model.ChangeFeedID]*model.ChangeFeedStatus),
		minGCSafePointCache:     minGCSafePointCacheEntry{},
	}

	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 1)

	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)

	c.Assert(mockOwner.stoppedFeeds["test_change_feed_1"], check.NotNil)
	c.Assert(changeFeeds["test_change_feed_2"].info.State, check.Equals, model.StateNormal)
	s.TearDownTest(c)
}

/*
type handlerForPrueDMLTest struct {
	mu               sync.RWMutex
	index            int
	resolvedTs1      []uint64
	resolvedTs2      []uint64
	expectResolvedTs []uint64
	c                *check.C
	cancel           func()
}

func (h *handlerForPrueDMLTest) PullDDL() (resolvedTs uint64, ddl []*model.DDL, err error) {
	return uint64(math.MaxUint64), nil, nil
}

func (h *handlerForPrueDMLTest) ExecDDL(context.Context, string, map[string]string, model.SingleTableTxn) error {
	panic("unreachable")
}

func (h *handlerForPrueDMLTest) Close() error {
	return nil
}

var _ ChangeFeedRWriter = &handlerForPrueDMLTest{}

func (h *handlerForPrueDMLTest) GetChangeFeeds(ctx context.Context) (int64, map[string]*mvccpb.KeyValue, error) {
	h.mu.RLock()
	defer h.mu.RUnlock()
	cfInfo := &model.ChangeFeedInfo{
		TargetTs: 100,
	}
	cfInfoJSON, err := cfInfo.Marshal()
	h.c.Assert(err, check.IsNil)
	rawKV := &mvccpb.KeyValue{
		Value: []byte(cfInfoJSON),
	}
	return 0, map[model.ChangeFeedID]*mvccpb.KeyValue{
		"test_change_feed": rawKV,
	}, nil
}

func (h *handlerForPrueDMLTest) GetAllTaskStatus(ctx context.Context, changefeedID string) (model.ProcessorsInfos, error) {
	if changefeedID != "test_change_feed" {
		return nil, cerror.ErrTaskStatusNotExists.GenWithStackByArgs("test_change_feed")
	}
	h.mu.RLock()
	defer h.mu.RUnlock()
	h.index++
	return model.ProcessorsInfos{
		"capture_1": {},
		"capture_2": {},
	}, nil
}

func (h *handlerForPrueDMLTest) GetAllTaskPositions(ctx context.Context, changefeedID string) (map[string]*model.TaskPosition, error) {
	if changefeedID != "test_change_feed" {
		return nil, cerror.ErrTaskStatusNotExists.GenWithStackByArgs("test_change_feed")
	}
	h.mu.RLock()
	defer h.mu.RUnlock()
	h.index++
	return map[string]*model.TaskPosition{
		"capture_1": {
			ResolvedTs: h.resolvedTs1[h.index],
		},
		"capture_2": {
			ResolvedTs: h.resolvedTs2[h.index],
		},
	}, nil
}

func (h *handlerForPrueDMLTest) GetChangeFeedStatus(ctx context.Context, id string) (*model.ChangeFeedStatus, error) {
	return nil, cerror.ErrChangeFeedNotExists.GenWithStackByArgs(id)
}

func (h *handlerForPrueDMLTest) PutAllChangeFeedStatus(ctx context.Context, infos map[model.ChangeFeedID]*model.ChangeFeedStatus) error {
	h.mu.Lock()
	defer h.mu.Unlock()
	info, exist := infos["test_change_feed"]
	h.c.Assert(exist, check.IsTrue)
	h.c.Assert(info.ResolvedTs, check.Equals, h.expectResolvedTs[h.index])
	// h.c.Assert(info.State, check.Equals, model.ChangeFeedSyncDML)
	if h.index >= len(h.expectResolvedTs)-1 {
		log.Info("cancel")
		h.cancel()
	}
	return nil
}

func (s *ownerSuite) TestPureDML(c *check.C) {
	defer testleak.AfterTest(c)()
	ctx, cancel := context.WithCancel(context.Background())
	handler := &handlerForPrueDMLTest{
		index:            -1,
		resolvedTs1:      []uint64{10, 22, 64, 92, 99, 120},
		resolvedTs2:      []uint64{8, 36, 53, 88, 103, 108},
		expectResolvedTs: []uint64{8, 22, 53, 88, 99, 100},
		cancel:           cancel,
		c:                c,
	}

	tables := map[uint64]model.TableName{1: {Schema: "any"}}

	changeFeeds := map[model.ChangeFeedID]*changeFeed{
		"test_change_feed": {
			tables:   tables,
			status:   &model.ChangeFeedStatus{},
			targetTs: 100,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
			ddlHandler: handler,
		},
	}

	manager := roles.NewMockManager(uuid.New().String(), cancel)
	err := manager.CampaignOwner(ctx)
	c.Assert(err, check.IsNil)
	owner := &ownerImpl{
		cancelWatchCapture: cancel,
		changeFeeds:        changeFeeds,
		cfRWriter:          handler,
		etcdClient:         s.client,
		manager:            manager,
	}
	s.owner = owner
	err = owner.Run(ctx, 50*time.Millisecond)
	c.Assert(err.Error(), check.Equals, "context canceled")
}

type handlerForDDLTest struct {
	mu sync.RWMutex

	ddlIndex      int
	ddls          []*model.DDL
	ddlResolvedTs []uint64

	ddlExpectIndex int

	dmlIndex                int
	resolvedTs1             []uint64
	resolvedTs2             []uint64
	currentGlobalResolvedTs uint64

	dmlExpectIndex   int
	expectResolvedTs []uint64
	expectStatus     []model.ChangeFeedDDLState

	c      *check.C
	cancel func()
}

func (h *handlerForDDLTest) PullDDL() (resolvedTs uint64, jobs []*model.DDL, err error) {
	h.mu.RLock()
	defer h.mu.RUnlock()
	if h.ddlIndex < len(h.ddls)-1 {
		h.ddlIndex++
	}
	return h.ddlResolvedTs[h.ddlIndex], []*model.DDL{h.ddls[h.ddlIndex]}, nil
}

func (h *handlerForDDLTest) ExecDDL(ctx context.Context, sinkURI string, _ map[string]string, txn model.SingleTableTxn) error {
	h.mu.Lock()
	defer h.mu.Unlock()
	h.ddlExpectIndex++
	h.c.Assert(txn.DDL, check.DeepEquals, h.ddls[h.ddlExpectIndex])
	h.c.Assert(txn.DDL.Job.BinlogInfo.FinishedTS, check.Equals, h.currentGlobalResolvedTs)
	return nil
}

func (h *handlerForDDLTest) Close() error {
	return nil
}
func (h *handlerForDDLTest) GetChangeFeeds(ctx context.Context) (int64, map[string]*mvccpb.KeyValue, error) {
	h.mu.RLock()
	defer h.mu.RUnlock()
	cfInfo := &model.ChangeFeedInfo{
		TargetTs: 100,
	}
	cfInfoJSON, err := cfInfo.Marshal()
	h.c.Assert(err, check.IsNil)
	rawKV := &mvccpb.KeyValue{
		Value: []byte(cfInfoJSON),
	}
	return 0, map[model.ChangeFeedID]*mvccpb.KeyValue{
		"test_change_feed": rawKV,
	}, nil
}

func (h *handlerForDDLTest) GetAllTaskStatus(ctx context.Context, changefeedID string) (model.ProcessorsInfos, error) {
	if changefeedID != "test_change_feed" {
		return nil, cerror.ErrTaskStatusNotExists.GenWithStackByArgs("test_change_feed")
	}
	h.mu.RLock()
	defer h.mu.RUnlock()
	if h.dmlIndex < len(h.resolvedTs1)-1 {
		h.dmlIndex++
	}
	return model.ProcessorsInfos{
		"capture_1": {},
		"capture_2": {},
	}, nil
}

func (h *handlerForDDLTest) GetAllTaskPositions(ctx context.Context, changefeedID string) (map[string]*model.TaskPosition, error) {
	if changefeedID != "test_change_feed" {
		return nil, cerror.ErrTaskStatusNotExists.GenWithStackByArgs("test_change_feed")
	}
	h.mu.RLock()
	defer h.mu.RUnlock()
	if h.dmlIndex < len(h.resolvedTs1)-1 {
		h.dmlIndex++
	}
	return map[string]*model.TaskPosition{
		"capture_1": {
			ResolvedTs:   h.resolvedTs1[h.dmlIndex],
			CheckPointTs: h.currentGlobalResolvedTs,
		},
		"capture_2": {
			ResolvedTs:   h.resolvedTs2[h.dmlIndex],
			CheckPointTs: h.currentGlobalResolvedTs,
		},
	}, nil
}

func (h *handlerForDDLTest) GetChangeFeedStatus(ctx context.Context, id string) (*model.ChangeFeedStatus, error) {
	return nil, cerror.ErrChangeFeedNotExists.GenWithStackByArgs(id)
}

func (h *handlerForDDLTest) PutAllChangeFeedStatus(ctx context.Context, infos map[model.ChangeFeedID]*model.ChangeFeedStatus) error {
	h.mu.Lock()
	defer h.mu.Unlock()
	h.dmlExpectIndex++
	info, exist := infos["test_change_feed"]
	h.c.Assert(exist, check.IsTrue)
	h.currentGlobalResolvedTs = info.ResolvedTs
	h.c.Assert(info.ResolvedTs, check.Equals, h.expectResolvedTs[h.dmlExpectIndex])
	// h.c.Assert(info.State, check.Equals, h.expectStatus[h.dmlExpectIndex])
	if h.dmlExpectIndex >= len(h.expectResolvedTs)-1 {
		log.Info("cancel")
		h.cancel()
	}
	return nil
}

func (s *ownerSuite) TestDDL(c *check.C) {
	defer testleak.AfterTest(c)()
	ctx, cancel := context.WithCancel(context.Background())

	handler := &handlerForDDLTest{
		ddlIndex:      -1,
		ddlResolvedTs: []uint64{5, 8, 49, 91, 113},
		ddls: []*model.DDL{
			{Job: &timodel.Job{
				ID: 1,
				BinlogInfo: &timodel.HistoryInfo{
					FinishedTS: 3,
				},
			}},
			{Job: &timodel.Job{
				ID: 2,
				BinlogInfo: &timodel.HistoryInfo{
					FinishedTS: 7,
				},
			}},
			{Job: &timodel.Job{
				ID: 3,
				BinlogInfo: &timodel.HistoryInfo{
					FinishedTS: 11,
				},
			}},
			{Job: &timodel.Job{
				ID: 4,
				BinlogInfo: &timodel.HistoryInfo{
					FinishedTS: 89,
				},
			}},
			{Job: &timodel.Job{
				ID: 5,
				BinlogInfo: &timodel.HistoryInfo{
					FinishedTS: 111,
				},
			}},
		},

		ddlExpectIndex: -1,

		dmlIndex:                -1,
		resolvedTs1:             []uint64{10, 22, 64, 92, 99, 120},
		resolvedTs2:             []uint64{8, 36, 53, 88, 103, 108},
		currentGlobalResolvedTs: 0,

		dmlExpectIndex: -1,
		expectResolvedTs: []uint64{
			3, 3,
			7, 7,
			11, 11,
			89, 89,
			100},
		expectStatus: []model.ChangeFeedDDLState{
			model.ChangeFeedWaitToExecDDL, model.ChangeFeedExecDDL,
			model.ChangeFeedWaitToExecDDL, model.ChangeFeedExecDDL,
			model.ChangeFeedWaitToExecDDL, model.ChangeFeedExecDDL,
			model.ChangeFeedWaitToExecDDL, model.ChangeFeedExecDDL,
			model.ChangeFeedSyncDML},

		cancel: cancel,
		c:      c,
	}

	tables := map[uint64]model.TableName{1: {Schema: "any"}}

	filter, err := newTxnFilter(&model.ReplicaConfig{})
	c.Assert(err, check.IsNil)
	changeFeeds := map[model.ChangeFeedID]*changeFeed{
		"test_change_feed": {
			tables:   tables,
			info:     &model.ChangeFeedInfo{},
			status:   &model.ChangeFeedStatus{},
			targetTs: 100,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
			ddlHandler: handler,
			filter:     filter,
		},
	}

	manager := roles.NewMockManager(uuid.New().String(), cancel)
	err = manager.CampaignOwner(ctx)
	c.Assert(err, check.IsNil)
	owner := &ownerImpl{
		cancelWatchCapture: cancel,
		changeFeeds:        changeFeeds,

		// ddlHandler: handler,
		etcdClient: s.client,
		cfRWriter:  handler,
		manager:    manager,
	}
	s.owner = owner
	err = owner.Run(ctx, 50*time.Millisecond)
	c.Assert(errors.Cause(err), check.DeepEquals, context.Canceled)
}
*/

var cdcGCSafePointTTL4Test = int64(24 * 60 * 60)
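// TestHandleAdmin below walks a changefeed through AdminStop, AdminResume and AdminRemove:
// AdminStop should record the admin job type on the changefeed info, its task statuses and its
// status in etcd and cancel the changefeed context; AdminResume should record the resume on the
// info and status; AdminRemove should drop the changefeed info and cancel the context again.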
func (s *ownerSuite) TestHandleAdmin(c *check.C) {
	defer testleak.AfterTest(c)()
	defer s.TearDownTest(c)
	cfID := "test_handle_admin"

	ctx, cancel0 := context.WithCancel(context.Background())
	defer cancel0()
	cctx, cancel := context.WithCancel(ctx)
	errg, _ := errgroup.WithContext(cctx)

	replicaConf := config.GetDefaultReplicaConfig()

	sampleCF := &changeFeed{
		id:       cfID,
		info:     &model.ChangeFeedInfo{Config: replicaConf, SinkURI: "blackhole://"},
		status:   &model.ChangeFeedStatus{},
		ddlState: model.ChangeFeedSyncDML,
		taskStatus: model.ProcessorsInfos{
			"capture_1": {},
			"capture_2": {},
		},
		taskPositions: map[string]*model.TaskPosition{
			"capture_1": {ResolvedTs: 10001},
			"capture_2": {},
		},
		ddlHandler: &ddlHandler{
			cancel: cancel,
			wg:     errg,
		},
		cancel: cancel,
	}
	// create a new asyncSink
	cdcCtx := cdcContext.NewContext(ctx, &cdcContext.GlobalVars{})
	cdcCtx = cdcContext.WithChangefeedVars(cdcCtx, &cdcContext.ChangefeedVars{
		ID:   cfID,
		Info: sampleCF.info,
	})
	sink, err := newAsyncSink(cdcCtx)
	c.Assert(err, check.IsNil)
	defer sink.Close(cctx) //nolint:errcheck
	sampleCF.sink = sink

	capture, err := NewCapture(ctx, []string{s.clientURL.String()}, nil, nil)
	c.Assert(err, check.IsNil)
	err = capture.Campaign(ctx)
	c.Assert(err, check.IsNil)

	grpcPool := kv.NewGrpcPoolImpl(ctx, &security.Credential{})
	defer grpcPool.Close()
	owner, err := NewOwner(ctx, nil, grpcPool, capture.session, cdcGCSafePointTTL4Test, time.Millisecond*200)
	c.Assert(err, check.IsNil)

	sampleCF.etcdCli = owner.etcdClient
	owner.changeFeeds = map[model.ChangeFeedID]*changeFeed{cfID: sampleCF}
	for cid, pinfo := range sampleCF.taskPositions {
		key := kv.GetEtcdKeyTaskStatus(cfID, cid)
		pinfoStr, err := pinfo.Marshal()
		c.Assert(err, check.IsNil)
		_, err = s.client.Client.Put(ctx, key, pinfoStr)
		c.Assert(err, check.IsNil)
	}
	err = owner.etcdClient.PutChangeFeedStatus(ctx, cfID, &model.ChangeFeedStatus{})
	c.Assert(err, check.IsNil)
	err = owner.etcdClient.SaveChangeFeedInfo(ctx, sampleCF.info, cfID)
	c.Assert(err, check.IsNil)
	checkAdminJobLen := func(length int) {
		owner.adminJobsLock.Lock()
		c.Assert(owner.adminJobs, check.HasLen, length)
		owner.adminJobsLock.Unlock()
	}

	c.Assert(owner.EnqueueJob(model.AdminJob{CfID: cfID, Type: model.AdminStop}), check.IsNil)
	checkAdminJobLen(1)
	c.Assert(owner.handleAdminJob(ctx), check.IsNil)
	checkAdminJobLen(0)
	c.Assert(len(owner.changeFeeds), check.Equals, 0)
	// check that the admin job type is set on the changefeed info
	info, err := owner.etcdClient.GetChangeFeedInfo(ctx, cfID)
	c.Assert(err, check.IsNil)
	c.Assert(info.AdminJobType, check.Equals, model.AdminStop)
	// check that the admin job type is set on each processor's task status
	for cid := range sampleCF.taskPositions {
		_, subInfo, err := owner.etcdClient.GetTaskStatus(ctx, cfID, cid)
		c.Assert(err, check.IsNil)
		c.Assert(subInfo.AdminJobType, check.Equals, model.AdminStop)
	}
	// check that the admin job type is set on the changefeed status
	st, _, err := owner.etcdClient.GetChangeFeedStatus(ctx, cfID)
	c.Assert(err, check.IsNil)
	c.Assert(st.AdminJobType, check.Equals, model.AdminStop)
	// check that the changefeed context is canceled
	select {
	case <-cctx.Done():
	default:
		c.Fatal("changefeed context is expected canceled")
	}

	cctx, cancel = context.WithCancel(ctx)
	sampleCF.cancel = cancel

	c.Assert(owner.EnqueueJob(model.AdminJob{CfID: cfID, Type: model.AdminResume}), check.IsNil)
	c.Assert(owner.handleAdminJob(ctx), check.IsNil)
	checkAdminJobLen(0)
	// check that the admin job type is set on the changefeed info
	info, err = owner.etcdClient.GetChangeFeedInfo(ctx, cfID)
	c.Assert(err, check.IsNil)
	c.Assert(info.AdminJobType, check.Equals, model.AdminResume)
	// check that the admin job type is set on the changefeed status
	st, _, err = owner.etcdClient.GetChangeFeedStatus(ctx, cfID)
	c.Assert(err, check.IsNil)
	c.Assert(st.AdminJobType, check.Equals, model.AdminResume)

	owner.changeFeeds[cfID] = sampleCF
	c.Assert(owner.EnqueueJob(model.AdminJob{CfID: cfID, Type: model.AdminRemove}), check.IsNil)
	c.Assert(owner.handleAdminJob(ctx), check.IsNil)
	checkAdminJobLen(0)
	c.Assert(len(owner.changeFeeds), check.Equals, 0)
	// check that the changefeed info is deleted
	_, err = owner.etcdClient.GetChangeFeedInfo(ctx, cfID)
	c.Assert(cerror.ErrChangeFeedNotExists.Equal(err), check.IsTrue)
	// check that the admin job type is set on each processor's task status
	for cid := range sampleCF.taskPositions {
		_, subInfo, err := owner.etcdClient.GetTaskStatus(ctx, cfID, cid)
		c.Assert(err, check.IsNil)
		c.Assert(subInfo.AdminJobType, check.Equals, model.AdminRemove)
	}
	// check that the admin job type is set on the changefeed status
	st, _, err = owner.etcdClient.GetChangeFeedStatus(ctx, cfID)
	c.Assert(err, check.IsNil)
	c.Assert(st.AdminJobType, check.Equals, model.AdminRemove)
	// check that the changefeed context is canceled
	select {
	case <-cctx.Done():
	default:
		c.Fatal("changefeed context is expected canceled")
	}
	owner.etcdClient.Close() //nolint:errcheck
}
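// TestChangefeedApplyDDLJob below feeds a sequence of DDL jobs (create schema, create tables t1
// and t2, drop t2, truncate t1, drop t1, drop schema) through the changefeed's schema snapshot
// and checks the tracked schemas and tables after each job. Truncate assigns a new physical
// table ID, which is why t1 switches from ID 47 to ID 51 in the expected results.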
func (s *ownerSuite) TestChangefeedApplyDDLJob(c *check.C) {
	defer testleak.AfterTest(c)()
	var (
		jobs = []*timodel.Job{
			{
				ID:       1,
				SchemaID: 1,
				Type:     timodel.ActionCreateSchema,
				State:    timodel.JobStateSynced,
				Query:    "create database test",
				BinlogInfo: &timodel.HistoryInfo{
					SchemaVersion: 1,
					DBInfo: &timodel.DBInfo{
						ID:   1,
						Name: timodel.NewCIStr("test"),
					},
				},
			},
			{
				ID:       2,
				SchemaID: 1,
				Type:     timodel.ActionCreateTable,
				State:    timodel.JobStateSynced,
				Query:    "create table t1 (id int primary key)",
				BinlogInfo: &timodel.HistoryInfo{
					SchemaVersion: 2,
					DBInfo: &timodel.DBInfo{
						ID:   1,
						Name: timodel.NewCIStr("test"),
					},
					TableInfo: &timodel.TableInfo{
						ID:         47,
						Name:       timodel.NewCIStr("t1"),
						PKIsHandle: true,
						Columns: []*timodel.ColumnInfo{
							{ID: 1, FieldType: types.FieldType{Flag: mysql.PriKeyFlag}, State: timodel.StatePublic},
						},
					},
				},
			},
			{
				ID:       2,
				SchemaID: 1,
				Type:     timodel.ActionCreateTable,
				State:    timodel.JobStateSynced,
				Query:    "create table t2 (id int primary key)",
				BinlogInfo: &timodel.HistoryInfo{
					SchemaVersion: 2,
					DBInfo: &timodel.DBInfo{
						ID:   1,
						Name: timodel.NewCIStr("test"),
					},
					TableInfo: &timodel.TableInfo{
						ID:         49,
						Name:       timodel.NewCIStr("t2"),
						PKIsHandle: true,
						Columns: []*timodel.ColumnInfo{
							{ID: 1, FieldType: types.FieldType{Flag: mysql.PriKeyFlag}, State: timodel.StatePublic},
						},
					},
				},
			},
			{
				ID:       2,
				SchemaID: 1,
				TableID:  49,
				Type:     timodel.ActionDropTable,
				State:    timodel.JobStateSynced,
				Query:    "drop table t2",
				BinlogInfo: &timodel.HistoryInfo{
					SchemaVersion: 3,
					DBInfo: &timodel.DBInfo{
						ID:   1,
						Name: timodel.NewCIStr("test"),
					},
					TableInfo: &timodel.TableInfo{
						ID:   49,
						Name: timodel.NewCIStr("t2"),
					},
				},
			},
			{
				ID:       2,
				SchemaID: 1,
				TableID:  47,
				Type:     timodel.ActionTruncateTable,
				State:    timodel.JobStateSynced,
				Query:    "truncate table t1",
				BinlogInfo: &timodel.HistoryInfo{
					SchemaVersion: 4,
					DBInfo: &timodel.DBInfo{
						ID:   1,
						Name: timodel.NewCIStr("test"),
					},
					TableInfo: &timodel.TableInfo{
						ID:         51,
						Name:       timodel.NewCIStr("t1"),
						PKIsHandle: true,
						Columns: []*timodel.ColumnInfo{
							{ID: 1, FieldType: types.FieldType{Flag: mysql.PriKeyFlag}, State: timodel.StatePublic},
						},
					},
				},
			},
			{
				ID:       2,
				SchemaID: 1,
				TableID:  51,
				Type:     timodel.ActionDropTable,
				State:    timodel.JobStateSynced,
				Query:    "drop table t1",
				BinlogInfo: &timodel.HistoryInfo{
					SchemaVersion: 5,
					DBInfo: &timodel.DBInfo{
						ID:   1,
						Name: timodel.NewCIStr("test"),
					},
					TableInfo: &timodel.TableInfo{
						ID:   51,
						Name: timodel.NewCIStr("t1"),
					},
				},
			},
			{
				ID:       2,
				SchemaID: 1,
				Type:     timodel.ActionDropSchema,
				State:    timodel.JobStateSynced,
				Query:    "drop database test",
				BinlogInfo: &timodel.HistoryInfo{
					SchemaVersion: 6,
					DBInfo: &timodel.DBInfo{
						ID:   1,
						Name: timodel.NewCIStr("test"),
					},
				},
			},
		}

		expectSchemas = []map[int64]tableIDMap{
			{1: make(tableIDMap)},
			{1: {47: struct{}{}}},
			{1: {47: struct{}{}, 49: struct{}{}}},
			{1: {47: struct{}{}}},
			{1: {51: struct{}{}}},
			{1: make(tableIDMap)},
			{},
		}

		expectTables = []map[int64]model.TableName{
			{},
			{47: {Schema: "test", Table: "t1"}},
			{47: {Schema: "test", Table: "t1"}, 49: {Schema: "test", Table: "t2"}},
			{47: {Schema: "test", Table: "t1"}},
			{51: {Schema: "test", Table: "t1"}},
			{},
			{},
		}
	)
	f, err := filter.NewFilter(config.GetDefaultReplicaConfig())
	c.Assert(err, check.IsNil)

	store, err := mockstore.NewMockStore()
	c.Assert(err, check.IsNil)
	defer func() {
		_ = store.Close()
	}()

	txn, err := store.Begin()
	c.Assert(err, check.IsNil)
	defer func() {
		_ = txn.Rollback()
	}()
	t := meta.NewMeta(txn)

	schemaSnap, err := entry.NewSingleSchemaSnapshotFromMeta(t, 0, false)
	c.Assert(err, check.IsNil)

	cf := &changeFeed{
		schema:        schemaSnap,
		schemas:       make(map[model.SchemaID]tableIDMap),
		tables:        make(map[model.TableID]model.TableName),
		partitions:    make(map[model.TableID][]int64),
		orphanTables:  make(map[model.TableID]model.Ts),
		toCleanTables: make(map[model.TableID]model.Ts),
		filter:        f,
		info:          &model.ChangeFeedInfo{Config: config.GetDefaultReplicaConfig()},
	}
	for i, job := range jobs {
		err = cf.schema.HandleDDL(job)
		c.Assert(err, check.IsNil)
		err = cf.schema.FillSchemaName(job)
		c.Assert(err, check.IsNil)
		_, err = cf.applyJob(job)
		c.Assert(err, check.IsNil)
		c.Assert(cf.schemas, check.DeepEquals, expectSchemas[i])
		c.Assert(cf.tables, check.DeepEquals, expectTables[i])
	}
	s.TearDownTest(c)
}

func (s *ownerSuite) TestWatchCampaignKey(c *check.C) {
	defer testleak.AfterTest(c)()
	defer s.TearDownTest(c)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	capture, err := NewCapture(ctx, []string{s.clientURL.String()}, nil, nil)
	c.Assert(err, check.IsNil)
	err = capture.Campaign(ctx)
	c.Assert(err, check.IsNil)

	grpcPool := kv.NewGrpcPoolImpl(ctx, &security.Credential{})
	defer grpcPool.Close()
	ctx1, cancel1 := context.WithCancel(ctx)
	owner, err := NewOwner(ctx1, nil, grpcPool, capture.session,
		cdcGCSafePointTTL4Test, time.Millisecond*200)
	c.Assert(err, check.IsNil)

	// check that deletion of the campaign key can be detected
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		err := owner.watchCampaignKey(ctx1)
		c.Assert(cerror.ErrOwnerCampaignKeyDeleted.Equal(err), check.IsTrue)
		cancel1()
	}()
	// ensure the watch loop has started
	time.Sleep(time.Millisecond * 100)
	etcdCli := owner.etcdClient.Client.Unwrap()
	key := fmt.Sprintf("%s/%x", kv.CaptureOwnerKey, owner.session.Lease())
	_, err = etcdCli.Delete(ctx, key)
	c.Assert(err, check.IsNil)
	wg.Wait()

	// check the case where the key is already deleted before the watch loop starts
	ctx1, cancel1 = context.WithCancel(ctx)
	err = owner.watchCampaignKey(ctx1)
	c.Assert(cerror.ErrOwnerCampaignKeyDeleted.Equal(err), check.IsTrue)

	// check that the watch routine can be canceled
	err = capture.Campaign(ctx)
	c.Assert(err, check.IsNil)
	wg.Add(1)
	go func() {
		defer wg.Done()
		err := owner.watchCampaignKey(ctx1)
		c.Assert(err, check.IsNil)
	}()
	// ensure the watch loop has started
	time.Sleep(time.Millisecond * 100)
	cancel1()
	wg.Wait()

	err = capture.etcdClient.Close()
	c.Assert(err, check.IsNil)
}
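// TestCleanUpStaleTasks below registers task status, position and workload records for both a
// live capture and a non-existent capture, then checks that rebuilding capture events removes
// the stale capture's records and moves its tables into the changefeed's orphanTables.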
func (s *ownerSuite) TestCleanUpStaleTasks(c *check.C) {
	defer testleak.AfterTest(c)()
	defer s.TearDownTest(c)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	addr := "127.0.0.1:12034"
	ctx = util.PutCaptureAddrInCtx(ctx, addr)
	capture, err := NewCapture(ctx, []string{s.clientURL.String()}, nil, nil)
	c.Assert(err, check.IsNil)
	err = s.client.PutCaptureInfo(ctx, capture.info, capture.session.Lease())
	c.Assert(err, check.IsNil)

	changefeed := "changefeed-name"
	invalidCapture := uuid.New().String()
	for _, captureID := range []string{capture.info.ID, invalidCapture} {
		taskStatus := &model.TaskStatus{}
		if captureID == invalidCapture {
			taskStatus.Tables = map[model.TableID]*model.TableReplicaInfo{
				51: {StartTs: 110},
			}
		}
		err = s.client.PutTaskStatus(ctx, changefeed, captureID, taskStatus)
		c.Assert(err, check.IsNil)
		_, err = s.client.PutTaskPositionOnChange(ctx, changefeed, captureID, &model.TaskPosition{CheckPointTs: 100, ResolvedTs: 120})
		c.Assert(err, check.IsNil)
		err = s.client.PutTaskWorkload(ctx, changefeed, captureID, &model.TaskWorkload{})
		c.Assert(err, check.IsNil)
	}
	err = s.client.SaveChangeFeedInfo(ctx, &model.ChangeFeedInfo{}, changefeed)
	c.Assert(err, check.IsNil)

	_, captureList, err := s.client.GetCaptures(ctx)
	c.Assert(err, check.IsNil)
	captures := make(map[model.CaptureID]*model.CaptureInfo)
	for _, c := range captureList {
		captures[c.ID] = c
	}
	grpcPool := kv.NewGrpcPoolImpl(ctx, &security.Credential{})
	defer grpcPool.Close()
	owner, err := NewOwner(ctx, nil, grpcPool, capture.session,
		cdcGCSafePointTTL4Test, time.Millisecond*200)
	c.Assert(err, check.IsNil)
	// It would be better to update the changefeed information via `loadChangeFeeds`, but
	// `loadChangeFeeds` is too heavyweight; just mock enough information here.
	owner.changeFeeds = map[model.ChangeFeedID]*changeFeed{
		changefeed: {
			id:           changefeed,
			orphanTables: make(map[model.TableID]model.Ts),
			status: &model.ChangeFeedStatus{
				CheckpointTs: 100,
			},
		},
	}

	// capture information is not built yet, so owner.run does nothing
	err = owner.run(ctx)
	c.Assert(err, check.IsNil)
	statuses, err := s.client.GetAllTaskStatus(ctx, changefeed)
	c.Assert(err, check.IsNil)
	// stale tasks are not cleaned up, since `cleanUpStaleTasks` does not run
	c.Assert(len(statuses), check.Equals, 2)
	c.Assert(len(owner.captures), check.Equals, 0)

	err = owner.rebuildCaptureEvents(ctx, captures)
	c.Assert(err, check.IsNil)
	c.Assert(len(owner.captures), check.Equals, 1)
	c.Assert(owner.captures, check.HasKey, capture.info.ID)
	c.Assert(owner.changeFeeds[changefeed].orphanTables, check.DeepEquals, map[model.TableID]model.Ts{51: 110})
	c.Assert(atomic.LoadInt32(&owner.captureLoaded), check.Equals, int32(1))
	// check that stale tasks are cleaned up
	statuses, err = s.client.GetAllTaskStatus(ctx, changefeed)
	c.Assert(err, check.IsNil)
	c.Assert(len(statuses), check.Equals, 1)
	c.Assert(statuses, check.HasKey, capture.info.ID)
	positions, err := s.client.GetAllTaskPositions(ctx, changefeed)
	c.Assert(err, check.IsNil)
	c.Assert(len(positions), check.Equals, 1)
	c.Assert(positions, check.HasKey, capture.info.ID)
	workloads, err := s.client.GetAllTaskWorkloads(ctx, changefeed)
	c.Assert(err, check.IsNil)
	c.Assert(len(workloads), check.Equals, 1)
	c.Assert(workloads, check.HasKey, capture.info.ID)

	err = capture.etcdClient.Close()
	c.Assert(err, check.IsNil)
}
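// TestWatchFeedChange below runs one goroutine that keeps bumping a task position in etcd and
// another that consumes the owner's feed-change notifications. The test only requires that the
// receiver observes at least one notification and fewer notifications than there were position
// updates, i.e. notifications may be coalesced while the owner is busy.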
func (s *ownerSuite) TestWatchFeedChange(c *check.C) {
	defer testleak.AfterTest(c)()
	defer s.TearDownTest(c)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	addr := "127.0.0.1:12034"
	ctx = util.PutCaptureAddrInCtx(ctx, addr)
	capture, err := NewCapture(ctx, []string{s.clientURL.String()}, nil, nil)
	c.Assert(err, check.IsNil)
	grpcPool := kv.NewGrpcPoolImpl(ctx, &security.Credential{})
	defer grpcPool.Close()
	owner, err := NewOwner(ctx, nil, grpcPool, capture.session,
		cdcGCSafePointTTL4Test, time.Millisecond*200)
	c.Assert(err, check.IsNil)

	var (
		wg              sync.WaitGroup
		updateCount     = 0
		recvChangeCount = 0
	)
	ctx1, cancel1 := context.WithCancel(ctx)
	wg.Add(1)
	go func() {
		defer wg.Done()
		changefeedID := "test-changefeed"
		pos := &model.TaskPosition{CheckPointTs: 100, ResolvedTs: 102}
		for {
			select {
			case <-ctx1.Done():
				return
			default:
			}
			pos.ResolvedTs++
			pos.CheckPointTs++
			updated, err := capture.etcdClient.PutTaskPositionOnChange(ctx1, changefeedID, capture.info.ID, pos)
			if errors.Cause(err) == context.Canceled {
				return
			}
			c.Assert(err, check.IsNil)
			c.Assert(updated, check.IsTrue)
			updateCount++
			// sleep to avoid starving other goroutines
			time.Sleep(time.Millisecond)
		}
	}()

	feedChangeReceiver, err := owner.feedChangeNotifier.NewReceiver(ownerRunInterval)
	c.Assert(err, check.IsNil)
	defer feedChangeReceiver.Stop()
	owner.watchFeedChange(ctx)
	wg.Add(1)
	go func() {
		defer func() {
			// there could be one message remaining in the notification receiver, try to consume it
			select {
			case <-feedChangeReceiver.C:
			default:
			}
			wg.Done()
		}()
		for {
			select {
			case <-ctx1.Done():
				return
			case <-feedChangeReceiver.C:
				recvChangeCount++
				// sleep to simulate some owner work
				time.Sleep(time.Millisecond * 50)
			}
		}
	}()

	time.Sleep(time.Second * 2)
	// use cancel1 to avoid canceling watchFeedChange
	cancel1()
	wg.Wait()
	c.Assert(recvChangeCount, check.Greater, 0)
	c.Assert(recvChangeCount, check.Less, updateCount)
	select {
	case <-feedChangeReceiver.C:
		c.Error("should not receive message from feed change chan any more")
	default:
	}

	err = capture.etcdClient.Close()
	if err != nil {
		c.Assert(errors.Cause(err), check.Equals, context.Canceled)
	}
}

func (s *ownerSuite) TestWriteDebugInfo(c *check.C) {
	defer testleak.AfterTest(c)()
	defer s.TearDownTest(c)
	owner := &Owner{
		changeFeeds: map[model.ChangeFeedID]*changeFeed{
			"test": {
				id: "test",
				info: &model.ChangeFeedInfo{
					SinkURI: "blackhole://",
					Config:  config.GetDefaultReplicaConfig(),
				},
				status: &model.ChangeFeedStatus{
					ResolvedTs:   120,
					CheckpointTs: 100,
				},
			},
		},
		stoppedFeeds: map[model.ChangeFeedID]*model.ChangeFeedStatus{
			"test-2": {
				ResolvedTs:   120,
				CheckpointTs: 100,
			},
		},
		captures: map[model.CaptureID]*model.CaptureInfo{
			"capture-1": {
				ID:            "capture-1",
				AdvertiseAddr: "127.0.0.1:8301",
			},
		},
	}
	var buf bytes.Buffer
	owner.writeDebugInfo(&buf)
	c.Assert(buf.String(), check.Matches, `[\s\S]*active changefeeds[\s\S]*stopped changefeeds[\s\S]*captures[\s\S]*`)
}