github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/capture_test.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package cdc
    15  
    16  import (
    17  	"context"
    18  	"net/url"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/pingcap/check"
    23  	"github.com/pingcap/errors"
    24  	"github.com/pingcap/failpoint"
    25  	"github.com/pingcap/ticdc/cdc/kv"
    26  	"github.com/pingcap/ticdc/cdc/model"
    27  	"github.com/pingcap/ticdc/pkg/config"
    28  	cerror "github.com/pingcap/ticdc/pkg/errors"
    29  	"github.com/pingcap/ticdc/pkg/etcd"
    30  	"github.com/pingcap/ticdc/pkg/util"
    31  	"github.com/pingcap/ticdc/pkg/util/testleak"
    32  	pd "github.com/tikv/pd/client"
    33  	"go.etcd.io/etcd/clientv3"
    34  	"go.etcd.io/etcd/clientv3/concurrency"
    35  	"go.etcd.io/etcd/embed"
    36  	"golang.org/x/sync/errgroup"
    37  )
    38  
    39  type captureSuite struct {
    40  	e         *embed.Etcd
    41  	clientURL *url.URL
    42  	client    kv.CDCEtcdClient
    43  	ctx       context.Context
    44  	cancel    context.CancelFunc
    45  	errg      *errgroup.Group
    46  }
    47  
    48  var _ = check.Suite(&captureSuite{})
    49  
    50  func (s *captureSuite) SetUpTest(c *check.C) {
    51  	dir := c.MkDir()
    52  	var err error
    53  	s.clientURL, s.e, err = etcd.SetupEmbedEtcd(dir)
    54  	c.Assert(err, check.IsNil)
    55  	client, err := clientv3.New(clientv3.Config{
    56  		Endpoints:   []string{s.clientURL.String()},
    57  		DialTimeout: 3 * time.Second,
    58  	})
    59  	c.Assert(err, check.IsNil)
    60  	s.client = kv.NewCDCEtcdClient(context.Background(), client)
    61  	s.ctx, s.cancel = context.WithCancel(context.Background())
    62  	s.errg = util.HandleErrWithErrGroup(s.ctx, s.e.Err(), func(e error) { c.Log(e) })
    63  }
    64  
    65  func (s *captureSuite) TearDownTest(c *check.C) {
    66  	s.e.Close()
    67  	s.cancel()
    68  	err := s.errg.Wait()
    69  	if err != nil {
    70  		c.Errorf("Error group error: %s", err)
    71  	}
    72  	s.client.Close() //nolint:errcheck
    73  }
    74  
    75  func (s *captureSuite) TestCaptureSuicide(c *check.C) {
    76  	defer testleak.AfterTest(c)()
    77  	defer s.TearDownTest(c)
    78  
    79  	ctx, cancel := context.WithCancel(context.Background())
    80  	defer cancel()
    81  	capture, err := NewCapture(ctx, []string{s.clientURL.String()}, nil, nil)
    82  	c.Assert(err, check.IsNil)
    83  
    84  	var wg sync.WaitGroup
    85  	wg.Add(1)
    86  	go func() {
    87  		defer wg.Done()
    88  		err := capture.Run(ctx)
    89  		c.Assert(cerror.ErrCaptureSuicide.Equal(err), check.IsTrue)
    90  	}()
    91  	// ttl is 5s, wait 1s to ensure `capture.Run` starts
    92  	time.Sleep(time.Second)
    93  	_, err = s.client.Client.Revoke(ctx, capture.session.Lease())
    94  	c.Assert(err, check.IsNil)
    95  	wg.Wait()
    96  
    97  	err = capture.etcdClient.Close()
    98  	if err != nil {
    99  		c.Assert(errors.Cause(err), check.Equals, context.Canceled)
   100  	}
   101  }
   102  
   103  func (s *captureSuite) TestCaptureSessionDoneDuringHandleTask(c *check.C) {
   104  	defer testleak.AfterTest(c)()
   105  	defer s.TearDownTest(c)
   106  	if config.NewReplicaImpl {
   107  		c.Skip("this case is designed for old processor")
   108  	}
   109  
   110  	ctx, cancel := context.WithCancel(context.Background())
   111  	defer cancel()
   112  	capture, err := NewCapture(ctx, []string{s.clientURL.String()}, nil, nil)
   113  	c.Assert(err, check.IsNil)
   114  
   115  	runProcessorCount := 0
   116  	err = failpoint.Enable("github.com/pingcap/ticdc/cdc/captureHandleTaskDelay", "sleep(500)")
   117  	c.Assert(err, check.IsNil)
   118  	defer func() {
   119  		_ = failpoint.Disable("github.com/pingcap/ticdc/cdc/captureHandleTaskDelay")
   120  	}()
   121  	runProcessorBackup := runProcessorImpl
   122  	runProcessorImpl = func(
   123  		ctx context.Context, _ pd.Client, grpcPool kv.GrpcPool,
   124  		session *concurrency.Session, info model.ChangeFeedInfo, changefeedID string,
   125  		captureInfo model.CaptureInfo, checkpointTs uint64, flushCheckpointInterval time.Duration,
   126  	) (*oldProcessor, error) {
   127  		runProcessorCount++
   128  		etcdCli := kv.NewCDCEtcdClient(ctx, session.Client())
   129  		_, _, err := etcdCli.GetTaskStatus(ctx, changefeedID, captureInfo.ID)
   130  		return nil, err
   131  	}
   132  	defer func() {
   133  		runProcessorImpl = runProcessorBackup
   134  	}()
   135  
   136  	// The test simulates the following procedure
   137  	// 1. owner: dispatches new task to a capture
   138  	// 2. capture: detects the task, and starts to handle task
   139  	// 3. capture: during the task handling, capture session is disconnected
   140  	// 4. owner: observes the capture session disconnected and cleanup the task status of this capture
   141  	// 5. capture: queries task status failed when handling task
   142  	// 6. capture: checks session ttl, finds session disconnected and returns ErrCaptureSuicide to restart itself
   143  	// the event sequence must be kept, especially for 2->3->4->5
   144  	var wg sync.WaitGroup
   145  	wg.Add(1)
   146  	go func() {
   147  		defer wg.Done()
   148  		err := capture.Run(ctx)
   149  		// check step-6
   150  		c.Assert(cerror.ErrCaptureSuicide.Equal(err), check.IsTrue)
   151  		// check step-5 runs
   152  		c.Assert(runProcessorCount, check.Equals, 1)
   153  	}()
   154  	changefeedID := "test-changefeed"
   155  	err = s.client.SaveChangeFeedInfo(ctx, &model.ChangeFeedInfo{Config: config.GetDefaultReplicaConfig()}, changefeedID)
   156  	c.Assert(err, check.IsNil)
   157  	// step-1
   158  	err = s.client.PutTaskStatus(ctx, changefeedID, capture.info.ID, &model.TaskStatus{})
   159  	c.Assert(err, check.IsNil)
   160  	// sleep 100ms to ensure step-2 happens, the failpoint injected delay will ensure step-4 is after step-3
   161  	time.Sleep(time.Millisecond * 100)
   162  
   163  	// step-3
   164  	_, err = s.client.Client.Revoke(ctx, capture.session.Lease())
   165  	c.Assert(err, check.IsNil)
   166  	err = s.client.DeleteTaskStatus(ctx, changefeedID, capture.info.ID)
   167  	c.Assert(err, check.IsNil)
   168  
   169  	wg.Wait()
   170  
   171  	err = capture.etcdClient.Close()
   172  	if err != nil {
   173  		c.Assert(errors.Cause(err), check.Equals, context.Canceled)
   174  	}
   175  }