github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/kv/etcd.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package kv
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"time"
    20  
    21  	"go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
    22  
    23  	"github.com/pingcap/errors"
    24  	"github.com/pingcap/log"
    25  	"github.com/pingcap/ticdc/cdc/model"
    26  	cerror "github.com/pingcap/ticdc/pkg/errors"
    27  	"github.com/pingcap/ticdc/pkg/etcd"
    28  	"github.com/pingcap/ticdc/pkg/retry"
    29  	"github.com/pingcap/ticdc/pkg/util"
    30  	"github.com/prometheus/client_golang/prometheus"
    31  	"go.etcd.io/etcd/clientv3"
    32  	"go.etcd.io/etcd/clientv3/concurrency"
    33  	"go.etcd.io/etcd/embed"
    34  	"go.etcd.io/etcd/mvcc/mvccpb"
    35  	"go.uber.org/zap"
    36  	"google.golang.org/grpc/codes"
    37  )
    38  
    39  const (
    40  	// EtcdKeyBase is the common prefix of the keys in CDC
    41  	EtcdKeyBase = "/tidb/cdc"
    42  	// CaptureOwnerKey is the capture owner path that is saved to etcd
    43  	CaptureOwnerKey = EtcdKeyBase + "/owner"
    44  	// CaptureInfoKeyPrefix is the capture info path that is saved to etcd
    45  	CaptureInfoKeyPrefix = EtcdKeyBase + "/capture"
    46  
    47  	// TaskKeyPrefix is the prefix of task keys
    48  	TaskKeyPrefix = EtcdKeyBase + "/task"
    49  
    50  	// TaskWorkloadKeyPrefix is the prefix of task workload keys
    51  	TaskWorkloadKeyPrefix = TaskKeyPrefix + "/workload"
    52  
    53  	// TaskStatusKeyPrefix is the prefix of task status keys
    54  	TaskStatusKeyPrefix = TaskKeyPrefix + "/status"
    55  
    56  	// TaskPositionKeyPrefix is the prefix of task position keys
    57  	TaskPositionKeyPrefix = TaskKeyPrefix + "/position"
    58  
    59  	// JobKeyPrefix is the prefix of job keys
    60  	JobKeyPrefix = EtcdKeyBase + "/job"
    61  )
    62  
// Retry parameters used by AtomicPutTaskStatus.
const (
	// putTaskStatusBackoffBaseDelayInMs is handed to
	// retry.WithBackoffBaseDelay; as the name says, it is in milliseconds.
	putTaskStatusBackoffBaseDelayInMs = 100
	// putTaskStatusMaxTries is handed to retry.WithMaxTries and bounds the
	// number of attempts of one atomic put.
	putTaskStatusMaxTries             = 3
)
    67  
    68  // GetEtcdKeyChangeFeedList returns the prefix key of all changefeed config
    69  func GetEtcdKeyChangeFeedList() string {
    70  	return fmt.Sprintf("%s/changefeed/info", EtcdKeyBase)
    71  }
    72  
    73  // GetEtcdKeyChangeFeedInfo returns the key of a changefeed config
    74  func GetEtcdKeyChangeFeedInfo(changefeedID string) string {
    75  	return fmt.Sprintf("%s/%s", GetEtcdKeyChangeFeedList(), changefeedID)
    76  }
    77  
    78  // GetEtcdKeyChangeFeedStatus returns the key of a changefeed status
    79  func GetEtcdKeyChangeFeedStatus(changefeedID string) string {
    80  	return GetEtcdKeyJob(changefeedID)
    81  }
    82  
    83  // GetEtcdKeyTaskStatusList returns the key of a task status without captureID part
    84  func GetEtcdKeyTaskStatusList(changefeedID string) string {
    85  	return fmt.Sprintf("%s/changefeed/task/status/%s", EtcdKeyBase, changefeedID)
    86  }
    87  
    88  // GetEtcdKeyTaskPositionList returns the key of a task position without captureID part
    89  func GetEtcdKeyTaskPositionList(changefeedID string) string {
    90  	return fmt.Sprintf("%s/changefeed/task/position/%s", EtcdKeyBase, changefeedID)
    91  }
    92  
    93  // GetEtcdKeyTaskPosition returns the key of a task position
    94  func GetEtcdKeyTaskPosition(changefeedID, captureID string) string {
    95  	return TaskPositionKeyPrefix + "/" + captureID + "/" + changefeedID
    96  }
    97  
    98  // GetEtcdKeyCaptureInfo returns the key of a capture info
    99  func GetEtcdKeyCaptureInfo(id string) string {
   100  	return CaptureInfoKeyPrefix + "/" + id
   101  }
   102  
   103  // GetEtcdKeyTaskStatus returns the key for the task status
   104  func GetEtcdKeyTaskStatus(changeFeedID, captureID string) string {
   105  	return TaskStatusKeyPrefix + "/" + captureID + "/" + changeFeedID
   106  }
   107  
   108  // GetEtcdKeyTaskWorkload returns the key for the task workload
   109  func GetEtcdKeyTaskWorkload(changeFeedID, captureID string) string {
   110  	return TaskWorkloadKeyPrefix + "/" + captureID + "/" + changeFeedID
   111  }
   112  
   113  // GetEtcdKeyJob returns the key for a job status
   114  func GetEtcdKeyJob(changeFeedID string) string {
   115  	return JobKeyPrefix + "/" + changeFeedID
   116  }
   117  
// CDCEtcdClient is a wrap of etcd client. Its methods read and write the
// CDC keys (changefeed info, job status, task status/position/workload and
// capture info) through the wrapped client.
type CDCEtcdClient struct {
	// Client is the wrapped etcd client; NewCDCEtcdClient builds it with
	// etcd.Wrap together with a set of per-operation request counters.
	Client *etcd.Client
}
   122  
   123  // NewCDCEtcdClient returns a new CDCEtcdClient
   124  func NewCDCEtcdClient(ctx context.Context, cli *clientv3.Client) CDCEtcdClient {
   125  	captureAddr := util.CaptureAddrFromCtx(ctx)
   126  	metrics := map[string]prometheus.Counter{
   127  		etcd.EtcdPut:    etcdRequestCounter.WithLabelValues(etcd.EtcdPut, captureAddr),
   128  		etcd.EtcdGet:    etcdRequestCounter.WithLabelValues(etcd.EtcdGet, captureAddr),
   129  		etcd.EtcdDel:    etcdRequestCounter.WithLabelValues(etcd.EtcdDel, captureAddr),
   130  		etcd.EtcdTxn:    etcdRequestCounter.WithLabelValues(etcd.EtcdTxn, captureAddr),
   131  		etcd.EtcdGrant:  etcdRequestCounter.WithLabelValues(etcd.EtcdGrant, captureAddr),
   132  		etcd.EtcdRevoke: etcdRequestCounter.WithLabelValues(etcd.EtcdRevoke, captureAddr),
   133  	}
   134  	return CDCEtcdClient{Client: etcd.Wrap(cli, metrics)}
   135  }
   136  
   137  // Close releases resources in CDCEtcdClient
   138  func (c CDCEtcdClient) Close() error {
   139  	return c.Client.Unwrap().Close()
   140  }
   141  
   142  func (c CDCEtcdClient) contextWithSafeLease(ctx context.Context, leaseID clientv3.LeaseID) (context.Context, context.CancelFunc, error) {
   143  	lease, err := c.Client.TimeToLive(ctx, leaseID)
   144  	if err != nil {
   145  		return nil, nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   146  	}
   147  	if lease.TTL == int64(-1) {
   148  		return nil, nil, cerror.ErrLeaseTimeout.GenWithStackByArgs()
   149  	}
   150  	ctx, cancel := context.WithTimeout(ctx, time.Duration(lease.TTL)*time.Second)
   151  	return ctx, cancel, nil
   152  }
   153  
   154  // ClearAllCDCInfo delete all keys created by CDC
   155  func (c CDCEtcdClient) ClearAllCDCInfo(ctx context.Context) error {
   156  	_, err := c.Client.Delete(ctx, EtcdKeyBase, clientv3.WithPrefix())
   157  	return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   158  }
   159  
   160  // GetAllCDCInfo get all keys created by CDC
   161  func (c CDCEtcdClient) GetAllCDCInfo(ctx context.Context) ([]*mvccpb.KeyValue, error) {
   162  	resp, err := c.Client.Get(ctx, EtcdKeyBase, clientv3.WithPrefix())
   163  	if err != nil {
   164  		return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   165  	}
   166  	return resp.Kvs, nil
   167  }
   168  
   169  // RevokeAllLeases revokes all leases passed from parameter
   170  func (c CDCEtcdClient) RevokeAllLeases(ctx context.Context, leases map[string]int64) error {
   171  	for _, lease := range leases {
   172  		_, err := c.Client.Revoke(ctx, clientv3.LeaseID(lease))
   173  		if err == nil {
   174  			continue
   175  		} else if etcdErr := err.(rpctypes.EtcdError); etcdErr.Code() == codes.NotFound {
   176  			// it means the etcd lease is already expired or revoked
   177  			continue
   178  		}
   179  		return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   180  	}
   181  	return nil
   182  }
   183  
   184  // GetChangeFeeds returns kv revision and a map mapping from changefeedID to changefeed detail mvccpb.KeyValue
   185  func (c CDCEtcdClient) GetChangeFeeds(ctx context.Context) (int64, map[string]*mvccpb.KeyValue, error) {
   186  	key := GetEtcdKeyChangeFeedList()
   187  
   188  	resp, err := c.Client.Get(ctx, key, clientv3.WithPrefix())
   189  	if err != nil {
   190  		return 0, nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   191  	}
   192  	revision := resp.Header.Revision
   193  	details := make(map[string]*mvccpb.KeyValue, resp.Count)
   194  	for _, kv := range resp.Kvs {
   195  		id, err := model.ExtractKeySuffix(string(kv.Key))
   196  		if err != nil {
   197  			return 0, nil, err
   198  		}
   199  		details[id] = kv
   200  	}
   201  	return revision, details, nil
   202  }
   203  
   204  // GetChangeFeedInfo queries the config of a given changefeed
   205  func (c CDCEtcdClient) GetChangeFeedInfo(ctx context.Context, id string) (*model.ChangeFeedInfo, error) {
   206  	key := GetEtcdKeyChangeFeedInfo(id)
   207  	resp, err := c.Client.Get(ctx, key)
   208  	if err != nil {
   209  		return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   210  	}
   211  	if resp.Count == 0 {
   212  		return nil, cerror.ErrChangeFeedNotExists.GenWithStackByArgs(key)
   213  	}
   214  	detail := &model.ChangeFeedInfo{}
   215  	err = detail.Unmarshal(resp.Kvs[0].Value)
   216  	return detail, errors.Trace(err)
   217  }
   218  
   219  // DeleteChangeFeedInfo deletes a changefeed config from etcd
   220  func (c CDCEtcdClient) DeleteChangeFeedInfo(ctx context.Context, id string) error {
   221  	key := GetEtcdKeyChangeFeedInfo(id)
   222  	_, err := c.Client.Delete(ctx, key)
   223  	return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   224  }
   225  
   226  // GetAllChangeFeedStatus queries all changefeed job status
   227  func (c CDCEtcdClient) GetAllChangeFeedStatus(ctx context.Context) (map[string]*model.ChangeFeedStatus, error) {
   228  	key := JobKeyPrefix
   229  	resp, err := c.Client.Get(ctx, key, clientv3.WithPrefix())
   230  	if err != nil {
   231  		return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   232  	}
   233  	statuses := make(map[string]*model.ChangeFeedStatus, resp.Count)
   234  	for _, rawKv := range resp.Kvs {
   235  		changefeedID, err := model.ExtractKeySuffix(string(rawKv.Key))
   236  		if err != nil {
   237  			return nil, err
   238  		}
   239  		status := &model.ChangeFeedStatus{}
   240  		err = status.Unmarshal(rawKv.Value)
   241  		if err != nil {
   242  			return nil, errors.Trace(err)
   243  		}
   244  		statuses[changefeedID] = status
   245  	}
   246  	return statuses, nil
   247  }
   248  
   249  // GetChangeFeedStatus queries the checkpointTs and resovledTs of a given changefeed
   250  func (c CDCEtcdClient) GetChangeFeedStatus(ctx context.Context, id string) (*model.ChangeFeedStatus, int64, error) {
   251  	key := GetEtcdKeyJob(id)
   252  	resp, err := c.Client.Get(ctx, key)
   253  	if err != nil {
   254  		return nil, 0, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   255  	}
   256  	if resp.Count == 0 {
   257  		return nil, 0, cerror.ErrChangeFeedNotExists.GenWithStackByArgs(key)
   258  	}
   259  	info := &model.ChangeFeedStatus{}
   260  	err = info.Unmarshal(resp.Kvs[0].Value)
   261  	return info, resp.Kvs[0].ModRevision, errors.Trace(err)
   262  }
   263  
   264  // GetCaptures returns kv revision and CaptureInfo list
   265  func (c CDCEtcdClient) GetCaptures(ctx context.Context) (int64, []*model.CaptureInfo, error) {
   266  	key := CaptureInfoKeyPrefix
   267  
   268  	resp, err := c.Client.Get(ctx, key, clientv3.WithPrefix())
   269  	if err != nil {
   270  		return 0, nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   271  	}
   272  	revision := resp.Header.Revision
   273  	infos := make([]*model.CaptureInfo, 0, resp.Count)
   274  	for _, kv := range resp.Kvs {
   275  		info := &model.CaptureInfo{}
   276  		err := info.Unmarshal(kv.Value)
   277  		if err != nil {
   278  			return 0, nil, errors.Trace(err)
   279  		}
   280  		infos = append(infos, info)
   281  	}
   282  	return revision, infos, nil
   283  }
   284  
   285  // GetCaptureLeases returns a map mapping from capture ID to its lease
   286  func (c CDCEtcdClient) GetCaptureLeases(ctx context.Context) (map[string]int64, error) {
   287  	key := CaptureInfoKeyPrefix
   288  
   289  	resp, err := c.Client.Get(ctx, key, clientv3.WithPrefix())
   290  	if err != nil {
   291  		return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   292  	}
   293  	leases := make(map[string]int64, resp.Count)
   294  	for _, kv := range resp.Kvs {
   295  		captureID, err := model.ExtractKeySuffix(string(kv.Key))
   296  		if err != nil {
   297  			return nil, err
   298  		}
   299  		leases[captureID] = kv.Lease
   300  	}
   301  	return leases, nil
   302  }
   303  
   304  // CreateChangefeedInfo creates a change feed info into etcd and fails if it is already exists.
   305  func (c CDCEtcdClient) CreateChangefeedInfo(ctx context.Context, info *model.ChangeFeedInfo, changeFeedID string) error {
   306  	infoKey := GetEtcdKeyChangeFeedInfo(changeFeedID)
   307  	jobKey := GetEtcdKeyJob(changeFeedID)
   308  	value, err := info.Marshal()
   309  	if err != nil {
   310  		return errors.Trace(err)
   311  	}
   312  	resp, err := c.Client.Txn(ctx).If(
   313  		clientv3.Compare(clientv3.ModRevision(infoKey), "=", 0),
   314  		clientv3.Compare(clientv3.ModRevision(jobKey), "=", 0),
   315  	).Then(
   316  		clientv3.OpPut(infoKey, value),
   317  	).Commit()
   318  	if err != nil {
   319  		return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   320  	}
   321  	if !resp.Succeeded {
   322  		log.Warn("changefeed already exists, ignore create changefeed",
   323  			zap.String("changefeed", changeFeedID))
   324  		return cerror.ErrChangeFeedAlreadyExists.GenWithStackByArgs(changeFeedID)
   325  	}
   326  	return errors.Trace(err)
   327  }
   328  
   329  // SaveChangeFeedInfo stores change feed info into etcd
   330  // TODO: this should be called from outer system, such as from a TiDB client
   331  func (c CDCEtcdClient) SaveChangeFeedInfo(ctx context.Context, info *model.ChangeFeedInfo, changeFeedID string) error {
   332  	key := GetEtcdKeyChangeFeedInfo(changeFeedID)
   333  	value, err := info.Marshal()
   334  	if err != nil {
   335  		return errors.Trace(err)
   336  	}
   337  	_, err = c.Client.Put(ctx, key, value)
   338  	return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   339  }
   340  
   341  // GetAllTaskPositions queries all task positions of a changefeed, and returns a map
   342  // mapping from captureID to TaskPositions
   343  func (c CDCEtcdClient) GetAllTaskPositions(ctx context.Context, changefeedID string) (map[string]*model.TaskPosition, error) {
   344  	resp, err := c.Client.Get(ctx, TaskPositionKeyPrefix, clientv3.WithPrefix())
   345  	if err != nil {
   346  		return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   347  	}
   348  	positions := make(map[string]*model.TaskPosition, resp.Count)
   349  	for _, rawKv := range resp.Kvs {
   350  		changeFeed, err := model.ExtractKeySuffix(string(rawKv.Key))
   351  		if err != nil {
   352  			return nil, err
   353  		}
   354  		endIndex := len(rawKv.Key) - len(changeFeed) - 1
   355  		captureID, err := model.ExtractKeySuffix(string(rawKv.Key[0:endIndex]))
   356  		if err != nil {
   357  			return nil, err
   358  		}
   359  		if changeFeed != changefeedID {
   360  			continue
   361  		}
   362  		info := &model.TaskPosition{}
   363  		err = info.Unmarshal(rawKv.Value)
   364  		if err != nil {
   365  			return nil, cerror.ErrDecodeFailed.GenWithStackByArgs("failed to unmarshal task position: %s", err)
   366  		}
   367  		positions[captureID] = info
   368  	}
   369  	return positions, nil
   370  }
   371  
   372  // RemoveAllTaskPositions removes all task positions of a changefeed
   373  func (c CDCEtcdClient) RemoveAllTaskPositions(ctx context.Context, changefeedID string) error {
   374  	resp, err := c.Client.Get(ctx, TaskPositionKeyPrefix, clientv3.WithPrefix())
   375  	if err != nil {
   376  		return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   377  	}
   378  	for _, rawKv := range resp.Kvs {
   379  		changeFeed, err := model.ExtractKeySuffix(string(rawKv.Key))
   380  		if err != nil {
   381  			return err
   382  		}
   383  		endIndex := len(rawKv.Key) - len(changeFeed) - 1
   384  		captureID, err := model.ExtractKeySuffix(string(rawKv.Key[0:endIndex]))
   385  		if err != nil {
   386  			return err
   387  		}
   388  		if changeFeed != changefeedID {
   389  			continue
   390  		}
   391  		key := GetEtcdKeyTaskPosition(changefeedID, captureID)
   392  		_, err = c.Client.Delete(ctx, key)
   393  		if err != nil {
   394  			return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   395  		}
   396  	}
   397  	return nil
   398  }
   399  
   400  // GetProcessors queries all processors of the cdc cluster,
   401  // and returns a slice of ProcInfoSnap(without table info)
   402  func (c CDCEtcdClient) GetProcessors(ctx context.Context) ([]*model.ProcInfoSnap, error) {
   403  	resp, err := c.Client.Get(ctx, TaskStatusKeyPrefix, clientv3.WithPrefix())
   404  	if err != nil {
   405  		return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   406  	}
   407  	infos := make([]*model.ProcInfoSnap, 0, resp.Count)
   408  	for _, rawKv := range resp.Kvs {
   409  		changefeedID, err := model.ExtractKeySuffix(string(rawKv.Key))
   410  		if err != nil {
   411  			return nil, err
   412  		}
   413  		endIndex := len(rawKv.Key) - len(changefeedID) - 1
   414  		captureID, err := model.ExtractKeySuffix(string(rawKv.Key[0:endIndex]))
   415  		if err != nil {
   416  			return nil, err
   417  		}
   418  		info := &model.ProcInfoSnap{
   419  			CfID:      changefeedID,
   420  			CaptureID: captureID,
   421  		}
   422  		infos = append(infos, info)
   423  	}
   424  	return infos, nil
   425  }
   426  
   427  // GetAllTaskStatus queries all task status of a changefeed, and returns a map
   428  // mapping from captureID to TaskStatus
   429  func (c CDCEtcdClient) GetAllTaskStatus(ctx context.Context, changefeedID string) (model.ProcessorsInfos, error) {
   430  	resp, err := c.Client.Get(ctx, TaskStatusKeyPrefix, clientv3.WithPrefix())
   431  	if err != nil {
   432  		return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   433  	}
   434  	pinfo := make(map[string]*model.TaskStatus, resp.Count)
   435  	for _, rawKv := range resp.Kvs {
   436  		changeFeed, err := model.ExtractKeySuffix(string(rawKv.Key))
   437  		if err != nil {
   438  			return nil, err
   439  		}
   440  		endIndex := len(rawKv.Key) - len(changeFeed) - 1
   441  		captureID, err := model.ExtractKeySuffix(string(rawKv.Key[0:endIndex]))
   442  		if err != nil {
   443  			return nil, err
   444  		}
   445  		if changeFeed != changefeedID {
   446  			continue
   447  		}
   448  		info := &model.TaskStatus{}
   449  		err = info.Unmarshal(rawKv.Value)
   450  		if err != nil {
   451  			return nil, cerror.ErrDecodeFailed.GenWithStackByArgs("failed to unmarshal task status: %s", err)
   452  		}
   453  		info.ModRevision = rawKv.ModRevision
   454  		pinfo[captureID] = info
   455  	}
   456  	return pinfo, nil
   457  }
   458  
   459  // RemoveAllTaskStatus removes all task status of a changefeed
   460  func (c CDCEtcdClient) RemoveAllTaskStatus(ctx context.Context, changefeedID string) error {
   461  	resp, err := c.Client.Get(ctx, TaskStatusKeyPrefix, clientv3.WithPrefix())
   462  	if err != nil {
   463  		return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   464  	}
   465  	for _, rawKv := range resp.Kvs {
   466  		changeFeed, err := model.ExtractKeySuffix(string(rawKv.Key))
   467  		if err != nil {
   468  			return err
   469  		}
   470  		endIndex := len(rawKv.Key) - len(changeFeed) - 1
   471  		captureID, err := model.ExtractKeySuffix(string(rawKv.Key[0:endIndex]))
   472  		if err != nil {
   473  			return err
   474  		}
   475  		if changeFeed != changefeedID {
   476  			continue
   477  		}
   478  		key := GetEtcdKeyTaskStatus(changefeedID, captureID)
   479  		_, err = c.Client.Delete(ctx, key)
   480  		if err != nil {
   481  			return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   482  		}
   483  	}
   484  	return nil
   485  }
   486  
   487  // GetTaskStatus queries task status from etcd, returns
   488  //  - ModRevision of the given key
   489  //  - *model.TaskStatus unmarshaled from the value
   490  //  - error if error happens
   491  func (c CDCEtcdClient) GetTaskStatus(
   492  	ctx context.Context,
   493  	changefeedID string,
   494  	captureID string,
   495  ) (int64, *model.TaskStatus, error) {
   496  	key := GetEtcdKeyTaskStatus(changefeedID, captureID)
   497  	resp, err := c.Client.Get(ctx, key)
   498  	if err != nil {
   499  		return 0, nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   500  	}
   501  	if resp.Count == 0 {
   502  		return 0, nil, cerror.ErrTaskStatusNotExists.GenWithStackByArgs(key)
   503  	}
   504  	info := &model.TaskStatus{}
   505  	err = info.Unmarshal(resp.Kvs[0].Value)
   506  	return resp.Kvs[0].ModRevision, info, errors.Trace(err)
   507  }
   508  
   509  // PutTaskStatus puts task status into etcd.
   510  func (c CDCEtcdClient) PutTaskStatus(
   511  	ctx context.Context,
   512  	changefeedID string,
   513  	captureID string,
   514  	info *model.TaskStatus,
   515  ) error {
   516  	data, err := info.Marshal()
   517  	if err != nil {
   518  		return errors.Trace(err)
   519  	}
   520  
   521  	key := GetEtcdKeyTaskStatus(changefeedID, captureID)
   522  
   523  	_, err = c.Client.Put(ctx, key, data)
   524  	if err != nil {
   525  		return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   526  	}
   527  
   528  	return nil
   529  }
   530  
   531  // GetTaskWorkload queries task workload from etcd, returns
   532  //  - model.TaskWorkload unmarshaled from the value
   533  //  - error if error happens
   534  func (c CDCEtcdClient) GetTaskWorkload(
   535  	ctx context.Context,
   536  	changefeedID string,
   537  	captureID string,
   538  ) (model.TaskWorkload, error) {
   539  	key := GetEtcdKeyTaskWorkload(changefeedID, captureID)
   540  	resp, err := c.Client.Get(ctx, key)
   541  	if err != nil {
   542  		return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   543  	}
   544  	if resp.Count == 0 {
   545  		return make(model.TaskWorkload), nil
   546  	}
   547  	workload := make(model.TaskWorkload)
   548  	err = workload.Unmarshal(resp.Kvs[0].Value)
   549  	return workload, errors.Trace(err)
   550  }
   551  
   552  // PutTaskWorkload puts task workload into etcd.
   553  func (c CDCEtcdClient) PutTaskWorkload(
   554  	ctx context.Context,
   555  	changefeedID string,
   556  	captureID model.CaptureID,
   557  	info *model.TaskWorkload,
   558  ) error {
   559  	data, err := info.Marshal()
   560  	if err != nil {
   561  		return errors.Trace(err)
   562  	}
   563  
   564  	key := GetEtcdKeyTaskWorkload(changefeedID, captureID)
   565  
   566  	_, err = c.Client.Put(ctx, key, data)
   567  	if err != nil {
   568  		return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   569  	}
   570  
   571  	return nil
   572  }
   573  
   574  // DeleteTaskWorkload deletes task workload from etcd
   575  func (c CDCEtcdClient) DeleteTaskWorkload(
   576  	ctx context.Context,
   577  	changefeedID string,
   578  	captureID string,
   579  ) error {
   580  	key := GetEtcdKeyTaskWorkload(changefeedID, captureID)
   581  	_, err := c.Client.Delete(ctx, key)
   582  	return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   583  }
   584  
   585  // GetAllTaskWorkloads queries all task workloads of a changefeed, and returns a map
   586  // mapping from captureID to TaskWorkloads
   587  func (c CDCEtcdClient) GetAllTaskWorkloads(ctx context.Context, changefeedID string) (map[string]*model.TaskWorkload, error) {
   588  	resp, err := c.Client.Get(ctx, TaskWorkloadKeyPrefix, clientv3.WithPrefix())
   589  	if err != nil {
   590  		return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   591  	}
   592  	workloads := make(map[string]*model.TaskWorkload, resp.Count)
   593  	for _, rawKv := range resp.Kvs {
   594  		changeFeed, err := model.ExtractKeySuffix(string(rawKv.Key))
   595  		if err != nil {
   596  			return nil, err
   597  		}
   598  		endIndex := len(rawKv.Key) - len(changeFeed) - 1
   599  		captureID, err := model.ExtractKeySuffix(string(rawKv.Key[0:endIndex]))
   600  		if err != nil {
   601  			return nil, err
   602  		}
   603  		if changeFeed != changefeedID {
   604  			continue
   605  		}
   606  		info := &model.TaskWorkload{}
   607  		err = info.Unmarshal(rawKv.Value)
   608  		if err != nil {
   609  			return nil, cerror.ErrDecodeFailed.GenWithStackByArgs("failed to unmarshal task workload: %s", err)
   610  		}
   611  		workloads[captureID] = info
   612  	}
   613  	return workloads, nil
   614  }
   615  
// UpdateTaskStatusFunc is a function that updates the task status.
//
// AtomicPutTaskStatus calls it with the ModRevision it read for the status
// key and the status to mutate in place. The function reports through
// updated whether it changed the status; a non-nil err aborts the current
// attempt.
type UpdateTaskStatusFunc func(int64, *model.TaskStatus) (updated bool, err error)
   618  
// AtomicPutTaskStatus puts task status into etcd atomically.
//
// Each attempt reads the current status of (changefeedID, captureID), applies
// every function in updateFuncs to it in order, and, if at least one of them
// reports a change, writes the result back in a transaction that only
// succeeds when the key's ModRevision is still the one that was read (or the
// key is still absent when it did not exist). Attempts run under retry.Do
// with the putTaskStatus* constants and cerror.IsRetryableError as the
// retry predicate.
//
// On success it returns the resulting status and the revision from the
// transaction response header; that revision stays 0 when no update function
// reported a change and therefore nothing was written. On failure the
// returned status is nil.
func (c CDCEtcdClient) AtomicPutTaskStatus(
	ctx context.Context,
	changefeedID string,
	captureID string,
	updateFuncs ...UpdateTaskStatusFunc,
) (*model.TaskStatus, int64, error) {
	// status and newModRevision are captured by the closure below so that the
	// result of the last attempt is visible after retry.Do returns.
	var status *model.TaskStatus
	var newModRevision int64
	err := retry.Do(ctx, func() error {
		var modRevision int64
		var err error
		modRevision, status, err = c.GetTaskStatus(ctx, changefeedID, captureID)
		key := GetEtcdKeyTaskStatus(changefeedID, captureID)
		var writeCmp clientv3.Cmp
		if err != nil {
			// Any error other than "status does not exist" ends this attempt.
			if cerror.ErrTaskStatusNotExists.NotEqual(err) {
				return errors.Trace(err)
			}
			// No status yet: start from an empty one and require that the key
			// is still absent when the transaction commits.
			status = new(model.TaskStatus)
			writeCmp = clientv3.Compare(clientv3.ModRevision(key), "=", 0)
		} else {
			// Require that nobody modified the key since it was read.
			writeCmp = clientv3.Compare(clientv3.ModRevision(key), "=", modRevision)
		}
		updated := false
		for _, updateFunc := range updateFuncs {
			u, err := updateFunc(modRevision, status)
			if err != nil {
				return err
			}
			updated = updated || u
		}
		// Nothing changed: skip the write entirely.
		if !updated {
			return nil
		}
		value, err := status.Marshal()
		if err != nil {
			return errors.Trace(err)
		}

		resp, err := c.Client.Txn(ctx).If(writeCmp).Then(
			clientv3.OpPut(key, value),
		).Commit()
		if err != nil {
			return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
		}

		// The comparison failed, i.e. the key changed between the read and
		// the commit; report a conflict for this attempt.
		if !resp.Succeeded {
			log.Info("outdated table infos, ignore update taskStatus")
			return cerror.ErrWriteTsConflict.GenWithStackByArgs(key)
		}
		newModRevision = resp.Header.GetRevision()
		return nil
	}, retry.WithBackoffBaseDelay(putTaskStatusBackoffBaseDelayInMs), retry.WithMaxTries(putTaskStatusMaxTries), retry.WithIsRetryableErr(cerror.IsRetryableError))
	if err != nil {
		return nil, newModRevision, errors.Trace(err)
	}
	return status, newModRevision, nil
}
   678  
   679  // GetTaskPosition queries task process from etcd, returns
   680  //  - ModRevision of the given key
   681  //  - *model.TaskPosition unmarshaled from the value
   682  //  - error if error happens
   683  func (c CDCEtcdClient) GetTaskPosition(
   684  	ctx context.Context,
   685  	changefeedID string,
   686  	captureID string,
   687  ) (int64, *model.TaskPosition, error) {
   688  	key := GetEtcdKeyTaskPosition(changefeedID, captureID)
   689  	resp, err := c.Client.Get(ctx, key)
   690  	if err != nil {
   691  		return 0, nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   692  	}
   693  	if resp.Count == 0 {
   694  		return 0, nil, cerror.ErrTaskPositionNotExists.GenWithStackByArgs(key)
   695  	}
   696  	info := &model.TaskPosition{}
   697  	err = info.Unmarshal(resp.Kvs[0].Value)
   698  	return resp.Kvs[0].ModRevision, info, errors.Trace(err)
   699  }
   700  
   701  // PutTaskPositionOnChange puts task position information into etcd if the
   702  // task position value changes or the presvious value does not exist in etcd.
   703  // returns true if task position is written to etcd.
   704  func (c CDCEtcdClient) PutTaskPositionOnChange(
   705  	ctx context.Context,
   706  	changefeedID string,
   707  	captureID string,
   708  	info *model.TaskPosition,
   709  ) (bool, error) {
   710  	data, err := info.Marshal()
   711  	if err != nil {
   712  		return false, errors.Trace(err)
   713  	}
   714  
   715  	key := GetEtcdKeyTaskPosition(changefeedID, captureID)
   716  	resp, err := c.Client.Txn(ctx).If(
   717  		clientv3.Compare(clientv3.ModRevision(key), ">", 0),
   718  		clientv3.Compare(clientv3.Value(key), "=", data),
   719  	).Else(clientv3.OpPut(key, data)).Commit()
   720  	if err != nil {
   721  		return false, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   722  	}
   723  	return !resp.Succeeded, nil
   724  }
   725  
   726  // DeleteTaskPosition remove task position from etcd
   727  func (c CDCEtcdClient) DeleteTaskPosition(ctx context.Context, changefeedID string, captureID string) error {
   728  	key := GetEtcdKeyTaskPosition(changefeedID, captureID)
   729  	_, err := c.Client.Delete(ctx, key)
   730  	return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   731  }
   732  
   733  // RemoveChangeFeedStatus removes changefeed job status from etcd
   734  func (c CDCEtcdClient) RemoveChangeFeedStatus(
   735  	ctx context.Context,
   736  	changefeedID string,
   737  ) error {
   738  	key := GetEtcdKeyJob(changefeedID)
   739  	_, err := c.Client.Delete(ctx, key)
   740  	return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   741  }
   742  
   743  // PutChangeFeedStatus puts changefeed synchronization status into etcd
   744  func (c CDCEtcdClient) PutChangeFeedStatus(
   745  	ctx context.Context,
   746  	changefeedID string,
   747  	status *model.ChangeFeedStatus,
   748  ) error {
   749  	key := GetEtcdKeyJob(changefeedID)
   750  	value, err := status.Marshal()
   751  	if err != nil {
   752  		return errors.Trace(err)
   753  	}
   754  	_, err = c.Client.Put(ctx, key, value)
   755  	return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   756  }
   757  
   758  // SetChangeFeedStatusTTL sets the TTL of changefeed synchronization status
   759  func (c CDCEtcdClient) SetChangeFeedStatusTTL(
   760  	ctx context.Context,
   761  	changefeedID string,
   762  	ttl int64,
   763  ) error {
   764  	key := GetEtcdKeyJob(changefeedID)
   765  	leaseResp, err := c.Client.Grant(ctx, ttl)
   766  	if err != nil {
   767  		return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   768  	}
   769  	status, _, err := c.GetChangeFeedStatus(ctx, changefeedID)
   770  	if err != nil {
   771  		return errors.Trace(err)
   772  	}
   773  	statusStr, err := status.Marshal()
   774  	if err != nil {
   775  		return errors.Trace(err)
   776  	}
   777  	_, err = c.Client.Put(ctx, key, statusStr, clientv3.WithLease(leaseResp.ID))
   778  	return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   779  }
   780  
   781  // PutAllChangeFeedStatus puts ChangeFeedStatus of each changefeed into etcd
   782  func (c CDCEtcdClient) PutAllChangeFeedStatus(ctx context.Context, infos map[model.ChangeFeedID]*model.ChangeFeedStatus) error {
   783  	var (
   784  		txn = c.Client.Txn(ctx)
   785  		ops = make([]clientv3.Op, 0, embed.DefaultMaxTxnOps)
   786  	)
   787  	for changefeedID, info := range infos {
   788  		storeVal, err := info.Marshal()
   789  		if err != nil {
   790  			return errors.Trace(err)
   791  		}
   792  		key := GetEtcdKeyJob(changefeedID)
   793  		ops = append(ops, clientv3.OpPut(key, storeVal))
   794  		if uint(len(ops)) >= embed.DefaultMaxTxnOps {
   795  			_, err = txn.Then(ops...).Commit()
   796  			if err != nil {
   797  				return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   798  			}
   799  			txn = c.Client.Txn(ctx)
   800  			ops = ops[:0]
   801  		}
   802  	}
   803  	if len(ops) > 0 {
   804  		_, err := txn.Then(ops...).Commit()
   805  		if err != nil {
   806  			return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   807  		}
   808  	}
   809  	return nil
   810  }
   811  
   812  // DeleteTaskStatus deletes task status from etcd
   813  func (c CDCEtcdClient) DeleteTaskStatus(
   814  	ctx context.Context,
   815  	cfID string,
   816  	captureID string,
   817  ) error {
   818  	key := GetEtcdKeyTaskStatus(cfID, captureID)
   819  	_, err := c.Client.Delete(ctx, key)
   820  	return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   821  }
   822  
   823  // PutCaptureInfo put capture info into etcd.
   824  func (c CDCEtcdClient) PutCaptureInfo(ctx context.Context, info *model.CaptureInfo, leaseID clientv3.LeaseID) error {
   825  	data, err := info.Marshal()
   826  	if err != nil {
   827  		return errors.Trace(err)
   828  	}
   829  
   830  	key := GetEtcdKeyCaptureInfo(info.ID)
   831  	_, err = c.Client.Put(ctx, key, string(data), clientv3.WithLease(leaseID))
   832  	return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   833  }
   834  
   835  // DeleteCaptureInfo delete capture info from etcd.
   836  func (c CDCEtcdClient) DeleteCaptureInfo(ctx context.Context, id string) error {
   837  	key := GetEtcdKeyCaptureInfo(id)
   838  	_, err := c.Client.Delete(ctx, key)
   839  	return cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   840  }
   841  
   842  // GetCaptureInfo get capture info from etcd.
   843  // return errCaptureNotExist if the capture not exists.
   844  func (c CDCEtcdClient) GetCaptureInfo(ctx context.Context, id string) (info *model.CaptureInfo, err error) {
   845  	key := GetEtcdKeyCaptureInfo(id)
   846  
   847  	resp, err := c.Client.Get(ctx, key)
   848  	if err != nil {
   849  		return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   850  	}
   851  
   852  	if len(resp.Kvs) == 0 {
   853  		return nil, cerror.ErrCaptureNotExist.GenWithStackByArgs(key)
   854  	}
   855  
   856  	info = new(model.CaptureInfo)
   857  	err = info.Unmarshal(resp.Kvs[0].Value)
   858  	if err != nil {
   859  		return nil, errors.Trace(err)
   860  	}
   861  
   862  	return
   863  }
   864  
   865  // GetOwnerID returns the owner id by querying etcd
   866  func (c CDCEtcdClient) GetOwnerID(ctx context.Context, key string) (string, error) {
   867  	resp, err := c.Client.Get(ctx, key, clientv3.WithFirstCreate()...)
   868  	if err != nil {
   869  		return "", cerror.WrapError(cerror.ErrPDEtcdAPIError, err)
   870  	}
   871  	if len(resp.Kvs) == 0 {
   872  		return "", concurrency.ErrElectionNoLeader
   873  	}
   874  	return string(resp.Kvs[0].Value), nil
   875  }
   876  
   877  // LeaseGuardDeleteTaskStatus is a wrapper to DeleteTaskStatus,
   878  // with a context restricted by lease TTL.
   879  func (c CDCEtcdClient) LeaseGuardDeleteTaskStatus(
   880  	ctx context.Context,
   881  	cfID string,
   882  	captureID string,
   883  	leaseID clientv3.LeaseID,
   884  ) error {
   885  	ctx, cancel, err := c.contextWithSafeLease(ctx, leaseID)
   886  	if err != nil {
   887  		return errors.Trace(err)
   888  	}
   889  	defer cancel()
   890  	return c.DeleteTaskStatus(ctx, cfID, captureID)
   891  }
   892  
   893  // LeaseGuardDeleteTaskPosition is a wrapper to DeleteTaskPosition,
   894  // with a context restricted by lease TTL.
   895  func (c CDCEtcdClient) LeaseGuardDeleteTaskPosition(
   896  	ctx context.Context,
   897  	cfID string,
   898  	captureID string,
   899  	leaseID clientv3.LeaseID,
   900  ) error {
   901  	ctx, cancel, err := c.contextWithSafeLease(ctx, leaseID)
   902  	if err != nil {
   903  		return errors.Trace(err)
   904  	}
   905  	defer cancel()
   906  	return c.DeleteTaskPosition(ctx, cfID, captureID)
   907  }
   908  
   909  // LeaseGuardDeleteTaskWorkload is a wrapper to DeleteTaskWorkload,
   910  // with a context restricted by lease TTL.
   911  func (c CDCEtcdClient) LeaseGuardDeleteTaskWorkload(
   912  	ctx context.Context,
   913  	cfID string,
   914  	captureID string,
   915  	leaseID clientv3.LeaseID,
   916  ) error {
   917  	ctx, cancel, err := c.contextWithSafeLease(ctx, leaseID)
   918  	if err != nil {
   919  		return errors.Trace(err)
   920  	}
   921  	defer cancel()
   922  	return c.DeleteTaskWorkload(ctx, cfID, captureID)
   923  }
   924  
   925  // LeaseGuardSaveChangeFeedInfo is a wrapper to SaveChangeFeedInfo,
   926  // with a context restricted by lease TTL.
   927  func (c CDCEtcdClient) LeaseGuardSaveChangeFeedInfo(
   928  	ctx context.Context,
   929  	info *model.ChangeFeedInfo,
   930  	changefeedID string,
   931  	leaseID clientv3.LeaseID,
   932  ) error {
   933  	ctx, cancel, err := c.contextWithSafeLease(ctx, leaseID)
   934  	if err != nil {
   935  		return errors.Trace(err)
   936  	}
   937  	defer cancel()
   938  	return c.SaveChangeFeedInfo(ctx, info, changefeedID)
   939  }
   940  
   941  // LeaseGuardDeleteChangeFeedInfo is a wrapper to DeleteChangeFeedInfo,
   942  // with a context restricted by lease TTL.
   943  func (c CDCEtcdClient) LeaseGuardDeleteChangeFeedInfo(
   944  	ctx context.Context,
   945  	changefeedID string,
   946  	leaseID clientv3.LeaseID,
   947  ) error {
   948  	ctx, cancel, err := c.contextWithSafeLease(ctx, leaseID)
   949  	if err != nil {
   950  		return errors.Trace(err)
   951  	}
   952  	defer cancel()
   953  	return c.DeleteChangeFeedInfo(ctx, changefeedID)
   954  }
   955  
   956  // LeaseGuardRemoveChangeFeedStatus is a wrapper to RemoveChangeFeedStatus,
   957  // with a context restricted by lease TTL.
   958  func (c CDCEtcdClient) LeaseGuardRemoveChangeFeedStatus(
   959  	ctx context.Context,
   960  	changefeedID string,
   961  	leaseID clientv3.LeaseID,
   962  ) error {
   963  	ctx, cancel, err := c.contextWithSafeLease(ctx, leaseID)
   964  	if err != nil {
   965  		return errors.Trace(err)
   966  	}
   967  	defer cancel()
   968  	return c.RemoveChangeFeedStatus(ctx, changefeedID)
   969  }
   970  
   971  // LeaseGuardPutChangeFeedStatus is a wrapper to PutChangeFeedStatus,
   972  // with a context restricted by lease TTL.
   973  func (c CDCEtcdClient) LeaseGuardPutChangeFeedStatus(
   974  	ctx context.Context,
   975  	changefeedID string,
   976  	status *model.ChangeFeedStatus,
   977  	leaseID clientv3.LeaseID,
   978  ) error {
   979  	ctx, cancel, err := c.contextWithSafeLease(ctx, leaseID)
   980  	if err != nil {
   981  		return errors.Trace(err)
   982  	}
   983  	defer cancel()
   984  	return c.PutChangeFeedStatus(ctx, changefeedID, status)
   985  }
   986  
   987  // LeaseGuardRemoveAllTaskStatus wraps RemoveAllTaskStatus,
   988  // with a context restricted by lease TTL.
   989  func (c CDCEtcdClient) LeaseGuardRemoveAllTaskStatus(
   990  	ctx context.Context,
   991  	changefeedID string,
   992  	leaseID clientv3.LeaseID,
   993  ) error {
   994  	ctx, cancel, err := c.contextWithSafeLease(ctx, leaseID)
   995  	if err != nil {
   996  		return errors.Trace(err)
   997  	}
   998  	defer cancel()
   999  	return c.RemoveAllTaskStatus(ctx, changefeedID)
  1000  }
  1001  
  1002  // LeaseGuardRemoveAllTaskPositions wraps RemoveAllTaskPositions with a context restricted by lease TTL.
  1003  func (c CDCEtcdClient) LeaseGuardRemoveAllTaskPositions(
  1004  	ctx context.Context,
  1005  	changefeedID string,
  1006  	leaseID clientv3.LeaseID,
  1007  ) error {
  1008  	ctx, cancel, err := c.contextWithSafeLease(ctx, leaseID)
  1009  	if err != nil {
  1010  		return errors.Trace(err)
  1011  	}
  1012  	defer cancel()
  1013  	return c.RemoveAllTaskPositions(ctx, changefeedID)
  1014  }
  1015  
  1016  // LeaseGuardPutAllChangeFeedStatus wraps PutAllChangeFeedStatus with a context restricted by lease TTL.
  1017  func (c CDCEtcdClient) LeaseGuardPutAllChangeFeedStatus(
  1018  	ctx context.Context,
  1019  	infos map[model.ChangeFeedID]*model.ChangeFeedStatus,
  1020  	leaseID clientv3.LeaseID,
  1021  ) error {
  1022  	ctx, cancel, err := c.contextWithSafeLease(ctx, leaseID)
  1023  	if err != nil {
  1024  		return errors.Trace(err)
  1025  	}
  1026  	defer cancel()
  1027  	return c.PutAllChangeFeedStatus(ctx, infos)
  1028  }
  1029  
  1030  // LeaseGuardAtomicPutTaskStatus puts task status into etcd atomically.
  1031  func (c CDCEtcdClient) LeaseGuardAtomicPutTaskStatus(
  1032  	ctx context.Context,
  1033  	changefeedID string,
  1034  	captureID string,
  1035  	leaseID clientv3.LeaseID,
  1036  	updateFuncs ...UpdateTaskStatusFunc,
  1037  ) (*model.TaskStatus, int64, error) {
  1038  	ctx, cancel, err := c.contextWithSafeLease(ctx, leaseID)
  1039  	if err != nil {
  1040  		return nil, 0, errors.Trace(err)
  1041  	}
  1042  	defer cancel()
  1043  	return c.AtomicPutTaskStatus(ctx, changefeedID, captureID, updateFuncs...)
  1044  }