github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/pkg/etcd/client.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package etcd
    15  
    16  import (
    17  	"context"
    18  
    19  	"github.com/pingcap/errors"
    20  	"github.com/pingcap/log"
    21  	cerrors "github.com/pingcap/ticdc/pkg/errors"
    22  	"github.com/pingcap/ticdc/pkg/retry"
    23  	"github.com/prometheus/client_golang/prometheus"
    24  	"go.etcd.io/etcd/clientv3"
    25  	"go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
    26  	"go.uber.org/zap"
    27  	"google.golang.org/grpc/codes"
    28  )
    29  
// Names of the etcd operations issued through Client. They key the
// per-operation counters in Client.metrics and, for the retried RPCs, are the
// name passed to retryRPC (where it appears in the failure log).
const (
	EtcdPut    = "Put"
	EtcdGet    = "Get"
	EtcdTxn    = "Txn"
	EtcdDel    = "Del"
	EtcdGrant  = "Grant"
	EtcdRevoke = "Revoke"
)
    39  
// Backoff bounds, in milliseconds, that retryRPC hands to the retry package.
const (
	// backoffBaseDelayInMs is the value given to retry.WithBackoffBaseDelay.
	backoffBaseDelayInMs = 500
	// backoffMaxDelayInMs is the value given to retry.WithBackoffMaxDelay.
	// In the previous backoff retry pkg, DefaultMaxInterval = 60 * time.Second.
	backoffMaxDelayInMs = 60 * 1000
)
    45  
// maxTries is the value retryRPC passes to retry.WithMaxTries.
// It is a var instead of a const so tests can mock the value to speed up.
var maxTries int64 = 8
    48  
// Client is a simple wrapper that adds retry to etcd RPC.
//
// cli is the underlying etcd client every method delegates to. metrics maps an
// operation name (EtcdPut, EtcdGet, ...) to the counter incremented for that
// operation. Put, Get, Grant, Revoke and TimeToLive go through retryRPC;
// Delete, Txn, Watch and RequestProgress are forwarded without retry.
type Client struct {
	cli     *clientv3.Client
	metrics map[string]prometheus.Counter
}
    54  
    55  // Wrap warps a clientv3.Client that provides etcd APIs required by TiCDC.
    56  func Wrap(cli *clientv3.Client, metrics map[string]prometheus.Counter) *Client {
    57  	return &Client{cli: cli, metrics: metrics}
    58  }
    59  
// Unwrap returns the underlying clientv3.Client held by this wrapper. Calls
// made directly on the returned client do not go through the retry and metric
// handling implemented in this file.
func (c *Client) Unwrap() *clientv3.Client {
	return c.cli
}
    64  
    65  func retryRPC(rpcName string, metric prometheus.Counter, etcdRPC func() error) error {
    66  	// By default, PD etcd sets [3s, 6s) for election timeout.
    67  	// Some rpc could fail due to etcd errors, like "proposal dropped".
    68  	// Retry at least two election timeout to handle the case that two PDs restarted
    69  	// (the first election maybe failed).
    70  	// 16s = \sum_{n=0}^{6} 0.5*1.5^n
    71  	return retry.Do(context.Background(), func() error {
    72  		err := etcdRPC()
    73  		if err != nil && errors.Cause(err) != context.Canceled {
    74  			log.Warn("etcd RPC failed", zap.String("RPC", rpcName), zap.Error(err))
    75  		}
    76  		if metric != nil {
    77  			metric.Inc()
    78  		}
    79  		return err
    80  	}, retry.WithBackoffBaseDelay(backoffBaseDelayInMs), retry.WithBackoffMaxDelay(backoffMaxDelayInMs), retry.WithMaxTries(maxTries), retry.WithIsRetryableErr(isRetryableError(rpcName)))
    81  }
    82  
    83  // Put delegates request to clientv3.KV.Put
    84  func (c *Client) Put(ctx context.Context, key, val string, opts ...clientv3.OpOption) (resp *clientv3.PutResponse, err error) {
    85  	err = retryRPC(EtcdPut, c.metrics[EtcdPut], func() error {
    86  		var inErr error
    87  		resp, inErr = c.cli.Put(ctx, key, val, opts...)
    88  		return inErr
    89  	})
    90  	return
    91  }
    92  
    93  // Get delegates request to clientv3.KV.Get
    94  func (c *Client) Get(ctx context.Context, key string, opts ...clientv3.OpOption) (resp *clientv3.GetResponse, err error) {
    95  	err = retryRPC(EtcdGet, c.metrics[EtcdGet], func() error {
    96  		var inErr error
    97  		resp, inErr = c.cli.Get(ctx, key, opts...)
    98  		return inErr
    99  	})
   100  	return
   101  }
   102  
   103  // Delete delegates request to clientv3.KV.Delete
   104  func (c *Client) Delete(ctx context.Context, key string, opts ...clientv3.OpOption) (resp *clientv3.DeleteResponse, err error) {
   105  	if metric, ok := c.metrics[EtcdTxn]; ok {
   106  		metric.Inc()
   107  	}
   108  	// We don't retry on delete operatoin. It's dangerous.
   109  	return c.cli.Delete(ctx, key, opts...)
   110  }
   111  
   112  // Txn delegates request to clientv3.KV.Txn
   113  func (c *Client) Txn(ctx context.Context) clientv3.Txn {
   114  	if metric, ok := c.metrics[EtcdTxn]; ok {
   115  		metric.Inc()
   116  	}
   117  	return c.cli.Txn(ctx)
   118  }
   119  
   120  // Grant delegates request to clientv3.Lease.Grant
   121  func (c *Client) Grant(ctx context.Context, ttl int64) (resp *clientv3.LeaseGrantResponse, err error) {
   122  	err = retryRPC(EtcdGrant, c.metrics[EtcdGrant], func() error {
   123  		var inErr error
   124  		resp, inErr = c.cli.Grant(ctx, ttl)
   125  		return inErr
   126  	})
   127  	return
   128  }
   129  
   130  func isRetryableError(rpcName string) retry.IsRetryable {
   131  	return func(err error) bool {
   132  		if !cerrors.IsRetryableError(err) {
   133  			return false
   134  		}
   135  		if rpcName == EtcdRevoke {
   136  			if etcdErr, ok := err.(rpctypes.EtcdError); ok && etcdErr.Code() == codes.NotFound {
   137  				// it means the etcd lease is already expired or revoked
   138  				return false
   139  			}
   140  		}
   141  
   142  		return true
   143  	}
   144  }
   145  
   146  // Revoke delegates request to clientv3.Lease.Revoke
   147  func (c *Client) Revoke(ctx context.Context, id clientv3.LeaseID) (resp *clientv3.LeaseRevokeResponse, err error) {
   148  	err = retryRPC(EtcdRevoke, c.metrics[EtcdRevoke], func() error {
   149  		var inErr error
   150  		resp, inErr = c.cli.Revoke(ctx, id)
   151  		return inErr
   152  	})
   153  	return
   154  }
   155  
   156  // TimeToLive delegates request to clientv3.Lease.TimeToLive
   157  func (c *Client) TimeToLive(ctx context.Context, lease clientv3.LeaseID, opts ...clientv3.LeaseOption) (resp *clientv3.LeaseTimeToLiveResponse, err error) {
   158  	err = retryRPC(EtcdRevoke, c.metrics[EtcdRevoke], func() error {
   159  		var inErr error
   160  		resp, inErr = c.cli.TimeToLive(ctx, lease, opts...)
   161  		return inErr
   162  	})
   163  	return
   164  }
   165  
// Watch delegates request to clientv3.Watcher.Watch. The call is forwarded
// directly to the underlying client: no retry is applied and no metric is
// incremented by this wrapper.
func (c *Client) Watch(ctx context.Context, key string, opts ...clientv3.OpOption) clientv3.WatchChan {
	return c.cli.Watch(ctx, key, opts...)
}
   170  
// RequestProgress requests a progress notify response be sent in all watch channels.
// The call is forwarded directly to the underlying client, without retry or
// metric accounting, and its error is returned unchanged.
func (c *Client) RequestProgress(ctx context.Context) error {
	return c.cli.RequestProgress(ctx)
}