github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/pkg/etcd/client.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package etcd 15 16 import ( 17 "context" 18 19 "github.com/pingcap/errors" 20 "github.com/pingcap/log" 21 cerrors "github.com/pingcap/ticdc/pkg/errors" 22 "github.com/pingcap/ticdc/pkg/retry" 23 "github.com/prometheus/client_golang/prometheus" 24 "go.etcd.io/etcd/clientv3" 25 "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes" 26 "go.uber.org/zap" 27 "google.golang.org/grpc/codes" 28 ) 29 30 // etcd operation names 31 const ( 32 EtcdPut = "Put" 33 EtcdGet = "Get" 34 EtcdTxn = "Txn" 35 EtcdDel = "Del" 36 EtcdGrant = "Grant" 37 EtcdRevoke = "Revoke" 38 ) 39 40 const ( 41 backoffBaseDelayInMs = 500 42 // in previous/backoff retry pkg, the DefaultMaxInterval = 60 * time.Second 43 backoffMaxDelayInMs = 60 * 1000 44 ) 45 46 // set to var instead of const for mocking the value to speedup test 47 var maxTries int64 = 8 48 49 // Client is a simple wrapper that adds retry to etcd RPC 50 type Client struct { 51 cli *clientv3.Client 52 metrics map[string]prometheus.Counter 53 } 54 55 // Wrap warps a clientv3.Client that provides etcd APIs required by TiCDC. 56 func Wrap(cli *clientv3.Client, metrics map[string]prometheus.Counter) *Client { 57 return &Client{cli: cli, metrics: metrics} 58 } 59 60 // Unwrap returns a clientv3.Client 61 func (c *Client) Unwrap() *clientv3.Client { 62 return c.cli 63 } 64 65 func retryRPC(rpcName string, metric prometheus.Counter, etcdRPC func() error) error { 66 // By default, PD etcd sets [3s, 6s) for election timeout. 67 // Some rpc could fail due to etcd errors, like "proposal dropped". 68 // Retry at least two election timeout to handle the case that two PDs restarted 69 // (the first election maybe failed). 70 // 16s = \sum_{n=0}^{6} 0.5*1.5^n 71 return retry.Do(context.Background(), func() error { 72 err := etcdRPC() 73 if err != nil && errors.Cause(err) != context.Canceled { 74 log.Warn("etcd RPC failed", zap.String("RPC", rpcName), zap.Error(err)) 75 } 76 if metric != nil { 77 metric.Inc() 78 } 79 return err 80 }, retry.WithBackoffBaseDelay(backoffBaseDelayInMs), retry.WithBackoffMaxDelay(backoffMaxDelayInMs), retry.WithMaxTries(maxTries), retry.WithIsRetryableErr(isRetryableError(rpcName))) 81 } 82 83 // Put delegates request to clientv3.KV.Put 84 func (c *Client) Put(ctx context.Context, key, val string, opts ...clientv3.OpOption) (resp *clientv3.PutResponse, err error) { 85 err = retryRPC(EtcdPut, c.metrics[EtcdPut], func() error { 86 var inErr error 87 resp, inErr = c.cli.Put(ctx, key, val, opts...) 88 return inErr 89 }) 90 return 91 } 92 93 // Get delegates request to clientv3.KV.Get 94 func (c *Client) Get(ctx context.Context, key string, opts ...clientv3.OpOption) (resp *clientv3.GetResponse, err error) { 95 err = retryRPC(EtcdGet, c.metrics[EtcdGet], func() error { 96 var inErr error 97 resp, inErr = c.cli.Get(ctx, key, opts...) 98 return inErr 99 }) 100 return 101 } 102 103 // Delete delegates request to clientv3.KV.Delete 104 func (c *Client) Delete(ctx context.Context, key string, opts ...clientv3.OpOption) (resp *clientv3.DeleteResponse, err error) { 105 if metric, ok := c.metrics[EtcdTxn]; ok { 106 metric.Inc() 107 } 108 // We don't retry on delete operatoin. It's dangerous. 109 return c.cli.Delete(ctx, key, opts...) 110 } 111 112 // Txn delegates request to clientv3.KV.Txn 113 func (c *Client) Txn(ctx context.Context) clientv3.Txn { 114 if metric, ok := c.metrics[EtcdTxn]; ok { 115 metric.Inc() 116 } 117 return c.cli.Txn(ctx) 118 } 119 120 // Grant delegates request to clientv3.Lease.Grant 121 func (c *Client) Grant(ctx context.Context, ttl int64) (resp *clientv3.LeaseGrantResponse, err error) { 122 err = retryRPC(EtcdGrant, c.metrics[EtcdGrant], func() error { 123 var inErr error 124 resp, inErr = c.cli.Grant(ctx, ttl) 125 return inErr 126 }) 127 return 128 } 129 130 func isRetryableError(rpcName string) retry.IsRetryable { 131 return func(err error) bool { 132 if !cerrors.IsRetryableError(err) { 133 return false 134 } 135 if rpcName == EtcdRevoke { 136 if etcdErr, ok := err.(rpctypes.EtcdError); ok && etcdErr.Code() == codes.NotFound { 137 // it means the etcd lease is already expired or revoked 138 return false 139 } 140 } 141 142 return true 143 } 144 } 145 146 // Revoke delegates request to clientv3.Lease.Revoke 147 func (c *Client) Revoke(ctx context.Context, id clientv3.LeaseID) (resp *clientv3.LeaseRevokeResponse, err error) { 148 err = retryRPC(EtcdRevoke, c.metrics[EtcdRevoke], func() error { 149 var inErr error 150 resp, inErr = c.cli.Revoke(ctx, id) 151 return inErr 152 }) 153 return 154 } 155 156 // TimeToLive delegates request to clientv3.Lease.TimeToLive 157 func (c *Client) TimeToLive(ctx context.Context, lease clientv3.LeaseID, opts ...clientv3.LeaseOption) (resp *clientv3.LeaseTimeToLiveResponse, err error) { 158 err = retryRPC(EtcdRevoke, c.metrics[EtcdRevoke], func() error { 159 var inErr error 160 resp, inErr = c.cli.TimeToLive(ctx, lease, opts...) 161 return inErr 162 }) 163 return 164 } 165 166 // Watch delegates request to clientv3.Watcher.Watch 167 func (c *Client) Watch(ctx context.Context, key string, opts ...clientv3.OpOption) clientv3.WatchChan { 168 return c.cli.Watch(ctx, key, opts...) 169 } 170 171 // RequestProgress requests a progress notify response be sent in all watch channels. 172 func (c *Client) RequestProgress(ctx context.Context) error { 173 return c.cli.RequestProgress(ctx) 174 }