github.com/pachyderm/pachyderm@v1.13.4/src/client/pkg/discovery/etcd_client.go (about) 1 package discovery 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "net/http" 7 "strings" 8 "time" 9 10 "github.com/coreos/go-etcd/etcd" 11 "github.com/sirupsen/logrus" 12 13 "github.com/pachyderm/pachyderm/src/client/pkg/errors" 14 ) 15 16 type etcdClient struct { 17 client *etcd.Client 18 } 19 20 // customCheckRetry is a fork of etcd's DefaultCheckRetry, except that it issues 21 // more retries before giving up. Because Pachyderm often starts before etcd is 22 // ready, retrying Pachd's connection to etcd in a tight loop (<1s) is often 23 // much faster than waiting for kubernetes to restart the pachd pod. 24 func customCheckRetry(cluster *etcd.Cluster, numReqs int, lastResp http.Response, 25 err error) error { 26 // Retry for 5 minutes, unless the cluster is super huge 27 maxRetries := 2 * len(cluster.Machines) 28 if 600 > maxRetries { 29 maxRetries = 600 30 } 31 if numReqs > maxRetries { 32 errStr := fmt.Sprintf("failed to propose on members %v [last error: %v]", cluster.Machines, err) 33 return &etcd.EtcdError{ 34 ErrorCode: etcd.ErrCodeEtcdNotReachable, 35 Message: "All the given peers are not reachable", 36 Cause: errStr, 37 Index: 0, 38 } 39 } 40 41 if lastResp.StatusCode == 0 { 42 // always retry if it failed to get a response 43 return nil 44 } 45 if lastResp.StatusCode != http.StatusInternalServerError { 46 // The status code indicates that etcd is no longer in leader election. 47 // Something is wrong 48 body := []byte("nil") 49 if lastResp.Body != nil { 50 if b, err := ioutil.ReadAll(lastResp.Body); err == nil { 51 body = b 52 } 53 } 54 errStr := fmt.Sprintf("unhandled http status [%s] with body [%s]", http.StatusText(lastResp.StatusCode), body) 55 return &etcd.EtcdError{ 56 ErrorCode: etcd.ErrCodeUnhandledHTTPStatus, 57 Message: "Unhandled HTTP Status", 58 Cause: errStr, 59 Index: 0, 60 } 61 } 62 63 // sleep some time and expect leader election finish 64 time.Sleep(time.Millisecond * 500) 65 logrus.Warnf("bad response status code from etcd: %d", lastResp.StatusCode) 66 return nil 67 } 68 69 func newEtcdClient(addresses ...string) *etcdClient { 70 client := etcd.NewClient(addresses) 71 client.CheckRetry = customCheckRetry 72 return &etcdClient{client} 73 } 74 75 func (c *etcdClient) Close() error { 76 c.client.Close() 77 return nil 78 } 79 80 func (c *etcdClient) Get(key string) (string, error) { 81 response, err := c.client.Get(key, false, false) 82 if err != nil { 83 return "", err 84 } 85 return response.Node.Value, nil 86 } 87 88 func (c *etcdClient) GetAll(key string) (map[string]string, error) { 89 response, err := c.client.Get(key, false, true) 90 result := make(map[string]string) 91 if err != nil { 92 if strings.HasPrefix(err.Error(), "100: Key not found") { 93 return result, nil 94 } 95 return nil, err 96 } 97 nodeToMap(response.Node, result) 98 return result, nil 99 } 100 101 func (c *etcdClient) WatchAll(key string, cancel chan bool, callBack func(map[string]string) error) error { 102 for { 103 if err := c.watchAllWithoutRetry(key, cancel, callBack); err != nil { 104 etcdErr := &etcd.EtcdError{} 105 if errors.As(err, &etcdErr) { 106 if etcdErr.ErrorCode == 401 { 107 continue 108 } 109 if etcdErr.ErrorCode == 501 { 110 continue 111 } 112 } 113 114 return err 115 } 116 } 117 } 118 119 func (c *etcdClient) Set(key string, value string, ttl uint64) error { 120 _, err := c.client.Set(key, value, ttl) 121 if err != nil { 122 return err 123 } 124 return nil 125 } 126 127 func (c *etcdClient) Create(key string, value string, ttl uint64) error { 128 _, err := c.client.Create(key, value, ttl) 129 if err != nil { 130 return err 131 } 132 return nil 133 } 134 135 func (c *etcdClient) Delete(key string) error { 136 _, err := c.client.Delete(key, false) 137 if err != nil { 138 return err 139 } 140 return nil 141 } 142 143 func (c *etcdClient) CheckAndSet(key string, value string, ttl uint64, oldValue string) error { 144 var err error 145 if oldValue == "" { 146 _, err = c.client.Create(key, value, ttl) 147 } else { 148 _, err = c.client.CompareAndSwap(key, value, ttl, oldValue, 0) 149 } 150 if err != nil { 151 return err 152 } 153 return nil 154 } 155 156 // nodeToMap translates the contents of a node into a map 157 // nodeToMap can be called on the same map with successive results from watch 158 // to accumulate a value 159 // nodeToMap returns true if out was modified 160 func nodeToMap(node *etcd.Node, out map[string]string) bool { 161 key := strings.TrimPrefix(node.Key, "/") 162 if !node.Dir { 163 if node.Value == "" { 164 if _, ok := out[key]; ok { 165 delete(out, key) 166 return true 167 } 168 return false 169 } 170 if value, ok := out[key]; !ok || value != node.Value { 171 out[key] = node.Value 172 return true 173 } 174 return false 175 } 176 changed := false 177 for _, node := range node.Nodes { 178 changed = nodeToMap(node, out) || changed 179 } 180 return changed 181 } 182 183 func (c *etcdClient) watchAllWithoutRetry(key string, cancel chan bool, callBack func(map[string]string) error) error { 184 var waitIndex uint64 = 1 185 value := make(map[string]string) 186 // First get the starting value of the key 187 response, err := c.client.Get(key, false, false) 188 if err != nil { 189 if strings.HasPrefix(err.Error(), "100: Key not found") { 190 err = callBack(nil) 191 if err != nil { 192 return err 193 } 194 } else { 195 return err 196 } 197 } else { 198 waitIndex = response.EtcdIndex + 1 199 if nodeToMap(response.Node, value) { 200 err = callBack(value) 201 if err != nil { 202 return err 203 } 204 } 205 } 206 for { 207 response, err := c.client.Watch(key, waitIndex, true, nil, cancel) 208 if err != nil { 209 if errors.Is(err, etcd.ErrWatchStoppedByUser) { 210 return ErrCancelled 211 } 212 return err 213 } 214 waitIndex = response.EtcdIndex + 1 215 if nodeToMap(response.Node, value) { 216 err = callBack(value) 217 if err != nil { 218 return err 219 } 220 } 221 } 222 }