go.etcd.io/etcd@v3.3.27+incompatible/discovery/discovery.go (about) 1 // Copyright 2015 The etcd Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package discovery provides an implementation of the cluster discovery that 16 // is used by etcd. 17 package discovery 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "math" 24 "net/http" 25 "net/url" 26 "path" 27 "sort" 28 "strconv" 29 "strings" 30 "time" 31 32 "github.com/coreos/etcd/client" 33 "github.com/coreos/etcd/pkg/transport" 34 "github.com/coreos/etcd/pkg/types" 35 36 "github.com/coreos/pkg/capnslog" 37 "github.com/jonboulle/clockwork" 38 ) 39 40 var ( 41 plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "discovery") 42 43 ErrInvalidURL = errors.New("discovery: invalid URL") 44 ErrBadSizeKey = errors.New("discovery: size key is bad") 45 ErrSizeNotFound = errors.New("discovery: size key not found") 46 ErrTokenNotFound = errors.New("discovery: token not found") 47 ErrDuplicateID = errors.New("discovery: found duplicate id") 48 ErrDuplicateName = errors.New("discovery: found duplicate name") 49 ErrFullCluster = errors.New("discovery: cluster is full") 50 ErrTooManyRetries = errors.New("discovery: too many retries") 51 ErrBadDiscoveryEndpoint = errors.New("discovery: bad discovery endpoint") 52 ) 53 54 var ( 55 // Number of retries discovery will attempt before giving up and erroring out. 56 nRetries = uint(math.MaxUint32) 57 maxExpoentialRetries = uint(8) 58 ) 59 60 // JoinCluster will connect to the discovery service at the given url, and 61 // register the server represented by the given id and config to the cluster 62 func JoinCluster(durl, dproxyurl string, id types.ID, config string) (string, error) { 63 d, err := newDiscovery(durl, dproxyurl, id) 64 if err != nil { 65 return "", err 66 } 67 return d.joinCluster(config) 68 } 69 70 // GetCluster will connect to the discovery service at the given url and 71 // retrieve a string describing the cluster 72 func GetCluster(durl, dproxyurl string) (string, error) { 73 d, err := newDiscovery(durl, dproxyurl, 0) 74 if err != nil { 75 return "", err 76 } 77 return d.getCluster() 78 } 79 80 type discovery struct { 81 cluster string 82 id types.ID 83 c client.KeysAPI 84 retries uint 85 url *url.URL 86 87 clock clockwork.Clock 88 } 89 90 // newProxyFunc builds a proxy function from the given string, which should 91 // represent a URL that can be used as a proxy. It performs basic 92 // sanitization of the URL and returns any error encountered. 93 func newProxyFunc(proxy string) (func(*http.Request) (*url.URL, error), error) { 94 if proxy == "" { 95 return nil, nil 96 } 97 // Do a small amount of URL sanitization to help the user 98 // Derived from net/http.ProxyFromEnvironment 99 proxyURL, err := url.Parse(proxy) 100 if err != nil || !strings.HasPrefix(proxyURL.Scheme, "http") { 101 // proxy was bogus. Try prepending "http://" to it and 102 // see if that parses correctly. If not, we ignore the 103 // error and complain about the original one 104 var err2 error 105 proxyURL, err2 = url.Parse("http://" + proxy) 106 if err2 == nil { 107 err = nil 108 } 109 } 110 if err != nil { 111 return nil, fmt.Errorf("invalid proxy address %q: %v", proxy, err) 112 } 113 114 plog.Infof("using proxy %q", proxyURL.String()) 115 return http.ProxyURL(proxyURL), nil 116 } 117 118 func newDiscovery(durl, dproxyurl string, id types.ID) (*discovery, error) { 119 u, err := url.Parse(durl) 120 if err != nil { 121 return nil, err 122 } 123 token := u.Path 124 u.Path = "" 125 pf, err := newProxyFunc(dproxyurl) 126 if err != nil { 127 return nil, err 128 } 129 130 // TODO: add ResponseHeaderTimeout back when watch on discovery service writes header early 131 tr, err := transport.NewTransport(transport.TLSInfo{}, 30*time.Second) 132 if err != nil { 133 return nil, err 134 } 135 tr.Proxy = pf 136 cfg := client.Config{ 137 Transport: tr, 138 Endpoints: []string{u.String()}, 139 } 140 c, err := client.New(cfg) 141 if err != nil { 142 return nil, err 143 } 144 dc := client.NewKeysAPIWithPrefix(c, "") 145 return &discovery{ 146 cluster: token, 147 c: dc, 148 id: id, 149 url: u, 150 clock: clockwork.NewRealClock(), 151 }, nil 152 } 153 154 func (d *discovery) joinCluster(config string) (string, error) { 155 // fast path: if the cluster is full, return the error 156 // do not need to register to the cluster in this case. 157 if _, _, _, err := d.checkCluster(); err != nil { 158 return "", err 159 } 160 161 if err := d.createSelf(config); err != nil { 162 // Fails, even on a timeout, if createSelf times out. 163 // TODO(barakmich): Retrying the same node might want to succeed here 164 // (ie, createSelf should be idempotent for discovery). 165 return "", err 166 } 167 168 nodes, size, index, err := d.checkCluster() 169 if err != nil { 170 return "", err 171 } 172 173 all, err := d.waitNodes(nodes, size, index) 174 if err != nil { 175 return "", err 176 } 177 178 return nodesToCluster(all, size) 179 } 180 181 func (d *discovery) getCluster() (string, error) { 182 nodes, size, index, err := d.checkCluster() 183 if err != nil { 184 if err == ErrFullCluster { 185 return nodesToCluster(nodes, size) 186 } 187 return "", err 188 } 189 190 all, err := d.waitNodes(nodes, size, index) 191 if err != nil { 192 return "", err 193 } 194 return nodesToCluster(all, size) 195 } 196 197 func (d *discovery) createSelf(contents string) error { 198 ctx, cancel := context.WithTimeout(context.Background(), client.DefaultRequestTimeout) 199 resp, err := d.c.Create(ctx, d.selfKey(), contents) 200 cancel() 201 if err != nil { 202 if eerr, ok := err.(client.Error); ok && eerr.Code == client.ErrorCodeNodeExist { 203 return ErrDuplicateID 204 } 205 return err 206 } 207 208 // ensure self appears on the server we connected to 209 w := d.c.Watcher(d.selfKey(), &client.WatcherOptions{AfterIndex: resp.Node.CreatedIndex - 1}) 210 _, err = w.Next(context.Background()) 211 return err 212 } 213 214 func (d *discovery) checkCluster() ([]*client.Node, uint64, uint64, error) { 215 configKey := path.Join("/", d.cluster, "_config") 216 ctx, cancel := context.WithTimeout(context.Background(), client.DefaultRequestTimeout) 217 // find cluster size 218 resp, err := d.c.Get(ctx, path.Join(configKey, "size"), nil) 219 cancel() 220 if err != nil { 221 if eerr, ok := err.(*client.Error); ok && eerr.Code == client.ErrorCodeKeyNotFound { 222 return nil, 0, 0, ErrSizeNotFound 223 } 224 if err == client.ErrInvalidJSON { 225 return nil, 0, 0, ErrBadDiscoveryEndpoint 226 } 227 if ce, ok := err.(*client.ClusterError); ok { 228 plog.Error(ce.Detail()) 229 return d.checkClusterRetry() 230 } 231 return nil, 0, 0, err 232 } 233 size, err := strconv.ParseUint(resp.Node.Value, 10, 0) 234 if err != nil { 235 return nil, 0, 0, ErrBadSizeKey 236 } 237 238 ctx, cancel = context.WithTimeout(context.Background(), client.DefaultRequestTimeout) 239 resp, err = d.c.Get(ctx, d.cluster, nil) 240 cancel() 241 if err != nil { 242 if ce, ok := err.(*client.ClusterError); ok { 243 plog.Error(ce.Detail()) 244 return d.checkClusterRetry() 245 } 246 return nil, 0, 0, err 247 } 248 var nodes []*client.Node 249 // append non-config keys to nodes 250 for _, n := range resp.Node.Nodes { 251 if !(path.Base(n.Key) == path.Base(configKey)) { 252 nodes = append(nodes, n) 253 } 254 } 255 256 snodes := sortableNodes{nodes} 257 sort.Sort(snodes) 258 259 // find self position 260 for i := range nodes { 261 if path.Base(nodes[i].Key) == path.Base(d.selfKey()) { 262 break 263 } 264 if uint64(i) >= size-1 { 265 return nodes[:size], size, resp.Index, ErrFullCluster 266 } 267 } 268 return nodes, size, resp.Index, nil 269 } 270 271 func (d *discovery) logAndBackoffForRetry(step string) { 272 d.retries++ 273 // logAndBackoffForRetry stops exponential backoff when the retries are more than maxExpoentialRetries and is set to a constant backoff afterward. 274 retries := d.retries 275 if retries > maxExpoentialRetries { 276 retries = maxExpoentialRetries 277 } 278 retryTimeInSecond := time.Duration(0x1<<retries) * time.Second 279 plog.Infof("%s: error connecting to %s, retrying in %s", step, d.url, retryTimeInSecond) 280 d.clock.Sleep(retryTimeInSecond) 281 } 282 283 func (d *discovery) checkClusterRetry() ([]*client.Node, uint64, uint64, error) { 284 if d.retries < nRetries { 285 d.logAndBackoffForRetry("cluster status check") 286 return d.checkCluster() 287 } 288 return nil, 0, 0, ErrTooManyRetries 289 } 290 291 func (d *discovery) waitNodesRetry() ([]*client.Node, error) { 292 if d.retries < nRetries { 293 d.logAndBackoffForRetry("waiting for other nodes") 294 nodes, n, index, err := d.checkCluster() 295 if err != nil { 296 return nil, err 297 } 298 return d.waitNodes(nodes, n, index) 299 } 300 return nil, ErrTooManyRetries 301 } 302 303 func (d *discovery) waitNodes(nodes []*client.Node, size uint64, index uint64) ([]*client.Node, error) { 304 if uint64(len(nodes)) > size { 305 nodes = nodes[:size] 306 } 307 // watch from the next index 308 w := d.c.Watcher(d.cluster, &client.WatcherOptions{AfterIndex: index, Recursive: true}) 309 all := make([]*client.Node, len(nodes)) 310 copy(all, nodes) 311 for _, n := range all { 312 if path.Base(n.Key) == path.Base(d.selfKey()) { 313 plog.Noticef("found self %s in the cluster", path.Base(d.selfKey())) 314 } else { 315 plog.Noticef("found peer %s in the cluster", path.Base(n.Key)) 316 } 317 } 318 319 // wait for others 320 for uint64(len(all)) < size { 321 plog.Noticef("found %d peer(s), waiting for %d more", len(all), int(size-uint64(len(all)))) 322 resp, err := w.Next(context.Background()) 323 if err != nil { 324 if ce, ok := err.(*client.ClusterError); ok { 325 plog.Error(ce.Detail()) 326 return d.waitNodesRetry() 327 } 328 return nil, err 329 } 330 plog.Noticef("found peer %s in the cluster", path.Base(resp.Node.Key)) 331 all = append(all, resp.Node) 332 } 333 plog.Noticef("found %d needed peer(s)", len(all)) 334 return all, nil 335 } 336 337 func (d *discovery) selfKey() string { 338 return path.Join("/", d.cluster, d.id.String()) 339 } 340 341 func nodesToCluster(ns []*client.Node, size uint64) (string, error) { 342 s := make([]string, len(ns)) 343 for i, n := range ns { 344 s[i] = n.Value 345 } 346 us := strings.Join(s, ",") 347 m, err := types.NewURLsMap(us) 348 if err != nil { 349 return us, ErrInvalidURL 350 } 351 if uint64(m.Len()) != size { 352 return us, ErrDuplicateName 353 } 354 return us, nil 355 } 356 357 type sortableNodes struct{ Nodes []*client.Node } 358 359 func (ns sortableNodes) Len() int { return len(ns.Nodes) } 360 func (ns sortableNodes) Less(i, j int) bool { 361 return ns.Nodes[i].CreatedIndex < ns.Nodes[j].CreatedIndex 362 } 363 func (ns sortableNodes) Swap(i, j int) { ns.Nodes[i], ns.Nodes[j] = ns.Nodes[j], ns.Nodes[i] }