github.com/looshlee/beatles@v0.0.0-20220727174639-742810ab631c/pkg/kvstore/etcd.go (about) 1 // Copyright 2016-2020 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package kvstore 16 17 import ( 18 "bytes" 19 "context" 20 "crypto/tls" 21 "errors" 22 "fmt" 23 "io/ioutil" 24 "math/rand" 25 "net/url" 26 "os" 27 "strconv" 28 "strings" 29 "time" 30 31 "github.com/cilium/cilium/pkg/controller" 32 "github.com/cilium/cilium/pkg/defaults" 33 "github.com/cilium/cilium/pkg/lock" 34 "github.com/cilium/cilium/pkg/option" 35 "github.com/cilium/cilium/pkg/spanstat" 36 37 "github.com/hashicorp/go-version" 38 "github.com/sirupsen/logrus" 39 client "go.etcd.io/etcd/clientv3" 40 "go.etcd.io/etcd/clientv3/concurrency" 41 clientyaml "go.etcd.io/etcd/clientv3/yaml" 42 v3rpcErrors "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes" 43 "go.etcd.io/etcd/pkg/tlsutil" 44 ctx "golang.org/x/net/context" 45 "golang.org/x/time/rate" 46 "sigs.k8s.io/yaml" 47 ) 48 49 const ( 50 // EtcdBackendName is the backend name for etcd 51 EtcdBackendName = "etcd" 52 53 EtcdAddrOption = "etcd.address" 54 isEtcdOperatorOption = "etcd.operator" 55 EtcdOptionConfig = "etcd.config" 56 57 // EtcdRateLimitOption specifies maximum kv operations per second 58 EtcdRateLimitOption = "etcd.qps" 59 ) 60 61 var ( 62 // ErrLockLeaseExpired is an error whenever the lease of the lock does not 63 // exist or it was expired. 64 ErrLockLeaseExpired = errors.New("transaction did not succeed: lock lease expired") 65 ) 66 67 func init() { 68 rand.Seed(time.Now().UnixNano()) 69 } 70 71 type etcdModule struct { 72 opts backendOptions 73 config *client.Config 74 } 75 76 var ( 77 // versionCheckTimeout is the time we wait trying to verify the version 78 // of an etcd endpoint. The timeout can be encountered on network 79 // connectivity problems. 80 versionCheckTimeout = 30 * time.Second 81 82 // statusCheckTimeout is the timeout when performing status checks with 83 // all etcd endpoints 84 statusCheckTimeout = 10 * time.Second 85 86 // initialConnectionTimeout is the timeout for the initial connection to 87 // the etcd server 88 initialConnectionTimeout = 15 * time.Minute 89 90 minRequiredVersion, _ = version.NewConstraint(">= 3.1.0") 91 92 // etcdDummyAddress can be overwritten from test invokers using ldflags 93 etcdDummyAddress = "http://127.0.0.1:4002" 94 95 etcdInstance = newEtcdModule() 96 ) 97 98 func EtcdDummyAddress() string { 99 return etcdDummyAddress 100 } 101 102 func newEtcdModule() backendModule { 103 return &etcdModule{ 104 opts: backendOptions{ 105 isEtcdOperatorOption: &backendOption{ 106 description: "if the configuration is setting up an etcd-operator", 107 }, 108 EtcdAddrOption: &backendOption{ 109 description: "Addresses of etcd cluster", 110 }, 111 EtcdOptionConfig: &backendOption{ 112 description: "Path to etcd configuration file", 113 }, 114 EtcdRateLimitOption: &backendOption{ 115 description: "Rate limit in kv store operations per second", 116 validate: func(v string) error { 117 _, err := strconv.Atoi(v) 118 return err 119 }, 120 }, 121 }, 122 } 123 } 124 125 func (e *etcdModule) createInstance() backendModule { 126 return newEtcdModule() 127 } 128 129 func (e *etcdModule) getName() string { 130 return EtcdBackendName 131 } 132 133 func (e *etcdModule) setConfigDummy() { 134 e.config = &client.Config{} 135 e.config.Endpoints = []string{etcdDummyAddress} 136 } 137 138 func (e *etcdModule) setConfig(opts map[string]string) error { 139 return setOpts(opts, e.opts) 140 } 141 142 func (e *etcdModule) setExtraConfig(opts *ExtraOptions) error { 143 if opts != nil && len(opts.DialOption) != 0 { 144 e.config = &client.Config{} 145 e.config.DialOptions = append(e.config.DialOptions, opts.DialOption...) 146 } 147 return nil 148 } 149 150 func (e *etcdModule) getConfig() map[string]string { 151 return getOpts(e.opts) 152 } 153 154 func (e *etcdModule) newClient(opts *ExtraOptions) (BackendOperations, chan error) { 155 errChan := make(chan error, 10) 156 157 endpointsOpt, endpointsSet := e.opts[EtcdAddrOption] 158 configPathOpt, configSet := e.opts[EtcdOptionConfig] 159 160 rateLimitOpt, rateLimitSet := e.opts[EtcdRateLimitOption] 161 162 rateLimit := defaults.KVstoreQPS 163 if rateLimitSet { 164 // error is discarded here because this option has validation 165 rateLimit, _ = strconv.Atoi(rateLimitOpt.value) 166 } 167 168 var configPath string 169 if configSet { 170 configPath = configPathOpt.value 171 } 172 if e.config == nil { 173 if !endpointsSet && !configSet { 174 errChan <- fmt.Errorf("invalid etcd configuration, %s or %s must be specified", EtcdOptionConfig, EtcdAddrOption) 175 close(errChan) 176 return nil, errChan 177 } 178 179 if endpointsOpt.value == "" && configPath == "" { 180 errChan <- fmt.Errorf("invalid etcd configuration, %s or %s must be specified", 181 EtcdOptionConfig, EtcdAddrOption) 182 close(errChan) 183 return nil, errChan 184 } 185 186 e.config = &client.Config{} 187 } 188 189 if e.config.Endpoints == nil && endpointsSet { 190 e.config.Endpoints = []string{endpointsOpt.value} 191 } 192 193 for { 194 // connectEtcdClient will close errChan when the connection attempt has 195 // been successful 196 backend, err := connectEtcdClient(e.config, configPath, errChan, rateLimit, opts) 197 switch { 198 case os.IsNotExist(err): 199 log.WithError(err).Info("Waiting for all etcd configuration files to be available") 200 time.Sleep(5 * time.Second) 201 case err != nil: 202 errChan <- err 203 close(errChan) 204 return backend, errChan 205 default: 206 return backend, errChan 207 } 208 } 209 } 210 211 func init() { 212 // register etcd module for use 213 registerBackend(EtcdBackendName, etcdInstance) 214 215 if duration := os.Getenv("CILIUM_ETCD_STATUS_CHECK_INTERVAL"); duration != "" { 216 timeout, err := time.ParseDuration(duration) 217 if err == nil { 218 statusCheckTimeout = timeout 219 } 220 } 221 } 222 223 // Hint tries to improve the error message displayed to te user. 224 func Hint(err error) error { 225 switch err { 226 case ctx.DeadlineExceeded: 227 return fmt.Errorf("etcd client timeout exceeded") 228 default: 229 return err 230 } 231 } 232 233 type etcdClient struct { 234 // firstSession is a channel that will be closed once the first session 235 // is set up in the etcd Client. 236 firstSession chan struct{} 237 238 // stopStatusChecker is closed when the status checker can be terminated 239 stopStatusChecker chan struct{} 240 241 client *client.Client 242 controllers *controller.Manager 243 244 // config and configPath are initialized once and never written to again, they can be accessed without locking 245 config *client.Config 246 configPath string 247 248 // protects sessions from concurrent access 249 lock.RWMutex 250 session *concurrency.Session 251 lockSession *concurrency.Session 252 253 // statusLock protects latestStatusSnapshot and latestErrorStatus for 254 // read/write access 255 statusLock lock.RWMutex 256 257 // latestStatusSnapshot is a snapshot of the latest etcd cluster status 258 latestStatusSnapshot string 259 260 // latestErrorStatus is the latest error condition of the etcd connection 261 latestErrorStatus error 262 263 extraOptions *ExtraOptions 264 265 limiter *rate.Limiter 266 } 267 268 func (e *etcdClient) getLogger() *logrus.Entry { 269 endpoints, path := []string{""}, "" 270 if e != nil { 271 if e.config != nil { 272 endpoints = e.config.Endpoints 273 } 274 path = e.configPath 275 } 276 277 return log.WithFields(logrus.Fields{ 278 "endpoints": endpoints, 279 "config": path, 280 }) 281 } 282 283 type etcdMutex struct { 284 mutex *concurrency.Mutex 285 } 286 287 func (e *etcdMutex) Unlock() error { 288 return e.mutex.Unlock(ctx.TODO()) 289 } 290 291 func (e *etcdMutex) Comparator() interface{} { 292 return e.mutex.IsOwner() 293 } 294 295 // GetSessionLeaseID returns the current lease ID. 296 func (e *etcdClient) GetSessionLeaseID() client.LeaseID { 297 e.RWMutex.RLock() 298 l := e.session.Lease() 299 e.RWMutex.RUnlock() 300 return l 301 } 302 303 // GetLockSessionLeaseID returns the current lease ID for the lock session. 304 func (e *etcdClient) GetLockSessionLeaseID() client.LeaseID { 305 e.RWMutex.RLock() 306 l := e.lockSession.Lease() 307 e.RWMutex.RUnlock() 308 return l 309 } 310 311 // checkSession verifies if the lease is still valid from the return error of 312 // an etcd API call. If the error explicitly states that a lease was not found 313 // we mark the session has an orphan for this etcd client. If we would not mark 314 // it as an Orphan() the session would be considered expired after the leaseTTL 315 // By make it orphan we guarantee the session will be marked to be renewed. 316 func (e *etcdClient) checkSession(err error, leaseID client.LeaseID) { 317 if err == v3rpcErrors.ErrLeaseNotFound { 318 e.closeSession(leaseID) 319 } 320 } 321 322 // checkSession verifies if the lease is still valid from the return error of 323 // an etcd API call. If the error explicitly states that a lease was not found 324 // we mark the session has an orphan for this etcd client. If we would not mark 325 // it as an Orphan() the session would be considered expired after the leaseTTL 326 // By make it orphan we guarantee the session will be marked to be renewed. 327 func (e *etcdClient) checkLockSession(err error, leaseID client.LeaseID) { 328 if err == v3rpcErrors.ErrLeaseNotFound { 329 e.closeLockSession(leaseID) 330 } 331 } 332 333 // closeSession closes the current session. 334 func (e *etcdClient) closeSession(leaseID client.LeaseID) { 335 e.RWMutex.RLock() 336 // only mark a session as orphan if the leaseID is the same as the 337 // session ID to avoid making any other sessions as orphan. 338 if e.session.Lease() == leaseID { 339 e.session.Orphan() 340 } 341 e.RWMutex.RUnlock() 342 } 343 344 // closeSession closes the current session. 345 func (e *etcdClient) closeLockSession(leaseID client.LeaseID) { 346 e.RWMutex.RLock() 347 // only mark a session as orphan if the leaseID is the same as the 348 // session ID to avoid making any other sessions as orphan. 349 if e.lockSession.Lease() == leaseID { 350 e.lockSession.Orphan() 351 } 352 e.RWMutex.RUnlock() 353 } 354 355 func (e *etcdClient) waitForInitLock(ctx context.Context) <-chan bool { 356 initLockSucceeded := make(chan bool) 357 358 go func() { 359 for { 360 select { 361 case <-ctx.Done(): 362 initLockSucceeded <- false 363 close(initLockSucceeded) 364 return 365 default: 366 } 367 368 // Generate a random number so that we can acquire a lock even 369 // if other agents are killed while locking this path. 370 randNumber := strconv.FormatUint(rand.Uint64(), 16) 371 locker, err := e.LockPath(ctx, InitLockPath+"/"+randNumber) 372 if err == nil { 373 initLockSucceeded <- true 374 close(initLockSucceeded) 375 locker.Unlock() 376 e.getLogger().Debug("Distributed lock successful, etcd has quorum") 377 return 378 } 379 380 time.Sleep(100 * time.Millisecond) 381 } 382 }() 383 384 return initLockSucceeded 385 } 386 387 func (e *etcdClient) isConnectedAndHasQuorum() bool { 388 ctxTimeout, cancel := ctx.WithTimeout(ctx.TODO(), statusCheckTimeout) 389 defer cancel() 390 391 select { 392 // Wait for the the initial connection to be established 393 case <-e.firstSession: 394 // Timeout while waiting for initial connection, no success 395 case <-ctxTimeout.Done(): 396 return false 397 } 398 399 e.RLock() 400 ch := e.session.Done() 401 e.RUnlock() 402 403 initLockSucceeded := e.waitForInitLock(ctxTimeout) 404 select { 405 // Catch disconnect event, no success 406 case <-ch: 407 return false 408 // wait for initial lock to succeed 409 case success := <-initLockSucceeded: 410 return success 411 } 412 } 413 414 // Connected closes the returned channel when the etcd client is connected. 415 func (e *etcdClient) Connected() <-chan struct{} { 416 out := make(chan struct{}) 417 go func() { 418 for !e.isConnectedAndHasQuorum() { 419 time.Sleep(100 * time.Millisecond) 420 } 421 close(out) 422 }() 423 return out 424 } 425 426 // Disconnected closes the returned channel when the etcd client is 427 // disconnected after being reconnected. Blocks until the etcd client is first 428 // connected with the kvstore. 429 func (e *etcdClient) Disconnected() <-chan struct{} { 430 <-e.firstSession 431 e.RLock() 432 ch := e.session.Done() 433 e.RUnlock() 434 return ch 435 } 436 437 func (e *etcdClient) renewSession() error { 438 <-e.firstSession 439 <-e.session.Done() 440 // This is an attempt to avoid concurrent access of a session that was 441 // already expired. It's not perfect as there is still a period between the 442 // e.session.Done() is closed and the e.Lock() is held where parallel go 443 // routines can get a lease ID of an already expired lease. 444 e.Lock() 445 446 newSession, err := concurrency.NewSession(e.client, concurrency.WithTTL(int(option.Config.KVstoreLeaseTTL.Seconds()))) 447 if err != nil { 448 e.UnlockIgnoreTime() 449 return fmt.Errorf("unable to renew etcd session: %s", err) 450 } 451 log.Infof("Got new lease ID %x", newSession.Lease()) 452 453 e.session = newSession 454 e.UnlockIgnoreTime() 455 456 e.getLogger().WithField(fieldSession, newSession).Debug("Renewing etcd session") 457 458 if err := e.checkMinVersion(); err != nil { 459 return err 460 } 461 462 return nil 463 } 464 465 func (e *etcdClient) renewLockSession() error { 466 <-e.firstSession 467 <-e.lockSession.Done() 468 // This is an attempt to avoid concurrent access of a session that was 469 // already expired. It's not perfect as there is still a period between the 470 // e.lockSession.Done() is closed and the e.Lock() is held where parallel go 471 // routines can get a lease ID of an already expired lease. 472 e.Lock() 473 474 newSession, err := concurrency.NewSession(e.client, concurrency.WithTTL(int(defaults.LockLeaseTTL.Seconds()))) 475 if err != nil { 476 e.UnlockIgnoreTime() 477 return fmt.Errorf("unable to renew etcd lock session: %s", err) 478 } 479 log.Infof("Got new lock lease ID %x", newSession.Lease()) 480 481 e.lockSession = newSession 482 e.UnlockIgnoreTime() 483 484 e.getLogger().WithField(fieldSession, newSession).Debug("Renewing etcd lock session") 485 486 return nil 487 } 488 489 func connectEtcdClient(config *client.Config, cfgPath string, errChan chan error, rateLimit int, opts *ExtraOptions) (BackendOperations, error) { 490 if cfgPath != "" { 491 cfg, err := newConfig(cfgPath) 492 if err != nil { 493 return nil, err 494 } 495 cfg.DialOptions = append(cfg.DialOptions, config.DialOptions...) 496 config = cfg 497 } 498 499 // Set DialTimeout to 0, otherwise the creation of a new client will 500 // block until DialTimeout is reached or a connection to the server 501 // is made. 502 config.DialTimeout = 0 503 c, err := client.New(*config) 504 if err != nil { 505 return nil, err 506 } 507 508 log.WithFields(logrus.Fields{ 509 "endpoints": config.Endpoints, 510 "config": cfgPath, 511 }).Info("Connecting to etcd server...") 512 513 var s, ls concurrency.Session 514 firstSession := make(chan struct{}) 515 errorChan := make(chan error) 516 517 // create session in parallel as this is a blocking operation 518 go func() { 519 session, err := concurrency.NewSession(c, concurrency.WithTTL(int(option.Config.KVstoreLeaseTTL.Seconds()))) 520 if err != nil { 521 errorChan <- err 522 close(errorChan) 523 return 524 } 525 lockSession, err := concurrency.NewSession(c, concurrency.WithTTL(int(defaults.LockLeaseTTL.Seconds()))) 526 if err != nil { 527 errorChan <- err 528 close(errorChan) 529 return 530 } 531 s = *session 532 ls = *lockSession 533 534 log.Infof("Got lease ID %x", s.Lease()) 535 log.Infof("Got lock lease ID %x", ls.Lease()) 536 close(errorChan) 537 }() 538 539 ec := &etcdClient{ 540 client: c, 541 config: config, 542 configPath: cfgPath, 543 session: &s, 544 lockSession: &ls, 545 firstSession: firstSession, 546 controllers: controller.NewManager(), 547 latestStatusSnapshot: "No connection to etcd", 548 stopStatusChecker: make(chan struct{}), 549 extraOptions: opts, 550 limiter: rate.NewLimiter(rate.Limit(rateLimit), rateLimit), 551 } 552 553 // wait for session to be created also in parallel 554 go func() { 555 defer close(errChan) 556 557 select { 558 case err = <-errorChan: 559 if err != nil { 560 errChan <- err 561 return 562 } 563 case <-time.After(initialConnectionTimeout): 564 errChan <- fmt.Errorf("timed out while waiting for etcd session. Ensure that etcd is running on %s", config.Endpoints) 565 return 566 } 567 568 ec.getLogger().Debugf("Session received") 569 close(ec.firstSession) 570 571 if err := ec.checkMinVersion(); err != nil { 572 errChan <- fmt.Errorf("unable to validate etcd version: %s", err) 573 } 574 }() 575 576 go ec.statusChecker() 577 578 ec.controllers.UpdateController("kvstore-etcd-session-renew", 579 controller.ControllerParams{ 580 DoFunc: func(ctx context.Context) error { 581 return ec.renewSession() 582 }, 583 RunInterval: time.Duration(10) * time.Millisecond, 584 }, 585 ) 586 587 ec.controllers.UpdateController("kvstore-etcd-lock-session-renew", 588 controller.ControllerParams{ 589 DoFunc: func(ctx context.Context) error { 590 return ec.renewLockSession() 591 }, 592 RunInterval: time.Duration(10) * time.Millisecond, 593 }, 594 ) 595 596 return ec, nil 597 } 598 599 func getEPVersion(c client.Maintenance, etcdEP string, timeout time.Duration) (*version.Version, error) { 600 ctxTimeout, cancel := ctx.WithTimeout(ctx.TODO(), timeout) 601 defer cancel() 602 sr, err := c.Status(ctxTimeout, etcdEP) 603 if err != nil { 604 return nil, Hint(err) 605 } 606 v, err := version.NewVersion(sr.Version) 607 if err != nil { 608 return nil, fmt.Errorf("error parsing server version %q: %s", sr.Version, Hint(err)) 609 } 610 return v, nil 611 } 612 613 // checkMinVersion checks the minimal version running on etcd cluster. This 614 // function should be run whenever the etcd client is connected for the first 615 // time and whenever the session is renewed. 616 func (e *etcdClient) checkMinVersion() error { 617 eps := e.client.Endpoints() 618 619 for _, ep := range eps { 620 v, err := getEPVersion(e.client.Maintenance, ep, versionCheckTimeout) 621 if err != nil { 622 e.getLogger().WithError(Hint(err)).WithField(fieldEtcdEndpoint, ep). 623 Warn("Unable to verify version of etcd endpoint") 624 continue 625 } 626 627 if !minRequiredVersion.Check(v) { 628 return fmt.Errorf("minimal etcd version not met in %q, required: %s, found: %s", 629 ep, minRequiredVersion.String(), v.String()) 630 } 631 632 e.getLogger().WithFields(logrus.Fields{ 633 fieldEtcdEndpoint: ep, 634 "version": v, 635 }).Info("Successfully verified version of etcd endpoint") 636 } 637 638 if len(eps) == 0 { 639 e.getLogger().Warn("Minimal etcd version unknown: No etcd endpoints available") 640 } 641 642 return nil 643 } 644 645 func (e *etcdClient) LockPath(ctx context.Context, path string) (KVLocker, error) { 646 select { 647 case <-e.firstSession: 648 case <-ctx.Done(): 649 return nil, fmt.Errorf("lock cancelled via context: %s", ctx.Err()) 650 } 651 652 e.RLock() 653 mu := concurrency.NewMutex(e.lockSession, path) 654 leaseID := e.lockSession.Lease() 655 e.RUnlock() 656 657 ctx, cancel := context.WithTimeout(ctx, time.Minute) 658 defer cancel() 659 err := mu.Lock(ctx) 660 if err != nil { 661 e.checkLockSession(err, leaseID) 662 return nil, Hint(err) 663 } 664 665 return &etcdMutex{mutex: mu}, nil 666 } 667 668 func (e *etcdClient) DeletePrefix(path string) (err error) { 669 defer func() { Trace("DeletePrefix", err, logrus.Fields{fieldPrefix: path}) }() 670 duration := spanstat.Start() 671 e.limiter.Wait(ctx.TODO()) 672 _, err = e.client.Delete(ctx.Background(), path, client.WithPrefix()) 673 increaseMetric(path, metricDelete, "DeletePrefix", duration.EndError(err).Total(), err) 674 return Hint(err) 675 } 676 677 // Watch starts watching for changes in a prefix 678 func (e *etcdClient) Watch(w *Watcher) { 679 localCache := watcherCache{} 680 listSignalSent := false 681 682 scopedLog := e.getLogger().WithFields(logrus.Fields{ 683 fieldWatcher: w, 684 fieldPrefix: w.prefix, 685 }) 686 <-e.Connected() 687 688 reList: 689 for { 690 e.limiter.Wait(ctx.TODO()) 691 res, err := e.client.Get(ctx.Background(), w.prefix, client.WithPrefix(), 692 client.WithSerializable()) 693 if err != nil { 694 scopedLog.WithError(Hint(err)).Warn("Unable to list keys before starting watcher") 695 continue 696 } 697 698 nextRev := res.Header.Revision + 1 699 scopedLog.Debugf("List response from etcd len=%d: %+v", res.Count, res) 700 701 if res.Count > 0 { 702 for _, key := range res.Kvs { 703 t := EventTypeCreate 704 if localCache.Exists(key.Key) { 705 t = EventTypeModify 706 } 707 708 localCache.MarkInUse(key.Key) 709 scopedLog.Debugf("Emitting list result as %v event for %s=%v", t, key.Key, key.Value) 710 711 queueStart := spanstat.Start() 712 w.Events <- KeyValueEvent{ 713 Key: string(key.Key), 714 Value: key.Value, 715 Typ: t, 716 } 717 trackEventQueued(string(key.Key), t, queueStart.End(true).Total()) 718 } 719 } 720 721 // More keys to be read, call Get() again 722 if res.More { 723 continue 724 } 725 726 // Send out deletion events for all keys that were deleted 727 // between our last known revision and the latest revision 728 // received via Get 729 localCache.RemoveDeleted(func(k string) { 730 event := KeyValueEvent{ 731 Key: k, 732 Typ: EventTypeDelete, 733 } 734 735 scopedLog.Debugf("Emitting EventTypeDelete event for %s", k) 736 queueStart := spanstat.Start() 737 w.Events <- event 738 trackEventQueued(k, EventTypeDelete, queueStart.End(true).Total()) 739 }) 740 741 // Only send the list signal once 742 if !listSignalSent { 743 w.Events <- KeyValueEvent{Typ: EventTypeListDone} 744 listSignalSent = true 745 } 746 747 recreateWatcher: 748 scopedLog.WithField(fieldRev, nextRev).Debug("Starting to watch a prefix") 749 750 e.limiter.Wait(ctx.TODO()) 751 etcdWatch := e.client.Watch(ctx.Background(), w.prefix, 752 client.WithPrefix(), client.WithRev(nextRev)) 753 for { 754 select { 755 case <-w.stopWatch: 756 close(w.Events) 757 w.stopWait.Done() 758 return 759 760 case r, ok := <-etcdWatch: 761 if !ok { 762 time.Sleep(50 * time.Millisecond) 763 goto recreateWatcher 764 } 765 766 scopedLog := scopedLog.WithField(fieldRev, r.Header.Revision) 767 768 if err := r.Err(); err != nil { 769 // We tried to watch on a compacted 770 // revision that may no longer exist, 771 // recreate the watcher and try to 772 // watch on the next possible revision 773 if err == v3rpcErrors.ErrCompacted { 774 scopedLog.WithError(Hint(err)).Debug("Tried watching on compacted revision") 775 } 776 777 // mark all local keys in state for 778 // deletion unless the upcoming GET 779 // marks them alive 780 localCache.MarkAllForDeletion() 781 782 goto reList 783 } 784 785 nextRev = r.Header.Revision + 1 786 scopedLog.Debugf("Received event from etcd: %+v", r) 787 788 for _, ev := range r.Events { 789 event := KeyValueEvent{ 790 Key: string(ev.Kv.Key), 791 Value: ev.Kv.Value, 792 } 793 794 switch { 795 case ev.Type == client.EventTypeDelete: 796 event.Typ = EventTypeDelete 797 localCache.RemoveKey(ev.Kv.Key) 798 case ev.IsCreate(): 799 event.Typ = EventTypeCreate 800 localCache.MarkInUse(ev.Kv.Key) 801 default: 802 event.Typ = EventTypeModify 803 localCache.MarkInUse(ev.Kv.Key) 804 } 805 806 scopedLog.Debugf("Emitting %v event for %s=%v", event.Typ, event.Key, event.Value) 807 808 queueStart := spanstat.Start() 809 w.Events <- event 810 trackEventQueued(string(ev.Kv.Key), event.Typ, queueStart.End(true).Total()) 811 } 812 } 813 } 814 } 815 } 816 817 func (e *etcdClient) determineEndpointStatus(endpointAddress string) (string, error) { 818 ctxTimeout, cancel := ctx.WithTimeout(ctx.Background(), statusCheckTimeout) 819 defer cancel() 820 821 e.getLogger().Debugf("Checking status to etcd endpoint %s", endpointAddress) 822 823 e.limiter.Wait(ctxTimeout) 824 status, err := e.client.Status(ctxTimeout, endpointAddress) 825 if err != nil { 826 return fmt.Sprintf("%s - %s", endpointAddress, err), Hint(err) 827 } 828 829 str := fmt.Sprintf("%s - %s", endpointAddress, status.Version) 830 if status.Header.MemberId == status.Leader { 831 str += " (Leader)" 832 } 833 834 return str, nil 835 } 836 837 func (e *etcdClient) statusChecker() { 838 for { 839 newStatus := []string{} 840 ok := 0 841 842 hasQuorum := e.isConnectedAndHasQuorum() 843 844 endpoints := e.client.Endpoints() 845 for _, ep := range endpoints { 846 st, err := e.determineEndpointStatus(ep) 847 if err == nil { 848 ok++ 849 } 850 851 newStatus = append(newStatus, st) 852 } 853 854 allConnected := len(endpoints) == ok 855 856 e.RWMutex.RLock() 857 sessionLeaseID := e.session.Lease() 858 lockSessionLeaseID := e.lockSession.Lease() 859 e.RWMutex.RUnlock() 860 861 e.statusLock.Lock() 862 e.latestStatusSnapshot = fmt.Sprintf("etcd: %d/%d connected, lease-ID=%x, lock lease-ID=%x, has-quorum=%t: %s", 863 ok, len(endpoints), sessionLeaseID, lockSessionLeaseID, hasQuorum, strings.Join(newStatus, "; ")) 864 865 // Only mark the etcd health as unstable if no etcd endpoints can be reached 866 if len(endpoints) > 0 && ok == 0 { 867 e.latestErrorStatus = fmt.Errorf("not able to connect to any etcd endpoints") 868 } else { 869 e.latestErrorStatus = nil 870 } 871 872 e.statusLock.Unlock() 873 874 select { 875 case <-e.stopStatusChecker: 876 return 877 case <-time.After(e.extraOptions.StatusCheckInterval(allConnected)): 878 } 879 } 880 } 881 882 func (e *etcdClient) Status() (string, error) { 883 e.statusLock.RLock() 884 defer e.statusLock.RUnlock() 885 886 return e.latestStatusSnapshot, Hint(e.latestErrorStatus) 887 } 888 889 // GetIfLocked returns value of key if the client is still holding the given lock. 890 func (e *etcdClient) GetIfLocked(key string, lock KVLocker) (bv []byte, err error) { 891 defer func() { Trace("GetIfLocked", err, logrus.Fields{fieldKey: key, fieldValue: string(bv)}) }() 892 duration := spanstat.Start() 893 e.limiter.Wait(ctx.TODO()) 894 opGet := client.OpGet(key) 895 cmp := lock.Comparator().(client.Cmp) 896 txnReply, err := e.client.Txn(context.Background()).If(cmp).Then(opGet).Commit() 897 if err == nil && !txnReply.Succeeded { 898 err = ErrLockLeaseExpired 899 } 900 increaseMetric(key, metricRead, "GetLocked", duration.EndError(err).Total(), err) 901 if err != nil { 902 return nil, Hint(err) 903 } 904 905 getR := txnReply.Responses[0].GetResponseRange() 906 // RangeResponse 907 if getR.Count == 0 { 908 return nil, nil 909 } 910 bv, err = getR.Kvs[0].Value, nil 911 return bv, err 912 } 913 914 // Get returns value of key 915 func (e *etcdClient) Get(key string) (bv []byte, err error) { 916 defer func() { Trace("Get", err, logrus.Fields{fieldKey: key, fieldValue: string(bv)}) }() 917 duration := spanstat.Start() 918 e.limiter.Wait(ctx.TODO()) 919 var getR *client.GetResponse 920 getR, err = e.client.Get(ctx.Background(), key) 921 increaseMetric(key, metricRead, "Get", duration.EndError(err).Total(), err) 922 if err != nil { 923 err = Hint(err) 924 return nil, err 925 } 926 927 if getR.Count == 0 { 928 return nil, nil 929 } 930 return getR.Kvs[0].Value, nil 931 } 932 933 // GetPrefixIfLocked returns the first key which matches the prefix and its value if the client is still holding the given lock. 934 func (e *etcdClient) GetPrefixIfLocked(ctx context.Context, prefix string, lock KVLocker) (k string, bv []byte, err error) { 935 defer func() { 936 Trace("GetPrefixIfLocked", err, logrus.Fields{fieldPrefix: prefix, fieldKey: k, fieldValue: string(bv)}) 937 }() 938 939 duration := spanstat.Start() 940 e.limiter.Wait(ctx) 941 opGet := client.OpGet(prefix, client.WithPrefix(), client.WithLimit(1)) 942 cmp := lock.Comparator().(client.Cmp) 943 txnReply, err := e.client.Txn(ctx).If(cmp).Then(opGet).Commit() 944 if err == nil && !txnReply.Succeeded { 945 err = ErrLockLeaseExpired 946 } 947 increaseMetric(prefix, metricRead, "GetPrefixLocked", duration.EndError(err).Total(), err) 948 if err != nil { 949 return "", nil, Hint(err) 950 } 951 getR := txnReply.Responses[0].GetResponseRange() 952 953 if getR.Count == 0 { 954 return "", nil, nil 955 } 956 return string(getR.Kvs[0].Key), getR.Kvs[0].Value, nil 957 } 958 959 // GetPrefix returns the first key which matches the prefix and its value 960 func (e *etcdClient) GetPrefix(ctx context.Context, prefix string) (k string, bv []byte, err error) { 961 defer func() { 962 Trace("GetPrefix", err, logrus.Fields{fieldPrefix: prefix, fieldKey: k, fieldValue: string(bv)}) 963 }() 964 965 duration := spanstat.Start() 966 e.limiter.Wait(ctx) 967 getR, err := e.client.Get(ctx, prefix, client.WithPrefix(), client.WithLimit(1)) 968 increaseMetric(prefix, metricRead, "GetPrefix", duration.EndError(err).Total(), err) 969 if err != nil { 970 return "", nil, Hint(err) 971 } 972 973 if getR.Count == 0 { 974 return "", nil, nil 975 } 976 return string(getR.Kvs[0].Key), getR.Kvs[0].Value, nil 977 } 978 979 // Set sets value of key 980 func (e *etcdClient) Set(key string, value []byte) (err error) { 981 defer func() { Trace("Set", err, logrus.Fields{fieldKey: key, fieldValue: string(value)}) }() 982 duration := spanstat.Start() 983 e.limiter.Wait(ctx.TODO()) 984 _, err = e.client.Put(ctx.Background(), key, string(value)) 985 increaseMetric(key, metricSet, "Set", duration.EndError(err).Total(), err) 986 err = Hint(err) 987 return err 988 } 989 990 // DeleteIfLocked deletes a key if the client is still holding the given lock. 991 func (e *etcdClient) DeleteIfLocked(key string, lock KVLocker) (err error) { 992 defer func() { Trace("DeleteIfLocked", err, logrus.Fields{fieldKey: key}) }() 993 duration := spanstat.Start() 994 opDel := client.OpDelete(key) 995 cmp := lock.Comparator().(client.Cmp) 996 var txnReply *client.TxnResponse 997 txnReply, err = e.client.Txn(context.Background()).If(cmp).Then(opDel).Commit() 998 if err == nil && !txnReply.Succeeded { 999 err = ErrLockLeaseExpired 1000 } 1001 increaseMetric(key, metricDelete, "DeleteLocked", duration.EndError(err).Total(), err) 1002 err = Hint(err) 1003 return err 1004 } 1005 1006 // Delete deletes a key 1007 func (e *etcdClient) Delete(key string) (err error) { 1008 defer func() { Trace("Delete", err, logrus.Fields{fieldKey: key}) }() 1009 duration := spanstat.Start() 1010 e.limiter.Wait(ctx.TODO()) 1011 _, err = e.client.Delete(ctx.Background(), key) 1012 increaseMetric(key, metricDelete, "Delete", duration.EndError(err).Total(), err) 1013 err = Hint(err) 1014 return err 1015 } 1016 1017 func (e *etcdClient) createOpPut(key string, value []byte, leaseID client.LeaseID) *client.Op { 1018 if leaseID != 0 { 1019 op := client.OpPut(key, string(value), client.WithLease(leaseID)) 1020 return &op 1021 } 1022 1023 op := client.OpPut(key, string(value)) 1024 return &op 1025 } 1026 1027 // UpdateIfLocked atomically creates a key or fails if it already exists if the client is still holding the given lock. 1028 func (e *etcdClient) UpdateIfLocked(ctx context.Context, key string, value []byte, lease bool, lock KVLocker) error { 1029 select { 1030 case <-e.firstSession: 1031 case <-ctx.Done(): 1032 return fmt.Errorf("update cancelled via context: %s", ctx.Err()) 1033 } 1034 1035 var ( 1036 txnReply *client.TxnResponse 1037 err error 1038 ) 1039 1040 duration := spanstat.Start() 1041 e.limiter.Wait(ctx) 1042 if lease { 1043 leaseID := e.GetSessionLeaseID() 1044 opPut := client.OpPut(key, string(value), client.WithLease(leaseID)) 1045 cmp := lock.Comparator().(client.Cmp) 1046 txnReply, err = e.client.Txn(context.Background()).If(cmp).Then(opPut).Commit() 1047 e.checkSession(err, leaseID) 1048 } else { 1049 opPut := client.OpPut(key, string(value)) 1050 cmp := lock.Comparator().(client.Cmp) 1051 txnReply, err = e.client.Txn(context.Background()).If(cmp).Then(opPut).Commit() 1052 } 1053 if err == nil && !txnReply.Succeeded { 1054 err = ErrLockLeaseExpired 1055 } 1056 increaseMetric(key, metricSet, "UpdateIfLocked", duration.EndError(err).Total(), err) 1057 return Hint(err) 1058 } 1059 1060 // Update creates or updates a key 1061 func (e *etcdClient) Update(ctx context.Context, key string, value []byte, lease bool) (err error) { 1062 defer Trace("Update", err, logrus.Fields{fieldKey: key, fieldValue: string(value), fieldAttachLease: lease}) 1063 1064 select { 1065 case <-e.firstSession: 1066 case <-ctx.Done(): 1067 return fmt.Errorf("update cancelled via context: %s", ctx.Err()) 1068 } 1069 1070 if lease { 1071 duration := spanstat.Start() 1072 leaseID := e.GetSessionLeaseID() 1073 e.limiter.Wait(ctx) 1074 _, err := e.client.Put(ctx, key, string(value), client.WithLease(leaseID)) 1075 e.checkSession(err, leaseID) 1076 increaseMetric(key, metricSet, "Update", duration.EndError(err).Total(), err) 1077 return Hint(err) 1078 } 1079 1080 duration := spanstat.Start() 1081 e.limiter.Wait(ctx) 1082 _, err = e.client.Put(ctx, key, string(value)) 1083 increaseMetric(key, metricSet, "Update", duration.EndError(err).Total(), err) 1084 return Hint(err) 1085 } 1086 1087 // UpdateIfDifferentIfLocked updates a key if the value is different and if the client is still holding the given lock. 1088 func (e *etcdClient) UpdateIfDifferentIfLocked(ctx context.Context, key string, value []byte, lease bool, lock KVLocker) (recreated bool, err error) { 1089 defer func() { 1090 Trace("UpdateIfDifferentIfLocked", err, logrus.Fields{fieldKey: key, fieldValue: value, fieldAttachLease: lease, "recreated": recreated}) 1091 }() 1092 1093 select { 1094 case <-e.firstSession: 1095 case <-ctx.Done(): 1096 return false, fmt.Errorf("update cancelled via context: %s", ctx.Err()) 1097 } 1098 duration := spanstat.Start() 1099 e.limiter.Wait(ctx) 1100 cnds := lock.Comparator().(client.Cmp) 1101 txnresp, err := e.client.Txn(ctx).If(cnds).Then(client.OpGet(key)).Commit() 1102 1103 increaseMetric(key, metricRead, "Get", duration.EndError(err).Total(), err) 1104 1105 // On error, attempt update blindly 1106 if err != nil { 1107 return true, e.UpdateIfLocked(ctx, key, value, lease, lock) 1108 } 1109 1110 if !txnresp.Succeeded { 1111 return false, ErrLockLeaseExpired 1112 } 1113 1114 getR := txnresp.Responses[0].GetResponseRange() 1115 if getR.Count == 0 { 1116 return true, e.UpdateIfLocked(ctx, key, value, lease, lock) 1117 } 1118 1119 if lease { 1120 e.RWMutex.RLock() 1121 leaseID := e.session.Lease() 1122 e.RWMutex.RUnlock() 1123 if getR.Kvs[0].Lease != int64(leaseID) { 1124 return true, e.UpdateIfLocked(ctx, key, value, lease, lock) 1125 } 1126 } 1127 // if value is not equal then update. 1128 if !bytes.Equal(getR.Kvs[0].Value, value) { 1129 return true, e.UpdateIfLocked(ctx, key, value, lease, lock) 1130 } 1131 1132 return false, nil 1133 } 1134 1135 // UpdateIfDifferent updates a key if the value is different 1136 func (e *etcdClient) UpdateIfDifferent(ctx context.Context, key string, value []byte, lease bool) (recreated bool, err error) { 1137 defer func() { 1138 Trace("UpdateIfDifferent", err, logrus.Fields{fieldKey: key, fieldValue: value, fieldAttachLease: lease, "recreated": recreated}) 1139 }() 1140 1141 select { 1142 case <-e.firstSession: 1143 case <-ctx.Done(): 1144 return false, fmt.Errorf("update cancelled via context: %s", ctx.Err()) 1145 } 1146 1147 duration := spanstat.Start() 1148 e.limiter.Wait(ctx) 1149 getR, err := e.client.Get(ctx, key) 1150 increaseMetric(key, metricRead, "Get", duration.EndError(err).Total(), err) 1151 // On error, attempt update blindly 1152 if err != nil || getR.Count == 0 { 1153 return true, e.Update(ctx, key, value, lease) 1154 } 1155 if lease { 1156 e.RWMutex.RLock() 1157 leaseID := e.session.Lease() 1158 e.RWMutex.RUnlock() 1159 if getR.Kvs[0].Lease != int64(leaseID) { 1160 return true, e.Update(ctx, key, value, lease) 1161 } 1162 } 1163 // if value is not equal then update. 1164 if !bytes.Equal(getR.Kvs[0].Value, value) { 1165 return true, e.Update(ctx, key, value, lease) 1166 } 1167 1168 return false, nil 1169 } 1170 1171 // CreateOnlyIfLocked atomically creates a key if the client is still holding the given lock or fails if it already exists 1172 func (e *etcdClient) CreateOnlyIfLocked(ctx context.Context, key string, value []byte, lease bool, lock KVLocker) (success bool, err error) { 1173 defer func() { 1174 Trace("CreateOnlyIfLocked", err, logrus.Fields{fieldKey: key, fieldValue: value, fieldAttachLease: lease, "success": success}) 1175 }() 1176 1177 duration := spanstat.Start() 1178 var leaseID client.LeaseID 1179 if lease { 1180 leaseID = e.GetSessionLeaseID() 1181 } 1182 req := e.createOpPut(key, value, leaseID) 1183 cnds := []client.Cmp{ 1184 client.Compare(client.Version(key), "=", 0), 1185 lock.Comparator().(client.Cmp), 1186 } 1187 1188 // We need to do a get in the else of the txn to detect if the lock is still 1189 // valid or not. 1190 opGets := []client.Op{ 1191 client.OpGet(key), 1192 } 1193 1194 e.limiter.Wait(ctx) 1195 txnresp, err := e.client.Txn(ctx).If(cnds...).Then(*req).Else(opGets...).Commit() 1196 increaseMetric(key, metricSet, "CreateOnlyLocked", duration.EndError(err).Total(), err) 1197 if err != nil { 1198 e.checkSession(err, leaseID) 1199 return false, Hint(err) 1200 } 1201 1202 // The txn can failed for the following reasons: 1203 // - Key version is not zero; 1204 // - Lock does not exist or is expired. 1205 // For both of those cases, the key that we are comparing might or not 1206 // exist, so we have: 1207 // A - Key does not exist and lock does not exist => ErrLockLeaseExpired 1208 // B - Key does not exist and lock exist => txn should succeed 1209 // C - Key does exist, version is == 0 and lock does not exist => ErrLockLeaseExpired 1210 // D - Key does exist, version is != 0 and lock does not exist => ErrLockLeaseExpired 1211 // E - Key does exist, version is == 0 and lock does exist => txn should succeed 1212 // F - Key does exist, version is != 0 and lock does exist => txn fails but returned is nil! 1213 1214 if !txnresp.Succeeded { 1215 // case F 1216 if len(txnresp.Responses[0].GetResponseRange().Kvs) != 0 && 1217 txnresp.Responses[0].GetResponseRange().Kvs[0].Version != 0 { 1218 return false, nil 1219 } 1220 1221 // case A, C and D 1222 return false, ErrLockLeaseExpired 1223 } 1224 1225 // case B and E 1226 return true, nil 1227 } 1228 1229 // CreateOnly creates a key with the value and will fail if the key already exists 1230 func (e *etcdClient) CreateOnly(ctx context.Context, key string, value []byte, lease bool) (success bool, err error) { 1231 defer func() { 1232 Trace("CreateOnly", err, logrus.Fields{fieldKey: key, fieldValue: value, fieldAttachLease: lease, "success": success}) 1233 }() 1234 1235 duration := spanstat.Start() 1236 var leaseID client.LeaseID 1237 if lease { 1238 leaseID = e.GetSessionLeaseID() 1239 } 1240 req := e.createOpPut(key, value, leaseID) 1241 cond := client.Compare(client.Version(key), "=", 0) 1242 1243 e.limiter.Wait(ctx) 1244 txnresp, err := e.client.Txn(ctx).If(cond).Then(*req).Commit() 1245 increaseMetric(key, metricSet, "CreateOnly", duration.EndError(err).Total(), err) 1246 if err != nil { 1247 e.checkSession(err, leaseID) 1248 return false, Hint(err) 1249 } 1250 1251 return txnresp.Succeeded, nil 1252 } 1253 1254 // CreateIfExists creates a key with the value only if key condKey exists 1255 func (e *etcdClient) CreateIfExists(condKey, key string, value []byte, lease bool) (err error) { 1256 defer func() { 1257 Trace("CreateIfExists", err, logrus.Fields{fieldKey: key, fieldValue: string(value), fieldCondition: condKey, fieldAttachLease: lease}) 1258 }() 1259 duration := spanstat.Start() 1260 var leaseID client.LeaseID 1261 if lease { 1262 leaseID = e.GetSessionLeaseID() 1263 } 1264 req := e.createOpPut(key, value, leaseID) 1265 cond := client.Compare(client.Version(condKey), "!=", 0) 1266 1267 e.limiter.Wait(ctx.TODO()) 1268 txnresp, err := e.client.Txn(ctx.TODO()).If(cond).Then(*req).Commit() 1269 increaseMetric(key, metricSet, "CreateIfExists", duration.EndError(err).Total(), err) 1270 if err != nil { 1271 e.checkSession(err, leaseID) 1272 err = Hint(err) 1273 return err 1274 } 1275 1276 if !txnresp.Succeeded { 1277 return fmt.Errorf("create was unsuccessful") 1278 } 1279 1280 return nil 1281 } 1282 1283 // FIXME: When we rebase to etcd 3.3 1284 // 1285 // DeleteOnZeroCount deletes the key if no matching keys for prefix exist 1286 //func (e *etcdClient) DeleteOnZeroCount(key, prefix string) error { 1287 // txnresp, err := e.client.Txn(ctx.TODO()). 1288 // If(client.Compare(client.Version(prefix).WithPrefix(), "=", 0)). 1289 // Then(client.OpDelete(key)). 1290 // Commit() 1291 // if err != nil { 1292 // return err 1293 // } 1294 // 1295 // if txnresp.Succeeded == false { 1296 // return fmt.Errorf("delete was unsuccessful") 1297 // } 1298 // 1299 // return nil 1300 //} 1301 1302 // ListPrefixIfLocked returns a list of keys matching the prefix only if the client is still holding the given lock. 1303 func (e *etcdClient) ListPrefixIfLocked(prefix string, lock KVLocker) (pairs KeyValuePairs, err error) { 1304 defer func() { 1305 Trace("ListPrefixIfLocked", err, logrus.Fields{fieldPrefix: prefix, fieldNumEntries: len(pairs)}) 1306 }() 1307 duration := spanstat.Start() 1308 e.limiter.Wait(ctx.TODO()) 1309 opGet := client.OpGet(prefix, client.WithPrefix()) 1310 cmp := lock.Comparator().(client.Cmp) 1311 var txnReply *client.TxnResponse 1312 txnReply, err = e.client.Txn(context.Background()).If(cmp).Then(opGet).Commit() 1313 if err == nil && !txnReply.Succeeded { 1314 err = ErrLockLeaseExpired 1315 } 1316 increaseMetric(prefix, metricRead, "ListPrefixLocked", duration.EndError(err).Total(), err) 1317 if err != nil { 1318 err = Hint(err) 1319 return nil, err 1320 } 1321 getR := txnReply.Responses[0].GetResponseRange() 1322 1323 pairs = KeyValuePairs(make(map[string]Value, getR.Count)) 1324 for i := int64(0); i < getR.Count; i++ { 1325 pairs[string(getR.Kvs[i].Key)] = Value{ 1326 Data: getR.Kvs[i].Value, 1327 ModRevision: uint64(getR.Kvs[i].ModRevision), 1328 } 1329 1330 } 1331 1332 return pairs, nil 1333 } 1334 1335 // ListPrefix returns a map of matching keys 1336 func (e *etcdClient) ListPrefix(prefix string) (pairs KeyValuePairs, err error) { 1337 defer func() { Trace("ListPrefix", err, logrus.Fields{fieldPrefix: prefix, fieldNumEntries: len(pairs)}) }() 1338 duration := spanstat.Start() 1339 1340 e.limiter.Wait(ctx.TODO()) 1341 var getR *client.GetResponse 1342 getR, err = e.client.Get(ctx.Background(), prefix, client.WithPrefix()) 1343 increaseMetric(prefix, metricRead, "ListPrefix", duration.EndError(err).Total(), err) 1344 if err != nil { 1345 return nil, Hint(err) 1346 } 1347 1348 pairs = KeyValuePairs(make(map[string]Value, getR.Count)) 1349 for i := int64(0); i < getR.Count; i++ { 1350 pairs[string(getR.Kvs[i].Key)] = Value{ 1351 Data: getR.Kvs[i].Value, 1352 ModRevision: uint64(getR.Kvs[i].ModRevision), 1353 LeaseID: getR.Kvs[i].Lease, 1354 } 1355 1356 } 1357 1358 return pairs, nil 1359 } 1360 1361 // Close closes the etcd session 1362 func (e *etcdClient) Close() { 1363 close(e.stopStatusChecker) 1364 <-e.firstSession 1365 if e.controllers != nil { 1366 e.controllers.RemoveAll() 1367 } 1368 e.RLock() 1369 defer e.RUnlock() 1370 e.lockSession.Close() 1371 e.session.Close() 1372 e.client.Close() 1373 } 1374 1375 // GetCapabilities returns the capabilities of the backend 1376 func (e *etcdClient) GetCapabilities() Capabilities { 1377 return Capabilities(CapabilityCreateIfExists) 1378 } 1379 1380 // Encode encodes a binary slice into a character set that the backend supports 1381 func (e *etcdClient) Encode(in []byte) (out string) { 1382 defer func() { Trace("Encode", nil, logrus.Fields{"in": in, "out": out}) }() 1383 return string(in) 1384 } 1385 1386 // Decode decodes a key previously encoded back into the original binary slice 1387 func (e *etcdClient) Decode(in string) (out []byte, err error) { 1388 defer func() { Trace("Decode", err, logrus.Fields{"in": in, "out": out}) }() 1389 return []byte(in), nil 1390 } 1391 1392 // ListAndWatch implements the BackendOperations.ListAndWatch using etcd 1393 func (e *etcdClient) ListAndWatch(name, prefix string, chanSize int) *Watcher { 1394 w := newWatcher(name, prefix, chanSize) 1395 1396 e.getLogger().WithField(fieldWatcher, w).Debug("Starting watcher...") 1397 1398 go e.Watch(w) 1399 1400 return w 1401 } 1402 1403 // SplitK8sServiceURL returns the service name and namespace for the given address. 1404 // If the given address is not parseable or it is not the format 1405 // '<protocol>://><name>.<namespace>[optional]', returns an error. 1406 func SplitK8sServiceURL(address string) (string, string, error) { 1407 u, err := url.Parse(address) 1408 if err != nil { 1409 return "", "", err 1410 } 1411 // typical service name "cilium-etcd-client.kube-system.svc" 1412 names := strings.Split(u.Hostname(), ".") 1413 if len(names) >= 2 { 1414 return names[0], names[1], nil 1415 } 1416 return "", "", 1417 fmt.Errorf("invalid service name. expecting <protocol://><name>.<namespace>[optional], got: %s", address) 1418 } 1419 1420 // IsEtcdOperator returns the service name if the configuration is setting up an 1421 // etcd-operator. If the configuration explicitly states it is configured 1422 // to connect to an etcd operator, e.g. with etcd.operator=true, the returned 1423 // service name is the first found within the configuration specified. 1424 func IsEtcdOperator(selectedBackend string, opts map[string]string, k8sNamespace string) (string, bool) { 1425 if selectedBackend != EtcdBackendName { 1426 return "", false 1427 } 1428 1429 isEtcdOperator := strings.ToLower(opts[isEtcdOperatorOption]) == "true" 1430 1431 fqdnIsEtcdOperator := func(address string) bool { 1432 svcName, ns, err := SplitK8sServiceURL(address) 1433 return err == nil && 1434 svcName == "cilium-etcd-client" && 1435 ns == k8sNamespace 1436 } 1437 1438 fqdn := opts[EtcdAddrOption] 1439 if len(fqdn) != 0 { 1440 if fqdnIsEtcdOperator(fqdn) || isEtcdOperator { 1441 return fqdn, true 1442 } 1443 return "", false 1444 } 1445 1446 bm := newEtcdModule() 1447 err := bm.setConfig(opts) 1448 if err != nil { 1449 return "", false 1450 } 1451 etcdConfig := bm.getConfig()[EtcdOptionConfig] 1452 if len(etcdConfig) == 0 { 1453 return "", false 1454 } 1455 1456 cfg, err := newConfig(etcdConfig) 1457 if err != nil { 1458 log.WithError(err).Error("Unable to read etcd configuration.") 1459 return "", false 1460 } 1461 for _, endpoint := range cfg.Endpoints { 1462 if fqdnIsEtcdOperator(endpoint) || isEtcdOperator { 1463 return endpoint, true 1464 } 1465 } 1466 1467 return "", false 1468 } 1469 1470 // newConfig is a wrapper of clientyaml.NewConfig. Since etcd has deprecated 1471 // the `ca-file` field from yamlConfig in v3.4, the clientyaml.NewConfig won't 1472 // read that field from the etcd configuration file making Cilium fail to 1473 // connect to a TLS-enabled etcd server. Since we should have deprecated the 1474 // usage of this field a long time ago, in this galaxy, we will have this 1475 // wrapper function as a workaround which will still use the `ca-file` field to 1476 // avoid users breaking their connectivity to etcd when upgrading Cilium. 1477 // TODO remove this wrapper in cilium >= 1.8 1478 func newConfig(fpath string) (*client.Config, error) { 1479 cfg, err := clientyaml.NewConfig(fpath) 1480 if err != nil { 1481 return nil, err 1482 } 1483 if cfg.TLS == nil || cfg.TLS.RootCAs != nil { 1484 return cfg, nil 1485 } 1486 1487 yc := &yamlConfig{} 1488 b, err := ioutil.ReadFile(fpath) 1489 if err != nil { 1490 return nil, err 1491 } 1492 err = yaml.Unmarshal(b, yc) 1493 if err != nil { 1494 return nil, err 1495 } 1496 if yc.InsecureTransport { 1497 return cfg, nil 1498 } 1499 1500 if yc.CAfile != "" { 1501 cp, err := tlsutil.NewCertPool([]string{yc.CAfile}) 1502 if err != nil { 1503 return nil, err 1504 } 1505 cfg.TLS.RootCAs = cp 1506 } 1507 cfg.TLS.GetClientCertificate = func(_ *tls.CertificateRequestInfo) (*tls.Certificate, error) { 1508 cer, err := tls.LoadX509KeyPair(yc.Certfile, yc.Keyfile) 1509 return &cer, err 1510 } 1511 return cfg, nil 1512 } 1513 1514 // copy of the internal structure in go.etcd.io/etcd/clientv3/yaml so we 1515 // can still use the `ca-file` field for one more release. 1516 type yamlConfig struct { 1517 client.Config 1518 1519 InsecureTransport bool `json:"insecure-transport"` 1520 InsecureSkipTLSVerify bool `json:"insecure-skip-tls-verify"` 1521 Certfile string `json:"cert-file"` 1522 Keyfile string `json:"key-file"` 1523 TrustedCAfile string `json:"trusted-ca-file"` 1524 1525 // CAfile is being deprecated. Use 'TrustedCAfile' instead. 1526 // TODO: deprecate this in v4 1527 CAfile string `json:"ca-file"` 1528 }