github.com/looshlee/beatles@v0.0.0-20220727174639-742810ab631c/pkg/kvstore/etcd.go

     1  // Copyright 2016-2020 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kvstore
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"crypto/tls"
    21  	"errors"
    22  	"fmt"
    23  	"io/ioutil"
    24  	"math/rand"
    25  	"net/url"
    26  	"os"
    27  	"strconv"
    28  	"strings"
    29  	"time"
    30  
    31  	"github.com/cilium/cilium/pkg/controller"
    32  	"github.com/cilium/cilium/pkg/defaults"
    33  	"github.com/cilium/cilium/pkg/lock"
    34  	"github.com/cilium/cilium/pkg/option"
    35  	"github.com/cilium/cilium/pkg/spanstat"
    36  
    37  	"github.com/hashicorp/go-version"
    38  	"github.com/sirupsen/logrus"
    39  	client "go.etcd.io/etcd/clientv3"
    40  	"go.etcd.io/etcd/clientv3/concurrency"
    41  	clientyaml "go.etcd.io/etcd/clientv3/yaml"
    42  	v3rpcErrors "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
    43  	"go.etcd.io/etcd/pkg/tlsutil"
    44  	ctx "golang.org/x/net/context"
    45  	"golang.org/x/time/rate"
    46  	"sigs.k8s.io/yaml"
    47  )
    48  
    49  const (
    50  	// EtcdBackendName is the backend name for etcd
    51  	EtcdBackendName = "etcd"
    52  
    53  	EtcdAddrOption       = "etcd.address"
    54  	isEtcdOperatorOption = "etcd.operator"
    55  	EtcdOptionConfig     = "etcd.config"
    56  
    57  	// EtcdRateLimitOption specifies maximum kv operations per second
    58  	EtcdRateLimitOption = "etcd.qps"
    59  )
    60  
    61  var (
    62  	// ErrLockLeaseExpired is the error returned when the lease of the lock
    63  	// does not exist or has expired.
    64  	ErrLockLeaseExpired = errors.New("transaction did not succeed: lock lease expired")
    65  )
    66  
    67  func init() {
    68  	rand.Seed(time.Now().UnixNano())
    69  }
    70  
    71  type etcdModule struct {
    72  	opts   backendOptions
    73  	config *client.Config
    74  }
    75  
    76  var (
    77  	// versionCheckTimeout is the time we wait trying to verify the version
    78  	// of an etcd endpoint. The timeout can be hit when there are network
    79  	// connectivity problems.
    80  	versionCheckTimeout = 30 * time.Second
    81  
    82  	// statusCheckTimeout is the timeout when performing status checks with
    83  	// all etcd endpoints
    84  	statusCheckTimeout = 10 * time.Second
    85  
    86  	// initialConnectionTimeout is the timeout for the initial connection to
    87  	// the etcd server
    88  	initialConnectionTimeout = 15 * time.Minute
    89  
    90  	minRequiredVersion, _ = version.NewConstraint(">= 3.1.0")
    91  
    92  	// etcdDummyAddress can be overwritten from test invokers using ldflags
    93  	etcdDummyAddress = "http://127.0.0.1:4002"
    94  
    95  	etcdInstance = newEtcdModule()
    96  )
    97  
    98  func EtcdDummyAddress() string {
    99  	return etcdDummyAddress
   100  }
   101  
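// newEtcdModule returns an etcd backendModule exposing the supported
// configuration options (cluster address, configuration file path, operator
// mode and the kv operation rate limit).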
   102  func newEtcdModule() backendModule {
   103  	return &etcdModule{
   104  		opts: backendOptions{
   105  			isEtcdOperatorOption: &backendOption{
   106  				description: "if the configuration is setting up an etcd-operator",
   107  			},
   108  			EtcdAddrOption: &backendOption{
   109  				description: "Addresses of etcd cluster",
   110  			},
   111  			EtcdOptionConfig: &backendOption{
   112  				description: "Path to etcd configuration file",
   113  			},
   114  			EtcdRateLimitOption: &backendOption{
   115  				description: "Rate limit in kv store operations per second",
   116  				validate: func(v string) error {
   117  					_, err := strconv.Atoi(v)
   118  					return err
   119  				},
   120  			},
   121  		},
   122  	}
   123  }
   124  
   125  func (e *etcdModule) createInstance() backendModule {
   126  	return newEtcdModule()
   127  }
   128  
   129  func (e *etcdModule) getName() string {
   130  	return EtcdBackendName
   131  }
   132  
   133  func (e *etcdModule) setConfigDummy() {
   134  	e.config = &client.Config{}
   135  	e.config.Endpoints = []string{etcdDummyAddress}
   136  }
   137  
   138  func (e *etcdModule) setConfig(opts map[string]string) error {
   139  	return setOpts(opts, e.opts)
   140  }
   141  
   142  func (e *etcdModule) setExtraConfig(opts *ExtraOptions) error {
   143  	if opts != nil && len(opts.DialOption) != 0 {
   144  		e.config = &client.Config{}
   145  		e.config.DialOptions = append(e.config.DialOptions, opts.DialOption...)
   146  	}
   147  	return nil
   148  }
   149  
   150  func (e *etcdModule) getConfig() map[string]string {
   151  	return getOpts(e.opts)
   152  }
   153  
   154  func (e *etcdModule) newClient(opts *ExtraOptions) (BackendOperations, chan error) {
   155  	errChan := make(chan error, 10)
   156  
   157  	endpointsOpt, endpointsSet := e.opts[EtcdAddrOption]
   158  	configPathOpt, configSet := e.opts[EtcdOptionConfig]
   159  
   160  	rateLimitOpt, rateLimitSet := e.opts[EtcdRateLimitOption]
   161  
   162  	rateLimit := defaults.KVstoreQPS
   163  	if rateLimitSet {
   164  		// error is discarded here because this option has validation
   165  		rateLimit, _ = strconv.Atoi(rateLimitOpt.value)
   166  	}
   167  
   168  	var configPath string
   169  	if configSet {
   170  		configPath = configPathOpt.value
   171  	}
   172  	if e.config == nil {
   173  		if !endpointsSet && !configSet {
   174  			errChan <- fmt.Errorf("invalid etcd configuration, %s or %s must be specified", EtcdOptionConfig, EtcdAddrOption)
   175  			close(errChan)
   176  			return nil, errChan
   177  		}
   178  
   179  		if endpointsOpt.value == "" && configPath == "" {
   180  			errChan <- fmt.Errorf("invalid etcd configuration, %s or %s must be specified",
   181  				EtcdOptionConfig, EtcdAddrOption)
   182  			close(errChan)
   183  			return nil, errChan
   184  		}
   185  
   186  		e.config = &client.Config{}
   187  	}
   188  
   189  	if e.config.Endpoints == nil && endpointsSet {
   190  		e.config.Endpoints = []string{endpointsOpt.value}
   191  	}
   192  
   193  	for {
   194  		// connectEtcdClient will close errChan when the connection attempt has
   195  		// been successful
   196  		backend, err := connectEtcdClient(e.config, configPath, errChan, rateLimit, opts)
   197  		switch {
   198  		case os.IsNotExist(err):
   199  			log.WithError(err).Info("Waiting for all etcd configuration files to be available")
   200  			time.Sleep(5 * time.Second)
   201  		case err != nil:
   202  			errChan <- err
   203  			close(errChan)
   204  			return backend, errChan
   205  		default:
   206  			return backend, errChan
   207  		}
   208  	}
   209  }
   210  
   211  func init() {
   212  	// register etcd module for use
   213  	registerBackend(EtcdBackendName, etcdInstance)
   214  
   215  	if duration := os.Getenv("CILIUM_ETCD_STATUS_CHECK_INTERVAL"); duration != "" {
   216  		timeout, err := time.ParseDuration(duration)
   217  		if err == nil {
   218  			statusCheckTimeout = timeout
   219  		}
   220  	}
   221  }
   222  
   223  // Hint tries to improve the error message displayed to the user.
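//
// For example (an illustrative sketch; etcdOp stands in for any etcd API call):
//
//	if err := etcdOp(); err != nil {
//		return Hint(err) // ctx.DeadlineExceeded becomes "etcd client timeout exceeded"
//	}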
   224  func Hint(err error) error {
   225  	switch err {
   226  	case ctx.DeadlineExceeded:
   227  		return fmt.Errorf("etcd client timeout exceeded")
   228  	default:
   229  		return err
   230  	}
   231  }
   232  
   233  type etcdClient struct {
   234  	// firstSession is a channel that will be closed once the first session
   235  	// is set up in the etcd Client.
   236  	firstSession chan struct{}
   237  
   238  	// stopStatusChecker is closed when the status checker can be terminated
   239  	stopStatusChecker chan struct{}
   240  
   241  	client      *client.Client
   242  	controllers *controller.Manager
   243  
   244  	// config and configPath are initialized once and never written to again; they can be accessed without locking
   245  	config     *client.Config
   246  	configPath string
   247  
   248  	// protects sessions from concurrent access
   249  	lock.RWMutex
   250  	session     *concurrency.Session
   251  	lockSession *concurrency.Session
   252  
   253  	// statusLock protects latestStatusSnapshot and latestErrorStatus for
   254  	// read/write access
   255  	statusLock lock.RWMutex
   256  
   257  	// latestStatusSnapshot is a snapshot of the latest etcd cluster status
   258  	latestStatusSnapshot string
   259  
   260  	// latestErrorStatus is the latest error condition of the etcd connection
   261  	latestErrorStatus error
   262  
   263  	extraOptions *ExtraOptions
   264  
   265  	limiter *rate.Limiter
   266  }
   267  
   268  func (e *etcdClient) getLogger() *logrus.Entry {
   269  	endpoints, path := []string{""}, ""
   270  	if e != nil {
   271  		if e.config != nil {
   272  			endpoints = e.config.Endpoints
   273  		}
   274  		path = e.configPath
   275  	}
   276  
   277  	return log.WithFields(logrus.Fields{
   278  		"endpoints": endpoints,
   279  		"config":    path,
   280  	})
   281  }
   282  
   283  type etcdMutex struct {
   284  	mutex *concurrency.Mutex
   285  }
   286  
   287  func (e *etcdMutex) Unlock() error {
   288  	return e.mutex.Unlock(ctx.TODO())
   289  }
   290  
   291  func (e *etcdMutex) Comparator() interface{} {
   292  	return e.mutex.IsOwner()
   293  }
   294  
   295  // GetSessionLeaseID returns the current lease ID.
   296  func (e *etcdClient) GetSessionLeaseID() client.LeaseID {
   297  	e.RWMutex.RLock()
   298  	l := e.session.Lease()
   299  	e.RWMutex.RUnlock()
   300  	return l
   301  }
   302  
   303  // GetLockSessionLeaseID returns the current lease ID for the lock session.
   304  func (e *etcdClient) GetLockSessionLeaseID() client.LeaseID {
   305  	e.RWMutex.RLock()
   306  	l := e.lockSession.Lease()
   307  	e.RWMutex.RUnlock()
   308  	return l
   309  }
   310  
   311  // checkSession verifies, from the error returned by an etcd API call,
   312  // whether the lease is still valid. If the error explicitly states that the
   313  // lease was not found, we mark the session as an orphan for this etcd client.
   314  // If we did not mark it as an orphan, the session would only be considered
   315  // expired after the lease TTL; orphaning it guarantees it will be renewed.
   316  func (e *etcdClient) checkSession(err error, leaseID client.LeaseID) {
   317  	if err == v3rpcErrors.ErrLeaseNotFound {
   318  		e.closeSession(leaseID)
   319  	}
   320  }
   321  
   322  // checkLockSession verifies, from the error returned by an etcd API call,
   323  // whether the lock lease is still valid. If the error explicitly states that
   324  // the lease was not found, we mark the lock session as an orphan for this
   325  // etcd client. If we did not mark it as an orphan, the session would only be
   326  // considered expired after the lease TTL; orphaning it guarantees renewal.
   327  func (e *etcdClient) checkLockSession(err error, leaseID client.LeaseID) {
   328  	if err == v3rpcErrors.ErrLeaseNotFound {
   329  		e.closeLockSession(leaseID)
   330  	}
   331  }
   332  
   333  // closeSession closes the current session.
   334  func (e *etcdClient) closeSession(leaseID client.LeaseID) {
   335  	e.RWMutex.RLock()
   336  	// only mark the session as orphan if the given leaseID matches the
   337  	// session's lease ID, to avoid orphaning any other session.
   338  	if e.session.Lease() == leaseID {
   339  		e.session.Orphan()
   340  	}
   341  	e.RWMutex.RUnlock()
   342  }
   343  
   344  // closeLockSession closes the current lock session.
   345  func (e *etcdClient) closeLockSession(leaseID client.LeaseID) {
   346  	e.RWMutex.RLock()
   347  	// only mark the lock session as orphan if the given leaseID matches the
   348  	// lock session's lease ID, to avoid orphaning any other session.
   349  	if e.lockSession.Lease() == leaseID {
   350  		e.lockSession.Orphan()
   351  	}
   352  	e.RWMutex.RUnlock()
   353  }
   354  
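// waitForInitLock repeatedly tries to acquire a distributed lock on a random
// path below InitLockPath until it succeeds or ctx is cancelled. The outcome
// is delivered on the returned channel: true if the lock was acquired (etcd
// has quorum), false if the context was cancelled first.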
   355  func (e *etcdClient) waitForInitLock(ctx context.Context) <-chan bool {
   356  	initLockSucceeded := make(chan bool)
   357  
   358  	go func() {
   359  		for {
   360  			select {
   361  			case <-ctx.Done():
   362  				initLockSucceeded <- false
   363  				close(initLockSucceeded)
   364  				return
   365  			default:
   366  			}
   367  
   368  			// Generate a random number so that we can acquire a lock even
   369  			// if other agents are killed while locking this path.
   370  			randNumber := strconv.FormatUint(rand.Uint64(), 16)
   371  			locker, err := e.LockPath(ctx, InitLockPath+"/"+randNumber)
   372  			if err == nil {
   373  				initLockSucceeded <- true
   374  				close(initLockSucceeded)
   375  				locker.Unlock()
   376  				e.getLogger().Debug("Distributed lock successful, etcd has quorum")
   377  				return
   378  			}
   379  
   380  			time.Sleep(100 * time.Millisecond)
   381  		}
   382  	}()
   383  
   384  	return initLockSucceeded
   385  }
   386  
   387  func (e *etcdClient) isConnectedAndHasQuorum() bool {
   388  	ctxTimeout, cancel := ctx.WithTimeout(ctx.TODO(), statusCheckTimeout)
   389  	defer cancel()
   390  
   391  	select {
   392  	// Wait for the initial connection to be established
   393  	case <-e.firstSession:
   394  	// Timeout while waiting for initial connection, no success
   395  	case <-ctxTimeout.Done():
   396  		return false
   397  	}
   398  
   399  	e.RLock()
   400  	ch := e.session.Done()
   401  	e.RUnlock()
   402  
   403  	initLockSucceeded := e.waitForInitLock(ctxTimeout)
   404  	select {
   405  	// Catch disconnect event, no success
   406  	case <-ch:
   407  		return false
   408  	// wait for initial lock to succeed
   409  	case success := <-initLockSucceeded:
   410  		return success
   411  	}
   412  }
   413  
   414  // Connected closes the returned channel when the etcd client is connected.
   415  func (e *etcdClient) Connected() <-chan struct{} {
   416  	out := make(chan struct{})
   417  	go func() {
   418  		for !e.isConnectedAndHasQuorum() {
   419  			time.Sleep(100 * time.Millisecond)
   420  		}
   421  		close(out)
   422  	}()
   423  	return out
   424  }
   425  
   426  // Disconnected closes the returned channel when the etcd client is
   427  // disconnected after being reconnected. Blocks until the etcd client is first
   428  // connected with the kvstore.
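//
// A typical consumption pattern (illustrative sketch, c being a connected
// *etcdClient):
//
//	<-c.Connected()    // wait until the client is connected and etcd has quorum
//	<-c.Disconnected() // returns once the current session is lost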
   429  func (e *etcdClient) Disconnected() <-chan struct{} {
   430  	<-e.firstSession
   431  	e.RLock()
   432  	ch := e.session.Done()
   433  	e.RUnlock()
   434  	return ch
   435  }
   436  
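// renewSession waits for the current session to expire, then establishes a
// new session with the configured lease TTL and re-validates the minimal etcd
// version.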
   437  func (e *etcdClient) renewSession() error {
   438  	<-e.firstSession
   439  	<-e.session.Done()
   440  	// This is an attempt to avoid concurrent access to a session that has
   441  	// already expired. It is not perfect, as there is still a window between
   442  	// e.session.Done() being closed and e.Lock() being held in which parallel
   443  	// goroutines can get the lease ID of an already expired lease.
   444  	e.Lock()
   445  
   446  	newSession, err := concurrency.NewSession(e.client, concurrency.WithTTL(int(option.Config.KVstoreLeaseTTL.Seconds())))
   447  	if err != nil {
   448  		e.UnlockIgnoreTime()
   449  		return fmt.Errorf("unable to renew etcd session: %s", err)
   450  	}
   451  	log.Infof("Got new lease ID %x", newSession.Lease())
   452  
   453  	e.session = newSession
   454  	e.UnlockIgnoreTime()
   455  
   456  	e.getLogger().WithField(fieldSession, newSession).Debug("Renewing etcd session")
   457  
   458  	if err := e.checkMinVersion(); err != nil {
   459  		return err
   460  	}
   461  
   462  	return nil
   463  }
   464  
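// renewLockSession waits for the current lock session to expire, then
// establishes a new lock session with the lock lease TTL.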
   465  func (e *etcdClient) renewLockSession() error {
   466  	<-e.firstSession
   467  	<-e.lockSession.Done()
   468  	// This is an attempt to avoid concurrent access to a session that has
   469  	// already expired. It is not perfect, as there is still a window between
   470  	// e.lockSession.Done() being closed and e.Lock() being held in which
   471  	// parallel goroutines can get the lease ID of an already expired lease.
   472  	e.Lock()
   473  
   474  	newSession, err := concurrency.NewSession(e.client, concurrency.WithTTL(int(defaults.LockLeaseTTL.Seconds())))
   475  	if err != nil {
   476  		e.UnlockIgnoreTime()
   477  		return fmt.Errorf("unable to renew etcd lock session: %s", err)
   478  	}
   479  	log.Infof("Got new lock lease ID %x", newSession.Lease())
   480  
   481  	e.lockSession = newSession
   482  	e.UnlockIgnoreTime()
   483  
   484  	e.getLogger().WithField(fieldSession, newSession).Debug("Renewing etcd lock session")
   485  
   486  	return nil
   487  }
   488  
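// connectEtcdClient creates the etcd client from config, optionally merged
// with the configuration file at cfgPath, establishes the regular and lock
// sessions in the background, and starts the status checker and the
// session-renewal controllers. errChan is closed once the initial connection
// attempt has completed, successfully or not.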
   489  func connectEtcdClient(config *client.Config, cfgPath string, errChan chan error, rateLimit int, opts *ExtraOptions) (BackendOperations, error) {
   490  	if cfgPath != "" {
   491  		cfg, err := newConfig(cfgPath)
   492  		if err != nil {
   493  			return nil, err
   494  		}
   495  		cfg.DialOptions = append(cfg.DialOptions, config.DialOptions...)
   496  		config = cfg
   497  	}
   498  
   499  	// Set DialTimeout to 0, otherwise the creation of a new client will
   500  	// block until DialTimeout is reached or a connection to the server
   501  	// is made.
   502  	config.DialTimeout = 0
   503  	c, err := client.New(*config)
   504  	if err != nil {
   505  		return nil, err
   506  	}
   507  
   508  	log.WithFields(logrus.Fields{
   509  		"endpoints": config.Endpoints,
   510  		"config":    cfgPath,
   511  	}).Info("Connecting to etcd server...")
   512  
   513  	var s, ls concurrency.Session
   514  	firstSession := make(chan struct{})
   515  	errorChan := make(chan error)
   516  
   517  	// create session in parallel as this is a blocking operation
   518  	go func() {
   519  		session, err := concurrency.NewSession(c, concurrency.WithTTL(int(option.Config.KVstoreLeaseTTL.Seconds())))
   520  		if err != nil {
   521  			errorChan <- err
   522  			close(errorChan)
   523  			return
   524  		}
   525  		lockSession, err := concurrency.NewSession(c, concurrency.WithTTL(int(defaults.LockLeaseTTL.Seconds())))
   526  		if err != nil {
   527  			errorChan <- err
   528  			close(errorChan)
   529  			return
   530  		}
   531  		s = *session
   532  		ls = *lockSession
   533  
   534  		log.Infof("Got lease ID %x", s.Lease())
   535  		log.Infof("Got lock lease ID %x", ls.Lease())
   536  		close(errorChan)
   537  	}()
   538  
   539  	ec := &etcdClient{
   540  		client:               c,
   541  		config:               config,
   542  		configPath:           cfgPath,
   543  		session:              &s,
   544  		lockSession:          &ls,
   545  		firstSession:         firstSession,
   546  		controllers:          controller.NewManager(),
   547  		latestStatusSnapshot: "No connection to etcd",
   548  		stopStatusChecker:    make(chan struct{}),
   549  		extraOptions:         opts,
   550  		limiter:              rate.NewLimiter(rate.Limit(rateLimit), rateLimit),
   551  	}
   552  
   553  	// wait, also in parallel, for the sessions to be created
   554  	go func() {
   555  		defer close(errChan)
   556  
   557  		select {
   558  		case err = <-errorChan:
   559  			if err != nil {
   560  				errChan <- err
   561  				return
   562  			}
   563  		case <-time.After(initialConnectionTimeout):
   564  			errChan <- fmt.Errorf("timed out while waiting for etcd session. Ensure that etcd is running on %s", config.Endpoints)
   565  			return
   566  		}
   567  
   568  		ec.getLogger().Debugf("Session received")
   569  		close(ec.firstSession)
   570  
   571  		if err := ec.checkMinVersion(); err != nil {
   572  			errChan <- fmt.Errorf("unable to validate etcd version: %s", err)
   573  		}
   574  	}()
   575  
   576  	go ec.statusChecker()
   577  
   578  	ec.controllers.UpdateController("kvstore-etcd-session-renew",
   579  		controller.ControllerParams{
   580  			DoFunc: func(ctx context.Context) error {
   581  				return ec.renewSession()
   582  			},
   583  			RunInterval: time.Duration(10) * time.Millisecond,
   584  		},
   585  	)
   586  
   587  	ec.controllers.UpdateController("kvstore-etcd-lock-session-renew",
   588  		controller.ControllerParams{
   589  			DoFunc: func(ctx context.Context) error {
   590  				return ec.renewLockSession()
   591  			},
   592  			RunInterval: time.Duration(10) * time.Millisecond,
   593  		},
   594  	)
   595  
   596  	return ec, nil
   597  }
   598  
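// getEPVersion queries the given endpoint and returns the etcd server version
// it reports, bounded by timeout.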
   599  func getEPVersion(c client.Maintenance, etcdEP string, timeout time.Duration) (*version.Version, error) {
   600  	ctxTimeout, cancel := ctx.WithTimeout(ctx.TODO(), timeout)
   601  	defer cancel()
   602  	sr, err := c.Status(ctxTimeout, etcdEP)
   603  	if err != nil {
   604  		return nil, Hint(err)
   605  	}
   606  	v, err := version.NewVersion(sr.Version)
   607  	if err != nil {
   608  		return nil, fmt.Errorf("error parsing server version %q: %s", sr.Version, Hint(err))
   609  	}
   610  	return v, nil
   611  }
   612  
   613  // checkMinVersion checks the minimal version running on the etcd cluster.
   614  // This function should be run whenever the etcd client is connected for the
   615  // first time and whenever the session is renewed.
   616  func (e *etcdClient) checkMinVersion() error {
   617  	eps := e.client.Endpoints()
   618  
   619  	for _, ep := range eps {
   620  		v, err := getEPVersion(e.client.Maintenance, ep, versionCheckTimeout)
   621  		if err != nil {
   622  			e.getLogger().WithError(Hint(err)).WithField(fieldEtcdEndpoint, ep).
   623  				Warn("Unable to verify version of etcd endpoint")
   624  			continue
   625  		}
   626  
   627  		if !minRequiredVersion.Check(v) {
   628  			return fmt.Errorf("minimal etcd version not met in %q, required: %s, found: %s",
   629  				ep, minRequiredVersion.String(), v.String())
   630  		}
   631  
   632  		e.getLogger().WithFields(logrus.Fields{
   633  			fieldEtcdEndpoint: ep,
   634  			"version":         v,
   635  		}).Info("Successfully verified version of etcd endpoint")
   636  	}
   637  
   638  	if len(eps) == 0 {
   639  		e.getLogger().Warn("Minimal etcd version unknown: No etcd endpoints available")
   640  	}
   641  
   642  	return nil
   643  }
   644  
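// LockPath acquires a distributed lock on the given path using the lock
// session. The attempt is bounded by a one-minute timeout on top of ctx.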
   645  func (e *etcdClient) LockPath(ctx context.Context, path string) (KVLocker, error) {
   646  	select {
   647  	case <-e.firstSession:
   648  	case <-ctx.Done():
   649  		return nil, fmt.Errorf("lock cancelled via context: %s", ctx.Err())
   650  	}
   651  
   652  	e.RLock()
   653  	mu := concurrency.NewMutex(e.lockSession, path)
   654  	leaseID := e.lockSession.Lease()
   655  	e.RUnlock()
   656  
   657  	ctx, cancel := context.WithTimeout(ctx, time.Minute)
   658  	defer cancel()
   659  	err := mu.Lock(ctx)
   660  	if err != nil {
   661  		e.checkLockSession(err, leaseID)
   662  		return nil, Hint(err)
   663  	}
   664  
   665  	return &etcdMutex{mutex: mu}, nil
   666  }
   667  
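// DeletePrefix deletes all keys matching the given prefix.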
   668  func (e *etcdClient) DeletePrefix(path string) (err error) {
   669  	defer func() { Trace("DeletePrefix", err, logrus.Fields{fieldPrefix: path}) }()
   670  	duration := spanstat.Start()
   671  	e.limiter.Wait(ctx.TODO())
   672  	_, err = e.client.Delete(ctx.Background(), path, client.WithPrefix())
   673  	increaseMetric(path, metricDelete, "DeletePrefix", duration.EndError(err).Total(), err)
   674  	return Hint(err)
   675  }
   676  
   677  // Watch starts watching for changes in a prefix
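// by first listing the existing keys and emitting events for them, and then
// watching from the revision that follows the list. If the watch is
// interrupted, for instance because the requested revision has been compacted,
// the prefix is re-listed and the watch is recreated.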
   678  func (e *etcdClient) Watch(w *Watcher) {
   679  	localCache := watcherCache{}
   680  	listSignalSent := false
   681  
   682  	scopedLog := e.getLogger().WithFields(logrus.Fields{
   683  		fieldWatcher: w,
   684  		fieldPrefix:  w.prefix,
   685  	})
   686  	<-e.Connected()
   687  
   688  reList:
   689  	for {
   690  		e.limiter.Wait(ctx.TODO())
   691  		res, err := e.client.Get(ctx.Background(), w.prefix, client.WithPrefix(),
   692  			client.WithSerializable())
   693  		if err != nil {
   694  			scopedLog.WithError(Hint(err)).Warn("Unable to list keys before starting watcher")
   695  			continue
   696  		}
   697  
   698  		nextRev := res.Header.Revision + 1
   699  		scopedLog.Debugf("List response from etcd len=%d: %+v", res.Count, res)
   700  
   701  		if res.Count > 0 {
   702  			for _, key := range res.Kvs {
   703  				t := EventTypeCreate
   704  				if localCache.Exists(key.Key) {
   705  					t = EventTypeModify
   706  				}
   707  
   708  				localCache.MarkInUse(key.Key)
   709  				scopedLog.Debugf("Emitting list result as %v event for %s=%v", t, key.Key, key.Value)
   710  
   711  				queueStart := spanstat.Start()
   712  				w.Events <- KeyValueEvent{
   713  					Key:   string(key.Key),
   714  					Value: key.Value,
   715  					Typ:   t,
   716  				}
   717  				trackEventQueued(string(key.Key), t, queueStart.End(true).Total())
   718  			}
   719  		}
   720  
   721  		// More keys to be read, call Get() again
   722  		if res.More {
   723  			continue
   724  		}
   725  
   726  		// Send out deletion events for all keys that were deleted
   727  		// between our last known revision and the latest revision
   728  		// received via Get
   729  		localCache.RemoveDeleted(func(k string) {
   730  			event := KeyValueEvent{
   731  				Key: k,
   732  				Typ: EventTypeDelete,
   733  			}
   734  
   735  			scopedLog.Debugf("Emitting EventTypeDelete event for %s", k)
   736  			queueStart := spanstat.Start()
   737  			w.Events <- event
   738  			trackEventQueued(k, EventTypeDelete, queueStart.End(true).Total())
   739  		})
   740  
   741  		// Only send the list signal once
   742  		if !listSignalSent {
   743  			w.Events <- KeyValueEvent{Typ: EventTypeListDone}
   744  			listSignalSent = true
   745  		}
   746  
   747  	recreateWatcher:
   748  		scopedLog.WithField(fieldRev, nextRev).Debug("Starting to watch a prefix")
   749  
   750  		e.limiter.Wait(ctx.TODO())
   751  		etcdWatch := e.client.Watch(ctx.Background(), w.prefix,
   752  			client.WithPrefix(), client.WithRev(nextRev))
   753  		for {
   754  			select {
   755  			case <-w.stopWatch:
   756  				close(w.Events)
   757  				w.stopWait.Done()
   758  				return
   759  
   760  			case r, ok := <-etcdWatch:
   761  				if !ok {
   762  					time.Sleep(50 * time.Millisecond)
   763  					goto recreateWatcher
   764  				}
   765  
   766  				scopedLog := scopedLog.WithField(fieldRev, r.Header.Revision)
   767  
   768  				if err := r.Err(); err != nil {
   769  					// We tried to watch on a compacted
   770  					// revision that may no longer exist,
   771  					// recreate the watcher and try to
   772  					// watch on the next possible revision
   773  					if err == v3rpcErrors.ErrCompacted {
   774  						scopedLog.WithError(Hint(err)).Debug("Tried watching on compacted revision")
   775  					}
   776  
   777  					// mark all local keys in state for
   778  					// deletion unless the upcoming GET
   779  					// marks them alive
   780  					localCache.MarkAllForDeletion()
   781  
   782  					goto reList
   783  				}
   784  
   785  				nextRev = r.Header.Revision + 1
   786  				scopedLog.Debugf("Received event from etcd: %+v", r)
   787  
   788  				for _, ev := range r.Events {
   789  					event := KeyValueEvent{
   790  						Key:   string(ev.Kv.Key),
   791  						Value: ev.Kv.Value,
   792  					}
   793  
   794  					switch {
   795  					case ev.Type == client.EventTypeDelete:
   796  						event.Typ = EventTypeDelete
   797  						localCache.RemoveKey(ev.Kv.Key)
   798  					case ev.IsCreate():
   799  						event.Typ = EventTypeCreate
   800  						localCache.MarkInUse(ev.Kv.Key)
   801  					default:
   802  						event.Typ = EventTypeModify
   803  						localCache.MarkInUse(ev.Kv.Key)
   804  					}
   805  
   806  					scopedLog.Debugf("Emitting %v event for %s=%v", event.Typ, event.Key, event.Value)
   807  
   808  					queueStart := spanstat.Start()
   809  					w.Events <- event
   810  					trackEventQueued(string(ev.Kv.Key), event.Typ, queueStart.End(true).Total())
   811  				}
   812  			}
   813  		}
   814  	}
   815  }
   816  
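// determineEndpointStatus queries the status of a single etcd endpoint and
// returns a human-readable summary containing its version and whether it is
// the leader.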
   817  func (e *etcdClient) determineEndpointStatus(endpointAddress string) (string, error) {
   818  	ctxTimeout, cancel := ctx.WithTimeout(ctx.Background(), statusCheckTimeout)
   819  	defer cancel()
   820  
   821  	e.getLogger().Debugf("Checking status to etcd endpoint %s", endpointAddress)
   822  
   823  	e.limiter.Wait(ctxTimeout)
   824  	status, err := e.client.Status(ctxTimeout, endpointAddress)
   825  	if err != nil {
   826  		return fmt.Sprintf("%s - %s", endpointAddress, err), Hint(err)
   827  	}
   828  
   829  	str := fmt.Sprintf("%s - %s", endpointAddress, status.Version)
   830  	if status.Header.MemberId == status.Leader {
   831  		str += " (Leader)"
   832  	}
   833  
   834  	return str, nil
   835  }
   836  
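// statusChecker periodically probes all etcd endpoints and updates
// latestStatusSnapshot and latestErrorStatus until stopStatusChecker is
// closed.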
   837  func (e *etcdClient) statusChecker() {
   838  	for {
   839  		newStatus := []string{}
   840  		ok := 0
   841  
   842  		hasQuorum := e.isConnectedAndHasQuorum()
   843  
   844  		endpoints := e.client.Endpoints()
   845  		for _, ep := range endpoints {
   846  			st, err := e.determineEndpointStatus(ep)
   847  			if err == nil {
   848  				ok++
   849  			}
   850  
   851  			newStatus = append(newStatus, st)
   852  		}
   853  
   854  		allConnected := len(endpoints) == ok
   855  
   856  		e.RWMutex.RLock()
   857  		sessionLeaseID := e.session.Lease()
   858  		lockSessionLeaseID := e.lockSession.Lease()
   859  		e.RWMutex.RUnlock()
   860  
   861  		e.statusLock.Lock()
   862  		e.latestStatusSnapshot = fmt.Sprintf("etcd: %d/%d connected, lease-ID=%x, lock lease-ID=%x, has-quorum=%t: %s",
   863  			ok, len(endpoints), sessionLeaseID, lockSessionLeaseID, hasQuorum, strings.Join(newStatus, "; "))
   864  
   865  		// Only mark the etcd health as unstable if no etcd endpoints can be reached
   866  		if len(endpoints) > 0 && ok == 0 {
   867  			e.latestErrorStatus = fmt.Errorf("not able to connect to any etcd endpoints")
   868  		} else {
   869  			e.latestErrorStatus = nil
   870  		}
   871  
   872  		e.statusLock.Unlock()
   873  
   874  		select {
   875  		case <-e.stopStatusChecker:
   876  			return
   877  		case <-time.After(e.extraOptions.StatusCheckInterval(allConnected)):
   878  		}
   879  	}
   880  }
   881  
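// Status returns the latest cluster status snapshot and error condition as
// maintained by the background status checker.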
   882  func (e *etcdClient) Status() (string, error) {
   883  	e.statusLock.RLock()
   884  	defer e.statusLock.RUnlock()
   885  
   886  	return e.latestStatusSnapshot, Hint(e.latestErrorStatus)
   887  }
   888  
   889  // GetIfLocked returns value of key if the client is still holding the given lock.
   890  func (e *etcdClient) GetIfLocked(key string, lock KVLocker) (bv []byte, err error) {
   891  	defer func() { Trace("GetIfLocked", err, logrus.Fields{fieldKey: key, fieldValue: string(bv)}) }()
   892  	duration := spanstat.Start()
   893  	e.limiter.Wait(ctx.TODO())
   894  	opGet := client.OpGet(key)
   895  	cmp := lock.Comparator().(client.Cmp)
   896  	txnReply, err := e.client.Txn(context.Background()).If(cmp).Then(opGet).Commit()
   897  	if err == nil && !txnReply.Succeeded {
   898  		err = ErrLockLeaseExpired
   899  	}
   900  	increaseMetric(key, metricRead, "GetLocked", duration.EndError(err).Total(), err)
   901  	if err != nil {
   902  		return nil, Hint(err)
   903  	}
   904  
   905  	getR := txnReply.Responses[0].GetResponseRange()
   906  	// RangeResponse
   907  	if getR.Count == 0 {
   908  		return nil, nil
   909  	}
   910  	bv, err = getR.Kvs[0].Value, nil
   911  	return bv, err
   912  }
   913  
   914  // Get returns value of key
   915  func (e *etcdClient) Get(key string) (bv []byte, err error) {
   916  	defer func() { Trace("Get", err, logrus.Fields{fieldKey: key, fieldValue: string(bv)}) }()
   917  	duration := spanstat.Start()
   918  	e.limiter.Wait(ctx.TODO())
   919  	var getR *client.GetResponse
   920  	getR, err = e.client.Get(ctx.Background(), key)
   921  	increaseMetric(key, metricRead, "Get", duration.EndError(err).Total(), err)
   922  	if err != nil {
   923  		err = Hint(err)
   924  		return nil, err
   925  	}
   926  
   927  	if getR.Count == 0 {
   928  		return nil, nil
   929  	}
   930  	return getR.Kvs[0].Value, nil
   931  }
   932  
   933  // GetPrefixIfLocked returns the first key which matches the prefix and its value if the client is still holding the given lock.
   934  func (e *etcdClient) GetPrefixIfLocked(ctx context.Context, prefix string, lock KVLocker) (k string, bv []byte, err error) {
   935  	defer func() {
   936  		Trace("GetPrefixIfLocked", err, logrus.Fields{fieldPrefix: prefix, fieldKey: k, fieldValue: string(bv)})
   937  	}()
   938  
   939  	duration := spanstat.Start()
   940  	e.limiter.Wait(ctx)
   941  	opGet := client.OpGet(prefix, client.WithPrefix(), client.WithLimit(1))
   942  	cmp := lock.Comparator().(client.Cmp)
   943  	txnReply, err := e.client.Txn(ctx).If(cmp).Then(opGet).Commit()
   944  	if err == nil && !txnReply.Succeeded {
   945  		err = ErrLockLeaseExpired
   946  	}
   947  	increaseMetric(prefix, metricRead, "GetPrefixLocked", duration.EndError(err).Total(), err)
   948  	if err != nil {
   949  		return "", nil, Hint(err)
   950  	}
   951  	getR := txnReply.Responses[0].GetResponseRange()
   952  
   953  	if getR.Count == 0 {
   954  		return "", nil, nil
   955  	}
   956  	return string(getR.Kvs[0].Key), getR.Kvs[0].Value, nil
   957  }
   958  
   959  // GetPrefix returns the first key which matches the prefix and its value
   960  func (e *etcdClient) GetPrefix(ctx context.Context, prefix string) (k string, bv []byte, err error) {
   961  	defer func() {
   962  		Trace("GetPrefix", err, logrus.Fields{fieldPrefix: prefix, fieldKey: k, fieldValue: string(bv)})
   963  	}()
   964  
   965  	duration := spanstat.Start()
   966  	e.limiter.Wait(ctx)
   967  	getR, err := e.client.Get(ctx, prefix, client.WithPrefix(), client.WithLimit(1))
   968  	increaseMetric(prefix, metricRead, "GetPrefix", duration.EndError(err).Total(), err)
   969  	if err != nil {
   970  		return "", nil, Hint(err)
   971  	}
   972  
   973  	if getR.Count == 0 {
   974  		return "", nil, nil
   975  	}
   976  	return string(getR.Kvs[0].Key), getR.Kvs[0].Value, nil
   977  }
   978  
   979  // Set sets value of key
   980  func (e *etcdClient) Set(key string, value []byte) (err error) {
   981  	defer func() { Trace("Set", err, logrus.Fields{fieldKey: key, fieldValue: string(value)}) }()
   982  	duration := spanstat.Start()
   983  	e.limiter.Wait(ctx.TODO())
   984  	_, err = e.client.Put(ctx.Background(), key, string(value))
   985  	increaseMetric(key, metricSet, "Set", duration.EndError(err).Total(), err)
   986  	err = Hint(err)
   987  	return err
   988  }
   989  
   990  // DeleteIfLocked deletes a key if the client is still holding the given lock.
   991  func (e *etcdClient) DeleteIfLocked(key string, lock KVLocker) (err error) {
   992  	defer func() { Trace("DeleteIfLocked", err, logrus.Fields{fieldKey: key}) }()
   993  	duration := spanstat.Start()
   994  	opDel := client.OpDelete(key)
   995  	cmp := lock.Comparator().(client.Cmp)
   996  	var txnReply *client.TxnResponse
   997  	txnReply, err = e.client.Txn(context.Background()).If(cmp).Then(opDel).Commit()
   998  	if err == nil && !txnReply.Succeeded {
   999  		err = ErrLockLeaseExpired
  1000  	}
  1001  	increaseMetric(key, metricDelete, "DeleteLocked", duration.EndError(err).Total(), err)
  1002  	err = Hint(err)
  1003  	return err
  1004  }
  1005  
  1006  // Delete deletes a key
  1007  func (e *etcdClient) Delete(key string) (err error) {
  1008  	defer func() { Trace("Delete", err, logrus.Fields{fieldKey: key}) }()
  1009  	duration := spanstat.Start()
  1010  	e.limiter.Wait(ctx.TODO())
  1011  	_, err = e.client.Delete(ctx.Background(), key)
  1012  	increaseMetric(key, metricDelete, "Delete", duration.EndError(err).Total(), err)
  1013  	err = Hint(err)
  1014  	return err
  1015  }
  1016  
  1017  func (e *etcdClient) createOpPut(key string, value []byte, leaseID client.LeaseID) *client.Op {
  1018  	if leaseID != 0 {
  1019  		op := client.OpPut(key, string(value), client.WithLease(leaseID))
  1020  		return &op
  1021  	}
  1022  
  1023  	op := client.OpPut(key, string(value))
  1024  	return &op
  1025  }
  1026  
  1027  // UpdateIfLocked creates or updates a key if the client is still holding the given lock.
  1028  func (e *etcdClient) UpdateIfLocked(ctx context.Context, key string, value []byte, lease bool, lock KVLocker) error {
  1029  	select {
  1030  	case <-e.firstSession:
  1031  	case <-ctx.Done():
  1032  		return fmt.Errorf("update cancelled via context: %s", ctx.Err())
  1033  	}
  1034  
  1035  	var (
  1036  		txnReply *client.TxnResponse
  1037  		err      error
  1038  	)
  1039  
  1040  	duration := spanstat.Start()
  1041  	e.limiter.Wait(ctx)
  1042  	if lease {
  1043  		leaseID := e.GetSessionLeaseID()
  1044  		opPut := client.OpPut(key, string(value), client.WithLease(leaseID))
  1045  		cmp := lock.Comparator().(client.Cmp)
  1046  		txnReply, err = e.client.Txn(context.Background()).If(cmp).Then(opPut).Commit()
  1047  		e.checkSession(err, leaseID)
  1048  	} else {
  1049  		opPut := client.OpPut(key, string(value))
  1050  		cmp := lock.Comparator().(client.Cmp)
  1051  		txnReply, err = e.client.Txn(context.Background()).If(cmp).Then(opPut).Commit()
  1052  	}
  1053  	if err == nil && !txnReply.Succeeded {
  1054  		err = ErrLockLeaseExpired
  1055  	}
  1056  	increaseMetric(key, metricSet, "UpdateIfLocked", duration.EndError(err).Total(), err)
  1057  	return Hint(err)
  1058  }
  1059  
  1060  // Update creates or updates a key
  1061  func (e *etcdClient) Update(ctx context.Context, key string, value []byte, lease bool) (err error) {
  1062  	defer Trace("Update", err, logrus.Fields{fieldKey: key, fieldValue: string(value), fieldAttachLease: lease})
  1063  
  1064  	select {
  1065  	case <-e.firstSession:
  1066  	case <-ctx.Done():
  1067  		return fmt.Errorf("update cancelled via context: %s", ctx.Err())
  1068  	}
  1069  
  1070  	if lease {
  1071  		duration := spanstat.Start()
  1072  		leaseID := e.GetSessionLeaseID()
  1073  		e.limiter.Wait(ctx)
  1074  		_, err := e.client.Put(ctx, key, string(value), client.WithLease(leaseID))
  1075  		e.checkSession(err, leaseID)
  1076  		increaseMetric(key, metricSet, "Update", duration.EndError(err).Total(), err)
  1077  		return Hint(err)
  1078  	}
  1079  
  1080  	duration := spanstat.Start()
  1081  	e.limiter.Wait(ctx)
  1082  	_, err = e.client.Put(ctx, key, string(value))
  1083  	increaseMetric(key, metricSet, "Update", duration.EndError(err).Total(), err)
  1084  	return Hint(err)
  1085  }
  1086  
  1087  // UpdateIfDifferentIfLocked updates a key if the value is different and if the client is still holding the given lock.
  1088  func (e *etcdClient) UpdateIfDifferentIfLocked(ctx context.Context, key string, value []byte, lease bool, lock KVLocker) (recreated bool, err error) {
  1089  	defer func() {
  1090  		Trace("UpdateIfDifferentIfLocked", err, logrus.Fields{fieldKey: key, fieldValue: value, fieldAttachLease: lease, "recreated": recreated})
  1091  	}()
  1092  
  1093  	select {
  1094  	case <-e.firstSession:
  1095  	case <-ctx.Done():
  1096  		return false, fmt.Errorf("update cancelled via context: %s", ctx.Err())
  1097  	}
  1098  	duration := spanstat.Start()
  1099  	e.limiter.Wait(ctx)
  1100  	cnds := lock.Comparator().(client.Cmp)
  1101  	txnresp, err := e.client.Txn(ctx).If(cnds).Then(client.OpGet(key)).Commit()
  1102  
  1103  	increaseMetric(key, metricRead, "Get", duration.EndError(err).Total(), err)
  1104  
  1105  	// On error, attempt update blindly
  1106  	if err != nil {
  1107  		return true, e.UpdateIfLocked(ctx, key, value, lease, lock)
  1108  	}
  1109  
  1110  	if !txnresp.Succeeded {
  1111  		return false, ErrLockLeaseExpired
  1112  	}
  1113  
  1114  	getR := txnresp.Responses[0].GetResponseRange()
  1115  	if getR.Count == 0 {
  1116  		return true, e.UpdateIfLocked(ctx, key, value, lease, lock)
  1117  	}
  1118  
  1119  	if lease {
  1120  		e.RWMutex.RLock()
  1121  		leaseID := e.session.Lease()
  1122  		e.RWMutex.RUnlock()
  1123  		if getR.Kvs[0].Lease != int64(leaseID) {
  1124  			return true, e.UpdateIfLocked(ctx, key, value, lease, lock)
  1125  		}
  1126  	}
  1127  	// if value is not equal then update.
  1128  	if !bytes.Equal(getR.Kvs[0].Value, value) {
  1129  		return true, e.UpdateIfLocked(ctx, key, value, lease, lock)
  1130  	}
  1131  
  1132  	return false, nil
  1133  }
  1134  
  1135  // UpdateIfDifferent updates a key if the value is different
  1136  func (e *etcdClient) UpdateIfDifferent(ctx context.Context, key string, value []byte, lease bool) (recreated bool, err error) {
  1137  	defer func() {
  1138  		Trace("UpdateIfDifferent", err, logrus.Fields{fieldKey: key, fieldValue: value, fieldAttachLease: lease, "recreated": recreated})
  1139  	}()
  1140  
  1141  	select {
  1142  	case <-e.firstSession:
  1143  	case <-ctx.Done():
  1144  		return false, fmt.Errorf("update cancelled via context: %s", ctx.Err())
  1145  	}
  1146  
  1147  	duration := spanstat.Start()
  1148  	e.limiter.Wait(ctx)
  1149  	getR, err := e.client.Get(ctx, key)
  1150  	increaseMetric(key, metricRead, "Get", duration.EndError(err).Total(), err)
  1151  	// On error, attempt update blindly
  1152  	if err != nil || getR.Count == 0 {
  1153  		return true, e.Update(ctx, key, value, lease)
  1154  	}
  1155  	if lease {
  1156  		e.RWMutex.RLock()
  1157  		leaseID := e.session.Lease()
  1158  		e.RWMutex.RUnlock()
  1159  		if getR.Kvs[0].Lease != int64(leaseID) {
  1160  			return true, e.Update(ctx, key, value, lease)
  1161  		}
  1162  	}
  1163  	// if value is not equal then update.
  1164  	if !bytes.Equal(getR.Kvs[0].Value, value) {
  1165  		return true, e.Update(ctx, key, value, lease)
  1166  	}
  1167  
  1168  	return false, nil
  1169  }
  1170  
  1171  // CreateOnlyIfLocked atomically creates a key if the client is still holding the given lock or fails if it already exists
  1172  func (e *etcdClient) CreateOnlyIfLocked(ctx context.Context, key string, value []byte, lease bool, lock KVLocker) (success bool, err error) {
  1173  	defer func() {
  1174  		Trace("CreateOnlyIfLocked", err, logrus.Fields{fieldKey: key, fieldValue: value, fieldAttachLease: lease, "success": success})
  1175  	}()
  1176  
  1177  	duration := spanstat.Start()
  1178  	var leaseID client.LeaseID
  1179  	if lease {
  1180  		leaseID = e.GetSessionLeaseID()
  1181  	}
  1182  	req := e.createOpPut(key, value, leaseID)
  1183  	cnds := []client.Cmp{
  1184  		client.Compare(client.Version(key), "=", 0),
  1185  		lock.Comparator().(client.Cmp),
  1186  	}
  1187  
  1188  	// We need to do a get in the else of the txn to detect if the lock is still
  1189  	// valid or not.
  1190  	opGets := []client.Op{
  1191  		client.OpGet(key),
  1192  	}
  1193  
  1194  	e.limiter.Wait(ctx)
  1195  	txnresp, err := e.client.Txn(ctx).If(cnds...).Then(*req).Else(opGets...).Commit()
  1196  	increaseMetric(key, metricSet, "CreateOnlyLocked", duration.EndError(err).Total(), err)
  1197  	if err != nil {
  1198  		e.checkSession(err, leaseID)
  1199  		return false, Hint(err)
  1200  	}
  1201  
  1202  	// The txn can fail for the following reasons:
  1203  	//  - Key version is not zero;
  1204  	//  - Lock does not exist or is expired.
  1205  	// For both of those cases, the key that we are comparing might or might not
  1206  	// exist, so we have:
  1207  	//  A - Key does not exist and lock does not exist => ErrLockLeaseExpired
  1208  	//  B - Key does not exist and lock exists => txn should succeed
  1209  	//  C - Key does exist, version is == 0 and lock does not exist => ErrLockLeaseExpired
  1210  	//  D - Key does exist, version is != 0 and lock does not exist => ErrLockLeaseExpired
  1211  	//  E - Key does exist, version is == 0 and lock does exist => txn should succeed
  1212  	//  F - Key does exist, version is != 0 and lock does exist => txn fails but the returned error is nil!
  1213  
  1214  	if !txnresp.Succeeded {
  1215  		// case F
  1216  		if len(txnresp.Responses[0].GetResponseRange().Kvs) != 0 &&
  1217  			txnresp.Responses[0].GetResponseRange().Kvs[0].Version != 0 {
  1218  			return false, nil
  1219  		}
  1220  
  1221  		// case A, C and D
  1222  		return false, ErrLockLeaseExpired
  1223  	}
  1224  
  1225  	// case B and E
  1226  	return true, nil
  1227  }
  1228  
  1229  // CreateOnly creates a key with the value and will fail if the key already exists
  1230  func (e *etcdClient) CreateOnly(ctx context.Context, key string, value []byte, lease bool) (success bool, err error) {
  1231  	defer func() {
  1232  		Trace("CreateOnly", err, logrus.Fields{fieldKey: key, fieldValue: value, fieldAttachLease: lease, "success": success})
  1233  	}()
  1234  
  1235  	duration := spanstat.Start()
  1236  	var leaseID client.LeaseID
  1237  	if lease {
  1238  		leaseID = e.GetSessionLeaseID()
  1239  	}
  1240  	req := e.createOpPut(key, value, leaseID)
  1241  	cond := client.Compare(client.Version(key), "=", 0)
  1242  
  1243  	e.limiter.Wait(ctx)
  1244  	txnresp, err := e.client.Txn(ctx).If(cond).Then(*req).Commit()
  1245  	increaseMetric(key, metricSet, "CreateOnly", duration.EndError(err).Total(), err)
  1246  	if err != nil {
  1247  		e.checkSession(err, leaseID)
  1248  		return false, Hint(err)
  1249  	}
  1250  
  1251  	return txnresp.Succeeded, nil
  1252  }
  1253  
  1254  // CreateIfExists creates a key with the value only if key condKey exists
  1255  func (e *etcdClient) CreateIfExists(condKey, key string, value []byte, lease bool) (err error) {
  1256  	defer func() {
  1257  		Trace("CreateIfExists", err, logrus.Fields{fieldKey: key, fieldValue: string(value), fieldCondition: condKey, fieldAttachLease: lease})
  1258  	}()
  1259  	duration := spanstat.Start()
  1260  	var leaseID client.LeaseID
  1261  	if lease {
  1262  		leaseID = e.GetSessionLeaseID()
  1263  	}
  1264  	req := e.createOpPut(key, value, leaseID)
  1265  	cond := client.Compare(client.Version(condKey), "!=", 0)
  1266  
  1267  	e.limiter.Wait(ctx.TODO())
  1268  	txnresp, err := e.client.Txn(ctx.TODO()).If(cond).Then(*req).Commit()
  1269  	increaseMetric(key, metricSet, "CreateIfExists", duration.EndError(err).Total(), err)
  1270  	if err != nil {
  1271  		e.checkSession(err, leaseID)
  1272  		err = Hint(err)
  1273  		return err
  1274  	}
  1275  
  1276  	if !txnresp.Succeeded {
  1277  		return fmt.Errorf("create was unsuccessful")
  1278  	}
  1279  
  1280  	return nil
  1281  }
  1282  
  1283  // FIXME: When we rebase to etcd 3.3
  1284  //
  1285  // DeleteOnZeroCount deletes the key if no matching keys for prefix exist
  1286  //func (e *etcdClient) DeleteOnZeroCount(key, prefix string) error {
  1287  //	txnresp, err := e.client.Txn(ctx.TODO()).
  1288  //		If(client.Compare(client.Version(prefix).WithPrefix(), "=", 0)).
  1289  //		Then(client.OpDelete(key)).
  1290  //		Commit()
  1291  //	if err != nil {
  1292  //		return err
  1293  //	}
  1294  //
  1295  //	if txnresp.Succeeded == false {
  1296  //		return fmt.Errorf("delete was unsuccessful")
  1297  //	}
  1298  //
  1299  //	return nil
  1300  //}
  1301  
  1302  // ListPrefixIfLocked returns a list of keys matching the prefix only if the client is still holding the given lock.
  1303  func (e *etcdClient) ListPrefixIfLocked(prefix string, lock KVLocker) (pairs KeyValuePairs, err error) {
  1304  	defer func() {
  1305  		Trace("ListPrefixIfLocked", err, logrus.Fields{fieldPrefix: prefix, fieldNumEntries: len(pairs)})
  1306  	}()
  1307  	duration := spanstat.Start()
  1308  	e.limiter.Wait(ctx.TODO())
  1309  	opGet := client.OpGet(prefix, client.WithPrefix())
  1310  	cmp := lock.Comparator().(client.Cmp)
  1311  	var txnReply *client.TxnResponse
  1312  	txnReply, err = e.client.Txn(context.Background()).If(cmp).Then(opGet).Commit()
  1313  	if err == nil && !txnReply.Succeeded {
  1314  		err = ErrLockLeaseExpired
  1315  	}
  1316  	increaseMetric(prefix, metricRead, "ListPrefixLocked", duration.EndError(err).Total(), err)
  1317  	if err != nil {
  1318  		err = Hint(err)
  1319  		return nil, err
  1320  	}
  1321  	getR := txnReply.Responses[0].GetResponseRange()
  1322  
  1323  	pairs = KeyValuePairs(make(map[string]Value, getR.Count))
  1324  	for i := int64(0); i < getR.Count; i++ {
  1325  		pairs[string(getR.Kvs[i].Key)] = Value{
  1326  			Data:        getR.Kvs[i].Value,
  1327  			ModRevision: uint64(getR.Kvs[i].ModRevision),
  1328  		}
  1329  
  1330  	}
  1331  
  1332  	return pairs, nil
  1333  }
  1334  
  1335  // ListPrefix returns a map of matching keys
  1336  func (e *etcdClient) ListPrefix(prefix string) (pairs KeyValuePairs, err error) {
  1337  	defer func() { Trace("ListPrefix", err, logrus.Fields{fieldPrefix: prefix, fieldNumEntries: len(pairs)}) }()
  1338  	duration := spanstat.Start()
  1339  
  1340  	e.limiter.Wait(ctx.TODO())
  1341  	var getR *client.GetResponse
  1342  	getR, err = e.client.Get(ctx.Background(), prefix, client.WithPrefix())
  1343  	increaseMetric(prefix, metricRead, "ListPrefix", duration.EndError(err).Total(), err)
  1344  	if err != nil {
  1345  		return nil, Hint(err)
  1346  	}
  1347  
  1348  	pairs = KeyValuePairs(make(map[string]Value, getR.Count))
  1349  	for i := int64(0); i < getR.Count; i++ {
  1350  		pairs[string(getR.Kvs[i].Key)] = Value{
  1351  			Data:        getR.Kvs[i].Value,
  1352  			ModRevision: uint64(getR.Kvs[i].ModRevision),
  1353  			LeaseID:     getR.Kvs[i].Lease,
  1354  		}
  1355  
  1356  	}
  1357  
  1358  	return pairs, nil
  1359  }
  1360  
  1361  // Close closes the etcd session
  1362  func (e *etcdClient) Close() {
  1363  	close(e.stopStatusChecker)
  1364  	<-e.firstSession
  1365  	if e.controllers != nil {
  1366  		e.controllers.RemoveAll()
  1367  	}
  1368  	e.RLock()
  1369  	defer e.RUnlock()
  1370  	e.lockSession.Close()
  1371  	e.session.Close()
  1372  	e.client.Close()
  1373  }
  1374  
  1375  // GetCapabilities returns the capabilities of the backend
  1376  func (e *etcdClient) GetCapabilities() Capabilities {
  1377  	return Capabilities(CapabilityCreateIfExists)
  1378  }
  1379  
  1380  // Encode encodes a binary slice into a character set that the backend supports
  1381  func (e *etcdClient) Encode(in []byte) (out string) {
  1382  	defer func() { Trace("Encode", nil, logrus.Fields{"in": in, "out": out}) }()
  1383  	return string(in)
  1384  }
  1385  
  1386  // Decode decodes a key previously encoded back into the original binary slice
  1387  func (e *etcdClient) Decode(in string) (out []byte, err error) {
  1388  	defer func() { Trace("Decode", err, logrus.Fields{"in": in, "out": out}) }()
  1389  	return []byte(in), nil
  1390  }
  1391  
  1392  // ListAndWatch implements the BackendOperations.ListAndWatch using etcd
  1393  func (e *etcdClient) ListAndWatch(name, prefix string, chanSize int) *Watcher {
  1394  	w := newWatcher(name, prefix, chanSize)
  1395  
  1396  	e.getLogger().WithField(fieldWatcher, w).Debug("Starting watcher...")
  1397  
  1398  	go e.Watch(w)
  1399  
  1400  	return w
  1401  }
  1402  
  1403  // SplitK8sServiceURL returns the service name and namespace for the given address.
  1404  // If the given address is not parseable or is not in the format
  1405  // '<protocol>://<name>.<namespace>[optional]', an error is returned.
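//
// For example (illustrative), "https://cilium-etcd-client.kube-system.svc:2379"
// yields ("cilium-etcd-client", "kube-system"), while "https://etcd" returns an
// error because no namespace component is present.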
  1406  func SplitK8sServiceURL(address string) (string, string, error) {
  1407  	u, err := url.Parse(address)
  1408  	if err != nil {
  1409  		return "", "", err
  1410  	}
  1411  	// typical service name "cilium-etcd-client.kube-system.svc"
  1412  	names := strings.Split(u.Hostname(), ".")
  1413  	if len(names) >= 2 {
  1414  		return names[0], names[1], nil
  1415  	}
  1416  	return "", "",
  1417  		fmt.Errorf("invalid service name. expecting <protocol>://<name>.<namespace>[optional], got: %s", address)
  1418  }
  1419  
  1420  // IsEtcdOperator returns the service name and true if the configuration is
  1421  // set up to use an etcd-operator managed cluster. If the configuration
  1422  // explicitly states so, e.g. with etcd.operator=true, the returned service
  1423  // name is the first matching address found in the specified configuration.
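//
// For example (illustrative), with the etcd.address option set to
// "https://cilium-etcd-client.kube-system.svc:2379" and k8sNamespace
// "kube-system", this returns that address and true.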
  1424  func IsEtcdOperator(selectedBackend string, opts map[string]string, k8sNamespace string) (string, bool) {
  1425  	if selectedBackend != EtcdBackendName {
  1426  		return "", false
  1427  	}
  1428  
  1429  	isEtcdOperator := strings.ToLower(opts[isEtcdOperatorOption]) == "true"
  1430  
  1431  	fqdnIsEtcdOperator := func(address string) bool {
  1432  		svcName, ns, err := SplitK8sServiceURL(address)
  1433  		return err == nil &&
  1434  			svcName == "cilium-etcd-client" &&
  1435  			ns == k8sNamespace
  1436  	}
  1437  
  1438  	fqdn := opts[EtcdAddrOption]
  1439  	if len(fqdn) != 0 {
  1440  		if fqdnIsEtcdOperator(fqdn) || isEtcdOperator {
  1441  			return fqdn, true
  1442  		}
  1443  		return "", false
  1444  	}
  1445  
  1446  	bm := newEtcdModule()
  1447  	err := bm.setConfig(opts)
  1448  	if err != nil {
  1449  		return "", false
  1450  	}
  1451  	etcdConfig := bm.getConfig()[EtcdOptionConfig]
  1452  	if len(etcdConfig) == 0 {
  1453  		return "", false
  1454  	}
  1455  
  1456  	cfg, err := newConfig(etcdConfig)
  1457  	if err != nil {
  1458  		log.WithError(err).Error("Unable to read etcd configuration.")
  1459  		return "", false
  1460  	}
  1461  	for _, endpoint := range cfg.Endpoints {
  1462  		if fqdnIsEtcdOperator(endpoint) || isEtcdOperator {
  1463  			return endpoint, true
  1464  		}
  1465  	}
  1466  
  1467  	return "", false
  1468  }
  1469  
  1470  // newConfig is a wrapper of clientyaml.NewConfig. Since etcd has deprecated
  1471  // the `ca-file` field from yamlConfig in v3.4, the clientyaml.NewConfig won't
  1472  // read that field from the etcd configuration file making Cilium fail to
  1473  // connect to a TLS-enabled etcd server. Since we should have deprecated the
  1474  // usage of this field a long time ago, in this galaxy, we will have this
  1475  // wrapper function as a workaround which will still use the `ca-file` field to
  1476  // avoid users breaking their connectivity to etcd when upgrading Cilium.
  1477  // TODO remove this wrapper in cilium >= 1.8
  1478  func newConfig(fpath string) (*client.Config, error) {
  1479  	cfg, err := clientyaml.NewConfig(fpath)
  1480  	if err != nil {
  1481  		return nil, err
  1482  	}
  1483  	if cfg.TLS == nil || cfg.TLS.RootCAs != nil {
  1484  		return cfg, nil
  1485  	}
  1486  
  1487  	yc := &yamlConfig{}
  1488  	b, err := ioutil.ReadFile(fpath)
  1489  	if err != nil {
  1490  		return nil, err
  1491  	}
  1492  	err = yaml.Unmarshal(b, yc)
  1493  	if err != nil {
  1494  		return nil, err
  1495  	}
  1496  	if yc.InsecureTransport {
  1497  		return cfg, nil
  1498  	}
  1499  
  1500  	if yc.CAfile != "" {
  1501  		cp, err := tlsutil.NewCertPool([]string{yc.CAfile})
  1502  		if err != nil {
  1503  			return nil, err
  1504  		}
  1505  		cfg.TLS.RootCAs = cp
  1506  	}
  1507  	cfg.TLS.GetClientCertificate = func(_ *tls.CertificateRequestInfo) (*tls.Certificate, error) {
  1508  		cer, err := tls.LoadX509KeyPair(yc.Certfile, yc.Keyfile)
  1509  		return &cer, err
  1510  	}
  1511  	return cfg, nil
  1512  }
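
// An illustrative configuration file (the paths below are hypothetical) that
// still uses the deprecated `ca-file` field and is therefore handled by the
// fallback above:
//
//	endpoints:
//	- https://192.168.0.1:2379
//	ca-file: /var/lib/etcd-secrets/etcd-client-ca.crt
//	cert-file: /var/lib/etcd-secrets/etcd-client.crt
//	key-file: /var/lib/etcd-secrets/etcd-client.key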
  1513  
  1514  // yamlConfig is a copy of the internal structure in go.etcd.io/etcd/clientv3/yaml
  1515  // so we can still use the `ca-file` field for one more release.
  1516  type yamlConfig struct {
  1517  	client.Config
  1518  
  1519  	InsecureTransport     bool   `json:"insecure-transport"`
  1520  	InsecureSkipTLSVerify bool   `json:"insecure-skip-tls-verify"`
  1521  	Certfile              string `json:"cert-file"`
  1522  	Keyfile               string `json:"key-file"`
  1523  	TrustedCAfile         string `json:"trusted-ca-file"`
  1524  
  1525  	// CAfile is being deprecated. Use 'TrustedCAfile' instead.
  1526  	// TODO: deprecate this in v4
  1527  	CAfile string `json:"ca-file"`
  1528  }