github.com/zhyoulun/cilium@v1.6.12/pkg/kvstore/etcd.go (about)

     1  // Copyright 2016-2020 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kvstore
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"crypto/tls"
    21  	"errors"
    22  	"fmt"
    23  	"io/ioutil"
    24  	"math/rand"
    25  	"net/url"
    26  	"os"
    27  	"strconv"
    28  	"strings"
    29  	"time"
    30  
    31  	"github.com/cilium/cilium/pkg/contexthelpers"
    32  	"github.com/cilium/cilium/pkg/controller"
    33  	"github.com/cilium/cilium/pkg/defaults"
    34  	"github.com/cilium/cilium/pkg/lock"
    35  	"github.com/cilium/cilium/pkg/option"
    36  	"github.com/cilium/cilium/pkg/spanstat"
    37  
    38  	"github.com/hashicorp/go-version"
    39  	"github.com/sirupsen/logrus"
    40  	client "go.etcd.io/etcd/clientv3"
    41  	"go.etcd.io/etcd/clientv3/concurrency"
    42  	clientyaml "go.etcd.io/etcd/clientv3/yaml"
    43  	v3rpcErrors "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
    44  	"go.etcd.io/etcd/pkg/tlsutil"
    45  	ctx "golang.org/x/net/context"
    46  	"golang.org/x/time/rate"
    47  	"sigs.k8s.io/yaml"
    48  )
    49  
const (
	// EtcdBackendName is the backend name for etcd
	EtcdBackendName = "etcd"

	// Option keys understood by the etcd backend module:
	//   - EtcdAddrOption: addresses of the etcd cluster
	//   - isEtcdOperatorOption: whether the configuration is setting up an
	//     etcd-operator
	//   - EtcdOptionConfig: path to the etcd configuration file
	EtcdAddrOption       = "etcd.address"
	isEtcdOperatorOption = "etcd.operator"
	EtcdOptionConfig     = "etcd.config"

	// EtcdRateLimitOption specifies maximum kv operations per second
	EtcdRateLimitOption = "etcd.qps"
)
    61  
var (
	// ErrLockLeaseExpired is the error used when the lease of a lock does
	// not exist or has expired.
	ErrLockLeaseExpired = errors.New("transaction did not succeed: lock lease expired")
)
    67  
    68  func init() {
    69  	rand.Seed(time.Now().UnixNano())
    70  }
    71  
// etcdModule is the etcd backend module. It carries the module's options and
// the etcd client configuration that newClient connects with.
type etcdModule struct {
	// opts holds the backend options, keyed by option name
	opts   backendOptions
	// config is the etcd client configuration. It stays nil until it is
	// assigned by setConfigDummy, setExtraConfig or newClient.
	config *client.Config
}
    76  
var (
	// versionCheckTimeout is the time we wait trying to verify the version
	// of an etcd endpoint. The timeout can be encountered on network
	// connectivity problems.
	versionCheckTimeout = 30 * time.Second

	// statusCheckTimeout is the timeout when performing status checks with
	// all etcd endpoints
	statusCheckTimeout = 10 * time.Second

	// initialConnectionTimeout is the timeout for the initial connection to
	// the etcd server
	initialConnectionTimeout = 15 * time.Minute

	// minRequiredVersion is the version constraint that checkMinVersion
	// applies to every etcd endpoint. The parse error is discarded here.
	minRequiredVersion, _ = version.NewConstraint(">= 3.1.0")

	// etcdDummyAddress can be overwritten from test invokers using ldflags
	etcdDummyAddress = "http://127.0.0.1:4002"

	// etcdInstance is the module instance that init registers under
	// EtcdBackendName
	etcdInstance = newEtcdModule()
)
    98  
// EtcdDummyAddress returns the etcd address used by the dummy configuration
// (see setConfigDummy).
func EtcdDummyAddress() string {
	return etcdDummyAddress
}
   102  
   103  func newEtcdModule() backendModule {
   104  	return &etcdModule{
   105  		opts: backendOptions{
   106  			isEtcdOperatorOption: &backendOption{
   107  				description: "if the configuration is setting up an etcd-operator",
   108  			},
   109  			EtcdAddrOption: &backendOption{
   110  				description: "Addresses of etcd cluster",
   111  			},
   112  			EtcdOptionConfig: &backendOption{
   113  				description: "Path to etcd configuration file",
   114  			},
   115  			EtcdRateLimitOption: &backendOption{
   116  				description: "Rate limit in kv store operations per second",
   117  				validate: func(v string) error {
   118  					_, err := strconv.Atoi(v)
   119  					return err
   120  				},
   121  			},
   122  		},
   123  	}
   124  }
   125  
// createInstance returns a fresh etcd module built by newEtcdModule. Nothing
// is copied from the receiver.
func (e *etcdModule) createInstance() backendModule {
	return newEtcdModule()
}
   129  
// getName returns the name of the etcd backend, EtcdBackendName.
func (e *etcdModule) getName() string {
	return EtcdBackendName
}
   133  
   134  func (e *etcdModule) setConfigDummy() {
   135  	e.config = &client.Config{}
   136  	e.config.Endpoints = []string{etcdDummyAddress}
   137  }
   138  
// setConfig applies the given option values to the module's options via
// setOpts and returns its error.
func (e *etcdModule) setConfig(opts map[string]string) error {
	return setOpts(opts, e.opts)
}
   142  
   143  func (e *etcdModule) setExtraConfig(opts *ExtraOptions) error {
   144  	if opts != nil && len(opts.DialOption) != 0 {
   145  		e.config = &client.Config{}
   146  		e.config.DialOptions = append(e.config.DialOptions, opts.DialOption...)
   147  	}
   148  	return nil
   149  }
   150  
// getConfig returns the module's options as produced by getOpts.
func (e *etcdModule) getConfig() map[string]string {
	return getOpts(e.opts)
}
   154  
   155  func (e *etcdModule) newClient(opts *ExtraOptions) (BackendOperations, chan error) {
   156  	errChan := make(chan error, 10)
   157  
   158  	endpointsOpt, endpointsSet := e.opts[EtcdAddrOption]
   159  	configPathOpt, configSet := e.opts[EtcdOptionConfig]
   160  
   161  	rateLimitOpt, rateLimitSet := e.opts[EtcdRateLimitOption]
   162  
   163  	rateLimit := defaults.KVstoreQPS
   164  	if rateLimitSet {
   165  		// error is discarded here because this option has validation
   166  		rateLimit, _ = strconv.Atoi(rateLimitOpt.value)
   167  	}
   168  
   169  	var configPath string
   170  	if configSet {
   171  		configPath = configPathOpt.value
   172  	}
   173  	if e.config == nil {
   174  		if !endpointsSet && !configSet {
   175  			errChan <- fmt.Errorf("invalid etcd configuration, %s or %s must be specified", EtcdOptionConfig, EtcdAddrOption)
   176  			close(errChan)
   177  			return nil, errChan
   178  		}
   179  
   180  		if endpointsOpt.value == "" && configPath == "" {
   181  			errChan <- fmt.Errorf("invalid etcd configuration, %s or %s must be specified",
   182  				EtcdOptionConfig, EtcdAddrOption)
   183  			close(errChan)
   184  			return nil, errChan
   185  		}
   186  
   187  		e.config = &client.Config{}
   188  	}
   189  
   190  	if e.config.Endpoints == nil && endpointsSet {
   191  		e.config.Endpoints = []string{endpointsOpt.value}
   192  	}
   193  
   194  	for {
   195  		// connectEtcdClient will close errChan when the connection attempt has
   196  		// been successful
   197  		backend, err := connectEtcdClient(e.config, configPath, errChan, rateLimit, opts)
   198  		switch {
   199  		case os.IsNotExist(err):
   200  			log.WithError(err).Info("Waiting for all etcd configuration files to be available")
   201  			time.Sleep(5 * time.Second)
   202  		case err != nil:
   203  			errChan <- err
   204  			close(errChan)
   205  			return backend, errChan
   206  		default:
   207  			return backend, errChan
   208  		}
   209  	}
   210  }
   211  
   212  func init() {
   213  	// register etcd module for use
   214  	registerBackend(EtcdBackendName, etcdInstance)
   215  
   216  	if duration := os.Getenv("CILIUM_ETCD_STATUS_CHECK_INTERVAL"); duration != "" {
   217  		timeout, err := time.ParseDuration(duration)
   218  		if err == nil {
   219  			statusCheckTimeout = timeout
   220  		}
   221  	}
   222  }
   223  
   224  // Hint tries to improve the error message displayed to te user.
   225  func Hint(err error) error {
   226  	switch err {
   227  	case ctx.DeadlineExceeded:
   228  		return fmt.Errorf("etcd client timeout exceeded")
   229  	default:
   230  		return err
   231  	}
   232  }
   233  
// etcdClient is the etcd implementation of the kvstore backend. It wraps an
// etcd client together with the two sessions (regular and lock) whose leases
// are handed out to callers, and the state reported by the status checker.
type etcdClient struct {
	// firstSession is a channel that will be closed once the first session
	// is set up in the etcd Client.
	firstSession chan struct{}

	// stopStatusChecker is closed when the status checker can be terminated
	stopStatusChecker chan struct{}

	// client is the underlying etcd client; controllers manages the
	// session renewal controllers started in connectEtcdClient
	client      *client.Client
	controllers *controller.Manager

	// config and configPath are initialized once and never written to again, they can be accessed without locking
	config     *client.Config
	configPath string

	// protects sessions from concurrent access
	lock.RWMutex

	// session is the regular session and sessionCancel cancels the
	// context it was created with by renewSession
	session       *concurrency.Session
	sessionCancel context.CancelFunc

	// lockSession is the session used for distributed locks and
	// lockSessionCancel cancels the context it was created with by
	// renewLockSession
	lockSession       *concurrency.Session
	lockSessionCancel context.CancelFunc

	// statusLock protects latestStatusSnapshot and latestErrorStatus for
	// read/write access
	statusLock lock.RWMutex

	// latestStatusSnapshot is a snapshot of the latest etcd cluster status
	latestStatusSnapshot string

	// latestErrorStatus is the latest error condition of the etcd connection
	latestErrorStatus error

	// extraOptions are the extra options the client was created with
	extraOptions *ExtraOptions

	// limiter is waited on before etcd operations are issued
	limiter *rate.Limiter
}
   272  
   273  func (e *etcdClient) getLogger() *logrus.Entry {
   274  	endpoints, path := []string{""}, ""
   275  	if e != nil {
   276  		if e.config != nil {
   277  			endpoints = e.config.Endpoints
   278  		}
   279  		path = e.configPath
   280  	}
   281  
   282  	return log.WithFields(logrus.Fields{
   283  		"endpoints": endpoints,
   284  		"config":    path,
   285  	})
   286  }
   287  
// etcdMutex wraps an etcd concurrency mutex; LockPath returns it as a
// KVLocker.
type etcdMutex struct {
	mutex *concurrency.Mutex
}
   291  
// Unlock releases the underlying etcd mutex and returns the error reported
// by that call. No deadline is applied to the request.
func (e *etcdMutex) Unlock() error {
	return e.mutex.Unlock(ctx.TODO())
}
   295  
// Comparator returns the value of the underlying mutex's IsOwner().
// NOTE(review): presumably this is an etcd comparison meant to be used as a
// transaction condition that holds while the lock is owned; confirm against
// the callers.
func (e *etcdMutex) Comparator() interface{} {
	return e.mutex.IsOwner()
}
   299  
   300  // GetSessionLeaseID returns the current lease ID.
   301  func (e *etcdClient) GetSessionLeaseID() client.LeaseID {
   302  	e.RWMutex.RLock()
   303  	l := e.session.Lease()
   304  	e.RWMutex.RUnlock()
   305  	return l
   306  }
   307  
   308  // GetLockSessionLeaseID returns the current lease ID for the lock session.
   309  func (e *etcdClient) GetLockSessionLeaseID() client.LeaseID {
   310  	e.RWMutex.RLock()
   311  	l := e.lockSession.Lease()
   312  	e.RWMutex.RUnlock()
   313  	return l
   314  }
   315  
   316  // checkSession verifies if the lease is still valid from the return error of
   317  // an etcd API call. If the error explicitly states that a lease was not found
   318  // we mark the session has an orphan for this etcd client. If we would not mark
   319  // it as an Orphan() the session would be considered expired after the leaseTTL
   320  // By make it orphan we guarantee the session will be marked to be renewed.
   321  func (e *etcdClient) checkSession(err error, leaseID client.LeaseID) {
   322  	if err == v3rpcErrors.ErrLeaseNotFound {
   323  		e.closeSession(leaseID)
   324  	}
   325  }
   326  
   327  // checkSession verifies if the lease is still valid from the return error of
   328  // an etcd API call. If the error explicitly states that a lease was not found
   329  // we mark the session has an orphan for this etcd client. If we would not mark
   330  // it as an Orphan() the session would be considered expired after the leaseTTL
   331  // By make it orphan we guarantee the session will be marked to be renewed.
   332  func (e *etcdClient) checkLockSession(err error, leaseID client.LeaseID) {
   333  	if err == v3rpcErrors.ErrLeaseNotFound {
   334  		e.closeLockSession(leaseID)
   335  	}
   336  }
   337  
   338  // closeSession closes the current session.
   339  func (e *etcdClient) closeSession(leaseID client.LeaseID) {
   340  	e.RWMutex.RLock()
   341  	// only mark a session as orphan if the leaseID is the same as the
   342  	// session ID to avoid making any other sessions as orphan.
   343  	if e.session.Lease() == leaseID {
   344  		e.session.Orphan()
   345  	}
   346  	e.RWMutex.RUnlock()
   347  }
   348  
   349  // closeSession closes the current session.
   350  func (e *etcdClient) closeLockSession(leaseID client.LeaseID) {
   351  	e.RWMutex.RLock()
   352  	// only mark a session as orphan if the leaseID is the same as the
   353  	// session ID to avoid making any other sessions as orphan.
   354  	if e.lockSession.Lease() == leaseID {
   355  		e.lockSession.Orphan()
   356  	}
   357  	e.RWMutex.RUnlock()
   358  }
   359  
   360  func (e *etcdClient) waitForInitLock(ctx context.Context) <-chan bool {
   361  	initLockSucceeded := make(chan bool)
   362  
   363  	go func() {
   364  		for {
   365  			select {
   366  			case <-ctx.Done():
   367  				initLockSucceeded <- false
   368  				close(initLockSucceeded)
   369  				return
   370  			default:
   371  			}
   372  
   373  			// Generate a random number so that we can acquire a lock even
   374  			// if other agents are killed while locking this path.
   375  			randNumber := strconv.FormatUint(rand.Uint64(), 16)
   376  			locker, err := e.LockPath(ctx, InitLockPath+"/"+randNumber)
   377  			if err == nil {
   378  				initLockSucceeded <- true
   379  				close(initLockSucceeded)
   380  				locker.Unlock()
   381  				e.getLogger().Debug("Distributed lock successful, etcd has quorum")
   382  				return
   383  			}
   384  
   385  			time.Sleep(100 * time.Millisecond)
   386  		}
   387  	}()
   388  
   389  	return initLockSucceeded
   390  }
   391  
   392  func (e *etcdClient) isConnectedAndHasQuorum() bool {
   393  	ctxTimeout, cancel := ctx.WithTimeout(ctx.TODO(), statusCheckTimeout)
   394  	defer cancel()
   395  
   396  	select {
   397  	// Wait for the the initial connection to be established
   398  	case <-e.firstSession:
   399  	// Timeout while waiting for initial connection, no success
   400  	case <-ctxTimeout.Done():
   401  		return false
   402  	}
   403  
   404  	e.RLock()
   405  	ch := e.session.Done()
   406  	e.RUnlock()
   407  
   408  	initLockSucceeded := e.waitForInitLock(ctxTimeout)
   409  	select {
   410  	// Catch disconnect event, no success
   411  	case <-ch:
   412  		return false
   413  	// wait for initial lock to succeed
   414  	case success := <-initLockSucceeded:
   415  		return success
   416  	}
   417  }
   418  
   419  // Connected closes the returned channel when the etcd client is connected.
   420  func (e *etcdClient) Connected() <-chan struct{} {
   421  	out := make(chan struct{})
   422  	go func() {
   423  		for !e.isConnectedAndHasQuorum() {
   424  			time.Sleep(100 * time.Millisecond)
   425  		}
   426  		close(out)
   427  	}()
   428  	return out
   429  }
   430  
   431  // Disconnected closes the returned channel when the etcd client is
   432  // disconnected after being reconnected. Blocks until the etcd client is first
   433  // connected with the kvstore.
   434  func (e *etcdClient) Disconnected() <-chan struct{} {
   435  	<-e.firstSession
   436  	e.RLock()
   437  	ch := e.session.Done()
   438  	e.RUnlock()
   439  	return ch
   440  }
   441  
   442  func (e *etcdClient) renewSession(ctx context.Context) error {
   443  	select {
   444  	// wait for initial session to be established
   445  	case <-e.firstSession:
   446  	// controller has stopped or etcd client is closing
   447  	case <-ctx.Done():
   448  		return nil
   449  	case <-e.client.Ctx().Done():
   450  		return nil
   451  	}
   452  
   453  	e.RLock()
   454  	sessionChan := e.session.Done()
   455  	e.RUnlock()
   456  
   457  	select {
   458  	// session has ended
   459  	case <-sessionChan:
   460  	// controller has stopped or etcd client is closing
   461  	case <-ctx.Done():
   462  		return nil
   463  	case <-e.client.Ctx().Done():
   464  		return nil
   465  	}
   466  	// This is an attempt to avoid concurrent access of a session that was
   467  	// already expired. It's not perfect as there is still a period between the
   468  	// e.session.Done() is closed and the e.Lock() is held where parallel go
   469  	// routines can get a lease ID of an already expired lease.
   470  	e.Lock()
   471  
   472  	// Cancel any eventual old session context
   473  	if e.sessionCancel != nil {
   474  		e.sessionCancel()
   475  		e.sessionCancel = nil
   476  	}
   477  
   478  	// Create a context representing the lifetime of the session. It will
   479  	// timeout if the session creation does not succeed in time and then
   480  	// persists until any of the below conditions are met:
   481  	//  - The parent context is cancelled due to the etcd client closing
   482  	//  - The above call to sessionCancel() cancels the session due to the
   483  	//  session ending and requiring renewal.
   484  	sessionContext, sessionCancel, sessionSuccess := contexthelpers.NewConditionalTimeoutContext(e.client.Ctx(), statusCheckTimeout)
   485  	defer close(sessionSuccess)
   486  
   487  	newSession, err := concurrency.NewSession(
   488  		e.client,
   489  		concurrency.WithTTL(int(option.Config.KVstoreLeaseTTL.Seconds())),
   490  		concurrency.WithContext(sessionContext),
   491  	)
   492  	if err != nil {
   493  		e.UnlockIgnoreTime()
   494  		return fmt.Errorf("unable to renew etcd session: %s", err)
   495  	}
   496  	sessionSuccess <- true
   497  	log.Infof("Got new lease ID %x", newSession.Lease())
   498  
   499  	e.session = newSession
   500  	e.sessionCancel = sessionCancel
   501  	e.UnlockIgnoreTime()
   502  
   503  	e.getLogger().WithField(fieldSession, newSession).Debug("Renewing etcd session")
   504  
   505  	if err := e.checkMinVersion(); err != nil {
   506  		return err
   507  	}
   508  
   509  	return nil
   510  }
   511  
   512  func (e *etcdClient) renewLockSession(ctx context.Context) error {
   513  	select {
   514  	// wait for initial session to be established
   515  	case <-e.firstSession:
   516  	// controller has stopped or etcd client is closing
   517  	case <-ctx.Done():
   518  		return nil
   519  	case <-e.client.Ctx().Done():
   520  		return nil
   521  	}
   522  
   523  	e.RWMutex.RLock()
   524  	lockSessionChan := e.lockSession.Done()
   525  	e.RWMutex.RUnlock()
   526  
   527  	select {
   528  	// session has ended
   529  	case <-lockSessionChan:
   530  	// controller has stopped or etcd client is closing
   531  	case <-ctx.Done():
   532  		return nil
   533  	case <-e.client.Ctx().Done():
   534  		return nil
   535  	}
   536  	// This is an attempt to avoid concurrent access of a session that was
   537  	// already expired. It's not perfect as there is still a period between the
   538  	// e.lockSession.Done() is closed and the e.Lock() is held where parallel go
   539  	// routines can get a lease ID of an already expired lease.
   540  	e.Lock()
   541  
   542  	if e.lockSessionCancel != nil {
   543  		e.lockSessionCancel()
   544  		e.lockSessionCancel = nil
   545  	}
   546  
   547  	// Create a context representing the lifetime of the lock session. It
   548  	// will timeout if the session creation does not succeed in time and
   549  	// persists until any of the below conditions are met:
   550  	//  - The parent context is cancelled due to the etcd client closing
   551  	//  - The above call to sessionCancel() cancels the session due to the
   552  	//  session ending and requiring renewal.
   553  	sessionContext, sessionCancel, sessionSuccess := contexthelpers.NewConditionalTimeoutContext(e.client.Ctx(), statusCheckTimeout)
   554  	defer close(sessionSuccess)
   555  
   556  	newSession, err := concurrency.NewSession(
   557  		e.client,
   558  		concurrency.WithTTL(int(defaults.LockLeaseTTL.Seconds())),
   559  		concurrency.WithContext(sessionContext),
   560  	)
   561  	if err != nil {
   562  		e.UnlockIgnoreTime()
   563  		return fmt.Errorf("unable to renew etcd lock session: %s", err)
   564  	}
   565  	sessionSuccess <- true
   566  	log.Infof("Got new lock lease ID %x", newSession.Lease())
   567  
   568  	e.lockSession = newSession
   569  	e.lockSessionCancel = sessionCancel
   570  	e.UnlockIgnoreTime()
   571  
   572  	e.getLogger().WithField(fieldSession, newSession).Debug("Renewing etcd lock session")
   573  
   574  	return nil
   575  }
   576  
   577  func connectEtcdClient(config *client.Config, cfgPath string, errChan chan error, rateLimit int, opts *ExtraOptions) (BackendOperations, error) {
   578  	if cfgPath != "" {
   579  		cfg, err := newConfig(cfgPath)
   580  		if err != nil {
   581  			return nil, err
   582  		}
   583  		cfg.DialOptions = append(cfg.DialOptions, config.DialOptions...)
   584  		config = cfg
   585  	}
   586  
   587  	// Set DialTimeout to 0, otherwise the creation of a new client will
   588  	// block until DialTimeout is reached or a connection to the server
   589  	// is made.
   590  	config.DialTimeout = 0
   591  	c, err := client.New(*config)
   592  	if err != nil {
   593  		return nil, err
   594  	}
   595  
   596  	log.WithFields(logrus.Fields{
   597  		"endpoints": config.Endpoints,
   598  		"config":    cfgPath,
   599  	}).Info("Connecting to etcd server...")
   600  
   601  	var s, ls concurrency.Session
   602  	firstSession := make(chan struct{})
   603  	errorChan := make(chan error)
   604  
   605  	// create session in parallel as this is a blocking operation
   606  	go func() {
   607  		session, err := concurrency.NewSession(c, concurrency.WithTTL(int(option.Config.KVstoreLeaseTTL.Seconds())))
   608  		if err != nil {
   609  			errorChan <- err
   610  			close(errorChan)
   611  			return
   612  		}
   613  		lockSession, err := concurrency.NewSession(c, concurrency.WithTTL(int(defaults.LockLeaseTTL.Seconds())))
   614  		if err != nil {
   615  			errorChan <- err
   616  			close(errorChan)
   617  			return
   618  		}
   619  		s = *session
   620  		ls = *lockSession
   621  
   622  		log.Infof("Got lease ID %x", s.Lease())
   623  		log.Infof("Got lock lease ID %x", ls.Lease())
   624  		close(errorChan)
   625  	}()
   626  
   627  	ec := &etcdClient{
   628  		client:               c,
   629  		config:               config,
   630  		configPath:           cfgPath,
   631  		session:              &s,
   632  		lockSession:          &ls,
   633  		firstSession:         firstSession,
   634  		controllers:          controller.NewManager(),
   635  		latestStatusSnapshot: "No connection to etcd",
   636  		stopStatusChecker:    make(chan struct{}),
   637  		extraOptions:         opts,
   638  		limiter:              rate.NewLimiter(rate.Limit(rateLimit), rateLimit),
   639  	}
   640  
   641  	// wait for session to be created also in parallel
   642  	go func() {
   643  		defer close(errChan)
   644  
   645  		select {
   646  		case err = <-errorChan:
   647  			if err != nil {
   648  				errChan <- err
   649  				return
   650  			}
   651  		case <-time.After(initialConnectionTimeout):
   652  			errChan <- fmt.Errorf("timed out while waiting for etcd session. Ensure that etcd is running on %s", config.Endpoints)
   653  			return
   654  		}
   655  
   656  		ec.getLogger().Debugf("Session received")
   657  		close(ec.firstSession)
   658  
   659  		if err := ec.checkMinVersion(); err != nil {
   660  			errChan <- fmt.Errorf("unable to validate etcd version: %s", err)
   661  		}
   662  	}()
   663  
   664  	go ec.statusChecker()
   665  
   666  	ec.controllers.UpdateController("kvstore-etcd-session-renew",
   667  		controller.ControllerParams{
   668  			DoFunc: func(ctx context.Context) error {
   669  				return ec.renewSession(ctx)
   670  			},
   671  			RunInterval: time.Duration(10) * time.Millisecond,
   672  		},
   673  	)
   674  
   675  	ec.controllers.UpdateController("kvstore-etcd-lock-session-renew",
   676  		controller.ControllerParams{
   677  			DoFunc: func(ctx context.Context) error {
   678  				return ec.renewLockSession(ctx)
   679  			},
   680  			RunInterval: time.Duration(10) * time.Millisecond,
   681  		},
   682  	)
   683  
   684  	return ec, nil
   685  }
   686  
   687  func getEPVersion(c client.Maintenance, etcdEP string, timeout time.Duration) (*version.Version, error) {
   688  	ctxTimeout, cancel := ctx.WithTimeout(ctx.TODO(), timeout)
   689  	defer cancel()
   690  	sr, err := c.Status(ctxTimeout, etcdEP)
   691  	if err != nil {
   692  		return nil, Hint(err)
   693  	}
   694  	v, err := version.NewVersion(sr.Version)
   695  	if err != nil {
   696  		return nil, fmt.Errorf("error parsing server version %q: %s", sr.Version, Hint(err))
   697  	}
   698  	return v, nil
   699  }
   700  
   701  // checkMinVersion checks the minimal version running on etcd cluster.  This
   702  // function should be run whenever the etcd client is connected for the first
   703  // time and whenever the session is renewed.
   704  func (e *etcdClient) checkMinVersion() error {
   705  	eps := e.client.Endpoints()
   706  
   707  	for _, ep := range eps {
   708  		v, err := getEPVersion(e.client.Maintenance, ep, versionCheckTimeout)
   709  		if err != nil {
   710  			e.getLogger().WithError(Hint(err)).WithField(fieldEtcdEndpoint, ep).
   711  				Warn("Unable to verify version of etcd endpoint")
   712  			continue
   713  		}
   714  
   715  		if !minRequiredVersion.Check(v) {
   716  			return fmt.Errorf("minimal etcd version not met in %q, required: %s, found: %s",
   717  				ep, minRequiredVersion.String(), v.String())
   718  		}
   719  
   720  		e.getLogger().WithFields(logrus.Fields{
   721  			fieldEtcdEndpoint: ep,
   722  			"version":         v,
   723  		}).Info("Successfully verified version of etcd endpoint")
   724  	}
   725  
   726  	if len(eps) == 0 {
   727  		e.getLogger().Warn("Minimal etcd version unknown: No etcd endpoints available")
   728  	}
   729  
   730  	return nil
   731  }
   732  
   733  func (e *etcdClient) LockPath(ctx context.Context, path string) (KVLocker, error) {
   734  	select {
   735  	case <-e.firstSession:
   736  	case <-ctx.Done():
   737  		return nil, fmt.Errorf("lock cancelled via context: %s", ctx.Err())
   738  	}
   739  
   740  	// Create the context first so that if a connectivity issue causes the
   741  	// RLock acquisition below to block, this timeout will run concurrently
   742  	// with the timeouts in renewSession() rather than running serially.
   743  	ctx, cancel := context.WithTimeout(ctx, time.Minute)
   744  	defer cancel()
   745  
   746  	e.RLock()
   747  	mu := concurrency.NewMutex(e.lockSession, path)
   748  	leaseID := e.lockSession.Lease()
   749  	e.RUnlock()
   750  
   751  	err := mu.Lock(ctx)
   752  	if err != nil {
   753  		e.checkLockSession(err, leaseID)
   754  		return nil, Hint(err)
   755  	}
   756  
   757  	return &etcdMutex{mutex: mu}, nil
   758  }
   759  
   760  func (e *etcdClient) DeletePrefix(path string) (err error) {
   761  	defer func() { Trace("DeletePrefix", err, logrus.Fields{fieldPrefix: path}) }()
   762  	duration := spanstat.Start()
   763  	e.limiter.Wait(ctx.TODO())
   764  	_, err = e.client.Delete(ctx.Background(), path, client.WithPrefix())
   765  	increaseMetric(path, metricDelete, "DeletePrefix", duration.EndError(err).Total(), err)
   766  	return Hint(err)
   767  }
   768  
// Watch starts watching for changes in a prefix.
//
// After the client is connected, all keys below w.prefix are listed and
// emitted on w.Events as create events (or modify events for keys already
// known to the local cache), followed by delete events for cached keys that
// are marked for deletion and a single EventTypeListDone event. Then an etcd
// watch is started at the revision following the list and every received
// event is forwarded to w.Events.
//
// If the watch channel is closed, the watch is recreated from the next
// revision. If a watch response carries an error, all cached keys are
// marked for deletion and the keys are listed again. The function returns
// once w.stopWatch is signalled while waiting for watch events; it then
// closes w.Events and calls w.stopWait.Done().
func (e *etcdClient) Watch(w *Watcher) {
	localCache := watcherCache{}
	listSignalSent := false

	scopedLog := e.getLogger().WithFields(logrus.Fields{
		fieldWatcher: w,
		fieldPrefix:  w.prefix,
	})
	// Block until the client is connected
	<-e.Connected()

reList:
	for {
		e.limiter.Wait(ctx.TODO())
		res, err := e.client.Get(ctx.Background(), w.prefix, client.WithPrefix(),
			client.WithSerializable())
		if err != nil {
			// NOTE(review): the list is retried immediately; the only
			// pause between attempts is the rate limiter wait above.
			scopedLog.WithError(Hint(err)).Warn("Unable to list keys before starting watcher")
			continue
		}

		// The watch below starts right after the revision of this list
		nextRev := res.Header.Revision + 1
		scopedLog.Debugf("List response from etcd len=%d: %+v", res.Count, res)

		if res.Count > 0 {
			for _, key := range res.Kvs {
				// Keys already present in the local cache are reported as
				// modifications, all others as creations
				t := EventTypeCreate
				if localCache.Exists(key.Key) {
					t = EventTypeModify
				}

				localCache.MarkInUse(key.Key)
				scopedLog.Debugf("Emitting list result as %v event for %s=%v", t, key.Key, key.Value)

				queueStart := spanstat.Start()
				w.Events <- KeyValueEvent{
					Key:   string(key.Key),
					Value: key.Value,
					Typ:   t,
				}
				trackEventQueued(string(key.Key), t, queueStart.End(true).Total())
			}
		}

		// More keys to be read, call Get() again
		// NOTE(review): the Get above is re-issued with identical
		// arguments; confirm how More is meant to be consumed here.
		if res.More {
			continue
		}

		// Send out deletion events for all keys that were deleted
		// between our last known revision and the latest revision
		// received via Get
		localCache.RemoveDeleted(func(k string) {
			event := KeyValueEvent{
				Key: k,
				Typ: EventTypeDelete,
			}

			scopedLog.Debugf("Emitting EventTypeDelete event for %s", k)
			queueStart := spanstat.Start()
			w.Events <- event
			trackEventQueued(k, EventTypeDelete, queueStart.End(true).Total())
		})

		// Only send the list signal once
		if !listSignalSent {
			w.Events <- KeyValueEvent{Typ: EventTypeListDone}
			listSignalSent = true
		}

	recreateWatcher:
		scopedLog.WithField(fieldRev, nextRev).Debug("Starting to watch a prefix")

		e.limiter.Wait(ctx.TODO())
		etcdWatch := e.client.Watch(ctx.Background(), w.prefix,
			client.WithPrefix(), client.WithRev(nextRev))
		for {
			select {
			case <-w.stopWatch:
				// The watcher was stopped: close the event channel and
				// signal completion
				close(w.Events)
				w.stopWait.Done()
				return

			case r, ok := <-etcdWatch:
				if !ok {
					// The watch channel was closed: pause briefly and
					// watch again from nextRev without listing
					time.Sleep(50 * time.Millisecond)
					goto recreateWatcher
				}

				scopedLog := scopedLog.WithField(fieldRev, r.Header.Revision)

				if err := r.Err(); err != nil {
					// We tried to watch on a compacted
					// revision that may no longer exist,
					// recreate the watcher and try to
					// watch on the next possible revision
					if err == v3rpcErrors.ErrCompacted {
						scopedLog.WithError(Hint(err)).Debug("Tried watching on compacted revision")
					}

					// mark all local keys in state for
					// deletion unless the upcoming GET
					// marks them alive
					localCache.MarkAllForDeletion()

					goto reList
				}

				nextRev = r.Header.Revision + 1
				scopedLog.Debugf("Received event from etcd: %+v", r)

				for _, ev := range r.Events {
					event := KeyValueEvent{
						Key:   string(ev.Kv.Key),
						Value: ev.Kv.Value,
					}

					// Translate the etcd event type and keep the local
					// cache in sync with it
					switch {
					case ev.Type == client.EventTypeDelete:
						event.Typ = EventTypeDelete
						localCache.RemoveKey(ev.Kv.Key)
					case ev.IsCreate():
						event.Typ = EventTypeCreate
						localCache.MarkInUse(ev.Kv.Key)
					default:
						event.Typ = EventTypeModify
						localCache.MarkInUse(ev.Kv.Key)
					}

					scopedLog.Debugf("Emitting %v event for %s=%v", event.Typ, event.Key, event.Value)

					queueStart := spanstat.Start()
					w.Events <- event
					trackEventQueued(string(ev.Kv.Key), event.Typ, queueStart.End(true).Total())
				}
			}
		}
	}
}
   908  
   909  func (e *etcdClient) determineEndpointStatus(endpointAddress string) (string, error) {
   910  	ctxTimeout, cancel := ctx.WithTimeout(ctx.Background(), statusCheckTimeout)
   911  	defer cancel()
   912  
   913  	e.getLogger().Debugf("Checking status to etcd endpoint %s", endpointAddress)
   914  
   915  	e.limiter.Wait(ctxTimeout)
   916  	status, err := e.client.Status(ctxTimeout, endpointAddress)
   917  	if err != nil {
   918  		return fmt.Sprintf("%s - %s", endpointAddress, err), Hint(err)
   919  	}
   920  
   921  	str := fmt.Sprintf("%s - %s", endpointAddress, status.Version)
   922  	if status.Header.MemberId == status.Leader {
   923  		str += " (Leader)"
   924  	}
   925  
   926  	return str, nil
   927  }
   928  
   929  func (e *etcdClient) statusChecker() {
   930  	for {
   931  		newStatus := []string{}
   932  		ok := 0
   933  
   934  		hasQuorum := e.isConnectedAndHasQuorum()
   935  
   936  		endpoints := e.client.Endpoints()
   937  		for _, ep := range endpoints {
   938  			st, err := e.determineEndpointStatus(ep)
   939  			if err == nil {
   940  				ok++
   941  			}
   942  
   943  			newStatus = append(newStatus, st)
   944  		}
   945  
   946  		allConnected := len(endpoints) == ok
   947  
   948  		e.RWMutex.RLock()
   949  		sessionLeaseID := e.session.Lease()
   950  		lockSessionLeaseID := e.lockSession.Lease()
   951  		e.RWMutex.RUnlock()
   952  
   953  		e.statusLock.Lock()
   954  		e.latestStatusSnapshot = fmt.Sprintf("etcd: %d/%d connected, lease-ID=%x, lock lease-ID=%x, has-quorum=%t: %s",
   955  			ok, len(endpoints), sessionLeaseID, lockSessionLeaseID, hasQuorum, strings.Join(newStatus, "; "))
   956  
   957  		// Only mark the etcd health as unstable if no etcd endpoints can be reached
   958  		if len(endpoints) > 0 && ok == 0 {
   959  			e.latestErrorStatus = fmt.Errorf("not able to connect to any etcd endpoints")
   960  		} else {
   961  			e.latestErrorStatus = nil
   962  		}
   963  
   964  		e.statusLock.Unlock()
   965  
   966  		select {
   967  		case <-e.stopStatusChecker:
   968  			return
   969  		case <-time.After(e.extraOptions.StatusCheckInterval(allConnected)):
   970  		}
   971  	}
   972  }
   973  
   974  func (e *etcdClient) Status() (string, error) {
   975  	e.statusLock.RLock()
   976  	defer e.statusLock.RUnlock()
   977  
   978  	return e.latestStatusSnapshot, Hint(e.latestErrorStatus)
   979  }
   980  
   981  // GetIfLocked returns value of key if the client is still holding the given lock.
   982  func (e *etcdClient) GetIfLocked(key string, lock KVLocker) (bv []byte, err error) {
   983  	defer func() { Trace("GetIfLocked", err, logrus.Fields{fieldKey: key, fieldValue: string(bv)}) }()
   984  	duration := spanstat.Start()
   985  	e.limiter.Wait(ctx.TODO())
   986  	opGet := client.OpGet(key)
   987  	cmp := lock.Comparator().(client.Cmp)
   988  	txnReply, err := e.client.Txn(context.Background()).If(cmp).Then(opGet).Commit()
   989  	if err == nil && !txnReply.Succeeded {
   990  		err = ErrLockLeaseExpired
   991  	}
   992  	increaseMetric(key, metricRead, "GetLocked", duration.EndError(err).Total(), err)
   993  	if err != nil {
   994  		return nil, Hint(err)
   995  	}
   996  
   997  	getR := txnReply.Responses[0].GetResponseRange()
   998  	// RangeResponse
   999  	if getR.Count == 0 {
  1000  		return nil, nil
  1001  	}
  1002  	bv, err = getR.Kvs[0].Value, nil
  1003  	return bv, err
  1004  }
  1005  
  1006  // Get returns value of key
  1007  func (e *etcdClient) Get(key string) (bv []byte, err error) {
  1008  	defer func() { Trace("Get", err, logrus.Fields{fieldKey: key, fieldValue: string(bv)}) }()
  1009  	duration := spanstat.Start()
  1010  	e.limiter.Wait(ctx.TODO())
  1011  	var getR *client.GetResponse
  1012  	getR, err = e.client.Get(ctx.Background(), key)
  1013  	increaseMetric(key, metricRead, "Get", duration.EndError(err).Total(), err)
  1014  	if err != nil {
  1015  		err = Hint(err)
  1016  		return nil, err
  1017  	}
  1018  
  1019  	if getR.Count == 0 {
  1020  		return nil, nil
  1021  	}
  1022  	return getR.Kvs[0].Value, nil
  1023  }
  1024  
  1025  // GetPrefixIfLocked returns the first key which matches the prefix and its value if the client is still holding the given lock.
  1026  func (e *etcdClient) GetPrefixIfLocked(ctx context.Context, prefix string, lock KVLocker) (k string, bv []byte, err error) {
  1027  	defer func() {
  1028  		Trace("GetPrefixIfLocked", err, logrus.Fields{fieldPrefix: prefix, fieldKey: k, fieldValue: string(bv)})
  1029  	}()
  1030  
  1031  	duration := spanstat.Start()
  1032  	e.limiter.Wait(ctx)
  1033  	opGet := client.OpGet(prefix, client.WithPrefix(), client.WithLimit(1))
  1034  	cmp := lock.Comparator().(client.Cmp)
  1035  	txnReply, err := e.client.Txn(ctx).If(cmp).Then(opGet).Commit()
  1036  	if err == nil && !txnReply.Succeeded {
  1037  		err = ErrLockLeaseExpired
  1038  	}
  1039  	increaseMetric(prefix, metricRead, "GetPrefixLocked", duration.EndError(err).Total(), err)
  1040  	if err != nil {
  1041  		return "", nil, Hint(err)
  1042  	}
  1043  	getR := txnReply.Responses[0].GetResponseRange()
  1044  
  1045  	if getR.Count == 0 {
  1046  		return "", nil, nil
  1047  	}
  1048  	return string(getR.Kvs[0].Key), getR.Kvs[0].Value, nil
  1049  }
  1050  
  1051  // GetPrefix returns the first key which matches the prefix and its value
  1052  func (e *etcdClient) GetPrefix(ctx context.Context, prefix string) (k string, bv []byte, err error) {
  1053  	defer func() {
  1054  		Trace("GetPrefix", err, logrus.Fields{fieldPrefix: prefix, fieldKey: k, fieldValue: string(bv)})
  1055  	}()
  1056  
  1057  	duration := spanstat.Start()
  1058  	e.limiter.Wait(ctx)
  1059  	getR, err := e.client.Get(ctx, prefix, client.WithPrefix(), client.WithLimit(1))
  1060  	increaseMetric(prefix, metricRead, "GetPrefix", duration.EndError(err).Total(), err)
  1061  	if err != nil {
  1062  		return "", nil, Hint(err)
  1063  	}
  1064  
  1065  	if getR.Count == 0 {
  1066  		return "", nil, nil
  1067  	}
  1068  	return string(getR.Kvs[0].Key), getR.Kvs[0].Value, nil
  1069  }
  1070  
  1071  // Set sets value of key
  1072  func (e *etcdClient) Set(key string, value []byte) (err error) {
  1073  	defer func() { Trace("Set", err, logrus.Fields{fieldKey: key, fieldValue: string(value)}) }()
  1074  	duration := spanstat.Start()
  1075  	e.limiter.Wait(ctx.TODO())
  1076  	_, err = e.client.Put(ctx.Background(), key, string(value))
  1077  	increaseMetric(key, metricSet, "Set", duration.EndError(err).Total(), err)
  1078  	err = Hint(err)
  1079  	return err
  1080  }
  1081  
  1082  // DeleteIfLocked deletes a key if the client is still holding the given lock.
  1083  func (e *etcdClient) DeleteIfLocked(key string, lock KVLocker) (err error) {
  1084  	defer func() { Trace("DeleteIfLocked", err, logrus.Fields{fieldKey: key}) }()
  1085  	duration := spanstat.Start()
  1086  	opDel := client.OpDelete(key)
  1087  	cmp := lock.Comparator().(client.Cmp)
  1088  	var txnReply *client.TxnResponse
  1089  	txnReply, err = e.client.Txn(context.Background()).If(cmp).Then(opDel).Commit()
  1090  	if err == nil && !txnReply.Succeeded {
  1091  		err = ErrLockLeaseExpired
  1092  	}
  1093  	increaseMetric(key, metricDelete, "DeleteLocked", duration.EndError(err).Total(), err)
  1094  	err = Hint(err)
  1095  	return err
  1096  }
  1097  
  1098  // Delete deletes a key
  1099  func (e *etcdClient) Delete(key string) (err error) {
  1100  	defer func() { Trace("Delete", err, logrus.Fields{fieldKey: key}) }()
  1101  	duration := spanstat.Start()
  1102  	e.limiter.Wait(ctx.TODO())
  1103  	_, err = e.client.Delete(ctx.Background(), key)
  1104  	increaseMetric(key, metricDelete, "Delete", duration.EndError(err).Total(), err)
  1105  	err = Hint(err)
  1106  	return err
  1107  }
  1108  
  1109  func (e *etcdClient) createOpPut(key string, value []byte, leaseID client.LeaseID) *client.Op {
  1110  	if leaseID != 0 {
  1111  		op := client.OpPut(key, string(value), client.WithLease(leaseID))
  1112  		return &op
  1113  	}
  1114  
  1115  	op := client.OpPut(key, string(value))
  1116  	return &op
  1117  }
  1118  
  1119  // UpdateIfLocked atomically creates a key or fails if it already exists if the client is still holding the given lock.
  1120  func (e *etcdClient) UpdateIfLocked(ctx context.Context, key string, value []byte, lease bool, lock KVLocker) error {
  1121  	select {
  1122  	case <-e.firstSession:
  1123  	case <-ctx.Done():
  1124  		return fmt.Errorf("update cancelled via context: %s", ctx.Err())
  1125  	}
  1126  
  1127  	var (
  1128  		txnReply *client.TxnResponse
  1129  		err      error
  1130  	)
  1131  
  1132  	duration := spanstat.Start()
  1133  	e.limiter.Wait(ctx)
  1134  	if lease {
  1135  		leaseID := e.GetSessionLeaseID()
  1136  		opPut := client.OpPut(key, string(value), client.WithLease(leaseID))
  1137  		cmp := lock.Comparator().(client.Cmp)
  1138  		txnReply, err = e.client.Txn(context.Background()).If(cmp).Then(opPut).Commit()
  1139  		e.checkSession(err, leaseID)
  1140  	} else {
  1141  		opPut := client.OpPut(key, string(value))
  1142  		cmp := lock.Comparator().(client.Cmp)
  1143  		txnReply, err = e.client.Txn(context.Background()).If(cmp).Then(opPut).Commit()
  1144  	}
  1145  	if err == nil && !txnReply.Succeeded {
  1146  		err = ErrLockLeaseExpired
  1147  	}
  1148  	increaseMetric(key, metricSet, "UpdateIfLocked", duration.EndError(err).Total(), err)
  1149  	return Hint(err)
  1150  }
  1151  
  1152  // Update creates or updates a key
  1153  func (e *etcdClient) Update(ctx context.Context, key string, value []byte, lease bool) (err error) {
  1154  	defer Trace("Update", err, logrus.Fields{fieldKey: key, fieldValue: string(value), fieldAttachLease: lease})
  1155  
  1156  	select {
  1157  	case <-e.firstSession:
  1158  	case <-ctx.Done():
  1159  		return fmt.Errorf("update cancelled via context: %s", ctx.Err())
  1160  	}
  1161  
  1162  	if lease {
  1163  		duration := spanstat.Start()
  1164  		leaseID := e.GetSessionLeaseID()
  1165  		e.limiter.Wait(ctx)
  1166  		_, err := e.client.Put(ctx, key, string(value), client.WithLease(leaseID))
  1167  		e.checkSession(err, leaseID)
  1168  		increaseMetric(key, metricSet, "Update", duration.EndError(err).Total(), err)
  1169  		return Hint(err)
  1170  	}
  1171  
  1172  	duration := spanstat.Start()
  1173  	e.limiter.Wait(ctx)
  1174  	_, err = e.client.Put(ctx, key, string(value))
  1175  	increaseMetric(key, metricSet, "Update", duration.EndError(err).Total(), err)
  1176  	return Hint(err)
  1177  }
  1178  
  1179  // UpdateIfDifferentIfLocked updates a key if the value is different and if the client is still holding the given lock.
  1180  func (e *etcdClient) UpdateIfDifferentIfLocked(ctx context.Context, key string, value []byte, lease bool, lock KVLocker) (recreated bool, err error) {
  1181  	defer func() {
  1182  		Trace("UpdateIfDifferentIfLocked", err, logrus.Fields{fieldKey: key, fieldValue: value, fieldAttachLease: lease, "recreated": recreated})
  1183  	}()
  1184  
  1185  	select {
  1186  	case <-e.firstSession:
  1187  	case <-ctx.Done():
  1188  		return false, fmt.Errorf("update cancelled via context: %s", ctx.Err())
  1189  	}
  1190  	duration := spanstat.Start()
  1191  	e.limiter.Wait(ctx)
  1192  	cnds := lock.Comparator().(client.Cmp)
  1193  	txnresp, err := e.client.Txn(ctx).If(cnds).Then(client.OpGet(key)).Commit()
  1194  
  1195  	increaseMetric(key, metricRead, "Get", duration.EndError(err).Total(), err)
  1196  
  1197  	// On error, attempt update blindly
  1198  	if err != nil {
  1199  		return true, e.UpdateIfLocked(ctx, key, value, lease, lock)
  1200  	}
  1201  
  1202  	if !txnresp.Succeeded {
  1203  		return false, ErrLockLeaseExpired
  1204  	}
  1205  
  1206  	getR := txnresp.Responses[0].GetResponseRange()
  1207  	if getR.Count == 0 {
  1208  		return true, e.UpdateIfLocked(ctx, key, value, lease, lock)
  1209  	}
  1210  
  1211  	if lease {
  1212  		e.RWMutex.RLock()
  1213  		leaseID := e.session.Lease()
  1214  		e.RWMutex.RUnlock()
  1215  		if getR.Kvs[0].Lease != int64(leaseID) {
  1216  			return true, e.UpdateIfLocked(ctx, key, value, lease, lock)
  1217  		}
  1218  	}
  1219  	// if value is not equal then update.
  1220  	if !bytes.Equal(getR.Kvs[0].Value, value) {
  1221  		return true, e.UpdateIfLocked(ctx, key, value, lease, lock)
  1222  	}
  1223  
  1224  	return false, nil
  1225  }
  1226  
  1227  // UpdateIfDifferent updates a key if the value is different
  1228  func (e *etcdClient) UpdateIfDifferent(ctx context.Context, key string, value []byte, lease bool) (recreated bool, err error) {
  1229  	defer func() {
  1230  		Trace("UpdateIfDifferent", err, logrus.Fields{fieldKey: key, fieldValue: value, fieldAttachLease: lease, "recreated": recreated})
  1231  	}()
  1232  
  1233  	select {
  1234  	case <-e.firstSession:
  1235  	case <-ctx.Done():
  1236  		return false, fmt.Errorf("update cancelled via context: %s", ctx.Err())
  1237  	}
  1238  
  1239  	duration := spanstat.Start()
  1240  	e.limiter.Wait(ctx)
  1241  	getR, err := e.client.Get(ctx, key)
  1242  	increaseMetric(key, metricRead, "Get", duration.EndError(err).Total(), err)
  1243  	// On error, attempt update blindly
  1244  	if err != nil || getR.Count == 0 {
  1245  		return true, e.Update(ctx, key, value, lease)
  1246  	}
  1247  	if lease {
  1248  		e.RWMutex.RLock()
  1249  		leaseID := e.session.Lease()
  1250  		e.RWMutex.RUnlock()
  1251  		if getR.Kvs[0].Lease != int64(leaseID) {
  1252  			return true, e.Update(ctx, key, value, lease)
  1253  		}
  1254  	}
  1255  	// if value is not equal then update.
  1256  	if !bytes.Equal(getR.Kvs[0].Value, value) {
  1257  		return true, e.Update(ctx, key, value, lease)
  1258  	}
  1259  
  1260  	return false, nil
  1261  }
  1262  
  1263  // CreateOnlyIfLocked atomically creates a key if the client is still holding the given lock or fails if it already exists
  1264  func (e *etcdClient) CreateOnlyIfLocked(ctx context.Context, key string, value []byte, lease bool, lock KVLocker) (success bool, err error) {
  1265  	defer func() {
  1266  		Trace("CreateOnlyIfLocked", err, logrus.Fields{fieldKey: key, fieldValue: value, fieldAttachLease: lease, "success": success})
  1267  	}()
  1268  
  1269  	duration := spanstat.Start()
  1270  	var leaseID client.LeaseID
  1271  	if lease {
  1272  		leaseID = e.GetSessionLeaseID()
  1273  	}
  1274  	req := e.createOpPut(key, value, leaseID)
  1275  	cnds := []client.Cmp{
  1276  		client.Compare(client.Version(key), "=", 0),
  1277  		lock.Comparator().(client.Cmp),
  1278  	}
  1279  
  1280  	// We need to do a get in the else of the txn to detect if the lock is still
  1281  	// valid or not.
  1282  	opGets := []client.Op{
  1283  		client.OpGet(key),
  1284  	}
  1285  
  1286  	e.limiter.Wait(ctx)
  1287  	txnresp, err := e.client.Txn(ctx).If(cnds...).Then(*req).Else(opGets...).Commit()
  1288  	increaseMetric(key, metricSet, "CreateOnlyLocked", duration.EndError(err).Total(), err)
  1289  	if err != nil {
  1290  		e.checkSession(err, leaseID)
  1291  		return false, Hint(err)
  1292  	}
  1293  
  1294  	// The txn can failed for the following reasons:
  1295  	//  - Key version is not zero;
  1296  	//  - Lock does not exist or is expired.
  1297  	// For both of those cases, the key that we are comparing might or not
  1298  	// exist, so we have:
  1299  	//  A - Key does not exist and lock does not exist => ErrLockLeaseExpired
  1300  	//  B - Key does not exist and lock exist => txn should succeed
  1301  	//  C - Key does exist, version is == 0 and lock does not exist => ErrLockLeaseExpired
  1302  	//  D - Key does exist, version is != 0 and lock does not exist => ErrLockLeaseExpired
  1303  	//  E - Key does exist, version is == 0 and lock does exist => txn should succeed
  1304  	//  F - Key does exist, version is != 0 and lock does exist => txn fails but returned is nil!
  1305  
  1306  	if !txnresp.Succeeded {
  1307  		// case F
  1308  		if len(txnresp.Responses[0].GetResponseRange().Kvs) != 0 &&
  1309  			txnresp.Responses[0].GetResponseRange().Kvs[0].Version != 0 {
  1310  			return false, nil
  1311  		}
  1312  
  1313  		// case A, C and D
  1314  		return false, ErrLockLeaseExpired
  1315  	}
  1316  
  1317  	// case B and E
  1318  	return true, nil
  1319  }
  1320  
  1321  // CreateOnly creates a key with the value and will fail if the key already exists
  1322  func (e *etcdClient) CreateOnly(ctx context.Context, key string, value []byte, lease bool) (success bool, err error) {
  1323  	defer func() {
  1324  		Trace("CreateOnly", err, logrus.Fields{fieldKey: key, fieldValue: value, fieldAttachLease: lease, "success": success})
  1325  	}()
  1326  
  1327  	duration := spanstat.Start()
  1328  	var leaseID client.LeaseID
  1329  	if lease {
  1330  		leaseID = e.GetSessionLeaseID()
  1331  	}
  1332  	req := e.createOpPut(key, value, leaseID)
  1333  	cond := client.Compare(client.Version(key), "=", 0)
  1334  
  1335  	e.limiter.Wait(ctx)
  1336  	txnresp, err := e.client.Txn(ctx).If(cond).Then(*req).Commit()
  1337  	increaseMetric(key, metricSet, "CreateOnly", duration.EndError(err).Total(), err)
  1338  	if err != nil {
  1339  		e.checkSession(err, leaseID)
  1340  		return false, Hint(err)
  1341  	}
  1342  
  1343  	return txnresp.Succeeded, nil
  1344  }
  1345  
  1346  // CreateIfExists creates a key with the value only if key condKey exists
  1347  func (e *etcdClient) CreateIfExists(condKey, key string, value []byte, lease bool) (err error) {
  1348  	defer func() {
  1349  		Trace("CreateIfExists", err, logrus.Fields{fieldKey: key, fieldValue: string(value), fieldCondition: condKey, fieldAttachLease: lease})
  1350  	}()
  1351  	duration := spanstat.Start()
  1352  	var leaseID client.LeaseID
  1353  	if lease {
  1354  		leaseID = e.GetSessionLeaseID()
  1355  	}
  1356  	req := e.createOpPut(key, value, leaseID)
  1357  	cond := client.Compare(client.Version(condKey), "!=", 0)
  1358  
  1359  	e.limiter.Wait(ctx.TODO())
  1360  	txnresp, err := e.client.Txn(ctx.TODO()).If(cond).Then(*req).Commit()
  1361  	increaseMetric(key, metricSet, "CreateIfExists", duration.EndError(err).Total(), err)
  1362  	if err != nil {
  1363  		e.checkSession(err, leaseID)
  1364  		err = Hint(err)
  1365  		return err
  1366  	}
  1367  
  1368  	if !txnresp.Succeeded {
  1369  		return fmt.Errorf("create was unsuccessful")
  1370  	}
  1371  
  1372  	return nil
  1373  }
  1374  
  1375  // FIXME: When we rebase to etcd 3.3
  1376  //
  1377  // DeleteOnZeroCount deletes the key if no matching keys for prefix exist
  1378  //func (e *etcdClient) DeleteOnZeroCount(key, prefix string) error {
  1379  //	txnresp, err := e.client.Txn(ctx.TODO()).
  1380  //		If(client.Compare(client.Version(prefix).WithPrefix(), "=", 0)).
  1381  //		Then(client.OpDelete(key)).
  1382  //		Commit()
  1383  //	if err != nil {
  1384  //		return err
  1385  //	}
  1386  //
  1387  //	if txnresp.Succeeded == false {
  1388  //		return fmt.Errorf("delete was unsuccessful")
  1389  //	}
  1390  //
  1391  //	return nil
  1392  //}
  1393  
  1394  // ListPrefixIfLocked returns a list of keys matching the prefix only if the client is still holding the given lock.
  1395  func (e *etcdClient) ListPrefixIfLocked(prefix string, lock KVLocker) (pairs KeyValuePairs, err error) {
  1396  	defer func() {
  1397  		Trace("ListPrefixIfLocked", err, logrus.Fields{fieldPrefix: prefix, fieldNumEntries: len(pairs)})
  1398  	}()
  1399  	duration := spanstat.Start()
  1400  	e.limiter.Wait(ctx.TODO())
  1401  	opGet := client.OpGet(prefix, client.WithPrefix())
  1402  	cmp := lock.Comparator().(client.Cmp)
  1403  	var txnReply *client.TxnResponse
  1404  	txnReply, err = e.client.Txn(context.Background()).If(cmp).Then(opGet).Commit()
  1405  	if err == nil && !txnReply.Succeeded {
  1406  		err = ErrLockLeaseExpired
  1407  	}
  1408  	increaseMetric(prefix, metricRead, "ListPrefixLocked", duration.EndError(err).Total(), err)
  1409  	if err != nil {
  1410  		err = Hint(err)
  1411  		return nil, err
  1412  	}
  1413  	getR := txnReply.Responses[0].GetResponseRange()
  1414  
  1415  	pairs = KeyValuePairs(make(map[string]Value, getR.Count))
  1416  	for i := int64(0); i < getR.Count; i++ {
  1417  		pairs[string(getR.Kvs[i].Key)] = Value{
  1418  			Data:        getR.Kvs[i].Value,
  1419  			ModRevision: uint64(getR.Kvs[i].ModRevision),
  1420  		}
  1421  
  1422  	}
  1423  
  1424  	return pairs, nil
  1425  }
  1426  
  1427  // ListPrefix returns a map of matching keys
  1428  func (e *etcdClient) ListPrefix(prefix string) (pairs KeyValuePairs, err error) {
  1429  	defer func() { Trace("ListPrefix", err, logrus.Fields{fieldPrefix: prefix, fieldNumEntries: len(pairs)}) }()
  1430  	duration := spanstat.Start()
  1431  
  1432  	e.limiter.Wait(ctx.TODO())
  1433  	var getR *client.GetResponse
  1434  	getR, err = e.client.Get(ctx.Background(), prefix, client.WithPrefix())
  1435  	increaseMetric(prefix, metricRead, "ListPrefix", duration.EndError(err).Total(), err)
  1436  	if err != nil {
  1437  		return nil, Hint(err)
  1438  	}
  1439  
  1440  	pairs = KeyValuePairs(make(map[string]Value, getR.Count))
  1441  	for i := int64(0); i < getR.Count; i++ {
  1442  		pairs[string(getR.Kvs[i].Key)] = Value{
  1443  			Data:        getR.Kvs[i].Value,
  1444  			ModRevision: uint64(getR.Kvs[i].ModRevision),
  1445  			LeaseID:     getR.Kvs[i].Lease,
  1446  		}
  1447  
  1448  	}
  1449  
  1450  	return pairs, nil
  1451  }
  1452  
  1453  // Close closes the etcd session
  1454  func (e *etcdClient) Close() {
  1455  	close(e.stopStatusChecker)
  1456  	<-e.firstSession
  1457  	if e.controllers != nil {
  1458  		e.controllers.RemoveAll()
  1459  	}
  1460  	e.RLock()
  1461  	defer e.RUnlock()
  1462  	e.lockSession.Close()
  1463  	e.session.Close()
  1464  	e.client.Close()
  1465  }
  1466  
  1467  // GetCapabilities returns the capabilities of the backend
  1468  func (e *etcdClient) GetCapabilities() Capabilities {
  1469  	return Capabilities(CapabilityCreateIfExists)
  1470  }
  1471  
  1472  // Encode encodes a binary slice into a character set that the backend supports
  1473  func (e *etcdClient) Encode(in []byte) (out string) {
  1474  	defer func() { Trace("Encode", nil, logrus.Fields{"in": in, "out": out}) }()
  1475  	return string(in)
  1476  }
  1477  
  1478  // Decode decodes a key previously encoded back into the original binary slice
  1479  func (e *etcdClient) Decode(in string) (out []byte, err error) {
  1480  	defer func() { Trace("Decode", err, logrus.Fields{"in": in, "out": out}) }()
  1481  	return []byte(in), nil
  1482  }
  1483  
  1484  // ListAndWatch implements the BackendOperations.ListAndWatch using etcd
  1485  func (e *etcdClient) ListAndWatch(name, prefix string, chanSize int) *Watcher {
  1486  	w := newWatcher(name, prefix, chanSize)
  1487  
  1488  	e.getLogger().WithField(fieldWatcher, w).Debug("Starting watcher...")
  1489  
  1490  	go e.Watch(w)
  1491  
  1492  	return w
  1493  }
  1494  
  1495  // SplitK8sServiceURL returns the service name and namespace for the given address.
  1496  // If the given address is not parseable or it is not the format
  1497  // '<protocol>://><name>.<namespace>[optional]', returns an error.
  1498  func SplitK8sServiceURL(address string) (string, string, error) {
  1499  	u, err := url.Parse(address)
  1500  	if err != nil {
  1501  		return "", "", err
  1502  	}
  1503  	// typical service name "cilium-etcd-client.kube-system.svc"
  1504  	names := strings.Split(u.Hostname(), ".")
  1505  	if len(names) >= 2 {
  1506  		return names[0], names[1], nil
  1507  	}
  1508  	return "", "",
  1509  		fmt.Errorf("invalid service name. expecting <protocol://><name>.<namespace>[optional], got: %s", address)
  1510  }
  1511  
  1512  // IsEtcdOperator returns the service name if the configuration is setting up an
  1513  // etcd-operator. If the configuration explicitly states it is configured
  1514  // to connect to an etcd operator, e.g. with etcd.operator=true, the returned
  1515  // service name is the first found within the configuration specified.
  1516  func IsEtcdOperator(selectedBackend string, opts map[string]string, k8sNamespace string) (string, bool) {
  1517  	if selectedBackend != EtcdBackendName {
  1518  		return "", false
  1519  	}
  1520  
  1521  	isEtcdOperator := strings.ToLower(opts[isEtcdOperatorOption]) == "true"
  1522  
  1523  	fqdnIsEtcdOperator := func(address string) bool {
  1524  		svcName, ns, err := SplitK8sServiceURL(address)
  1525  		return err == nil &&
  1526  			svcName == "cilium-etcd-client" &&
  1527  			ns == k8sNamespace
  1528  	}
  1529  
  1530  	fqdn := opts[EtcdAddrOption]
  1531  	if len(fqdn) != 0 {
  1532  		if fqdnIsEtcdOperator(fqdn) || isEtcdOperator {
  1533  			return fqdn, true
  1534  		}
  1535  		return "", false
  1536  	}
  1537  
  1538  	bm := newEtcdModule()
  1539  	err := bm.setConfig(opts)
  1540  	if err != nil {
  1541  		return "", false
  1542  	}
  1543  	etcdConfig := bm.getConfig()[EtcdOptionConfig]
  1544  	if len(etcdConfig) == 0 {
  1545  		return "", false
  1546  	}
  1547  
  1548  	cfg, err := newConfig(etcdConfig)
  1549  	if err != nil {
  1550  		log.WithError(err).Error("Unable to read etcd configuration.")
  1551  		return "", false
  1552  	}
  1553  	for _, endpoint := range cfg.Endpoints {
  1554  		if fqdnIsEtcdOperator(endpoint) || isEtcdOperator {
  1555  			return endpoint, true
  1556  		}
  1557  	}
  1558  
  1559  	return "", false
  1560  }
  1561  
  1562  // newConfig is a wrapper of clientyaml.NewConfig. Since etcd has deprecated
  1563  // the `ca-file` field from yamlConfig in v3.4, the clientyaml.NewConfig won't
  1564  // read that field from the etcd configuration file making Cilium fail to
  1565  // connect to a TLS-enabled etcd server. Since we should have deprecated the
  1566  // usage of this field a long time ago, in this galaxy, we will have this
  1567  // wrapper function as a workaround which will still use the `ca-file` field to
  1568  // avoid users breaking their connectivity to etcd when upgrading Cilium.
  1569  // TODO remove this wrapper in cilium >= 1.8
  1570  func newConfig(fpath string) (*client.Config, error) {
  1571  	cfg, err := clientyaml.NewConfig(fpath)
  1572  	if err != nil {
  1573  		return nil, err
  1574  	}
  1575  	if cfg.TLS == nil || cfg.TLS.RootCAs != nil {
  1576  		return cfg, nil
  1577  	}
  1578  
  1579  	yc := &yamlConfig{}
  1580  	b, err := ioutil.ReadFile(fpath)
  1581  	if err != nil {
  1582  		return nil, err
  1583  	}
  1584  	err = yaml.Unmarshal(b, yc)
  1585  	if err != nil {
  1586  		return nil, err
  1587  	}
  1588  	if yc.InsecureTransport {
  1589  		return cfg, nil
  1590  	}
  1591  
  1592  	if yc.CAfile != "" {
  1593  		cp, err := tlsutil.NewCertPool([]string{yc.CAfile})
  1594  		if err != nil {
  1595  			return nil, err
  1596  		}
  1597  		cfg.TLS.RootCAs = cp
  1598  	}
  1599  	cfg.TLS.GetClientCertificate = func(_ *tls.CertificateRequestInfo) (*tls.Certificate, error) {
  1600  		cer, err := tls.LoadX509KeyPair(yc.Certfile, yc.Keyfile)
  1601  		return &cer, err
  1602  	}
  1603  	return cfg, nil
  1604  }
  1605  
// yamlConfig is a copy of the internal structure in
// go.etcd.io/etcd/clientv3/yaml so we can still use the `ca-file` field for
// one more release. newConfig unmarshals the etcd configuration file into it
// to read the fields that clientyaml.NewConfig no longer exposes.
type yamlConfig struct {
	// Config embeds the regular etcd client configuration.
	client.Config

	// InsecureTransport, when set, makes newConfig return the configuration
	// without loading a CA file or installing a client certificate callback.
	InsecureTransport bool `json:"insecure-transport"`
	// InsecureSkipTLSVerify is not read by the code in this file.
	// NOTE(review): presumably kept only to mirror the upstream struct.
	InsecureSkipTLSVerify bool `json:"insecure-skip-tls-verify"`
	// Certfile and Keyfile are the paths newConfig passes to
	// tls.LoadX509KeyPair to load the client certificate.
	Certfile string `json:"cert-file"`
	Keyfile  string `json:"key-file"`
	// TrustedCAfile is not read by the code in this file.
	// NOTE(review): presumably handled by clientyaml.NewConfig - confirm.
	TrustedCAfile string `json:"trusted-ca-file"`

	// CAfile is being deprecated. Use 'TrustedCAfile' instead.
	// newConfig still loads it into the TLS root CA pool when it is set.
	// TODO: deprecate this in v4
	CAfile string `json:"ca-file"`
}