github.com/cilium/cilium@v1.16.2/pkg/clustermesh/common/remote_cluster.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package common

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"

	"github.com/go-openapi/strfmt"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
	"google.golang.org/grpc"

	"github.com/cilium/cilium/api/v1/models"
	"github.com/cilium/cilium/pkg/clustermesh/types"
	cmutils "github.com/cilium/cilium/pkg/clustermesh/utils"
	"github.com/cilium/cilium/pkg/controller"
	"github.com/cilium/cilium/pkg/dial"
	"github.com/cilium/cilium/pkg/kvstore"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/metrics"
	"github.com/cilium/cilium/pkg/option"
)

var (
	remoteConnectionControllerGroup = controller.NewGroup("clustermesh-remote-cluster")
	clusterConfigControllerGroup    = controller.NewGroup("clustermesh-cluster-config")
)

type RemoteCluster interface {
	// Run implements the actual business logic once the connection to the remote cluster has been established.
	// The ready channel shall be closed once the initialization tasks have completed, possibly after sending an error.
	Run(ctx context.Context, backend kvstore.BackendOperations, config types.CiliumClusterConfig, ready chan<- error)

	Stop()
	Remove(ctx context.Context)
}
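
// Illustrative sketch (hypothetical, not part of the upstream file): a minimal
// no-op implementation of the interface above. It shows the expected contract of
// Run: report the outcome of the initialization on the ready channel (sending an
// error only on failure), close it, and then block until the context is cancelled.
type noopRemoteCluster struct{}

func (noopRemoteCluster) Run(ctx context.Context, backend kvstore.BackendOperations, config types.CiliumClusterConfig, ready chan<- error) {
	// Initialization tasks would run here; on failure, an error would be sent
	// on ready before closing it.
	close(ready)

	// The long-running business logic would live here; a no-op simply waits
	// until the connection is torn down.
	<-ctx.Done()
}

func (noopRemoteCluster) Stop() {}

func (noopRemoteCluster) Remove(ctx context.Context) {}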

// remoteCluster represents a cluster other than the one the agent is
// running in.
type remoteCluster struct {
	RemoteCluster

	// name is the name of the cluster
	name string

	// configPath is the path to the etcd configuration to be used to
	// connect to the etcd cluster of the remote cluster
	configPath string

	// clusterSizeDependantInterval allows calculating intervals based on the cluster size.
	clusterSizeDependantInterval kvstore.ClusterSizeDependantIntervalFunc

	// resolvers are the set of resolvers used to create the custom dialer.
	resolvers []dial.Resolver

	// changed receives an event when the remote cluster configuration has
	// changed, and is closed when the configuration file is removed.
	changed chan bool

	controllers *controller.Manager

	// wg is used to wait for the termination of the long-running background
	// goroutines spawned by the controller upon reconnection.
	wg sync.WaitGroup

	// remoteConnectionControllerName is the name of the backing controller
	// that maintains the remote connection
	remoteConnectionControllerName string

	// mutex protects the following variables
	// - backend
	// - config
	// - etcdClusterID
	// - failures
	// - lastFailure
	mutex lock.RWMutex

	// backend is the kvstore backend being used
	backend kvstore.BackendOperations

	// config contains the information about the cluster config for status reporting
	config *models.RemoteClusterConfig

	// etcdClusterID contains the information about the etcd cluster ID for status
	// reporting. It is used to distinguish which instance of the clustermesh-apiserver
	// we are connected to when running in HA mode.
	etcdClusterID string

	// failures is the number of observed failures
	failures int

	// lastFailure is the timestamp of the last failure
	lastFailure time.Time

	logger logrus.FieldLogger

	metricLastFailureTimestamp prometheus.Gauge
	metricReadinessStatus      prometheus.Gauge
	metricTotalFailures        prometheus.Gauge
}

var (
	// skipKvstoreConnection skips the etcd connection, used for testing
	skipKvstoreConnection bool
)

// releaseOldConnection releases the etcd connection to a remote cluster
func (rc *remoteCluster) releaseOldConnection() {
	rc.metricReadinessStatus.Set(metrics.BoolToFloat64(false))

	// Make sure that all child goroutines have terminated before performing cleanup.
	rc.wg.Wait()

	rc.mutex.Lock()
	backend := rc.backend
	rc.backend = nil
	rc.config = nil
	rc.etcdClusterID = ""
	rc.mutex.Unlock()

	if backend != nil {
		backend.Close()
	}
}

func (rc *remoteCluster) restartRemoteConnection() {
	rc.controllers.UpdateController(
		rc.remoteConnectionControllerName,
		controller.ControllerParams{
			Group: remoteConnectionControllerGroup,
			DoFunc: func(ctx context.Context) error {
				rc.releaseOldConnection()

				clusterLock := newClusterLock()

				extraOpts := rc.makeExtraOpts(clusterLock)

				backend, errChan := kvstore.NewClient(ctx, kvstore.EtcdBackendName,
					rc.makeEtcdOpts(), &extraOpts)

				// Block until either an error is returned or the channel is
				// closed because the connection was successfully established.
				rc.logger.Debugf("Waiting for connection to be established")

				var err error
				select {
				case err = <-errChan:
				case err = <-clusterLock.errors:
				}

				if err != nil {
					if backend != nil {
						backend.Close()
					}
					rc.logger.WithError(err).Warning("Unable to establish etcd connection to remote cluster")
					return err
				}

				etcdClusterID := fmt.Sprintf("%x", clusterLock.etcdClusterID.Load())

				rc.mutex.Lock()
				rc.backend = backend
				rc.etcdClusterID = etcdClusterID
				rc.mutex.Unlock()

				ctx, cancel := context.WithCancel(ctx)
				rc.wg.Add(1)
				go func() {
					rc.watchdog(ctx, backend, clusterLock)
					cancel()
					rc.wg.Done()
				}()

				rc.logger.WithField(logfields.EtcdClusterID, etcdClusterID).Info("Connection to remote cluster established")

				config, err := rc.getClusterConfig(ctx, backend)
				if err != nil {
					lgr := rc.logger
					if errors.Is(err, cmutils.ErrClusterConfigNotFound) {
						lgr = lgr.WithField(logfields.Hint,
							"If KVStoreMesh is enabled, check whether it is connected to the target cluster."+
								" Additionally, ensure that the cluster name is correct.")
					}

					lgr.WithError(err).Warning("Unable to get remote cluster configuration")
					cancel()
					return err
				}
				rc.logger.Info("Found remote cluster configuration")

				ready := make(chan error)

				// Execute the long-running logic in the background. This allows
				// returning early from the controller body, so that the statistics
				// are updated correctly. Blocking until rc.Run terminates would
				// instead prevent a previous failure from being cleared out.
				rc.wg.Add(1)
				go func() {
					rc.Run(ctx, backend, config, ready)
					cancel()
					rc.wg.Done()
				}()

				if err := <-ready; err != nil {
					rc.logger.WithError(err).Warning("Connection to remote cluster failed")
					return err
				}

				rc.metricReadinessStatus.Set(metrics.BoolToFloat64(true))
				return nil
			},
			StopFunc: func(ctx context.Context) error {
				rc.releaseOldConnection()
				rc.logger.Info("Connection to remote cluster stopped")
				return nil
			},
			CancelDoFuncOnUpdate: true,
		},
	)
}

func (rc *remoteCluster) watchdog(ctx context.Context, backend kvstore.BackendOperations, clusterLock *clusterLock) {
	handleErr := func(err error) {
		rc.logger.WithError(err).Warning("Error observed on etcd connection, reconnecting etcd")
		rc.mutex.Lock()
		rc.failures++
		rc.lastFailure = time.Now()
		rc.metricLastFailureTimestamp.SetToCurrentTime()
		rc.metricTotalFailures.Set(float64(rc.failures))
		rc.metricReadinessStatus.Set(metrics.BoolToFloat64(rc.isReadyLocked()))
		rc.mutex.Unlock()

		rc.restartRemoteConnection()
	}

	select {
	case err, ok := <-backend.StatusCheckErrors():
		if ok && err != nil {
			handleErr(err)
		}
	case err, ok := <-clusterLock.errors:
		if ok && err != nil {
			handleErr(err)
		}
	case <-ctx.Done():
		return
	}
}

func (rc *remoteCluster) getClusterConfig(ctx context.Context, backend kvstore.BackendOperations) (types.CiliumClusterConfig, error) {
	var (
		clusterConfigRetrievalTimeout = 3 * time.Minute
		lastError                     = context.Canceled
		lastErrorLock                 lock.Mutex
	)

	ctx, cancel := context.WithTimeout(ctx, clusterConfigRetrievalTimeout)
	defer cancel()

	rc.mutex.Lock()
	rc.config = &models.RemoteClusterConfig{Required: true}
	rc.mutex.Unlock()

	cfgch := make(chan types.CiliumClusterConfig, 1)
	defer close(cfgch)

	// We retry here, rather than simply returning an error and relying on the external
	// controller's backoff period, to avoid re-creating a new connection to the remote
	// kvstore every time, which would introduce unnecessary overhead. Still, we do give
	// up once the retrieval timeout expires, to ensure that we do not retry forever if
	// something unexpected happens.
	ctrlname := rc.remoteConnectionControllerName + "-cluster-config"
	defer rc.controllers.RemoveControllerAndWait(ctrlname)
	rc.controllers.UpdateController(ctrlname, controller.ControllerParams{
		Group: clusterConfigControllerGroup,
		DoFunc: func(ctx context.Context) error {
			rc.logger.Debug("Retrieving cluster configuration from remote kvstore")
			config, err := cmutils.GetClusterConfig(ctx, rc.name, backend)
			if err != nil {
				lastErrorLock.Lock()
				lastError = err
				lastErrorLock.Unlock()
				return err
			}

			cfgch <- config
			return nil
		},
		Context:          ctx,
		MaxRetryInterval: 30 * time.Second,
	})

	// Wait until either the configuration is retrieved, or the context expires
	select {
	case config := <-cfgch:
		rc.mutex.Lock()
		rc.config.Retrieved = true
		rc.config.ClusterID = int64(config.ID)
		rc.config.Kvstoremesh = config.Capabilities.Cached
		rc.config.SyncCanaries = config.Capabilities.SyncedCanaries
		rc.mutex.Unlock()

		return config, nil
	case <-ctx.Done():
		lastErrorLock.Lock()
		defer lastErrorLock.Unlock()
		return types.CiliumClusterConfig{}, fmt.Errorf("failed to retrieve cluster configuration: %w", lastError)
	}
}
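
// describeCapabilities is an illustrative helper (hypothetical, not part of the
// upstream file): it summarizes the two capability flags surfaced in the status
// model above, namely whether the remote kvstore data is served through a caching
// KVStoreMesh instance and whether synced canaries are used.
func describeCapabilities(cfg types.CiliumClusterConfig) string {
	return fmt.Sprintf("kvstoremesh=%t, sync-canaries=%t",
		cfg.Capabilities.Cached, cfg.Capabilities.SyncedCanaries)
}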

func (rc *remoteCluster) makeEtcdOpts() map[string]string {
	opts := map[string]string{
		kvstore.EtcdOptionConfig: rc.configPath,
	}

	for key, value := range option.Config.KVStoreOpt {
		switch key {
		case kvstore.EtcdRateLimitOption, kvstore.EtcdMaxInflightOption, kvstore.EtcdListLimitOption,
			kvstore.EtcdOptionKeepAliveHeartbeat, kvstore.EtcdOptionKeepAliveTimeout:
			opts[key] = value
		}
	}

	return opts
}

func (rc *remoteCluster) makeExtraOpts(clusterLock *clusterLock) kvstore.ExtraOptions {
	var dialOpts []grpc.DialOption

	dialOpts = append(dialOpts, grpc.WithStreamInterceptor(newStreamInterceptor(clusterLock)), grpc.WithUnaryInterceptor(newUnaryInterceptor(clusterLock)))

	// Allow resolving service names without depending on DNS. This prevents the need
	// to set the DNSPolicy to ClusterFirstWithHostNet when running in host network mode.
	dialOpts = append(dialOpts, grpc.WithContextDialer(dial.NewContextDialer(rc.logger, rc.resolvers...)))

	return kvstore.ExtraOptions{
		NoLockQuorumCheck:            true,
		ClusterName:                  rc.name,
		ClusterSizeDependantInterval: rc.clusterSizeDependantInterval,
		DialOption:                   dialOpts,
		NoEndpointStatusChecks:       true,
	}
}

func (rc *remoteCluster) onInsert() {
	rc.logger.Info("New remote cluster configuration")

	if skipKvstoreConnection {
		return
	}

	rc.remoteConnectionControllerName = fmt.Sprintf("remote-etcd-%s", rc.name)
	rc.restartRemoteConnection()

	go func() {
		for {
			val := <-rc.changed
			if val {
				rc.logger.Info("etcd configuration has changed, re-creating connection")
				rc.restartRemoteConnection()
			} else {
				rc.logger.Info("Closing connection to remote etcd")
				return
			}
		}
	}()
}

// onStop is executed when the clustermesh subsystem is being stopped.
// In this case, we don't want to drain the known entries, otherwise
// we would break existing connections when the agent gets restarted.
func (rc *remoteCluster) onStop() {
	_ = rc.controllers.RemoveControllerAndWait(rc.remoteConnectionControllerName)
	close(rc.changed)
	rc.Stop()
}

// onRemove is executed when a remote cluster is explicitly disconnected
// (i.e., its configuration is removed). In this case, we need to drain
// all known entries, to properly clean up the status without requiring
// an agent restart.
func (rc *remoteCluster) onRemove(ctx context.Context) {
	rc.onStop()
	rc.Remove(ctx)

	rc.logger.Info("Remote cluster disconnected")
}

func (rc *remoteCluster) isReady() bool {
	rc.mutex.RLock()
	defer rc.mutex.RUnlock()

	return rc.isReadyLocked()
}

func (rc *remoteCluster) isReadyLocked() bool {
	return rc.backend != nil && rc.config != nil && (!rc.config.Required || rc.config.Retrieved)
}

func (rc *remoteCluster) status() *models.RemoteCluster {
	rc.mutex.RLock()
	defer rc.mutex.RUnlock()

	// This can happen when the controller in restartRemoteConnection is waiting
	// for the first connection to succeed.
	var backendStatus = "Waiting for initial connection to be established"
	if rc.backend != nil {
		var backendError error
		backendStatus, backendError = rc.backend.Status()
		if backendError != nil {
			backendStatus = backendError.Error()
		}

		if rc.etcdClusterID != "" {
			backendStatus += ", ID: " + rc.etcdClusterID
		}
	}

	status := &models.RemoteCluster{
		Name:        rc.name,
		Ready:       rc.isReadyLocked(),
		Connected:   rc.backend != nil,
		Status:      backendStatus,
		Config:      rc.config,
		NumFailures: int64(rc.failures),
		LastFailure: strfmt.DateTime(rc.lastFailure),
	}

	return status
}
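
// formatStatus is an illustrative helper (hypothetical, not part of the upstream
// file) showing how the fields assembled by status() might be rendered into a
// single human-readable line for debugging purposes.
func formatStatus(s *models.RemoteCluster) string {
	readiness := "not-ready"
	if s.Ready {
		readiness = "ready"
	}
	return fmt.Sprintf("%s: %s, status: %q, failures: %d (last: %s)",
		s.Name, readiness, s.Status, s.NumFailures, time.Time(s.LastFailure))
}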