github.com/cilium/cilium@v1.16.2/pkg/clustermesh/kvstoremesh/remote_cluster.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package kvstoremesh
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"path"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/sirupsen/logrus"
    14  	"k8s.io/utils/clock"
    15  
    16  	"github.com/cilium/cilium/api/v1/models"
    17  	"github.com/cilium/cilium/pkg/clustermesh/common"
    18  	"github.com/cilium/cilium/pkg/clustermesh/types"
    19  	cmutils "github.com/cilium/cilium/pkg/clustermesh/utils"
    20  	"github.com/cilium/cilium/pkg/clustermesh/wait"
    21  	identityCache "github.com/cilium/cilium/pkg/identity/cache"
    22  	"github.com/cilium/cilium/pkg/ipcache"
    23  	"github.com/cilium/cilium/pkg/kvstore"
    24  	"github.com/cilium/cilium/pkg/kvstore/store"
    25  	"github.com/cilium/cilium/pkg/lock"
    26  	"github.com/cilium/cilium/pkg/logging/logfields"
    27  	nodeStore "github.com/cilium/cilium/pkg/node/store"
    28  	serviceStore "github.com/cilium/cilium/pkg/service/store"
    29  )
    30  
    31  // remoteCluster represents a remote cluster other than the local one this
    32  // service is running in
    33  type remoteCluster struct {
    34  	name string
    35  
    36  	localBackend kvstore.BackendOperations
    37  
    38  	nodes      reflector
    39  	services   reflector
    40  	identities reflector
    41  	ipcache    reflector
    42  
    43  	// status is the function which fills the common part of the status.
    44  	status common.StatusFunc
    45  
    46  	cancel context.CancelFunc
    47  	wg     sync.WaitGroup
    48  
    49  	storeFactory store.Factory
    50  
    51  	// synced tracks the initial synchronization of the remote cluster.
    52  	synced synced
    53  	// readyTimeout is the duration to wait for a connection to be established
    54  	// before removing the cluster from readiness checks.
    55  	readyTimeout time.Duration
    56  
    57  	// disableDrainOnDisconnection disables the removal of cached data upon
    58  	// cluster disconnection.
    59  	disableDrainOnDisconnection bool
    60  
    61  	logger logrus.FieldLogger
    62  	clock  clock.Clock
    63  }
    64  
    65  func (rc *remoteCluster) Run(ctx context.Context, backend kvstore.BackendOperations, srccfg types.CiliumClusterConfig, ready chan<- error) {
    66  	// Closing the synced.connected channel cancels the timeout goroutine.
    67  	// Ensure we do not attempt to close the channel more than once.
    68  	select {
    69  	case <-rc.synced.connected:
    70  	default:
    71  		close(rc.synced.connected)
    72  	}
    73  
    74  	dstcfg := types.CiliumClusterConfig{
    75  		ID: srccfg.ID,
    76  		Capabilities: types.CiliumClusterConfigCapabilities{
    77  			SyncedCanaries:       true,
    78  			Cached:               true,
    79  			MaxConnectedClusters: srccfg.Capabilities.MaxConnectedClusters,
    80  		},
    81  	}
    82  
    83  	stopAndWait, err := cmutils.EnforceClusterConfig(ctx, rc.name, dstcfg, rc.localBackend, rc.logger)
    84  	defer stopAndWait()
    85  	if err != nil {
    86  		ready <- fmt.Errorf("failed to propagate cluster configuration: %w", err)
    87  		close(ready)
    88  		return
    89  	}
    90  
    91  	var mgr store.WatchStoreManager
    92  	if srccfg.Capabilities.SyncedCanaries {
    93  		mgr = rc.storeFactory.NewWatchStoreManager(backend, rc.name)
    94  	} else {
    95  		mgr = store.NewWatchStoreManagerImmediate(rc.name)
    96  	}
    97  
    98  	adapter := func(prefix string) string { return prefix }
    99  	if srccfg.Capabilities.Cached {
   100  		adapter = kvstore.StateToCachePrefix
   101  	}
   102  
   103  	mgr.Register(adapter(nodeStore.NodeStorePrefix), func(ctx context.Context) {
   104  		rc.nodes.watcher.Watch(ctx, backend, path.Join(adapter(nodeStore.NodeStorePrefix), rc.name))
   105  	})
   106  
   107  	mgr.Register(adapter(serviceStore.ServiceStorePrefix), func(ctx context.Context) {
   108  		rc.services.watcher.Watch(ctx, backend, path.Join(adapter(serviceStore.ServiceStorePrefix), rc.name))
   109  	})
   110  
   111  	mgr.Register(adapter(ipcache.IPIdentitiesPath), func(ctx context.Context) {
   112  		suffix := ipcache.DefaultAddressSpace
   113  		if srccfg.Capabilities.Cached {
   114  			suffix = rc.name
   115  		}
   116  
   117  		rc.ipcache.watcher.Watch(ctx, backend, path.Join(adapter(ipcache.IPIdentitiesPath), suffix))
   118  	})
   119  
   120  	mgr.Register(adapter(identityCache.IdentitiesPath), func(ctx context.Context) {
   121  		var suffix string
   122  		if srccfg.Capabilities.Cached {
   123  			suffix = rc.name
   124  		}
   125  
   126  		rc.identities.watcher.Watch(ctx, backend, path.Join(adapter(identityCache.IdentitiesPath), suffix))
   127  	})
   128  
   129  	close(ready)
   130  	mgr.Run(ctx)
   131  }
   132  
   133  func (rc *remoteCluster) Stop() {
   134  	rc.cancel()
   135  	rc.synced.Stop()
   136  	rc.wg.Wait()
   137  }
   138  
   139  func (rc *remoteCluster) Remove(ctx context.Context) {
   140  	if rc.disableDrainOnDisconnection {
   141  		rc.logger.Warning("Remote cluster disconnected, but cached data removal is disabled. " +
   142  			"Reconnecting to the same cluster without first restarting KVStoreMesh may lead to inconsistencies")
   143  		return
   144  	}
   145  
   146  	const retries = 5
   147  	var (
   148  		retry   = 0
   149  		backoff = 2 * time.Second
   150  	)
   151  
   152  	rc.logger.Info("Remote cluster disconnected: draining cached data")
   153  	for {
   154  		err := rc.drain(ctx, retry == 0)
   155  		switch {
   156  		case err == nil:
   157  			rc.logger.Info("Successfully removed all cached data from kvstore")
   158  			return
   159  		case ctx.Err() != nil:
   160  			return
   161  		case retry == retries:
   162  			rc.logger.WithError(err).Error(
   163  				"Failed to remove cached data from kvstore, despite retries. Reconnecting to the " +
   164  					"same cluster without first restarting KVStoreMesh may lead to inconsistencies")
   165  			return
   166  		}
   167  
   168  		rc.logger.WithError(err).Warning("Failed to remove cached data from kvstore, retrying")
   169  		select {
   170  		case <-rc.clock.After(backoff):
   171  			retry++
   172  			backoff *= 2
   173  		case <-ctx.Done():
   174  			return
   175  		}
   176  	}
   177  }
   178  
   179  // drain drains the cached data from the local kvstore. The cluster configuration
   180  // is removed as first step, to prevent bootstrapping agents from connecting while
   181  // removing the rest of the cached data. Indeed, there's no point in retrieving
   182  // incomplete data, and it is expected that agents will be disconnecting as well.
   183  func (rc *remoteCluster) drain(ctx context.Context, withGracePeriod bool) (err error) {
   184  	keys := []string{
   185  		path.Join(kvstore.ClusterConfigPrefix, rc.name),
   186  	}
   187  	prefixes := []string{
   188  		path.Join(kvstore.SyncedPrefix, rc.name),
   189  		path.Join(kvstore.StateToCachePrefix(nodeStore.NodeStorePrefix), rc.name),
   190  		path.Join(kvstore.StateToCachePrefix(serviceStore.ServiceStorePrefix), rc.name),
   191  		path.Join(kvstore.StateToCachePrefix(identityCache.IdentitiesPath), rc.name),
   192  		path.Join(kvstore.StateToCachePrefix(ipcache.IPIdentitiesPath), rc.name),
   193  	}
   194  
   195  	for _, key := range keys {
   196  		if err = rc.localBackend.Delete(ctx, key); err != nil {
   197  			return fmt.Errorf("deleting key %q: %w", key, err)
   198  		}
   199  	}
   200  
   201  	if withGracePeriod {
   202  		// Wait for the grace period before deleting all the cached data. This
   203  		// allows Cilium agents to disconnect in the meanwhile, to reduce the
   204  		// overhead on etcd and prevent issues in case KVStoreMesh is disabled
   205  		// (as the removal of the configurations would cause the draining as
   206  		// well). The cluster configuration is deleted before waiting to prevent
   207  		// new agents from connecting in this time window.
   208  		const drainGracePeriod = 3 * time.Minute
   209  		rc.logger.WithField(logfields.Duration, drainGracePeriod).
   210  			Info("Waiting before removing cached data from kvstore, to allow Cilium agents to disconnect")
   211  		select {
   212  		case <-ctx.Done():
   213  			return ctx.Err()
   214  		case <-rc.clock.After(drainGracePeriod):
   215  			rc.logger.Info("Finished waiting before removing cached data from kvstore")
   216  		}
   217  	}
   218  
   219  	for _, prefix := range prefixes {
   220  		if err = rc.localBackend.DeletePrefix(ctx, prefix+"/"); err != nil {
   221  			return fmt.Errorf("deleting prefix %q: %w", prefix+"/", err)
   222  		}
   223  	}
   224  
   225  	return nil
   226  }
   227  
   228  // waitForConnection waits for a connection to be established to the remote cluster.
   229  // If the connection is not established within the timeout, the remote cluster is
   230  // removed from readiness checks.
   231  func (rc *remoteCluster) waitForConnection(ctx context.Context) {
   232  	select {
   233  	case <-ctx.Done():
   234  	case <-rc.synced.connected:
   235  	case <-time.After(rc.readyTimeout):
   236  		rc.logger.Info("Remote cluster did not connect within timeout, removing from readiness checks")
   237  		for {
   238  			select {
   239  			case <-rc.synced.resources.WaitChannel():
   240  				return
   241  			default:
   242  				rc.synced.resources.Done()
   243  			}
   244  		}
   245  	}
   246  }
   247  
   248  func (rc *remoteCluster) Status() *models.RemoteCluster {
   249  	status := rc.status()
   250  
   251  	status.NumNodes = int64(rc.nodes.watcher.NumEntries())
   252  	status.NumSharedServices = int64(rc.services.watcher.NumEntries())
   253  	status.NumIdentities = int64(rc.identities.watcher.NumEntries())
   254  	status.NumEndpoints = int64(rc.ipcache.watcher.NumEntries())
   255  
   256  	status.Synced = &models.RemoteClusterSynced{
   257  		Nodes:      rc.nodes.watcher.Synced(),
   258  		Services:   rc.services.watcher.Synced(),
   259  		Identities: rc.identities.watcher.Synced(),
   260  		Endpoints:  rc.ipcache.watcher.Synced(),
   261  	}
   262  
   263  	status.Ready = status.Ready &&
   264  		status.Synced.Nodes && status.Synced.Services &&
   265  		status.Synced.Identities && status.Synced.Endpoints
   266  
   267  	return status
   268  }
   269  
   270  type reflector struct {
   271  	watcher store.WatchStore
   272  	syncer  syncer
   273  }
   274  
   275  type syncer struct {
   276  	store.SyncStore
   277  	synced *lock.StoppableWaitGroup
   278  }
   279  
   280  func (o *syncer) OnUpdate(key store.Key) {
   281  	o.UpsertKey(context.Background(), key)
   282  }
   283  
   284  func (o *syncer) OnDelete(key store.NamedKey) {
   285  	o.DeleteKey(context.Background(), key)
   286  }
   287  
   288  func (o *syncer) OnSync(ctx context.Context) {
   289  	o.Synced(ctx, func(context.Context) { o.synced.Done() })
   290  }
   291  
   292  func newReflector(local kvstore.BackendOperations, cluster, prefix string, factory store.Factory, synced *lock.StoppableWaitGroup) reflector {
   293  	synced.Add()
   294  	prefix = kvstore.StateToCachePrefix(prefix)
   295  	syncer := syncer{
   296  		SyncStore: factory.NewSyncStore(cluster, local, path.Join(prefix, cluster),
   297  			store.WSSWithSyncedKeyOverride(prefix)),
   298  		synced: synced,
   299  	}
   300  
   301  	watcher := factory.NewWatchStore(cluster, store.KVPairCreator, &syncer,
   302  		store.RWSWithOnSyncCallback(syncer.OnSync),
   303  	)
   304  
   305  	return reflector{
   306  		syncer:  syncer,
   307  		watcher: watcher,
   308  	}
   309  }
   310  
   311  type synced struct {
   312  	wait.SyncedCommon
   313  	resources *lock.StoppableWaitGroup
   314  	connected chan struct{}
   315  }
   316  
   317  func newSynced() synced {
   318  	return synced{
   319  		SyncedCommon: wait.NewSyncedCommon(),
   320  		resources:    lock.NewStoppableWaitGroup(),
   321  		connected:    make(chan struct{}),
   322  	}
   323  }
   324  
   325  func (s *synced) Resources(ctx context.Context) error {
   326  	return s.Wait(ctx, s.resources.WaitChannel())
   327  }