github.com/cilium/cilium@v1.16.2/pkg/clustermesh/kvstoremesh/kvstoremesh.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package kvstoremesh
     5  
     6  import (
     7  	"cmp"
     8  	"context"
     9  	"slices"
    10  	"time"
    11  
    12  	"github.com/cilium/hive/cell"
    13  	"github.com/cilium/hive/job"
    14  	"github.com/sirupsen/logrus"
    15  	"github.com/spf13/pflag"
    16  	"k8s.io/utils/clock"
    17  
    18  	"github.com/cilium/cilium/api/v1/models"
    19  	"github.com/cilium/cilium/clustermesh-apiserver/syncstate"
    20  	"github.com/cilium/cilium/pkg/clustermesh/common"
    21  	"github.com/cilium/cilium/pkg/clustermesh/types"
    22  	"github.com/cilium/cilium/pkg/clustermesh/wait"
    23  	identityCache "github.com/cilium/cilium/pkg/identity/cache"
    24  	"github.com/cilium/cilium/pkg/ipcache"
    25  	"github.com/cilium/cilium/pkg/kvstore"
    26  	"github.com/cilium/cilium/pkg/kvstore/store"
    27  	"github.com/cilium/cilium/pkg/logging/logfields"
    28  	nodeStore "github.com/cilium/cilium/pkg/node/store"
    29  	"github.com/cilium/cilium/pkg/promise"
    30  	serviceStore "github.com/cilium/cilium/pkg/service/store"
    31  )
    32  
    33  type Config struct {
    34  	PerClusterReadyTimeout time.Duration
    35  	GlobalReadyTimeout     time.Duration
    36  
    37  	DisableDrainOnDisconnection bool
    38  }
    39  
    40  var DefaultConfig = Config{
    41  	PerClusterReadyTimeout: 15 * time.Second,
    42  	GlobalReadyTimeout:     10 * time.Minute,
    43  }
    44  
    45  func (def Config) Flags(flags *pflag.FlagSet) {
    46  	flags.Duration("per-cluster-ready-timeout", def.PerClusterReadyTimeout, "Remote clusters will be disregarded for readiness checks if a connection cannot be established within this duration")
    47  	flags.Duration("global-ready-timeout", def.GlobalReadyTimeout, "KVStoreMesh will be considered ready even if any remote clusters have failed to synchronize within this duration")
    48  
    49  	flags.Bool("disable-drain-on-disconnection", def.DisableDrainOnDisconnection, "Do not drain cached data upon cluster disconnection")
    50  	flags.MarkHidden("disable-drain-on-disconnection")
    51  }
    52  
    53  // KVStoreMesh is a cache of multiple remote clusters
    54  type KVStoreMesh struct {
    55  	common common.ClusterMesh
    56  	config Config
    57  
    58  	// backend is the interface to operate the local kvstore
    59  	backend        kvstore.BackendOperations
    60  	backendPromise promise.Promise[kvstore.BackendOperations]
    61  
    62  	storeFactory store.Factory
    63  
    64  	logger logrus.FieldLogger
    65  
    66  	// clock allows to override the clock for testing purposes
    67  	clock clock.Clock
    68  }
    69  
    70  type params struct {
    71  	cell.In
    72  
    73  	Config
    74  
    75  	ClusterInfo  types.ClusterInfo
    76  	CommonConfig common.Config
    77  
    78  	BackendPromise promise.Promise[kvstore.BackendOperations]
    79  
    80  	Metrics      common.Metrics
    81  	StoreFactory store.Factory
    82  
    83  	Logger logrus.FieldLogger
    84  }
    85  
    86  func newKVStoreMesh(lc cell.Lifecycle, params params) *KVStoreMesh {
    87  	km := KVStoreMesh{
    88  		config:         params.Config,
    89  		backendPromise: params.BackendPromise,
    90  		storeFactory:   params.StoreFactory,
    91  		logger:         params.Logger,
    92  		clock:          clock.RealClock{},
    93  	}
    94  	km.common = common.NewClusterMesh(common.Configuration{
    95  		Config:           params.CommonConfig,
    96  		ClusterInfo:      params.ClusterInfo,
    97  		NewRemoteCluster: km.newRemoteCluster,
    98  		Metrics:          params.Metrics,
    99  	})
   100  
   101  	lc.Append(&km)
   102  
   103  	// The "common" Start hook needs to be executed after that the kvstoremesh one
   104  	// terminated, to ensure that the backend promise has already been resolved.
   105  	lc.Append(km.common)
   106  
   107  	return &km
   108  }
   109  
   110  type SyncWaiterParams struct {
   111  	cell.In
   112  
   113  	KVStoreMesh *KVStoreMesh
   114  	SyncState   syncstate.SyncState
   115  	Lifecycle   cell.Lifecycle
   116  	JobGroup    job.Group
   117  	Health      cell.Health
   118  }
   119  
   120  func RegisterSyncWaiter(p SyncWaiterParams) {
   121  	syncedCallback := p.SyncState.WaitForResource()
   122  	p.SyncState.Stop()
   123  
   124  	p.JobGroup.Add(
   125  		job.OneShot("kvstoremesh-sync-waiter", func(ctx context.Context, health cell.Health) error {
   126  			return p.KVStoreMesh.synced(ctx, syncedCallback)
   127  		}),
   128  	)
   129  }
   130  
   131  func (km *KVStoreMesh) Start(ctx cell.HookContext) error {
   132  	backend, err := km.backendPromise.Await(ctx)
   133  	if err != nil {
   134  		return err
   135  	}
   136  
   137  	km.backend = backend
   138  	return nil
   139  }
   140  
   141  func (km *KVStoreMesh) Stop(cell.HookContext) error {
   142  	return nil
   143  }
   144  
   145  func (km *KVStoreMesh) newRemoteCluster(name string, status common.StatusFunc) common.RemoteCluster {
   146  	ctx, cancel := context.WithCancel(context.Background())
   147  
   148  	synced := newSynced()
   149  	defer synced.resources.Stop()
   150  
   151  	rc := &remoteCluster{
   152  		name:         name,
   153  		localBackend: km.backend,
   154  
   155  		cancel: cancel,
   156  
   157  		nodes:        newReflector(km.backend, name, nodeStore.NodeStorePrefix, km.storeFactory, synced.resources),
   158  		services:     newReflector(km.backend, name, serviceStore.ServiceStorePrefix, km.storeFactory, synced.resources),
   159  		identities:   newReflector(km.backend, name, identityCache.IdentitiesPath, km.storeFactory, synced.resources),
   160  		ipcache:      newReflector(km.backend, name, ipcache.IPIdentitiesPath, km.storeFactory, synced.resources),
   161  		status:       status,
   162  		storeFactory: km.storeFactory,
   163  		synced:       synced,
   164  		readyTimeout: km.config.PerClusterReadyTimeout,
   165  		logger:       km.logger.WithField(logfields.ClusterName, name),
   166  		clock:        km.clock,
   167  
   168  		disableDrainOnDisconnection: km.config.DisableDrainOnDisconnection,
   169  	}
   170  
   171  	run := func(fn func(context.Context)) {
   172  		rc.wg.Add(1)
   173  		go func() {
   174  			fn(ctx)
   175  			rc.wg.Done()
   176  		}()
   177  	}
   178  
   179  	run(rc.nodes.syncer.Run)
   180  	run(rc.services.syncer.Run)
   181  	run(rc.identities.syncer.Run)
   182  	run(rc.ipcache.syncer.Run)
   183  
   184  	run(rc.waitForConnection)
   185  
   186  	return rc
   187  }
   188  
   189  // synced returns once all remote clusters have been synchronized or the global
   190  // timeout has been reached. The given syncCallback is always executed before
   191  // the function returns.
   192  func (km *KVStoreMesh) synced(ctx context.Context, syncCallback func(context.Context)) error {
   193  	ctx, cancel := context.WithTimeout(ctx, km.config.GlobalReadyTimeout)
   194  	defer func() {
   195  		syncCallback(ctx)
   196  		cancel()
   197  	}()
   198  
   199  	waiters := make([]wait.Fn, 0)
   200  	km.common.ForEachRemoteCluster(func(rci common.RemoteCluster) error {
   201  		rc := rci.(*remoteCluster)
   202  		waiters = append(waiters, rc.synced.Resources)
   203  		return nil
   204  	})
   205  
   206  	if err := wait.ForAll(ctx, waiters); err != nil {
   207  		km.logger.WithError(err).Info("Failed to wait for synchronization. KVStoreMesh will now handle requests, but some clusters may not have been synchronized.")
   208  		return err
   209  	}
   210  
   211  	return nil
   212  }
   213  
   214  // Status returns the status of the ClusterMesh subsystem
   215  func (km *KVStoreMesh) status() []*models.RemoteCluster {
   216  	var clusters []*models.RemoteCluster
   217  
   218  	km.common.ForEachRemoteCluster(func(rci common.RemoteCluster) error {
   219  		rc := rci.(*remoteCluster)
   220  		clusters = append(clusters, rc.Status())
   221  		return nil
   222  	})
   223  
   224  	// Sort the remote clusters information to ensure consistent ordering.
   225  	slices.SortFunc(clusters,
   226  		func(a, b *models.RemoteCluster) int { return cmp.Compare(a.Name, b.Name) })
   227  
   228  	return clusters
   229  }