github.com/cilium/cilium@v1.16.2/pkg/clustermesh/kvstoremesh/kvstoremesh.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package kvstoremesh 5 6 import ( 7 "cmp" 8 "context" 9 "slices" 10 "time" 11 12 "github.com/cilium/hive/cell" 13 "github.com/cilium/hive/job" 14 "github.com/sirupsen/logrus" 15 "github.com/spf13/pflag" 16 "k8s.io/utils/clock" 17 18 "github.com/cilium/cilium/api/v1/models" 19 "github.com/cilium/cilium/clustermesh-apiserver/syncstate" 20 "github.com/cilium/cilium/pkg/clustermesh/common" 21 "github.com/cilium/cilium/pkg/clustermesh/types" 22 "github.com/cilium/cilium/pkg/clustermesh/wait" 23 identityCache "github.com/cilium/cilium/pkg/identity/cache" 24 "github.com/cilium/cilium/pkg/ipcache" 25 "github.com/cilium/cilium/pkg/kvstore" 26 "github.com/cilium/cilium/pkg/kvstore/store" 27 "github.com/cilium/cilium/pkg/logging/logfields" 28 nodeStore "github.com/cilium/cilium/pkg/node/store" 29 "github.com/cilium/cilium/pkg/promise" 30 serviceStore "github.com/cilium/cilium/pkg/service/store" 31 ) 32 33 type Config struct { 34 PerClusterReadyTimeout time.Duration 35 GlobalReadyTimeout time.Duration 36 37 DisableDrainOnDisconnection bool 38 } 39 40 var DefaultConfig = Config{ 41 PerClusterReadyTimeout: 15 * time.Second, 42 GlobalReadyTimeout: 10 * time.Minute, 43 } 44 45 func (def Config) Flags(flags *pflag.FlagSet) { 46 flags.Duration("per-cluster-ready-timeout", def.PerClusterReadyTimeout, "Remote clusters will be disregarded for readiness checks if a connection cannot be established within this duration") 47 flags.Duration("global-ready-timeout", def.GlobalReadyTimeout, "KVStoreMesh will be considered ready even if any remote clusters have failed to synchronize within this duration") 48 49 flags.Bool("disable-drain-on-disconnection", def.DisableDrainOnDisconnection, "Do not drain cached data upon cluster disconnection") 50 flags.MarkHidden("disable-drain-on-disconnection") 51 } 52 53 // KVStoreMesh is a cache of multiple remote clusters 54 type KVStoreMesh struct { 55 common common.ClusterMesh 56 config Config 57 58 // backend is the interface to operate the local kvstore 59 backend kvstore.BackendOperations 60 backendPromise promise.Promise[kvstore.BackendOperations] 61 62 storeFactory store.Factory 63 64 logger logrus.FieldLogger 65 66 // clock allows to override the clock for testing purposes 67 clock clock.Clock 68 } 69 70 type params struct { 71 cell.In 72 73 Config 74 75 ClusterInfo types.ClusterInfo 76 CommonConfig common.Config 77 78 BackendPromise promise.Promise[kvstore.BackendOperations] 79 80 Metrics common.Metrics 81 StoreFactory store.Factory 82 83 Logger logrus.FieldLogger 84 } 85 86 func newKVStoreMesh(lc cell.Lifecycle, params params) *KVStoreMesh { 87 km := KVStoreMesh{ 88 config: params.Config, 89 backendPromise: params.BackendPromise, 90 storeFactory: params.StoreFactory, 91 logger: params.Logger, 92 clock: clock.RealClock{}, 93 } 94 km.common = common.NewClusterMesh(common.Configuration{ 95 Config: params.CommonConfig, 96 ClusterInfo: params.ClusterInfo, 97 NewRemoteCluster: km.newRemoteCluster, 98 Metrics: params.Metrics, 99 }) 100 101 lc.Append(&km) 102 103 // The "common" Start hook needs to be executed after that the kvstoremesh one 104 // terminated, to ensure that the backend promise has already been resolved. 105 lc.Append(km.common) 106 107 return &km 108 } 109 110 type SyncWaiterParams struct { 111 cell.In 112 113 KVStoreMesh *KVStoreMesh 114 SyncState syncstate.SyncState 115 Lifecycle cell.Lifecycle 116 JobGroup job.Group 117 Health cell.Health 118 } 119 120 func RegisterSyncWaiter(p SyncWaiterParams) { 121 syncedCallback := p.SyncState.WaitForResource() 122 p.SyncState.Stop() 123 124 p.JobGroup.Add( 125 job.OneShot("kvstoremesh-sync-waiter", func(ctx context.Context, health cell.Health) error { 126 return p.KVStoreMesh.synced(ctx, syncedCallback) 127 }), 128 ) 129 } 130 131 func (km *KVStoreMesh) Start(ctx cell.HookContext) error { 132 backend, err := km.backendPromise.Await(ctx) 133 if err != nil { 134 return err 135 } 136 137 km.backend = backend 138 return nil 139 } 140 141 func (km *KVStoreMesh) Stop(cell.HookContext) error { 142 return nil 143 } 144 145 func (km *KVStoreMesh) newRemoteCluster(name string, status common.StatusFunc) common.RemoteCluster { 146 ctx, cancel := context.WithCancel(context.Background()) 147 148 synced := newSynced() 149 defer synced.resources.Stop() 150 151 rc := &remoteCluster{ 152 name: name, 153 localBackend: km.backend, 154 155 cancel: cancel, 156 157 nodes: newReflector(km.backend, name, nodeStore.NodeStorePrefix, km.storeFactory, synced.resources), 158 services: newReflector(km.backend, name, serviceStore.ServiceStorePrefix, km.storeFactory, synced.resources), 159 identities: newReflector(km.backend, name, identityCache.IdentitiesPath, km.storeFactory, synced.resources), 160 ipcache: newReflector(km.backend, name, ipcache.IPIdentitiesPath, km.storeFactory, synced.resources), 161 status: status, 162 storeFactory: km.storeFactory, 163 synced: synced, 164 readyTimeout: km.config.PerClusterReadyTimeout, 165 logger: km.logger.WithField(logfields.ClusterName, name), 166 clock: km.clock, 167 168 disableDrainOnDisconnection: km.config.DisableDrainOnDisconnection, 169 } 170 171 run := func(fn func(context.Context)) { 172 rc.wg.Add(1) 173 go func() { 174 fn(ctx) 175 rc.wg.Done() 176 }() 177 } 178 179 run(rc.nodes.syncer.Run) 180 run(rc.services.syncer.Run) 181 run(rc.identities.syncer.Run) 182 run(rc.ipcache.syncer.Run) 183 184 run(rc.waitForConnection) 185 186 return rc 187 } 188 189 // synced returns once all remote clusters have been synchronized or the global 190 // timeout has been reached. The given syncCallback is always executed before 191 // the function returns. 192 func (km *KVStoreMesh) synced(ctx context.Context, syncCallback func(context.Context)) error { 193 ctx, cancel := context.WithTimeout(ctx, km.config.GlobalReadyTimeout) 194 defer func() { 195 syncCallback(ctx) 196 cancel() 197 }() 198 199 waiters := make([]wait.Fn, 0) 200 km.common.ForEachRemoteCluster(func(rci common.RemoteCluster) error { 201 rc := rci.(*remoteCluster) 202 waiters = append(waiters, rc.synced.Resources) 203 return nil 204 }) 205 206 if err := wait.ForAll(ctx, waiters); err != nil { 207 km.logger.WithError(err).Info("Failed to wait for synchronization. KVStoreMesh will now handle requests, but some clusters may not have been synchronized.") 208 return err 209 } 210 211 return nil 212 } 213 214 // Status returns the status of the ClusterMesh subsystem 215 func (km *KVStoreMesh) status() []*models.RemoteCluster { 216 var clusters []*models.RemoteCluster 217 218 km.common.ForEachRemoteCluster(func(rci common.RemoteCluster) error { 219 rc := rci.(*remoteCluster) 220 clusters = append(clusters, rc.Status()) 221 return nil 222 }) 223 224 // Sort the remote clusters information to ensure consistent ordering. 225 slices.SortFunc(clusters, 226 func(a, b *models.RemoteCluster) int { return cmp.Compare(a.Name, b.Name) }) 227 228 return clusters 229 }