github.com/cilium/cilium@v1.16.2/pkg/clustermesh/kvstoremesh/remote_cluster.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package kvstoremesh

import (
	"context"
	"fmt"
	"path"
	"sync"
	"time"

	"github.com/sirupsen/logrus"
	"k8s.io/utils/clock"

	"github.com/cilium/cilium/api/v1/models"
	"github.com/cilium/cilium/pkg/clustermesh/common"
	"github.com/cilium/cilium/pkg/clustermesh/types"
	cmutils "github.com/cilium/cilium/pkg/clustermesh/utils"
	"github.com/cilium/cilium/pkg/clustermesh/wait"
	identityCache "github.com/cilium/cilium/pkg/identity/cache"
	"github.com/cilium/cilium/pkg/ipcache"
	"github.com/cilium/cilium/pkg/kvstore"
	"github.com/cilium/cilium/pkg/kvstore/store"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging/logfields"
	nodeStore "github.com/cilium/cilium/pkg/node/store"
	serviceStore "github.com/cilium/cilium/pkg/service/store"
)

// remoteCluster represents a remote cluster other than the local one this
// service is running in.
type remoteCluster struct {
	name string

	localBackend kvstore.BackendOperations

	nodes      reflector
	services   reflector
	identities reflector
	ipcache    reflector

	// status is the function which fills the common part of the status.
	status common.StatusFunc

	cancel context.CancelFunc
	wg     sync.WaitGroup

	storeFactory store.Factory

	// synced tracks the initial synchronization of the remote cluster.
	synced synced
	// readyTimeout is the duration to wait for a connection to be established
	// before removing the cluster from readiness checks.
	readyTimeout time.Duration

	// disableDrainOnDisconnection disables the removal of cached data upon
	// cluster disconnection.
	disableDrainOnDisconnection bool

	logger logrus.FieldLogger
	clock  clock.Clock
}

func (rc *remoteCluster) Run(ctx context.Context, backend kvstore.BackendOperations, srccfg types.CiliumClusterConfig, ready chan<- error) {
	// Closing the synced.connected channel cancels the timeout goroutine.
	// Ensure we do not attempt to close the channel more than once.
	select {
	case <-rc.synced.connected:
	default:
		close(rc.synced.connected)
	}

	// The locally cached data is always exposed with the SyncedCanaries and
	// Cached capabilities enabled, regardless of the source configuration.
	dstcfg := types.CiliumClusterConfig{
		ID: srccfg.ID,
		Capabilities: types.CiliumClusterConfigCapabilities{
			SyncedCanaries:       true,
			Cached:               true,
			MaxConnectedClusters: srccfg.Capabilities.MaxConnectedClusters,
		},
	}

	stopAndWait, err := cmutils.EnforceClusterConfig(ctx, rc.name, dstcfg, rc.localBackend, rc.logger)
	defer stopAndWait()
	if err != nil {
		ready <- fmt.Errorf("failed to propagate cluster configuration: %w", err)
		close(ready)
		return
	}

	var mgr store.WatchStoreManager
	if srccfg.Capabilities.SyncedCanaries {
		mgr = rc.storeFactory.NewWatchStoreManager(backend, rc.name)
	} else {
		mgr = store.NewWatchStoreManagerImmediate(rc.name)
	}

	// If the remote cluster itself exposes cached data (i.e., it runs
	// KVStoreMesh), watch the cache prefix rather than the state prefix.
	adapter := func(prefix string) string { return prefix }
	if srccfg.Capabilities.Cached {
		adapter = kvstore.StateToCachePrefix
	}

	mgr.Register(adapter(nodeStore.NodeStorePrefix), func(ctx context.Context) {
		rc.nodes.watcher.Watch(ctx, backend, path.Join(adapter(nodeStore.NodeStorePrefix), rc.name))
	})

	mgr.Register(adapter(serviceStore.ServiceStorePrefix), func(ctx context.Context) {
		rc.services.watcher.Watch(ctx, backend, path.Join(adapter(serviceStore.ServiceStorePrefix), rc.name))
	})

	mgr.Register(adapter(ipcache.IPIdentitiesPath), func(ctx context.Context) {
		suffix := ipcache.DefaultAddressSpace
		if srccfg.Capabilities.Cached {
			suffix = rc.name
		}

		rc.ipcache.watcher.Watch(ctx, backend, path.Join(adapter(ipcache.IPIdentitiesPath), suffix))
	})

	mgr.Register(adapter(identityCache.IdentitiesPath), func(ctx context.Context) {
		var suffix string
		if srccfg.Capabilities.Cached {
			suffix = rc.name
		}

		rc.identities.watcher.Watch(ctx, backend, path.Join(adapter(identityCache.IdentitiesPath), suffix))
	})

	close(ready)
	mgr.Run(ctx)
}
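// For illustration only: assuming the standard "cilium/state" to
// "cilium/cache" mapping performed by kvstore.StateToCachePrefix, the data of
// a remote cluster named "foo" would be mirrored into the local kvstore under
// keys such as:
//
//	cilium/cache/nodes/v1/foo/...
//	cilium/cache/services/v1/foo/...
//	cilium/cache/identities/v1/foo/...
//	cilium/cache/ip/v1/foo/...
//
// The concrete prefixes are defined by the respective stores; the drain logic
// further below relies on the same mapping when removing them.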
func (rc *remoteCluster) Stop() {
	rc.cancel()
	rc.synced.Stop()
	rc.wg.Wait()
}

func (rc *remoteCluster) Remove(ctx context.Context) {
	if rc.disableDrainOnDisconnection {
		rc.logger.Warning("Remote cluster disconnected, but cached data removal is disabled. " +
			"Reconnecting to the same cluster without first restarting KVStoreMesh may lead to inconsistencies")
		return
	}

	const retries = 5
	var (
		retry   = 0
		backoff = 2 * time.Second
	)

	rc.logger.Info("Remote cluster disconnected: draining cached data")
	for {
		err := rc.drain(ctx, retry == 0)
		switch {
		case err == nil:
			rc.logger.Info("Successfully removed all cached data from kvstore")
			return
		case ctx.Err() != nil:
			return
		case retry == retries:
			rc.logger.WithError(err).Error(
				"Failed to remove cached data from kvstore, despite retries. Reconnecting to the " +
					"same cluster without first restarting KVStoreMesh may lead to inconsistencies")
			return
		}

		rc.logger.WithError(err).Warning("Failed to remove cached data from kvstore, retrying")
		select {
		case <-rc.clock.After(backoff):
			retry++
			backoff *= 2
		case <-ctx.Done():
			return
		}
	}
}
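// For illustration: with retries == 5 and an initial backoff of 2s that
// doubles after each failure, a persistently failing drain is retried after
// 2s, 4s, 8s, 16s and 32s, i.e., about one minute of cumulative waiting (not
// counting the time spent inside drain itself) before giving up. Only the
// first attempt (retry == 0) observes the drain grace period.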
// drain drains the cached data from the local kvstore. The cluster configuration
// is removed as the first step, to prevent bootstrapping agents from connecting
// while the rest of the cached data is being removed. Indeed, there's no point
// in retrieving incomplete data, and agents are expected to disconnect as well.
func (rc *remoteCluster) drain(ctx context.Context, withGracePeriod bool) (err error) {
	keys := []string{
		path.Join(kvstore.ClusterConfigPrefix, rc.name),
	}
	prefixes := []string{
		path.Join(kvstore.SyncedPrefix, rc.name),
		path.Join(kvstore.StateToCachePrefix(nodeStore.NodeStorePrefix), rc.name),
		path.Join(kvstore.StateToCachePrefix(serviceStore.ServiceStorePrefix), rc.name),
		path.Join(kvstore.StateToCachePrefix(identityCache.IdentitiesPath), rc.name),
		path.Join(kvstore.StateToCachePrefix(ipcache.IPIdentitiesPath), rc.name),
	}

	for _, key := range keys {
		if err = rc.localBackend.Delete(ctx, key); err != nil {
			return fmt.Errorf("deleting key %q: %w", key, err)
		}
	}

	if withGracePeriod {
		// Wait for the grace period before deleting all the cached data. This
		// allows Cilium agents to disconnect in the meanwhile, to reduce the
		// overhead on etcd and prevent issues in case KVStoreMesh is disabled
		// (as the removal of the configurations would cause the draining as
		// well). The cluster configuration is deleted before waiting to prevent
		// new agents from connecting in this time window.
		const drainGracePeriod = 3 * time.Minute
		rc.logger.WithField(logfields.Duration, drainGracePeriod).
			Info("Waiting before removing cached data from kvstore, to allow Cilium agents to disconnect")
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-rc.clock.After(drainGracePeriod):
			rc.logger.Info("Finished waiting before removing cached data from kvstore")
		}
	}

	for _, prefix := range prefixes {
		if err = rc.localBackend.DeletePrefix(ctx, prefix+"/"); err != nil {
			return fmt.Errorf("deleting prefix %q: %w", prefix+"/", err)
		}
	}

	return nil
}
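// Note on the trailing slash passed to DeletePrefix above: deleting the bare
// prefix (e.g., "cilium/cache/nodes/v1/foo" for a cluster named "foo", name
// used for illustration) would also match the keys of any cluster whose name
// merely starts with the same string (say, "foobar"); appending "/" restricts
// the deletion to the cluster actually being drained.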
// waitForConnection waits for a connection to be established to the remote cluster.
// If the connection is not established within the timeout, the remote cluster is
// removed from readiness checks.
func (rc *remoteCluster) waitForConnection(ctx context.Context) {
	select {
	case <-ctx.Done():
	case <-rc.synced.connected:
	case <-rc.clock.After(rc.readyTimeout):
		rc.logger.Info("Remote cluster did not connect within timeout, removing from readiness checks")
		// Mark all resources as done, so that readiness checks no longer
		// block on them.
		for {
			select {
			case <-rc.synced.resources.WaitChannel():
				return
			default:
				rc.synced.resources.Done()
			}
		}
	}
}

// Status returns the status of the remote cluster, including the number of
// cached entries and the synchronization state of each resource type.
func (rc *remoteCluster) Status() *models.RemoteCluster {
	status := rc.status()

	status.NumNodes = int64(rc.nodes.watcher.NumEntries())
	status.NumSharedServices = int64(rc.services.watcher.NumEntries())
	status.NumIdentities = int64(rc.identities.watcher.NumEntries())
	status.NumEndpoints = int64(rc.ipcache.watcher.NumEntries())

	status.Synced = &models.RemoteClusterSynced{
		Nodes:      rc.nodes.watcher.Synced(),
		Services:   rc.services.watcher.Synced(),
		Identities: rc.identities.watcher.Synced(),
		Endpoints:  rc.ipcache.watcher.Synced(),
	}

	status.Ready = status.Ready &&
		status.Synced.Nodes && status.Synced.Services &&
		status.Synced.Identities && status.Synced.Endpoints

	return status
}

// reflector mirrors a given prefix of the remote cluster into the local
// kvstore: the watcher observes the remote keys, and the syncer writes them
// into the local cache.
type reflector struct {
	watcher store.WatchStore
	syncer  syncer
}

// syncer propagates the updates and deletions observed by the watcher into
// the local kvstore, and signals the completion of the initial synchronization.
type syncer struct {
	store.SyncStore
	synced *lock.StoppableWaitGroup
}

func (o *syncer) OnUpdate(key store.Key) {
	o.UpsertKey(context.Background(), key)
}

func (o *syncer) OnDelete(key store.NamedKey) {
	o.DeleteKey(context.Background(), key)
}

func (o *syncer) OnSync(ctx context.Context) {
	o.Synced(ctx, func(context.Context) { o.synced.Done() })
}

func newReflector(local kvstore.BackendOperations, cluster, prefix string, factory store.Factory, synced *lock.StoppableWaitGroup) reflector {
	synced.Add()
	prefix = kvstore.StateToCachePrefix(prefix)
	syncer := syncer{
		SyncStore: factory.NewSyncStore(cluster, local, path.Join(prefix, cluster),
			store.WSSWithSyncedKeyOverride(prefix)),
		synced: synced,
	}

	watcher := factory.NewWatchStore(cluster, store.KVPairCreator, &syncer,
		store.RWSWithOnSyncCallback(syncer.OnSync),
	)

	return reflector{
		syncer:  syncer,
		watcher: watcher,
	}
}

// synced tracks the initial synchronization of the remote cluster resources.
type synced struct {
	wait.SyncedCommon
	resources *lock.StoppableWaitGroup
	connected chan struct{}
}

func newSynced() synced {
	return synced{
		SyncedCommon: wait.NewSyncedCommon(),
		resources:    lock.NewStoppableWaitGroup(),
		connected:    make(chan struct{}),
	}
}

// Resources waits for the initial synchronization of all resource types to
// complete, or for the context to be canceled.
func (s *synced) Resources(ctx context.Context) error {
	return s.Wait(ctx, s.resources.WaitChannel())
}
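// For illustration only (not part of the package API): a minimal sketch of
// how the pieces above might be wired together, assuming a hypothetical
// localBackend and store factory are available:
//
//	sync := newSynced()
//	rc := remoteCluster{
//		name:         "foo",
//		localBackend: localBackend,
//		storeFactory: factory,
//		synced:       sync,
//		nodes:        newReflector(localBackend, "foo", nodeStore.NodeStorePrefix, factory, sync.resources),
//		// services, identities and ipcache reflectors follow the same pattern.
//	}
//
// Run then watches the remote backend and mirrors each prefix locally, while
// synced.Resources unblocks once every reflector has completed its initial
// synchronization.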