github.com/cilium/cilium@v1.16.2/pkg/clustermesh/remote_cluster_test.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package clustermesh 5 6 import ( 7 "context" 8 "fmt" 9 "net" 10 "sync" 11 "sync/atomic" 12 "testing" 13 "time" 14 15 "github.com/sirupsen/logrus" 16 "github.com/stretchr/testify/assert" 17 "github.com/stretchr/testify/require" 18 19 "github.com/cilium/cilium/pkg/clustermesh/common" 20 "github.com/cilium/cilium/pkg/clustermesh/types" 21 "github.com/cilium/cilium/pkg/identity" 22 "github.com/cilium/cilium/pkg/identity/cache" 23 "github.com/cilium/cilium/pkg/ipcache" 24 "github.com/cilium/cilium/pkg/kvstore" 25 "github.com/cilium/cilium/pkg/kvstore/store" 26 "github.com/cilium/cilium/pkg/lock" 27 "github.com/cilium/cilium/pkg/metrics" 28 nodeTypes "github.com/cilium/cilium/pkg/node/types" 29 serviceStore "github.com/cilium/cilium/pkg/service/store" 30 "github.com/cilium/cilium/pkg/source" 31 "github.com/cilium/cilium/pkg/testutils" 32 testidentity "github.com/cilium/cilium/pkg/testutils/identity" 33 ) 34 35 // Configure a generous timeout to prevent flakes when running in a noisy CI environment. 36 var ( 37 tick = 10 * time.Millisecond 38 timeout = 5 * time.Second 39 ) 40 41 type remoteEtcdClientWrapper struct { 42 kvstore.BackendOperations 43 name string 44 syncedCanariesWatched bool 45 } 46 47 // Override the ListAndWatch method so that we can track whether the synced canaries prefix has been watched. 48 func (w *remoteEtcdClientWrapper) ListAndWatch(ctx context.Context, prefix string, chanSize int) *kvstore.Watcher { 49 if prefix == fmt.Sprintf("cilium/synced/%s/", w.name) { 50 w.syncedCanariesWatched = true 51 } 52 53 return w.BackendOperations.ListAndWatch(ctx, prefix, chanSize) 54 } 55 56 type fakeIPCache struct{ updates atomic.Int32 } 57 58 func (f *fakeIPCache) Delete(string, source.Source) bool { return false } 59 func (f *fakeIPCache) Upsert(string, net.IP, uint8, *ipcache.K8sMetadata, ipcache.Identity) (bool, error) { 60 f.updates.Add(1) 61 return false, nil 62 } 63 64 func TestRemoteClusterRun(t *testing.T) { 65 testutils.IntegrationTest(t) 66 67 kvstore.SetupDummyWithConfigOpts(t, "etcd", 68 // Explicitly set higher QPS than the default to speedup the test 69 map[string]string{kvstore.EtcdRateLimitOption: "100"}, 70 ) 71 72 tests := []struct { 73 name string 74 srccfg types.CiliumClusterConfig 75 kvs map[string]string 76 }{ 77 { 78 name: "remote cluster has no capabilities", 79 srccfg: types.CiliumClusterConfig{ID: 1}, 80 kvs: map[string]string{ 81 "cilium/state/nodes/v1/foo/bar": `{"name": "bar", "cluster": "foo", "clusterID": 1}`, 82 "cilium/state/services/v1/foo/baz/bar": `{"name": "bar", "namespace": "baz", "cluster": "foo", "clusterID": 1}`, 83 "cilium/state/identities/v1/id/65538": `key1=value1;key2=value2;k8s:io.cilium.k8s.policy.cluster=foo`, 84 "cilium/state/ip/v1/default/1.1.1.1": `{"IP": "1.1.1.1"}`, 85 }, 86 }, 87 { 88 name: "remote cluster supports sync canaries", 89 srccfg: types.CiliumClusterConfig{ 90 ID: 255, 91 Capabilities: types.CiliumClusterConfigCapabilities{ 92 SyncedCanaries: true, 93 MaxConnectedClusters: 255, 94 }, 95 }, 96 kvs: map[string]string{ 97 "cilium/state/nodes/v1/foo/bar": `{"name": "bar", "cluster": "foo", "clusterID": 255}`, 98 "cilium/state/services/v1/foo/baz/bar": `{"name": "bar", "namespace": "baz", "cluster": "foo", "clusterID": 255}`, 99 "cilium/state/identities/v1/id/16711681": `key1=value1;key2=value2;k8s:io.cilium.k8s.policy.cluster=foo`, 100 "cilium/state/ip/v1/default/1.1.1.1": `{"IP": "1.1.1.1"}`, 101 102 "cilium/synced/foo/cilium/state/nodes/v1": "true", 103 "cilium/synced/foo/cilium/state/services/v1": "true", 104 "cilium/synced/foo/cilium/state/identities/v1": "true", 105 "cilium/synced/foo/cilium/state/ip/v1": "true", 106 }, 107 }, 108 { 109 name: "remote cluster supports both sync canaries and cached prefixes", 110 srccfg: types.CiliumClusterConfig{ 111 ID: 255, 112 Capabilities: types.CiliumClusterConfigCapabilities{ 113 SyncedCanaries: true, 114 Cached: true, 115 MaxConnectedClusters: 255, 116 }, 117 }, 118 kvs: map[string]string{ 119 "cilium/cache/nodes/v1/foo/bar": `{"name": "bar", "cluster": "foo", "clusterID": 255}`, 120 "cilium/cache/services/v1/foo/baz/bar": `{"name": "bar", "namespace": "baz", "cluster": "foo", "clusterID": 255}`, 121 "cilium/cache/identities/v1/foo/id/16711681": `key1=value1;key2=value2;k8s:io.cilium.k8s.policy.cluster=foo`, 122 "cilium/cache/ip/v1/foo/1.1.1.1": `{"IP": "1.1.1.1"}`, 123 124 "cilium/synced/foo/cilium/cache/nodes/v1": "true", 125 "cilium/synced/foo/cilium/cache/services/v1": "true", 126 "cilium/synced/foo/cilium/cache/identities/v1": "true", 127 "cilium/synced/foo/cilium/cache/ip/v1": "true", 128 }, 129 }, 130 } 131 132 store := store.NewFactory(store.MetricsProvider()) 133 for _, tt := range tests { 134 t.Run(tt.name, func(t *testing.T) { 135 var wg sync.WaitGroup 136 ctx, cancel := context.WithCancel(context.Background()) 137 138 // The nils are only used by k8s CRD identities. We default to kvstore. 139 allocator := cache.NewCachingIdentityAllocator(&testidentity.IdentityAllocatorOwnerMock{}) 140 <-allocator.InitIdentityAllocator(nil) 141 142 t.Cleanup(func() { 143 cancel() 144 wg.Wait() 145 146 allocator.Close() 147 require.NoError(t, kvstore.Client().DeletePrefix(context.Background(), kvstore.BaseKeyPrefix)) 148 }) 149 150 // Populate the kvstore with the appropriate KV pairs 151 for key, value := range tt.kvs { 152 require.NoErrorf(t, kvstore.Client().Update(ctx, key, []byte(value), false), "Failed to set %s=%s", key, value) 153 } 154 155 var ipc fakeIPCache 156 cm := ClusterMesh{ 157 conf: Configuration{ 158 NodeObserver: newNodesObserver(), 159 IPCache: &ipc, 160 RemoteIdentityWatcher: allocator, 161 ClusterIDsManager: NewClusterMeshUsedIDs(localClusterID), 162 Metrics: NewMetrics(), 163 StoreFactory: store, 164 ClusterInfo: types.ClusterInfo{ID: localClusterID, Name: localClusterName, MaxConnectedClusters: 255}, 165 Logger: logrus.New(), 166 }, 167 globalServices: common.NewGlobalServiceCache(metrics.NoOpGauge), 168 } 169 rc := cm.NewRemoteCluster("foo", nil).(*remoteCluster) 170 ready := make(chan error) 171 172 remoteClient := &remoteEtcdClientWrapper{ 173 BackendOperations: kvstore.Client(), 174 name: "foo", 175 } 176 177 wg.Add(1) 178 go func() { 179 rc.Run(ctx, remoteClient, tt.srccfg, ready) 180 wg.Done() 181 }() 182 183 require.NoError(t, <-ready, "rc.Run() failed") 184 185 // Assert that we correctly watch nodes 186 require.EventuallyWithT(t, func(c *assert.CollectT) { 187 assert.EqualValues(c, 1, rc.remoteNodes.NumEntries()) 188 }, timeout, tick, "Nodes are not watched correctly") 189 190 // Assert that we correctly watch services 191 require.EventuallyWithT(t, func(c *assert.CollectT) { 192 assert.EqualValues(c, 1, rc.remoteServices.NumEntries()) 193 }, timeout, tick, "Services are not watched correctly") 194 195 // Assert that we correctly watch ipcache entries 196 require.EventuallyWithT(t, func(c *assert.CollectT) { 197 assert.EqualValues(c, 1, ipc.updates.Load()) 198 }, timeout, tick, "IPCache entries are not watched correctly") 199 200 // Assert that we correctly watch identities 201 require.EventuallyWithT(t, func(c *assert.CollectT) { 202 rc.mutex.RLock() 203 defer rc.mutex.RUnlock() 204 assert.EqualValues(c, 1, rc.remoteIdentityCache.NumEntries()) 205 }, timeout, tick, "Identities are not watched correctly") 206 207 // Assert that synced canaries have been watched if expected 208 require.Equal(t, tt.srccfg.Capabilities.SyncedCanaries, remoteClient.syncedCanariesWatched) 209 }) 210 } 211 } 212 213 type fakeObserver struct { 214 updates atomic.Uint32 215 deletes atomic.Uint32 216 } 217 218 func (o *fakeObserver) reset() { 219 o.updates.Store(0) 220 o.deletes.Store(0) 221 } 222 223 func (o *fakeObserver) NodeUpdated(_ nodeTypes.Node) { o.updates.Add(1) } 224 func (o *fakeObserver) NodeDeleted(_ nodeTypes.Node) { o.deletes.Add(1) } 225 226 func (o *fakeObserver) MergeExternalServiceUpdate(_ *serviceStore.ClusterService, swg *lock.StoppableWaitGroup) { 227 o.updates.Add(1) 228 swg.Done() 229 } 230 231 func (o *fakeObserver) MergeExternalServiceDelete(_ *serviceStore.ClusterService, swg *lock.StoppableWaitGroup) { 232 o.deletes.Add(1) 233 swg.Done() 234 } 235 236 func (o *fakeObserver) Upsert(string, net.IP, uint8, *ipcache.K8sMetadata, ipcache.Identity) (bool, error) { 237 o.updates.Add(1) 238 return false, nil 239 } 240 241 func (o *fakeObserver) Delete(string, source.Source) bool { 242 o.deletes.Add(1) 243 return false 244 } 245 246 func TestRemoteClusterClusterIDChange(t *testing.T) { 247 const cid1, cid2, cid3 = 10, 20, 30 248 testutils.IntegrationTest(t) 249 250 kvstore.SetupDummyWithConfigOpts(t, "etcd", 251 // Explicitly set higher QPS than the default to speedup the test 252 map[string]string{kvstore.EtcdRateLimitOption: "100"}, 253 ) 254 255 id := func(clusterID uint32) identity.NumericIdentity { return identity.NumericIdentity(clusterID<<16 + 9999) } 256 // Use the KVStoreMesh API to prevent the allocator from thinking that the 257 // identity belongs to the local cluster. 258 kvs := func(clusterID uint32) map[string]string { 259 return map[string]string{ 260 "cilium/cache/nodes/v1/foo/bar": fmt.Sprintf(`{"name": "bar", "cluster": "foo", "clusterID": %d}`, clusterID), 261 "cilium/cache/nodes/v1/foo/baz": fmt.Sprintf(`{"name": "baz", "cluster": "foo", "clusterID": %d}`, clusterID), 262 "cilium/cache/nodes/v1/foo/qux": fmt.Sprintf(`{"name": "qux", "cluster": "foo", "clusterID": %d}`, clusterID), 263 "cilium/cache/services/v1/foo/baz/bar": fmt.Sprintf(`{"name": "bar", "namespace": "baz", "cluster": "foo", "clusterID": %d, "shared": true}`, clusterID), 264 "cilium/cache/services/v1/foo/baz/qux": fmt.Sprintf(`{"name": "qux", "namespace": "baz", "cluster": "foo", "clusterID": %d, "shared": true}`, clusterID), 265 "cilium/cache/ip/v1/foo/1.1.1.1": `{"IP": "1.1.1.1"}`, 266 "cilium/cache/ip/v1/foo/1.1.1.2": `{"IP": "1.1.1.2"}`, 267 "cilium/cache/ip/v1/foo/1.1.1.3": `{"IP": "1.1.1.3"}`, 268 269 fmt.Sprintf("cilium/cache/identities/v1/foo/id/%d", id(clusterID)): `key1=value1;key2=value2;k8s:io.cilium.k8s.policy.cluster=foo`, 270 } 271 } 272 273 store := store.NewFactory(store.MetricsProvider()) 274 var wg sync.WaitGroup 275 ctx := context.Background() 276 277 // The nils are only used by k8s CRD identities. We default to kvstore. 278 allocator := cache.NewCachingIdentityAllocator(&testidentity.IdentityAllocatorOwnerMock{}) 279 <-allocator.InitIdentityAllocator(nil) 280 281 t.Cleanup(func() { 282 allocator.Close() 283 require.NoError(t, kvstore.Client().DeletePrefix(context.Background(), kvstore.BaseKeyPrefix)) 284 }) 285 286 var obs fakeObserver 287 cm := ClusterMesh{ 288 conf: Configuration{ 289 NodeObserver: &obs, 290 ServiceMerger: &obs, 291 IPCache: &obs, 292 RemoteIdentityWatcher: allocator, 293 ClusterIDsManager: NewClusterMeshUsedIDs(localClusterID), 294 Metrics: NewMetrics(), 295 StoreFactory: store, 296 ClusterInfo: types.ClusterInfo{ID: localClusterID, Name: localClusterName, MaxConnectedClusters: 255}, 297 Logger: logrus.New(), 298 }, 299 globalServices: common.NewGlobalServiceCache(metrics.NoOpGauge), 300 } 301 rc := cm.NewRemoteCluster("foo", nil).(*remoteCluster) 302 303 fixture := func(t *testing.T, id uint32, run func(t *testing.T, ready <-chan error)) { 304 ctx, cancel := context.WithCancel(ctx) 305 ready := make(chan error) 306 307 defer func() { 308 cancel() 309 wg.Wait() 310 }() 311 312 wg.Add(1) 313 go func() { 314 cfg := types.CiliumClusterConfig{ID: id, Capabilities: types.CiliumClusterConfigCapabilities{Cached: true}} 315 rc.Run(ctx, kvstore.Client(), cfg, ready) 316 wg.Done() 317 }() 318 319 run(t, ready) 320 } 321 322 fixture(t, cid1, func(t *testing.T, ready <-chan error) { 323 require.NoError(t, <-ready, "rc.Run() failed") 324 325 // Populate the kvstore with the appropriate KV pairs 326 for key, value := range kvs(cid1) { 327 require.NoErrorf(t, kvstore.Client().Update(ctx, key, []byte(value), false), "Failed to set %s=%s", key, value) 328 } 329 330 require.EventuallyWithT(t, func(c *assert.CollectT) { 331 assert.EqualValues(c, 8, obs.updates.Load(), "Upsertions not observed correctly") 332 assert.EqualValues(c, 0, obs.deletes.Load(), "Deletions not observed correctly") 333 assert.NotNil(c, allocator.LookupIdentityByID(ctx, id(cid1)), "Identity upsertion not observed correctly") 334 }, timeout, tick) 335 }) 336 337 // Reconnect the cluster with a different ID, and assert that a synthetic 338 // deletion event has been generated for all known entries. 339 obs.reset() 340 fixture(t, cid2, func(t *testing.T, ready <-chan error) { 341 require.NoError(t, <-ready, "rc.Run() failed") 342 343 require.EventuallyWithT(t, func(c *assert.CollectT) { 344 // The IP entries don't include the ClusterID, hence they are not 345 // filtered out by the validation, but propagated correctly. 346 assert.EqualValues(c, 3, obs.updates.Load(), "Upsertions not observed correctly") 347 assert.EqualValues(c, 8, obs.deletes.Load(), "Deletions not observed correctly") 348 assert.Nil(c, allocator.LookupIdentityByID(ctx, id(cid1)), "Identity deletion not observed correctly") 349 }, timeout, tick) 350 351 // Update the kvstore pairs with the new ClusterID 352 obs.reset() 353 for key, value := range kvs(cid2) { 354 require.NoErrorf(t, kvstore.Client().Update(ctx, key, []byte(value), false), "Failed to set %s=%s", key, value) 355 } 356 357 require.EventuallyWithT(t, func(c *assert.CollectT) { 358 assert.EqualValues(c, 8, obs.updates.Load(), "Upsertions not observed correctly") 359 assert.EqualValues(c, 0, obs.deletes.Load(), "Deletions not observed correctly") 360 assert.NotNil(c, allocator.LookupIdentityByID(ctx, id(cid2)), "Identity upsertion not observed correctly") 361 }, timeout, tick) 362 }) 363 364 // Reconnect the cluster with yet another different ID, that is already reserved. 365 // Assert that a synthetic deletion event has been generated for all known entries 366 // also in this case (i.e., before actually reserving the Cluster ID). 367 obs.reset() 368 cm.conf.ClusterIDsManager.ReserveClusterID(cid3) 369 fixture(t, cid3, func(t *testing.T, ready <-chan error) { 370 require.ErrorContains(t, <-ready, "clusterID 30 is already used", "rc.Run() should have failed") 371 372 require.EventuallyWithT(t, func(c *assert.CollectT) { 373 assert.EqualValues(c, 0, obs.updates.Load(), "Upsertions not observed correctly") 374 assert.EqualValues(c, 8, obs.deletes.Load(), "Deletions not observed correctly") 375 assert.Nil(c, allocator.LookupIdentityByID(ctx, id(cid2)), "Identity deletion not observed correctly") 376 }, timeout, tick) 377 }) 378 } 379 380 func TestIPCacheWatcherOpts(t *testing.T) { 381 tests := []struct { 382 name string 383 config *types.CiliumClusterConfig 384 extra IPCacheWatcherOptsFn 385 expected int 386 }{ 387 { 388 name: "nil config", 389 expected: 0, 390 }, 391 { 392 name: "non-nil config", 393 config: &types.CiliumClusterConfig{}, 394 expected: 1, 395 }, 396 { 397 name: "with extra opts", 398 extra: func(config *types.CiliumClusterConfig) []ipcache.IWOpt { 399 return []ipcache.IWOpt{ipcache.WithClusterID(10), ipcache.WithSelfDeletionProtection()} 400 }, 401 expected: 2, 402 }, 403 } 404 405 for _, tt := range tests { 406 t.Run(tt.name, func(t *testing.T) { 407 rc := remoteCluster{ipCacheWatcherExtraOpts: tt.extra} 408 // Asserting the number of returned options, because it is not 409 // possible to compare them, being functions. 410 assert.Len(t, rc.ipCacheWatcherOpts(tt.config), tt.expected) 411 }) 412 } 413 }