github.com/cilium/cilium@v1.16.2/pkg/clustermesh/remote_cluster_test.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package clustermesh
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"net"
    10  	"sync"
    11  	"sync/atomic"
    12  	"testing"
    13  	"time"
    14  
    15  	"github.com/sirupsen/logrus"
    16  	"github.com/stretchr/testify/assert"
    17  	"github.com/stretchr/testify/require"
    18  
    19  	"github.com/cilium/cilium/pkg/clustermesh/common"
    20  	"github.com/cilium/cilium/pkg/clustermesh/types"
    21  	"github.com/cilium/cilium/pkg/identity"
    22  	"github.com/cilium/cilium/pkg/identity/cache"
    23  	"github.com/cilium/cilium/pkg/ipcache"
    24  	"github.com/cilium/cilium/pkg/kvstore"
    25  	"github.com/cilium/cilium/pkg/kvstore/store"
    26  	"github.com/cilium/cilium/pkg/lock"
    27  	"github.com/cilium/cilium/pkg/metrics"
    28  	nodeTypes "github.com/cilium/cilium/pkg/node/types"
    29  	serviceStore "github.com/cilium/cilium/pkg/service/store"
    30  	"github.com/cilium/cilium/pkg/source"
    31  	"github.com/cilium/cilium/pkg/testutils"
    32  	testidentity "github.com/cilium/cilium/pkg/testutils/identity"
    33  )
    34  
    35  // Configure a generous timeout to prevent flakes when running in a noisy CI environment.
    36  var (
    37  	tick    = 10 * time.Millisecond
    38  	timeout = 5 * time.Second
    39  )
    40  
    41  type remoteEtcdClientWrapper struct {
    42  	kvstore.BackendOperations
    43  	name                  string
    44  	syncedCanariesWatched bool
    45  }
    46  
    47  // Override the ListAndWatch method so that we can track whether the synced canaries prefix has been watched.
    48  func (w *remoteEtcdClientWrapper) ListAndWatch(ctx context.Context, prefix string, chanSize int) *kvstore.Watcher {
    49  	if prefix == fmt.Sprintf("cilium/synced/%s/", w.name) {
    50  		w.syncedCanariesWatched = true
    51  	}
    52  
    53  	return w.BackendOperations.ListAndWatch(ctx, prefix, chanSize)
    54  }
    55  
    56  type fakeIPCache struct{ updates atomic.Int32 }
    57  
    58  func (f *fakeIPCache) Delete(string, source.Source) bool { return false }
    59  func (f *fakeIPCache) Upsert(string, net.IP, uint8, *ipcache.K8sMetadata, ipcache.Identity) (bool, error) {
    60  	f.updates.Add(1)
    61  	return false, nil
    62  }
    63  
    64  func TestRemoteClusterRun(t *testing.T) {
    65  	testutils.IntegrationTest(t)
    66  
    67  	kvstore.SetupDummyWithConfigOpts(t, "etcd",
    68  		// Explicitly set higher QPS than the default to speedup the test
    69  		map[string]string{kvstore.EtcdRateLimitOption: "100"},
    70  	)
    71  
    72  	tests := []struct {
    73  		name   string
    74  		srccfg types.CiliumClusterConfig
    75  		kvs    map[string]string
    76  	}{
    77  		{
    78  			name:   "remote cluster has no capabilities",
    79  			srccfg: types.CiliumClusterConfig{ID: 1},
    80  			kvs: map[string]string{
    81  				"cilium/state/nodes/v1/foo/bar":        `{"name": "bar", "cluster": "foo", "clusterID": 1}`,
    82  				"cilium/state/services/v1/foo/baz/bar": `{"name": "bar", "namespace": "baz", "cluster": "foo", "clusterID": 1}`,
    83  				"cilium/state/identities/v1/id/65538":  `key1=value1;key2=value2;k8s:io.cilium.k8s.policy.cluster=foo`,
    84  				"cilium/state/ip/v1/default/1.1.1.1":   `{"IP": "1.1.1.1"}`,
    85  			},
    86  		},
    87  		{
    88  			name: "remote cluster supports sync canaries",
    89  			srccfg: types.CiliumClusterConfig{
    90  				ID: 255,
    91  				Capabilities: types.CiliumClusterConfigCapabilities{
    92  					SyncedCanaries:       true,
    93  					MaxConnectedClusters: 255,
    94  				},
    95  			},
    96  			kvs: map[string]string{
    97  				"cilium/state/nodes/v1/foo/bar":          `{"name": "bar", "cluster": "foo", "clusterID": 255}`,
    98  				"cilium/state/services/v1/foo/baz/bar":   `{"name": "bar", "namespace": "baz", "cluster": "foo", "clusterID": 255}`,
    99  				"cilium/state/identities/v1/id/16711681": `key1=value1;key2=value2;k8s:io.cilium.k8s.policy.cluster=foo`,
   100  				"cilium/state/ip/v1/default/1.1.1.1":     `{"IP": "1.1.1.1"}`,
   101  
   102  				"cilium/synced/foo/cilium/state/nodes/v1":      "true",
   103  				"cilium/synced/foo/cilium/state/services/v1":   "true",
   104  				"cilium/synced/foo/cilium/state/identities/v1": "true",
   105  				"cilium/synced/foo/cilium/state/ip/v1":         "true",
   106  			},
   107  		},
   108  		{
   109  			name: "remote cluster supports both sync canaries and cached prefixes",
   110  			srccfg: types.CiliumClusterConfig{
   111  				ID: 255,
   112  				Capabilities: types.CiliumClusterConfigCapabilities{
   113  					SyncedCanaries:       true,
   114  					Cached:               true,
   115  					MaxConnectedClusters: 255,
   116  				},
   117  			},
   118  			kvs: map[string]string{
   119  				"cilium/cache/nodes/v1/foo/bar":              `{"name": "bar", "cluster": "foo", "clusterID": 255}`,
   120  				"cilium/cache/services/v1/foo/baz/bar":       `{"name": "bar", "namespace": "baz", "cluster": "foo", "clusterID": 255}`,
   121  				"cilium/cache/identities/v1/foo/id/16711681": `key1=value1;key2=value2;k8s:io.cilium.k8s.policy.cluster=foo`,
   122  				"cilium/cache/ip/v1/foo/1.1.1.1":             `{"IP": "1.1.1.1"}`,
   123  
   124  				"cilium/synced/foo/cilium/cache/nodes/v1":      "true",
   125  				"cilium/synced/foo/cilium/cache/services/v1":   "true",
   126  				"cilium/synced/foo/cilium/cache/identities/v1": "true",
   127  				"cilium/synced/foo/cilium/cache/ip/v1":         "true",
   128  			},
   129  		},
   130  	}
   131  
   132  	store := store.NewFactory(store.MetricsProvider())
   133  	for _, tt := range tests {
   134  		t.Run(tt.name, func(t *testing.T) {
   135  			var wg sync.WaitGroup
   136  			ctx, cancel := context.WithCancel(context.Background())
   137  
   138  			// The nils are only used by k8s CRD identities. We default to kvstore.
   139  			allocator := cache.NewCachingIdentityAllocator(&testidentity.IdentityAllocatorOwnerMock{})
   140  			<-allocator.InitIdentityAllocator(nil)
   141  
   142  			t.Cleanup(func() {
   143  				cancel()
   144  				wg.Wait()
   145  
   146  				allocator.Close()
   147  				require.NoError(t, kvstore.Client().DeletePrefix(context.Background(), kvstore.BaseKeyPrefix))
   148  			})
   149  
   150  			// Populate the kvstore with the appropriate KV pairs
   151  			for key, value := range tt.kvs {
   152  				require.NoErrorf(t, kvstore.Client().Update(ctx, key, []byte(value), false), "Failed to set %s=%s", key, value)
   153  			}
   154  
   155  			var ipc fakeIPCache
   156  			cm := ClusterMesh{
   157  				conf: Configuration{
   158  					NodeObserver:          newNodesObserver(),
   159  					IPCache:               &ipc,
   160  					RemoteIdentityWatcher: allocator,
   161  					ClusterIDsManager:     NewClusterMeshUsedIDs(localClusterID),
   162  					Metrics:               NewMetrics(),
   163  					StoreFactory:          store,
   164  					ClusterInfo:           types.ClusterInfo{ID: localClusterID, Name: localClusterName, MaxConnectedClusters: 255},
   165  					Logger:                logrus.New(),
   166  				},
   167  				globalServices: common.NewGlobalServiceCache(metrics.NoOpGauge),
   168  			}
   169  			rc := cm.NewRemoteCluster("foo", nil).(*remoteCluster)
   170  			ready := make(chan error)
   171  
   172  			remoteClient := &remoteEtcdClientWrapper{
   173  				BackendOperations: kvstore.Client(),
   174  				name:              "foo",
   175  			}
   176  
   177  			wg.Add(1)
   178  			go func() {
   179  				rc.Run(ctx, remoteClient, tt.srccfg, ready)
   180  				wg.Done()
   181  			}()
   182  
   183  			require.NoError(t, <-ready, "rc.Run() failed")
   184  
   185  			// Assert that we correctly watch nodes
   186  			require.EventuallyWithT(t, func(c *assert.CollectT) {
   187  				assert.EqualValues(c, 1, rc.remoteNodes.NumEntries())
   188  			}, timeout, tick, "Nodes are not watched correctly")
   189  
   190  			// Assert that we correctly watch services
   191  			require.EventuallyWithT(t, func(c *assert.CollectT) {
   192  				assert.EqualValues(c, 1, rc.remoteServices.NumEntries())
   193  			}, timeout, tick, "Services are not watched correctly")
   194  
   195  			// Assert that we correctly watch ipcache entries
   196  			require.EventuallyWithT(t, func(c *assert.CollectT) {
   197  				assert.EqualValues(c, 1, ipc.updates.Load())
   198  			}, timeout, tick, "IPCache entries are not watched correctly")
   199  
   200  			// Assert that we correctly watch identities
   201  			require.EventuallyWithT(t, func(c *assert.CollectT) {
   202  				rc.mutex.RLock()
   203  				defer rc.mutex.RUnlock()
   204  				assert.EqualValues(c, 1, rc.remoteIdentityCache.NumEntries())
   205  			}, timeout, tick, "Identities are not watched correctly")
   206  
   207  			// Assert that synced canaries have been watched if expected
   208  			require.Equal(t, tt.srccfg.Capabilities.SyncedCanaries, remoteClient.syncedCanariesWatched)
   209  		})
   210  	}
   211  }
   212  
   213  type fakeObserver struct {
   214  	updates atomic.Uint32
   215  	deletes atomic.Uint32
   216  }
   217  
   218  func (o *fakeObserver) reset() {
   219  	o.updates.Store(0)
   220  	o.deletes.Store(0)
   221  }
   222  
   223  func (o *fakeObserver) NodeUpdated(_ nodeTypes.Node) { o.updates.Add(1) }
   224  func (o *fakeObserver) NodeDeleted(_ nodeTypes.Node) { o.deletes.Add(1) }
   225  
   226  func (o *fakeObserver) MergeExternalServiceUpdate(_ *serviceStore.ClusterService, swg *lock.StoppableWaitGroup) {
   227  	o.updates.Add(1)
   228  	swg.Done()
   229  }
   230  
   231  func (o *fakeObserver) MergeExternalServiceDelete(_ *serviceStore.ClusterService, swg *lock.StoppableWaitGroup) {
   232  	o.deletes.Add(1)
   233  	swg.Done()
   234  }
   235  
   236  func (o *fakeObserver) Upsert(string, net.IP, uint8, *ipcache.K8sMetadata, ipcache.Identity) (bool, error) {
   237  	o.updates.Add(1)
   238  	return false, nil
   239  }
   240  
   241  func (o *fakeObserver) Delete(string, source.Source) bool {
   242  	o.deletes.Add(1)
   243  	return false
   244  }
   245  
   246  func TestRemoteClusterClusterIDChange(t *testing.T) {
   247  	const cid1, cid2, cid3 = 10, 20, 30
   248  	testutils.IntegrationTest(t)
   249  
   250  	kvstore.SetupDummyWithConfigOpts(t, "etcd",
   251  		// Explicitly set higher QPS than the default to speedup the test
   252  		map[string]string{kvstore.EtcdRateLimitOption: "100"},
   253  	)
   254  
   255  	id := func(clusterID uint32) identity.NumericIdentity { return identity.NumericIdentity(clusterID<<16 + 9999) }
   256  	// Use the KVStoreMesh API to prevent the allocator from thinking that the
   257  	// identity belongs to the local cluster.
   258  	kvs := func(clusterID uint32) map[string]string {
   259  		return map[string]string{
   260  			"cilium/cache/nodes/v1/foo/bar":        fmt.Sprintf(`{"name": "bar", "cluster": "foo", "clusterID": %d}`, clusterID),
   261  			"cilium/cache/nodes/v1/foo/baz":        fmt.Sprintf(`{"name": "baz", "cluster": "foo", "clusterID": %d}`, clusterID),
   262  			"cilium/cache/nodes/v1/foo/qux":        fmt.Sprintf(`{"name": "qux", "cluster": "foo", "clusterID": %d}`, clusterID),
   263  			"cilium/cache/services/v1/foo/baz/bar": fmt.Sprintf(`{"name": "bar", "namespace": "baz", "cluster": "foo", "clusterID": %d, "shared": true}`, clusterID),
   264  			"cilium/cache/services/v1/foo/baz/qux": fmt.Sprintf(`{"name": "qux", "namespace": "baz", "cluster": "foo", "clusterID": %d, "shared": true}`, clusterID),
   265  			"cilium/cache/ip/v1/foo/1.1.1.1":       `{"IP": "1.1.1.1"}`,
   266  			"cilium/cache/ip/v1/foo/1.1.1.2":       `{"IP": "1.1.1.2"}`,
   267  			"cilium/cache/ip/v1/foo/1.1.1.3":       `{"IP": "1.1.1.3"}`,
   268  
   269  			fmt.Sprintf("cilium/cache/identities/v1/foo/id/%d", id(clusterID)): `key1=value1;key2=value2;k8s:io.cilium.k8s.policy.cluster=foo`,
   270  		}
   271  	}
   272  
   273  	store := store.NewFactory(store.MetricsProvider())
   274  	var wg sync.WaitGroup
   275  	ctx := context.Background()
   276  
   277  	// The nils are only used by k8s CRD identities. We default to kvstore.
   278  	allocator := cache.NewCachingIdentityAllocator(&testidentity.IdentityAllocatorOwnerMock{})
   279  	<-allocator.InitIdentityAllocator(nil)
   280  
   281  	t.Cleanup(func() {
   282  		allocator.Close()
   283  		require.NoError(t, kvstore.Client().DeletePrefix(context.Background(), kvstore.BaseKeyPrefix))
   284  	})
   285  
   286  	var obs fakeObserver
   287  	cm := ClusterMesh{
   288  		conf: Configuration{
   289  			NodeObserver:          &obs,
   290  			ServiceMerger:         &obs,
   291  			IPCache:               &obs,
   292  			RemoteIdentityWatcher: allocator,
   293  			ClusterIDsManager:     NewClusterMeshUsedIDs(localClusterID),
   294  			Metrics:               NewMetrics(),
   295  			StoreFactory:          store,
   296  			ClusterInfo:           types.ClusterInfo{ID: localClusterID, Name: localClusterName, MaxConnectedClusters: 255},
   297  			Logger:                logrus.New(),
   298  		},
   299  		globalServices: common.NewGlobalServiceCache(metrics.NoOpGauge),
   300  	}
   301  	rc := cm.NewRemoteCluster("foo", nil).(*remoteCluster)
   302  
   303  	fixture := func(t *testing.T, id uint32, run func(t *testing.T, ready <-chan error)) {
   304  		ctx, cancel := context.WithCancel(ctx)
   305  		ready := make(chan error)
   306  
   307  		defer func() {
   308  			cancel()
   309  			wg.Wait()
   310  		}()
   311  
   312  		wg.Add(1)
   313  		go func() {
   314  			cfg := types.CiliumClusterConfig{ID: id, Capabilities: types.CiliumClusterConfigCapabilities{Cached: true}}
   315  			rc.Run(ctx, kvstore.Client(), cfg, ready)
   316  			wg.Done()
   317  		}()
   318  
   319  		run(t, ready)
   320  	}
   321  
   322  	fixture(t, cid1, func(t *testing.T, ready <-chan error) {
   323  		require.NoError(t, <-ready, "rc.Run() failed")
   324  
   325  		// Populate the kvstore with the appropriate KV pairs
   326  		for key, value := range kvs(cid1) {
   327  			require.NoErrorf(t, kvstore.Client().Update(ctx, key, []byte(value), false), "Failed to set %s=%s", key, value)
   328  		}
   329  
   330  		require.EventuallyWithT(t, func(c *assert.CollectT) {
   331  			assert.EqualValues(c, 8, obs.updates.Load(), "Upsertions not observed correctly")
   332  			assert.EqualValues(c, 0, obs.deletes.Load(), "Deletions not observed correctly")
   333  			assert.NotNil(c, allocator.LookupIdentityByID(ctx, id(cid1)), "Identity upsertion not observed correctly")
   334  		}, timeout, tick)
   335  	})
   336  
   337  	// Reconnect the cluster with a different ID, and assert that a synthetic
   338  	// deletion event has been generated for all known entries.
   339  	obs.reset()
   340  	fixture(t, cid2, func(t *testing.T, ready <-chan error) {
   341  		require.NoError(t, <-ready, "rc.Run() failed")
   342  
   343  		require.EventuallyWithT(t, func(c *assert.CollectT) {
   344  			// The IP entries don't include the ClusterID, hence they are not
   345  			// filtered out by the validation, but propagated correctly.
   346  			assert.EqualValues(c, 3, obs.updates.Load(), "Upsertions not observed correctly")
   347  			assert.EqualValues(c, 8, obs.deletes.Load(), "Deletions not observed correctly")
   348  			assert.Nil(c, allocator.LookupIdentityByID(ctx, id(cid1)), "Identity deletion not observed correctly")
   349  		}, timeout, tick)
   350  
   351  		// Update the kvstore pairs with the new ClusterID
   352  		obs.reset()
   353  		for key, value := range kvs(cid2) {
   354  			require.NoErrorf(t, kvstore.Client().Update(ctx, key, []byte(value), false), "Failed to set %s=%s", key, value)
   355  		}
   356  
   357  		require.EventuallyWithT(t, func(c *assert.CollectT) {
   358  			assert.EqualValues(c, 8, obs.updates.Load(), "Upsertions not observed correctly")
   359  			assert.EqualValues(c, 0, obs.deletes.Load(), "Deletions not observed correctly")
   360  			assert.NotNil(c, allocator.LookupIdentityByID(ctx, id(cid2)), "Identity upsertion not observed correctly")
   361  		}, timeout, tick)
   362  	})
   363  
   364  	// Reconnect the cluster with yet another different ID, that is already reserved.
   365  	// Assert that a synthetic deletion event has been generated for all known entries
   366  	// also in this case (i.e., before actually reserving the Cluster ID).
   367  	obs.reset()
   368  	cm.conf.ClusterIDsManager.ReserveClusterID(cid3)
   369  	fixture(t, cid3, func(t *testing.T, ready <-chan error) {
   370  		require.ErrorContains(t, <-ready, "clusterID 30 is already used", "rc.Run() should have failed")
   371  
   372  		require.EventuallyWithT(t, func(c *assert.CollectT) {
   373  			assert.EqualValues(c, 0, obs.updates.Load(), "Upsertions not observed correctly")
   374  			assert.EqualValues(c, 8, obs.deletes.Load(), "Deletions not observed correctly")
   375  			assert.Nil(c, allocator.LookupIdentityByID(ctx, id(cid2)), "Identity deletion not observed correctly")
   376  		}, timeout, tick)
   377  	})
   378  }
   379  
   380  func TestIPCacheWatcherOpts(t *testing.T) {
   381  	tests := []struct {
   382  		name     string
   383  		config   *types.CiliumClusterConfig
   384  		extra    IPCacheWatcherOptsFn
   385  		expected int
   386  	}{
   387  		{
   388  			name:     "nil config",
   389  			expected: 0,
   390  		},
   391  		{
   392  			name:     "non-nil config",
   393  			config:   &types.CiliumClusterConfig{},
   394  			expected: 1,
   395  		},
   396  		{
   397  			name: "with extra opts",
   398  			extra: func(config *types.CiliumClusterConfig) []ipcache.IWOpt {
   399  				return []ipcache.IWOpt{ipcache.WithClusterID(10), ipcache.WithSelfDeletionProtection()}
   400  			},
   401  			expected: 2,
   402  		},
   403  	}
   404  
   405  	for _, tt := range tests {
   406  		t.Run(tt.name, func(t *testing.T) {
   407  			rc := remoteCluster{ipCacheWatcherExtraOpts: tt.extra}
   408  			// Asserting the number of returned options, because it is not
   409  			// possible to compare them, being functions.
   410  			assert.Len(t, rc.ipCacheWatcherOpts(tt.config), tt.expected)
   411  		})
   412  	}
   413  }