github.com/cilium/cilium@v1.16.2/pkg/clustermesh/clustermesh_test.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package clustermesh

import (
	"context"
	"fmt"
	"os"
	"path"
	"sync"
	"testing"

	"github.com/cilium/hive/hivetest"
	"github.com/sirupsen/logrus"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/cilium/cilium/pkg/clustermesh/common"
	"github.com/cilium/cilium/pkg/clustermesh/types"
	cmutils "github.com/cilium/cilium/pkg/clustermesh/utils"
	"github.com/cilium/cilium/pkg/identity/cache"
	"github.com/cilium/cilium/pkg/ipcache"
	"github.com/cilium/cilium/pkg/kvstore"
	"github.com/cilium/cilium/pkg/kvstore/store"
	"github.com/cilium/cilium/pkg/lock"
	nodeStore "github.com/cilium/cilium/pkg/node/store"
	nodeTypes "github.com/cilium/cilium/pkg/node/types"
	"github.com/cilium/cilium/pkg/testutils"
	testidentity "github.com/cilium/cilium/pkg/testutils/identity"
)

const (
	localClusterID   = 99
	localClusterName = "local"
)

type testObserver struct {
	nodes      map[string]*nodeTypes.Node
	nodesMutex lock.RWMutex
}

func newNodesObserver() *testObserver {
	return &testObserver{nodes: make(map[string]*nodeTypes.Node)}
}

func (o *testObserver) NodeUpdated(no nodeTypes.Node) {
	o.nodesMutex.Lock()
	o.nodes[no.Fullname()] = &no
	o.nodesMutex.Unlock()
}

func (o *testObserver) NodeDeleted(no nodeTypes.Node) {
	o.nodesMutex.Lock()
	delete(o.nodes, no.Fullname())
	o.nodesMutex.Unlock()
}

func TestClusterMesh(t *testing.T) {
	testutils.IntegrationTest(t)

	var wg sync.WaitGroup
	ctx, cancel := context.WithCancel(context.Background())
	defer func() {
		cancel()
		wg.Wait()
	}()

	kvstore.SetupDummy(t, "etcd")

	// The nils are only used by k8s CRD identities. We default to kvstore.
	mgr := cache.NewCachingIdentityAllocator(&testidentity.IdentityAllocatorOwnerMock{})
	<-mgr.InitIdentityAllocator(nil)
	t.Cleanup(mgr.Close)

	dir := t.TempDir()
	etcdConfig := []byte(fmt.Sprintf("endpoints:\n- %s\n", kvstore.EtcdDummyAddress()))

	// cluster3 doesn't have cluster configuration on kvstore.
	// We should not be able to establish a connection in this case.
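	// Note: "test2" receives ClusterID 1 in the loop below, but no configuration
	// file is ever written for it, so it is never connected; its presence shifts
	// the assigned IDs so that cluster1 and cluster2 end up with ClusterIDs 2 and
	// 3, which the assertions further down rely on.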
	for i, name := range []string{"test2", "cluster1", "cluster2"} {
		config := types.CiliumClusterConfig{
			ID: uint32(i + 1),
			Capabilities: types.CiliumClusterConfigCapabilities{
				MaxConnectedClusters: 255,
			},
		}

		if name == "cluster2" {
			// Cluster2 supports synced canaries
			config.Capabilities.SyncedCanaries = true
		}

		err := cmutils.SetClusterConfig(ctx, name, config, kvstore.Client())
		require.NoErrorf(t, err, "Failed to set cluster config for %s", name)
	}

	config1 := path.Join(dir, "cluster1")
	require.NoError(t, os.WriteFile(config1, etcdConfig, 0644), "Failed to write config file for cluster1")

	config2 := path.Join(dir, "cluster2")
	require.NoError(t, os.WriteFile(config2, etcdConfig, 0644), "Failed to write config file for cluster2")

	config3 := path.Join(dir, "cluster3")
	require.NoError(t, os.WriteFile(config3, etcdConfig, 0644), "Failed to write config file for cluster3")

	ipc := ipcache.NewIPCache(&ipcache.Configuration{
		Context: ctx,
	})
	t.Cleanup(func() { ipc.Shutdown() })

	usedIDs := NewClusterMeshUsedIDs(localClusterID)
	storeFactory := store.NewFactory(store.MetricsProvider())
	nodesObserver := newNodesObserver()
	cm := NewClusterMesh(hivetest.Lifecycle(t), Configuration{
		Config:                common.Config{ClusterMeshConfig: dir},
		ClusterInfo:           types.ClusterInfo{ID: localClusterID, Name: localClusterName, MaxConnectedClusters: 255},
		NodeObserver:          nodesObserver,
		RemoteIdentityWatcher: mgr,
		IPCache:               ipc,
		ClusterIDsManager:     usedIDs,
		Metrics:               NewMetrics(),
		CommonMetrics:         common.MetricsProvider(subsystem)(),
		StoreFactory:          storeFactory,
		Logger:                logrus.New(),
	})
	require.NotNil(t, cm, "Failed to initialize clustermesh")
	// cluster2 is the cluster which is tested with sync canaries
	nodesWSS := storeFactory.NewSyncStore("cluster2", kvstore.Client(), nodeStore.NodeStorePrefix)
	wg.Add(1)
	go func() {
		nodesWSS.Run(ctx)
		wg.Done()
	}()
	nodeNames := []string{"foo", "bar", "baz"}

	// wait for the two expected clusters to appear in the list of cm clusters
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		assert.Equal(c, 2, cm.NumReadyClusters())
	}, timeout, tick, "Clusters did not become ready in time")

	// Ensure that ClusterIDs are reserved correctly after connect
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		usedIDs.UsedClusterIDsMutex.Lock()
		defer usedIDs.UsedClusterIDsMutex.Unlock()

		assert.Contains(c, usedIDs.UsedClusterIDs, uint32(2))
		assert.Contains(c, usedIDs.UsedClusterIDs, uint32(3))
		assert.Len(c, usedIDs.UsedClusterIDs, 2)
	}, timeout, tick, "Cluster IDs were not reserved correctly")

	// Reconnect cluster with changed ClusterID
	config := types.CiliumClusterConfig{
		ID: 255,
		Capabilities: types.CiliumClusterConfigCapabilities{
			MaxConnectedClusters: 255,
		},
	}
	err := cmutils.SetClusterConfig(ctx, "cluster1", config, kvstore.Client())
	require.NoErrorf(t, err, "Failed to set cluster config for cluster1")
	// Ugly hack to trigger config update
	etcdConfigNew := append(etcdConfig, []byte("\n")...)
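	// Rewriting cluster1's configuration file with the extra trailing newline
	// changes the file content without changing the endpoints; this is what
	// forces cluster1's configuration to be reloaded so that the new ClusterID
	// (255) is picked up from the kvstore, as verified below.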
	require.NoError(t, os.WriteFile(config1, etcdConfigNew, 0644), "Failed to write config file for cluster1")

	require.EventuallyWithT(t, func(c *assert.CollectT) {
		usedIDs.UsedClusterIDsMutex.Lock()
		defer usedIDs.UsedClusterIDsMutex.Unlock()

		// Ensure that the old ClusterID for cluster1 is released
		// and the new ClusterID is reserved.
		assert.NotContains(c, usedIDs.UsedClusterIDs, uint32(2))
		assert.Contains(c, usedIDs.UsedClusterIDs, uint32(255))
	}, timeout, tick, "Reserved cluster IDs not updated correctly")

	for cluster, id := range map[string]uint32{"cluster1": 255, "cluster2": 3, "cluster3": 4} {
		for _, name := range nodeNames {
			require.NoErrorf(t, nodesWSS.UpsertKey(ctx, &nodeTypes.Node{Name: name, Cluster: cluster, ClusterID: id}),
				"Failed upserting node %s/%s into kvstore", cluster, name)
		}
	}

	// Write the sync canary for cluster2
	require.NoError(t, nodesWSS.Synced(ctx), "Failed writing the synced key into kvstore")

	// wait for all cm nodes in both clusters to appear in the node list
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		nodesObserver.nodesMutex.RLock()
		defer nodesObserver.nodesMutex.RUnlock()
		assert.Len(c, nodesObserver.nodes, 2*len(nodeNames))
	}, timeout, tick, "Nodes not watched correctly")

	require.NoError(t, os.Remove(config2), "Failed to remove config file for cluster2")

	// wait for the removed cluster to disappear
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		assert.Equal(c, 1, cm.NumReadyClusters())
	}, timeout, tick, "Cluster2 was not correctly removed")

	// Make sure that the ID is freed
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		usedIDs.UsedClusterIDsMutex.Lock()
		defer usedIDs.UsedClusterIDsMutex.Unlock()
		assert.NotContains(c, usedIDs.UsedClusterIDs, uint32(2))
		assert.Len(c, usedIDs.UsedClusterIDs, 1)
	}, timeout, tick, "Cluster IDs were not freed correctly")

	// wait for the nodes of the removed cluster to disappear
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		nodesObserver.nodesMutex.RLock()
		defer nodesObserver.nodesMutex.RUnlock()
		assert.Len(c, nodesObserver.nodes, 1*len(nodeNames))
	}, timeout, tick, "Nodes were not drained correctly")

	require.NoError(t, os.Remove(config1), "Failed to remove config file for cluster1")
	require.NoError(t, os.Remove(config3), "Failed to remove config file for cluster3")

	// wait for the removed clusters to disappear
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		assert.Equal(c, 0, cm.NumReadyClusters())
	}, timeout, tick, "Clusters were not correctly removed")

	// wait for the nodes of the removed clusters to disappear
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		nodesObserver.nodesMutex.RLock()
		defer nodesObserver.nodesMutex.RUnlock()
		assert.Len(c, nodesObserver.nodes, 0)
	}, timeout, tick, "Nodes were not drained correctly")

	// Make sure that all IDs are freed
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		usedIDs.UsedClusterIDsMutex.Lock()
		defer usedIDs.UsedClusterIDsMutex.Unlock()
		assert.Len(c, usedIDs.UsedClusterIDs, 0)
	}, timeout, tick, "Cluster IDs were not freed correctly")
}