github.com/cilium/cilium@v1.16.2/pkg/clustermesh/operator/clustermesh.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package operator 5 6 import ( 7 "cmp" 8 "context" 9 "fmt" 10 "slices" 11 "sync" 12 "sync/atomic" 13 14 "github.com/cilium/hive/cell" 15 "github.com/sirupsen/logrus" 16 17 "github.com/cilium/cilium/api/v1/models" 18 "github.com/cilium/cilium/pkg/clustermesh/common" 19 "github.com/cilium/cilium/pkg/clustermesh/wait" 20 "github.com/cilium/cilium/pkg/kvstore/store" 21 "github.com/cilium/cilium/pkg/logging/logfields" 22 serviceStore "github.com/cilium/cilium/pkg/service/store" 23 ) 24 25 // clusterMesh is a cache of multiple remote clusters 26 type clusterMesh struct { 27 // common implements the common logic to connect to remote clusters. 28 common common.ClusterMesh 29 30 logger logrus.FieldLogger 31 Metrics Metrics 32 33 // globalServices is a list of all global services. The datastructure 34 // is protected by its own mutex inside the structure. 35 globalServices *common.GlobalServiceCache 36 37 storeFactory store.Factory 38 39 started atomic.Bool 40 clusterAddHooks []func(string) 41 clusterDeleteHooks []func(string) 42 clusterServiceUpdateHooks []func(*serviceStore.ClusterService) 43 clusterServiceDeleteHooks []func(*serviceStore.ClusterService) 44 45 syncTimeoutConfig wait.TimeoutConfig 46 syncTimeoutLogOnce sync.Once 47 } 48 49 // ClusterMesh is the interface corresponding to the clusterMesh struct to expose 50 // its public methods to other Cilium packages. 51 type ClusterMesh interface { 52 // RegisterClusterAddHook register a hook when a cluster is added to the mesh. 53 // This should NOT be called after the Start hook. 54 RegisterClusterAddHook(clusterAddHook func(string)) 55 // RegisterClusterDeleteHook register a hook when a cluster is removed from the mesh. 56 // This should NOT be called after the Start hook. 57 RegisterClusterDeleteHook(clusterDeleteHook func(string)) 58 // RegisterClusterServiceUpdateHook register a hook when a service in the mesh is updated. 59 // This should NOT be called after the Start hook. 60 RegisterClusterServiceUpdateHook(clusterServiceUpdateHook func(*serviceStore.ClusterService)) 61 // RegisterClusterServiceDeleteHook register a hook when a service in the mesh is deleted. 62 // This should NOT be called after the Start hook. 63 RegisterClusterServiceDeleteHook(clusterServiceDeleteHook func(*serviceStore.ClusterService)) 64 65 ServicesSynced(ctx context.Context) error 66 GlobalServices() *common.GlobalServiceCache 67 } 68 69 func newClusterMesh(lc cell.Lifecycle, params clusterMeshParams) (*clusterMesh, ClusterMesh) { 70 if params.ClusterMeshConfig == "" || !params.Cfg.ClusterMeshEnableEndpointSync { 71 return nil, nil 72 } 73 74 params.Logger.Info("Operator ClusterMesh component enabled") 75 76 cm := clusterMesh{ 77 logger: params.Logger, 78 globalServices: common.NewGlobalServiceCache( 79 params.Metrics.TotalGlobalServices.WithLabelValues(params.ClusterInfo.Name), 80 ), 81 storeFactory: params.StoreFactory, 82 syncTimeoutConfig: params.TimeoutConfig, 83 } 84 cm.common = common.NewClusterMesh(common.Configuration{ 85 Config: params.Config, 86 ClusterInfo: params.ClusterInfo, 87 NewRemoteCluster: cm.newRemoteCluster, 88 ServiceResolver: params.ServiceResolver, 89 Metrics: params.CommonMetrics, 90 }) 91 92 lc.Append(cm.common) 93 lc.Append(&cm) 94 return &cm, &cm 95 } 96 97 // RegisterClusterAddHook register a hook when a cluster is added to the mesh. 98 // This should NOT be called after the Start hook. 99 func (cm *clusterMesh) RegisterClusterAddHook(clusterAddHook func(string)) { 100 if cm.started.Load() { 101 panic(fmt.Errorf("can't call RegisterClusterAddHook after the Start hook")) 102 } 103 cm.clusterAddHooks = append(cm.clusterAddHooks, clusterAddHook) 104 } 105 106 // RegisterClusterDeleteHook register a hook when a cluster is removed from the mesh. 107 // This should NOT be called after the Start hook. 108 func (cm *clusterMesh) RegisterClusterDeleteHook(clusterDeleteHook func(string)) { 109 if cm.started.Load() { 110 panic(fmt.Errorf("can't call RegisterClusterDeleteHook after the Start hook")) 111 } 112 cm.clusterDeleteHooks = append(cm.clusterDeleteHooks, clusterDeleteHook) 113 } 114 115 // RegisterClusterServiceUpdateHook register a hook when a service in the mesh is updated. 116 // This should NOT be called after the Start hook. 117 func (cm *clusterMesh) RegisterClusterServiceUpdateHook(clusterServiceUpdateHook func(*serviceStore.ClusterService)) { 118 if cm.started.Load() { 119 panic(fmt.Errorf("can't call RegisterClusterServiceUpdateHook after the Start hook")) 120 } 121 cm.clusterServiceUpdateHooks = append(cm.clusterServiceUpdateHooks, clusterServiceUpdateHook) 122 } 123 124 // RegisterClusterServiceDeleteHook register a hook when a service in the mesh is deleted. 125 // This should NOT be called after the Start hook. 126 func (cm *clusterMesh) RegisterClusterServiceDeleteHook(clusterServiceDeleteHook func(*serviceStore.ClusterService)) { 127 if cm.started.Load() { 128 panic(fmt.Errorf("can't call RegisterClusterServiceDeleteHook after the Start hook")) 129 } 130 cm.clusterServiceDeleteHooks = append(cm.clusterServiceDeleteHooks, clusterServiceDeleteHook) 131 } 132 133 func (cm *clusterMesh) GlobalServices() *common.GlobalServiceCache { 134 return cm.globalServices 135 } 136 137 func (cm *clusterMesh) newRemoteCluster(name string, status common.StatusFunc) common.RemoteCluster { 138 rc := &remoteCluster{ 139 name: name, 140 globalServices: cm.globalServices, 141 storeFactory: cm.storeFactory, 142 synced: newSynced(), 143 status: status, 144 clusterAddHooks: cm.clusterAddHooks, 145 clusterDeleteHooks: cm.clusterDeleteHooks, 146 } 147 148 rc.remoteServices = cm.storeFactory.NewWatchStore( 149 name, 150 serviceStore.KeyCreator( 151 serviceStore.ClusterNameValidator(name), 152 serviceStore.NamespacedNameValidator(), 153 ), 154 common.NewSharedServicesObserver( 155 cm.logger.WithField(logfields.ClusterName, name), 156 cm.globalServices, 157 func(svc *serviceStore.ClusterService) { 158 for _, hook := range cm.clusterServiceUpdateHooks { 159 hook(svc) 160 } 161 }, 162 func(svc *serviceStore.ClusterService) { 163 for _, hook := range cm.clusterServiceDeleteHooks { 164 hook(svc) 165 } 166 }, 167 ), 168 store.RWSWithOnSyncCallback(func(ctx context.Context) { rc.synced.services.Stop() }), 169 ) 170 171 return rc 172 } 173 174 func (cm *clusterMesh) Start(cell.HookContext) error { 175 cm.started.Store(true) 176 return nil 177 } 178 179 func (cm *clusterMesh) Stop(cell.HookContext) error { 180 return nil 181 } 182 183 // ServicesSynced returns after that either the initial list of shared services has 184 // been received from all remote clusters, or the maximum wait period controlled by the 185 // clustermesh-sync-timeout flag elapsed. It returns an error if the given context expired. 186 func (cm *clusterMesh) ServicesSynced(ctx context.Context) error { 187 return cm.synced(ctx, func(rc *remoteCluster) wait.Fn { return rc.synced.Services }) 188 } 189 190 func (cm *clusterMesh) synced(ctx context.Context, toWaitFn func(*remoteCluster) wait.Fn) error { 191 wctx, cancel := context.WithTimeout(ctx, cm.syncTimeoutConfig.Timeout()) 192 defer cancel() 193 194 waiters := make([]wait.Fn, 0) 195 cm.common.ForEachRemoteCluster(func(rci common.RemoteCluster) error { 196 rc := rci.(*remoteCluster) 197 waiters = append(waiters, toWaitFn(rc)) 198 return nil 199 }) 200 201 err := wait.ForAll(wctx, waiters) 202 if ctx.Err() == nil && wctx.Err() != nil { 203 // The sync timeout expired, but the parent context is still valid, which 204 // means that the circuit breaker was triggered. Print a warning message 205 // and continue normally, as if the synchronization completed successfully. 206 // This ensures that we don't block forever in case of misconfigurations. 207 cm.syncTimeoutLogOnce.Do(func() { 208 cm.logger.Warning("Failed waiting for clustermesh synchronization, expect possible disruption of cross-cluster connections") 209 }) 210 211 return nil 212 } 213 214 return err 215 } 216 217 // Status returns the status of the ClusterMesh subsystem 218 func (cm *clusterMesh) status() []*models.RemoteCluster { 219 var clusters []*models.RemoteCluster 220 221 cm.common.ForEachRemoteCluster(func(rci common.RemoteCluster) error { 222 rc := rci.(*remoteCluster) 223 clusters = append(clusters, rc.Status()) 224 return nil 225 }) 226 227 // Sort the remote clusters information to ensure consistent ordering. 228 slices.SortFunc(clusters, 229 func(a, b *models.RemoteCluster) int { return cmp.Compare(a.Name, b.Name) }) 230 231 return clusters 232 }