github.com/cilium/cilium@v1.16.2/clustermesh-apiserver/clustermesh/root.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package clustermesh 5 6 import ( 7 "context" 8 "errors" 9 "log/slog" 10 "net" 11 "path" 12 "sync" 13 14 "github.com/cilium/hive/cell" 15 "github.com/sirupsen/logrus" 16 "github.com/spf13/cobra" 17 "k8s.io/apimachinery/pkg/runtime" 18 19 cmk8s "github.com/cilium/cilium/clustermesh-apiserver/clustermesh/k8s" 20 "github.com/cilium/cilium/clustermesh-apiserver/syncstate" 21 operatorWatchers "github.com/cilium/cilium/operator/watchers" 22 cmtypes "github.com/cilium/cilium/pkg/clustermesh/types" 23 cmutils "github.com/cilium/cilium/pkg/clustermesh/utils" 24 "github.com/cilium/cilium/pkg/hive" 25 "github.com/cilium/cilium/pkg/identity" 26 identityCache "github.com/cilium/cilium/pkg/identity/cache" 27 "github.com/cilium/cilium/pkg/ipcache" 28 ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2" 29 k8sClient "github.com/cilium/cilium/pkg/k8s/client" 30 "github.com/cilium/cilium/pkg/k8s/resource" 31 "github.com/cilium/cilium/pkg/k8s/types" 32 "github.com/cilium/cilium/pkg/kvstore" 33 "github.com/cilium/cilium/pkg/kvstore/store" 34 "github.com/cilium/cilium/pkg/labels" 35 "github.com/cilium/cilium/pkg/logging" 36 "github.com/cilium/cilium/pkg/logging/logfields" 37 "github.com/cilium/cilium/pkg/metrics" 38 nodeStore "github.com/cilium/cilium/pkg/node/store" 39 nodeTypes "github.com/cilium/cilium/pkg/node/types" 40 "github.com/cilium/cilium/pkg/option" 41 "github.com/cilium/cilium/pkg/promise" 42 "github.com/cilium/cilium/pkg/version" 43 ) 44 45 var ( 46 log = logging.DefaultLogger.WithField(logfields.LogSubsys, "clustermesh-apiserver") 47 ) 48 49 func NewCmd(h *hive.Hive) *cobra.Command { 50 rootCmd := &cobra.Command{ 51 Use: "clustermesh", 52 Short: "Run ClusterMesh", 53 Run: func(cmd *cobra.Command, args []string) { 54 if err := h.Run(slog.Default()); err != nil { 55 log.Fatal(err) 56 } 57 }, 58 PreRun: func(cmd *cobra.Command, args []string) { 59 // Overwrite the metrics namespace with the one specific for the ClusterMesh API Server 60 metrics.Namespace = metrics.CiliumClusterMeshAPIServerNamespace 61 option.Config.Populate(h.Viper()) 62 if option.Config.Debug { 63 log.Logger.SetLevel(logrus.DebugLevel) 64 } 65 option.LogRegisteredOptions(h.Viper(), log) 66 log.Infof("Cilium ClusterMesh %s", version.Version) 67 }, 68 } 69 70 h.RegisterFlags(rootCmd.Flags()) 71 rootCmd.AddCommand(h.Command()) 72 return rootCmd 73 } 74 75 type parameters struct { 76 cell.In 77 78 ExternalWorkloadsConfig 79 ClusterInfo cmtypes.ClusterInfo 80 Clientset k8sClient.Clientset 81 Resources cmk8s.Resources 82 BackendPromise promise.Promise[kvstore.BackendOperations] 83 StoreFactory store.Factory 84 SyncState syncstate.SyncState 85 } 86 87 func registerHooks(lc cell.Lifecycle, params parameters) error { 88 lc.Append(cell.Hook{ 89 OnStart: func(ctx cell.HookContext) error { 90 if !params.Clientset.IsEnabled() { 91 return errors.New("Kubernetes client not configured, cannot continue.") 92 } 93 94 backend, err := params.BackendPromise.Await(ctx) 95 if err != nil { 96 return err 97 } 98 99 startServer(ctx, params.ClusterInfo, params.EnableExternalWorkloads, params.Clientset, backend, params.Resources, params.StoreFactory, params.SyncState) 100 return nil 101 }, 102 }) 103 return nil 104 } 105 106 type identitySynchronizer struct { 107 store store.SyncStore 108 encoder func([]byte) string 109 syncCallback func(context.Context) 110 } 111 112 func newIdentitySynchronizer(ctx context.Context, cinfo cmtypes.ClusterInfo, backend kvstore.BackendOperations, factory store.Factory, syncCallback func(context.Context)) synchronizer { 113 identitiesStore := factory.NewSyncStore(cinfo.Name, backend, 114 path.Join(identityCache.IdentitiesPath, "id"), 115 store.WSSWithSyncedKeyOverride(identityCache.IdentitiesPath)) 116 go identitiesStore.Run(ctx) 117 118 return &identitySynchronizer{store: identitiesStore, encoder: backend.Encode, syncCallback: syncCallback} 119 } 120 121 func parseLabelArrayFromMap(base map[string]string) labels.LabelArray { 122 array := make(labels.LabelArray, 0, len(base)) 123 for sourceAndKey, value := range base { 124 array = append(array, labels.NewLabel(sourceAndKey, value, "")) 125 } 126 return array.Sort() 127 } 128 129 func (is *identitySynchronizer) upsert(ctx context.Context, _ resource.Key, obj runtime.Object) error { 130 identity := obj.(*ciliumv2.CiliumIdentity) 131 scopedLog := log.WithField(logfields.Identity, identity.Name) 132 if len(identity.SecurityLabels) == 0 { 133 scopedLog.WithError(errors.New("missing security labels")).Warning("Ignoring invalid identity") 134 // Do not return an error, since it is pointless to retry. 135 // We will receive a new update event if the security labels change. 136 return nil 137 } 138 139 labelArray := parseLabelArrayFromMap(identity.SecurityLabels) 140 141 var labels []byte 142 for _, l := range labelArray { 143 labels = append(labels, l.FormatForKVStore()...) 144 } 145 146 scopedLog.Info("Upserting identity in etcd") 147 kv := store.NewKVPair(identity.Name, is.encoder(labels)) 148 if err := is.store.UpsertKey(ctx, kv); err != nil { 149 // The only errors surfaced by WorkqueueSyncStore are the unrecoverable ones. 150 log.WithError(err).Warning("Unable to upsert identity in etcd") 151 } 152 153 return nil 154 } 155 156 func (is *identitySynchronizer) delete(ctx context.Context, key resource.Key) error { 157 scopedLog := log.WithField(logfields.Identity, key.Name) 158 scopedLog.Info("Deleting identity from etcd") 159 160 if err := is.store.DeleteKey(ctx, store.NewKVPair(key.Name, "")); err != nil { 161 // The only errors surfaced by WorkqueueSyncStore are the unrecoverable ones. 162 scopedLog.WithError(err).Warning("Unable to delete node from etcd") 163 } 164 165 return nil 166 } 167 168 func (is *identitySynchronizer) synced(ctx context.Context) error { 169 log.Info("Initial list of identities successfully received from Kubernetes") 170 return is.store.Synced(ctx, is.syncCallback) 171 } 172 173 type nodeStub struct { 174 cluster string 175 name string 176 } 177 178 func (n *nodeStub) GetKeyName() string { 179 return nodeTypes.GetKeyNodeName(n.cluster, n.name) 180 } 181 182 type nodeSynchronizer struct { 183 clusterInfo cmtypes.ClusterInfo 184 store store.SyncStore 185 syncCallback func(context.Context) 186 } 187 188 func newNodeSynchronizer(ctx context.Context, cinfo cmtypes.ClusterInfo, backend kvstore.BackendOperations, factory store.Factory, syncCallback func(context.Context)) synchronizer { 189 nodesStore := factory.NewSyncStore(cinfo.Name, backend, nodeStore.NodeStorePrefix) 190 go nodesStore.Run(ctx) 191 192 return &nodeSynchronizer{clusterInfo: cinfo, store: nodesStore, syncCallback: syncCallback} 193 } 194 195 func (ns *nodeSynchronizer) upsert(ctx context.Context, _ resource.Key, obj runtime.Object) error { 196 n := nodeTypes.ParseCiliumNode(obj.(*ciliumv2.CiliumNode)) 197 n.Cluster = ns.clusterInfo.Name 198 n.ClusterID = ns.clusterInfo.ID 199 200 scopedLog := log.WithField(logfields.Node, n.Name) 201 scopedLog.Info("Upserting node in etcd") 202 203 if err := ns.store.UpsertKey(ctx, &n); err != nil { 204 // The only errors surfaced by WorkqueueSyncStore are the unrecoverable ones. 205 log.WithError(err).Warning("Unable to upsert node in etcd") 206 } 207 208 return nil 209 } 210 211 func (ns *nodeSynchronizer) delete(ctx context.Context, key resource.Key) error { 212 n := nodeStub{ 213 cluster: ns.clusterInfo.Name, 214 name: key.Name, 215 } 216 217 scopedLog := log.WithFields(logrus.Fields{logfields.Node: key.Name}) 218 scopedLog.Info("Deleting node from etcd") 219 220 if err := ns.store.DeleteKey(ctx, &n); err != nil { 221 // The only errors surfaced by WorkqueueSyncStore are the unrecoverable ones. 222 scopedLog.WithError(err).Warning("Unable to delete node from etcd") 223 } 224 225 return nil 226 } 227 228 func (ns *nodeSynchronizer) synced(ctx context.Context) error { 229 log.Info("Initial list of nodes successfully received from Kubernetes") 230 return ns.store.Synced(ctx, ns.syncCallback) 231 } 232 233 type ipmap map[string]struct{} 234 235 type endpointSynchronizer struct { 236 store store.SyncStore 237 cache map[string]ipmap 238 syncCallback func(context.Context) 239 } 240 241 func newEndpointSynchronizer(ctx context.Context, cinfo cmtypes.ClusterInfo, backend kvstore.BackendOperations, factory store.Factory, syncCallback func(context.Context)) synchronizer { 242 endpointsStore := factory.NewSyncStore(cinfo.Name, backend, 243 path.Join(ipcache.IPIdentitiesPath, ipcache.DefaultAddressSpace), 244 store.WSSWithSyncedKeyOverride(ipcache.IPIdentitiesPath)) 245 go endpointsStore.Run(ctx) 246 247 return &endpointSynchronizer{ 248 store: endpointsStore, 249 cache: make(map[string]ipmap), 250 syncCallback: syncCallback, 251 } 252 } 253 254 func (es *endpointSynchronizer) upsert(ctx context.Context, key resource.Key, obj runtime.Object) error { 255 endpoint := obj.(*types.CiliumEndpoint) 256 ips := make(ipmap) 257 stale := es.cache[key.String()] 258 259 if n := endpoint.Networking; n != nil { 260 for _, address := range n.Addressing { 261 for _, ip := range []string{address.IPV4, address.IPV6} { 262 if ip == "" { 263 continue 264 } 265 266 scopedLog := log.WithFields(logrus.Fields{logfields.Endpoint: key.String(), logfields.IPAddr: ip}) 267 entry := identity.IPIdentityPair{ 268 IP: net.ParseIP(ip), 269 HostIP: net.ParseIP(n.NodeIP), 270 K8sNamespace: endpoint.Namespace, 271 K8sPodName: endpoint.Name, 272 } 273 274 if endpoint.Identity != nil { 275 entry.ID = identity.NumericIdentity(endpoint.Identity.ID) 276 } 277 278 if endpoint.Encryption != nil { 279 entry.Key = uint8(endpoint.Encryption.Key) 280 } 281 282 scopedLog.Info("Upserting endpoint in etcd") 283 if err := es.store.UpsertKey(ctx, &entry); err != nil { 284 // The only errors surfaced by WorkqueueSyncStore are the unrecoverable ones. 285 scopedLog.WithError(err).Warning("Unable to upsert endpoint in etcd") 286 continue 287 } 288 289 ips[ip] = struct{}{} 290 delete(stale, ip) 291 } 292 } 293 } 294 295 // Delete the stale endpoint IPs from the KVStore. 296 es.deleteEndpoints(ctx, key, stale) 297 es.cache[key.String()] = ips 298 299 return nil 300 } 301 302 func (es *endpointSynchronizer) delete(ctx context.Context, key resource.Key) error { 303 es.deleteEndpoints(ctx, key, es.cache[key.String()]) 304 delete(es.cache, key.String()) 305 return nil 306 } 307 308 func (es *endpointSynchronizer) synced(ctx context.Context) error { 309 log.Info("Initial list of endpoints successfully received from Kubernetes") 310 return es.store.Synced(ctx, es.syncCallback) 311 } 312 313 func (es *endpointSynchronizer) deleteEndpoints(ctx context.Context, key resource.Key, ips ipmap) { 314 for ip := range ips { 315 scopedLog := log.WithFields(logrus.Fields{logfields.Endpoint: key.String(), logfields.IPAddr: ip}) 316 scopedLog.Info("Deleting endpoint from etcd") 317 318 entry := identity.IPIdentityPair{IP: net.ParseIP(ip)} 319 if err := es.store.DeleteKey(ctx, &entry); err != nil { 320 // The only errors surfaced by WorkqueueSyncStore are the unrecoverable ones. 321 scopedLog.WithError(err).Warning("Unable to delete endpoint from etcd") 322 } 323 } 324 } 325 326 type synchronizer interface { 327 upsert(ctx context.Context, key resource.Key, obj runtime.Object) error 328 delete(ctx context.Context, key resource.Key) error 329 synced(ctx context.Context) error 330 } 331 332 func synchronize[T runtime.Object](ctx context.Context, r resource.Resource[T], sync synchronizer) { 333 for event := range r.Events(ctx) { 334 switch event.Kind { 335 case resource.Upsert: 336 event.Done(sync.upsert(ctx, event.Key, event.Object)) 337 case resource.Delete: 338 event.Done(sync.delete(ctx, event.Key)) 339 case resource.Sync: 340 event.Done(sync.synced(ctx)) 341 } 342 } 343 } 344 345 func startServer( 346 startCtx cell.HookContext, 347 cinfo cmtypes.ClusterInfo, 348 allServices bool, 349 clientset k8sClient.Clientset, 350 backend kvstore.BackendOperations, 351 resources cmk8s.Resources, 352 factory store.Factory, 353 syncState syncstate.SyncState, 354 ) { 355 log.WithFields(logrus.Fields{ 356 "cluster-name": cinfo.Name, 357 "cluster-id": cinfo.ID, 358 }).Info("Starting clustermesh-apiserver...") 359 360 config := cmtypes.CiliumClusterConfig{ 361 ID: cinfo.ID, 362 Capabilities: cmtypes.CiliumClusterConfigCapabilities{ 363 SyncedCanaries: true, 364 MaxConnectedClusters: cinfo.MaxConnectedClusters, 365 }, 366 } 367 368 _, err := cmutils.EnforceClusterConfig(context.Background(), cinfo.Name, config, backend, log) 369 if err != nil { 370 log.WithError(err).Fatal("Unable to set local cluster config on kvstore") 371 } 372 373 ctx := context.Background() 374 go synchronize(ctx, resources.CiliumIdentities, newIdentitySynchronizer(ctx, cinfo, backend, factory, syncState.WaitForResource())) 375 go synchronize(ctx, resources.CiliumNodes, newNodeSynchronizer(ctx, cinfo, backend, factory, syncState.WaitForResource())) 376 go synchronize(ctx, resources.CiliumSlimEndpoints, newEndpointSynchronizer(ctx, cinfo, backend, factory, syncState.WaitForResource())) 377 operatorWatchers.StartSynchronizingServices(ctx, &sync.WaitGroup{}, operatorWatchers.ServiceSyncParameters{ 378 ClusterInfo: cinfo, 379 Clientset: clientset, 380 Services: resources.Services, 381 Endpoints: resources.Endpoints, 382 Backend: backend, 383 SharedOnly: !allServices, 384 StoreFactory: factory, 385 SyncCallback: syncState.WaitForResource(), 386 }) 387 syncState.Stop() 388 389 log.Info("Initialization complete") 390 }