github.com/cilium/cilium@v1.16.2/pkg/k8s/watchers/watcher.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package watchers 5 6 import ( 7 "context" 8 "net" 9 "net/netip" 10 "sync" 11 12 "github.com/sirupsen/logrus" 13 "k8s.io/apimachinery/pkg/util/runtime" 14 15 "github.com/cilium/cilium/pkg/endpoint" 16 "github.com/cilium/cilium/pkg/ipcache" 17 ipcacheTypes "github.com/cilium/cilium/pkg/ipcache/types" 18 "github.com/cilium/cilium/pkg/k8s" 19 cilium_v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2" 20 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2alpha1" 21 "github.com/cilium/cilium/pkg/k8s/client" 22 slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1" 23 "github.com/cilium/cilium/pkg/k8s/synced" 24 "github.com/cilium/cilium/pkg/k8s/watchers/resources" 25 "github.com/cilium/cilium/pkg/labels" 26 "github.com/cilium/cilium/pkg/loadbalancer" 27 "github.com/cilium/cilium/pkg/logging" 28 "github.com/cilium/cilium/pkg/logging/logfields" 29 nodeTypes "github.com/cilium/cilium/pkg/node/types" 30 "github.com/cilium/cilium/pkg/option" 31 "github.com/cilium/cilium/pkg/redirectpolicy" 32 "github.com/cilium/cilium/pkg/source" 33 ) 34 35 const ( 36 k8sAPIGroupNamespaceV1Core = "core/v1::Namespace" 37 K8sAPIGroupServiceV1Core = "core/v1::Service" 38 k8sAPIGroupNetworkingV1Core = "networking.k8s.io/v1::NetworkPolicy" 39 k8sAPIGroupCiliumNetworkPolicyV2 = "cilium/v2::CiliumNetworkPolicy" 40 k8sAPIGroupCiliumClusterwideNetworkPolicyV2 = "cilium/v2::CiliumClusterwideNetworkPolicy" 41 k8sAPIGroupCiliumCIDRGroupV2Alpha1 = "cilium/v2alpha1::CiliumCIDRGroup" 42 k8sAPIGroupCiliumNodeV2 = "cilium/v2::CiliumNode" 43 k8sAPIGroupCiliumEndpointV2 = "cilium/v2::CiliumEndpoint" 44 k8sAPIGroupCiliumLocalRedirectPolicyV2 = "cilium/v2::CiliumLocalRedirectPolicy" 45 k8sAPIGroupCiliumEndpointSliceV2Alpha1 = "cilium/v2alpha1::CiliumEndpointSlice" 46 k8sAPIGroupCiliumEnvoyConfigV2 = "cilium/v2::CiliumEnvoyConfig" 47 k8sAPIGroupCiliumClusterwideEnvoyConfigV2 = "cilium/v2::CiliumClusterwideEnvoyConfig" 48 49 metricCLRP = "CiliumLocalRedirectPolicy" 50 metricPod = "Pod" 51 ) 52 53 func init() { 54 // Replace error handler with our own 55 runtime.ErrorHandlers = []func(error){ 56 k8s.K8sErrorHandler, 57 } 58 } 59 60 var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "k8s-watcher") 61 62 type endpointManager interface { 63 LookupCEPName(string) *endpoint.Endpoint 64 GetEndpoints() []*endpoint.Endpoint 65 GetHostEndpoint() *endpoint.Endpoint 66 GetEndpointsByPodName(string) []*endpoint.Endpoint 67 WaitForEndpointsAtPolicyRev(ctx context.Context, rev uint64) error 68 UpdatePolicyMaps(context.Context, *sync.WaitGroup) *sync.WaitGroup 69 } 70 71 type nodeManager interface { 72 NodeDeleted(n nodeTypes.Node) 73 NodeUpdated(n nodeTypes.Node) 74 NodeSync() 75 } 76 77 type policyManager interface { 78 TriggerPolicyUpdates(force bool, reason string) 79 } 80 81 type svcManager interface { 82 DeleteService(frontend loadbalancer.L3n4Addr) (bool, error) 83 GetDeepCopyServiceByFrontend(frontend loadbalancer.L3n4Addr) (*loadbalancer.SVC, bool) 84 UpsertService(*loadbalancer.SVC) (bool, loadbalancer.ID, error) 85 } 86 87 type redirectPolicyManager interface { 88 AddRedirectPolicy(config redirectpolicy.LRPConfig) (bool, error) 89 DeleteRedirectPolicy(config redirectpolicy.LRPConfig) error 90 OnAddService(svcID k8s.ServiceID) 91 OnDeleteService(svcID k8s.ServiceID) 92 OnUpdatePod(pod *slim_corev1.Pod, needsReassign bool, ready bool) 93 OnDeletePod(pod *slim_corev1.Pod) 94 OnAddPod(pod *slim_corev1.Pod) 95 } 96 97 type bgpSpeakerManager interface { 98 OnUpdateService(svc *slim_corev1.Service) error 99 OnDeleteService(svc *slim_corev1.Service) error 100 101 OnUpdateEndpoints(eps *k8s.Endpoints) error 102 } 103 104 type cgroupManager interface { 105 OnAddPod(pod *slim_corev1.Pod) 106 OnUpdatePod(oldPod, newPod *slim_corev1.Pod) 107 OnDeletePod(pod *slim_corev1.Pod) 108 } 109 110 type CacheAccessK8SWatcher interface { 111 GetCachedNamespace(namespace string) (*slim_corev1.Namespace, error) 112 GetCachedPod(namespace, name string) (*slim_corev1.Pod, error) 113 } 114 115 type ipcacheManager interface { 116 // GH-21142: Re-evaluate the need for these APIs 117 Upsert(ip string, hostIP net.IP, hostKey uint8, k8sMeta *ipcache.K8sMetadata, newIdentity ipcache.Identity) (namedPortsChanged bool, err error) 118 LookupByIP(IP string) (ipcache.Identity, bool) 119 Delete(IP string, source source.Source) (namedPortsChanged bool) 120 121 UpsertLabels(prefix netip.Prefix, lbls labels.Labels, src source.Source, resource ipcacheTypes.ResourceID) 122 RemoveLabelsExcluded(lbls labels.Labels, toExclude map[netip.Prefix]struct{}, resource ipcacheTypes.ResourceID) 123 DeleteOnMetadataMatch(IP string, source source.Source, namespace, name string) (namedPortsChanged bool) 124 } 125 126 type K8sWatcher struct { 127 resourceGroupsFn func(cfg WatcherConfiguration) (resourceGroups, waitForCachesOnly []string) 128 129 clientset client.Clientset 130 131 k8sEventReporter *K8sEventReporter 132 k8sPodWatcher *K8sPodWatcher 133 k8sCiliumNodeWatcher *K8sCiliumNodeWatcher 134 k8sNamespaceWatcher *K8sNamespaceWatcher 135 k8sServiceWatcher *K8sServiceWatcher 136 k8sEndpointsWatcher *K8sEndpointsWatcher 137 k8sCiliumLRPWatcher *K8sCiliumLRPWatcher 138 k8sCiliumEndpointsWatcher *K8sCiliumEndpointsWatcher 139 140 // k8sResourceSynced maps a resource name to a channel. Once the given 141 // resource name is synchronized with k8s, the channel for which that 142 // resource name maps to is closed. 143 k8sResourceSynced *synced.Resources 144 145 // k8sAPIGroups is a set of k8s API in use. They are setup in watchers, 146 // and may be disabled while the agent runs. 147 k8sAPIGroups *synced.APIGroups 148 149 cfg WatcherConfiguration 150 } 151 152 func newWatcher( 153 clientset client.Clientset, 154 k8sPodWatcher *K8sPodWatcher, 155 k8sCiliumNodeWatcher *K8sCiliumNodeWatcher, 156 k8sNamespaceWatcher *K8sNamespaceWatcher, 157 k8sServiceWatcher *K8sServiceWatcher, 158 k8sEndpointsWatcher *K8sEndpointsWatcher, 159 k8sCiliumLRPWatcher *K8sCiliumLRPWatcher, 160 k8sCiliumEndpointsWatcher *K8sCiliumEndpointsWatcher, 161 k8sEventReporter *K8sEventReporter, 162 k8sResourceSynced *synced.Resources, 163 k8sAPIGroups *synced.APIGroups, 164 cfg WatcherConfiguration, 165 ) *K8sWatcher { 166 return &K8sWatcher{ 167 resourceGroupsFn: resourceGroups, 168 clientset: clientset, 169 k8sEventReporter: k8sEventReporter, 170 k8sPodWatcher: k8sPodWatcher, 171 k8sCiliumNodeWatcher: k8sCiliumNodeWatcher, 172 k8sNamespaceWatcher: k8sNamespaceWatcher, 173 k8sServiceWatcher: k8sServiceWatcher, 174 k8sEndpointsWatcher: k8sEndpointsWatcher, 175 k8sCiliumLRPWatcher: k8sCiliumLRPWatcher, 176 k8sCiliumEndpointsWatcher: k8sCiliumEndpointsWatcher, 177 k8sResourceSynced: k8sResourceSynced, 178 k8sAPIGroups: k8sAPIGroups, 179 cfg: cfg, 180 } 181 } 182 183 // WaitForCacheSync blocks until the given resources have been synchronized from k8s. Note that if 184 // the controller for a resource has not been started, the wait for that resource returns 185 // immediately. If it is required that the resource exists and is actually synchronized, the caller 186 // must ensure the controller for that resource has been started before calling 187 // WaitForCacheSync. For most resources this can be done by receiving from controllersStarted 188 // channel (<-k.controllersStarted), which is closed after most watchers have been started. 189 func (k *K8sWatcher) WaitForCacheSync(resourceNames ...string) { 190 k.k8sResourceSynced.WaitForCacheSync(resourceNames...) 191 } 192 193 func (k *K8sWatcher) GetAPIGroups() []string { 194 return k.k8sAPIGroups.GetGroups() 195 } 196 197 // WaitForCRDsToRegister will wait for the Cilium Operator to register the CRDs 198 // with the apiserver. This step is required before launching the full K8s 199 // watcher, as those resource controllers need the resources to be registered 200 // with K8s first. 201 func (k *K8sWatcher) WaitForCRDsToRegister(ctx context.Context) error { 202 return synced.SyncCRDs(ctx, k.clientset, synced.AgentCRDResourceNames(), k.k8sResourceSynced, k.k8sAPIGroups) 203 } 204 205 type watcherKind int 206 207 const ( 208 // skip causes watcher to not be started. 209 skip watcherKind = iota 210 211 // start causes watcher to be started as soon as possible. 212 start 213 214 // waitOnly will not start a watcher for this resource, but cause us to 215 // wait for an external go routine to initialize it 216 waitOnly 217 ) 218 219 type watcherInfo struct { 220 kind watcherKind 221 group string 222 } 223 224 var ciliumResourceToGroupMapping = map[string]watcherInfo{ 225 synced.CRDResourceName(cilium_v2.CNPName): {waitOnly, k8sAPIGroupCiliumNetworkPolicyV2}, // Handled in pkg/policy/k8s/ 226 synced.CRDResourceName(cilium_v2.CCNPName): {waitOnly, k8sAPIGroupCiliumClusterwideNetworkPolicyV2}, // Handled in pkg/policy/k8s/ 227 synced.CRDResourceName(cilium_v2.CEPName): {start, k8sAPIGroupCiliumEndpointV2}, // ipcache 228 synced.CRDResourceName(cilium_v2.CNName): {start, k8sAPIGroupCiliumNodeV2}, 229 synced.CRDResourceName(cilium_v2.CIDName): {skip, ""}, // Handled in pkg/k8s/identitybackend/ 230 synced.CRDResourceName(cilium_v2.CLRPName): {start, k8sAPIGroupCiliumLocalRedirectPolicyV2}, 231 synced.CRDResourceName(cilium_v2.CEWName): {skip, ""}, // Handled in clustermesh-apiserver/ 232 synced.CRDResourceName(cilium_v2.CEGPName): {skip, ""}, // Handled via Resource[T]. 233 synced.CRDResourceName(v2alpha1.CESName): {start, k8sAPIGroupCiliumEndpointSliceV2Alpha1}, 234 synced.CRDResourceName(cilium_v2.CCECName): {waitOnly, k8sAPIGroupCiliumClusterwideEnvoyConfigV2}, // Handled in pkg/ciliumenvoyconfig/ 235 synced.CRDResourceName(cilium_v2.CECName): {waitOnly, k8sAPIGroupCiliumEnvoyConfigV2}, // Handled in pkg/ciliumenvoyconfig/ 236 synced.CRDResourceName(v2alpha1.BGPPName): {skip, ""}, // Handled in BGP control plane 237 synced.CRDResourceName(v2alpha1.BGPCCName): {skip, ""}, // Handled in BGP control plane 238 synced.CRDResourceName(v2alpha1.BGPAName): {skip, ""}, // Handled in BGP control plane 239 synced.CRDResourceName(v2alpha1.BGPPCName): {skip, ""}, // Handled in BGP control plane 240 synced.CRDResourceName(v2alpha1.BGPNCName): {skip, ""}, // Handled in BGP control plane 241 synced.CRDResourceName(v2alpha1.BGPNCOName): {skip, ""}, // Handled in BGP control plane 242 synced.CRDResourceName(v2alpha1.LBIPPoolName): {skip, ""}, // Handled in LB IPAM 243 synced.CRDResourceName(v2alpha1.CNCName): {skip, ""}, // Handled by init directly 244 synced.CRDResourceName(v2alpha1.CCGName): {waitOnly, k8sAPIGroupCiliumCIDRGroupV2Alpha1}, 245 synced.CRDResourceName(v2alpha1.L2AnnouncementName): {skip, ""}, // Handled by L2 announcement directly 246 synced.CRDResourceName(v2alpha1.CPIPName): {skip, ""}, // Handled by multi-pool IPAM allocator 247 } 248 249 // resourceGroups are all of the core Kubernetes and Cilium resource groups 250 // which the Cilium agent watches to implement CNI functionality. 251 func resourceGroups(cfg WatcherConfiguration) (resourceGroups, waitForCachesOnly []string) { 252 k8sGroups := []string{ 253 // To perform the service translation and have the BPF LB datapath 254 // with the right service -> backend (k8s endpoints) translation. 255 K8sAPIGroupServiceV1Core, 256 257 // Namespaces can contain labels which are essential for 258 // endpoints being restored to have the right identity. 259 k8sAPIGroupNamespaceV1Core, 260 // Pods can contain labels which are essential for endpoints 261 // being restored to have the right identity. 262 resources.K8sAPIGroupPodV1Core, 263 // To perform the service translation and have the BPF LB datapath 264 // with the right service -> backend (k8s endpoints) translation. 265 resources.K8sAPIGroupEndpointSliceOrEndpoint, 266 } 267 268 if cfg.K8sNetworkPolicyEnabled() { 269 // When the flag is set, 270 // We need all network policies in place before restoring to 271 // make sure we are enforcing the correct policies for each 272 // endpoint before restarting. 273 waitForCachesOnly = append(waitForCachesOnly, k8sAPIGroupNetworkingV1Core) 274 } 275 276 ciliumResources := synced.AgentCRDResourceNames() 277 ciliumGroups := make([]string, 0, len(ciliumResources)) 278 for _, r := range ciliumResources { 279 groupInfo, ok := ciliumResourceToGroupMapping[r] 280 if !ok { 281 log.Fatalf("Unknown resource %s. Please update pkg/k8s/watchers to understand this type.", r) 282 } 283 switch groupInfo.kind { 284 case skip: 285 continue 286 case start: 287 ciliumGroups = append(ciliumGroups, groupInfo.group) 288 case waitOnly: 289 waitForCachesOnly = append(waitForCachesOnly, groupInfo.group) 290 } 291 } 292 293 return append(k8sGroups, ciliumGroups...), waitForCachesOnly 294 } 295 296 // InitK8sSubsystem takes a channel for which it will be closed when all 297 // caches essential for daemon are synchronized. 298 // It initializes the K8s subsystem and starts the watchers for the resources 299 // that the daemon is interested in. 300 // The cachesSynced channel is closed when all caches are synchronized. 301 // To be called after WaitForCRDsToRegister() so that all needed CRDs have 302 // already been registered. 303 func (k *K8sWatcher) InitK8sSubsystem(ctx context.Context, cachesSynced chan struct{}) { 304 resources, cachesOnly := k.resourceGroupsFn(k.cfg) 305 306 log.Info("Enabling k8s event listener") 307 k.enableK8sWatchers(ctx, resources) 308 close(k.k8sPodWatcher.controllersStarted) 309 310 go func() { 311 log.Info("Waiting until all pre-existing resources have been received") 312 allResources := append(resources, cachesOnly...) 313 if err := k.k8sResourceSynced.WaitForCacheSyncWithTimeout(option.Config.K8sSyncTimeout, allResources...); err != nil { 314 log.WithError(err).Fatal("Timed out waiting for pre-existing resources to be received; exiting") 315 } 316 close(cachesSynced) 317 }() 318 } 319 320 // WatcherConfiguration is the required configuration for enableK8sWatchers 321 type WatcherConfiguration interface { 322 // K8sNetworkPolicyEnabled returns true if cilium agent needs to support K8s NetworkPolicy 323 K8sNetworkPolicyEnabled() bool 324 } 325 326 // enableK8sWatchers starts watchers for given resources. 327 func (k *K8sWatcher) enableK8sWatchers(ctx context.Context, resourceNames []string) { 328 if !k.clientset.IsEnabled() { 329 log.Debug("Not enabling k8s event listener because k8s is not enabled") 330 return 331 } 332 asyncControllers := &sync.WaitGroup{} 333 334 for _, r := range resourceNames { 335 switch r { 336 // Core Cilium 337 case resources.K8sAPIGroupPodV1Core: 338 asyncControllers.Add(1) 339 go k.k8sPodWatcher.podsInit(asyncControllers) 340 case k8sAPIGroupNamespaceV1Core: 341 k.k8sNamespaceWatcher.namespacesInit() 342 case k8sAPIGroupCiliumNodeV2: 343 asyncControllers.Add(1) 344 go k.k8sCiliumNodeWatcher.ciliumNodeInit(ctx, asyncControllers) 345 case resources.K8sAPIGroupServiceV1Core: 346 k.k8sServiceWatcher.servicesInit() 347 case resources.K8sAPIGroupEndpointSliceOrEndpoint: 348 k.k8sEndpointsWatcher.endpointsInit() 349 case k8sAPIGroupCiliumEndpointV2: 350 k.k8sCiliumEndpointsWatcher.initCiliumEndpointOrSlices(ctx, asyncControllers) 351 case k8sAPIGroupCiliumEndpointSliceV2Alpha1: 352 // no-op; handled in k8sAPIGroupCiliumEndpointV2 353 case k8sAPIGroupCiliumLocalRedirectPolicyV2: 354 k.k8sCiliumLRPWatcher.ciliumLocalRedirectPolicyInit() 355 default: 356 log.WithFields(logrus.Fields{ 357 logfields.Resource: r, 358 }).Fatal("Not listening for Kubernetes resource updates for unhandled type") 359 } 360 } 361 362 asyncControllers.Wait() 363 } 364 365 func (k *K8sWatcher) StopWatcher() { 366 k.k8sNamespaceWatcher.stopWatcher() 367 k.k8sServiceWatcher.stopWatcher() 368 k.k8sEndpointsWatcher.stopWatcher() 369 k.k8sCiliumLRPWatcher.stopWatcher() 370 } 371 372 // K8sEventProcessed is called to do metrics accounting for each processed 373 // Kubernetes event 374 func (k *K8sWatcher) K8sEventProcessed(scope, action string, status bool) { 375 k.k8sEventReporter.K8sEventProcessed(scope, action, status) 376 } 377 378 // K8sEventReceived does metric accounting for each received Kubernetes event, as well 379 // as notifying of events for k8s resources synced. 380 func (k *K8sWatcher) K8sEventReceived(apiResourceName, scope, action string, valid, equal bool) { 381 k.k8sEventReporter.K8sEventReceived(apiResourceName, scope, action, valid, equal) 382 } 383 384 // GetCachedPod returns a pod from the local store. 385 func (k *K8sWatcher) GetCachedPod(namespace, name string) (*slim_corev1.Pod, error) { 386 return k.k8sPodWatcher.GetCachedPod(namespace, name) 387 } 388 389 // GetCachedNamespace returns a namespace from the local store. 390 func (k *K8sWatcher) GetCachedNamespace(namespace string) (*slim_corev1.Namespace, error) { 391 return k.k8sNamespaceWatcher.GetCachedNamespace(namespace) 392 } 393 394 func (k *K8sWatcher) RunK8sServiceHandler() { 395 k.k8sServiceWatcher.RunK8sServiceHandler() 396 }