k8s.io/kubernetes@v1.29.3/pkg/controller/servicecidrs/servicecidrs_controller.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package servicecidrs 18 19 import ( 20 "context" 21 "encoding/json" 22 "net/netip" 23 "sync" 24 "time" 25 26 v1 "k8s.io/api/core/v1" 27 networkingapiv1alpha1 "k8s.io/api/networking/v1alpha1" 28 apierrors "k8s.io/apimachinery/pkg/api/errors" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 "k8s.io/apimachinery/pkg/labels" 31 "k8s.io/apimachinery/pkg/types" 32 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 33 "k8s.io/apimachinery/pkg/util/sets" 34 "k8s.io/apimachinery/pkg/util/wait" 35 metav1apply "k8s.io/client-go/applyconfigurations/meta/v1" 36 networkingapiv1alpha1apply "k8s.io/client-go/applyconfigurations/networking/v1alpha1" 37 networkinginformers "k8s.io/client-go/informers/networking/v1alpha1" 38 clientset "k8s.io/client-go/kubernetes" 39 "k8s.io/client-go/kubernetes/scheme" 40 v1core "k8s.io/client-go/kubernetes/typed/core/v1" 41 networkinglisters "k8s.io/client-go/listers/networking/v1alpha1" 42 "k8s.io/client-go/tools/cache" 43 "k8s.io/client-go/tools/record" 44 "k8s.io/client-go/util/workqueue" 45 "k8s.io/klog/v2" 46 "k8s.io/kubernetes/pkg/registry/core/service/ipallocator" 47 "k8s.io/kubernetes/pkg/util/iptree" 48 netutils "k8s.io/utils/net" 49 ) 50 51 const ( 52 // maxRetries is the max number of times a service object will be retried before it is dropped out of the queue. 53 // With the current rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers represent the 54 // sequence of delays between successive queuings of a service. 55 // 56 // 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s 57 maxRetries = 15 58 controllerName = "service-cidr-controller" 59 60 ServiceCIDRProtectionFinalizer = "networking.k8s.io/service-cidr-finalizer" 61 62 // deletionGracePeriod is the time in seconds to wait to remove the finalizer from a ServiceCIDR to ensure the 63 // deletion informations has been propagated to the apiserver allocators to avoid allocating any IP address 64 // before we complete delete the ServiceCIDR 65 deletionGracePeriod = 10 * time.Second 66 ) 67 68 // NewController returns a new *Controller. 69 func NewController( 70 serviceCIDRInformer networkinginformers.ServiceCIDRInformer, 71 ipAddressInformer networkinginformers.IPAddressInformer, 72 client clientset.Interface, 73 ) *Controller { 74 broadcaster := record.NewBroadcaster() 75 recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: controllerName}) 76 c := &Controller{ 77 client: client, 78 queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "ipaddresses"), 79 tree: iptree.New[sets.Set[string]](), 80 workerLoopPeriod: time.Second, 81 } 82 83 _, _ = serviceCIDRInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 84 AddFunc: c.addServiceCIDR, 85 UpdateFunc: c.updateServiceCIDR, 86 DeleteFunc: c.deleteServiceCIDR, 87 }) 88 c.serviceCIDRLister = serviceCIDRInformer.Lister() 89 c.serviceCIDRsSynced = serviceCIDRInformer.Informer().HasSynced 90 91 _, _ = ipAddressInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 92 AddFunc: c.addIPAddress, 93 DeleteFunc: c.deleteIPAddress, 94 }) 95 96 c.ipAddressLister = ipAddressInformer.Lister() 97 c.ipAddressSynced = ipAddressInformer.Informer().HasSynced 98 99 c.eventBroadcaster = broadcaster 100 c.eventRecorder = recorder 101 102 return c 103 } 104 105 // Controller manages selector-based service ipAddress. 106 type Controller struct { 107 client clientset.Interface 108 eventBroadcaster record.EventBroadcaster 109 eventRecorder record.EventRecorder 110 111 serviceCIDRLister networkinglisters.ServiceCIDRLister 112 serviceCIDRsSynced cache.InformerSynced 113 114 ipAddressLister networkinglisters.IPAddressLister 115 ipAddressSynced cache.InformerSynced 116 117 queue workqueue.RateLimitingInterface 118 119 // workerLoopPeriod is the time between worker runs. The workers process the queue of service and ipRange changes. 120 workerLoopPeriod time.Duration 121 122 // tree store the ServiceCIDRs names associated to each 123 muTree sync.Mutex 124 tree *iptree.Tree[sets.Set[string]] 125 } 126 127 // Run will not return until stopCh is closed. 128 func (c *Controller) Run(ctx context.Context, workers int) { 129 defer utilruntime.HandleCrash() 130 defer c.queue.ShutDown() 131 132 c.eventBroadcaster.StartStructuredLogging(0) 133 c.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: c.client.CoreV1().Events("")}) 134 defer c.eventBroadcaster.Shutdown() 135 136 logger := klog.FromContext(ctx) 137 138 logger.Info("Starting", "controller", controllerName) 139 defer logger.Info("Shutting down", "controller", controllerName) 140 141 if !cache.WaitForNamedCacheSync(controllerName, ctx.Done(), c.serviceCIDRsSynced, c.ipAddressSynced) { 142 return 143 } 144 145 for i := 0; i < workers; i++ { 146 go wait.UntilWithContext(ctx, c.worker, c.workerLoopPeriod) 147 } 148 <-ctx.Done() 149 } 150 151 func (c *Controller) addServiceCIDR(obj interface{}) { 152 cidr, ok := obj.(*networkingapiv1alpha1.ServiceCIDR) 153 if !ok { 154 return 155 } 156 c.queue.Add(cidr.Name) 157 for _, key := range c.overlappingServiceCIDRs(cidr) { 158 c.queue.Add(key) 159 } 160 } 161 162 func (c *Controller) updateServiceCIDR(oldObj, obj interface{}) { 163 key, err := cache.MetaNamespaceKeyFunc(obj) 164 if err == nil { 165 c.queue.Add(key) 166 } 167 } 168 169 // deleteServiceCIDR 170 func (c *Controller) deleteServiceCIDR(obj interface{}) { 171 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) 172 if err == nil { 173 c.queue.Add(key) 174 } 175 } 176 177 // addIPAddress may block a ServiceCIDR deletion 178 func (c *Controller) addIPAddress(obj interface{}) { 179 ip, ok := obj.(*networkingapiv1alpha1.IPAddress) 180 if !ok { 181 return 182 } 183 184 for _, cidr := range c.containingServiceCIDRs(ip) { 185 c.queue.Add(cidr) 186 } 187 } 188 189 // deleteIPAddress may unblock a ServiceCIDR deletion 190 func (c *Controller) deleteIPAddress(obj interface{}) { 191 ip, ok := obj.(*networkingapiv1alpha1.IPAddress) 192 if !ok { 193 tombstone, ok := obj.(cache.DeletedFinalStateUnknown) 194 if !ok { 195 return 196 } 197 ip, ok = tombstone.Obj.(*networkingapiv1alpha1.IPAddress) 198 if !ok { 199 return 200 } 201 } 202 203 for _, cidr := range c.containingServiceCIDRs(ip) { 204 c.queue.Add(cidr) 205 } 206 } 207 208 // overlappingServiceCIDRs, given a ServiceCIDR return the ServiceCIDRs that contain or are contained, 209 // this is required because adding or removing a CIDR will require to recompute the 210 // state of each ServiceCIDR to check if can be unblocked on deletion. 211 func (c *Controller) overlappingServiceCIDRs(serviceCIDR *networkingapiv1alpha1.ServiceCIDR) []string { 212 c.muTree.Lock() 213 defer c.muTree.Unlock() 214 215 serviceCIDRs := sets.New[string]() 216 for _, cidr := range serviceCIDR.Spec.CIDRs { 217 if prefix, err := netip.ParsePrefix(cidr); err == nil { // if is empty err will not be nil 218 c.tree.WalkPath(prefix, func(k netip.Prefix, v sets.Set[string]) bool { 219 serviceCIDRs.Insert(v.UnsortedList()...) 220 return false 221 }) 222 c.tree.WalkPrefix(prefix, func(k netip.Prefix, v sets.Set[string]) bool { 223 serviceCIDRs.Insert(v.UnsortedList()...) 224 return false 225 }) 226 } 227 } 228 229 return serviceCIDRs.UnsortedList() 230 } 231 232 // containingServiceCIDRs, given an IPAddress return the ServiceCIDRs that contains the IP, 233 // as it may block or be blocking the deletion of the ServiceCIDRs that contain it. 234 func (c *Controller) containingServiceCIDRs(ip *networkingapiv1alpha1.IPAddress) []string { 235 // only process IPs managed by the kube-apiserver 236 managedBy, ok := ip.Labels[networkingapiv1alpha1.LabelManagedBy] 237 if !ok || managedBy != ipallocator.ControllerName { 238 return []string{} 239 } 240 241 address, err := netip.ParseAddr(ip.Name) 242 if err != nil { 243 // This should not happen, the IPAddress object validates 244 // the name is a valid IPAddress 245 return []string{} 246 } 247 248 c.muTree.Lock() 249 defer c.muTree.Unlock() 250 serviceCIDRs := []string{} 251 // walk the tree to get all the ServiceCIDRs that contain this IP address 252 prefixes := c.tree.GetHostIPPrefixMatches(address) 253 for _, v := range prefixes { 254 serviceCIDRs = append(serviceCIDRs, v.UnsortedList()...) 255 } 256 257 return serviceCIDRs 258 } 259 260 func (c *Controller) worker(ctx context.Context) { 261 for c.processNext(ctx) { 262 } 263 } 264 265 func (c *Controller) processNext(ctx context.Context) bool { 266 eKey, quit := c.queue.Get() 267 if quit { 268 return false 269 } 270 defer c.queue.Done(eKey) 271 272 key := eKey.(string) 273 err := c.sync(ctx, key) 274 if err == nil { 275 c.queue.Forget(key) 276 return true 277 } 278 logger := klog.FromContext(ctx) 279 if c.queue.NumRequeues(key) < maxRetries { 280 logger.V(2).Info("Error syncing ServiceCIDR, retrying", "ServiceCIDR", key, "err", err) 281 c.queue.AddRateLimited(key) 282 } else { 283 logger.Info("Dropping ServiceCIDR out of the queue", "ServiceCIDR", key, "err", err) 284 c.queue.Forget(key) 285 utilruntime.HandleError(err) 286 } 287 return true 288 } 289 290 // syncCIDRs rebuilds the radix tree based from the informers cache 291 func (c *Controller) syncCIDRs() error { 292 serviceCIDRList, err := c.serviceCIDRLister.List(labels.Everything()) 293 if err != nil { 294 return err 295 } 296 297 // track the names of the different ServiceCIDRs, there 298 // can be multiple ServiceCIDRs sharing the same prefixes 299 // and this is important to determine if a ServiceCIDR can 300 // be deleted. 301 tree := iptree.New[sets.Set[string]]() 302 for _, serviceCIDR := range serviceCIDRList { 303 for _, cidr := range serviceCIDR.Spec.CIDRs { 304 if prefix, err := netip.ParsePrefix(cidr); err == nil { // if is empty err will not be nil 305 // if the prefix already exist append the new ServiceCIDR name 306 v, ok := tree.GetPrefix(prefix) 307 if !ok { 308 v = sets.Set[string]{} 309 } 310 v.Insert(serviceCIDR.Name) 311 tree.InsertPrefix(prefix, v) 312 } 313 } 314 } 315 316 c.muTree.Lock() 317 defer c.muTree.Unlock() 318 c.tree = tree 319 return nil 320 } 321 322 func (c *Controller) sync(ctx context.Context, key string) error { 323 logger := klog.FromContext(ctx) 324 startTime := time.Now() 325 defer func() { 326 logger.V(4).Info("Finished syncing ServiceCIDR)", "ServiceCIDR", key, "elapsed", time.Since(startTime)) 327 }() 328 329 // TODO(aojea) verify if this present a performance problem 330 // restore the radix tree from the current state 331 err := c.syncCIDRs() 332 if err != nil { 333 return err 334 } 335 336 logger.V(4).Info("syncing ServiceCIDR", "ServiceCIDR", key) 337 cidr, err := c.serviceCIDRLister.Get(key) 338 if err != nil { 339 if apierrors.IsNotFound(err) { 340 logger.V(4).Info("ServiceCIDR no longer exist", "ServiceCIDR", key) 341 return nil 342 } 343 return err 344 } 345 346 // Deleting .... 347 if !cidr.GetDeletionTimestamp().IsZero() { 348 // check if the existing ServiceCIDR can be deleted before removing the finalizer 349 ok, err := c.canDeleteCIDR(ctx, cidr) 350 if err != nil { 351 return err 352 } 353 if !ok { 354 // update the status to indicate why the ServiceCIDR can not be deleted, 355 // it will be reevaludated by an event on any ServiceCIDR or IPAddress related object 356 // that may remove this condition. 357 svcApplyStatus := networkingapiv1alpha1apply.ServiceCIDRStatus().WithConditions( 358 metav1apply.Condition(). 359 WithType(networkingapiv1alpha1.ServiceCIDRConditionReady). 360 WithStatus(metav1.ConditionFalse). 361 WithReason(networkingapiv1alpha1.ServiceCIDRReasonTerminating). 362 WithMessage("There are still IPAddresses referencing the ServiceCIDR, please remove them or create a new ServiceCIDR"). 363 WithLastTransitionTime(metav1.Now())) 364 svcApply := networkingapiv1alpha1apply.ServiceCIDR(cidr.Name).WithStatus(svcApplyStatus) 365 _, err = c.client.NetworkingV1alpha1().ServiceCIDRs().ApplyStatus(ctx, svcApply, metav1.ApplyOptions{FieldManager: controllerName, Force: true}) 366 return err 367 } 368 // If there are no IPAddress depending on this ServiceCIDR is safe to remove it, 369 // however, there can be a race when the allocators still consider the ServiceCIDR 370 // ready and allocate a new IPAddress from them, to avoid that, we wait during a 371 // a grace period to be sure the deletion change has been propagated to the allocators 372 // and no new IPAddress is going to be allocated. 373 timeUntilDeleted := deletionGracePeriod - time.Since(cidr.GetDeletionTimestamp().Time) 374 if timeUntilDeleted > 0 { 375 c.queue.AddAfter(key, timeUntilDeleted) 376 return nil 377 } 378 return c.removeServiceCIDRFinalizerIfNeeded(ctx, cidr) 379 } 380 381 // Created or Updated, the ServiceCIDR must have a finalizer. 382 err = c.addServiceCIDRFinalizerIfNeeded(ctx, cidr) 383 if err != nil { 384 return err 385 } 386 387 // Set Ready condition to True. 388 svcApplyStatus := networkingapiv1alpha1apply.ServiceCIDRStatus().WithConditions( 389 metav1apply.Condition(). 390 WithType(networkingapiv1alpha1.ServiceCIDRConditionReady). 391 WithStatus(metav1.ConditionTrue). 392 WithMessage("Kubernetes Service CIDR is ready"). 393 WithLastTransitionTime(metav1.Now())) 394 svcApply := networkingapiv1alpha1apply.ServiceCIDR(cidr.Name).WithStatus(svcApplyStatus) 395 if _, err := c.client.NetworkingV1alpha1().ServiceCIDRs().ApplyStatus(ctx, svcApply, metav1.ApplyOptions{FieldManager: controllerName, Force: true}); err != nil { 396 logger.Info("error updating default ServiceCIDR status", "error", err) 397 c.eventRecorder.Eventf(cidr, v1.EventTypeWarning, "KubernetesServiceCIDRError", "The ServiceCIDR Status can not be set to Ready=True") 398 return err 399 } 400 401 return nil 402 } 403 404 // canDeleteCIDR checks that the ServiceCIDR can be safely deleted and not leave orphan IPAddresses 405 func (c *Controller) canDeleteCIDR(ctx context.Context, serviceCIDR *networkingapiv1alpha1.ServiceCIDR) (bool, error) { 406 // TODO(aojea) Revisit the lock usage and if we need to keep it only for the tree operations 407 // to avoid holding it during the whole operation. 408 c.muTree.Lock() 409 defer c.muTree.Unlock() 410 logger := klog.FromContext(ctx) 411 // Check if there is a subnet that already contains the ServiceCIDR that is going to be deleted. 412 hasParent := true 413 for _, cidr := range serviceCIDR.Spec.CIDRs { 414 // Walk the tree to find if there is a larger subnet that contains the existing one, 415 // or there is another ServiceCIDR with the same subnet. 416 if prefix, err := netip.ParsePrefix(cidr); err == nil { 417 serviceCIDRs := sets.New[string]() 418 c.tree.WalkPath(prefix, func(k netip.Prefix, v sets.Set[string]) bool { 419 serviceCIDRs.Insert(v.UnsortedList()...) 420 return false 421 }) 422 if serviceCIDRs.Len() == 1 && serviceCIDRs.Has(serviceCIDR.Name) { 423 hasParent = false 424 } 425 } 426 } 427 428 // All the existing IP addresses will be contained on the parent ServiceCIDRs, 429 // it is safe to delete, remove the finalizer. 430 if hasParent { 431 logger.V(2).Info("Removing finalizer for ServiceCIDR", "ServiceCIDR", serviceCIDR.String()) 432 return true, nil 433 } 434 435 // TODO: optimize this 436 // Since current ServiceCIDR does not have another ServiceCIDR containing it, 437 // verify there are no existing IPAddresses referencing it that will be orphan. 438 for _, cidr := range serviceCIDR.Spec.CIDRs { 439 // get all the IPv4 addresses 440 ipLabelSelector := labels.Set(map[string]string{ 441 networkingapiv1alpha1.LabelIPAddressFamily: string(convertToV1IPFamily(netutils.IPFamilyOfCIDRString(cidr))), 442 networkingapiv1alpha1.LabelManagedBy: ipallocator.ControllerName, 443 }).AsSelectorPreValidated() 444 ips, err := c.ipAddressLister.List(ipLabelSelector) 445 if err != nil { 446 return false, err 447 } 448 for _, ip := range ips { 449 // if the longest prefix match is the ServiceCIDR to be deleted 450 // and is the only existing one, at least one IPAddress will be 451 // orphan, block the ServiceCIDR deletion. 452 address, err := netip.ParseAddr(ip.Name) 453 if err != nil { 454 // the IPAddress object validates that the name is a valid IPAddress 455 logger.Info("[SHOULD NOT HAPPEN] unexpected error parsing IPAddress", "IPAddress", ip.Name, "error", err) 456 continue 457 } 458 // walk the tree to find all ServiceCIDRs containing this IP 459 prefixes := c.tree.GetHostIPPrefixMatches(address) 460 if len(prefixes) != 1 { 461 continue 462 } 463 for _, v := range prefixes { 464 if v.Len() == 1 && v.Has(serviceCIDR.Name) { 465 return false, nil 466 } 467 } 468 } 469 } 470 471 // There are no IPAddresses that depend on the existing ServiceCIDR, so 472 // it is safe to delete, remove finalizer. 473 logger.Info("ServiceCIDR no longer have orphan IPs", "ServiceCDIR", serviceCIDR.String()) 474 return true, nil 475 } 476 477 func (c *Controller) addServiceCIDRFinalizerIfNeeded(ctx context.Context, cidr *networkingapiv1alpha1.ServiceCIDR) error { 478 for _, f := range cidr.GetFinalizers() { 479 if f == ServiceCIDRProtectionFinalizer { 480 return nil 481 } 482 } 483 484 patch := map[string]interface{}{ 485 "metadata": map[string]interface{}{ 486 "finalizers": []string{ServiceCIDRProtectionFinalizer}, 487 }, 488 } 489 patchBytes, err := json.Marshal(patch) 490 if err != nil { 491 return err 492 } 493 _, err = c.client.NetworkingV1alpha1().ServiceCIDRs().Patch(ctx, cidr.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) 494 if err != nil && !apierrors.IsNotFound(err) { 495 return err 496 } 497 klog.FromContext(ctx).V(4).Info("Added protection finalizer to ServiceCIDR", "ServiceCIDR", cidr.Name) 498 return nil 499 500 } 501 502 func (c *Controller) removeServiceCIDRFinalizerIfNeeded(ctx context.Context, cidr *networkingapiv1alpha1.ServiceCIDR) error { 503 found := false 504 for _, f := range cidr.GetFinalizers() { 505 if f == ServiceCIDRProtectionFinalizer { 506 found = true 507 break 508 } 509 } 510 if !found { 511 return nil 512 } 513 patch := map[string]interface{}{ 514 "metadata": map[string]interface{}{ 515 "$deleteFromPrimitiveList/finalizers": []string{ServiceCIDRProtectionFinalizer}, 516 }, 517 } 518 patchBytes, err := json.Marshal(patch) 519 if err != nil { 520 return err 521 } 522 _, err = c.client.NetworkingV1alpha1().ServiceCIDRs().Patch(ctx, cidr.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) 523 if err != nil && !apierrors.IsNotFound(err) { 524 return err 525 } 526 klog.FromContext(ctx).V(4).Info("Removed protection finalizer from ServiceCIDRs", "ServiceCIDR", cidr.Name) 527 return nil 528 } 529 530 // Convert netutils.IPFamily to v1.IPFamily 531 // TODO: consolidate helpers 532 // copied from pkg/proxy/util/utils.go 533 func convertToV1IPFamily(ipFamily netutils.IPFamily) v1.IPFamily { 534 switch ipFamily { 535 case netutils.IPv4: 536 return v1.IPv4Protocol 537 case netutils.IPv6: 538 return v1.IPv6Protocol 539 } 540 541 return v1.IPFamilyUnknown 542 }