k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/servicecidrs/servicecidrs_controller.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package servicecidrs 18 19 import ( 20 "context" 21 "encoding/json" 22 "net/netip" 23 "sync" 24 "time" 25 26 v1 "k8s.io/api/core/v1" 27 networkingapiv1alpha1 "k8s.io/api/networking/v1alpha1" 28 apierrors "k8s.io/apimachinery/pkg/api/errors" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 "k8s.io/apimachinery/pkg/labels" 31 "k8s.io/apimachinery/pkg/types" 32 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 33 "k8s.io/apimachinery/pkg/util/sets" 34 "k8s.io/apimachinery/pkg/util/wait" 35 metav1apply "k8s.io/client-go/applyconfigurations/meta/v1" 36 networkingapiv1alpha1apply "k8s.io/client-go/applyconfigurations/networking/v1alpha1" 37 networkinginformers "k8s.io/client-go/informers/networking/v1alpha1" 38 clientset "k8s.io/client-go/kubernetes" 39 "k8s.io/client-go/kubernetes/scheme" 40 v1core "k8s.io/client-go/kubernetes/typed/core/v1" 41 networkinglisters "k8s.io/client-go/listers/networking/v1alpha1" 42 "k8s.io/client-go/tools/cache" 43 "k8s.io/client-go/tools/record" 44 "k8s.io/client-go/util/workqueue" 45 "k8s.io/klog/v2" 46 "k8s.io/kubernetes/pkg/registry/core/service/ipallocator" 47 "k8s.io/kubernetes/pkg/util/iptree" 48 netutils "k8s.io/utils/net" 49 ) 50 51 const ( 52 // maxRetries is the max number of times a service object will be retried before it is dropped out of the queue. 53 // With the current rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers represent the 54 // sequence of delays between successive queuings of a service. 55 // 56 // 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s 57 maxRetries = 15 58 controllerName = "service-cidr-controller" 59 60 ServiceCIDRProtectionFinalizer = "networking.k8s.io/service-cidr-finalizer" 61 62 // deletionGracePeriod is the time in seconds to wait to remove the finalizer from a ServiceCIDR to ensure the 63 // deletion informations has been propagated to the apiserver allocators to avoid allocating any IP address 64 // before we complete delete the ServiceCIDR 65 deletionGracePeriod = 10 * time.Second 66 ) 67 68 // NewController returns a new *Controller. 69 func NewController( 70 ctx context.Context, 71 serviceCIDRInformer networkinginformers.ServiceCIDRInformer, 72 ipAddressInformer networkinginformers.IPAddressInformer, 73 client clientset.Interface, 74 ) *Controller { 75 broadcaster := record.NewBroadcaster(record.WithContext(ctx)) 76 recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: controllerName}) 77 c := &Controller{ 78 client: client, 79 queue: workqueue.NewTypedRateLimitingQueueWithConfig( 80 workqueue.DefaultTypedControllerRateLimiter[string](), 81 workqueue.TypedRateLimitingQueueConfig[string]{Name: "ipaddresses"}, 82 ), 83 tree: iptree.New[sets.Set[string]](), 84 workerLoopPeriod: time.Second, 85 } 86 87 _, _ = serviceCIDRInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 88 AddFunc: c.addServiceCIDR, 89 UpdateFunc: c.updateServiceCIDR, 90 DeleteFunc: c.deleteServiceCIDR, 91 }) 92 c.serviceCIDRLister = serviceCIDRInformer.Lister() 93 c.serviceCIDRsSynced = serviceCIDRInformer.Informer().HasSynced 94 95 _, _ = ipAddressInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 96 AddFunc: c.addIPAddress, 97 DeleteFunc: c.deleteIPAddress, 98 }) 99 100 c.ipAddressLister = ipAddressInformer.Lister() 101 c.ipAddressSynced = ipAddressInformer.Informer().HasSynced 102 103 c.eventBroadcaster = broadcaster 104 c.eventRecorder = recorder 105 106 return c 107 } 108 109 // Controller manages selector-based service ipAddress. 110 type Controller struct { 111 client clientset.Interface 112 eventBroadcaster record.EventBroadcaster 113 eventRecorder record.EventRecorder 114 115 serviceCIDRLister networkinglisters.ServiceCIDRLister 116 serviceCIDRsSynced cache.InformerSynced 117 118 ipAddressLister networkinglisters.IPAddressLister 119 ipAddressSynced cache.InformerSynced 120 121 queue workqueue.TypedRateLimitingInterface[string] 122 123 // workerLoopPeriod is the time between worker runs. The workers process the queue of service and ipRange changes. 124 workerLoopPeriod time.Duration 125 126 // tree store the ServiceCIDRs names associated to each 127 muTree sync.Mutex 128 tree *iptree.Tree[sets.Set[string]] 129 } 130 131 // Run will not return until stopCh is closed. 132 func (c *Controller) Run(ctx context.Context, workers int) { 133 defer utilruntime.HandleCrash() 134 defer c.queue.ShutDown() 135 136 c.eventBroadcaster.StartStructuredLogging(3) 137 c.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: c.client.CoreV1().Events("")}) 138 defer c.eventBroadcaster.Shutdown() 139 140 logger := klog.FromContext(ctx) 141 142 logger.Info("Starting", "controller", controllerName) 143 defer logger.Info("Shutting down", "controller", controllerName) 144 145 if !cache.WaitForNamedCacheSync(controllerName, ctx.Done(), c.serviceCIDRsSynced, c.ipAddressSynced) { 146 return 147 } 148 149 for i := 0; i < workers; i++ { 150 go wait.UntilWithContext(ctx, c.worker, c.workerLoopPeriod) 151 } 152 <-ctx.Done() 153 } 154 155 func (c *Controller) addServiceCIDR(obj interface{}) { 156 cidr, ok := obj.(*networkingapiv1alpha1.ServiceCIDR) 157 if !ok { 158 return 159 } 160 c.queue.Add(cidr.Name) 161 for _, key := range c.overlappingServiceCIDRs(cidr) { 162 c.queue.Add(key) 163 } 164 } 165 166 func (c *Controller) updateServiceCIDR(oldObj, obj interface{}) { 167 key, err := cache.MetaNamespaceKeyFunc(obj) 168 if err == nil { 169 c.queue.Add(key) 170 } 171 } 172 173 // deleteServiceCIDR 174 func (c *Controller) deleteServiceCIDR(obj interface{}) { 175 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) 176 if err == nil { 177 c.queue.Add(key) 178 } 179 } 180 181 // addIPAddress may block a ServiceCIDR deletion 182 func (c *Controller) addIPAddress(obj interface{}) { 183 ip, ok := obj.(*networkingapiv1alpha1.IPAddress) 184 if !ok { 185 return 186 } 187 188 for _, cidr := range c.containingServiceCIDRs(ip) { 189 c.queue.Add(cidr) 190 } 191 } 192 193 // deleteIPAddress may unblock a ServiceCIDR deletion 194 func (c *Controller) deleteIPAddress(obj interface{}) { 195 ip, ok := obj.(*networkingapiv1alpha1.IPAddress) 196 if !ok { 197 tombstone, ok := obj.(cache.DeletedFinalStateUnknown) 198 if !ok { 199 return 200 } 201 ip, ok = tombstone.Obj.(*networkingapiv1alpha1.IPAddress) 202 if !ok { 203 return 204 } 205 } 206 207 for _, cidr := range c.containingServiceCIDRs(ip) { 208 c.queue.Add(cidr) 209 } 210 } 211 212 // overlappingServiceCIDRs, given a ServiceCIDR return the ServiceCIDRs that contain or are contained, 213 // this is required because adding or removing a CIDR will require to recompute the 214 // state of each ServiceCIDR to check if can be unblocked on deletion. 215 func (c *Controller) overlappingServiceCIDRs(serviceCIDR *networkingapiv1alpha1.ServiceCIDR) []string { 216 c.muTree.Lock() 217 defer c.muTree.Unlock() 218 219 serviceCIDRs := sets.New[string]() 220 for _, cidr := range serviceCIDR.Spec.CIDRs { 221 if prefix, err := netip.ParsePrefix(cidr); err == nil { // if is empty err will not be nil 222 c.tree.WalkPath(prefix, func(k netip.Prefix, v sets.Set[string]) bool { 223 serviceCIDRs.Insert(v.UnsortedList()...) 224 return false 225 }) 226 c.tree.WalkPrefix(prefix, func(k netip.Prefix, v sets.Set[string]) bool { 227 serviceCIDRs.Insert(v.UnsortedList()...) 228 return false 229 }) 230 } 231 } 232 233 return serviceCIDRs.UnsortedList() 234 } 235 236 // containingServiceCIDRs, given an IPAddress return the ServiceCIDRs that contains the IP, 237 // as it may block or be blocking the deletion of the ServiceCIDRs that contain it. 238 func (c *Controller) containingServiceCIDRs(ip *networkingapiv1alpha1.IPAddress) []string { 239 // only process IPs managed by the kube-apiserver 240 managedBy, ok := ip.Labels[networkingapiv1alpha1.LabelManagedBy] 241 if !ok || managedBy != ipallocator.ControllerName { 242 return []string{} 243 } 244 245 address, err := netip.ParseAddr(ip.Name) 246 if err != nil { 247 // This should not happen, the IPAddress object validates 248 // the name is a valid IPAddress 249 return []string{} 250 } 251 252 c.muTree.Lock() 253 defer c.muTree.Unlock() 254 serviceCIDRs := []string{} 255 // walk the tree to get all the ServiceCIDRs that contain this IP address 256 prefixes := c.tree.GetHostIPPrefixMatches(address) 257 for _, v := range prefixes { 258 serviceCIDRs = append(serviceCIDRs, v.UnsortedList()...) 259 } 260 261 return serviceCIDRs 262 } 263 264 func (c *Controller) worker(ctx context.Context) { 265 for c.processNext(ctx) { 266 } 267 } 268 269 func (c *Controller) processNext(ctx context.Context) bool { 270 key, quit := c.queue.Get() 271 if quit { 272 return false 273 } 274 defer c.queue.Done(key) 275 276 err := c.sync(ctx, key) 277 if err == nil { 278 c.queue.Forget(key) 279 return true 280 } 281 logger := klog.FromContext(ctx) 282 if c.queue.NumRequeues(key) < maxRetries { 283 logger.V(2).Info("Error syncing ServiceCIDR, retrying", "ServiceCIDR", key, "err", err) 284 c.queue.AddRateLimited(key) 285 } else { 286 logger.Info("Dropping ServiceCIDR out of the queue", "ServiceCIDR", key, "err", err) 287 c.queue.Forget(key) 288 utilruntime.HandleError(err) 289 } 290 return true 291 } 292 293 // syncCIDRs rebuilds the radix tree based from the informers cache 294 func (c *Controller) syncCIDRs() error { 295 serviceCIDRList, err := c.serviceCIDRLister.List(labels.Everything()) 296 if err != nil { 297 return err 298 } 299 300 // track the names of the different ServiceCIDRs, there 301 // can be multiple ServiceCIDRs sharing the same prefixes 302 // and this is important to determine if a ServiceCIDR can 303 // be deleted. 304 tree := iptree.New[sets.Set[string]]() 305 for _, serviceCIDR := range serviceCIDRList { 306 for _, cidr := range serviceCIDR.Spec.CIDRs { 307 if prefix, err := netip.ParsePrefix(cidr); err == nil { // if is empty err will not be nil 308 // if the prefix already exist append the new ServiceCIDR name 309 v, ok := tree.GetPrefix(prefix) 310 if !ok { 311 v = sets.Set[string]{} 312 } 313 v.Insert(serviceCIDR.Name) 314 tree.InsertPrefix(prefix, v) 315 } 316 } 317 } 318 319 c.muTree.Lock() 320 defer c.muTree.Unlock() 321 c.tree = tree 322 return nil 323 } 324 325 func (c *Controller) sync(ctx context.Context, key string) error { 326 logger := klog.FromContext(ctx) 327 startTime := time.Now() 328 defer func() { 329 logger.V(4).Info("Finished syncing ServiceCIDR)", "ServiceCIDR", key, "elapsed", time.Since(startTime)) 330 }() 331 332 // TODO(aojea) verify if this present a performance problem 333 // restore the radix tree from the current state 334 err := c.syncCIDRs() 335 if err != nil { 336 return err 337 } 338 339 logger.V(4).Info("syncing ServiceCIDR", "ServiceCIDR", key) 340 cidr, err := c.serviceCIDRLister.Get(key) 341 if err != nil { 342 if apierrors.IsNotFound(err) { 343 logger.V(4).Info("ServiceCIDR no longer exist", "ServiceCIDR", key) 344 return nil 345 } 346 return err 347 } 348 349 // Deleting .... 350 if !cidr.GetDeletionTimestamp().IsZero() { 351 // check if the existing ServiceCIDR can be deleted before removing the finalizer 352 ok, err := c.canDeleteCIDR(ctx, cidr) 353 if err != nil { 354 return err 355 } 356 if !ok { 357 // update the status to indicate why the ServiceCIDR can not be deleted, 358 // it will be reevaludated by an event on any ServiceCIDR or IPAddress related object 359 // that may remove this condition. 360 svcApplyStatus := networkingapiv1alpha1apply.ServiceCIDRStatus().WithConditions( 361 metav1apply.Condition(). 362 WithType(networkingapiv1alpha1.ServiceCIDRConditionReady). 363 WithStatus(metav1.ConditionFalse). 364 WithReason(networkingapiv1alpha1.ServiceCIDRReasonTerminating). 365 WithMessage("There are still IPAddresses referencing the ServiceCIDR, please remove them or create a new ServiceCIDR"). 366 WithLastTransitionTime(metav1.Now())) 367 svcApply := networkingapiv1alpha1apply.ServiceCIDR(cidr.Name).WithStatus(svcApplyStatus) 368 _, err = c.client.NetworkingV1alpha1().ServiceCIDRs().ApplyStatus(ctx, svcApply, metav1.ApplyOptions{FieldManager: controllerName, Force: true}) 369 return err 370 } 371 // If there are no IPAddress depending on this ServiceCIDR is safe to remove it, 372 // however, there can be a race when the allocators still consider the ServiceCIDR 373 // ready and allocate a new IPAddress from them, to avoid that, we wait during a 374 // a grace period to be sure the deletion change has been propagated to the allocators 375 // and no new IPAddress is going to be allocated. 376 timeUntilDeleted := deletionGracePeriod - time.Since(cidr.GetDeletionTimestamp().Time) 377 if timeUntilDeleted > 0 { 378 c.queue.AddAfter(key, timeUntilDeleted) 379 return nil 380 } 381 return c.removeServiceCIDRFinalizerIfNeeded(ctx, cidr) 382 } 383 384 // Created or Updated, the ServiceCIDR must have a finalizer. 385 err = c.addServiceCIDRFinalizerIfNeeded(ctx, cidr) 386 if err != nil { 387 return err 388 } 389 390 // Set Ready condition to True. 391 svcApplyStatus := networkingapiv1alpha1apply.ServiceCIDRStatus().WithConditions( 392 metav1apply.Condition(). 393 WithType(networkingapiv1alpha1.ServiceCIDRConditionReady). 394 WithStatus(metav1.ConditionTrue). 395 WithMessage("Kubernetes Service CIDR is ready"). 396 WithLastTransitionTime(metav1.Now())) 397 svcApply := networkingapiv1alpha1apply.ServiceCIDR(cidr.Name).WithStatus(svcApplyStatus) 398 if _, err := c.client.NetworkingV1alpha1().ServiceCIDRs().ApplyStatus(ctx, svcApply, metav1.ApplyOptions{FieldManager: controllerName, Force: true}); err != nil { 399 logger.Info("error updating default ServiceCIDR status", "error", err) 400 c.eventRecorder.Eventf(cidr, v1.EventTypeWarning, "KubernetesServiceCIDRError", "The ServiceCIDR Status can not be set to Ready=True") 401 return err 402 } 403 404 return nil 405 } 406 407 // canDeleteCIDR checks that the ServiceCIDR can be safely deleted and not leave orphan IPAddresses 408 func (c *Controller) canDeleteCIDR(ctx context.Context, serviceCIDR *networkingapiv1alpha1.ServiceCIDR) (bool, error) { 409 // TODO(aojea) Revisit the lock usage and if we need to keep it only for the tree operations 410 // to avoid holding it during the whole operation. 411 c.muTree.Lock() 412 defer c.muTree.Unlock() 413 logger := klog.FromContext(ctx) 414 // Check if there is a subnet that already contains the ServiceCIDR that is going to be deleted. 415 hasParent := true 416 for _, cidr := range serviceCIDR.Spec.CIDRs { 417 // Walk the tree to find if there is a larger subnet that contains the existing one, 418 // or there is another ServiceCIDR with the same subnet. 419 if prefix, err := netip.ParsePrefix(cidr); err == nil { 420 serviceCIDRs := sets.New[string]() 421 c.tree.WalkPath(prefix, func(k netip.Prefix, v sets.Set[string]) bool { 422 serviceCIDRs.Insert(v.UnsortedList()...) 423 return false 424 }) 425 if serviceCIDRs.Len() == 1 && serviceCIDRs.Has(serviceCIDR.Name) { 426 hasParent = false 427 } 428 } 429 } 430 431 // All the existing IP addresses will be contained on the parent ServiceCIDRs, 432 // it is safe to delete, remove the finalizer. 433 if hasParent { 434 logger.V(2).Info("Removing finalizer for ServiceCIDR", "ServiceCIDR", serviceCIDR.String()) 435 return true, nil 436 } 437 438 // TODO: optimize this 439 // Since current ServiceCIDR does not have another ServiceCIDR containing it, 440 // verify there are no existing IPAddresses referencing it that will be orphan. 441 for _, cidr := range serviceCIDR.Spec.CIDRs { 442 // get all the IPv4 addresses 443 ipLabelSelector := labels.Set(map[string]string{ 444 networkingapiv1alpha1.LabelIPAddressFamily: string(convertToV1IPFamily(netutils.IPFamilyOfCIDRString(cidr))), 445 networkingapiv1alpha1.LabelManagedBy: ipallocator.ControllerName, 446 }).AsSelectorPreValidated() 447 ips, err := c.ipAddressLister.List(ipLabelSelector) 448 if err != nil { 449 return false, err 450 } 451 for _, ip := range ips { 452 // if the longest prefix match is the ServiceCIDR to be deleted 453 // and is the only existing one, at least one IPAddress will be 454 // orphan, block the ServiceCIDR deletion. 455 address, err := netip.ParseAddr(ip.Name) 456 if err != nil { 457 // the IPAddress object validates that the name is a valid IPAddress 458 logger.Info("[SHOULD NOT HAPPEN] unexpected error parsing IPAddress", "IPAddress", ip.Name, "error", err) 459 continue 460 } 461 // walk the tree to find all ServiceCIDRs containing this IP 462 prefixes := c.tree.GetHostIPPrefixMatches(address) 463 if len(prefixes) != 1 { 464 continue 465 } 466 for _, v := range prefixes { 467 if v.Len() == 1 && v.Has(serviceCIDR.Name) { 468 return false, nil 469 } 470 } 471 } 472 } 473 474 // There are no IPAddresses that depend on the existing ServiceCIDR, so 475 // it is safe to delete, remove finalizer. 476 logger.Info("ServiceCIDR no longer have orphan IPs", "ServiceCDIR", serviceCIDR.String()) 477 return true, nil 478 } 479 480 func (c *Controller) addServiceCIDRFinalizerIfNeeded(ctx context.Context, cidr *networkingapiv1alpha1.ServiceCIDR) error { 481 for _, f := range cidr.GetFinalizers() { 482 if f == ServiceCIDRProtectionFinalizer { 483 return nil 484 } 485 } 486 487 patch := map[string]interface{}{ 488 "metadata": map[string]interface{}{ 489 "finalizers": []string{ServiceCIDRProtectionFinalizer}, 490 }, 491 } 492 patchBytes, err := json.Marshal(patch) 493 if err != nil { 494 return err 495 } 496 _, err = c.client.NetworkingV1alpha1().ServiceCIDRs().Patch(ctx, cidr.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) 497 if err != nil && !apierrors.IsNotFound(err) { 498 return err 499 } 500 klog.FromContext(ctx).V(4).Info("Added protection finalizer to ServiceCIDR", "ServiceCIDR", cidr.Name) 501 return nil 502 503 } 504 505 func (c *Controller) removeServiceCIDRFinalizerIfNeeded(ctx context.Context, cidr *networkingapiv1alpha1.ServiceCIDR) error { 506 found := false 507 for _, f := range cidr.GetFinalizers() { 508 if f == ServiceCIDRProtectionFinalizer { 509 found = true 510 break 511 } 512 } 513 if !found { 514 return nil 515 } 516 patch := map[string]interface{}{ 517 "metadata": map[string]interface{}{ 518 "$deleteFromPrimitiveList/finalizers": []string{ServiceCIDRProtectionFinalizer}, 519 }, 520 } 521 patchBytes, err := json.Marshal(patch) 522 if err != nil { 523 return err 524 } 525 _, err = c.client.NetworkingV1alpha1().ServiceCIDRs().Patch(ctx, cidr.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) 526 if err != nil && !apierrors.IsNotFound(err) { 527 return err 528 } 529 klog.FromContext(ctx).V(4).Info("Removed protection finalizer from ServiceCIDRs", "ServiceCIDR", cidr.Name) 530 return nil 531 } 532 533 // Convert netutils.IPFamily to v1.IPFamily 534 // TODO: consolidate helpers 535 // copied from pkg/proxy/util/utils.go 536 func convertToV1IPFamily(ipFamily netutils.IPFamily) v1.IPFamily { 537 switch ipFamily { 538 case netutils.IPv4: 539 return v1.IPv4Protocol 540 case netutils.IPv6: 541 return v1.IPv6Protocol 542 } 543 544 return v1.IPFamilyUnknown 545 }