k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/registry/core/service/ipallocator/controller/repairip.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package controller 18 19 import ( 20 "context" 21 "fmt" 22 "net" 23 "net/netip" 24 "sync" 25 "time" 26 27 v1 "k8s.io/api/core/v1" 28 networkingv1alpha1 "k8s.io/api/networking/v1alpha1" 29 apierrors "k8s.io/apimachinery/pkg/api/errors" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 "k8s.io/apimachinery/pkg/labels" 32 "k8s.io/apimachinery/pkg/util/runtime" 33 "k8s.io/apimachinery/pkg/util/wait" 34 coreinformers "k8s.io/client-go/informers/core/v1" 35 networkinginformers "k8s.io/client-go/informers/networking/v1alpha1" 36 "k8s.io/client-go/kubernetes" 37 corelisters "k8s.io/client-go/listers/core/v1" 38 networkinglisters "k8s.io/client-go/listers/networking/v1alpha1" 39 "k8s.io/client-go/tools/cache" 40 "k8s.io/client-go/tools/events" 41 "k8s.io/client-go/util/retry" 42 "k8s.io/client-go/util/workqueue" 43 "k8s.io/klog/v2" 44 "k8s.io/kubernetes/pkg/api/legacyscheme" 45 "k8s.io/kubernetes/pkg/apis/core/v1/helper" 46 "k8s.io/kubernetes/pkg/registry/core/service/ipallocator" 47 "k8s.io/kubernetes/pkg/util/iptree" 48 "k8s.io/utils/clock" 49 netutils "k8s.io/utils/net" 50 ) 51 52 const ( 53 // maxRetries is the number of times a service will be retried before it is dropped out of the queue. 54 // With the current rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers represent the 55 // sequence of delays between successive queuings of a service. 56 // 57 // 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s 58 maxRetries = 15 59 workers = 5 60 ) 61 62 // Repair is a controller loop that examines all service ClusterIP allocations and logs any errors, 63 // and then creates the accurate list of IPAddresses objects with all allocated ClusterIPs. 64 // 65 // Handles: 66 // * Duplicate ClusterIP assignments caused by operator action or undetected race conditions 67 // * Allocations to services that were not actually created due to a crash or powerloss 68 // * Migrates old versions of Kubernetes services into the new ipallocator automatically 69 // creating the corresponding IPAddress objects 70 // * IPAddress objects with wrong references or labels 71 // 72 // Logs about: 73 // * ClusterIPs that do not match the currently configured range 74 // 75 // There is a one-to-one relation between Service ClusterIPs and IPAddresses. 76 // The bidirectional relation is achieved using the following fields: 77 // Service.Spec.Cluster == IPAddress.Name AND IPAddress.ParentRef == Service 78 // 79 // The controller use two reconcile loops, one for Services and other for IPAddress. 80 // The Service reconcile loop verifies the bidirectional relation exists and is correct. 81 // 1. Service_X [ClusterIP_X] <------> IPAddress_X [Ref:Service_X] ok 82 // 2. Service_Y [ClusterIP_Y] <------> IPAddress_Y [Ref:GatewayA] !ok, wrong reference 83 // 3. Service_Z [ClusterIP_Z] <------> !ok, missing IPAddress 84 // 4. Service_A [ClusterIP_A] <------> IPAddress_A [Ref:Service_B] !ok, duplicate IPAddress 85 // Service_B [ClusterIP_A] <------> only one service can verify the relation 86 // The IPAddress reconcile loop checks there are no orphan IPAddresses, the rest of the 87 // cases are covered by the Services loop 88 // 1. <------> IPAddress_Z [Ref:Service_C] !ok, orphan IPAddress 89 90 type RepairIPAddress struct { 91 client kubernetes.Interface 92 interval time.Duration 93 94 serviceLister corelisters.ServiceLister 95 servicesSynced cache.InformerSynced 96 97 serviceCIDRLister networkinglisters.ServiceCIDRLister 98 serviceCIDRSynced cache.InformerSynced 99 100 ipAddressLister networkinglisters.IPAddressLister 101 ipAddressSynced cache.InformerSynced 102 103 cidrQueue workqueue.TypedRateLimitingInterface[string] 104 svcQueue workqueue.TypedRateLimitingInterface[string] 105 ipQueue workqueue.TypedRateLimitingInterface[string] 106 workerLoopPeriod time.Duration 107 108 muTree sync.Mutex 109 tree *iptree.Tree[string] 110 111 broadcaster events.EventBroadcaster 112 recorder events.EventRecorder 113 clock clock.Clock 114 } 115 116 // NewRepair creates a controller that periodically ensures that all clusterIPs are uniquely allocated across the cluster 117 // and generates informational warnings for a cluster that is not in sync. 118 func NewRepairIPAddress(interval time.Duration, 119 client kubernetes.Interface, 120 serviceInformer coreinformers.ServiceInformer, 121 serviceCIDRInformer networkinginformers.ServiceCIDRInformer, 122 ipAddressInformer networkinginformers.IPAddressInformer) *RepairIPAddress { 123 eventBroadcaster := events.NewBroadcaster(&events.EventSinkImpl{Interface: client.EventsV1()}) 124 recorder := eventBroadcaster.NewRecorder(legacyscheme.Scheme, "ipallocator-repair-controller") 125 126 r := &RepairIPAddress{ 127 interval: interval, 128 client: client, 129 serviceLister: serviceInformer.Lister(), 130 servicesSynced: serviceInformer.Informer().HasSynced, 131 serviceCIDRLister: serviceCIDRInformer.Lister(), 132 serviceCIDRSynced: serviceCIDRInformer.Informer().HasSynced, 133 ipAddressLister: ipAddressInformer.Lister(), 134 ipAddressSynced: ipAddressInformer.Informer().HasSynced, 135 cidrQueue: workqueue.NewTypedRateLimitingQueueWithConfig( 136 workqueue.DefaultTypedControllerRateLimiter[string](), 137 workqueue.TypedRateLimitingQueueConfig[string]{Name: "servicecidrs"}, 138 ), 139 svcQueue: workqueue.NewTypedRateLimitingQueueWithConfig( 140 workqueue.DefaultTypedControllerRateLimiter[string](), 141 workqueue.TypedRateLimitingQueueConfig[string]{Name: "services"}, 142 ), 143 ipQueue: workqueue.NewTypedRateLimitingQueueWithConfig( 144 workqueue.DefaultTypedControllerRateLimiter[string](), 145 workqueue.TypedRateLimitingQueueConfig[string]{Name: "ipaddresses"}, 146 ), 147 tree: iptree.New[string](), 148 workerLoopPeriod: time.Second, 149 broadcaster: eventBroadcaster, 150 recorder: recorder, 151 clock: clock.RealClock{}, 152 } 153 154 _, _ = serviceInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{ 155 AddFunc: func(obj interface{}) { 156 key, err := cache.MetaNamespaceKeyFunc(obj) 157 if err == nil { 158 r.svcQueue.Add(key) 159 } 160 }, 161 UpdateFunc: func(old interface{}, new interface{}) { 162 key, err := cache.MetaNamespaceKeyFunc(new) 163 if err == nil { 164 r.svcQueue.Add(key) 165 } 166 }, 167 DeleteFunc: func(obj interface{}) { 168 // IndexerInformer uses a delta queue, therefore for deletes we have to use this 169 // key function. 170 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) 171 if err == nil { 172 r.svcQueue.Add(key) 173 } 174 }, 175 }, interval) 176 177 _, _ = serviceCIDRInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 178 AddFunc: func(obj interface{}) { 179 key, err := cache.MetaNamespaceKeyFunc(obj) 180 if err == nil { 181 r.cidrQueue.Add(key) 182 } 183 }, 184 UpdateFunc: func(old interface{}, new interface{}) { 185 key, err := cache.MetaNamespaceKeyFunc(new) 186 if err == nil { 187 r.cidrQueue.Add(key) 188 } 189 }, 190 DeleteFunc: func(obj interface{}) { 191 // IndexerInformer uses a delta queue, therefore for deletes we have to use this 192 // key function. 193 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) 194 if err == nil { 195 r.cidrQueue.Add(key) 196 } 197 }, 198 }) 199 200 ipAddressInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{ 201 AddFunc: func(obj interface{}) { 202 key, err := cache.MetaNamespaceKeyFunc(obj) 203 if err == nil { 204 r.ipQueue.Add(key) 205 } 206 }, 207 UpdateFunc: func(old interface{}, new interface{}) { 208 key, err := cache.MetaNamespaceKeyFunc(new) 209 if err == nil { 210 r.ipQueue.Add(key) 211 } 212 }, 213 DeleteFunc: func(obj interface{}) { 214 // IndexerInformer uses a delta queue, therefore for deletes we have to use this 215 // key function. 216 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) 217 if err == nil { 218 r.ipQueue.Add(key) 219 } 220 }, 221 }, interval) 222 223 return r 224 } 225 226 // RunUntil starts the controller until the provided ch is closed. 227 func (r *RepairIPAddress) RunUntil(onFirstSuccess func(), stopCh chan struct{}) { 228 defer r.cidrQueue.ShutDown() 229 defer r.ipQueue.ShutDown() 230 defer r.svcQueue.ShutDown() 231 r.broadcaster.StartRecordingToSink(stopCh) 232 defer r.broadcaster.Shutdown() 233 234 klog.Info("Starting ipallocator-repair-controller") 235 defer klog.Info("Shutting down ipallocator-repair-controller") 236 237 if !cache.WaitForNamedCacheSync("ipallocator-repair-controller", stopCh, r.ipAddressSynced, r.servicesSynced, r.serviceCIDRSynced) { 238 return 239 } 240 241 // First sync goes through all the Services and IPAddresses in the cache, 242 // once synced, it signals the main loop and works using the handlers, since 243 // it's less expensive and more optimal. 244 if err := r.runOnce(); err != nil { 245 runtime.HandleError(err) 246 return 247 } 248 onFirstSuccess() 249 250 // serialize the operations on ServiceCIDRs 251 go wait.Until(r.cidrWorker, r.workerLoopPeriod, stopCh) 252 253 for i := 0; i < workers; i++ { 254 go wait.Until(r.ipWorker, r.workerLoopPeriod, stopCh) 255 go wait.Until(r.svcWorker, r.workerLoopPeriod, stopCh) 256 } 257 258 <-stopCh 259 } 260 261 // runOnce verifies the state of the ClusterIP allocations and returns an error if an unrecoverable problem occurs. 262 func (r *RepairIPAddress) runOnce() error { 263 return retry.RetryOnConflict(retry.DefaultBackoff, r.doRunOnce) 264 } 265 266 // doRunOnce verifies the state of the ClusterIP allocations and returns an error if an unrecoverable problem occurs. 267 func (r *RepairIPAddress) doRunOnce() error { 268 services, err := r.serviceLister.List(labels.Everything()) 269 if err != nil { 270 return fmt.Errorf("unable to refresh the service IP block: %v", err) 271 } 272 273 // Check every Service's ClusterIP, and rebuild the state as we think it should be. 274 for _, svc := range services { 275 key, err := cache.MetaNamespaceKeyFunc(svc) 276 if err != nil { 277 return err 278 } 279 err = r.syncService(key) 280 if err != nil { 281 return err 282 } 283 } 284 285 // We have checked that every Service has its corresponding IP. 286 // Check that there is no IP created by the allocator without 287 // a Service associated. 288 ipLabelSelector := labels.Set(map[string]string{ 289 networkingv1alpha1.LabelManagedBy: ipallocator.ControllerName, 290 }).AsSelectorPreValidated() 291 ipAddresses, err := r.ipAddressLister.List(ipLabelSelector) 292 if err != nil { 293 return fmt.Errorf("unable to refresh the IPAddress block: %v", err) 294 } 295 // Check every IPAddress matches the corresponding Service, and rebuild the state as we think it should be. 296 for _, ipAddress := range ipAddresses { 297 key, err := cache.MetaNamespaceKeyFunc(ipAddress) 298 if err != nil { 299 return err 300 } 301 err = r.syncIPAddress(key) 302 if err != nil { 303 return err 304 } 305 } 306 307 return nil 308 } 309 310 func (r *RepairIPAddress) svcWorker() { 311 for r.processNextWorkSvc() { 312 } 313 } 314 315 func (r *RepairIPAddress) processNextWorkSvc() bool { 316 eKey, quit := r.svcQueue.Get() 317 if quit { 318 return false 319 } 320 defer r.svcQueue.Done(eKey) 321 322 err := r.syncService(eKey) 323 r.handleSvcErr(err, eKey) 324 325 return true 326 } 327 328 func (r *RepairIPAddress) handleSvcErr(err error, key string) { 329 if err == nil { 330 r.svcQueue.Forget(key) 331 return 332 } 333 334 if r.svcQueue.NumRequeues(key) < maxRetries { 335 klog.V(2).InfoS("Error syncing Service, retrying", "service", key, "err", err) 336 r.svcQueue.AddRateLimited(key) 337 return 338 } 339 340 klog.Warningf("Dropping Service %q out of the queue: %v", key, err) 341 r.svcQueue.Forget(key) 342 runtime.HandleError(err) 343 } 344 345 // syncServices reconcile the Service ClusterIPs to verify that each one has the corresponding IPAddress object associated 346 func (r *RepairIPAddress) syncService(key string) error { 347 var syncError error 348 namespace, name, err := cache.SplitMetaNamespaceKey(key) 349 if err != nil { 350 return err 351 } 352 svc, err := r.serviceLister.Services(namespace).Get(name) 353 if err != nil { 354 // nothing to do 355 return nil 356 } 357 if !helper.IsServiceIPSet(svc) { 358 // didn't need a ClusterIP 359 return nil 360 } 361 362 for _, clusterIP := range svc.Spec.ClusterIPs { 363 ip := netutils.ParseIPSloppy(clusterIP) 364 if ip == nil { 365 // ClusterIP is corrupt, ClusterIPs are already validated, but double checking here 366 // in case there are some inconsistencies with the parsers 367 r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPNotValid", "ClusterIPValidation", "Cluster IP %s is not a valid IP; please recreate Service", ip) 368 runtime.HandleError(fmt.Errorf("the ClusterIP %s for Service %s/%s is not a valid IP; please recreate Service", ip, svc.Namespace, svc.Name)) 369 continue 370 } 371 // TODO(aojea) Refactor to abstract the IPs checks 372 family := getFamilyByIP(ip) 373 374 r.muTree.Lock() 375 prefixes := r.tree.GetHostIPPrefixMatches(ipToAddr(ip)) 376 r.muTree.Unlock() 377 if len(prefixes) == 0 { 378 // ClusterIP is out of range 379 r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPOutOfRange", "ClusterIPAllocation", "Cluster IP [%v]: %s is not within any configured Service CIDR; please recreate service", family, ip) 380 runtime.HandleError(fmt.Errorf("the ClusterIP [%v]: %s for Service %s/%s is not within any service CIDR; please recreate", family, ip, svc.Namespace, svc.Name)) 381 continue 382 } 383 384 // Get the IPAddress object associated to the ClusterIP 385 ipAddress, err := r.ipAddressLister.Get(ip.String()) 386 if apierrors.IsNotFound(err) { 387 // ClusterIP doesn't seem to be allocated, create it. 388 r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPNotAllocated", "ClusterIPAllocation", "Cluster IP [%v]: %s is not allocated; repairing", family, ip) 389 runtime.HandleError(fmt.Errorf("the ClusterIP [%v]: %s for Service %s/%s is not allocated; repairing", family, ip, svc.Namespace, svc.Name)) 390 _, err := r.client.NetworkingV1alpha1().IPAddresses().Create(context.Background(), newIPAddress(ip.String(), svc), metav1.CreateOptions{}) 391 if err != nil { 392 return err 393 } 394 continue 395 } 396 if err != nil { 397 r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "UnknownError", "ClusterIPAllocation", "Unable to allocate ClusterIP [%v]: %s due to an unknown error", family, ip) 398 return fmt.Errorf("unable to allocate ClusterIP [%v]: %s for Service %s/%s due to an unknown error, will retry later: %v", family, ip, svc.Namespace, svc.Name, err) 399 } 400 401 // IPAddress that belongs to a Service must reference a Service 402 if ipAddress.Spec.ParentRef.Group != "" || 403 ipAddress.Spec.ParentRef.Resource != "services" { 404 r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPNotAllocated", "ClusterIPAllocation", "the ClusterIP [%v]: %s for Service %s/%s has a wrong reference; repairing", family, ip, svc.Namespace, svc.Name) 405 if err := r.recreateIPAddress(ipAddress.Name, svc); err != nil { 406 return err 407 } 408 continue 409 } 410 411 // IPAddress that belongs to a Service must reference the current Service 412 if ipAddress.Spec.ParentRef.Namespace != svc.Namespace || 413 ipAddress.Spec.ParentRef.Name != svc.Name { 414 // verify that there are no two Services with the same IP, otherwise 415 // it will keep deleting and recreating the same IPAddress changing the reference 416 refService, err := r.serviceLister.Services(ipAddress.Spec.ParentRef.Namespace).Get(ipAddress.Spec.ParentRef.Name) 417 if err != nil { 418 r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPNotAllocated", "ClusterIPAllocation", "the ClusterIP [%v]: %s for Service %s/%s has a wrong reference; repairing", family, ip, svc.Namespace, svc.Name) 419 if err := r.recreateIPAddress(ipAddress.Name, svc); err != nil { 420 return err 421 } 422 continue 423 } 424 // the IPAddress is duplicate but current Service is not the referenced, it has to be recreated 425 for _, clusterIP := range refService.Spec.ClusterIPs { 426 if ipAddress.Name == clusterIP { 427 r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPAlreadyAllocated", "ClusterIPAllocation", "Cluster IP [%v]:%s was assigned to multiple services; please recreate service", family, ip) 428 runtime.HandleError(fmt.Errorf("the cluster IP [%v]:%s for service %s/%s was assigned to other services %s/%s; please recreate", family, ip, svc.Namespace, svc.Name, refService.Namespace, refService.Name)) 429 break 430 } 431 } 432 } 433 434 // IPAddress must have the corresponding labels assigned by the allocator 435 if !verifyIPAddressLabels(ipAddress) { 436 if err := r.recreateIPAddress(ipAddress.Name, svc); err != nil { 437 return err 438 } 439 continue 440 } 441 442 } 443 return syncError 444 } 445 446 func (r *RepairIPAddress) recreateIPAddress(name string, svc *v1.Service) error { 447 err := r.client.NetworkingV1alpha1().IPAddresses().Delete(context.Background(), name, metav1.DeleteOptions{}) 448 if err != nil && !apierrors.IsNotFound(err) { 449 return err 450 } 451 _, err = r.client.NetworkingV1alpha1().IPAddresses().Create(context.Background(), newIPAddress(name, svc), metav1.CreateOptions{}) 452 if err != nil { 453 return err 454 } 455 return nil 456 } 457 458 func (r *RepairIPAddress) ipWorker() { 459 for r.processNextWorkIp() { 460 } 461 } 462 463 func (r *RepairIPAddress) processNextWorkIp() bool { 464 eKey, quit := r.ipQueue.Get() 465 if quit { 466 return false 467 } 468 defer r.ipQueue.Done(eKey) 469 470 err := r.syncIPAddress(eKey) 471 r.handleIPErr(err, eKey) 472 473 return true 474 } 475 476 func (r *RepairIPAddress) handleIPErr(err error, key string) { 477 if err == nil { 478 r.ipQueue.Forget(key) 479 return 480 } 481 482 if r.ipQueue.NumRequeues(key) < maxRetries { 483 klog.V(2).InfoS("Error syncing Service, retrying", "service", key, "err", err) 484 r.ipQueue.AddRateLimited(key) 485 return 486 } 487 488 klog.Warningf("Dropping Service %q out of the queue: %v", key, err) 489 r.ipQueue.Forget(key) 490 runtime.HandleError(err) 491 } 492 493 // syncIPAddress verify that the IPAddress that are owned by the ipallocator controller reference an existing Service 494 // to avoid leaking IPAddresses. IPAddresses that are owned by other controllers are not processed to avoid hotloops. 495 // IPAddress that reference Services and are part of the ClusterIP are validated in the syncService loop. 496 func (r *RepairIPAddress) syncIPAddress(key string) error { 497 ipAddress, err := r.ipAddressLister.Get(key) 498 if err != nil { 499 // nothing to do 500 return nil 501 } 502 503 // not mananged by this controller 504 if !managedByController(ipAddress) { 505 return nil 506 } 507 508 // does not reference a Service but created by the service allocator, something else have changed it, delete it 509 if ipAddress.Spec.ParentRef.Group != "" || ipAddress.Spec.ParentRef.Resource != "services" { 510 runtime.HandleError(fmt.Errorf("IPAddress %s appears to have been modified, not referencing a Service %v: cleaning up", ipAddress.Name, ipAddress.Spec.ParentRef)) 511 r.recorder.Eventf(ipAddress, nil, v1.EventTypeWarning, "IPAddressNotAllocated", "IPAddressAllocation", "IPAddress %s appears to have been modified, not referencing a Service %v: cleaning up", ipAddress.Name, ipAddress.Spec.ParentRef) 512 err := r.client.NetworkingV1alpha1().IPAddresses().Delete(context.Background(), ipAddress.Name, metav1.DeleteOptions{}) 513 if err != nil && !apierrors.IsNotFound(err) { 514 return err 515 } 516 return nil 517 } 518 519 svc, err := r.serviceLister.Services(ipAddress.Spec.ParentRef.Namespace).Get(ipAddress.Spec.ParentRef.Name) 520 if apierrors.IsNotFound(err) { 521 // cleaning all IPAddress without an owner reference IF the time since it was created is greater than 60 seconds (default timeout value on the kube-apiserver) 522 // This is required because during the Service creation there is a time that the IPAddress object exists but the Service is still being created 523 // Assume that CreationTimestamp exists. 524 ipLifetime := r.clock.Now().Sub(ipAddress.CreationTimestamp.Time) 525 gracePeriod := 60 * time.Second 526 if ipLifetime > gracePeriod { 527 runtime.HandleError(fmt.Errorf("IPAddress %s appears to have leaked: cleaning up", ipAddress.Name)) 528 r.recorder.Eventf(ipAddress, nil, v1.EventTypeWarning, "IPAddressNotAllocated", "IPAddressAllocation", "IPAddress: %s for Service %s/%s appears to have leaked: cleaning up", ipAddress.Name, ipAddress.Spec.ParentRef.Namespace, ipAddress.Spec.ParentRef.Name) 529 err := r.client.NetworkingV1alpha1().IPAddresses().Delete(context.Background(), ipAddress.Name, metav1.DeleteOptions{}) 530 if err != nil && !apierrors.IsNotFound(err) { 531 return err 532 } 533 } 534 // requeue after the grace period 535 r.ipQueue.AddAfter(key, gracePeriod-ipLifetime) 536 return nil 537 } 538 if err != nil { 539 runtime.HandleError(fmt.Errorf("unable to get parent Service for IPAddress %s due to an unknown error: %v", ipAddress, err)) 540 r.recorder.Eventf(ipAddress, nil, v1.EventTypeWarning, "UnknownError", "IPAddressAllocation", "Unable to get parent Service for IPAddress %s due to an unknown error", ipAddress) 541 return err 542 } 543 // The service exists, we have checked in previous loop that all Service to IPAddress are correct 544 // but we also have to check the reverse, that the IPAddress to Service relation is correct 545 for _, clusterIP := range svc.Spec.ClusterIPs { 546 if ipAddress.Name == clusterIP { 547 return nil 548 } 549 } 550 runtime.HandleError(fmt.Errorf("the IPAddress: %s for Service %s/%s has a wrong reference %#v; cleaning up", ipAddress.Name, svc.Name, svc.Namespace, ipAddress.Spec.ParentRef)) 551 r.recorder.Eventf(ipAddress, nil, v1.EventTypeWarning, "IPAddressWrongReference", "IPAddressAllocation", "IPAddress: %s for Service %s/%s has a wrong reference; cleaning up", ipAddress.Name, svc.Namespace, svc.Name) 552 err = r.client.NetworkingV1alpha1().IPAddresses().Delete(context.Background(), ipAddress.Name, metav1.DeleteOptions{}) 553 if err != nil && !apierrors.IsNotFound(err) { 554 return err 555 } 556 return nil 557 558 } 559 560 func (r *RepairIPAddress) cidrWorker() { 561 for r.processNextWorkCIDR() { 562 } 563 } 564 565 func (r *RepairIPAddress) processNextWorkCIDR() bool { 566 eKey, quit := r.cidrQueue.Get() 567 if quit { 568 return false 569 } 570 defer r.cidrQueue.Done(eKey) 571 572 err := r.syncCIDRs() 573 r.handleCIDRErr(err, eKey) 574 575 return true 576 } 577 578 func (r *RepairIPAddress) handleCIDRErr(err error, key string) { 579 if err == nil { 580 r.cidrQueue.Forget(key) 581 return 582 } 583 584 if r.cidrQueue.NumRequeues(key) < maxRetries { 585 klog.V(2).InfoS("Error syncing ServiceCIDR, retrying", "serviceCIDR", key, "err", err) 586 r.cidrQueue.AddRateLimited(key) 587 return 588 } 589 590 klog.Warningf("Dropping ServiceCIDR %q out of the queue: %v", key, err) 591 r.cidrQueue.Forget(key) 592 runtime.HandleError(err) 593 } 594 595 // syncCIDRs rebuilds the radix tree based from the informers cache 596 func (r *RepairIPAddress) syncCIDRs() error { 597 serviceCIDRList, err := r.serviceCIDRLister.List(labels.Everything()) 598 if err != nil { 599 return err 600 } 601 602 tree := iptree.New[string]() 603 for _, serviceCIDR := range serviceCIDRList { 604 for _, cidr := range serviceCIDR.Spec.CIDRs { 605 if prefix, err := netip.ParsePrefix(cidr); err == nil { // it can not fail since is already validated 606 tree.InsertPrefix(prefix, serviceCIDR.Name) 607 } 608 } 609 } 610 r.muTree.Lock() 611 defer r.muTree.Unlock() 612 r.tree = tree 613 return nil 614 } 615 616 func newIPAddress(name string, svc *v1.Service) *networkingv1alpha1.IPAddress { 617 family := string(v1.IPv4Protocol) 618 if netutils.IsIPv6String(name) { 619 family = string(v1.IPv6Protocol) 620 } 621 return &networkingv1alpha1.IPAddress{ 622 ObjectMeta: metav1.ObjectMeta{ 623 Name: name, 624 Labels: map[string]string{ 625 networkingv1alpha1.LabelIPAddressFamily: family, 626 networkingv1alpha1.LabelManagedBy: ipallocator.ControllerName, 627 }, 628 }, 629 Spec: networkingv1alpha1.IPAddressSpec{ 630 ParentRef: serviceToRef(svc), 631 }, 632 } 633 } 634 635 func serviceToRef(svc *v1.Service) *networkingv1alpha1.ParentReference { 636 if svc == nil { 637 return nil 638 } 639 640 return &networkingv1alpha1.ParentReference{ 641 Group: "", 642 Resource: "services", 643 Namespace: svc.Namespace, 644 Name: svc.Name, 645 } 646 } 647 648 func getFamilyByIP(ip net.IP) v1.IPFamily { 649 if netutils.IsIPv6(ip) { 650 return v1.IPv6Protocol 651 } 652 return v1.IPv4Protocol 653 } 654 655 // managedByController returns true if the controller of the provided 656 // EndpointSlices is the EndpointSlice controller. 657 func managedByController(ip *networkingv1alpha1.IPAddress) bool { 658 managedBy, ok := ip.Labels[networkingv1alpha1.LabelManagedBy] 659 if !ok { 660 return false 661 } 662 return managedBy == ipallocator.ControllerName 663 } 664 665 func verifyIPAddressLabels(ip *networkingv1alpha1.IPAddress) bool { 666 labelFamily, ok := ip.Labels[networkingv1alpha1.LabelIPAddressFamily] 667 if !ok { 668 return false 669 } 670 671 family := string(v1.IPv4Protocol) 672 if netutils.IsIPv6String(ip.Name) { 673 family = string(v1.IPv6Protocol) 674 } 675 if family != labelFamily { 676 return false 677 } 678 return managedByController(ip) 679 } 680 681 // TODO(aojea) move to utils, already in pkg/registry/core/service/ipallocator/cidrallocator.go 682 // ipToAddr converts a net.IP to a netip.Addr 683 // if the net.IP is not valid it returns an empty netip.Addr{} 684 func ipToAddr(ip net.IP) netip.Addr { 685 // https://pkg.go.dev/net/netip#AddrFromSlice can return an IPv4 in IPv6 format 686 // so we have to check the IP family to return exactly the format that we want 687 // address, _ := netip.AddrFromSlice(net.ParseIPSloppy(192.168.0.1)) returns 688 // an address like ::ffff:192.168.0.1/32 689 bytes := ip.To4() 690 if bytes == nil { 691 bytes = ip.To16() 692 } 693 // AddrFromSlice returns Addr{}, false if the input is invalid. 694 address, _ := netip.AddrFromSlice(bytes) 695 return address 696 }