k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/registry/core/service/ipallocator/controller/repairip.go (about)

     1  /*
     2  Copyright 2023 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controller
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"net"
    23  	"net/netip"
    24  	"sync"
    25  	"time"
    26  
    27  	v1 "k8s.io/api/core/v1"
    28  	networkingv1alpha1 "k8s.io/api/networking/v1alpha1"
    29  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    30  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    31  	"k8s.io/apimachinery/pkg/labels"
    32  	"k8s.io/apimachinery/pkg/util/runtime"
    33  	"k8s.io/apimachinery/pkg/util/wait"
    34  	coreinformers "k8s.io/client-go/informers/core/v1"
    35  	networkinginformers "k8s.io/client-go/informers/networking/v1alpha1"
    36  	"k8s.io/client-go/kubernetes"
    37  	corelisters "k8s.io/client-go/listers/core/v1"
    38  	networkinglisters "k8s.io/client-go/listers/networking/v1alpha1"
    39  	"k8s.io/client-go/tools/cache"
    40  	"k8s.io/client-go/tools/events"
    41  	"k8s.io/client-go/util/retry"
    42  	"k8s.io/client-go/util/workqueue"
    43  	"k8s.io/klog/v2"
    44  	"k8s.io/kubernetes/pkg/api/legacyscheme"
    45  	"k8s.io/kubernetes/pkg/apis/core/v1/helper"
    46  	"k8s.io/kubernetes/pkg/registry/core/service/ipallocator"
    47  	"k8s.io/kubernetes/pkg/util/iptree"
    48  	"k8s.io/utils/clock"
    49  	netutils "k8s.io/utils/net"
    50  )
    51  
    52  const (
    53  	// maxRetries is the number of times a service will be retried before it is dropped out of the queue.
    54  	// With the current rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers represent the
    55  	// sequence of delays between successive queuings of a service.
    56  	//
    57  	// 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s
    58  	maxRetries = 15
    59  	workers    = 5
    60  )
    61  
    62  // Repair is a controller loop that examines all service ClusterIP allocations and logs any errors,
    63  // and then creates the accurate list of IPAddresses objects with all allocated ClusterIPs.
    64  //
    65  // Handles:
    66  // * Duplicate ClusterIP assignments caused by operator action or undetected race conditions
    67  // * Allocations to services that were not actually created due to a crash or powerloss
    68  // * Migrates old versions of Kubernetes services into the new ipallocator automatically
    69  //   creating the corresponding IPAddress objects
    70  // * IPAddress objects with wrong references or labels
    71  //
    72  // Logs about:
    73  // * ClusterIPs that do not match the currently configured range
    74  //
    75  // There is a one-to-one relation between Service ClusterIPs and IPAddresses.
    76  // The bidirectional relation is achieved using the following fields:
    77  // Service.Spec.Cluster == IPAddress.Name AND IPAddress.ParentRef == Service
    78  //
    79  // The controller use two reconcile loops, one for Services and other for IPAddress.
    80  // The Service reconcile loop verifies the bidirectional relation exists and is correct.
    81  // 1. Service_X [ClusterIP_X]  <------>  IPAddress_X [Ref:Service_X]   ok
    82  // 2. Service_Y [ClusterIP_Y]  <------>  IPAddress_Y [Ref:GatewayA]    !ok, wrong reference
    83  // 3. Service_Z [ClusterIP_Z]  <------>  							   !ok, missing IPAddress
    84  // 4. Service_A [ClusterIP_A]  <------>  IPAddress_A [Ref:Service_B]   !ok, duplicate IPAddress
    85  //    Service_B [ClusterIP_A]  <------> 								only one service can verify the relation
    86  // The IPAddress reconcile loop checks there are no orphan IPAddresses, the rest of the
    87  // cases are covered by the Services loop
    88  // 1.                          <------>  IPAddress_Z [Ref:Service_C]   !ok, orphan IPAddress
    89  
    90  type RepairIPAddress struct {
    91  	client   kubernetes.Interface
    92  	interval time.Duration
    93  
    94  	serviceLister  corelisters.ServiceLister
    95  	servicesSynced cache.InformerSynced
    96  
    97  	serviceCIDRLister networkinglisters.ServiceCIDRLister
    98  	serviceCIDRSynced cache.InformerSynced
    99  
   100  	ipAddressLister networkinglisters.IPAddressLister
   101  	ipAddressSynced cache.InformerSynced
   102  
   103  	cidrQueue        workqueue.TypedRateLimitingInterface[string]
   104  	svcQueue         workqueue.TypedRateLimitingInterface[string]
   105  	ipQueue          workqueue.TypedRateLimitingInterface[string]
   106  	workerLoopPeriod time.Duration
   107  
   108  	muTree sync.Mutex
   109  	tree   *iptree.Tree[string]
   110  
   111  	broadcaster events.EventBroadcaster
   112  	recorder    events.EventRecorder
   113  	clock       clock.Clock
   114  }
   115  
   116  // NewRepair creates a controller that periodically ensures that all clusterIPs are uniquely allocated across the cluster
   117  // and generates informational warnings for a cluster that is not in sync.
   118  func NewRepairIPAddress(interval time.Duration,
   119  	client kubernetes.Interface,
   120  	serviceInformer coreinformers.ServiceInformer,
   121  	serviceCIDRInformer networkinginformers.ServiceCIDRInformer,
   122  	ipAddressInformer networkinginformers.IPAddressInformer) *RepairIPAddress {
   123  	eventBroadcaster := events.NewBroadcaster(&events.EventSinkImpl{Interface: client.EventsV1()})
   124  	recorder := eventBroadcaster.NewRecorder(legacyscheme.Scheme, "ipallocator-repair-controller")
   125  
   126  	r := &RepairIPAddress{
   127  		interval:          interval,
   128  		client:            client,
   129  		serviceLister:     serviceInformer.Lister(),
   130  		servicesSynced:    serviceInformer.Informer().HasSynced,
   131  		serviceCIDRLister: serviceCIDRInformer.Lister(),
   132  		serviceCIDRSynced: serviceCIDRInformer.Informer().HasSynced,
   133  		ipAddressLister:   ipAddressInformer.Lister(),
   134  		ipAddressSynced:   ipAddressInformer.Informer().HasSynced,
   135  		cidrQueue: workqueue.NewTypedRateLimitingQueueWithConfig(
   136  			workqueue.DefaultTypedControllerRateLimiter[string](),
   137  			workqueue.TypedRateLimitingQueueConfig[string]{Name: "servicecidrs"},
   138  		),
   139  		svcQueue: workqueue.NewTypedRateLimitingQueueWithConfig(
   140  			workqueue.DefaultTypedControllerRateLimiter[string](),
   141  			workqueue.TypedRateLimitingQueueConfig[string]{Name: "services"},
   142  		),
   143  		ipQueue: workqueue.NewTypedRateLimitingQueueWithConfig(
   144  			workqueue.DefaultTypedControllerRateLimiter[string](),
   145  			workqueue.TypedRateLimitingQueueConfig[string]{Name: "ipaddresses"},
   146  		),
   147  		tree:             iptree.New[string](),
   148  		workerLoopPeriod: time.Second,
   149  		broadcaster:      eventBroadcaster,
   150  		recorder:         recorder,
   151  		clock:            clock.RealClock{},
   152  	}
   153  
   154  	_, _ = serviceInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
   155  		AddFunc: func(obj interface{}) {
   156  			key, err := cache.MetaNamespaceKeyFunc(obj)
   157  			if err == nil {
   158  				r.svcQueue.Add(key)
   159  			}
   160  		},
   161  		UpdateFunc: func(old interface{}, new interface{}) {
   162  			key, err := cache.MetaNamespaceKeyFunc(new)
   163  			if err == nil {
   164  				r.svcQueue.Add(key)
   165  			}
   166  		},
   167  		DeleteFunc: func(obj interface{}) {
   168  			// IndexerInformer uses a delta queue, therefore for deletes we have to use this
   169  			// key function.
   170  			key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
   171  			if err == nil {
   172  				r.svcQueue.Add(key)
   173  			}
   174  		},
   175  	}, interval)
   176  
   177  	_, _ = serviceCIDRInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   178  		AddFunc: func(obj interface{}) {
   179  			key, err := cache.MetaNamespaceKeyFunc(obj)
   180  			if err == nil {
   181  				r.cidrQueue.Add(key)
   182  			}
   183  		},
   184  		UpdateFunc: func(old interface{}, new interface{}) {
   185  			key, err := cache.MetaNamespaceKeyFunc(new)
   186  			if err == nil {
   187  				r.cidrQueue.Add(key)
   188  			}
   189  		},
   190  		DeleteFunc: func(obj interface{}) {
   191  			// IndexerInformer uses a delta queue, therefore for deletes we have to use this
   192  			// key function.
   193  			key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
   194  			if err == nil {
   195  				r.cidrQueue.Add(key)
   196  			}
   197  		},
   198  	})
   199  
   200  	ipAddressInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
   201  		AddFunc: func(obj interface{}) {
   202  			key, err := cache.MetaNamespaceKeyFunc(obj)
   203  			if err == nil {
   204  				r.ipQueue.Add(key)
   205  			}
   206  		},
   207  		UpdateFunc: func(old interface{}, new interface{}) {
   208  			key, err := cache.MetaNamespaceKeyFunc(new)
   209  			if err == nil {
   210  				r.ipQueue.Add(key)
   211  			}
   212  		},
   213  		DeleteFunc: func(obj interface{}) {
   214  			// IndexerInformer uses a delta queue, therefore for deletes we have to use this
   215  			// key function.
   216  			key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
   217  			if err == nil {
   218  				r.ipQueue.Add(key)
   219  			}
   220  		},
   221  	}, interval)
   222  
   223  	return r
   224  }
   225  
   226  // RunUntil starts the controller until the provided ch is closed.
   227  func (r *RepairIPAddress) RunUntil(onFirstSuccess func(), stopCh chan struct{}) {
   228  	defer r.cidrQueue.ShutDown()
   229  	defer r.ipQueue.ShutDown()
   230  	defer r.svcQueue.ShutDown()
   231  	r.broadcaster.StartRecordingToSink(stopCh)
   232  	defer r.broadcaster.Shutdown()
   233  
   234  	klog.Info("Starting ipallocator-repair-controller")
   235  	defer klog.Info("Shutting down ipallocator-repair-controller")
   236  
   237  	if !cache.WaitForNamedCacheSync("ipallocator-repair-controller", stopCh, r.ipAddressSynced, r.servicesSynced, r.serviceCIDRSynced) {
   238  		return
   239  	}
   240  
   241  	// First sync goes through all the Services and IPAddresses in the cache,
   242  	// once synced, it signals the main loop and works using the handlers, since
   243  	// it's less expensive and more optimal.
   244  	if err := r.runOnce(); err != nil {
   245  		runtime.HandleError(err)
   246  		return
   247  	}
   248  	onFirstSuccess()
   249  
   250  	// serialize the operations on ServiceCIDRs
   251  	go wait.Until(r.cidrWorker, r.workerLoopPeriod, stopCh)
   252  
   253  	for i := 0; i < workers; i++ {
   254  		go wait.Until(r.ipWorker, r.workerLoopPeriod, stopCh)
   255  		go wait.Until(r.svcWorker, r.workerLoopPeriod, stopCh)
   256  	}
   257  
   258  	<-stopCh
   259  }
   260  
   261  // runOnce verifies the state of the ClusterIP allocations and returns an error if an unrecoverable problem occurs.
   262  func (r *RepairIPAddress) runOnce() error {
   263  	return retry.RetryOnConflict(retry.DefaultBackoff, r.doRunOnce)
   264  }
   265  
   266  // doRunOnce verifies the state of the ClusterIP allocations and returns an error if an unrecoverable problem occurs.
   267  func (r *RepairIPAddress) doRunOnce() error {
   268  	services, err := r.serviceLister.List(labels.Everything())
   269  	if err != nil {
   270  		return fmt.Errorf("unable to refresh the service IP block: %v", err)
   271  	}
   272  
   273  	// Check every Service's ClusterIP, and rebuild the state as we think it should be.
   274  	for _, svc := range services {
   275  		key, err := cache.MetaNamespaceKeyFunc(svc)
   276  		if err != nil {
   277  			return err
   278  		}
   279  		err = r.syncService(key)
   280  		if err != nil {
   281  			return err
   282  		}
   283  	}
   284  
   285  	// We have checked that every Service has its corresponding IP.
   286  	// Check that there is no IP created by the allocator without
   287  	// a Service associated.
   288  	ipLabelSelector := labels.Set(map[string]string{
   289  		networkingv1alpha1.LabelManagedBy: ipallocator.ControllerName,
   290  	}).AsSelectorPreValidated()
   291  	ipAddresses, err := r.ipAddressLister.List(ipLabelSelector)
   292  	if err != nil {
   293  		return fmt.Errorf("unable to refresh the IPAddress block: %v", err)
   294  	}
   295  	// Check every IPAddress matches the corresponding Service, and rebuild the state as we think it should be.
   296  	for _, ipAddress := range ipAddresses {
   297  		key, err := cache.MetaNamespaceKeyFunc(ipAddress)
   298  		if err != nil {
   299  			return err
   300  		}
   301  		err = r.syncIPAddress(key)
   302  		if err != nil {
   303  			return err
   304  		}
   305  	}
   306  
   307  	return nil
   308  }
   309  
   310  func (r *RepairIPAddress) svcWorker() {
   311  	for r.processNextWorkSvc() {
   312  	}
   313  }
   314  
   315  func (r *RepairIPAddress) processNextWorkSvc() bool {
   316  	eKey, quit := r.svcQueue.Get()
   317  	if quit {
   318  		return false
   319  	}
   320  	defer r.svcQueue.Done(eKey)
   321  
   322  	err := r.syncService(eKey)
   323  	r.handleSvcErr(err, eKey)
   324  
   325  	return true
   326  }
   327  
   328  func (r *RepairIPAddress) handleSvcErr(err error, key string) {
   329  	if err == nil {
   330  		r.svcQueue.Forget(key)
   331  		return
   332  	}
   333  
   334  	if r.svcQueue.NumRequeues(key) < maxRetries {
   335  		klog.V(2).InfoS("Error syncing Service, retrying", "service", key, "err", err)
   336  		r.svcQueue.AddRateLimited(key)
   337  		return
   338  	}
   339  
   340  	klog.Warningf("Dropping Service %q out of the queue: %v", key, err)
   341  	r.svcQueue.Forget(key)
   342  	runtime.HandleError(err)
   343  }
   344  
   345  // syncServices reconcile the Service ClusterIPs to verify that each one has the corresponding IPAddress object associated
   346  func (r *RepairIPAddress) syncService(key string) error {
   347  	var syncError error
   348  	namespace, name, err := cache.SplitMetaNamespaceKey(key)
   349  	if err != nil {
   350  		return err
   351  	}
   352  	svc, err := r.serviceLister.Services(namespace).Get(name)
   353  	if err != nil {
   354  		// nothing to do
   355  		return nil
   356  	}
   357  	if !helper.IsServiceIPSet(svc) {
   358  		// didn't need a ClusterIP
   359  		return nil
   360  	}
   361  
   362  	for _, clusterIP := range svc.Spec.ClusterIPs {
   363  		ip := netutils.ParseIPSloppy(clusterIP)
   364  		if ip == nil {
   365  			// ClusterIP is corrupt, ClusterIPs are already validated, but double checking here
   366  			// in case there are some inconsistencies with the parsers
   367  			r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPNotValid", "ClusterIPValidation", "Cluster IP %s is not a valid IP; please recreate Service", ip)
   368  			runtime.HandleError(fmt.Errorf("the ClusterIP %s for Service %s/%s is not a valid IP; please recreate Service", ip, svc.Namespace, svc.Name))
   369  			continue
   370  		}
   371  		// TODO(aojea) Refactor to abstract the IPs checks
   372  		family := getFamilyByIP(ip)
   373  
   374  		r.muTree.Lock()
   375  		prefixes := r.tree.GetHostIPPrefixMatches(ipToAddr(ip))
   376  		r.muTree.Unlock()
   377  		if len(prefixes) == 0 {
   378  			// ClusterIP is out of range
   379  			r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPOutOfRange", "ClusterIPAllocation", "Cluster IP [%v]: %s is not within any configured Service CIDR; please recreate service", family, ip)
   380  			runtime.HandleError(fmt.Errorf("the ClusterIP [%v]: %s for Service %s/%s is not within any service CIDR; please recreate", family, ip, svc.Namespace, svc.Name))
   381  			continue
   382  		}
   383  
   384  		// Get the IPAddress object associated to the ClusterIP
   385  		ipAddress, err := r.ipAddressLister.Get(ip.String())
   386  		if apierrors.IsNotFound(err) {
   387  			// ClusterIP doesn't seem to be allocated, create it.
   388  			r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPNotAllocated", "ClusterIPAllocation", "Cluster IP [%v]: %s is not allocated; repairing", family, ip)
   389  			runtime.HandleError(fmt.Errorf("the ClusterIP [%v]: %s for Service %s/%s is not allocated; repairing", family, ip, svc.Namespace, svc.Name))
   390  			_, err := r.client.NetworkingV1alpha1().IPAddresses().Create(context.Background(), newIPAddress(ip.String(), svc), metav1.CreateOptions{})
   391  			if err != nil {
   392  				return err
   393  			}
   394  			continue
   395  		}
   396  		if err != nil {
   397  			r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "UnknownError", "ClusterIPAllocation", "Unable to allocate ClusterIP [%v]: %s due to an unknown error", family, ip)
   398  			return fmt.Errorf("unable to allocate ClusterIP [%v]: %s for Service %s/%s due to an unknown error, will retry later: %v", family, ip, svc.Namespace, svc.Name, err)
   399  		}
   400  
   401  		// IPAddress that belongs to a Service must reference a Service
   402  		if ipAddress.Spec.ParentRef.Group != "" ||
   403  			ipAddress.Spec.ParentRef.Resource != "services" {
   404  			r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPNotAllocated", "ClusterIPAllocation", "the ClusterIP [%v]: %s for Service %s/%s has a wrong reference; repairing", family, ip, svc.Namespace, svc.Name)
   405  			if err := r.recreateIPAddress(ipAddress.Name, svc); err != nil {
   406  				return err
   407  			}
   408  			continue
   409  		}
   410  
   411  		// IPAddress that belongs to a Service must reference the current Service
   412  		if ipAddress.Spec.ParentRef.Namespace != svc.Namespace ||
   413  			ipAddress.Spec.ParentRef.Name != svc.Name {
   414  			// verify that there are no two Services with the same IP, otherwise
   415  			// it will keep deleting and recreating the same IPAddress changing the reference
   416  			refService, err := r.serviceLister.Services(ipAddress.Spec.ParentRef.Namespace).Get(ipAddress.Spec.ParentRef.Name)
   417  			if err != nil {
   418  				r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPNotAllocated", "ClusterIPAllocation", "the ClusterIP [%v]: %s for Service %s/%s has a wrong reference; repairing", family, ip, svc.Namespace, svc.Name)
   419  				if err := r.recreateIPAddress(ipAddress.Name, svc); err != nil {
   420  					return err
   421  				}
   422  				continue
   423  			}
   424  			// the IPAddress is duplicate but current Service is not the referenced, it has to be recreated
   425  			for _, clusterIP := range refService.Spec.ClusterIPs {
   426  				if ipAddress.Name == clusterIP {
   427  					r.recorder.Eventf(svc, nil, v1.EventTypeWarning, "ClusterIPAlreadyAllocated", "ClusterIPAllocation", "Cluster IP [%v]:%s was assigned to multiple services; please recreate service", family, ip)
   428  					runtime.HandleError(fmt.Errorf("the cluster IP [%v]:%s for service %s/%s was assigned to other services %s/%s; please recreate", family, ip, svc.Namespace, svc.Name, refService.Namespace, refService.Name))
   429  					break
   430  				}
   431  			}
   432  		}
   433  
   434  		// IPAddress must have the corresponding labels assigned by the allocator
   435  		if !verifyIPAddressLabels(ipAddress) {
   436  			if err := r.recreateIPAddress(ipAddress.Name, svc); err != nil {
   437  				return err
   438  			}
   439  			continue
   440  		}
   441  
   442  	}
   443  	return syncError
   444  }
   445  
   446  func (r *RepairIPAddress) recreateIPAddress(name string, svc *v1.Service) error {
   447  	err := r.client.NetworkingV1alpha1().IPAddresses().Delete(context.Background(), name, metav1.DeleteOptions{})
   448  	if err != nil && !apierrors.IsNotFound(err) {
   449  		return err
   450  	}
   451  	_, err = r.client.NetworkingV1alpha1().IPAddresses().Create(context.Background(), newIPAddress(name, svc), metav1.CreateOptions{})
   452  	if err != nil {
   453  		return err
   454  	}
   455  	return nil
   456  }
   457  
   458  func (r *RepairIPAddress) ipWorker() {
   459  	for r.processNextWorkIp() {
   460  	}
   461  }
   462  
   463  func (r *RepairIPAddress) processNextWorkIp() bool {
   464  	eKey, quit := r.ipQueue.Get()
   465  	if quit {
   466  		return false
   467  	}
   468  	defer r.ipQueue.Done(eKey)
   469  
   470  	err := r.syncIPAddress(eKey)
   471  	r.handleIPErr(err, eKey)
   472  
   473  	return true
   474  }
   475  
   476  func (r *RepairIPAddress) handleIPErr(err error, key string) {
   477  	if err == nil {
   478  		r.ipQueue.Forget(key)
   479  		return
   480  	}
   481  
   482  	if r.ipQueue.NumRequeues(key) < maxRetries {
   483  		klog.V(2).InfoS("Error syncing Service, retrying", "service", key, "err", err)
   484  		r.ipQueue.AddRateLimited(key)
   485  		return
   486  	}
   487  
   488  	klog.Warningf("Dropping Service %q out of the queue: %v", key, err)
   489  	r.ipQueue.Forget(key)
   490  	runtime.HandleError(err)
   491  }
   492  
   493  // syncIPAddress verify that the IPAddress that are owned by the ipallocator controller reference an existing Service
   494  // to avoid leaking IPAddresses. IPAddresses that are owned by other controllers are not processed to avoid hotloops.
   495  // IPAddress that reference Services and are part of the ClusterIP are validated in the syncService loop.
   496  func (r *RepairIPAddress) syncIPAddress(key string) error {
   497  	ipAddress, err := r.ipAddressLister.Get(key)
   498  	if err != nil {
   499  		// nothing to do
   500  		return nil
   501  	}
   502  
   503  	// not mananged by this controller
   504  	if !managedByController(ipAddress) {
   505  		return nil
   506  	}
   507  
   508  	// does not reference a Service but created by the service allocator, something else have changed it, delete it
   509  	if ipAddress.Spec.ParentRef.Group != "" || ipAddress.Spec.ParentRef.Resource != "services" {
   510  		runtime.HandleError(fmt.Errorf("IPAddress %s appears to have been modified, not referencing a Service %v: cleaning up", ipAddress.Name, ipAddress.Spec.ParentRef))
   511  		r.recorder.Eventf(ipAddress, nil, v1.EventTypeWarning, "IPAddressNotAllocated", "IPAddressAllocation", "IPAddress %s appears to have been modified, not referencing a Service %v: cleaning up", ipAddress.Name, ipAddress.Spec.ParentRef)
   512  		err := r.client.NetworkingV1alpha1().IPAddresses().Delete(context.Background(), ipAddress.Name, metav1.DeleteOptions{})
   513  		if err != nil && !apierrors.IsNotFound(err) {
   514  			return err
   515  		}
   516  		return nil
   517  	}
   518  
   519  	svc, err := r.serviceLister.Services(ipAddress.Spec.ParentRef.Namespace).Get(ipAddress.Spec.ParentRef.Name)
   520  	if apierrors.IsNotFound(err) {
   521  		// cleaning all IPAddress without an owner reference IF the time since it was created is greater than 60 seconds (default timeout value on the kube-apiserver)
   522  		// This is required because during the Service creation there is a time that the IPAddress object exists but the Service is still being created
   523  		// Assume that CreationTimestamp exists.
   524  		ipLifetime := r.clock.Now().Sub(ipAddress.CreationTimestamp.Time)
   525  		gracePeriod := 60 * time.Second
   526  		if ipLifetime > gracePeriod {
   527  			runtime.HandleError(fmt.Errorf("IPAddress %s appears to have leaked: cleaning up", ipAddress.Name))
   528  			r.recorder.Eventf(ipAddress, nil, v1.EventTypeWarning, "IPAddressNotAllocated", "IPAddressAllocation", "IPAddress: %s for Service %s/%s appears to have leaked: cleaning up", ipAddress.Name, ipAddress.Spec.ParentRef.Namespace, ipAddress.Spec.ParentRef.Name)
   529  			err := r.client.NetworkingV1alpha1().IPAddresses().Delete(context.Background(), ipAddress.Name, metav1.DeleteOptions{})
   530  			if err != nil && !apierrors.IsNotFound(err) {
   531  				return err
   532  			}
   533  		}
   534  		// requeue after the grace period
   535  		r.ipQueue.AddAfter(key, gracePeriod-ipLifetime)
   536  		return nil
   537  	}
   538  	if err != nil {
   539  		runtime.HandleError(fmt.Errorf("unable to get parent Service for IPAddress %s due to an unknown error: %v", ipAddress, err))
   540  		r.recorder.Eventf(ipAddress, nil, v1.EventTypeWarning, "UnknownError", "IPAddressAllocation", "Unable to get parent Service for IPAddress %s due to an unknown error", ipAddress)
   541  		return err
   542  	}
   543  	// The service exists, we have checked in previous loop that all Service to IPAddress are correct
   544  	// but we also have to check the reverse, that the IPAddress to Service relation is correct
   545  	for _, clusterIP := range svc.Spec.ClusterIPs {
   546  		if ipAddress.Name == clusterIP {
   547  			return nil
   548  		}
   549  	}
   550  	runtime.HandleError(fmt.Errorf("the IPAddress: %s for Service %s/%s has a wrong reference %#v; cleaning up", ipAddress.Name, svc.Name, svc.Namespace, ipAddress.Spec.ParentRef))
   551  	r.recorder.Eventf(ipAddress, nil, v1.EventTypeWarning, "IPAddressWrongReference", "IPAddressAllocation", "IPAddress: %s for Service %s/%s has a wrong reference; cleaning up", ipAddress.Name, svc.Namespace, svc.Name)
   552  	err = r.client.NetworkingV1alpha1().IPAddresses().Delete(context.Background(), ipAddress.Name, metav1.DeleteOptions{})
   553  	if err != nil && !apierrors.IsNotFound(err) {
   554  		return err
   555  	}
   556  	return nil
   557  
   558  }
   559  
   560  func (r *RepairIPAddress) cidrWorker() {
   561  	for r.processNextWorkCIDR() {
   562  	}
   563  }
   564  
   565  func (r *RepairIPAddress) processNextWorkCIDR() bool {
   566  	eKey, quit := r.cidrQueue.Get()
   567  	if quit {
   568  		return false
   569  	}
   570  	defer r.cidrQueue.Done(eKey)
   571  
   572  	err := r.syncCIDRs()
   573  	r.handleCIDRErr(err, eKey)
   574  
   575  	return true
   576  }
   577  
   578  func (r *RepairIPAddress) handleCIDRErr(err error, key string) {
   579  	if err == nil {
   580  		r.cidrQueue.Forget(key)
   581  		return
   582  	}
   583  
   584  	if r.cidrQueue.NumRequeues(key) < maxRetries {
   585  		klog.V(2).InfoS("Error syncing ServiceCIDR, retrying", "serviceCIDR", key, "err", err)
   586  		r.cidrQueue.AddRateLimited(key)
   587  		return
   588  	}
   589  
   590  	klog.Warningf("Dropping ServiceCIDR %q out of the queue: %v", key, err)
   591  	r.cidrQueue.Forget(key)
   592  	runtime.HandleError(err)
   593  }
   594  
   595  // syncCIDRs rebuilds the radix tree based from the informers cache
   596  func (r *RepairIPAddress) syncCIDRs() error {
   597  	serviceCIDRList, err := r.serviceCIDRLister.List(labels.Everything())
   598  	if err != nil {
   599  		return err
   600  	}
   601  
   602  	tree := iptree.New[string]()
   603  	for _, serviceCIDR := range serviceCIDRList {
   604  		for _, cidr := range serviceCIDR.Spec.CIDRs {
   605  			if prefix, err := netip.ParsePrefix(cidr); err == nil { // it can not fail since is already validated
   606  				tree.InsertPrefix(prefix, serviceCIDR.Name)
   607  			}
   608  		}
   609  	}
   610  	r.muTree.Lock()
   611  	defer r.muTree.Unlock()
   612  	r.tree = tree
   613  	return nil
   614  }
   615  
   616  func newIPAddress(name string, svc *v1.Service) *networkingv1alpha1.IPAddress {
   617  	family := string(v1.IPv4Protocol)
   618  	if netutils.IsIPv6String(name) {
   619  		family = string(v1.IPv6Protocol)
   620  	}
   621  	return &networkingv1alpha1.IPAddress{
   622  		ObjectMeta: metav1.ObjectMeta{
   623  			Name: name,
   624  			Labels: map[string]string{
   625  				networkingv1alpha1.LabelIPAddressFamily: family,
   626  				networkingv1alpha1.LabelManagedBy:       ipallocator.ControllerName,
   627  			},
   628  		},
   629  		Spec: networkingv1alpha1.IPAddressSpec{
   630  			ParentRef: serviceToRef(svc),
   631  		},
   632  	}
   633  }
   634  
   635  func serviceToRef(svc *v1.Service) *networkingv1alpha1.ParentReference {
   636  	if svc == nil {
   637  		return nil
   638  	}
   639  
   640  	return &networkingv1alpha1.ParentReference{
   641  		Group:     "",
   642  		Resource:  "services",
   643  		Namespace: svc.Namespace,
   644  		Name:      svc.Name,
   645  	}
   646  }
   647  
   648  func getFamilyByIP(ip net.IP) v1.IPFamily {
   649  	if netutils.IsIPv6(ip) {
   650  		return v1.IPv6Protocol
   651  	}
   652  	return v1.IPv4Protocol
   653  }
   654  
   655  // managedByController returns true if the controller of the provided
   656  // EndpointSlices is the EndpointSlice controller.
   657  func managedByController(ip *networkingv1alpha1.IPAddress) bool {
   658  	managedBy, ok := ip.Labels[networkingv1alpha1.LabelManagedBy]
   659  	if !ok {
   660  		return false
   661  	}
   662  	return managedBy == ipallocator.ControllerName
   663  }
   664  
   665  func verifyIPAddressLabels(ip *networkingv1alpha1.IPAddress) bool {
   666  	labelFamily, ok := ip.Labels[networkingv1alpha1.LabelIPAddressFamily]
   667  	if !ok {
   668  		return false
   669  	}
   670  
   671  	family := string(v1.IPv4Protocol)
   672  	if netutils.IsIPv6String(ip.Name) {
   673  		family = string(v1.IPv6Protocol)
   674  	}
   675  	if family != labelFamily {
   676  		return false
   677  	}
   678  	return managedByController(ip)
   679  }
   680  
   681  // TODO(aojea) move to utils, already in pkg/registry/core/service/ipallocator/cidrallocator.go
   682  // ipToAddr converts a net.IP to a netip.Addr
   683  // if the net.IP is not valid it returns an empty netip.Addr{}
   684  func ipToAddr(ip net.IP) netip.Addr {
   685  	// https://pkg.go.dev/net/netip#AddrFromSlice can return an IPv4 in IPv6 format
   686  	// so we have to check the IP family to return exactly the format that we want
   687  	// address, _ := netip.AddrFromSlice(net.ParseIPSloppy(192.168.0.1)) returns
   688  	// an address like ::ffff:192.168.0.1/32
   689  	bytes := ip.To4()
   690  	if bytes == nil {
   691  		bytes = ip.To16()
   692  	}
   693  	// AddrFromSlice returns Addr{}, false if the input is invalid.
   694  	address, _ := netip.AddrFromSlice(bytes)
   695  	return address
   696  }