k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/registry/core/service/ipallocator/cidrallocator.go (about)

     1  /*
     2  Copyright 2023 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package ipallocator
    18  
    19  import (
    20  	"fmt"
    21  	"net"
    22  	"net/netip"
    23  	"sync"
    24  	"time"
    25  
    26  	v1 "k8s.io/api/core/v1"
    27  	networkingv1alpha1 "k8s.io/api/networking/v1alpha1"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/labels"
    30  	utilerrors "k8s.io/apimachinery/pkg/util/errors"
    31  	"k8s.io/apimachinery/pkg/util/runtime"
    32  	"k8s.io/apimachinery/pkg/util/sets"
    33  	"k8s.io/apimachinery/pkg/util/wait"
    34  	networkingv1alpha1informers "k8s.io/client-go/informers/networking/v1alpha1"
    35  	networkingv1alpha1client "k8s.io/client-go/kubernetes/typed/networking/v1alpha1"
    36  	networkingv1alpha1listers "k8s.io/client-go/listers/networking/v1alpha1"
    37  	"k8s.io/client-go/tools/cache"
    38  	"k8s.io/client-go/util/workqueue"
    39  	"k8s.io/klog/v2"
    40  	api "k8s.io/kubernetes/pkg/apis/core"
    41  	"k8s.io/kubernetes/pkg/util/iptree"
    42  	netutils "k8s.io/utils/net"
    43  )
    44  
    45  // MetaAllocator maintains a Tree with the ServiceCIDRs containing an IP Allocator
    46  // on the nodes. Since each allocator doesn't store the IPAddresses because it reads
    47  // them from the informer cache, it is cheap to create and delete IP Allocators.
    48  // MetaAllocator forwards the request to any of the internal allocators that has free
    49  // addresses.
    50  
    51  // MetaAllocator implements current allocator interface using
    52  // ServiceCIDR and IPAddress API objects.
    53  type MetaAllocator struct {
    54  	client            networkingv1alpha1client.NetworkingV1alpha1Interface
    55  	serviceCIDRLister networkingv1alpha1listers.ServiceCIDRLister
    56  	serviceCIDRSynced cache.InformerSynced
    57  	ipAddressLister   networkingv1alpha1listers.IPAddressLister
    58  	ipAddressSynced   cache.InformerSynced
    59  	ipAddressInformer networkingv1alpha1informers.IPAddressInformer
    60  	queue             workqueue.TypedRateLimitingInterface[string]
    61  
    62  	internalStopCh chan struct{}
    63  
    64  	muTree sync.Mutex
    65  	tree   *iptree.Tree[*Allocator]
    66  
    67  	ipFamily api.IPFamily
    68  }
    69  
    70  var _ Interface = &MetaAllocator{}
    71  
    72  // NewMetaAllocator returns an IP allocator that use the IPAddress
    73  // and ServiceCIDR objects to track the assigned IP addresses,
    74  // using an informer cache as storage.
    75  func NewMetaAllocator(
    76  	client networkingv1alpha1client.NetworkingV1alpha1Interface,
    77  	serviceCIDRInformer networkingv1alpha1informers.ServiceCIDRInformer,
    78  	ipAddressInformer networkingv1alpha1informers.IPAddressInformer,
    79  	isIPv6 bool,
    80  ) (*MetaAllocator, error) {
    81  
    82  	// TODO: make the NewMetaAllocator agnostic of the IP family
    83  	family := api.IPv4Protocol
    84  	if isIPv6 {
    85  		family = api.IPv6Protocol
    86  	}
    87  
    88  	c := &MetaAllocator{
    89  		client:            client,
    90  		serviceCIDRLister: serviceCIDRInformer.Lister(),
    91  		serviceCIDRSynced: serviceCIDRInformer.Informer().HasSynced,
    92  		ipAddressLister:   ipAddressInformer.Lister(),
    93  		ipAddressSynced:   ipAddressInformer.Informer().HasSynced,
    94  		ipAddressInformer: ipAddressInformer,
    95  		queue: workqueue.NewTypedRateLimitingQueueWithConfig(
    96  			workqueue.DefaultTypedControllerRateLimiter[string](),
    97  			workqueue.TypedRateLimitingQueueConfig[string]{Name: ControllerName},
    98  		),
    99  		internalStopCh: make(chan struct{}),
   100  		tree:           iptree.New[*Allocator](),
   101  		ipFamily:       family,
   102  	}
   103  
   104  	_, _ = serviceCIDRInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   105  		AddFunc:    c.addServiceCIDR,
   106  		UpdateFunc: c.updateServiceCIDR,
   107  		DeleteFunc: c.deleteServiceCIDR,
   108  	})
   109  
   110  	go c.run()
   111  
   112  	return c, nil
   113  }
   114  
   115  func (c *MetaAllocator) addServiceCIDR(obj interface{}) {
   116  	key, err := cache.MetaNamespaceKeyFunc(obj)
   117  	if err == nil {
   118  		c.queue.Add(key)
   119  	}
   120  }
   121  func (c *MetaAllocator) updateServiceCIDR(old, new interface{}) {
   122  	key, err := cache.MetaNamespaceKeyFunc(new)
   123  	if err == nil {
   124  		c.queue.Add(key)
   125  	}
   126  }
   127  
   128  func (c *MetaAllocator) deleteServiceCIDR(obj interface{}) {
   129  	key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
   130  	if err == nil {
   131  		c.queue.Add(key)
   132  	}
   133  }
   134  
   135  func (c *MetaAllocator) run() {
   136  	defer runtime.HandleCrash()
   137  	defer c.queue.ShutDown()
   138  	klog.Info("Starting ServiceCIDR Allocator Controller")
   139  	defer klog.Info("Stopping ServiceCIDR Allocator Controllerr")
   140  
   141  	// Wait for all involved caches to be synced, before processing items from the queue is started
   142  	if !cache.WaitForCacheSync(c.internalStopCh, c.serviceCIDRSynced, c.ipAddressSynced) {
   143  		runtime.HandleError(fmt.Errorf("timed out waiting for caches to sync"))
   144  		return
   145  	}
   146  
   147  	// this is single threaded only one serviceCIDR at a time
   148  	go wait.Until(c.runWorker, time.Second, c.internalStopCh)
   149  
   150  	<-c.internalStopCh
   151  }
   152  
   153  func (c *MetaAllocator) runWorker() {
   154  	for c.processNextItem() {
   155  	}
   156  }
   157  
   158  func (c *MetaAllocator) processNextItem() bool {
   159  	// Wait until there is a new item in the working queue
   160  	key, quit := c.queue.Get()
   161  	if quit {
   162  		return false
   163  	}
   164  	defer c.queue.Done(key)
   165  
   166  	err := c.syncTree()
   167  	// Handle the error if something went wrong during the execution of the business logic
   168  	if err != nil {
   169  		if c.queue.NumRequeues(key) < 5 {
   170  			klog.Infof("Error syncing cidr %v: %v", key, err)
   171  			c.queue.AddRateLimited(key)
   172  			return true
   173  		}
   174  	}
   175  	c.queue.Forget(key)
   176  	return true
   177  }
   178  
   179  // syncTree syncs the ipTrees from the informer cache
   180  // It deletes or creates allocator and sets the corresponding state
   181  func (c *MetaAllocator) syncTree() error {
   182  	now := time.Now()
   183  	defer func() {
   184  		klog.V(2).Infof("Finished sync for CIDRs took %v", time.Since(now))
   185  	}()
   186  
   187  	serviceCIDRs, err := c.serviceCIDRLister.List(labels.Everything())
   188  	if err != nil {
   189  		return err
   190  	}
   191  
   192  	cidrsSet := sets.New[string]()
   193  	cidrReady := map[string]bool{}
   194  	for _, serviceCIDR := range serviceCIDRs {
   195  		ready := true
   196  		if !isReady(serviceCIDR) || !serviceCIDR.DeletionTimestamp.IsZero() {
   197  			ready = false
   198  		}
   199  
   200  		for _, cidr := range serviceCIDR.Spec.CIDRs {
   201  			if c.ipFamily == api.IPFamily(convertToV1IPFamily(netutils.IPFamilyOfCIDRString(cidr))) {
   202  				cidrsSet.Insert(cidr)
   203  				cidrReady[cidr] = ready
   204  			}
   205  		}
   206  	}
   207  
   208  	// obtain the existing allocators and set the existing state
   209  	treeSet := sets.New[string]()
   210  	c.muTree.Lock()
   211  	c.tree.DepthFirstWalk(c.ipFamily == api.IPv6Protocol, func(k netip.Prefix, v *Allocator) bool {
   212  		v.ready.Store(cidrReady[k.String()])
   213  		treeSet.Insert(k.String())
   214  		return false
   215  	})
   216  	c.muTree.Unlock()
   217  	cidrsToRemove := treeSet.Difference(cidrsSet)
   218  	cidrsToAdd := cidrsSet.Difference(treeSet)
   219  
   220  	errs := []error{}
   221  	// Add new allocators
   222  	for _, cidr := range cidrsToAdd.UnsortedList() {
   223  		_, ipnet, err := netutils.ParseCIDRSloppy(cidr)
   224  		if err != nil {
   225  			return err
   226  		}
   227  		// New ServiceCIDR, create new allocator
   228  		allocator, err := NewIPAllocator(ipnet, c.client, c.ipAddressInformer)
   229  		if err != nil {
   230  			errs = append(errs, err)
   231  			continue
   232  		}
   233  		allocator.ready.Store(cidrReady[cidr])
   234  		prefix, err := netip.ParsePrefix(cidr)
   235  		if err != nil {
   236  			return err
   237  		}
   238  		c.addAllocator(prefix, allocator)
   239  		klog.Infof("Created ClusterIP allocator for Service CIDR %s", cidr)
   240  	}
   241  	// Remove allocators that no longer exist
   242  	for _, cidr := range cidrsToRemove.UnsortedList() {
   243  		prefix, err := netip.ParsePrefix(cidr)
   244  		if err != nil {
   245  			return err
   246  		}
   247  		c.deleteAllocator(prefix)
   248  	}
   249  
   250  	return utilerrors.NewAggregate(errs)
   251  }
   252  
   253  func (c *MetaAllocator) getAllocator(ip net.IP) (*Allocator, error) {
   254  	c.muTree.Lock()
   255  	defer c.muTree.Unlock()
   256  
   257  	address := ipToAddr(ip)
   258  	prefix := netip.PrefixFrom(address, address.BitLen())
   259  	// Use the largest subnet to allocate addresses because
   260  	// all the other subnets will be contained.
   261  	_, allocator, ok := c.tree.ShortestPrefixMatch(prefix)
   262  	if !ok {
   263  		klog.V(2).Infof("Could not get allocator for IP %s", ip.String())
   264  		return nil, ErrMismatchedNetwork
   265  	}
   266  	return allocator, nil
   267  }
   268  
   269  func (c *MetaAllocator) addAllocator(cidr netip.Prefix, allocator *Allocator) {
   270  	c.muTree.Lock()
   271  	defer c.muTree.Unlock()
   272  	c.tree.InsertPrefix(cidr, allocator)
   273  }
   274  
   275  func (c *MetaAllocator) deleteAllocator(cidr netip.Prefix) {
   276  	c.muTree.Lock()
   277  	defer c.muTree.Unlock()
   278  	ok := c.tree.DeletePrefix(cidr)
   279  	if ok {
   280  		klog.V(3).Infof("CIDR %s deleted", cidr)
   281  	}
   282  }
   283  
   284  func (c *MetaAllocator) AllocateService(service *api.Service, ip net.IP) error {
   285  	allocator, err := c.getAllocator(ip)
   286  	if err != nil {
   287  		return err
   288  	}
   289  	return allocator.AllocateService(service, ip)
   290  }
   291  
   292  func (c *MetaAllocator) Allocate(ip net.IP) error {
   293  	allocator, err := c.getAllocator(ip)
   294  	if err != nil {
   295  		return err
   296  	}
   297  	return allocator.Allocate(ip)
   298  }
   299  
   300  func (c *MetaAllocator) AllocateNextService(service *api.Service) (net.IP, error) {
   301  	c.muTree.Lock()
   302  	defer c.muTree.Unlock()
   303  
   304  	// TODO(aojea) add strategy to return a random allocator but
   305  	// taking into consideration the number of addresses of each allocator.
   306  	// Per example, if we have allocator A and B with 256 and 1024 possible
   307  	// addresses each, the chances to get B has to be 4 times the chances to
   308  	// get A so we can spread the load of IPs randomly.
   309  	// However, we need to validate the best strategy before going to Beta.
   310  	isIPv6 := c.ipFamily == api.IPFamily(v1.IPv6Protocol)
   311  	for _, allocator := range c.tree.TopLevelPrefixes(isIPv6) {
   312  		ip, err := allocator.AllocateNextService(service)
   313  		if err == nil {
   314  			return ip, nil
   315  		}
   316  	}
   317  	return nil, ErrFull
   318  }
   319  
   320  func (c *MetaAllocator) AllocateNext() (net.IP, error) {
   321  	c.muTree.Lock()
   322  	defer c.muTree.Unlock()
   323  
   324  	// TODO(aojea) add strategy to return a random allocator but
   325  	// taking into consideration the number of addresses of each allocator.
   326  	// Per example, if we have allocator A and B with 256 and 1024 possible
   327  	// addresses each, the chances to get B has to be 4 times the chances to
   328  	// get A so we can spread the load of IPs randomly.
   329  	// However, we need to validate the best strategy before going to Beta.
   330  	isIPv6 := c.ipFamily == api.IPFamily(v1.IPv6Protocol)
   331  	for _, allocator := range c.tree.TopLevelPrefixes(isIPv6) {
   332  		ip, err := allocator.AllocateNext()
   333  		if err == nil {
   334  			return ip, nil
   335  		}
   336  	}
   337  	return nil, ErrFull
   338  }
   339  
   340  func (c *MetaAllocator) Release(ip net.IP) error {
   341  	allocator, err := c.getAllocator(ip)
   342  	if err != nil {
   343  		return err
   344  	}
   345  	return allocator.Release(ip)
   346  
   347  }
   348  func (c *MetaAllocator) ForEach(f func(ip net.IP)) {
   349  	ipLabelSelector := labels.Set(map[string]string{
   350  		networkingv1alpha1.LabelIPAddressFamily: string(c.IPFamily()),
   351  		networkingv1alpha1.LabelManagedBy:       ControllerName,
   352  	}).AsSelectorPreValidated()
   353  	ips, err := c.ipAddressLister.List(ipLabelSelector)
   354  	if err != nil {
   355  		return
   356  	}
   357  	for _, ip := range ips {
   358  		f(netutils.ParseIPSloppy(ip.Name))
   359  	}
   360  }
   361  
   362  func (c *MetaAllocator) CIDR() net.IPNet {
   363  	return net.IPNet{}
   364  
   365  }
   366  func (c *MetaAllocator) IPFamily() api.IPFamily {
   367  	return c.ipFamily
   368  }
   369  func (c *MetaAllocator) Has(ip net.IP) bool {
   370  	allocator, err := c.getAllocator(ip)
   371  	if err != nil {
   372  		return false
   373  	}
   374  	return allocator.Has(ip)
   375  }
   376  func (c *MetaAllocator) Destroy() {
   377  	select {
   378  	case <-c.internalStopCh:
   379  	default:
   380  		close(c.internalStopCh)
   381  	}
   382  }
   383  
   384  // for testing
   385  func (c *MetaAllocator) Used() int {
   386  	ipLabelSelector := labels.Set(map[string]string{
   387  		networkingv1alpha1.LabelIPAddressFamily: string(c.IPFamily()),
   388  		networkingv1alpha1.LabelManagedBy:       ControllerName,
   389  	}).AsSelectorPreValidated()
   390  	ips, err := c.ipAddressLister.List(ipLabelSelector)
   391  	if err != nil {
   392  		return 0
   393  	}
   394  	return len(ips)
   395  }
   396  
   397  // for testing
   398  func (c *MetaAllocator) Free() int {
   399  	c.muTree.Lock()
   400  	defer c.muTree.Unlock()
   401  
   402  	size := 0
   403  	isIPv6 := c.ipFamily == api.IPFamily(v1.IPv6Protocol)
   404  	for _, allocator := range c.tree.TopLevelPrefixes(isIPv6) {
   405  		size += int(allocator.size)
   406  	}
   407  	return size - c.Used()
   408  }
   409  
   410  func (c *MetaAllocator) EnableMetrics() {}
   411  
   412  // DryRun returns a random allocator
   413  func (c *MetaAllocator) DryRun() Interface {
   414  	c.muTree.Lock()
   415  	defer c.muTree.Unlock()
   416  	isIPv6 := c.ipFamily == api.IPFamily(v1.IPv6Protocol)
   417  	for _, allocator := range c.tree.TopLevelPrefixes(isIPv6) {
   418  		return allocator.DryRun()
   419  	}
   420  	return &Allocator{}
   421  }
   422  
   423  func isReady(serviceCIDR *networkingv1alpha1.ServiceCIDR) bool {
   424  	if serviceCIDR == nil {
   425  		return false
   426  	}
   427  
   428  	for _, condition := range serviceCIDR.Status.Conditions {
   429  		if condition.Type == networkingv1alpha1.ServiceCIDRConditionReady {
   430  			return condition.Status == metav1.ConditionStatus(metav1.ConditionTrue)
   431  		}
   432  	}
   433  	// assume the ServiceCIDR is Ready, in order to handle scenarios where kcm is not running
   434  	return true
   435  }
   436  
   437  // ipToAddr converts a net.IP to a netip.Addr
   438  // if the net.IP is not valid it returns an empty netip.Addr{}
   439  func ipToAddr(ip net.IP) netip.Addr {
   440  	// https://pkg.go.dev/net/netip#AddrFromSlice can return an IPv4 in IPv6 format
   441  	// so we have to check the IP family to return exactly the format that we want
   442  	// address, _ := netip.AddrFromSlice(net.ParseIPSloppy(192.168.0.1)) returns
   443  	// an address like ::ffff:192.168.0.1/32
   444  	bytes := ip.To4()
   445  	if bytes == nil {
   446  		bytes = ip.To16()
   447  	}
   448  	// AddrFromSlice returns Addr{}, false if the input is invalid.
   449  	address, _ := netip.AddrFromSlice(bytes)
   450  	return address
   451  }
   452  
   453  // Convert netutils.IPFamily to v1.IPFamily
   454  // TODO: consolidate helpers
   455  // copied from pkg/proxy/util/utils.go
   456  func convertToV1IPFamily(ipFamily netutils.IPFamily) v1.IPFamily {
   457  	switch ipFamily {
   458  	case netutils.IPv4:
   459  		return v1.IPv4Protocol
   460  	case netutils.IPv6:
   461  		return v1.IPv6Protocol
   462  	}
   463  
   464  	return v1.IPFamilyUnknown
   465  }