k8s.io/kubernetes@v1.29.3/pkg/controller/nodeipam/ipam/multicidrset/multi_cidr_set.go (about)

     1  /*
     2  Copyright 2022 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package multicidrset
    18  
    19  import (
    20  	"encoding/binary"
    21  	"fmt"
    22  	"math/big"
    23  	"math/bits"
    24  	"net"
    25  	"sync"
    26  
    27  	netutils "k8s.io/utils/net"
    28  )
    29  
    30  // MultiCIDRSet manages a set of CIDR ranges from which blocks of IPs can
    31  // be allocated from.
    32  type MultiCIDRSet struct {
    33  	sync.Mutex
    34  	// ClusterCIDR is the CIDR assigned to the cluster.
    35  	ClusterCIDR *net.IPNet
    36  	// NodeMaskSize is the mask size, in bits,assigned to the nodes
    37  	// caches the mask size to avoid the penalty of calling nodeMask.Size().
    38  	NodeMaskSize int
    39  	// MaxCIDRs is the maximum number of CIDRs that can be allocated.
    40  	MaxCIDRs int
    41  	// Label stores the CIDR in a string, it is used to identify the metrics such
    42  	// as Number of allocations, Total number of CIDR releases, Percentage of
    43  	// allocated CIDRs, Tries required for allocating a CIDR for a particular CIDRSet.
    44  	Label string
    45  	// AllocatedCIDRMap stores all the allocated CIDRs from the current CIDRSet.
    46  	// Stores a mapping of the next candidate CIDR for allocation to it's
    47  	// allocation status. Next candidate is used only if allocation status is false.
    48  	AllocatedCIDRMap map[string]bool
    49  
    50  	// clusterMaskSize is the mask size, in bits, assigned to the cluster.
    51  	// caches the mask size to avoid the penalty of calling clusterCIDR.Mask.Size().
    52  	clusterMaskSize int
    53  	// nodeMask is the network mask assigned to the nodes.
    54  	nodeMask net.IPMask
    55  	// allocatedCIDRs counts the number of CIDRs allocated.
    56  	allocatedCIDRs int
    57  	// nextCandidate points to the next CIDR that should be free.
    58  	nextCandidate int
    59  }
    60  
    61  // ClusterCIDR is an internal representation of the ClusterCIDR API object.
    62  type ClusterCIDR struct {
    63  	// Name of the associated ClusterCIDR API object.
    64  	Name string
    65  	// IPv4CIDRSet is the MultiCIDRSet representation of ClusterCIDR.spec.ipv4
    66  	// of the associated ClusterCIDR API object.
    67  	IPv4CIDRSet *MultiCIDRSet
    68  	// IPv6CIDRSet is the MultiCIDRSet representation of ClusterCIDR.spec.ipv6
    69  	// of the associated ClusterCIDR API object.
    70  	IPv6CIDRSet *MultiCIDRSet
    71  	// AssociatedNodes is used to identify which nodes have CIDRs allocated from this ClusterCIDR.
    72  	// Stores a mapping of node name to association status.
    73  	AssociatedNodes map[string]bool
    74  	// Terminating is used to identify whether ClusterCIDR has been marked for termination.
    75  	Terminating bool
    76  }
    77  
    78  const (
    79  	// The subnet mask size cannot be greater than 16 more than the cluster mask size
    80  	// TODO: https://github.com/kubernetes/kubernetes/issues/44918
    81  	// clusterSubnetMaxDiff limited to 16 due to the uncompressed bitmap.
    82  	// Due to this limitation the subnet mask for IPv6 cluster cidr needs to be >= 48
    83  	// as default mask size for IPv6 is 64.
    84  	clusterSubnetMaxDiff = 16
    85  	// halfIPv6Len is the half of the IPv6 length.
    86  	halfIPv6Len = net.IPv6len / 2
    87  )
    88  
    89  // CIDRRangeNoCIDRsRemainingErr is an error type used to denote there is no more
    90  // space to allocate CIDR ranges from the given CIDR.
    91  type CIDRRangeNoCIDRsRemainingErr struct {
    92  	// CIDR represents the CIDR which is exhausted.
    93  	CIDR string
    94  }
    95  
    96  func (err *CIDRRangeNoCIDRsRemainingErr) Error() string {
    97  	return fmt.Sprintf("CIDR allocation failed; there are no remaining CIDRs left to allocate in the range %s", err.CIDR)
    98  }
    99  
   100  // CIDRSetSubNetTooBigErr is an error type to denote that subnet mask size is too
   101  // big compared to the CIDR mask size.
   102  type CIDRSetSubNetTooBigErr struct {
   103  	cidr            string
   104  	subnetMaskSize  int
   105  	clusterMaskSize int
   106  }
   107  
   108  func (err *CIDRSetSubNetTooBigErr) Error() string {
   109  	return fmt.Sprintf("Creation of New CIDR Set failed for %s. "+
   110  		"PerNodeMaskSize %d is too big for CIDR Mask %d, Maximum difference allowed "+
   111  		"is %d", err.cidr, err.subnetMaskSize, err.clusterMaskSize, clusterSubnetMaxDiff)
   112  }
   113  
   114  // NewMultiCIDRSet creates a new MultiCIDRSet.
   115  func NewMultiCIDRSet(cidrConfig *net.IPNet, perNodeHostBits int) (*MultiCIDRSet, error) {
   116  	clusterMask := cidrConfig.Mask
   117  	clusterMaskSize, bits := clusterMask.Size()
   118  
   119  	var subNetMaskSize int
   120  	switch /*v4 or v6*/ {
   121  	case netutils.IsIPv4(cidrConfig.IP):
   122  		subNetMaskSize = 32 - perNodeHostBits
   123  	case netutils.IsIPv6(cidrConfig.IP):
   124  		subNetMaskSize = 128 - perNodeHostBits
   125  	}
   126  
   127  	if netutils.IsIPv6(cidrConfig.IP) && (subNetMaskSize-clusterMaskSize > clusterSubnetMaxDiff) {
   128  		return nil, &CIDRSetSubNetTooBigErr{
   129  			cidr:            cidrConfig.String(),
   130  			subnetMaskSize:  subNetMaskSize,
   131  			clusterMaskSize: clusterMaskSize,
   132  		}
   133  	}
   134  
   135  	// Register MultiCIDRSet metrics.
   136  	registerCidrsetMetrics()
   137  
   138  	maxCIDRs := getMaxCIDRs(subNetMaskSize, clusterMaskSize)
   139  	multiCIDRSet := &MultiCIDRSet{
   140  		ClusterCIDR:      cidrConfig,
   141  		nodeMask:         net.CIDRMask(subNetMaskSize, bits),
   142  		clusterMaskSize:  clusterMaskSize,
   143  		MaxCIDRs:         maxCIDRs,
   144  		NodeMaskSize:     subNetMaskSize,
   145  		Label:            cidrConfig.String(),
   146  		AllocatedCIDRMap: make(map[string]bool, 0),
   147  	}
   148  	cidrSetMaxCidrs.WithLabelValues(multiCIDRSet.Label).Set(float64(maxCIDRs))
   149  
   150  	return multiCIDRSet, nil
   151  }
   152  
   153  func (s *MultiCIDRSet) indexToCIDRBlock(index int) (*net.IPNet, error) {
   154  	var ip []byte
   155  	switch /*v4 or v6*/ {
   156  	case netutils.IsIPv4(s.ClusterCIDR.IP):
   157  		j := uint32(index) << uint32(32-s.NodeMaskSize)
   158  		ipInt := (binary.BigEndian.Uint32(s.ClusterCIDR.IP)) | j
   159  		ip = make([]byte, net.IPv4len)
   160  		binary.BigEndian.PutUint32(ip, ipInt)
   161  	case netutils.IsIPv6(s.ClusterCIDR.IP):
   162  		// leftClusterIP      |     rightClusterIP
   163  		// 2001:0DB8:1234:0000:0000:0000:0000:0000
   164  		const v6NBits = 128
   165  		const halfV6NBits = v6NBits / 2
   166  		leftClusterIP := binary.BigEndian.Uint64(s.ClusterCIDR.IP[:halfIPv6Len])
   167  		rightClusterIP := binary.BigEndian.Uint64(s.ClusterCIDR.IP[halfIPv6Len:])
   168  
   169  		ip = make([]byte, net.IPv6len)
   170  
   171  		if s.NodeMaskSize <= halfV6NBits {
   172  			// We only care about left side IP.
   173  			leftClusterIP |= uint64(index) << uint(halfV6NBits-s.NodeMaskSize)
   174  		} else {
   175  			if s.clusterMaskSize < halfV6NBits {
   176  				// see how many bits are needed to reach the left side.
   177  				btl := uint(s.NodeMaskSize - halfV6NBits)
   178  				indexMaxBit := uint(64 - bits.LeadingZeros64(uint64(index)))
   179  				if indexMaxBit > btl {
   180  					leftClusterIP |= uint64(index) >> btl
   181  				}
   182  			}
   183  			// the right side will be calculated the same way either the
   184  			// subNetMaskSize affects both left and right sides.
   185  			rightClusterIP |= uint64(index) << uint(v6NBits-s.NodeMaskSize)
   186  		}
   187  		binary.BigEndian.PutUint64(ip[:halfIPv6Len], leftClusterIP)
   188  		binary.BigEndian.PutUint64(ip[halfIPv6Len:], rightClusterIP)
   189  	default:
   190  		return nil, fmt.Errorf("invalid IP: %s", s.ClusterCIDR.IP)
   191  	}
   192  	return &net.IPNet{
   193  		IP:   ip,
   194  		Mask: s.nodeMask,
   195  	}, nil
   196  }
   197  
   198  // NextCandidate returns the next candidate and the last evaluated index
   199  // for the current cidrSet. Returns nil if the candidate is already allocated.
   200  func (s *MultiCIDRSet) NextCandidate() (*net.IPNet, int, error) {
   201  	s.Lock()
   202  	defer s.Unlock()
   203  
   204  	if s.allocatedCIDRs == s.MaxCIDRs {
   205  		return nil, 0, &CIDRRangeNoCIDRsRemainingErr{
   206  			CIDR: s.Label,
   207  		}
   208  	}
   209  
   210  	candidate := s.nextCandidate
   211  	for i := 0; i < s.MaxCIDRs; i++ {
   212  		nextCandidateCIDR, err := s.indexToCIDRBlock(candidate)
   213  		if err != nil {
   214  			return nil, i, err
   215  		}
   216  		// Check if the nextCandidate is not already allocated.
   217  		if _, ok := s.AllocatedCIDRMap[nextCandidateCIDR.String()]; !ok {
   218  			s.nextCandidate = (candidate + 1) % s.MaxCIDRs
   219  			return nextCandidateCIDR, i, nil
   220  		}
   221  		candidate = (candidate + 1) % s.MaxCIDRs
   222  	}
   223  
   224  	return nil, s.MaxCIDRs, &CIDRRangeNoCIDRsRemainingErr{
   225  		CIDR: s.Label,
   226  	}
   227  }
   228  
   229  // getBeginningAndEndIndices returns the indices for the given CIDR, returned
   230  // values are inclusive indices [beginning, end].
   231  func (s *MultiCIDRSet) getBeginningAndEndIndices(cidr *net.IPNet) (int, int, error) {
   232  	if cidr == nil {
   233  		return -1, -1, fmt.Errorf("error getting indices for cluster cidr %v, cidr is nil", s.ClusterCIDR)
   234  	}
   235  	begin, end := 0, s.MaxCIDRs-1
   236  	cidrMask := cidr.Mask
   237  	maskSize, _ := cidrMask.Size()
   238  	var ipSize int
   239  
   240  	if !s.ClusterCIDR.Contains(cidr.IP.Mask(s.ClusterCIDR.Mask)) && !cidr.Contains(s.ClusterCIDR.IP.Mask(cidr.Mask)) {
   241  		return -1, -1, fmt.Errorf("cidr %v is out the range of cluster cidr %v", cidr, s.ClusterCIDR)
   242  	}
   243  
   244  	if s.clusterMaskSize < maskSize {
   245  		var err error
   246  		ipSize = net.IPv4len
   247  		if netutils.IsIPv6(cidr.IP) {
   248  			ipSize = net.IPv6len
   249  		}
   250  		begin, err = s.getIndexForIP(cidr.IP.Mask(s.nodeMask))
   251  		if err != nil {
   252  			return -1, -1, err
   253  		}
   254  		ip := make([]byte, ipSize)
   255  		if netutils.IsIPv4(cidr.IP) {
   256  			ipInt := binary.BigEndian.Uint32(cidr.IP) | (^binary.BigEndian.Uint32(cidr.Mask))
   257  			binary.BigEndian.PutUint32(ip, ipInt)
   258  		} else {
   259  			// ipIntLeft          |         ipIntRight
   260  			// 2001:0DB8:1234:0000:0000:0000:0000:0000
   261  			ipIntLeft := binary.BigEndian.Uint64(cidr.IP[:net.IPv6len/2]) | (^binary.BigEndian.Uint64(cidr.Mask[:net.IPv6len/2]))
   262  			ipIntRight := binary.BigEndian.Uint64(cidr.IP[net.IPv6len/2:]) | (^binary.BigEndian.Uint64(cidr.Mask[net.IPv6len/2:]))
   263  			binary.BigEndian.PutUint64(ip[:net.IPv6len/2], ipIntLeft)
   264  			binary.BigEndian.PutUint64(ip[net.IPv6len/2:], ipIntRight)
   265  		}
   266  		end, err = s.getIndexForIP(net.IP(ip).Mask(s.nodeMask))
   267  		if err != nil {
   268  			return -1, -1, err
   269  		}
   270  	}
   271  	return begin, end, nil
   272  }
   273  
   274  // Release releases the given CIDR range.
   275  func (s *MultiCIDRSet) Release(cidr *net.IPNet) error {
   276  	begin, end, err := s.getBeginningAndEndIndices(cidr)
   277  	if err != nil {
   278  		return err
   279  	}
   280  	s.Lock()
   281  	defer s.Unlock()
   282  
   283  	for i := begin; i <= end; i++ {
   284  		// Remove from the allocated CIDR Map and decrement the counter only if currently
   285  		// marked allocated. Avoids double counting.
   286  		currCIDR, err := s.indexToCIDRBlock(i)
   287  		if err != nil {
   288  			return err
   289  		}
   290  		if _, ok := s.AllocatedCIDRMap[currCIDR.String()]; ok {
   291  			delete(s.AllocatedCIDRMap, currCIDR.String())
   292  			s.allocatedCIDRs--
   293  			cidrSetReleases.WithLabelValues(s.Label).Inc()
   294  		}
   295  	}
   296  
   297  	cidrSetUsage.WithLabelValues(s.Label).Set(float64(s.allocatedCIDRs) / float64(s.MaxCIDRs))
   298  
   299  	return nil
   300  }
   301  
   302  // Occupy marks the given CIDR range as used. Occupy succeeds even if the CIDR
   303  // range was previously used.
   304  func (s *MultiCIDRSet) Occupy(cidr *net.IPNet) (err error) {
   305  	begin, end, err := s.getBeginningAndEndIndices(cidr)
   306  	if err != nil {
   307  		return err
   308  	}
   309  	s.Lock()
   310  	defer s.Unlock()
   311  
   312  	for i := begin; i <= end; i++ {
   313  		// Add to the allocated CIDR Map and increment the counter only if not already
   314  		// marked allocated. Prevents double counting.
   315  		currCIDR, err := s.indexToCIDRBlock(i)
   316  		if err != nil {
   317  			return err
   318  		}
   319  		if _, ok := s.AllocatedCIDRMap[currCIDR.String()]; !ok {
   320  			s.AllocatedCIDRMap[currCIDR.String()] = true
   321  			cidrSetAllocations.WithLabelValues(s.Label).Inc()
   322  			s.allocatedCIDRs++
   323  		}
   324  	}
   325  	cidrSetUsage.WithLabelValues(s.Label).Set(float64(s.allocatedCIDRs) / float64(s.MaxCIDRs))
   326  
   327  	return nil
   328  }
   329  
   330  func (s *MultiCIDRSet) getIndexForIP(ip net.IP) (int, error) {
   331  	if ip.To4() != nil {
   332  		cidrIndex := (binary.BigEndian.Uint32(s.ClusterCIDR.IP) ^ binary.BigEndian.Uint32(ip.To4())) >> uint32(32-s.NodeMaskSize)
   333  		if cidrIndex >= uint32(s.MaxCIDRs) {
   334  			return 0, fmt.Errorf("CIDR: %v/%v is out of the range of CIDR allocator", ip, s.NodeMaskSize)
   335  		}
   336  		return int(cidrIndex), nil
   337  	}
   338  	if netutils.IsIPv6(ip) {
   339  		bigIP := big.NewInt(0).SetBytes(s.ClusterCIDR.IP)
   340  		bigIP = bigIP.Xor(bigIP, big.NewInt(0).SetBytes(ip))
   341  		cidrIndexBig := bigIP.Rsh(bigIP, uint(net.IPv6len*8-s.NodeMaskSize))
   342  		cidrIndex := cidrIndexBig.Uint64()
   343  		if cidrIndex >= uint64(s.MaxCIDRs) {
   344  			return 0, fmt.Errorf("CIDR: %v/%v is out of the range of CIDR allocator", ip, s.NodeMaskSize)
   345  		}
   346  		return int(cidrIndex), nil
   347  	}
   348  
   349  	return 0, fmt.Errorf("invalid IP: %v", ip)
   350  }
   351  
   352  // UpdateEvaluatedCount increments the evaluated count.
   353  func (s *MultiCIDRSet) UpdateEvaluatedCount(evaluated int) {
   354  	cidrSetAllocationTriesPerRequest.WithLabelValues(s.Label).Observe(float64(evaluated))
   355  }
   356  
   357  // getMaxCIDRs returns the max number of CIDRs that can be obtained by subdividing a mask of size `clusterMaskSize`
   358  // into subnets with mask of size `subNetMaskSize`.
   359  func getMaxCIDRs(subNetMaskSize, clusterMaskSize int) int {
   360  	return 1 << uint32(subNetMaskSize-clusterMaskSize)
   361  }