k8s.io/kubernetes@v1.29.3/pkg/controller/nodeipam/ipam/multicidrset/multi_cidr_set.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package multicidrset 18 19 import ( 20 "encoding/binary" 21 "fmt" 22 "math/big" 23 "math/bits" 24 "net" 25 "sync" 26 27 netutils "k8s.io/utils/net" 28 ) 29 30 // MultiCIDRSet manages a set of CIDR ranges from which blocks of IPs can 31 // be allocated from. 32 type MultiCIDRSet struct { 33 sync.Mutex 34 // ClusterCIDR is the CIDR assigned to the cluster. 35 ClusterCIDR *net.IPNet 36 // NodeMaskSize is the mask size, in bits,assigned to the nodes 37 // caches the mask size to avoid the penalty of calling nodeMask.Size(). 38 NodeMaskSize int 39 // MaxCIDRs is the maximum number of CIDRs that can be allocated. 40 MaxCIDRs int 41 // Label stores the CIDR in a string, it is used to identify the metrics such 42 // as Number of allocations, Total number of CIDR releases, Percentage of 43 // allocated CIDRs, Tries required for allocating a CIDR for a particular CIDRSet. 44 Label string 45 // AllocatedCIDRMap stores all the allocated CIDRs from the current CIDRSet. 46 // Stores a mapping of the next candidate CIDR for allocation to it's 47 // allocation status. Next candidate is used only if allocation status is false. 48 AllocatedCIDRMap map[string]bool 49 50 // clusterMaskSize is the mask size, in bits, assigned to the cluster. 51 // caches the mask size to avoid the penalty of calling clusterCIDR.Mask.Size(). 52 clusterMaskSize int 53 // nodeMask is the network mask assigned to the nodes. 54 nodeMask net.IPMask 55 // allocatedCIDRs counts the number of CIDRs allocated. 56 allocatedCIDRs int 57 // nextCandidate points to the next CIDR that should be free. 58 nextCandidate int 59 } 60 61 // ClusterCIDR is an internal representation of the ClusterCIDR API object. 62 type ClusterCIDR struct { 63 // Name of the associated ClusterCIDR API object. 64 Name string 65 // IPv4CIDRSet is the MultiCIDRSet representation of ClusterCIDR.spec.ipv4 66 // of the associated ClusterCIDR API object. 67 IPv4CIDRSet *MultiCIDRSet 68 // IPv6CIDRSet is the MultiCIDRSet representation of ClusterCIDR.spec.ipv6 69 // of the associated ClusterCIDR API object. 70 IPv6CIDRSet *MultiCIDRSet 71 // AssociatedNodes is used to identify which nodes have CIDRs allocated from this ClusterCIDR. 72 // Stores a mapping of node name to association status. 73 AssociatedNodes map[string]bool 74 // Terminating is used to identify whether ClusterCIDR has been marked for termination. 75 Terminating bool 76 } 77 78 const ( 79 // The subnet mask size cannot be greater than 16 more than the cluster mask size 80 // TODO: https://github.com/kubernetes/kubernetes/issues/44918 81 // clusterSubnetMaxDiff limited to 16 due to the uncompressed bitmap. 82 // Due to this limitation the subnet mask for IPv6 cluster cidr needs to be >= 48 83 // as default mask size for IPv6 is 64. 84 clusterSubnetMaxDiff = 16 85 // halfIPv6Len is the half of the IPv6 length. 86 halfIPv6Len = net.IPv6len / 2 87 ) 88 89 // CIDRRangeNoCIDRsRemainingErr is an error type used to denote there is no more 90 // space to allocate CIDR ranges from the given CIDR. 91 type CIDRRangeNoCIDRsRemainingErr struct { 92 // CIDR represents the CIDR which is exhausted. 93 CIDR string 94 } 95 96 func (err *CIDRRangeNoCIDRsRemainingErr) Error() string { 97 return fmt.Sprintf("CIDR allocation failed; there are no remaining CIDRs left to allocate in the range %s", err.CIDR) 98 } 99 100 // CIDRSetSubNetTooBigErr is an error type to denote that subnet mask size is too 101 // big compared to the CIDR mask size. 102 type CIDRSetSubNetTooBigErr struct { 103 cidr string 104 subnetMaskSize int 105 clusterMaskSize int 106 } 107 108 func (err *CIDRSetSubNetTooBigErr) Error() string { 109 return fmt.Sprintf("Creation of New CIDR Set failed for %s. "+ 110 "PerNodeMaskSize %d is too big for CIDR Mask %d, Maximum difference allowed "+ 111 "is %d", err.cidr, err.subnetMaskSize, err.clusterMaskSize, clusterSubnetMaxDiff) 112 } 113 114 // NewMultiCIDRSet creates a new MultiCIDRSet. 115 func NewMultiCIDRSet(cidrConfig *net.IPNet, perNodeHostBits int) (*MultiCIDRSet, error) { 116 clusterMask := cidrConfig.Mask 117 clusterMaskSize, bits := clusterMask.Size() 118 119 var subNetMaskSize int 120 switch /*v4 or v6*/ { 121 case netutils.IsIPv4(cidrConfig.IP): 122 subNetMaskSize = 32 - perNodeHostBits 123 case netutils.IsIPv6(cidrConfig.IP): 124 subNetMaskSize = 128 - perNodeHostBits 125 } 126 127 if netutils.IsIPv6(cidrConfig.IP) && (subNetMaskSize-clusterMaskSize > clusterSubnetMaxDiff) { 128 return nil, &CIDRSetSubNetTooBigErr{ 129 cidr: cidrConfig.String(), 130 subnetMaskSize: subNetMaskSize, 131 clusterMaskSize: clusterMaskSize, 132 } 133 } 134 135 // Register MultiCIDRSet metrics. 136 registerCidrsetMetrics() 137 138 maxCIDRs := getMaxCIDRs(subNetMaskSize, clusterMaskSize) 139 multiCIDRSet := &MultiCIDRSet{ 140 ClusterCIDR: cidrConfig, 141 nodeMask: net.CIDRMask(subNetMaskSize, bits), 142 clusterMaskSize: clusterMaskSize, 143 MaxCIDRs: maxCIDRs, 144 NodeMaskSize: subNetMaskSize, 145 Label: cidrConfig.String(), 146 AllocatedCIDRMap: make(map[string]bool, 0), 147 } 148 cidrSetMaxCidrs.WithLabelValues(multiCIDRSet.Label).Set(float64(maxCIDRs)) 149 150 return multiCIDRSet, nil 151 } 152 153 func (s *MultiCIDRSet) indexToCIDRBlock(index int) (*net.IPNet, error) { 154 var ip []byte 155 switch /*v4 or v6*/ { 156 case netutils.IsIPv4(s.ClusterCIDR.IP): 157 j := uint32(index) << uint32(32-s.NodeMaskSize) 158 ipInt := (binary.BigEndian.Uint32(s.ClusterCIDR.IP)) | j 159 ip = make([]byte, net.IPv4len) 160 binary.BigEndian.PutUint32(ip, ipInt) 161 case netutils.IsIPv6(s.ClusterCIDR.IP): 162 // leftClusterIP | rightClusterIP 163 // 2001:0DB8:1234:0000:0000:0000:0000:0000 164 const v6NBits = 128 165 const halfV6NBits = v6NBits / 2 166 leftClusterIP := binary.BigEndian.Uint64(s.ClusterCIDR.IP[:halfIPv6Len]) 167 rightClusterIP := binary.BigEndian.Uint64(s.ClusterCIDR.IP[halfIPv6Len:]) 168 169 ip = make([]byte, net.IPv6len) 170 171 if s.NodeMaskSize <= halfV6NBits { 172 // We only care about left side IP. 173 leftClusterIP |= uint64(index) << uint(halfV6NBits-s.NodeMaskSize) 174 } else { 175 if s.clusterMaskSize < halfV6NBits { 176 // see how many bits are needed to reach the left side. 177 btl := uint(s.NodeMaskSize - halfV6NBits) 178 indexMaxBit := uint(64 - bits.LeadingZeros64(uint64(index))) 179 if indexMaxBit > btl { 180 leftClusterIP |= uint64(index) >> btl 181 } 182 } 183 // the right side will be calculated the same way either the 184 // subNetMaskSize affects both left and right sides. 185 rightClusterIP |= uint64(index) << uint(v6NBits-s.NodeMaskSize) 186 } 187 binary.BigEndian.PutUint64(ip[:halfIPv6Len], leftClusterIP) 188 binary.BigEndian.PutUint64(ip[halfIPv6Len:], rightClusterIP) 189 default: 190 return nil, fmt.Errorf("invalid IP: %s", s.ClusterCIDR.IP) 191 } 192 return &net.IPNet{ 193 IP: ip, 194 Mask: s.nodeMask, 195 }, nil 196 } 197 198 // NextCandidate returns the next candidate and the last evaluated index 199 // for the current cidrSet. Returns nil if the candidate is already allocated. 200 func (s *MultiCIDRSet) NextCandidate() (*net.IPNet, int, error) { 201 s.Lock() 202 defer s.Unlock() 203 204 if s.allocatedCIDRs == s.MaxCIDRs { 205 return nil, 0, &CIDRRangeNoCIDRsRemainingErr{ 206 CIDR: s.Label, 207 } 208 } 209 210 candidate := s.nextCandidate 211 for i := 0; i < s.MaxCIDRs; i++ { 212 nextCandidateCIDR, err := s.indexToCIDRBlock(candidate) 213 if err != nil { 214 return nil, i, err 215 } 216 // Check if the nextCandidate is not already allocated. 217 if _, ok := s.AllocatedCIDRMap[nextCandidateCIDR.String()]; !ok { 218 s.nextCandidate = (candidate + 1) % s.MaxCIDRs 219 return nextCandidateCIDR, i, nil 220 } 221 candidate = (candidate + 1) % s.MaxCIDRs 222 } 223 224 return nil, s.MaxCIDRs, &CIDRRangeNoCIDRsRemainingErr{ 225 CIDR: s.Label, 226 } 227 } 228 229 // getBeginningAndEndIndices returns the indices for the given CIDR, returned 230 // values are inclusive indices [beginning, end]. 231 func (s *MultiCIDRSet) getBeginningAndEndIndices(cidr *net.IPNet) (int, int, error) { 232 if cidr == nil { 233 return -1, -1, fmt.Errorf("error getting indices for cluster cidr %v, cidr is nil", s.ClusterCIDR) 234 } 235 begin, end := 0, s.MaxCIDRs-1 236 cidrMask := cidr.Mask 237 maskSize, _ := cidrMask.Size() 238 var ipSize int 239 240 if !s.ClusterCIDR.Contains(cidr.IP.Mask(s.ClusterCIDR.Mask)) && !cidr.Contains(s.ClusterCIDR.IP.Mask(cidr.Mask)) { 241 return -1, -1, fmt.Errorf("cidr %v is out the range of cluster cidr %v", cidr, s.ClusterCIDR) 242 } 243 244 if s.clusterMaskSize < maskSize { 245 var err error 246 ipSize = net.IPv4len 247 if netutils.IsIPv6(cidr.IP) { 248 ipSize = net.IPv6len 249 } 250 begin, err = s.getIndexForIP(cidr.IP.Mask(s.nodeMask)) 251 if err != nil { 252 return -1, -1, err 253 } 254 ip := make([]byte, ipSize) 255 if netutils.IsIPv4(cidr.IP) { 256 ipInt := binary.BigEndian.Uint32(cidr.IP) | (^binary.BigEndian.Uint32(cidr.Mask)) 257 binary.BigEndian.PutUint32(ip, ipInt) 258 } else { 259 // ipIntLeft | ipIntRight 260 // 2001:0DB8:1234:0000:0000:0000:0000:0000 261 ipIntLeft := binary.BigEndian.Uint64(cidr.IP[:net.IPv6len/2]) | (^binary.BigEndian.Uint64(cidr.Mask[:net.IPv6len/2])) 262 ipIntRight := binary.BigEndian.Uint64(cidr.IP[net.IPv6len/2:]) | (^binary.BigEndian.Uint64(cidr.Mask[net.IPv6len/2:])) 263 binary.BigEndian.PutUint64(ip[:net.IPv6len/2], ipIntLeft) 264 binary.BigEndian.PutUint64(ip[net.IPv6len/2:], ipIntRight) 265 } 266 end, err = s.getIndexForIP(net.IP(ip).Mask(s.nodeMask)) 267 if err != nil { 268 return -1, -1, err 269 } 270 } 271 return begin, end, nil 272 } 273 274 // Release releases the given CIDR range. 275 func (s *MultiCIDRSet) Release(cidr *net.IPNet) error { 276 begin, end, err := s.getBeginningAndEndIndices(cidr) 277 if err != nil { 278 return err 279 } 280 s.Lock() 281 defer s.Unlock() 282 283 for i := begin; i <= end; i++ { 284 // Remove from the allocated CIDR Map and decrement the counter only if currently 285 // marked allocated. Avoids double counting. 286 currCIDR, err := s.indexToCIDRBlock(i) 287 if err != nil { 288 return err 289 } 290 if _, ok := s.AllocatedCIDRMap[currCIDR.String()]; ok { 291 delete(s.AllocatedCIDRMap, currCIDR.String()) 292 s.allocatedCIDRs-- 293 cidrSetReleases.WithLabelValues(s.Label).Inc() 294 } 295 } 296 297 cidrSetUsage.WithLabelValues(s.Label).Set(float64(s.allocatedCIDRs) / float64(s.MaxCIDRs)) 298 299 return nil 300 } 301 302 // Occupy marks the given CIDR range as used. Occupy succeeds even if the CIDR 303 // range was previously used. 304 func (s *MultiCIDRSet) Occupy(cidr *net.IPNet) (err error) { 305 begin, end, err := s.getBeginningAndEndIndices(cidr) 306 if err != nil { 307 return err 308 } 309 s.Lock() 310 defer s.Unlock() 311 312 for i := begin; i <= end; i++ { 313 // Add to the allocated CIDR Map and increment the counter only if not already 314 // marked allocated. Prevents double counting. 315 currCIDR, err := s.indexToCIDRBlock(i) 316 if err != nil { 317 return err 318 } 319 if _, ok := s.AllocatedCIDRMap[currCIDR.String()]; !ok { 320 s.AllocatedCIDRMap[currCIDR.String()] = true 321 cidrSetAllocations.WithLabelValues(s.Label).Inc() 322 s.allocatedCIDRs++ 323 } 324 } 325 cidrSetUsage.WithLabelValues(s.Label).Set(float64(s.allocatedCIDRs) / float64(s.MaxCIDRs)) 326 327 return nil 328 } 329 330 func (s *MultiCIDRSet) getIndexForIP(ip net.IP) (int, error) { 331 if ip.To4() != nil { 332 cidrIndex := (binary.BigEndian.Uint32(s.ClusterCIDR.IP) ^ binary.BigEndian.Uint32(ip.To4())) >> uint32(32-s.NodeMaskSize) 333 if cidrIndex >= uint32(s.MaxCIDRs) { 334 return 0, fmt.Errorf("CIDR: %v/%v is out of the range of CIDR allocator", ip, s.NodeMaskSize) 335 } 336 return int(cidrIndex), nil 337 } 338 if netutils.IsIPv6(ip) { 339 bigIP := big.NewInt(0).SetBytes(s.ClusterCIDR.IP) 340 bigIP = bigIP.Xor(bigIP, big.NewInt(0).SetBytes(ip)) 341 cidrIndexBig := bigIP.Rsh(bigIP, uint(net.IPv6len*8-s.NodeMaskSize)) 342 cidrIndex := cidrIndexBig.Uint64() 343 if cidrIndex >= uint64(s.MaxCIDRs) { 344 return 0, fmt.Errorf("CIDR: %v/%v is out of the range of CIDR allocator", ip, s.NodeMaskSize) 345 } 346 return int(cidrIndex), nil 347 } 348 349 return 0, fmt.Errorf("invalid IP: %v", ip) 350 } 351 352 // UpdateEvaluatedCount increments the evaluated count. 353 func (s *MultiCIDRSet) UpdateEvaluatedCount(evaluated int) { 354 cidrSetAllocationTriesPerRequest.WithLabelValues(s.Label).Observe(float64(evaluated)) 355 } 356 357 // getMaxCIDRs returns the max number of CIDRs that can be obtained by subdividing a mask of size `clusterMaskSize` 358 // into subnets with mask of size `subNetMaskSize`. 359 func getMaxCIDRs(subNetMaskSize, clusterMaskSize int) int { 360 return 1 << uint32(subNetMaskSize-clusterMaskSize) 361 }