k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/nodeipam/ipam/cidrset/cidr_set.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package cidrset 18 19 import ( 20 "encoding/binary" 21 "errors" 22 "fmt" 23 "math/big" 24 "math/bits" 25 "net" 26 "sync" 27 ) 28 29 // CidrSet manages a set of CIDR ranges from which blocks of IPs can 30 // be allocated from. 31 type CidrSet struct { 32 sync.Mutex 33 // clusterCIDR is the CIDR assigned to the cluster 34 clusterCIDR *net.IPNet 35 // clusterMaskSize is the mask size, in bits, assigned to the cluster 36 // caches the mask size to avoid the penalty of calling clusterCIDR.Mask.Size() 37 clusterMaskSize int 38 // nodeMask is the network mask assigned to the nodes 39 nodeMask net.IPMask 40 // nodeMaskSize is the mask size, in bits,assigned to the nodes 41 // caches the mask size to avoid the penalty of calling nodeMask.Size() 42 nodeMaskSize int 43 // maxCIDRs is the maximum number of CIDRs that can be allocated 44 maxCIDRs int 45 // allocatedCIDRs counts the number of CIDRs allocated 46 allocatedCIDRs int 47 // nextCandidate points to the next CIDR that should be free 48 nextCandidate int 49 // used is a bitmap used to track the CIDRs allocated 50 used big.Int 51 // label is used to identify the metrics 52 label string 53 } 54 55 const ( 56 // The subnet mask size cannot be greater than 16 more than the cluster mask size 57 // TODO: https://github.com/kubernetes/kubernetes/issues/44918 58 // clusterSubnetMaxDiff limited to 16 due to the uncompressed bitmap 59 // Due to this limitation the subnet mask for IPv6 cluster cidr needs to be >= 48 60 // as default mask size for IPv6 is 64. 61 clusterSubnetMaxDiff = 16 62 // halfIPv6Len is the half of the IPv6 length 63 halfIPv6Len = net.IPv6len / 2 64 ) 65 66 var ( 67 // ErrCIDRRangeNoCIDRsRemaining occurs when there is no more space 68 // to allocate CIDR ranges. 69 ErrCIDRRangeNoCIDRsRemaining = errors.New( 70 "CIDR allocation failed; there are no remaining CIDRs left to allocate in the accepted range") 71 // ErrCIDRSetSubNetTooBig occurs when the subnet mask size is too 72 // big compared to the CIDR mask size. 73 ErrCIDRSetSubNetTooBig = errors.New( 74 "New CIDR set failed; the node CIDR size is too big") 75 ) 76 77 // NewCIDRSet creates a new CidrSet. 78 func NewCIDRSet(clusterCIDR *net.IPNet, subNetMaskSize int) (*CidrSet, error) { 79 clusterMask := clusterCIDR.Mask 80 clusterMaskSize, bits := clusterMask.Size() 81 82 if (clusterCIDR.IP.To4() == nil) && (subNetMaskSize-clusterMaskSize > clusterSubnetMaxDiff) { 83 return nil, ErrCIDRSetSubNetTooBig 84 } 85 86 // register CidrSet metrics 87 registerCidrsetMetrics() 88 89 maxCIDRs := getMaxCIDRs(subNetMaskSize, clusterMaskSize) 90 cidrSet := &CidrSet{ 91 clusterCIDR: clusterCIDR, 92 nodeMask: net.CIDRMask(subNetMaskSize, bits), 93 clusterMaskSize: clusterMaskSize, 94 maxCIDRs: maxCIDRs, 95 nodeMaskSize: subNetMaskSize, 96 label: clusterCIDR.String(), 97 } 98 cidrSetMaxCidrs.WithLabelValues(cidrSet.label).Set(float64(maxCIDRs)) 99 100 return cidrSet, nil 101 } 102 103 func (s *CidrSet) indexToCIDRBlock(index int) *net.IPNet { 104 var ip []byte 105 switch /*v4 or v6*/ { 106 case s.clusterCIDR.IP.To4() != nil: 107 { 108 j := uint32(index) << uint32(32-s.nodeMaskSize) 109 ipInt := (binary.BigEndian.Uint32(s.clusterCIDR.IP)) | j 110 ip = make([]byte, net.IPv4len) 111 binary.BigEndian.PutUint32(ip, ipInt) 112 } 113 case s.clusterCIDR.IP.To16() != nil: 114 { 115 // leftClusterIP | rightClusterIP 116 // 2001:0DB8:1234:0000:0000:0000:0000:0000 117 const v6NBits = 128 118 const halfV6NBits = v6NBits / 2 119 leftClusterIP := binary.BigEndian.Uint64(s.clusterCIDR.IP[:halfIPv6Len]) 120 rightClusterIP := binary.BigEndian.Uint64(s.clusterCIDR.IP[halfIPv6Len:]) 121 122 ip = make([]byte, net.IPv6len) 123 124 if s.nodeMaskSize <= halfV6NBits { 125 // We only care about left side IP 126 leftClusterIP |= uint64(index) << uint(halfV6NBits-s.nodeMaskSize) 127 } else { 128 if s.clusterMaskSize < halfV6NBits { 129 // see how many bits are needed to reach the left side 130 btl := uint(s.nodeMaskSize - halfV6NBits) 131 indexMaxBit := uint(64 - bits.LeadingZeros64(uint64(index))) 132 if indexMaxBit > btl { 133 leftClusterIP |= uint64(index) >> btl 134 } 135 } 136 // the right side will be calculated the same way either the 137 // subNetMaskSize affects both left and right sides 138 rightClusterIP |= uint64(index) << uint(v6NBits-s.nodeMaskSize) 139 } 140 binary.BigEndian.PutUint64(ip[:halfIPv6Len], leftClusterIP) 141 binary.BigEndian.PutUint64(ip[halfIPv6Len:], rightClusterIP) 142 } 143 } 144 return &net.IPNet{ 145 IP: ip, 146 Mask: s.nodeMask, 147 } 148 } 149 150 // AllocateNext allocates the next free CIDR range. This will set the range 151 // as occupied and return the allocated range. 152 func (s *CidrSet) AllocateNext() (*net.IPNet, error) { 153 s.Lock() 154 defer s.Unlock() 155 156 if s.allocatedCIDRs == s.maxCIDRs { 157 return nil, ErrCIDRRangeNoCIDRsRemaining 158 } 159 candidate := s.nextCandidate 160 var i int 161 for i = 0; i < s.maxCIDRs; i++ { 162 if s.used.Bit(candidate) == 0 { 163 break 164 } 165 candidate = (candidate + 1) % s.maxCIDRs 166 } 167 168 s.nextCandidate = (candidate + 1) % s.maxCIDRs 169 s.used.SetBit(&s.used, candidate, 1) 170 s.allocatedCIDRs++ 171 // Update metrics 172 cidrSetAllocations.WithLabelValues(s.label).Inc() 173 cidrSetAllocationTriesPerRequest.WithLabelValues(s.label).Observe(float64(i)) 174 cidrSetUsage.WithLabelValues(s.label).Set(float64(s.allocatedCIDRs) / float64(s.maxCIDRs)) 175 176 return s.indexToCIDRBlock(candidate), nil 177 } 178 179 func (s *CidrSet) getBeginningAndEndIndices(cidr *net.IPNet) (begin, end int, err error) { 180 if cidr == nil { 181 return -1, -1, fmt.Errorf("error getting indices for cluster cidr %v, cidr is nil", s.clusterCIDR) 182 } 183 begin, end = 0, s.maxCIDRs-1 184 cidrMask := cidr.Mask 185 maskSize, _ := cidrMask.Size() 186 var ipSize int 187 188 if !s.clusterCIDR.Contains(cidr.IP.Mask(s.clusterCIDR.Mask)) && !cidr.Contains(s.clusterCIDR.IP.Mask(cidr.Mask)) { 189 return -1, -1, fmt.Errorf("cidr %v is out the range of cluster cidr %v", cidr, s.clusterCIDR) 190 } 191 192 if s.clusterMaskSize < maskSize { 193 194 ipSize = net.IPv4len 195 if cidr.IP.To4() == nil { 196 ipSize = net.IPv6len 197 } 198 begin, err = s.getIndexForIP(cidr.IP.Mask(s.nodeMask)) 199 if err != nil { 200 return -1, -1, err 201 } 202 ip := make([]byte, ipSize) 203 if cidr.IP.To4() != nil { 204 ipInt := binary.BigEndian.Uint32(cidr.IP) | (^binary.BigEndian.Uint32(cidr.Mask)) 205 binary.BigEndian.PutUint32(ip, ipInt) 206 } else { 207 // ipIntLeft | ipIntRight 208 // 2001:0DB8:1234:0000:0000:0000:0000:0000 209 ipIntLeft := binary.BigEndian.Uint64(cidr.IP[:net.IPv6len/2]) | (^binary.BigEndian.Uint64(cidr.Mask[:net.IPv6len/2])) 210 ipIntRight := binary.BigEndian.Uint64(cidr.IP[net.IPv6len/2:]) | (^binary.BigEndian.Uint64(cidr.Mask[net.IPv6len/2:])) 211 binary.BigEndian.PutUint64(ip[:net.IPv6len/2], ipIntLeft) 212 binary.BigEndian.PutUint64(ip[net.IPv6len/2:], ipIntRight) 213 } 214 end, err = s.getIndexForIP(net.IP(ip).Mask(s.nodeMask)) 215 if err != nil { 216 return -1, -1, err 217 } 218 } 219 return begin, end, nil 220 } 221 222 // Release releases the given CIDR range. 223 func (s *CidrSet) Release(cidr *net.IPNet) error { 224 begin, end, err := s.getBeginningAndEndIndices(cidr) 225 if err != nil { 226 return err 227 } 228 s.Lock() 229 defer s.Unlock() 230 for i := begin; i <= end; i++ { 231 // Only change the counters if we change the bit to prevent 232 // double counting. 233 if s.used.Bit(i) != 0 { 234 s.used.SetBit(&s.used, i, 0) 235 s.allocatedCIDRs-- 236 cidrSetReleases.WithLabelValues(s.label).Inc() 237 } 238 } 239 240 cidrSetUsage.WithLabelValues(s.label).Set(float64(s.allocatedCIDRs) / float64(s.maxCIDRs)) 241 return nil 242 } 243 244 // Occupy marks the given CIDR range as used. Occupy succeeds even if the CIDR 245 // range was previously used. 246 func (s *CidrSet) Occupy(cidr *net.IPNet) (err error) { 247 begin, end, err := s.getBeginningAndEndIndices(cidr) 248 if err != nil { 249 return err 250 } 251 s.Lock() 252 defer s.Unlock() 253 for i := begin; i <= end; i++ { 254 // Only change the counters if we change the bit to prevent 255 // double counting. 256 if s.used.Bit(i) == 0 { 257 s.used.SetBit(&s.used, i, 1) 258 s.allocatedCIDRs++ 259 cidrSetAllocations.WithLabelValues(s.label).Inc() 260 } 261 } 262 263 cidrSetUsage.WithLabelValues(s.label).Set(float64(s.allocatedCIDRs) / float64(s.maxCIDRs)) 264 return nil 265 } 266 267 func (s *CidrSet) getIndexForIP(ip net.IP) (int, error) { 268 if ip.To4() != nil { 269 cidrIndex := (binary.BigEndian.Uint32(s.clusterCIDR.IP) ^ binary.BigEndian.Uint32(ip.To4())) >> uint32(32-s.nodeMaskSize) 270 if cidrIndex >= uint32(s.maxCIDRs) { 271 return 0, fmt.Errorf("CIDR: %v/%v is out of the range of CIDR allocator", ip, s.nodeMaskSize) 272 } 273 return int(cidrIndex), nil 274 } 275 if ip.To16() != nil { 276 bigIP := big.NewInt(0).SetBytes(s.clusterCIDR.IP) 277 bigIP = bigIP.Xor(bigIP, big.NewInt(0).SetBytes(ip)) 278 cidrIndexBig := bigIP.Rsh(bigIP, uint(net.IPv6len*8-s.nodeMaskSize)) 279 cidrIndex := cidrIndexBig.Uint64() 280 if cidrIndex >= uint64(s.maxCIDRs) { 281 return 0, fmt.Errorf("CIDR: %v/%v is out of the range of CIDR allocator", ip, s.nodeMaskSize) 282 } 283 return int(cidrIndex), nil 284 } 285 286 return 0, fmt.Errorf("invalid IP: %v", ip) 287 } 288 289 // getMaxCIDRs returns the max number of CIDRs that can be obtained by subdividing a mask of size `clusterMaskSize` 290 // into subnets with mask of size `subNetMaskSize`. 291 func getMaxCIDRs(subNetMaskSize, clusterMaskSize int) int { 292 return 1 << uint32(subNetMaskSize-clusterMaskSize) 293 }