github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/tcpip/ports/ports.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package ports provides PortManager that manages allocating, reserving and 16 // releasing ports. 17 package ports 18 19 import ( 20 "math" 21 22 "github.com/metacubex/gvisor/pkg/rand" 23 "github.com/metacubex/gvisor/pkg/sync" 24 "github.com/metacubex/gvisor/pkg/tcpip" 25 "github.com/metacubex/gvisor/pkg/tcpip/header" 26 ) 27 28 const ( 29 firstEphemeral = 16000 30 ) 31 32 var ( 33 anyIPAddress = tcpip.Address{} 34 ) 35 36 // Reservation describes a port reservation. 37 type Reservation struct { 38 // Networks is a list of network protocols to which the reservation 39 // applies. Can be IPv4, IPv6, or both. 40 Networks []tcpip.NetworkProtocolNumber 41 42 // Transport is the transport protocol to which the reservation applies. 43 Transport tcpip.TransportProtocolNumber 44 45 // Addr is the address of the local endpoint. 46 Addr tcpip.Address 47 48 // Port is the local port number. 49 Port uint16 50 51 // Flags describe features of the reservation. 52 Flags Flags 53 54 // BindToDevice is the NIC to which the reservation applies. 55 BindToDevice tcpip.NICID 56 57 // Dest is the destination address. 58 Dest tcpip.FullAddress 59 } 60 61 func (rs Reservation) dst() destination { 62 return destination{ 63 rs.Dest.Addr, 64 rs.Dest.Port, 65 } 66 } 67 68 type portDescriptor struct { 69 network tcpip.NetworkProtocolNumber 70 transport tcpip.TransportProtocolNumber 71 port uint16 72 } 73 74 type destination struct { 75 addr tcpip.Address 76 port uint16 77 } 78 79 // destToCounter maps each destination to the FlagCounter that represents 80 // endpoints to that destination. 81 // 82 // destToCounter is never empty. When it has no elements, it is removed from 83 // the map that references it. 84 type destToCounter map[destination]FlagCounter 85 86 // intersectionFlags calculates the intersection of flag bit values which affect 87 // the specified destination. 88 // 89 // If no destinations are present, all flag values are returned as there are no 90 // entries to limit possible flag values of a new entry. 91 // 92 // In addition to the intersection, the number of intersecting refs is 93 // returned. 94 func (dc destToCounter) intersectionFlags(res Reservation) (BitFlags, int) { 95 intersection := FlagMask 96 var count int 97 98 for dest, counter := range dc { 99 if dest == res.dst() { 100 intersection &= counter.SharedFlags() 101 count++ 102 continue 103 } 104 // Wildcard destinations affect all destinations for TupleOnly. 105 if dest.addr == anyIPAddress || res.Dest.Addr == anyIPAddress { 106 // Only bitwise and the TupleOnlyFlag. 107 intersection &= (^TupleOnlyFlag) | counter.SharedFlags() 108 count++ 109 } 110 } 111 112 return intersection, count 113 } 114 115 // deviceToDest maps NICs to destinations for which there are port reservations. 116 // 117 // deviceToDest is never empty. When it has no elements, it is removed from the 118 // map that references it. 119 type deviceToDest map[tcpip.NICID]destToCounter 120 121 // isAvailable checks whether binding is possible by device. If not binding to 122 // a device, check against all FlagCounters. If binding to a specific device, 123 // check against the unspecified device and the provided device. 124 // 125 // If either of the port reuse flags is enabled on any of the nodes, all nodes 126 // sharing a port must share at least one reuse flag. This matches Linux's 127 // behavior. 128 func (dd deviceToDest) isAvailable(res Reservation, portSpecified bool) bool { 129 flagBits := res.Flags.Bits() 130 if res.BindToDevice == 0 { 131 intersection := FlagMask 132 for _, dest := range dd { 133 flags, count := dest.intersectionFlags(res) 134 if count == 0 { 135 continue 136 } 137 intersection &= flags 138 if intersection&flagBits == 0 { 139 // Can't bind because the (addr,port) was 140 // previously bound without reuse. 141 return false 142 } 143 } 144 if !portSpecified && res.Transport == header.TCPProtocolNumber { 145 return false 146 } 147 return true 148 } 149 150 intersection := FlagMask 151 152 if dests, ok := dd[0]; ok { 153 var count int 154 intersection, count = dests.intersectionFlags(res) 155 if count > 0 { 156 if intersection&flagBits == 0 { 157 return false 158 } 159 if !portSpecified && res.Transport == header.TCPProtocolNumber { 160 return false 161 } 162 } 163 } 164 165 if dests, ok := dd[res.BindToDevice]; ok { 166 flags, count := dests.intersectionFlags(res) 167 intersection &= flags 168 if count > 0 { 169 if intersection&flagBits == 0 { 170 return false 171 } 172 if !portSpecified && res.Transport == header.TCPProtocolNumber { 173 return false 174 } 175 } 176 } 177 178 return true 179 } 180 181 // addrToDevice maps IP addresses to NICs that have port reservations. 182 type addrToDevice map[tcpip.Address]deviceToDest 183 184 // isAvailable checks whether an IP address is available to bind to. If the 185 // address is the "any" address, check all other addresses. Otherwise, just 186 // check against the "any" address and the provided address. 187 func (ad addrToDevice) isAvailable(res Reservation, portSpecified bool) bool { 188 if res.Addr == anyIPAddress { 189 // If binding to the "any" address then check that there are no 190 // conflicts with all addresses. 191 for _, devices := range ad { 192 if !devices.isAvailable(res, portSpecified) { 193 return false 194 } 195 } 196 return true 197 } 198 199 // Check that there is no conflict with the "any" address. 200 if devices, ok := ad[anyIPAddress]; ok { 201 if !devices.isAvailable(res, portSpecified) { 202 return false 203 } 204 } 205 206 // Check that this is no conflict with the provided address. 207 if devices, ok := ad[res.Addr]; ok { 208 if !devices.isAvailable(res, portSpecified) { 209 return false 210 } 211 } 212 213 return true 214 } 215 216 // PortManager manages allocating, reserving and releasing ports. 217 type PortManager struct { 218 // mu protects allocatedPorts. 219 // LOCK ORDERING: mu > ephemeralMu. 220 mu sync.RWMutex 221 // allocatedPorts is a nesting of maps that ultimately map Reservations 222 // to FlagCounters describing whether the Reservation is valid and can 223 // be reused. 224 allocatedPorts map[portDescriptor]addrToDevice 225 226 // ephemeralMu protects firstEphemeral and numEphemeral. 227 ephemeralMu sync.RWMutex 228 firstEphemeral uint16 229 numEphemeral uint16 230 } 231 232 // NewPortManager creates new PortManager. 233 func NewPortManager() *PortManager { 234 return &PortManager{ 235 allocatedPorts: make(map[portDescriptor]addrToDevice), 236 firstEphemeral: firstEphemeral, 237 numEphemeral: math.MaxUint16 - firstEphemeral + 1, 238 } 239 } 240 241 // PortTester indicates whether the passed in port is suitable. Returning an 242 // error causes the function to which the PortTester is passed to return that 243 // error. 244 type PortTester func(port uint16) (good bool, err tcpip.Error) 245 246 // PickEphemeralPort randomly chooses a starting point and iterates over all 247 // possible ephemeral ports, allowing the caller to decide whether a given port 248 // is suitable for its needs, and stopping when a port is found or an error 249 // occurs. 250 func (pm *PortManager) PickEphemeralPort(rng rand.RNG, testPort PortTester) (port uint16, err tcpip.Error) { 251 pm.ephemeralMu.RLock() 252 firstEphemeral := pm.firstEphemeral 253 numEphemeral := pm.numEphemeral 254 pm.ephemeralMu.RUnlock() 255 256 return pickEphemeralPort(rng.Uint32(), firstEphemeral, numEphemeral, testPort) 257 } 258 259 // pickEphemeralPort starts at the offset specified from the FirstEphemeral port 260 // and iterates over the number of ports specified by count and allows the 261 // caller to decide whether a given port is suitable for its needs, and stopping 262 // when a port is found or an error occurs. 263 func pickEphemeralPort(offset uint32, first, count uint16, testPort PortTester) (port uint16, err tcpip.Error) { 264 // This implements Algorithm 1 as per RFC 6056 Section 3.3.1. 265 for i := uint32(0); i < uint32(count); i++ { 266 port := uint16(uint32(first) + (offset+i)%uint32(count)) 267 ok, err := testPort(port) 268 if err != nil { 269 return 0, err 270 } 271 272 if ok { 273 return port, nil 274 } 275 } 276 277 return 0, &tcpip.ErrNoPortAvailable{} 278 } 279 280 // ReservePort marks a port/IP combination as reserved so that it cannot be 281 // reserved by another endpoint. If port is zero, ReservePort will search for 282 // an unreserved ephemeral port and reserve it, returning its value in the 283 // "port" return value. 284 // 285 // An optional PortTester can be passed in which if provided will be used to 286 // test if the picked port can be used. The function should return true if the 287 // port is safe to use, false otherwise. 288 func (pm *PortManager) ReservePort(rng rand.RNG, res Reservation, testPort PortTester) (reservedPort uint16, err tcpip.Error) { 289 pm.mu.Lock() 290 defer pm.mu.Unlock() 291 292 // If a port is specified, just try to reserve it for all network 293 // protocols. 294 if res.Port != 0 { 295 if !pm.reserveSpecificPortLocked(res, true /* portSpecified */) { 296 return 0, &tcpip.ErrPortInUse{} 297 } 298 if testPort != nil { 299 ok, err := testPort(res.Port) 300 if err != nil { 301 pm.releasePortLocked(res) 302 return 0, err 303 } 304 if !ok { 305 pm.releasePortLocked(res) 306 return 0, &tcpip.ErrPortInUse{} 307 } 308 } 309 return res.Port, nil 310 } 311 312 // A port wasn't specified, so try to find one. 313 return pm.PickEphemeralPort(rng, func(p uint16) (bool, tcpip.Error) { 314 res.Port = p 315 if !pm.reserveSpecificPortLocked(res, false /* portSpecified */) { 316 return false, nil 317 } 318 if testPort != nil { 319 ok, err := testPort(p) 320 if err != nil { 321 pm.releasePortLocked(res) 322 return false, err 323 } 324 if !ok { 325 pm.releasePortLocked(res) 326 return false, nil 327 } 328 } 329 return true, nil 330 }) 331 } 332 333 // reserveSpecificPortLocked tries to reserve the given port on all given 334 // protocols. 335 func (pm *PortManager) reserveSpecificPortLocked(res Reservation, portSpecified bool) bool { 336 // Make sure the port is available. 337 for _, network := range res.Networks { 338 desc := portDescriptor{network, res.Transport, res.Port} 339 if addrs, ok := pm.allocatedPorts[desc]; ok { 340 if !addrs.isAvailable(res, portSpecified) { 341 return false 342 } 343 } 344 } 345 346 // Reserve port on all network protocols. 347 flagBits := res.Flags.Bits() 348 dst := res.dst() 349 for _, network := range res.Networks { 350 desc := portDescriptor{network, res.Transport, res.Port} 351 addrToDev, ok := pm.allocatedPorts[desc] 352 if !ok { 353 addrToDev = make(addrToDevice) 354 pm.allocatedPorts[desc] = addrToDev 355 } 356 devToDest, ok := addrToDev[res.Addr] 357 if !ok { 358 devToDest = make(deviceToDest) 359 addrToDev[res.Addr] = devToDest 360 } 361 destToCntr := devToDest[res.BindToDevice] 362 if destToCntr == nil { 363 destToCntr = make(destToCounter) 364 } 365 counter := destToCntr[dst] 366 counter.AddRef(flagBits) 367 destToCntr[dst] = counter 368 devToDest[res.BindToDevice] = destToCntr 369 } 370 371 return true 372 } 373 374 // ReserveTuple adds a port reservation for the tuple on all given protocol. 375 func (pm *PortManager) ReserveTuple(res Reservation) bool { 376 flagBits := res.Flags.Bits() 377 dst := res.dst() 378 379 pm.mu.Lock() 380 defer pm.mu.Unlock() 381 382 // It is easier to undo the entire reservation, so if we find that the 383 // tuple can't be fully added, finish and undo the whole thing. 384 undo := false 385 386 // Reserve port on all network protocols. 387 for _, network := range res.Networks { 388 desc := portDescriptor{network, res.Transport, res.Port} 389 addrToDev, ok := pm.allocatedPorts[desc] 390 if !ok { 391 addrToDev = make(addrToDevice) 392 pm.allocatedPorts[desc] = addrToDev 393 } 394 devToDest, ok := addrToDev[res.Addr] 395 if !ok { 396 devToDest = make(deviceToDest) 397 addrToDev[res.Addr] = devToDest 398 } 399 destToCntr := devToDest[res.BindToDevice] 400 if destToCntr == nil { 401 destToCntr = make(destToCounter) 402 } 403 404 counter := destToCntr[dst] 405 if counter.TotalRefs() != 0 && counter.SharedFlags()&flagBits == 0 { 406 // Tuple already exists. 407 undo = true 408 } 409 counter.AddRef(flagBits) 410 destToCntr[dst] = counter 411 devToDest[res.BindToDevice] = destToCntr 412 } 413 414 if undo { 415 // releasePortLocked decrements the counts (rather than setting 416 // them to zero), so it will undo the incorrect incrementing 417 // above. 418 pm.releasePortLocked(res) 419 return false 420 } 421 422 return true 423 } 424 425 // ReleasePort releases the reservation on a port/IP combination so that it can 426 // be reserved by other endpoints. 427 func (pm *PortManager) ReleasePort(res Reservation) { 428 pm.mu.Lock() 429 defer pm.mu.Unlock() 430 431 pm.releasePortLocked(res) 432 } 433 434 func (pm *PortManager) releasePortLocked(res Reservation) { 435 dst := res.dst() 436 for _, network := range res.Networks { 437 desc := portDescriptor{network, res.Transport, res.Port} 438 addrToDev, ok := pm.allocatedPorts[desc] 439 if !ok { 440 continue 441 } 442 devToDest, ok := addrToDev[res.Addr] 443 if !ok { 444 continue 445 } 446 destToCounter, ok := devToDest[res.BindToDevice] 447 if !ok { 448 continue 449 } 450 counter, ok := destToCounter[dst] 451 if !ok { 452 continue 453 } 454 counter.DropRef(res.Flags.Bits()) 455 if counter.TotalRefs() > 0 { 456 destToCounter[dst] = counter 457 continue 458 } 459 delete(destToCounter, dst) 460 if len(destToCounter) > 0 { 461 continue 462 } 463 delete(devToDest, res.BindToDevice) 464 if len(devToDest) > 0 { 465 continue 466 } 467 delete(addrToDev, res.Addr) 468 if len(addrToDev) > 0 { 469 continue 470 } 471 delete(pm.allocatedPorts, desc) 472 } 473 } 474 475 // PortRange returns the UDP and TCP inclusive range of ephemeral ports used in 476 // both IPv4 and IPv6. 477 func (pm *PortManager) PortRange() (uint16, uint16) { 478 pm.ephemeralMu.RLock() 479 defer pm.ephemeralMu.RUnlock() 480 return pm.firstEphemeral, pm.firstEphemeral + pm.numEphemeral - 1 481 } 482 483 // SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range 484 // (inclusive). 485 func (pm *PortManager) SetPortRange(start uint16, end uint16) tcpip.Error { 486 if start > end { 487 return &tcpip.ErrInvalidPortRange{} 488 } 489 pm.ephemeralMu.Lock() 490 defer pm.ephemeralMu.Unlock() 491 pm.firstEphemeral = start 492 pm.numEphemeral = end - start + 1 493 return nil 494 }