github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/pkg/tcpip/ports/ports.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package ports provides PortManager that manages allocating, reserving and 16 // releasing ports. 17 package ports 18 19 import ( 20 "math" 21 "math/rand" 22 23 "github.com/ttpreport/gvisor-ligolo/pkg/atomicbitops" 24 "github.com/ttpreport/gvisor-ligolo/pkg/sync" 25 "github.com/ttpreport/gvisor-ligolo/pkg/tcpip" 26 "github.com/ttpreport/gvisor-ligolo/pkg/tcpip/header" 27 ) 28 29 const ( 30 firstEphemeral = 16000 31 ) 32 33 var ( 34 anyIPAddress = tcpip.Address{} 35 ) 36 37 // Reservation describes a port reservation. 38 type Reservation struct { 39 // Networks is a list of network protocols to which the reservation 40 // applies. Can be IPv4, IPv6, or both. 41 Networks []tcpip.NetworkProtocolNumber 42 43 // Transport is the transport protocol to which the reservation applies. 44 Transport tcpip.TransportProtocolNumber 45 46 // Addr is the address of the local endpoint. 47 Addr tcpip.Address 48 49 // Port is the local port number. 50 Port uint16 51 52 // Flags describe features of the reservation. 53 Flags Flags 54 55 // BindToDevice is the NIC to which the reservation applies. 56 BindToDevice tcpip.NICID 57 58 // Dest is the destination address. 59 Dest tcpip.FullAddress 60 } 61 62 func (rs Reservation) dst() destination { 63 return destination{ 64 rs.Dest.Addr, 65 rs.Dest.Port, 66 } 67 } 68 69 type portDescriptor struct { 70 network tcpip.NetworkProtocolNumber 71 transport tcpip.TransportProtocolNumber 72 port uint16 73 } 74 75 type destination struct { 76 addr tcpip.Address 77 port uint16 78 } 79 80 // destToCounter maps each destination to the FlagCounter that represents 81 // endpoints to that destination. 82 // 83 // destToCounter is never empty. When it has no elements, it is removed from 84 // the map that references it. 85 type destToCounter map[destination]FlagCounter 86 87 // intersectionFlags calculates the intersection of flag bit values which affect 88 // the specified destination. 89 // 90 // If no destinations are present, all flag values are returned as there are no 91 // entries to limit possible flag values of a new entry. 92 // 93 // In addition to the intersection, the number of intersecting refs is 94 // returned. 95 func (dc destToCounter) intersectionFlags(res Reservation) (BitFlags, int) { 96 intersection := FlagMask 97 var count int 98 99 for dest, counter := range dc { 100 if dest == res.dst() { 101 intersection &= counter.SharedFlags() 102 count++ 103 continue 104 } 105 // Wildcard destinations affect all destinations for TupleOnly. 106 if dest.addr == anyIPAddress || res.Dest.Addr == anyIPAddress { 107 // Only bitwise and the TupleOnlyFlag. 108 intersection &= (^TupleOnlyFlag) | counter.SharedFlags() 109 count++ 110 } 111 } 112 113 return intersection, count 114 } 115 116 // deviceToDest maps NICs to destinations for which there are port reservations. 117 // 118 // deviceToDest is never empty. When it has no elements, it is removed from the 119 // map that references it. 120 type deviceToDest map[tcpip.NICID]destToCounter 121 122 // isAvailable checks whether binding is possible by device. If not binding to 123 // a device, check against all FlagCounters. If binding to a specific device, 124 // check against the unspecified device and the provided device. 125 // 126 // If either of the port reuse flags is enabled on any of the nodes, all nodes 127 // sharing a port must share at least one reuse flag. This matches Linux's 128 // behavior. 129 func (dd deviceToDest) isAvailable(res Reservation, portSpecified bool) bool { 130 flagBits := res.Flags.Bits() 131 if res.BindToDevice == 0 { 132 intersection := FlagMask 133 for _, dest := range dd { 134 flags, count := dest.intersectionFlags(res) 135 if count == 0 { 136 continue 137 } 138 intersection &= flags 139 if intersection&flagBits == 0 { 140 // Can't bind because the (addr,port) was 141 // previously bound without reuse. 142 return false 143 } 144 } 145 if !portSpecified && res.Transport == header.TCPProtocolNumber { 146 return false 147 } 148 return true 149 } 150 151 intersection := FlagMask 152 153 if dests, ok := dd[0]; ok { 154 var count int 155 intersection, count = dests.intersectionFlags(res) 156 if count > 0 { 157 if intersection&flagBits == 0 { 158 return false 159 } 160 if !portSpecified && res.Transport == header.TCPProtocolNumber { 161 return false 162 } 163 } 164 } 165 166 if dests, ok := dd[res.BindToDevice]; ok { 167 flags, count := dests.intersectionFlags(res) 168 intersection &= flags 169 if count > 0 { 170 if intersection&flagBits == 0 { 171 return false 172 } 173 if !portSpecified && res.Transport == header.TCPProtocolNumber { 174 return false 175 } 176 } 177 } 178 179 return true 180 } 181 182 // addrToDevice maps IP addresses to NICs that have port reservations. 183 type addrToDevice map[tcpip.Address]deviceToDest 184 185 // isAvailable checks whether an IP address is available to bind to. If the 186 // address is the "any" address, check all other addresses. Otherwise, just 187 // check against the "any" address and the provided address. 188 func (ad addrToDevice) isAvailable(res Reservation, portSpecified bool) bool { 189 if res.Addr == anyIPAddress { 190 // If binding to the "any" address then check that there are no 191 // conflicts with all addresses. 192 for _, devices := range ad { 193 if !devices.isAvailable(res, portSpecified) { 194 return false 195 } 196 } 197 return true 198 } 199 200 // Check that there is no conflict with the "any" address. 201 if devices, ok := ad[anyIPAddress]; ok { 202 if !devices.isAvailable(res, portSpecified) { 203 return false 204 } 205 } 206 207 // Check that this is no conflict with the provided address. 208 if devices, ok := ad[res.Addr]; ok { 209 if !devices.isAvailable(res, portSpecified) { 210 return false 211 } 212 } 213 214 return true 215 } 216 217 // PortManager manages allocating, reserving and releasing ports. 218 type PortManager struct { 219 // mu protects allocatedPorts. 220 // LOCK ORDERING: mu > ephemeralMu. 221 mu sync.RWMutex 222 // allocatedPorts is a nesting of maps that ultimately map Reservations 223 // to FlagCounters describing whether the Reservation is valid and can 224 // be reused. 225 allocatedPorts map[portDescriptor]addrToDevice 226 227 // ephemeralMu protects firstEphemeral and numEphemeral. 228 ephemeralMu sync.RWMutex 229 firstEphemeral uint16 230 numEphemeral uint16 231 232 // hint is used to pick ports ephemeral ports in a stable order for 233 // a given port offset. 234 // 235 // hint must be accessed using the portHint/incPortHint helpers. 236 // TODO(gvisor.dev/issue/940): S/R this field. 237 hint atomicbitops.Uint32 238 } 239 240 // NewPortManager creates new PortManager. 241 func NewPortManager() *PortManager { 242 return &PortManager{ 243 allocatedPorts: make(map[portDescriptor]addrToDevice), 244 firstEphemeral: firstEphemeral, 245 numEphemeral: math.MaxUint16 - firstEphemeral + 1, 246 } 247 } 248 249 // PortTester indicates whether the passed in port is suitable. Returning an 250 // error causes the function to which the PortTester is passed to return that 251 // error. 252 type PortTester func(port uint16) (good bool, err tcpip.Error) 253 254 // PickEphemeralPort randomly chooses a starting point and iterates over all 255 // possible ephemeral ports, allowing the caller to decide whether a given port 256 // is suitable for its needs, and stopping when a port is found or an error 257 // occurs. 258 func (pm *PortManager) PickEphemeralPort(rng *rand.Rand, testPort PortTester) (port uint16, err tcpip.Error) { 259 pm.ephemeralMu.RLock() 260 firstEphemeral := pm.firstEphemeral 261 numEphemeral := pm.numEphemeral 262 pm.ephemeralMu.RUnlock() 263 264 offset := uint32(rng.Int31n(int32(numEphemeral))) 265 return pickEphemeralPort(offset, firstEphemeral, numEphemeral, testPort) 266 } 267 268 // portHint atomically reads and returns the pm.hint value. 269 func (pm *PortManager) portHint() uint32 { 270 return pm.hint.Load() 271 } 272 273 // incPortHint atomically increments pm.hint by 1. 274 func (pm *PortManager) incPortHint() { 275 pm.hint.Add(1) 276 } 277 278 // PickEphemeralPortStable starts at the specified offset + pm.portHint and 279 // iterates over all ephemeral ports, allowing the caller to decide whether a 280 // given port is suitable for its needs and stopping when a port is found or an 281 // error occurs. 282 func (pm *PortManager) PickEphemeralPortStable(offset uint32, testPort PortTester) (port uint16, err tcpip.Error) { 283 pm.ephemeralMu.RLock() 284 firstEphemeral := pm.firstEphemeral 285 numEphemeral := pm.numEphemeral 286 pm.ephemeralMu.RUnlock() 287 288 p, err := pickEphemeralPort(pm.portHint()+offset, firstEphemeral, numEphemeral, testPort) 289 if err == nil { 290 pm.incPortHint() 291 } 292 return p, err 293 } 294 295 // pickEphemeralPort starts at the offset specified from the FirstEphemeral port 296 // and iterates over the number of ports specified by count and allows the 297 // caller to decide whether a given port is suitable for its needs, and stopping 298 // when a port is found or an error occurs. 299 func pickEphemeralPort(offset uint32, first, count uint16, testPort PortTester) (port uint16, err tcpip.Error) { 300 for i := uint32(0); i < uint32(count); i++ { 301 port := uint16(uint32(first) + (offset+i)%uint32(count)) 302 ok, err := testPort(port) 303 if err != nil { 304 return 0, err 305 } 306 307 if ok { 308 return port, nil 309 } 310 } 311 312 return 0, &tcpip.ErrNoPortAvailable{} 313 } 314 315 // ReservePort marks a port/IP combination as reserved so that it cannot be 316 // reserved by another endpoint. If port is zero, ReservePort will search for 317 // an unreserved ephemeral port and reserve it, returning its value in the 318 // "port" return value. 319 // 320 // An optional PortTester can be passed in which if provided will be used to 321 // test if the picked port can be used. The function should return true if the 322 // port is safe to use, false otherwise. 323 func (pm *PortManager) ReservePort(rng *rand.Rand, res Reservation, testPort PortTester) (reservedPort uint16, err tcpip.Error) { 324 pm.mu.Lock() 325 defer pm.mu.Unlock() 326 327 // If a port is specified, just try to reserve it for all network 328 // protocols. 329 if res.Port != 0 { 330 if !pm.reserveSpecificPortLocked(res, true /* portSpecified */) { 331 return 0, &tcpip.ErrPortInUse{} 332 } 333 if testPort != nil { 334 ok, err := testPort(res.Port) 335 if err != nil { 336 pm.releasePortLocked(res) 337 return 0, err 338 } 339 if !ok { 340 pm.releasePortLocked(res) 341 return 0, &tcpip.ErrPortInUse{} 342 } 343 } 344 return res.Port, nil 345 } 346 347 // A port wasn't specified, so try to find one. 348 return pm.PickEphemeralPort(rng, func(p uint16) (bool, tcpip.Error) { 349 res.Port = p 350 if !pm.reserveSpecificPortLocked(res, false /* portSpecified */) { 351 return false, nil 352 } 353 if testPort != nil { 354 ok, err := testPort(p) 355 if err != nil { 356 pm.releasePortLocked(res) 357 return false, err 358 } 359 if !ok { 360 pm.releasePortLocked(res) 361 return false, nil 362 } 363 } 364 return true, nil 365 }) 366 } 367 368 // reserveSpecificPortLocked tries to reserve the given port on all given 369 // protocols. 370 func (pm *PortManager) reserveSpecificPortLocked(res Reservation, portSpecified bool) bool { 371 // Make sure the port is available. 372 for _, network := range res.Networks { 373 desc := portDescriptor{network, res.Transport, res.Port} 374 if addrs, ok := pm.allocatedPorts[desc]; ok { 375 if !addrs.isAvailable(res, portSpecified) { 376 return false 377 } 378 } 379 } 380 381 // Reserve port on all network protocols. 382 flagBits := res.Flags.Bits() 383 dst := res.dst() 384 for _, network := range res.Networks { 385 desc := portDescriptor{network, res.Transport, res.Port} 386 addrToDev, ok := pm.allocatedPorts[desc] 387 if !ok { 388 addrToDev = make(addrToDevice) 389 pm.allocatedPorts[desc] = addrToDev 390 } 391 devToDest, ok := addrToDev[res.Addr] 392 if !ok { 393 devToDest = make(deviceToDest) 394 addrToDev[res.Addr] = devToDest 395 } 396 destToCntr := devToDest[res.BindToDevice] 397 if destToCntr == nil { 398 destToCntr = make(destToCounter) 399 } 400 counter := destToCntr[dst] 401 counter.AddRef(flagBits) 402 destToCntr[dst] = counter 403 devToDest[res.BindToDevice] = destToCntr 404 } 405 406 return true 407 } 408 409 // ReserveTuple adds a port reservation for the tuple on all given protocol. 410 func (pm *PortManager) ReserveTuple(res Reservation) bool { 411 flagBits := res.Flags.Bits() 412 dst := res.dst() 413 414 pm.mu.Lock() 415 defer pm.mu.Unlock() 416 417 // It is easier to undo the entire reservation, so if we find that the 418 // tuple can't be fully added, finish and undo the whole thing. 419 undo := false 420 421 // Reserve port on all network protocols. 422 for _, network := range res.Networks { 423 desc := portDescriptor{network, res.Transport, res.Port} 424 addrToDev, ok := pm.allocatedPorts[desc] 425 if !ok { 426 addrToDev = make(addrToDevice) 427 pm.allocatedPorts[desc] = addrToDev 428 } 429 devToDest, ok := addrToDev[res.Addr] 430 if !ok { 431 devToDest = make(deviceToDest) 432 addrToDev[res.Addr] = devToDest 433 } 434 destToCntr := devToDest[res.BindToDevice] 435 if destToCntr == nil { 436 destToCntr = make(destToCounter) 437 } 438 439 counter := destToCntr[dst] 440 if counter.TotalRefs() != 0 && counter.SharedFlags()&flagBits == 0 { 441 // Tuple already exists. 442 undo = true 443 } 444 counter.AddRef(flagBits) 445 destToCntr[dst] = counter 446 devToDest[res.BindToDevice] = destToCntr 447 } 448 449 if undo { 450 // releasePortLocked decrements the counts (rather than setting 451 // them to zero), so it will undo the incorrect incrementing 452 // above. 453 pm.releasePortLocked(res) 454 return false 455 } 456 457 return true 458 } 459 460 // ReleasePort releases the reservation on a port/IP combination so that it can 461 // be reserved by other endpoints. 462 func (pm *PortManager) ReleasePort(res Reservation) { 463 pm.mu.Lock() 464 defer pm.mu.Unlock() 465 466 pm.releasePortLocked(res) 467 } 468 469 func (pm *PortManager) releasePortLocked(res Reservation) { 470 dst := res.dst() 471 for _, network := range res.Networks { 472 desc := portDescriptor{network, res.Transport, res.Port} 473 addrToDev, ok := pm.allocatedPorts[desc] 474 if !ok { 475 continue 476 } 477 devToDest, ok := addrToDev[res.Addr] 478 if !ok { 479 continue 480 } 481 destToCounter, ok := devToDest[res.BindToDevice] 482 if !ok { 483 continue 484 } 485 counter, ok := destToCounter[dst] 486 if !ok { 487 continue 488 } 489 counter.DropRef(res.Flags.Bits()) 490 if counter.TotalRefs() > 0 { 491 destToCounter[dst] = counter 492 continue 493 } 494 delete(destToCounter, dst) 495 if len(destToCounter) > 0 { 496 continue 497 } 498 delete(devToDest, res.BindToDevice) 499 if len(devToDest) > 0 { 500 continue 501 } 502 delete(addrToDev, res.Addr) 503 if len(addrToDev) > 0 { 504 continue 505 } 506 delete(pm.allocatedPorts, desc) 507 } 508 } 509 510 // PortRange returns the UDP and TCP inclusive range of ephemeral ports used in 511 // both IPv4 and IPv6. 512 func (pm *PortManager) PortRange() (uint16, uint16) { 513 pm.ephemeralMu.RLock() 514 defer pm.ephemeralMu.RUnlock() 515 return pm.firstEphemeral, pm.firstEphemeral + pm.numEphemeral - 1 516 } 517 518 // SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range 519 // (inclusive). 520 func (pm *PortManager) SetPortRange(start uint16, end uint16) tcpip.Error { 521 if start > end { 522 return &tcpip.ErrInvalidPortRange{} 523 } 524 pm.ephemeralMu.Lock() 525 defer pm.ephemeralMu.Unlock() 526 pm.firstEphemeral = start 527 pm.numEphemeral = end - start + 1 528 return nil 529 }