github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/tcpip/ports/ports.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package ports provides PortManager that manages allocating, reserving and
    16  // releasing ports.
    17  package ports
    18  
    19  import (
    20  	"math"
    21  
    22  	"github.com/metacubex/gvisor/pkg/rand"
    23  	"github.com/metacubex/gvisor/pkg/sync"
    24  	"github.com/metacubex/gvisor/pkg/tcpip"
    25  	"github.com/metacubex/gvisor/pkg/tcpip/header"
    26  )
    27  
const (
	// firstEphemeral is the default first port of the ephemeral port
	// range. NewPortManager uses it to initialize
	// PortManager.firstEphemeral and to derive the default range size.
	firstEphemeral = 16000
)
    31  
var (
	// anyIPAddress is the zero-value address. Local and destination
	// addresses are compared against it to detect the wildcard ("any")
	// address.
	anyIPAddress = tcpip.Address{}
)
    35  
// Reservation describes a port reservation.
type Reservation struct {
	// Networks is a list of network protocols to which the reservation
	// applies. Can be IPv4, IPv6, or both.
	Networks []tcpip.NetworkProtocolNumber

	// Transport is the transport protocol to which the reservation applies.
	Transport tcpip.TransportProtocolNumber

	// Addr is the address of the local endpoint. The zero-value address is
	// treated as the "any" address when checking for conflicts.
	Addr tcpip.Address

	// Port is the local port number. ReservePort treats zero as a request
	// to pick an ephemeral port.
	Port uint16

	// Flags describe features of the reservation.
	Flags Flags

	// BindToDevice is the NIC to which the reservation applies. Zero means
	// the reservation is not bound to a specific device.
	BindToDevice tcpip.NICID

	// Dest is the destination address. Only its Addr and Port fields are
	// used to key the reservation; a zero-value Addr acts as a wildcard
	// destination.
	Dest tcpip.FullAddress
}
    60  
    61  func (rs Reservation) dst() destination {
    62  	return destination{
    63  		rs.Dest.Addr,
    64  		rs.Dest.Port,
    65  	}
    66  }
    67  
// portDescriptor identifies a port within a single network/transport protocol
// pair. It is the key type of PortManager.allocatedPorts.
type portDescriptor struct {
	network   tcpip.NetworkProtocolNumber
	transport tcpip.TransportProtocolNumber
	port      uint16
}
    73  
// destination is the remote (address, port) pair of a reservation. It is the
// key type of destToCounter and is produced by Reservation.dst.
type destination struct {
	addr tcpip.Address
	port uint16
}
    78  
// destToCounter maps each destination to the FlagCounter that represents
// endpoints to that destination.
//
// A destToCounter stored in a deviceToDest is never empty: when its last
// entry is released, it is removed from the map that references it.
type destToCounter map[destination]FlagCounter
    85  
    86  // intersectionFlags calculates the intersection of flag bit values which affect
    87  // the specified destination.
    88  //
    89  // If no destinations are present, all flag values are returned as there are no
    90  // entries to limit possible flag values of a new entry.
    91  //
    92  // In addition to the intersection, the number of intersecting refs is
    93  // returned.
    94  func (dc destToCounter) intersectionFlags(res Reservation) (BitFlags, int) {
    95  	intersection := FlagMask
    96  	var count int
    97  
    98  	for dest, counter := range dc {
    99  		if dest == res.dst() {
   100  			intersection &= counter.SharedFlags()
   101  			count++
   102  			continue
   103  		}
   104  		// Wildcard destinations affect all destinations for TupleOnly.
   105  		if dest.addr == anyIPAddress || res.Dest.Addr == anyIPAddress {
   106  			// Only bitwise and the TupleOnlyFlag.
   107  			intersection &= (^TupleOnlyFlag) | counter.SharedFlags()
   108  			count++
   109  		}
   110  	}
   111  
   112  	return intersection, count
   113  }
   114  
// deviceToDest maps NICs to destinations for which there are port reservations.
// The zero NICID holds reservations that are not bound to a specific device.
//
// deviceToDest is never empty. When it has no elements, it is removed from the
// map that references it.
type deviceToDest map[tcpip.NICID]destToCounter
   120  
   121  // isAvailable checks whether binding is possible by device. If not binding to
   122  // a device, check against all FlagCounters. If binding to a specific device,
   123  // check against the unspecified device and the provided device.
   124  //
   125  // If either of the port reuse flags is enabled on any of the nodes, all nodes
   126  // sharing a port must share at least one reuse flag. This matches Linux's
   127  // behavior.
   128  func (dd deviceToDest) isAvailable(res Reservation, portSpecified bool) bool {
   129  	flagBits := res.Flags.Bits()
   130  	if res.BindToDevice == 0 {
   131  		intersection := FlagMask
   132  		for _, dest := range dd {
   133  			flags, count := dest.intersectionFlags(res)
   134  			if count == 0 {
   135  				continue
   136  			}
   137  			intersection &= flags
   138  			if intersection&flagBits == 0 {
   139  				// Can't bind because the (addr,port) was
   140  				// previously bound without reuse.
   141  				return false
   142  			}
   143  		}
   144  		if !portSpecified && res.Transport == header.TCPProtocolNumber {
   145  			return false
   146  		}
   147  		return true
   148  	}
   149  
   150  	intersection := FlagMask
   151  
   152  	if dests, ok := dd[0]; ok {
   153  		var count int
   154  		intersection, count = dests.intersectionFlags(res)
   155  		if count > 0 {
   156  			if intersection&flagBits == 0 {
   157  				return false
   158  			}
   159  			if !portSpecified && res.Transport == header.TCPProtocolNumber {
   160  				return false
   161  			}
   162  		}
   163  	}
   164  
   165  	if dests, ok := dd[res.BindToDevice]; ok {
   166  		flags, count := dests.intersectionFlags(res)
   167  		intersection &= flags
   168  		if count > 0 {
   169  			if intersection&flagBits == 0 {
   170  				return false
   171  			}
   172  			if !portSpecified && res.Transport == header.TCPProtocolNumber {
   173  				return false
   174  			}
   175  		}
   176  	}
   177  
   178  	return true
   179  }
   180  
// addrToDevice maps IP addresses to NICs that have port reservations. The
// zero-value address holds reservations made on the "any" address.
type addrToDevice map[tcpip.Address]deviceToDest
   183  
   184  // isAvailable checks whether an IP address is available to bind to. If the
   185  // address is the "any" address, check all other addresses. Otherwise, just
   186  // check against the "any" address and the provided address.
   187  func (ad addrToDevice) isAvailable(res Reservation, portSpecified bool) bool {
   188  	if res.Addr == anyIPAddress {
   189  		// If binding to the "any" address then check that there are no
   190  		// conflicts with all addresses.
   191  		for _, devices := range ad {
   192  			if !devices.isAvailable(res, portSpecified) {
   193  				return false
   194  			}
   195  		}
   196  		return true
   197  	}
   198  
   199  	// Check that there is no conflict with the "any" address.
   200  	if devices, ok := ad[anyIPAddress]; ok {
   201  		if !devices.isAvailable(res, portSpecified) {
   202  			return false
   203  		}
   204  	}
   205  
   206  	// Check that this is no conflict with the provided address.
   207  	if devices, ok := ad[res.Addr]; ok {
   208  		if !devices.isAvailable(res, portSpecified) {
   209  			return false
   210  		}
   211  	}
   212  
   213  	return true
   214  }
   215  
// PortManager manages allocating, reserving and releasing ports.
type PortManager struct {
	// mu protects allocatedPorts.
	// LOCK ORDERING: mu > ephemeralMu.
	mu sync.RWMutex
	// allocatedPorts is a nesting of maps that ultimately map Reservations
	// to FlagCounters describing whether the Reservation is valid and can
	// be reused. The nesting is: port descriptor -> local address ->
	// device -> destination -> FlagCounter.
	allocatedPorts map[portDescriptor]addrToDevice

	// ephemeralMu protects firstEphemeral and numEphemeral.
	ephemeralMu sync.RWMutex
	// firstEphemeral is the first port of the ephemeral range.
	firstEphemeral uint16
	// numEphemeral is the number of ports in the ephemeral range. Being a
	// uint16, it cannot represent a range covering all 65536 ports.
	numEphemeral uint16
}
   231  
   232  // NewPortManager creates new PortManager.
   233  func NewPortManager() *PortManager {
   234  	return &PortManager{
   235  		allocatedPorts: make(map[portDescriptor]addrToDevice),
   236  		firstEphemeral: firstEphemeral,
   237  		numEphemeral:   math.MaxUint16 - firstEphemeral + 1,
   238  	}
   239  }
   240  
// PortTester indicates whether the passed in port is suitable. Returning an
// error causes the function to which the PortTester is passed to return that
// error. Returning good == false with a nil error rejects only that port.
type PortTester func(port uint16) (good bool, err tcpip.Error)
   245  
   246  // PickEphemeralPort randomly chooses a starting point and iterates over all
   247  // possible ephemeral ports, allowing the caller to decide whether a given port
   248  // is suitable for its needs, and stopping when a port is found or an error
   249  // occurs.
   250  func (pm *PortManager) PickEphemeralPort(rng rand.RNG, testPort PortTester) (port uint16, err tcpip.Error) {
   251  	pm.ephemeralMu.RLock()
   252  	firstEphemeral := pm.firstEphemeral
   253  	numEphemeral := pm.numEphemeral
   254  	pm.ephemeralMu.RUnlock()
   255  
   256  	return pickEphemeralPort(rng.Uint32(), firstEphemeral, numEphemeral, testPort)
   257  }
   258  
   259  // pickEphemeralPort starts at the offset specified from the FirstEphemeral port
   260  // and iterates over the number of ports specified by count and allows the
   261  // caller to decide whether a given port is suitable for its needs, and stopping
   262  // when a port is found or an error occurs.
   263  func pickEphemeralPort(offset uint32, first, count uint16, testPort PortTester) (port uint16, err tcpip.Error) {
   264  	// This implements Algorithm 1 as per RFC 6056 Section 3.3.1.
   265  	for i := uint32(0); i < uint32(count); i++ {
   266  		port := uint16(uint32(first) + (offset+i)%uint32(count))
   267  		ok, err := testPort(port)
   268  		if err != nil {
   269  			return 0, err
   270  		}
   271  
   272  		if ok {
   273  			return port, nil
   274  		}
   275  	}
   276  
   277  	return 0, &tcpip.ErrNoPortAvailable{}
   278  }
   279  
   280  // ReservePort marks a port/IP combination as reserved so that it cannot be
   281  // reserved by another endpoint. If port is zero, ReservePort will search for
   282  // an unreserved ephemeral port and reserve it, returning its value in the
   283  // "port" return value.
   284  //
   285  // An optional PortTester can be passed in which if provided will be used to
   286  // test if the picked port can be used. The function should return true if the
   287  // port is safe to use, false otherwise.
   288  func (pm *PortManager) ReservePort(rng rand.RNG, res Reservation, testPort PortTester) (reservedPort uint16, err tcpip.Error) {
   289  	pm.mu.Lock()
   290  	defer pm.mu.Unlock()
   291  
   292  	// If a port is specified, just try to reserve it for all network
   293  	// protocols.
   294  	if res.Port != 0 {
   295  		if !pm.reserveSpecificPortLocked(res, true /* portSpecified */) {
   296  			return 0, &tcpip.ErrPortInUse{}
   297  		}
   298  		if testPort != nil {
   299  			ok, err := testPort(res.Port)
   300  			if err != nil {
   301  				pm.releasePortLocked(res)
   302  				return 0, err
   303  			}
   304  			if !ok {
   305  				pm.releasePortLocked(res)
   306  				return 0, &tcpip.ErrPortInUse{}
   307  			}
   308  		}
   309  		return res.Port, nil
   310  	}
   311  
   312  	// A port wasn't specified, so try to find one.
   313  	return pm.PickEphemeralPort(rng, func(p uint16) (bool, tcpip.Error) {
   314  		res.Port = p
   315  		if !pm.reserveSpecificPortLocked(res, false /* portSpecified */) {
   316  			return false, nil
   317  		}
   318  		if testPort != nil {
   319  			ok, err := testPort(p)
   320  			if err != nil {
   321  				pm.releasePortLocked(res)
   322  				return false, err
   323  			}
   324  			if !ok {
   325  				pm.releasePortLocked(res)
   326  				return false, nil
   327  			}
   328  		}
   329  		return true, nil
   330  	})
   331  }
   332  
   333  // reserveSpecificPortLocked tries to reserve the given port on all given
   334  // protocols.
   335  func (pm *PortManager) reserveSpecificPortLocked(res Reservation, portSpecified bool) bool {
   336  	// Make sure the port is available.
   337  	for _, network := range res.Networks {
   338  		desc := portDescriptor{network, res.Transport, res.Port}
   339  		if addrs, ok := pm.allocatedPorts[desc]; ok {
   340  			if !addrs.isAvailable(res, portSpecified) {
   341  				return false
   342  			}
   343  		}
   344  	}
   345  
   346  	// Reserve port on all network protocols.
   347  	flagBits := res.Flags.Bits()
   348  	dst := res.dst()
   349  	for _, network := range res.Networks {
   350  		desc := portDescriptor{network, res.Transport, res.Port}
   351  		addrToDev, ok := pm.allocatedPorts[desc]
   352  		if !ok {
   353  			addrToDev = make(addrToDevice)
   354  			pm.allocatedPorts[desc] = addrToDev
   355  		}
   356  		devToDest, ok := addrToDev[res.Addr]
   357  		if !ok {
   358  			devToDest = make(deviceToDest)
   359  			addrToDev[res.Addr] = devToDest
   360  		}
   361  		destToCntr := devToDest[res.BindToDevice]
   362  		if destToCntr == nil {
   363  			destToCntr = make(destToCounter)
   364  		}
   365  		counter := destToCntr[dst]
   366  		counter.AddRef(flagBits)
   367  		destToCntr[dst] = counter
   368  		devToDest[res.BindToDevice] = destToCntr
   369  	}
   370  
   371  	return true
   372  }
   373  
   374  // ReserveTuple adds a port reservation for the tuple on all given protocol.
   375  func (pm *PortManager) ReserveTuple(res Reservation) bool {
   376  	flagBits := res.Flags.Bits()
   377  	dst := res.dst()
   378  
   379  	pm.mu.Lock()
   380  	defer pm.mu.Unlock()
   381  
   382  	// It is easier to undo the entire reservation, so if we find that the
   383  	// tuple can't be fully added, finish and undo the whole thing.
   384  	undo := false
   385  
   386  	// Reserve port on all network protocols.
   387  	for _, network := range res.Networks {
   388  		desc := portDescriptor{network, res.Transport, res.Port}
   389  		addrToDev, ok := pm.allocatedPorts[desc]
   390  		if !ok {
   391  			addrToDev = make(addrToDevice)
   392  			pm.allocatedPorts[desc] = addrToDev
   393  		}
   394  		devToDest, ok := addrToDev[res.Addr]
   395  		if !ok {
   396  			devToDest = make(deviceToDest)
   397  			addrToDev[res.Addr] = devToDest
   398  		}
   399  		destToCntr := devToDest[res.BindToDevice]
   400  		if destToCntr == nil {
   401  			destToCntr = make(destToCounter)
   402  		}
   403  
   404  		counter := destToCntr[dst]
   405  		if counter.TotalRefs() != 0 && counter.SharedFlags()&flagBits == 0 {
   406  			// Tuple already exists.
   407  			undo = true
   408  		}
   409  		counter.AddRef(flagBits)
   410  		destToCntr[dst] = counter
   411  		devToDest[res.BindToDevice] = destToCntr
   412  	}
   413  
   414  	if undo {
   415  		// releasePortLocked decrements the counts (rather than setting
   416  		// them to zero), so it will undo the incorrect incrementing
   417  		// above.
   418  		pm.releasePortLocked(res)
   419  		return false
   420  	}
   421  
   422  	return true
   423  }
   424  
// ReleasePort releases the reservation on a port/IP combination so that it can
// be reserved by other endpoints.
//
// It takes pm.mu for writing and delegates to releasePortLocked, which drops
// one reference for res on each of its network protocols.
func (pm *PortManager) ReleasePort(res Reservation) {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	pm.releasePortLocked(res)
}
   433  
   434  func (pm *PortManager) releasePortLocked(res Reservation) {
   435  	dst := res.dst()
   436  	for _, network := range res.Networks {
   437  		desc := portDescriptor{network, res.Transport, res.Port}
   438  		addrToDev, ok := pm.allocatedPorts[desc]
   439  		if !ok {
   440  			continue
   441  		}
   442  		devToDest, ok := addrToDev[res.Addr]
   443  		if !ok {
   444  			continue
   445  		}
   446  		destToCounter, ok := devToDest[res.BindToDevice]
   447  		if !ok {
   448  			continue
   449  		}
   450  		counter, ok := destToCounter[dst]
   451  		if !ok {
   452  			continue
   453  		}
   454  		counter.DropRef(res.Flags.Bits())
   455  		if counter.TotalRefs() > 0 {
   456  			destToCounter[dst] = counter
   457  			continue
   458  		}
   459  		delete(destToCounter, dst)
   460  		if len(destToCounter) > 0 {
   461  			continue
   462  		}
   463  		delete(devToDest, res.BindToDevice)
   464  		if len(devToDest) > 0 {
   465  			continue
   466  		}
   467  		delete(addrToDev, res.Addr)
   468  		if len(addrToDev) > 0 {
   469  			continue
   470  		}
   471  		delete(pm.allocatedPorts, desc)
   472  	}
   473  }
   474  
   475  // PortRange returns the UDP and TCP inclusive range of ephemeral ports used in
   476  // both IPv4 and IPv6.
   477  func (pm *PortManager) PortRange() (uint16, uint16) {
   478  	pm.ephemeralMu.RLock()
   479  	defer pm.ephemeralMu.RUnlock()
   480  	return pm.firstEphemeral, pm.firstEphemeral + pm.numEphemeral - 1
   481  }
   482  
   483  // SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range
   484  // (inclusive).
   485  func (pm *PortManager) SetPortRange(start uint16, end uint16) tcpip.Error {
   486  	if start > end {
   487  		return &tcpip.ErrInvalidPortRange{}
   488  	}
   489  	pm.ephemeralMu.Lock()
   490  	defer pm.ephemeralMu.Unlock()
   491  	pm.firstEphemeral = start
   492  	pm.numEphemeral = end - start + 1
   493  	return nil
   494  }