github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/pkg/tcpip/ports/ports.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package ports provides PortManager that manages allocating, reserving and
    16  // releasing ports.
    17  package ports
    18  
    19  import (
    20  	"math"
    21  	"math/rand"
    22  
    23  	"github.com/ttpreport/gvisor-ligolo/pkg/atomicbitops"
    24  	"github.com/ttpreport/gvisor-ligolo/pkg/sync"
    25  	"github.com/ttpreport/gvisor-ligolo/pkg/tcpip"
    26  	"github.com/ttpreport/gvisor-ligolo/pkg/tcpip/header"
    27  )
    28  
    29  const (
    30  	firstEphemeral = 16000
    31  )
    32  
    33  var (
    34  	anyIPAddress = tcpip.Address{}
    35  )
    36  
    37  // Reservation describes a port reservation.
    38  type Reservation struct {
    39  	// Networks is a list of network protocols to which the reservation
    40  	// applies. Can be IPv4, IPv6, or both.
    41  	Networks []tcpip.NetworkProtocolNumber
    42  
    43  	// Transport is the transport protocol to which the reservation applies.
    44  	Transport tcpip.TransportProtocolNumber
    45  
    46  	// Addr is the address of the local endpoint.
    47  	Addr tcpip.Address
    48  
    49  	// Port is the local port number.
    50  	Port uint16
    51  
    52  	// Flags describe features of the reservation.
    53  	Flags Flags
    54  
    55  	// BindToDevice is the NIC to which the reservation applies.
    56  	BindToDevice tcpip.NICID
    57  
    58  	// Dest is the destination address.
    59  	Dest tcpip.FullAddress
    60  }
    61  
    62  func (rs Reservation) dst() destination {
    63  	return destination{
    64  		rs.Dest.Addr,
    65  		rs.Dest.Port,
    66  	}
    67  }
    68  
// portDescriptor identifies a port by network protocol, transport protocol
// and port number. It is the key of PortManager.allocatedPorts.
//
// The field order matters: values are built with unkeyed composite literals.
type portDescriptor struct {
	network   tcpip.NetworkProtocolNumber
	transport tcpip.TransportProtocolNumber
	port      uint16
}
    74  
// destination is the remote address/port pair of a reservation, as produced
// by Reservation.dst. It is the key of destToCounter.
//
// The field order matters: values are built with an unkeyed composite
// literal.
type destination struct {
	addr tcpip.Address
	port uint16
}
    79  
    80  // destToCounter maps each destination to the FlagCounter that represents
    81  // endpoints to that destination.
    82  //
    83  // destToCounter is never empty. When it has no elements, it is removed from
    84  // the map that references it.
    85  type destToCounter map[destination]FlagCounter
    86  
    87  // intersectionFlags calculates the intersection of flag bit values which affect
    88  // the specified destination.
    89  //
    90  // If no destinations are present, all flag values are returned as there are no
    91  // entries to limit possible flag values of a new entry.
    92  //
    93  // In addition to the intersection, the number of intersecting refs is
    94  // returned.
    95  func (dc destToCounter) intersectionFlags(res Reservation) (BitFlags, int) {
    96  	intersection := FlagMask
    97  	var count int
    98  
    99  	for dest, counter := range dc {
   100  		if dest == res.dst() {
   101  			intersection &= counter.SharedFlags()
   102  			count++
   103  			continue
   104  		}
   105  		// Wildcard destinations affect all destinations for TupleOnly.
   106  		if dest.addr == anyIPAddress || res.Dest.Addr == anyIPAddress {
   107  			// Only bitwise and the TupleOnlyFlag.
   108  			intersection &= (^TupleOnlyFlag) | counter.SharedFlags()
   109  			count++
   110  		}
   111  	}
   112  
   113  	return intersection, count
   114  }
   115  
   116  // deviceToDest maps NICs to destinations for which there are port reservations.
   117  //
   118  // deviceToDest is never empty. When it has no elements, it is removed from the
   119  // map that references it.
   120  type deviceToDest map[tcpip.NICID]destToCounter
   121  
   122  // isAvailable checks whether binding is possible by device. If not binding to
   123  // a device, check against all FlagCounters. If binding to a specific device,
   124  // check against the unspecified device and the provided device.
   125  //
   126  // If either of the port reuse flags is enabled on any of the nodes, all nodes
   127  // sharing a port must share at least one reuse flag. This matches Linux's
   128  // behavior.
   129  func (dd deviceToDest) isAvailable(res Reservation, portSpecified bool) bool {
   130  	flagBits := res.Flags.Bits()
   131  	if res.BindToDevice == 0 {
   132  		intersection := FlagMask
   133  		for _, dest := range dd {
   134  			flags, count := dest.intersectionFlags(res)
   135  			if count == 0 {
   136  				continue
   137  			}
   138  			intersection &= flags
   139  			if intersection&flagBits == 0 {
   140  				// Can't bind because the (addr,port) was
   141  				// previously bound without reuse.
   142  				return false
   143  			}
   144  		}
   145  		if !portSpecified && res.Transport == header.TCPProtocolNumber {
   146  			return false
   147  		}
   148  		return true
   149  	}
   150  
   151  	intersection := FlagMask
   152  
   153  	if dests, ok := dd[0]; ok {
   154  		var count int
   155  		intersection, count = dests.intersectionFlags(res)
   156  		if count > 0 {
   157  			if intersection&flagBits == 0 {
   158  				return false
   159  			}
   160  			if !portSpecified && res.Transport == header.TCPProtocolNumber {
   161  				return false
   162  			}
   163  		}
   164  	}
   165  
   166  	if dests, ok := dd[res.BindToDevice]; ok {
   167  		flags, count := dests.intersectionFlags(res)
   168  		intersection &= flags
   169  		if count > 0 {
   170  			if intersection&flagBits == 0 {
   171  				return false
   172  			}
   173  			if !portSpecified && res.Transport == header.TCPProtocolNumber {
   174  				return false
   175  			}
   176  		}
   177  	}
   178  
   179  	return true
   180  }
   181  
   182  // addrToDevice maps IP addresses to NICs that have port reservations.
   183  type addrToDevice map[tcpip.Address]deviceToDest
   184  
   185  // isAvailable checks whether an IP address is available to bind to. If the
   186  // address is the "any" address, check all other addresses. Otherwise, just
   187  // check against the "any" address and the provided address.
   188  func (ad addrToDevice) isAvailable(res Reservation, portSpecified bool) bool {
   189  	if res.Addr == anyIPAddress {
   190  		// If binding to the "any" address then check that there are no
   191  		// conflicts with all addresses.
   192  		for _, devices := range ad {
   193  			if !devices.isAvailable(res, portSpecified) {
   194  				return false
   195  			}
   196  		}
   197  		return true
   198  	}
   199  
   200  	// Check that there is no conflict with the "any" address.
   201  	if devices, ok := ad[anyIPAddress]; ok {
   202  		if !devices.isAvailable(res, portSpecified) {
   203  			return false
   204  		}
   205  	}
   206  
   207  	// Check that this is no conflict with the provided address.
   208  	if devices, ok := ad[res.Addr]; ok {
   209  		if !devices.isAvailable(res, portSpecified) {
   210  			return false
   211  		}
   212  	}
   213  
   214  	return true
   215  }
   216  
// PortManager manages allocating, reserving and releasing ports.
//
// Reservations are tracked in nested maps: port descriptor -> local address
// -> NIC -> destination -> FlagCounter.
type PortManager struct {
	// mu protects allocatedPorts.
	// LOCK ORDERING: mu > ephemeralMu.
	mu sync.RWMutex
	// allocatedPorts is a nesting of maps that ultimately map Reservations
	// to FlagCounters describing whether the Reservation is valid and can
	// be reused.
	allocatedPorts map[portDescriptor]addrToDevice

	// ephemeralMu protects firstEphemeral and numEphemeral.
	ephemeralMu sync.RWMutex
	// firstEphemeral is the first port of the ephemeral range and
	// numEphemeral is the number of ports in it; the last port is
	// firstEphemeral + numEphemeral - 1.
	firstEphemeral uint16
	numEphemeral   uint16

	// hint is used to pick ephemeral ports in a stable order for a given
	// port offset.
	//
	// hint must be accessed using the portHint/incPortHint helpers.
	// TODO(gvisor.dev/issue/940): S/R this field.
	hint atomicbitops.Uint32
}
   239  
   240  // NewPortManager creates new PortManager.
   241  func NewPortManager() *PortManager {
   242  	return &PortManager{
   243  		allocatedPorts: make(map[portDescriptor]addrToDevice),
   244  		firstEphemeral: firstEphemeral,
   245  		numEphemeral:   math.MaxUint16 - firstEphemeral + 1,
   246  	}
   247  }
   248  
// PortTester indicates whether the passed in port is suitable. Returning an
// error causes the function to which the PortTester is passed to return that
// error.
//
// When used while scanning the ephemeral range, returning good == false with
// a nil error makes the scan move on to the next candidate port.
type PortTester func(port uint16) (good bool, err tcpip.Error)
   253  
   254  // PickEphemeralPort randomly chooses a starting point and iterates over all
   255  // possible ephemeral ports, allowing the caller to decide whether a given port
   256  // is suitable for its needs, and stopping when a port is found or an error
   257  // occurs.
   258  func (pm *PortManager) PickEphemeralPort(rng *rand.Rand, testPort PortTester) (port uint16, err tcpip.Error) {
   259  	pm.ephemeralMu.RLock()
   260  	firstEphemeral := pm.firstEphemeral
   261  	numEphemeral := pm.numEphemeral
   262  	pm.ephemeralMu.RUnlock()
   263  
   264  	offset := uint32(rng.Int31n(int32(numEphemeral)))
   265  	return pickEphemeralPort(offset, firstEphemeral, numEphemeral, testPort)
   266  }
   267  
// portHint atomically reads and returns the pm.hint value.
//
// PickEphemeralPortStable adds this value to the caller's offset to choose
// where its scan of the ephemeral range starts.
func (pm *PortManager) portHint() uint32 {
	return pm.hint.Load()
}
   272  
// incPortHint atomically increments pm.hint by 1.
//
// PickEphemeralPortStable calls it after a port has been picked
// successfully, so that the next scan starts one position further on.
func (pm *PortManager) incPortHint() {
	pm.hint.Add(1)
}
   277  
   278  // PickEphemeralPortStable starts at the specified offset + pm.portHint and
   279  // iterates over all ephemeral ports, allowing the caller to decide whether a
   280  // given port is suitable for its needs and stopping when a port is found or an
   281  // error occurs.
   282  func (pm *PortManager) PickEphemeralPortStable(offset uint32, testPort PortTester) (port uint16, err tcpip.Error) {
   283  	pm.ephemeralMu.RLock()
   284  	firstEphemeral := pm.firstEphemeral
   285  	numEphemeral := pm.numEphemeral
   286  	pm.ephemeralMu.RUnlock()
   287  
   288  	p, err := pickEphemeralPort(pm.portHint()+offset, firstEphemeral, numEphemeral, testPort)
   289  	if err == nil {
   290  		pm.incPortHint()
   291  	}
   292  	return p, err
   293  }
   294  
   295  // pickEphemeralPort starts at the offset specified from the FirstEphemeral port
   296  // and iterates over the number of ports specified by count and allows the
   297  // caller to decide whether a given port is suitable for its needs, and stopping
   298  // when a port is found or an error occurs.
   299  func pickEphemeralPort(offset uint32, first, count uint16, testPort PortTester) (port uint16, err tcpip.Error) {
   300  	for i := uint32(0); i < uint32(count); i++ {
   301  		port := uint16(uint32(first) + (offset+i)%uint32(count))
   302  		ok, err := testPort(port)
   303  		if err != nil {
   304  			return 0, err
   305  		}
   306  
   307  		if ok {
   308  			return port, nil
   309  		}
   310  	}
   311  
   312  	return 0, &tcpip.ErrNoPortAvailable{}
   313  }
   314  
   315  // ReservePort marks a port/IP combination as reserved so that it cannot be
   316  // reserved by another endpoint. If port is zero, ReservePort will search for
   317  // an unreserved ephemeral port and reserve it, returning its value in the
   318  // "port" return value.
   319  //
   320  // An optional PortTester can be passed in which if provided will be used to
   321  // test if the picked port can be used. The function should return true if the
   322  // port is safe to use, false otherwise.
   323  func (pm *PortManager) ReservePort(rng *rand.Rand, res Reservation, testPort PortTester) (reservedPort uint16, err tcpip.Error) {
   324  	pm.mu.Lock()
   325  	defer pm.mu.Unlock()
   326  
   327  	// If a port is specified, just try to reserve it for all network
   328  	// protocols.
   329  	if res.Port != 0 {
   330  		if !pm.reserveSpecificPortLocked(res, true /* portSpecified */) {
   331  			return 0, &tcpip.ErrPortInUse{}
   332  		}
   333  		if testPort != nil {
   334  			ok, err := testPort(res.Port)
   335  			if err != nil {
   336  				pm.releasePortLocked(res)
   337  				return 0, err
   338  			}
   339  			if !ok {
   340  				pm.releasePortLocked(res)
   341  				return 0, &tcpip.ErrPortInUse{}
   342  			}
   343  		}
   344  		return res.Port, nil
   345  	}
   346  
   347  	// A port wasn't specified, so try to find one.
   348  	return pm.PickEphemeralPort(rng, func(p uint16) (bool, tcpip.Error) {
   349  		res.Port = p
   350  		if !pm.reserveSpecificPortLocked(res, false /* portSpecified */) {
   351  			return false, nil
   352  		}
   353  		if testPort != nil {
   354  			ok, err := testPort(p)
   355  			if err != nil {
   356  				pm.releasePortLocked(res)
   357  				return false, err
   358  			}
   359  			if !ok {
   360  				pm.releasePortLocked(res)
   361  				return false, nil
   362  			}
   363  		}
   364  		return true, nil
   365  	})
   366  }
   367  
   368  // reserveSpecificPortLocked tries to reserve the given port on all given
   369  // protocols.
   370  func (pm *PortManager) reserveSpecificPortLocked(res Reservation, portSpecified bool) bool {
   371  	// Make sure the port is available.
   372  	for _, network := range res.Networks {
   373  		desc := portDescriptor{network, res.Transport, res.Port}
   374  		if addrs, ok := pm.allocatedPorts[desc]; ok {
   375  			if !addrs.isAvailable(res, portSpecified) {
   376  				return false
   377  			}
   378  		}
   379  	}
   380  
   381  	// Reserve port on all network protocols.
   382  	flagBits := res.Flags.Bits()
   383  	dst := res.dst()
   384  	for _, network := range res.Networks {
   385  		desc := portDescriptor{network, res.Transport, res.Port}
   386  		addrToDev, ok := pm.allocatedPorts[desc]
   387  		if !ok {
   388  			addrToDev = make(addrToDevice)
   389  			pm.allocatedPorts[desc] = addrToDev
   390  		}
   391  		devToDest, ok := addrToDev[res.Addr]
   392  		if !ok {
   393  			devToDest = make(deviceToDest)
   394  			addrToDev[res.Addr] = devToDest
   395  		}
   396  		destToCntr := devToDest[res.BindToDevice]
   397  		if destToCntr == nil {
   398  			destToCntr = make(destToCounter)
   399  		}
   400  		counter := destToCntr[dst]
   401  		counter.AddRef(flagBits)
   402  		destToCntr[dst] = counter
   403  		devToDest[res.BindToDevice] = destToCntr
   404  	}
   405  
   406  	return true
   407  }
   408  
   409  // ReserveTuple adds a port reservation for the tuple on all given protocol.
   410  func (pm *PortManager) ReserveTuple(res Reservation) bool {
   411  	flagBits := res.Flags.Bits()
   412  	dst := res.dst()
   413  
   414  	pm.mu.Lock()
   415  	defer pm.mu.Unlock()
   416  
   417  	// It is easier to undo the entire reservation, so if we find that the
   418  	// tuple can't be fully added, finish and undo the whole thing.
   419  	undo := false
   420  
   421  	// Reserve port on all network protocols.
   422  	for _, network := range res.Networks {
   423  		desc := portDescriptor{network, res.Transport, res.Port}
   424  		addrToDev, ok := pm.allocatedPorts[desc]
   425  		if !ok {
   426  			addrToDev = make(addrToDevice)
   427  			pm.allocatedPorts[desc] = addrToDev
   428  		}
   429  		devToDest, ok := addrToDev[res.Addr]
   430  		if !ok {
   431  			devToDest = make(deviceToDest)
   432  			addrToDev[res.Addr] = devToDest
   433  		}
   434  		destToCntr := devToDest[res.BindToDevice]
   435  		if destToCntr == nil {
   436  			destToCntr = make(destToCounter)
   437  		}
   438  
   439  		counter := destToCntr[dst]
   440  		if counter.TotalRefs() != 0 && counter.SharedFlags()&flagBits == 0 {
   441  			// Tuple already exists.
   442  			undo = true
   443  		}
   444  		counter.AddRef(flagBits)
   445  		destToCntr[dst] = counter
   446  		devToDest[res.BindToDevice] = destToCntr
   447  	}
   448  
   449  	if undo {
   450  		// releasePortLocked decrements the counts (rather than setting
   451  		// them to zero), so it will undo the incorrect incrementing
   452  		// above.
   453  		pm.releasePortLocked(res)
   454  		return false
   455  	}
   456  
   457  	return true
   458  }
   459  
// ReleasePort releases the reservation on a port/IP combination so that it can
// be reserved by other endpoints.
//
// It takes pm.mu for writing and delegates to releasePortLocked, which drops
// one reference per network protocol listed in res.
func (pm *PortManager) ReleasePort(res Reservation) {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	pm.releasePortLocked(res)
}
   468  
   469  func (pm *PortManager) releasePortLocked(res Reservation) {
   470  	dst := res.dst()
   471  	for _, network := range res.Networks {
   472  		desc := portDescriptor{network, res.Transport, res.Port}
   473  		addrToDev, ok := pm.allocatedPorts[desc]
   474  		if !ok {
   475  			continue
   476  		}
   477  		devToDest, ok := addrToDev[res.Addr]
   478  		if !ok {
   479  			continue
   480  		}
   481  		destToCounter, ok := devToDest[res.BindToDevice]
   482  		if !ok {
   483  			continue
   484  		}
   485  		counter, ok := destToCounter[dst]
   486  		if !ok {
   487  			continue
   488  		}
   489  		counter.DropRef(res.Flags.Bits())
   490  		if counter.TotalRefs() > 0 {
   491  			destToCounter[dst] = counter
   492  			continue
   493  		}
   494  		delete(destToCounter, dst)
   495  		if len(destToCounter) > 0 {
   496  			continue
   497  		}
   498  		delete(devToDest, res.BindToDevice)
   499  		if len(devToDest) > 0 {
   500  			continue
   501  		}
   502  		delete(addrToDev, res.Addr)
   503  		if len(addrToDev) > 0 {
   504  			continue
   505  		}
   506  		delete(pm.allocatedPorts, desc)
   507  	}
   508  }
   509  
   510  // PortRange returns the UDP and TCP inclusive range of ephemeral ports used in
   511  // both IPv4 and IPv6.
   512  func (pm *PortManager) PortRange() (uint16, uint16) {
   513  	pm.ephemeralMu.RLock()
   514  	defer pm.ephemeralMu.RUnlock()
   515  	return pm.firstEphemeral, pm.firstEphemeral + pm.numEphemeral - 1
   516  }
   517  
   518  // SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range
   519  // (inclusive).
   520  func (pm *PortManager) SetPortRange(start uint16, end uint16) tcpip.Error {
   521  	if start > end {
   522  		return &tcpip.ErrInvalidPortRange{}
   523  	}
   524  	pm.ephemeralMu.Lock()
   525  	defer pm.ephemeralMu.Unlock()
   526  	pm.firstEphemeral = start
   527  	pm.numEphemeral = end - start + 1
   528  	return nil
   529  }