github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/tcpip/stack/stack.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package stack provides the glue between networking protocols and the
    16  // consumers of the networking stack.
    17  //
    18  // For consumers, the only function of interest is New(), everything else is
    19  // provided by the tcpip/public package.
    20  package stack
    21  
    22  import (
    23  	"encoding/binary"
    24  	"fmt"
    25  	"io"
    26  	"math/rand"
    27  	"sync/atomic"
    28  	"time"
    29  
    30  	"golang.org/x/time/rate"
    31  	"github.com/nicocha30/gvisor-ligolo/pkg/atomicbitops"
    32  	"github.com/nicocha30/gvisor-ligolo/pkg/buffer"
    33  	"github.com/nicocha30/gvisor-ligolo/pkg/log"
    34  	cryptorand "github.com/nicocha30/gvisor-ligolo/pkg/rand"
    35  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip"
    36  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/header"
    37  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/ports"
    38  	"github.com/nicocha30/gvisor-ligolo/pkg/waiter"
    39  )
    40  
    41  const (
    42  	// DefaultTOS is the default type of service value for network endpoints.
    43  	DefaultTOS = 0
    44  )
    45  
// transportProtocolState is the per-protocol record the Stack keeps for each
// transport protocol registered in New.
type transportProtocolState struct {
	// proto is the transport protocol implementation produced by its factory.
	proto          TransportProtocol
	// defaultHandler is the per-stack default handler installed through
	// Stack.SetTransportProtocolHandler. It is nil until one is installed.
	defaultHandler func(id TransportEndpointID, pkt PacketBufferPtr) bool
}
    50  
// ResumableEndpoint is an endpoint that needs to be resumed after restore.
type ResumableEndpoint interface {
	// Resume resumes an endpoint after restore. This can be used to restart
	// background workers such as protocol goroutines. This must be called after
	// all indirect dependencies of the endpoint have been restored, which
	// generally implies at the end of the restore process.
	Resume(*Stack)
}
    59  
    60  // uniqueIDGenerator is a default unique ID generator.
    61  type uniqueIDGenerator atomicbitops.Uint64
    62  
    63  func (u *uniqueIDGenerator) UniqueID() uint64 {
    64  	return ((*atomicbitops.Uint64)(u)).Add(1)
    65  }
    66  
// netRawMissingLogger is used by NewRawEndpoint to report attempts to create
// a raw socket while raw endpoints are disabled. It is built with
// log.BasicRateLimitedLogger and a one-minute interval.
var netRawMissingLogger = log.BasicRateLimitedLogger(time.Minute)
    68  
// Stack is a networking stack, with all supported protocols, NICs, and route
// table.
//
// LOCK ORDERING: mu > routeMu.
type Stack struct {
	// transportProtocols and networkProtocols hold the protocols produced by
	// the factories passed to New, keyed by protocol number. They are
	// populated in New.
	transportProtocols map[tcpip.TransportProtocolNumber]*transportProtocolState
	networkProtocols   map[tcpip.NetworkProtocolNumber]NetworkProtocol

	// rawFactory creates raw endpoints. If nil, raw endpoints are
	// disabled. It is set during Stack creation and is immutable.
	rawFactory                   RawFactory
	// packetEndpointWriteSupported is copied from
	// Options.AllowPacketEndpointWrite in New.
	packetEndpointWriteSupported bool

	// demux is the global transport demuxer, created in New.
	demux *transportDemuxer

	// stats holds the stack's statistic counters, initialized from
	// Options.Stats in New.
	stats tcpip.Stats

	// routeMu protects annotated fields below.
	routeMu routeStackRWMutex

	// +checklocks:routeMu
	routeTable []tcpip.Route

	// mu protects annotated fields below.
	mu stackRWMutex
	// +checklocks:mu
	nics                     map[tcpip.NICID]*nic
	// defaultForwardingEnabled is the set of network protocols for which
	// forwarding is enabled by default. It is updated by
	// SetForwardingDefaultAndAllNICs while mu is held for writing.
	defaultForwardingEnabled map[tcpip.NetworkProtocolNumber]struct{}

	// cleanupEndpointsMu protects cleanupEndpoints.
	cleanupEndpointsMu cleanupEndpointsMutex
	// +checklocks:cleanupEndpointsMu
	cleanupEndpoints map[TransportEndpoint]struct{}

	// PortManager is embedded; New allocates it with ports.NewPortManager.
	*ports.PortManager

	// If not nil, then any new endpoints will have this probe function
	// invoked every time they receive a TCP segment.
	tcpProbeFunc atomic.Value // TCPProbeFunc

	// clock is used to generate user-visible times.
	clock tcpip.Clock

	// handleLocal allows non-loopback interfaces to loop packets.
	handleLocal bool

	// tables are the iptables packet filtering and manipulation rules.
	// TODO(gvisor.dev/issue/4595): S/R this field.
	tables *IPTables

	// resumableEndpoints is a list of endpoints that need to be resumed if the
	// stack is being restored.
	resumableEndpoints []ResumableEndpoint

	// icmpRateLimiter is a global rate limiter for all ICMP messages generated
	// by the stack.
	icmpRateLimiter *ICMPRateLimiter

	// seed is a one-time random value initialized at stack startup. New draws
	// it from randomGenerator.
	//
	// TODO(gvisor.dev/issue/940): S/R this field.
	seed uint32

	// nudConfigs is the default NUD configurations used by interfaces.
	nudConfigs NUDConfigurations

	// nudDisp is the NUD event dispatcher that is used to send the netstack
	// integrator NUD related events.
	nudDisp NUDDispatcher

	// uniqueIDGenerator is a generator of unique identifiers.
	uniqueIDGenerator UniqueID

	// randomGenerator is an injectable pseudo random generator that can be
	// used when a random number is required.
	randomGenerator *rand.Rand

	// secureRNG is a cryptographically secure random number generator.
	secureRNG io.Reader

	// sendBufferSize holds the min/default/max send buffer sizes for
	// endpoints other than TCP.
	sendBufferSize tcpip.SendBufferSizeOption

	// receiveBufferSize holds the min/default/max receive buffer sizes for
	// endpoints other than TCP.
	receiveBufferSize tcpip.ReceiveBufferSizeOption

	// tcpInvalidRateLimit is the maximal rate for sending duplicate
	// acknowledgements in response to incoming TCP packets that are for an existing
	// connection but that are invalid due to any of the following reasons:
	//
	//   a) out-of-window sequence number.
	//   b) out-of-window acknowledgement number.
	//   c) PAWS check failure (when implemented).
	//
	// This is required to prevent potential ACK loops.
	// Setting this to 0 will disable all rate limiting.
	tcpInvalidRateLimit time.Duration

	// tsOffsetSecret is the secret key for generating timestamp offsets
	// initialized at stack startup. New draws it from randomGenerator.
	tsOffsetSecret uint32
}
   172  
// UniqueID is an abstract generator of unique identifiers.
type UniqueID interface {
	// UniqueID returns an identifier. Stack.UniqueID forwards to this method.
	UniqueID() uint64
}
   177  
// NetworkProtocolFactory instantiates a network protocol.
//
// New invokes each factory with the Stack under construction, before the
// transport protocols and the transport demuxer have been set up.
//
// NetworkProtocolFactory must not attempt to modify the stack, it may only
// query the stack.
type NetworkProtocolFactory func(*Stack) NetworkProtocol
   183  
// TransportProtocolFactory instantiates a transport protocol.
//
// New invokes each factory with the Stack under construction, after the
// network protocols have been registered and before the transport demuxer
// has been created.
//
// TransportProtocolFactory must not attempt to modify the stack, it may only
// query the stack.
type TransportProtocolFactory func(*Stack) TransportProtocol
   189  
// Options contains optional Stack configuration.
type Options struct {
	// NetworkProtocols lists the network protocols to enable.
	NetworkProtocols []NetworkProtocolFactory

	// TransportProtocols lists the transport protocols to enable.
	TransportProtocols []TransportProtocolFactory

	// Clock is an optional clock used for timekeeping.
	//
	// If Clock is nil, tcpip.NewStdClock() will be used.
	Clock tcpip.Clock

	// Stats are optional statistic counters.
	Stats tcpip.Stats

	// HandleLocal indicates whether packets destined to their source
	// should be handled by the stack internally (true) or outside the
	// stack (false).
	HandleLocal bool

	// UniqueID is an optional generator of unique identifiers.
	//
	// If UniqueID is nil, New installs a default generator backed by an
	// atomic counter.
	UniqueID UniqueID

	// NUDConfigs is the default NUD configurations used by interfaces.
	NUDConfigs NUDConfigurations

	// NUDDisp is the NUD event dispatcher that an integrator can provide to
	// receive NUD related events.
	NUDDisp NUDDispatcher

	// RawFactory produces raw endpoints. Raw endpoints are enabled only if
	// this is non-nil.
	RawFactory RawFactory

	// AllowPacketEndpointWrite determines if packet endpoints support write
	// operations.
	AllowPacketEndpointWrite bool

	// RandSource is an optional source to use to generate random
	// numbers. If omitted it defaults to a Source seeded by the data
	// returned by the stack secure RNG.
	//
	// RandSource must be thread-safe.
	RandSource rand.Source

	// IPTables are the initial iptables rules. If nil, DefaultIPTables will be
	// used to construct the initial iptables rules.
	IPTables *IPTables

	// DefaultIPTables is an optional iptables rules constructor that is called
	// if IPTables is nil. If both fields are nil, New falls back to
	// DefaultTables and iptables will allow all traffic.
	DefaultIPTables func(clock tcpip.Clock, rand *rand.Rand) *IPTables

	// SecureRNG is a cryptographically secure random number generator.
	//
	// If SecureRNG is nil, New uses the Reader exported by this module's
	// pkg/rand package.
	SecureRNG io.Reader
}
   249  
// TransportEndpointInfo holds useful information about a transport endpoint
// which can be queried by monitoring tools.
//
// +stateify savable
type TransportEndpointInfo struct {
	// The following fields are initialized at creation time and are
	// immutable.

	NetProto   tcpip.NetworkProtocolNumber
	TransProto tcpip.TransportProtocolNumber

	// The following fields are protected by endpoint mu.

	ID TransportEndpointID
	// BindNICID and BindAddr are set via calls to Bind(). They are used to
	// reject attempts to send data or connect via a different NIC or
	// address.
	BindNICID tcpip.NICID
	BindAddr  tcpip.Address
	// RegisterNICID is the default NICID registered as a side-effect of
	// connect or datagram write.
	RegisterNICID tcpip.NICID
}
   273  
   274  // AddrNetProtoLocked unwraps the specified address if it is a V4-mapped V6
   275  // address and returns the network protocol number to be used to communicate
   276  // with the specified address. It returns an error if the passed address is
   277  // incompatible with the receiver.
   278  //
   279  // Preconditon: the parent endpoint mu must be held while calling this method.
   280  func (t *TransportEndpointInfo) AddrNetProtoLocked(addr tcpip.FullAddress, v6only bool) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) {
   281  	netProto := t.NetProto
   282  	switch addr.Addr.BitLen() {
   283  	case header.IPv4AddressSizeBits:
   284  		netProto = header.IPv4ProtocolNumber
   285  	case header.IPv6AddressSizeBits:
   286  		if header.IsV4MappedAddress(addr.Addr) {
   287  			netProto = header.IPv4ProtocolNumber
   288  			addr.Addr = tcpip.AddrFrom4Slice(addr.Addr.AsSlice()[header.IPv6AddressSize-header.IPv4AddressSize:])
   289  			if addr.Addr == header.IPv4Any {
   290  				addr.Addr = tcpip.Address{}
   291  			}
   292  		}
   293  	}
   294  
   295  	switch t.ID.LocalAddress.BitLen() {
   296  	case header.IPv4AddressSizeBits:
   297  		if addr.Addr.BitLen() == header.IPv6AddressSizeBits {
   298  			return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{}
   299  		}
   300  	case header.IPv6AddressSizeBits:
   301  		if addr.Addr.BitLen() == header.IPv4AddressSizeBits {
   302  			return tcpip.FullAddress{}, 0, &tcpip.ErrNetworkUnreachable{}
   303  		}
   304  	}
   305  
   306  	switch {
   307  	case netProto == t.NetProto:
   308  	case netProto == header.IPv4ProtocolNumber && t.NetProto == header.IPv6ProtocolNumber:
   309  		if v6only {
   310  			return tcpip.FullAddress{}, 0, &tcpip.ErrHostUnreachable{}
   311  		}
   312  	default:
   313  		return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{}
   314  	}
   315  
   316  	return addr, netProto, nil
   317  }
   318  
// IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo
// marker interface. It has no behavior of its own.
func (*TransportEndpointInfo) IsEndpointInfo() {}
   322  
// New allocates a new networking stack with only the requested networking and
// transport protocols configured with default options.
//
// Note, NUDConfigs will be fixed before being used by the Stack. That
// is, if an invalid value was provided, it will be reset to the default value.
//
// Unset optional fields of opts are defaulted: Clock to tcpip.NewStdClock(),
// UniqueID to an internal counter, SecureRNG to the pkg/rand Reader,
// RandSource to a locked source seeded from SecureRNG, and IPTables to the
// result of DefaultIPTables (or DefaultTables when that is nil too).
//
// New panics if RandSource is nil and reading the seed from SecureRNG fails.
//
// Protocol options can be changed by calling the
// SetNetworkProtocolOption/SetTransportProtocolOption methods provided by the
// stack. Please refer to individual protocol implementations as to what options
// are supported.
func New(opts Options) *Stack {
	clock := opts.Clock
	if clock == nil {
		clock = tcpip.NewStdClock()
	}

	if opts.UniqueID == nil {
		opts.UniqueID = new(uniqueIDGenerator)
	}

	if opts.SecureRNG == nil {
		opts.SecureRNG = cryptorand.Reader
	}

	randSrc := opts.RandSource
	if randSrc == nil {
		// Seed the pseudo random source with 8 bytes from the secure RNG.
		var v int64
		if err := binary.Read(opts.SecureRNG, binary.LittleEndian, &v); err != nil {
			panic(err)
		}
		// Source provided by rand.NewSource is not thread-safe so
		// we wrap it in a simple thread-safe version.
		randSrc = &lockedRandomSource{src: rand.NewSource(v)}
	}
	randomGenerator := rand.New(randSrc)

	if opts.IPTables == nil {
		if opts.DefaultIPTables == nil {
			opts.DefaultIPTables = DefaultTables
		}
		// The tables constructor receives the generator before seed and
		// tsOffsetSecret are drawn from it below.
		opts.IPTables = opts.DefaultIPTables(clock, randomGenerator)
	}

	opts.NUDConfigs.resetInvalidFields()

	s := &Stack{
		transportProtocols:           make(map[tcpip.TransportProtocolNumber]*transportProtocolState),
		networkProtocols:             make(map[tcpip.NetworkProtocolNumber]NetworkProtocol),
		nics:                         make(map[tcpip.NICID]*nic),
		packetEndpointWriteSupported: opts.AllowPacketEndpointWrite,
		defaultForwardingEnabled:     make(map[tcpip.NetworkProtocolNumber]struct{}),
		cleanupEndpoints:             make(map[TransportEndpoint]struct{}),
		PortManager:                  ports.NewPortManager(),
		clock:                        clock,
		stats:                        opts.Stats.FillIn(),
		handleLocal:                  opts.HandleLocal,
		tables:                       opts.IPTables,
		icmpRateLimiter:              NewICMPRateLimiter(clock),
		seed:                         randomGenerator.Uint32(),
		nudConfigs:                   opts.NUDConfigs,
		uniqueIDGenerator:            opts.UniqueID,
		nudDisp:                      opts.NUDDisp,
		randomGenerator:              randomGenerator,
		secureRNG:                    opts.SecureRNG,
		sendBufferSize: tcpip.SendBufferSizeOption{
			Min:     MinBufferSize,
			Default: DefaultBufferSize,
			Max:     DefaultMaxBufferSize,
		},
		receiveBufferSize: tcpip.ReceiveBufferSizeOption{
			Min:     MinBufferSize,
			Default: DefaultBufferSize,
			Max:     DefaultMaxBufferSize,
		},
		tcpInvalidRateLimit: defaultTCPInvalidRateLimit,
		tsOffsetSecret:      randomGenerator.Uint32(),
	}

	// Add specified network protocols. A later factory that reports the same
	// protocol number replaces the earlier entry.
	for _, netProtoFactory := range opts.NetworkProtocols {
		netProto := netProtoFactory(s)
		s.networkProtocols[netProto.Number()] = netProto
	}

	// Add specified transport protocols. Each starts without a default
	// handler.
	for _, transProtoFactory := range opts.TransportProtocols {
		transProto := transProtoFactory(s)
		s.transportProtocols[transProto.Number()] = &transportProtocolState{
			proto: transProto,
		}
	}

	// Add the factory for raw endpoints, if present.
	s.rawFactory = opts.RawFactory

	// Create the global transport demuxer.
	s.demux = newTransportDemuxer(s)

	return s
}
   423  
   424  // UniqueID returns a unique identifier.
   425  func (s *Stack) UniqueID() uint64 {
   426  	return s.uniqueIDGenerator.UniqueID()
   427  }
   428  
   429  // SetNetworkProtocolOption allows configuring individual protocol level
   430  // options. This method returns an error if the protocol is not supported or
   431  // option is not supported by the protocol implementation or the provided value
   432  // is incorrect.
   433  func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.SettableNetworkProtocolOption) tcpip.Error {
   434  	netProto, ok := s.networkProtocols[network]
   435  	if !ok {
   436  		return &tcpip.ErrUnknownProtocol{}
   437  	}
   438  	return netProto.SetOption(option)
   439  }
   440  
   441  // NetworkProtocolOption allows retrieving individual protocol level option
   442  // values. This method returns an error if the protocol is not supported or
   443  // option is not supported by the protocol implementation. E.g.:
   444  //
   445  //	var v ipv4.MyOption
   446  //	err := s.NetworkProtocolOption(tcpip.IPv4ProtocolNumber, &v)
   447  //	if err != nil {
   448  //		...
   449  //	}
   450  func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.GettableNetworkProtocolOption) tcpip.Error {
   451  	netProto, ok := s.networkProtocols[network]
   452  	if !ok {
   453  		return &tcpip.ErrUnknownProtocol{}
   454  	}
   455  	return netProto.Option(option)
   456  }
   457  
   458  // SetTransportProtocolOption allows configuring individual protocol level
   459  // options. This method returns an error if the protocol is not supported or
   460  // option is not supported by the protocol implementation or the provided value
   461  // is incorrect.
   462  func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.SettableTransportProtocolOption) tcpip.Error {
   463  	transProtoState, ok := s.transportProtocols[transport]
   464  	if !ok {
   465  		return &tcpip.ErrUnknownProtocol{}
   466  	}
   467  	return transProtoState.proto.SetOption(option)
   468  }
   469  
   470  // TransportProtocolOption allows retrieving individual protocol level option
   471  // values. This method returns an error if the protocol is not supported or
   472  // option is not supported by the protocol implementation.
   473  //
   474  //	var v tcp.SACKEnabled
   475  //	if err := s.TransportProtocolOption(tcpip.TCPProtocolNumber, &v); err != nil {
   476  //		...
   477  //	}
   478  func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.GettableTransportProtocolOption) tcpip.Error {
   479  	transProtoState, ok := s.transportProtocols[transport]
   480  	if !ok {
   481  		return &tcpip.ErrUnknownProtocol{}
   482  	}
   483  	return transProtoState.proto.Option(option)
   484  }
   485  
   486  // SetTransportProtocolHandler sets the per-stack default handler for the given
   487  // protocol.
   488  //
   489  // It must be called only during initialization of the stack. Changing it as the
   490  // stack is operating is not supported.
   491  func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(TransportEndpointID, PacketBufferPtr) bool) {
   492  	state := s.transportProtocols[p]
   493  	if state != nil {
   494  		state.defaultHandler = h
   495  	}
   496  }
   497  
   498  // Clock returns the Stack's clock for retrieving the current time and
   499  // scheduling work.
   500  func (s *Stack) Clock() tcpip.Clock {
   501  	return s.clock
   502  }
   503  
   504  // Stats returns a mutable copy of the current stats.
   505  //
   506  // This is not generally exported via the public interface, but is available
   507  // internally.
   508  func (s *Stack) Stats() tcpip.Stats {
   509  	return s.stats
   510  }
   511  
   512  // SetNICForwarding enables or disables packet forwarding on the specified NIC
   513  // for the passed protocol.
   514  //
   515  // Returns the previous configuration on the NIC.
   516  func (s *Stack) SetNICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) {
   517  	s.mu.RLock()
   518  	defer s.mu.RUnlock()
   519  
   520  	nic, ok := s.nics[id]
   521  	if !ok {
   522  		return false, &tcpip.ErrUnknownNICID{}
   523  	}
   524  
   525  	return nic.setForwarding(protocol, enable)
   526  }
   527  
   528  // NICForwarding returns the forwarding configuration for the specified NIC.
   529  func (s *Stack) NICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) {
   530  	s.mu.RLock()
   531  	defer s.mu.RUnlock()
   532  
   533  	nic, ok := s.nics[id]
   534  	if !ok {
   535  		return false, &tcpip.ErrUnknownNICID{}
   536  	}
   537  
   538  	return nic.forwarding(protocol)
   539  }
   540  
   541  // SetForwardingDefaultAndAllNICs sets packet forwarding for all NICs for the
   542  // passed protocol and sets the default setting for newly created NICs.
   543  func (s *Stack) SetForwardingDefaultAndAllNICs(protocol tcpip.NetworkProtocolNumber, enable bool) tcpip.Error {
   544  	s.mu.Lock()
   545  	defer s.mu.Unlock()
   546  
   547  	doneOnce := false
   548  	for id, nic := range s.nics {
   549  		if _, err := nic.setForwarding(protocol, enable); err != nil {
   550  			// Expect forwarding to be settable on all interfaces if it was set on
   551  			// one.
   552  			if doneOnce {
   553  				panic(fmt.Sprintf("nic(id=%d).setForwarding(%d, %t): %s", id, protocol, enable, err))
   554  			}
   555  
   556  			return err
   557  		}
   558  
   559  		doneOnce = true
   560  	}
   561  
   562  	if enable {
   563  		s.defaultForwardingEnabled[protocol] = struct{}{}
   564  	} else {
   565  		delete(s.defaultForwardingEnabled, protocol)
   566  	}
   567  
   568  	return nil
   569  }
   570  
   571  // AddMulticastRoute adds a multicast route to be used for the specified
   572  // addresses and protocol.
   573  func (s *Stack) AddMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination, route MulticastRoute) tcpip.Error {
   574  	netProto, ok := s.networkProtocols[protocol]
   575  	if !ok {
   576  		return &tcpip.ErrUnknownProtocol{}
   577  	}
   578  
   579  	forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol)
   580  	if !ok {
   581  		return &tcpip.ErrNotSupported{}
   582  	}
   583  
   584  	return forwardingNetProto.AddMulticastRoute(addresses, route)
   585  }
   586  
   587  // RemoveMulticastRoute removes a multicast route that matches the specified
   588  // addresses and protocol.
   589  func (s *Stack) RemoveMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) tcpip.Error {
   590  	netProto, ok := s.networkProtocols[protocol]
   591  	if !ok {
   592  		return &tcpip.ErrUnknownProtocol{}
   593  	}
   594  
   595  	forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol)
   596  	if !ok {
   597  		return &tcpip.ErrNotSupported{}
   598  	}
   599  
   600  	return forwardingNetProto.RemoveMulticastRoute(addresses)
   601  }
   602  
   603  // MulticastRouteLastUsedTime returns a monotonic timestamp that represents the
   604  // last time that the route that matches the provided addresses and protocol
   605  // was used or updated.
   606  func (s *Stack) MulticastRouteLastUsedTime(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error) {
   607  	netProto, ok := s.networkProtocols[protocol]
   608  	if !ok {
   609  		return tcpip.MonotonicTime{}, &tcpip.ErrUnknownProtocol{}
   610  	}
   611  
   612  	forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol)
   613  	if !ok {
   614  		return tcpip.MonotonicTime{}, &tcpip.ErrNotSupported{}
   615  	}
   616  
   617  	return forwardingNetProto.MulticastRouteLastUsedTime(addresses)
   618  }
   619  
   620  // EnableMulticastForwardingForProtocol enables multicast forwarding for the
   621  // provided protocol.
   622  //
   623  // Returns true if forwarding was already enabled on the protocol.
   624  // Additionally, returns an error if:
   625  //
   626  //   - The protocol is not found.
   627  //   - The protocol doesn't support multicast forwarding.
   628  //   - The multicast forwarding event dispatcher is nil.
   629  //
   630  // If successful, future multicast forwarding events will be sent to the
   631  // provided event dispatcher.
   632  func (s *Stack) EnableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber, disp MulticastForwardingEventDispatcher) (bool, tcpip.Error) {
   633  	netProto, ok := s.networkProtocols[protocol]
   634  	if !ok {
   635  		return false, &tcpip.ErrUnknownProtocol{}
   636  	}
   637  
   638  	forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol)
   639  	if !ok {
   640  		return false, &tcpip.ErrNotSupported{}
   641  	}
   642  
   643  	return forwardingNetProto.EnableMulticastForwarding(disp)
   644  }
   645  
   646  // DisableMulticastForwardingForProtocol disables multicast forwarding for the
   647  // provided protocol.
   648  //
   649  // Returns an error if the provided protocol is not found or if it does not
   650  // support multicast forwarding.
   651  func (s *Stack) DisableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber) tcpip.Error {
   652  	netProto, ok := s.networkProtocols[protocol]
   653  	if !ok {
   654  		return &tcpip.ErrUnknownProtocol{}
   655  	}
   656  
   657  	forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol)
   658  	if !ok {
   659  		return &tcpip.ErrNotSupported{}
   660  	}
   661  
   662  	forwardingNetProto.DisableMulticastForwarding()
   663  	return nil
   664  }
   665  
   666  // SetNICMulticastForwarding enables or disables multicast packet forwarding on
   667  // the specified NIC for the passed protocol.
   668  //
   669  // Returns the previous configuration on the NIC.
   670  //
   671  // TODO(https://gvisor.dev/issue/7338): Implement support for multicast
   672  // forwarding. Currently, setting this value is a no-op and is not ready for
   673  // use.
   674  func (s *Stack) SetNICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) {
   675  	s.mu.RLock()
   676  	defer s.mu.RUnlock()
   677  
   678  	nic, ok := s.nics[id]
   679  	if !ok {
   680  		return false, &tcpip.ErrUnknownNICID{}
   681  	}
   682  
   683  	return nic.setMulticastForwarding(protocol, enable)
   684  }
   685  
   686  // NICMulticastForwarding returns the multicast forwarding configuration for
   687  // the specified NIC.
   688  func (s *Stack) NICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) {
   689  	s.mu.RLock()
   690  	defer s.mu.RUnlock()
   691  
   692  	nic, ok := s.nics[id]
   693  	if !ok {
   694  		return false, &tcpip.ErrUnknownNICID{}
   695  	}
   696  
   697  	return nic.multicastForwarding(protocol)
   698  }
   699  
   700  // PortRange returns the UDP and TCP inclusive range of ephemeral ports used in
   701  // both IPv4 and IPv6.
   702  func (s *Stack) PortRange() (uint16, uint16) {
   703  	return s.PortManager.PortRange()
   704  }
   705  
   706  // SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range
   707  // (inclusive).
   708  func (s *Stack) SetPortRange(start uint16, end uint16) tcpip.Error {
   709  	return s.PortManager.SetPortRange(start, end)
   710  }
   711  
   712  // GROTimeout returns the GRO timeout.
   713  func (s *Stack) GROTimeout(nicID tcpip.NICID) (time.Duration, tcpip.Error) {
   714  	s.mu.RLock()
   715  	defer s.mu.RUnlock()
   716  
   717  	nic, ok := s.nics[nicID]
   718  	if !ok {
   719  		return 0, &tcpip.ErrUnknownNICID{}
   720  	}
   721  
   722  	return nic.gro.getInterval(), nil
   723  }
   724  
   725  // SetGROTimeout sets the GRO timeout.
   726  func (s *Stack) SetGROTimeout(nicID tcpip.NICID, timeout time.Duration) tcpip.Error {
   727  	s.mu.RLock()
   728  	defer s.mu.RUnlock()
   729  
   730  	nic, ok := s.nics[nicID]
   731  	if !ok {
   732  		return &tcpip.ErrUnknownNICID{}
   733  	}
   734  
   735  	nic.gro.setInterval(timeout)
   736  	return nil
   737  }
   738  
   739  // SetRouteTable assigns the route table to be used by this stack. It
   740  // specifies which NIC to use for given destination address ranges.
   741  //
   742  // This method takes ownership of the table.
   743  func (s *Stack) SetRouteTable(table []tcpip.Route) {
   744  	s.routeMu.Lock()
   745  	defer s.routeMu.Unlock()
   746  	s.routeTable = table
   747  }
   748  
   749  // GetRouteTable returns the route table which is currently in use.
   750  func (s *Stack) GetRouteTable() []tcpip.Route {
   751  	s.routeMu.RLock()
   752  	defer s.routeMu.RUnlock()
   753  	return append([]tcpip.Route(nil), s.routeTable...)
   754  }
   755  
   756  // AddRoute appends a route to the route table.
   757  func (s *Stack) AddRoute(route tcpip.Route) {
   758  	s.routeMu.Lock()
   759  	defer s.routeMu.Unlock()
   760  	s.routeTable = append(s.routeTable, route)
   761  }
   762  
   763  // RemoveRoutes removes matching routes from the route table.
   764  func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) {
   765  	s.routeMu.Lock()
   766  	defer s.routeMu.Unlock()
   767  
   768  	var filteredRoutes []tcpip.Route
   769  	for _, route := range s.routeTable {
   770  		if !match(route) {
   771  			filteredRoutes = append(filteredRoutes, route)
   772  		}
   773  	}
   774  	s.routeTable = filteredRoutes
   775  }
   776  
   777  // NewEndpoint creates a new transport layer endpoint of the given protocol.
   778  func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) {
   779  	t, ok := s.transportProtocols[transport]
   780  	if !ok {
   781  		return nil, &tcpip.ErrUnknownProtocol{}
   782  	}
   783  
   784  	return t.proto.NewEndpoint(network, waiterQueue)
   785  }
   786  
   787  // NewRawEndpoint creates a new raw transport layer endpoint of the given
   788  // protocol. Raw endpoints receive all traffic for a given protocol regardless
   789  // of address.
   790  func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, tcpip.Error) {
   791  	if s.rawFactory == nil {
   792  		netRawMissingLogger.Infof("A process tried to create a raw socket, but --net-raw was not specified. Should runsc be run with --net-raw?")
   793  		return nil, &tcpip.ErrNotPermitted{}
   794  	}
   795  
   796  	if !associated {
   797  		return s.rawFactory.NewUnassociatedEndpoint(s, network, transport, waiterQueue)
   798  	}
   799  
   800  	t, ok := s.transportProtocols[transport]
   801  	if !ok {
   802  		return nil, &tcpip.ErrUnknownProtocol{}
   803  	}
   804  
   805  	return t.proto.NewRawEndpoint(network, waiterQueue)
   806  }
   807  
   808  // NewPacketEndpoint creates a new packet endpoint listening for the given
   809  // netProto.
   810  func (s *Stack) NewPacketEndpoint(cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) {
   811  	if s.rawFactory == nil {
   812  		return nil, &tcpip.ErrNotPermitted{}
   813  	}
   814  
   815  	return s.rawFactory.NewPacketEndpoint(s, cooked, netProto, waiterQueue)
   816  }
   817  
// NICContext is an opaque value used to store client-supplied NIC metadata.
// It is set through NICOptions.Context and returned in NICInfo.Context.
type NICContext any
   820  
// NICOptions specifies the configuration of a NIC as it is being created.
// The zero value creates an enabled, unnamed NIC.
type NICOptions struct {
	// Name specifies the name of the NIC. CreateNICWithOptions rejects a
	// non-empty name that is already used by another NIC; the empty string
	// leaves the NIC unnamed and is never treated as a duplicate.
	Name string

	// Disabled specifies whether to avoid calling Attach on the passed
	// LinkEndpoint. When false, CreateNICWithOptions enables the NIC right
	// after creating it.
	Disabled bool

	// Context specifies user-defined data that will be returned in stack.NICInfo
	// for the NIC. Clients of this library can use it to add metadata that
	// should be tracked alongside a NIC, to avoid having to keep a
	// map[tcpip.NICID]metadata mirroring stack.Stack's nic map.
	Context NICContext

	// QDisc is the queue discipline to use for this NIC.
	QDisc QueueingDiscipline

	// GROTimeout specifies the GRO timeout. Zero bypasses GRO.
	GROTimeout time.Duration
}
   843  
   844  // CreateNICWithOptions creates a NIC with the provided id, LinkEndpoint, and
   845  // NICOptions. See the documentation on type NICOptions for details on how
   846  // NICs can be configured.
   847  //
   848  // LinkEndpoint.Attach will be called to bind ep with a NetworkDispatcher.
   849  func (s *Stack) CreateNICWithOptions(id tcpip.NICID, ep LinkEndpoint, opts NICOptions) tcpip.Error {
   850  	s.mu.Lock()
   851  	defer s.mu.Unlock()
   852  
   853  	// Make sure id is unique.
   854  	if _, ok := s.nics[id]; ok {
   855  		return &tcpip.ErrDuplicateNICID{}
   856  	}
   857  
   858  	// Make sure name is unique, unless unnamed.
   859  	if opts.Name != "" {
   860  		for _, n := range s.nics {
   861  			if n.Name() == opts.Name {
   862  				return &tcpip.ErrDuplicateNICID{}
   863  			}
   864  		}
   865  	}
   866  
   867  	n := newNIC(s, id, ep, opts)
   868  	for proto := range s.defaultForwardingEnabled {
   869  		if _, err := n.setForwarding(proto, true); err != nil {
   870  			panic(fmt.Sprintf("newNIC(%d, ...).setForwarding(%d, true): %s", id, proto, err))
   871  		}
   872  	}
   873  	s.nics[id] = n
   874  	if !opts.Disabled {
   875  		return n.enable()
   876  	}
   877  
   878  	return nil
   879  }
   880  
// CreateNIC creates a NIC with the provided id and LinkEndpoint and calls
// LinkEndpoint.Attach to bind ep with a NetworkDispatcher.
//
// It is shorthand for CreateNICWithOptions with the zero-value NICOptions,
// which yields an enabled, unnamed NIC.
func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) tcpip.Error {
	return s.CreateNICWithOptions(id, ep, NICOptions{})
}
   886  
   887  // GetLinkEndpointByName gets the link endpoint specified by name.
   888  func (s *Stack) GetLinkEndpointByName(name string) LinkEndpoint {
   889  	s.mu.RLock()
   890  	defer s.mu.RUnlock()
   891  	for _, nic := range s.nics {
   892  		if nic.Name() == name {
   893  			linkEP, ok := nic.NetworkLinkEndpoint.(LinkEndpoint)
   894  			if !ok {
   895  				panic(fmt.Sprintf("unexpected NetworkLinkEndpoint(%#v) is not a LinkEndpoint", nic.NetworkLinkEndpoint))
   896  			}
   897  			return linkEP
   898  		}
   899  	}
   900  	return nil
   901  }
   902  
   903  // EnableNIC enables the given NIC so that the link-layer endpoint can start
   904  // delivering packets to it.
   905  func (s *Stack) EnableNIC(id tcpip.NICID) tcpip.Error {
   906  	s.mu.RLock()
   907  	defer s.mu.RUnlock()
   908  
   909  	nic, ok := s.nics[id]
   910  	if !ok {
   911  		return &tcpip.ErrUnknownNICID{}
   912  	}
   913  
   914  	return nic.enable()
   915  }
   916  
   917  // DisableNIC disables the given NIC.
   918  func (s *Stack) DisableNIC(id tcpip.NICID) tcpip.Error {
   919  	s.mu.RLock()
   920  	defer s.mu.RUnlock()
   921  
   922  	nic, ok := s.nics[id]
   923  	if !ok {
   924  		return &tcpip.ErrUnknownNICID{}
   925  	}
   926  
   927  	nic.disable()
   928  	return nil
   929  }
   930  
   931  // CheckNIC checks if a NIC is usable.
   932  func (s *Stack) CheckNIC(id tcpip.NICID) bool {
   933  	s.mu.RLock()
   934  	defer s.mu.RUnlock()
   935  
   936  	nic, ok := s.nics[id]
   937  	if !ok {
   938  		return false
   939  	}
   940  
   941  	return nic.Enabled()
   942  }
   943  
// RemoveNIC removes NIC and all related routes from the network stack.
//
// It holds s.mu for writing while delegating to removeNICLocked, whose result
// it returns.
func (s *Stack) RemoveNIC(id tcpip.NICID) tcpip.Error {
	s.mu.Lock()
	defer s.mu.Unlock()

	return s.removeNICLocked(id)
}
   951  
   952  // removeNICLocked removes NIC and all related routes from the network stack.
   953  //
   954  // +checklocks:s.mu
   955  func (s *Stack) removeNICLocked(id tcpip.NICID) tcpip.Error {
   956  	nic, ok := s.nics[id]
   957  	if !ok {
   958  		return &tcpip.ErrUnknownNICID{}
   959  	}
   960  	delete(s.nics, id)
   961  
   962  	// Remove routes in-place. n tracks the number of routes written.
   963  	s.routeMu.Lock()
   964  	n := 0
   965  	for i, r := range s.routeTable {
   966  		s.routeTable[i] = tcpip.Route{}
   967  		if r.NIC != id {
   968  			// Keep this route.
   969  			s.routeTable[n] = r
   970  			n++
   971  		}
   972  	}
   973  	s.routeTable = s.routeTable[:n]
   974  	s.routeMu.Unlock()
   975  
   976  	return nic.remove()
   977  }
   978  
// NICInfo captures the name and addresses assigned to a NIC, along with its
// state, statistics and forwarding configuration. Values of this type are
// produced by Stack.NICInfo.
type NICInfo struct {
	// Name is the name of the NIC.
	Name string
	// LinkAddress is the link address reported by the NIC's link endpoint.
	LinkAddress tcpip.LinkAddress
	// ProtocolAddresses holds the NIC's primary protocol addresses.
	ProtocolAddresses []tcpip.ProtocolAddress

	// Flags indicate the state of the NIC.
	Flags NICStateFlags

	// MTU is the maximum transmission unit.
	MTU uint32

	// Stats holds the NIC's statistics.
	Stats tcpip.NICStats

	// NetworkStats holds the stats of each NetworkEndpoint bound to the NIC.
	NetworkStats map[tcpip.NetworkProtocolNumber]NetworkEndpointStats

	// Context is user-supplied data optionally supplied in CreateNICWithOptions.
	// See type NICOptions for more details.
	Context NICContext

	// ARPHardwareType holds the ARP Hardware type of the NIC. This is the
	// value sent in haType field of an ARP Request sent by this NIC and the
	// value expected in the haType field of an ARP response.
	ARPHardwareType header.ARPHardwareType

	// Forwarding holds the forwarding status for each network endpoint that
	// supports forwarding.
	Forwarding map[tcpip.NetworkProtocolNumber]bool

	// MulticastForwarding holds the forwarding status for each network endpoint
	// that supports multicast forwarding.
	MulticastForwarding map[tcpip.NetworkProtocolNumber]bool
}
  1013  
  1014  // HasNIC returns true if the NICID is defined in the stack.
  1015  func (s *Stack) HasNIC(id tcpip.NICID) bool {
  1016  	s.mu.RLock()
  1017  	_, ok := s.nics[id]
  1018  	s.mu.RUnlock()
  1019  	return ok
  1020  }
  1021  
  1022  // NICInfo returns a map of NICIDs to their associated information.
  1023  func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo {
  1024  	s.mu.RLock()
  1025  	defer s.mu.RUnlock()
  1026  
  1027  	type forwardingFn func(tcpip.NetworkProtocolNumber) (bool, tcpip.Error)
  1028  	forwardingValue := func(forwardingFn forwardingFn, proto tcpip.NetworkProtocolNumber, nicID tcpip.NICID, fnName string) (forward bool, ok bool) {
  1029  		switch forwarding, err := forwardingFn(proto); err.(type) {
  1030  		case nil:
  1031  			return forwarding, true
  1032  		case *tcpip.ErrUnknownProtocol:
  1033  			panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nicID))
  1034  		case *tcpip.ErrNotSupported:
  1035  			// Not all network protocols support forwarding.
  1036  		default:
  1037  			panic(fmt.Sprintf("nic(id=%d).%s(%d): %s", nicID, fnName, proto, err))
  1038  		}
  1039  		return false, false
  1040  	}
  1041  
  1042  	nics := make(map[tcpip.NICID]NICInfo)
  1043  	for id, nic := range s.nics {
  1044  		flags := NICStateFlags{
  1045  			Up:          true, // Netstack interfaces are always up.
  1046  			Running:     nic.Enabled(),
  1047  			Promiscuous: nic.Promiscuous(),
  1048  			Loopback:    nic.IsLoopback(),
  1049  		}
  1050  
  1051  		netStats := make(map[tcpip.NetworkProtocolNumber]NetworkEndpointStats)
  1052  		for proto, netEP := range nic.networkEndpoints {
  1053  			netStats[proto] = netEP.Stats()
  1054  		}
  1055  
  1056  		info := NICInfo{
  1057  			Name:                nic.name,
  1058  			LinkAddress:         nic.NetworkLinkEndpoint.LinkAddress(),
  1059  			ProtocolAddresses:   nic.primaryAddresses(),
  1060  			Flags:               flags,
  1061  			MTU:                 nic.NetworkLinkEndpoint.MTU(),
  1062  			Stats:               nic.stats.local,
  1063  			NetworkStats:        netStats,
  1064  			Context:             nic.context,
  1065  			ARPHardwareType:     nic.NetworkLinkEndpoint.ARPHardwareType(),
  1066  			Forwarding:          make(map[tcpip.NetworkProtocolNumber]bool),
  1067  			MulticastForwarding: make(map[tcpip.NetworkProtocolNumber]bool),
  1068  		}
  1069  
  1070  		for proto := range s.networkProtocols {
  1071  			if forwarding, ok := forwardingValue(nic.forwarding, proto, id, "forwarding"); ok {
  1072  				info.Forwarding[proto] = forwarding
  1073  			}
  1074  
  1075  			if multicastForwarding, ok := forwardingValue(nic.multicastForwarding, proto, id, "multicastForwarding"); ok {
  1076  				info.MulticastForwarding[proto] = multicastForwarding
  1077  			}
  1078  		}
  1079  
  1080  		nics[id] = info
  1081  	}
  1082  	return nics
  1083  }
  1084  
// NICStateFlags holds information about the state of an NIC.
type NICStateFlags struct {
	// Up indicates whether the interface is running. Stack.NICInfo always
	// reports true here because netstack interfaces are always up.
	Up bool

	// Running indicates whether resources are allocated. Stack.NICInfo sets it
	// to whether the NIC is enabled.
	Running bool

	// Promiscuous indicates whether the interface is in promiscuous mode.
	Promiscuous bool

	// Loopback indicates whether the interface is a loopback.
	Loopback bool
}
  1099  
  1100  // AddProtocolAddress adds an address to the specified NIC, possibly with extra
  1101  // properties.
  1102  func (s *Stack) AddProtocolAddress(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress, properties AddressProperties) tcpip.Error {
  1103  	s.mu.RLock()
  1104  	defer s.mu.RUnlock()
  1105  
  1106  	nic, ok := s.nics[id]
  1107  	if !ok {
  1108  		return &tcpip.ErrUnknownNICID{}
  1109  	}
  1110  
  1111  	return nic.addAddress(protocolAddress, properties)
  1112  }
  1113  
  1114  // RemoveAddress removes an existing network-layer address from the specified
  1115  // NIC.
  1116  func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) tcpip.Error {
  1117  	s.mu.RLock()
  1118  	defer s.mu.RUnlock()
  1119  
  1120  	if nic, ok := s.nics[id]; ok {
  1121  		return nic.removeAddress(addr)
  1122  	}
  1123  
  1124  	return &tcpip.ErrUnknownNICID{}
  1125  }
  1126  
  1127  // SetAddressLifetimes sets informational preferred and valid lifetimes, and
  1128  // whether the address should be preferred or deprecated.
  1129  func (s *Stack) SetAddressLifetimes(id tcpip.NICID, addr tcpip.Address, lifetimes AddressLifetimes) tcpip.Error {
  1130  	s.mu.RLock()
  1131  	defer s.mu.RUnlock()
  1132  
  1133  	if nic, ok := s.nics[id]; ok {
  1134  		return nic.setAddressLifetimes(addr, lifetimes)
  1135  	}
  1136  
  1137  	return &tcpip.ErrUnknownNICID{}
  1138  }
  1139  
  1140  // AllAddresses returns a map of NICIDs to their protocol addresses (primary
  1141  // and non-primary).
  1142  func (s *Stack) AllAddresses() map[tcpip.NICID][]tcpip.ProtocolAddress {
  1143  	s.mu.RLock()
  1144  	defer s.mu.RUnlock()
  1145  
  1146  	nics := make(map[tcpip.NICID][]tcpip.ProtocolAddress)
  1147  	for id, nic := range s.nics {
  1148  		nics[id] = nic.allPermanentAddresses()
  1149  	}
  1150  	return nics
  1151  }
  1152  
  1153  // GetMainNICAddress returns the first non-deprecated primary address and prefix
  1154  // for the given NIC and protocol. If no non-deprecated primary addresses exist,
  1155  // a deprecated address will be returned. If no deprecated addresses exist, the
  1156  // zero value will be returned.
  1157  func (s *Stack) GetMainNICAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error) {
  1158  	s.mu.RLock()
  1159  	defer s.mu.RUnlock()
  1160  
  1161  	nic, ok := s.nics[id]
  1162  	if !ok {
  1163  		return tcpip.AddressWithPrefix{}, &tcpip.ErrUnknownNICID{}
  1164  	}
  1165  
  1166  	return nic.PrimaryAddress(protocol)
  1167  }
  1168  
  1169  func (s *Stack) getAddressEP(nic *nic, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) AssignableAddressEndpoint {
  1170  	if localAddr.BitLen() == 0 {
  1171  		return nic.primaryEndpoint(netProto, remoteAddr)
  1172  	}
  1173  	return nic.findEndpoint(netProto, localAddr, CanBePrimaryEndpoint)
  1174  }
  1175  
  1176  // NewRouteForMulticast returns a Route that may be used to forward multicast
  1177  // packets.
  1178  //
  1179  // Returns nil if validation fails.
  1180  func (s *Stack) NewRouteForMulticast(nicID tcpip.NICID, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route {
  1181  	s.mu.RLock()
  1182  	defer s.mu.RUnlock()
  1183  
  1184  	nic, ok := s.nics[nicID]
  1185  	if !ok || !nic.Enabled() {
  1186  		return nil
  1187  	}
  1188  
  1189  	if addressEndpoint := s.getAddressEP(nic, tcpip.Address{} /* localAddr */, remoteAddr, netProto); addressEndpoint != nil {
  1190  		return constructAndValidateRoute(netProto, addressEndpoint, nic, nic, tcpip.Address{} /* gateway */, tcpip.Address{} /* localAddr */, remoteAddr, s.handleLocal, false /* multicastLoop */)
  1191  	}
  1192  	return nil
  1193  }
  1194  
  1195  // findLocalRouteFromNICRLocked is like findLocalRouteRLocked but finds a route
  1196  // from the specified NIC.
  1197  //
  1198  // +checklocksread:s.mu
  1199  func (s *Stack) findLocalRouteFromNICRLocked(localAddressNIC *nic, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route {
  1200  	localAddressEndpoint := localAddressNIC.getAddressOrCreateTempInner(netProto, localAddr, false /* createTemp */, NeverPrimaryEndpoint)
  1201  	if localAddressEndpoint == nil {
  1202  		return nil
  1203  	}
  1204  
  1205  	var outgoingNIC *nic
  1206  	// Prefer a local route to the same interface as the local address.
  1207  	if localAddressNIC.hasAddress(netProto, remoteAddr) {
  1208  		outgoingNIC = localAddressNIC
  1209  	}
  1210  
  1211  	// If the remote address isn't owned by the local address's NIC, check all
  1212  	// NICs.
  1213  	if outgoingNIC == nil {
  1214  		for _, nic := range s.nics {
  1215  			if nic.hasAddress(netProto, remoteAddr) {
  1216  				outgoingNIC = nic
  1217  				break
  1218  			}
  1219  		}
  1220  	}
  1221  
  1222  	// If the remote address is not owned by the stack, we can't return a local
  1223  	// route.
  1224  	if outgoingNIC == nil {
  1225  		localAddressEndpoint.DecRef()
  1226  		return nil
  1227  	}
  1228  
  1229  	r := makeLocalRoute(
  1230  		netProto,
  1231  		localAddr,
  1232  		remoteAddr,
  1233  		outgoingNIC,
  1234  		localAddressNIC,
  1235  		localAddressEndpoint,
  1236  	)
  1237  
  1238  	if r.IsOutboundBroadcast() {
  1239  		r.Release()
  1240  		return nil
  1241  	}
  1242  
  1243  	return r
  1244  }
  1245  
  1246  // findLocalRouteRLocked returns a local route.
  1247  //
  1248  // A local route is a route to some remote address which the stack owns. That
  1249  // is, a local route is a route where packets never have to leave the stack.
  1250  //
  1251  // +checklocksread:s.mu
  1252  func (s *Stack) findLocalRouteRLocked(localAddressNICID tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route {
  1253  	if localAddr.BitLen() == 0 {
  1254  		localAddr = remoteAddr
  1255  	}
  1256  
  1257  	if localAddressNICID == 0 {
  1258  		for _, localAddressNIC := range s.nics {
  1259  			if r := s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto); r != nil {
  1260  				return r
  1261  			}
  1262  		}
  1263  
  1264  		return nil
  1265  	}
  1266  
  1267  	if localAddressNIC, ok := s.nics[localAddressNICID]; ok {
  1268  		return s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto)
  1269  	}
  1270  
  1271  	return nil
  1272  }
  1273  
// HandleLocal returns true if non-loopback interfaces are allowed to loop packets.
//
// It reports the stack's handleLocal setting, which is read without acquiring
// any lock.
func (s *Stack) HandleLocal() bool {
	return s.handleLocal
}
  1278  
  1279  func isNICForwarding(nic *nic, proto tcpip.NetworkProtocolNumber) bool {
  1280  	switch forwarding, err := nic.forwarding(proto); err.(type) {
  1281  	case nil:
  1282  		return forwarding
  1283  	case *tcpip.ErrUnknownProtocol:
  1284  		panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nic.ID()))
  1285  	case *tcpip.ErrNotSupported:
  1286  		// Not all network protocols support forwarding.
  1287  		return false
  1288  	default:
  1289  		panic(fmt.Sprintf("nic(id=%d).forwarding(%d): %s", nic.ID(), proto, err))
  1290  	}
  1291  }
  1292  
// FindRoute creates a route to the given destination address, leaving through
// the given NIC and local address (if provided).
//
// If a NIC is not specified, the returned route will leave through the same
// NIC as the NIC that has the local address assigned when forwarding is
// disabled. If forwarding is enabled and the NIC is unspecified, the route may
// leave through any interface unless the route is link-local.
//
// If no local address is provided, the stack will select a local address. If no
// remote address is provided, the stack will use a remote address equal to the
// local address.
//
// On failure a nil Route is returned together with one of ErrBadLocalAddress,
// ErrNetworkUnreachable or ErrHostUnreachable.
func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (*Route, tcpip.Error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Classify the remote address. Broadcast, multicast, link-local and
	// loopback destinations do not need a route table entry (and gateway).
	isLinkLocal := header.IsV6LinkLocalUnicastAddress(remoteAddr) || header.IsV6LinkLocalMulticastAddress(remoteAddr)
	isLocalBroadcast := remoteAddr == header.IPv4Broadcast
	isMulticast := header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)
	isLoopback := header.IsV4LoopbackAddress(remoteAddr) || header.IsV6LoopbackAddress(remoteAddr)
	needRoute := !(isLocalBroadcast || isMulticast || isLinkLocal || isLoopback)

	// When the stack handles local traffic itself, first try a route that
	// never leaves the stack.
	if s.handleLocal && !isMulticast && !isLocalBroadcast {
		if r := s.findLocalRouteRLocked(id, localAddr, remoteAddr, netProto); r != nil {
			return r, nil
		}
	}

	// If the interface is specified and we do not need a route, return a route
	// through the interface if the interface is valid and enabled.
	if id != 0 && !needRoute {
		if nic, ok := s.nics[id]; ok && nic.Enabled() {
			if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil {
				return makeRoute(
					netProto,
					tcpip.Address{}, /* gateway */
					localAddr,
					remoteAddr,
					nic, /* outboundNIC */
					nic, /* localAddressNIC*/
					addressEndpoint,
					s.handleLocal,
					multicastLoop,
				), nil
			}
		}

		if isLoopback {
			return nil, &tcpip.ErrBadLocalAddress{}
		}
		return nil, &tcpip.ErrNetworkUnreachable{}
	}

	// Forwarding candidates are only tracked when neither the local nor the
	// remote address is link-local.
	onlyGlobalAddresses := !header.IsV6LinkLocalUnicastAddress(localAddr) && !isLinkLocal

	// Find a route to the remote with the route table.
	var chosenRoute tcpip.Route
	if r := func() *Route {
		s.routeMu.RLock()
		defer s.routeMu.RUnlock()

		for _, route := range s.routeTable {
			// An unspecified remote address matches every route.
			if remoteAddr.BitLen() != 0 && !route.Destination.Contains(remoteAddr) {
				continue
			}

			nic, ok := s.nics[route.NIC]
			if !ok || !nic.Enabled() {
				continue
			}

			if id == 0 || id == route.NIC {
				if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil {
					var gateway tcpip.Address
					if needRoute {
						gateway = route.Gateway
					}
					r := constructAndValidateRoute(netProto, addressEndpoint, nic /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop)
					if r == nil {
						panic(fmt.Sprintf("non-forwarding route validation failed with route table entry = %#v, id = %d, localAddr = %s, remoteAddr = %s", route, id, localAddr, remoteAddr))
					}
					return r
				}
			}

			// If the stack has forwarding enabled and we haven't found a valid route
			// to the remote address yet, keep track of the first valid route. We
			// keep iterating because we prefer routes that let us use a local
			// address that is assigned to the outgoing interface. There is no
			// requirement to do this from any RFC but simply a choice made to better
			// follow a strong host model which the netstack follows at the time of
			// writing.
			if onlyGlobalAddresses && chosenRoute.Equal(tcpip.Route{}) && isNICForwarding(nic, netProto) {
				chosenRoute = route
			}
		}

		return nil
	}(); r != nil {
		return r, nil
	}

	if !chosenRoute.Equal(tcpip.Route{}) {
		// At this point we know the stack has forwarding enabled since chosenRoute is
		// only set when forwarding is enabled.
		nic, ok := s.nics[chosenRoute.NIC]
		if !ok {
			// If the route's NIC was invalid, we should not have chosen the route.
			panic(fmt.Sprintf("chosen route must have a valid NIC with ID = %d", chosenRoute.NIC))
		}

		var gateway tcpip.Address
		if needRoute {
			gateway = chosenRoute.Gateway
		}

		// Use the specified NIC to get the local address endpoint.
		if id != 0 {
			if aNIC, ok := s.nics[id]; ok {
				if addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, netProto); addressEndpoint != nil {
					if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
						return r, nil
					}
				}
			}

			// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
			return nil, &tcpip.ErrHostUnreachable{}
		}

		if id == 0 {
			// If an interface is not specified, try to find a NIC that holds the local
			// address endpoint to construct a route.
			for _, aNIC := range s.nics {
				addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, netProto)
				if addressEndpoint == nil {
					continue
				}

				if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
					return r, nil
				}
			}
		}
	}

	// No route could be constructed; pick the error to report.
	if needRoute {
		// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
		return nil, &tcpip.ErrHostUnreachable{}
	}
	if header.IsV6LoopbackAddress(remoteAddr) {
		return nil, &tcpip.ErrBadLocalAddress{}
	}
	// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
	return nil, &tcpip.ErrNetworkUnreachable{}
}
  1448  
  1449  // CheckNetworkProtocol checks if a given network protocol is enabled in the
  1450  // stack.
  1451  func (s *Stack) CheckNetworkProtocol(protocol tcpip.NetworkProtocolNumber) bool {
  1452  	_, ok := s.networkProtocols[protocol]
  1453  	return ok
  1454  }
  1455  
  1456  // CheckDuplicateAddress performs duplicate address detection for the address on
  1457  // the specified interface.
  1458  func (s *Stack) CheckDuplicateAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, h DADCompletionHandler) (DADCheckAddressDisposition, tcpip.Error) {
  1459  	s.mu.RLock()
  1460  	nic, ok := s.nics[nicID]
  1461  	s.mu.RUnlock()
  1462  
  1463  	if !ok {
  1464  		return 0, &tcpip.ErrUnknownNICID{}
  1465  	}
  1466  
  1467  	return nic.checkDuplicateAddress(protocol, addr, h)
  1468  }
  1469  
  1470  // CheckLocalAddress determines if the given local address exists, and if it
  1471  // does, returns the id of the NIC it's bound to. Returns 0 if the address
  1472  // does not exist.
  1473  func (s *Stack) CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID {
  1474  	s.mu.RLock()
  1475  	defer s.mu.RUnlock()
  1476  
  1477  	// If a NIC is specified, use its NIC id.
  1478  	if nicID != 0 {
  1479  		nic, ok := s.nics[nicID]
  1480  		if !ok {
  1481  			return 0
  1482  		}
  1483  		// In IPv4, linux only checks the interface. If it matches, then it does
  1484  		// not bother with the address.
  1485  		// https://github.com/torvalds/linux/blob/15205c2829ca2cbb5ece5ceaafe1171a8470e62b/net/ipv4/igmp.c#L1829-L1837
  1486  		if protocol == header.IPv4ProtocolNumber {
  1487  			return nic.id
  1488  		}
  1489  		if nic.CheckLocalAddress(protocol, addr) {
  1490  			return nic.id
  1491  		}
  1492  		return 0
  1493  	}
  1494  
  1495  	// Go through all the NICs.
  1496  	for _, nic := range s.nics {
  1497  		if nic.CheckLocalAddress(protocol, addr) {
  1498  			return nic.id
  1499  		}
  1500  	}
  1501  
  1502  	return 0
  1503  }
  1504  
  1505  // SetPromiscuousMode enables or disables promiscuous mode in the given NIC.
  1506  func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) tcpip.Error {
  1507  	s.mu.RLock()
  1508  	defer s.mu.RUnlock()
  1509  
  1510  	nic, ok := s.nics[nicID]
  1511  	if !ok {
  1512  		return &tcpip.ErrUnknownNICID{}
  1513  	}
  1514  
  1515  	nic.setPromiscuousMode(enable)
  1516  
  1517  	return nil
  1518  }
  1519  
  1520  // SetSpoofing enables or disables address spoofing in the given NIC, allowing
  1521  // endpoints to bind to any address in the NIC.
  1522  func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) tcpip.Error {
  1523  	s.mu.RLock()
  1524  	defer s.mu.RUnlock()
  1525  
  1526  	nic, ok := s.nics[nicID]
  1527  	if !ok {
  1528  		return &tcpip.ErrUnknownNICID{}
  1529  	}
  1530  
  1531  	nic.setSpoofing(enable)
  1532  
  1533  	return nil
  1534  }
  1535  
// LinkResolutionResult is the result of a link address resolution attempt.
// It is the value handed to the onResolve callback of Stack.GetLinkAddress.
type LinkResolutionResult struct {
	// LinkAddress is the resolved link address.
	// NOTE(review): presumably only meaningful when Err is nil; confirm
	// against the resolver implementation.
	LinkAddress tcpip.LinkAddress
	// Err is the error associated with the resolution attempt, if any.
	Err tcpip.Error
}
  1541  
  1542  // GetLinkAddress finds the link address corresponding to a network address.
  1543  //
  1544  // Returns ErrNotSupported if the stack is not configured with a link address
  1545  // resolver for the specified network protocol.
  1546  //
  1547  // Returns ErrWouldBlock if the link address is not readily available, along
  1548  // with a notification channel for the caller to block on. Triggers address
  1549  // resolution asynchronously.
  1550  //
  1551  // onResolve will be called either immediately, if resolution is not required,
  1552  // or when address resolution is complete, with the resolved link address and
  1553  // whether resolution succeeded.
  1554  //
  1555  // If specified, the local address must be an address local to the interface
  1556  // the neighbor cache belongs to. The local address is the source address of
  1557  // a packet prompting NUD/link address resolution.
  1558  func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error {
  1559  	s.mu.RLock()
  1560  	nic, ok := s.nics[nicID]
  1561  	s.mu.RUnlock()
  1562  	if !ok {
  1563  		return &tcpip.ErrUnknownNICID{}
  1564  	}
  1565  
  1566  	return nic.getLinkAddress(addr, localAddr, protocol, onResolve)
  1567  }
  1568  
  1569  // Neighbors returns all IP to MAC address associations.
  1570  func (s *Stack) Neighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) {
  1571  	s.mu.RLock()
  1572  	nic, ok := s.nics[nicID]
  1573  	s.mu.RUnlock()
  1574  
  1575  	if !ok {
  1576  		return nil, &tcpip.ErrUnknownNICID{}
  1577  	}
  1578  
  1579  	return nic.neighbors(protocol)
  1580  }
  1581  
  1582  // AddStaticNeighbor statically associates an IP address to a MAC address.
  1583  func (s *Stack) AddStaticNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error {
  1584  	s.mu.RLock()
  1585  	nic, ok := s.nics[nicID]
  1586  	s.mu.RUnlock()
  1587  
  1588  	if !ok {
  1589  		return &tcpip.ErrUnknownNICID{}
  1590  	}
  1591  
  1592  	return nic.addStaticNeighbor(addr, protocol, linkAddr)
  1593  }
  1594  
  1595  // RemoveNeighbor removes an IP to MAC address association previously created
  1596  // either automically or by AddStaticNeighbor. Returns ErrBadAddress if there
  1597  // is no association with the provided address.
  1598  func (s *Stack) RemoveNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
  1599  	s.mu.RLock()
  1600  	nic, ok := s.nics[nicID]
  1601  	s.mu.RUnlock()
  1602  
  1603  	if !ok {
  1604  		return &tcpip.ErrUnknownNICID{}
  1605  	}
  1606  
  1607  	return nic.removeNeighbor(protocol, addr)
  1608  }
  1609  
  1610  // ClearNeighbors removes all IP to MAC address associations.
  1611  func (s *Stack) ClearNeighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) tcpip.Error {
  1612  	s.mu.RLock()
  1613  	nic, ok := s.nics[nicID]
  1614  	s.mu.RUnlock()
  1615  
  1616  	if !ok {
  1617  		return &tcpip.ErrUnknownNICID{}
  1618  	}
  1619  
  1620  	return nic.clearNeighbors(protocol)
  1621  }
  1622  
// RegisterTransportEndpoint registers the given endpoint with the stack
// transport dispatcher. Received packets that match the provided id will be
// delivered to the given endpoint; specifying a nic is optional, but
// nic-specific IDs have precedence over global ones.
//
// The call is forwarded unchanged to s.demux.registerEndpoint and its result
// is returned.
func (s *Stack) RegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error {
	return s.demux.registerEndpoint(netProtos, protocol, id, ep, flags, bindToDevice)
}
  1630  
// CheckRegisterTransportEndpoint checks if an endpoint can be registered with
// the stack transport dispatcher.
//
// The call is forwarded unchanged to s.demux.checkEndpoint and its result is
// returned.
func (s *Stack) CheckRegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error {
	return s.demux.checkEndpoint(netProtos, protocol, id, flags, bindToDevice)
}
  1636  
// UnregisterTransportEndpoint removes the endpoint with the given id from the
// stack transport dispatcher.
//
// The call is forwarded unchanged to s.demux.unregisterEndpoint.
func (s *Stack) UnregisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) {
	s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice)
}
  1642  
// StartTransportEndpointCleanup removes the endpoint with the given id from
// the stack transport dispatcher. It also transitions it to the cleanup stage.
//
// The endpoint is added to s.cleanupEndpoints (guarded by
// s.cleanupEndpointsMu) before it is unregistered from the demultiplexer.
func (s *Stack) StartTransportEndpointCleanup(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) {
	// Record ep as being in the cleanup stage.
	s.cleanupEndpointsMu.Lock()
	s.cleanupEndpoints[ep] = struct{}{}
	s.cleanupEndpointsMu.Unlock()

	// The demux call happens after cleanupEndpointsMu has been released.
	s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice)
}
  1652  
  1653  // CompleteTransportEndpointCleanup removes the endpoint from the cleanup
  1654  // stage.
  1655  func (s *Stack) CompleteTransportEndpointCleanup(ep TransportEndpoint) {
  1656  	s.cleanupEndpointsMu.Lock()
  1657  	delete(s.cleanupEndpoints, ep)
  1658  	s.cleanupEndpointsMu.Unlock()
  1659  }
  1660  
// FindTransportEndpoint finds an endpoint that most closely matches the provided
// id. If no endpoint is found it returns nil.
//
// The lookup is performed by s.demux.findTransportEndpoint.
func (s *Stack) FindTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, nicID tcpip.NICID) TransportEndpoint {
	return s.demux.findTransportEndpoint(netProto, transProto, id, nicID)
}
  1666  
// RegisterRawTransportEndpoint registers the given endpoint with the stack
// transport dispatcher. Received packets that match the provided transport
// protocol will be delivered to the given endpoint.
//
// The call is forwarded unchanged to s.demux.registerRawEndpoint and its
// result is returned.
func (s *Stack) RegisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) tcpip.Error {
	return s.demux.registerRawEndpoint(netProto, transProto, ep)
}
  1673  
// UnregisterRawTransportEndpoint removes the endpoint for the transport
// protocol from the stack transport dispatcher.
//
// The call is forwarded unchanged to s.demux.unregisterRawEndpoint.
func (s *Stack) UnregisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) {
	s.demux.unregisterRawEndpoint(netProto, transProto, ep)
}
  1679  
  1680  // RegisterRestoredEndpoint records e as an endpoint that has been restored on
  1681  // this stack.
  1682  func (s *Stack) RegisterRestoredEndpoint(e ResumableEndpoint) {
  1683  	s.mu.Lock()
  1684  	s.resumableEndpoints = append(s.resumableEndpoints, e)
  1685  	s.mu.Unlock()
  1686  }
  1687  
  1688  // RegisteredEndpoints returns all endpoints which are currently registered.
  1689  func (s *Stack) RegisteredEndpoints() []TransportEndpoint {
  1690  	s.mu.Lock()
  1691  	defer s.mu.Unlock()
  1692  	var es []TransportEndpoint
  1693  	for _, e := range s.demux.protocol {
  1694  		es = append(es, e.transportEndpoints()...)
  1695  	}
  1696  	return es
  1697  }
  1698  
  1699  // CleanupEndpoints returns endpoints currently in the cleanup state.
  1700  func (s *Stack) CleanupEndpoints() []TransportEndpoint {
  1701  	s.cleanupEndpointsMu.Lock()
  1702  	es := make([]TransportEndpoint, 0, len(s.cleanupEndpoints))
  1703  	for e := range s.cleanupEndpoints {
  1704  		es = append(es, e)
  1705  	}
  1706  	s.cleanupEndpointsMu.Unlock()
  1707  	return es
  1708  }
  1709  
  1710  // RestoreCleanupEndpoints adds endpoints to cleanup tracking. This is useful
  1711  // for restoring a stack after a save.
  1712  func (s *Stack) RestoreCleanupEndpoints(es []TransportEndpoint) {
  1713  	s.cleanupEndpointsMu.Lock()
  1714  	for _, e := range es {
  1715  		s.cleanupEndpoints[e] = struct{}{}
  1716  	}
  1717  	s.cleanupEndpointsMu.Unlock()
  1718  }
  1719  
  1720  // Close closes all currently registered transport endpoints.
  1721  //
  1722  // Endpoints created or modified during this call may not get closed.
  1723  func (s *Stack) Close() {
  1724  	for _, e := range s.RegisteredEndpoints() {
  1725  		e.Abort()
  1726  	}
  1727  	for _, p := range s.transportProtocols {
  1728  		p.proto.Close()
  1729  	}
  1730  	for _, p := range s.networkProtocols {
  1731  		p.Close()
  1732  	}
  1733  }
  1734  
  1735  // Wait waits for all transport and link endpoints to halt their worker
  1736  // goroutines.
  1737  //
  1738  // Endpoints created or modified during this call may not get waited on.
  1739  //
  1740  // Note that link endpoints must be stopped via an implementation specific
  1741  // mechanism.
  1742  func (s *Stack) Wait() {
  1743  	for _, e := range s.RegisteredEndpoints() {
  1744  		e.Wait()
  1745  	}
  1746  	for _, e := range s.CleanupEndpoints() {
  1747  		e.Wait()
  1748  	}
  1749  	for _, p := range s.transportProtocols {
  1750  		p.proto.Wait()
  1751  	}
  1752  	for _, p := range s.networkProtocols {
  1753  		p.Wait()
  1754  	}
  1755  
  1756  	s.mu.Lock()
  1757  	defer s.mu.Unlock()
  1758  
  1759  	for id, n := range s.nics {
  1760  		// Remove NIC to ensure that qDisc goroutines are correctly
  1761  		// terminated on stack teardown.
  1762  		s.removeNICLocked(id)
  1763  		n.NetworkLinkEndpoint.Wait()
  1764  	}
  1765  }
  1766  
// Destroy destroys the stack with all endpoints.
//
// It calls Close followed by Wait.
func (s *Stack) Destroy() {
	s.Close()
	s.Wait()
}
  1772  
  1773  // Pause pauses any protocol level background workers.
  1774  func (s *Stack) Pause() {
  1775  	for _, p := range s.transportProtocols {
  1776  		p.proto.Pause()
  1777  	}
  1778  }
  1779  
  1780  // Resume restarts the stack after a restore. This must be called after the
  1781  // entire system has been restored.
  1782  func (s *Stack) Resume() {
  1783  	// ResumableEndpoint.Resume() may call other methods on s, so we can't hold
  1784  	// s.mu while resuming the endpoints.
  1785  	s.mu.Lock()
  1786  	eps := s.resumableEndpoints
  1787  	s.resumableEndpoints = nil
  1788  	s.mu.Unlock()
  1789  	for _, e := range eps {
  1790  		e.Resume(s)
  1791  	}
  1792  	// Now resume any protocol level background workers.
  1793  	for _, p := range s.transportProtocols {
  1794  		p.proto.Resume()
  1795  	}
  1796  }
  1797  
  1798  // RegisterPacketEndpoint registers ep with the stack, causing it to receive
  1799  // all traffic of the specified netProto on the given NIC. If nicID is 0, it
  1800  // receives traffic from every NIC.
  1801  func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) tcpip.Error {
  1802  	s.mu.Lock()
  1803  	defer s.mu.Unlock()
  1804  
  1805  	// If no NIC is specified, capture on all devices.
  1806  	if nicID == 0 {
  1807  		// Register with each NIC.
  1808  		for _, nic := range s.nics {
  1809  			if err := nic.registerPacketEndpoint(netProto, ep); err != nil {
  1810  				s.unregisterPacketEndpointLocked(0, netProto, ep)
  1811  				return err
  1812  			}
  1813  		}
  1814  		return nil
  1815  	}
  1816  
  1817  	// Capture on a specific device.
  1818  	nic, ok := s.nics[nicID]
  1819  	if !ok {
  1820  		return &tcpip.ErrUnknownNICID{}
  1821  	}
  1822  	if err := nic.registerPacketEndpoint(netProto, ep); err != nil {
  1823  		return err
  1824  	}
  1825  
  1826  	return nil
  1827  }
  1828  
// UnregisterPacketEndpoint unregisters ep for packets of the specified
// netProto from the specified NIC. If nicID is 0, ep is unregistered from all
// NICs.
//
// It acquires s.mu and delegates to unregisterPacketEndpointLocked.
func (s *Stack) UnregisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.unregisterPacketEndpointLocked(nicID, netProto, ep)
}
  1837  
  1838  // +checklocks:s.mu
  1839  func (s *Stack) unregisterPacketEndpointLocked(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
  1840  	// If no NIC is specified, unregister on all devices.
  1841  	if nicID == 0 {
  1842  		// Unregister with each NIC.
  1843  		for _, nic := range s.nics {
  1844  			nic.unregisterPacketEndpoint(netProto, ep)
  1845  		}
  1846  		return
  1847  	}
  1848  
  1849  	// Unregister in a single device.
  1850  	nic, ok := s.nics[nicID]
  1851  	if !ok {
  1852  		return
  1853  	}
  1854  	nic.unregisterPacketEndpoint(netProto, ep)
  1855  }
  1856  
  1857  // WritePacketToRemote writes a payload on the specified NIC using the provided
  1858  // network protocol and remote link address.
  1859  func (s *Stack) WritePacketToRemote(nicID tcpip.NICID, remote tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error {
  1860  	s.mu.Lock()
  1861  	nic, ok := s.nics[nicID]
  1862  	s.mu.Unlock()
  1863  	if !ok {
  1864  		return &tcpip.ErrUnknownDevice{}
  1865  	}
  1866  	pkt := NewPacketBuffer(PacketBufferOptions{
  1867  		ReserveHeaderBytes: int(nic.MaxHeaderLength()),
  1868  		Payload:            payload,
  1869  	})
  1870  	defer pkt.DecRef()
  1871  	pkt.NetworkProtocolNumber = netProto
  1872  	return nic.WritePacketToRemote(remote, pkt)
  1873  }
  1874  
  1875  // WriteRawPacket writes data directly to the specified NIC without adding any
  1876  // headers.
  1877  func (s *Stack) WriteRawPacket(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error {
  1878  	s.mu.RLock()
  1879  	nic, ok := s.nics[nicID]
  1880  	s.mu.RUnlock()
  1881  	if !ok {
  1882  		return &tcpip.ErrUnknownNICID{}
  1883  	}
  1884  
  1885  	pkt := NewPacketBuffer(PacketBufferOptions{
  1886  		Payload: payload,
  1887  	})
  1888  	defer pkt.DecRef()
  1889  	pkt.NetworkProtocolNumber = proto
  1890  	return nic.writeRawPacketWithLinkHeaderInPayload(pkt)
  1891  }
  1892  
  1893  // NetworkProtocolInstance returns the protocol instance in the stack for the
  1894  // specified network protocol. This method is public for protocol implementers
  1895  // and tests to use.
  1896  func (s *Stack) NetworkProtocolInstance(num tcpip.NetworkProtocolNumber) NetworkProtocol {
  1897  	if p, ok := s.networkProtocols[num]; ok {
  1898  		return p
  1899  	}
  1900  	return nil
  1901  }
  1902  
  1903  // TransportProtocolInstance returns the protocol instance in the stack for the
  1904  // specified transport protocol. This method is public for protocol implementers
  1905  // and tests to use.
  1906  func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) TransportProtocol {
  1907  	if pState, ok := s.transportProtocols[num]; ok {
  1908  		return pState.proto
  1909  	}
  1910  	return nil
  1911  }
  1912  
// AddTCPProbe installs a probe function that will be invoked on every segment
// received by a given TCP endpoint. The probe function is passed a copy of the
// TCP endpoint state before and after processing of the segment.
//
// NOTE: TCPProbe is added only to endpoints created after this call. Endpoints
// created prior to this call will not call the probe function.
//
// Further, installing two different probes back to back can result in some
// endpoints calling the first one and some the second one. There is no
// guarantee provided on which probe will be invoked. Ideally this should only
// be called once per stack.
//
// The probe is recorded with s.tcpProbeFunc.Store and can be read back with
// GetTCPProbe.
func (s *Stack) AddTCPProbe(probe TCPProbeFunc) {
	s.tcpProbeFunc.Store(probe)
}
  1927  
  1928  // GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil
  1929  // otherwise.
  1930  func (s *Stack) GetTCPProbe() TCPProbeFunc {
  1931  	p := s.tcpProbeFunc.Load()
  1932  	if p == nil {
  1933  		return nil
  1934  	}
  1935  	return p.(TCPProbeFunc)
  1936  }
  1937  
// RemoveTCPProbe removes an installed TCP probe.
//
// NOTE: This only ensures that endpoints created after this call do not
// have a probe attached. Endpoints already created will continue to invoke
// TCP probe.
//
// Removal is implemented by storing a nil TCPProbeFunc in s.tcpProbeFunc.
func (s *Stack) RemoveTCPProbe() {
	// This must be TCPProbeFunc(nil) because atomic.Value.Store(nil) panics.
	s.tcpProbeFunc.Store(TCPProbeFunc(nil))
}
  1947  
  1948  // JoinGroup joins the given multicast group on the given NIC.
  1949  func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error {
  1950  	s.mu.RLock()
  1951  	defer s.mu.RUnlock()
  1952  
  1953  	if nic, ok := s.nics[nicID]; ok {
  1954  		return nic.joinGroup(protocol, multicastAddr)
  1955  	}
  1956  	return &tcpip.ErrUnknownNICID{}
  1957  }
  1958  
  1959  // LeaveGroup leaves the given multicast group on the given NIC.
  1960  func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error {
  1961  	s.mu.RLock()
  1962  	defer s.mu.RUnlock()
  1963  
  1964  	if nic, ok := s.nics[nicID]; ok {
  1965  		return nic.leaveGroup(protocol, multicastAddr)
  1966  	}
  1967  	return &tcpip.ErrUnknownNICID{}
  1968  }
  1969  
  1970  // IsInGroup returns true if the NIC with ID nicID has joined the multicast
  1971  // group multicastAddr.
  1972  func (s *Stack) IsInGroup(nicID tcpip.NICID, multicastAddr tcpip.Address) (bool, tcpip.Error) {
  1973  	s.mu.RLock()
  1974  	defer s.mu.RUnlock()
  1975  
  1976  	if nic, ok := s.nics[nicID]; ok {
  1977  		return nic.isInGroup(multicastAddr), nil
  1978  	}
  1979  	return false, &tcpip.ErrUnknownNICID{}
  1980  }
  1981  
// IPTables returns the stack's iptables (the s.tables pointer itself, not a
// copy).
func (s *Stack) IPTables() *IPTables {
	return s.tables
}
  1986  
// ICMPLimit returns the maximum number of ICMP messages that can be sent
// in one second, as reported by the stack's ICMP rate limiter.
func (s *Stack) ICMPLimit() rate.Limit {
	return s.icmpRateLimiter.Limit()
}
  1992  
// SetICMPLimit sets the maximum number of ICMP messages that can be sent
// in one second by updating the stack's ICMP rate limiter.
func (s *Stack) SetICMPLimit(newLimit rate.Limit) {
	s.icmpRateLimiter.SetLimit(newLimit)
}
  1998  
// ICMPBurst returns the maximum number of ICMP messages that can be sent
// in a single burst, as reported by the stack's ICMP rate limiter.
func (s *Stack) ICMPBurst() int {
	return s.icmpRateLimiter.Burst()
}
  2004  
// SetICMPBurst sets the maximum number of ICMP messages that can be sent
// in a single burst by updating the stack's ICMP rate limiter.
func (s *Stack) SetICMPBurst(burst int) {
	s.icmpRateLimiter.SetBurst(burst)
}
  2010  
// AllowICMPMessage returns true if the rate limiter allows at least one
// ICMP message to be sent at this instant.
func (s *Stack) AllowICMPMessage() bool {
	return s.icmpRateLimiter.Allow()
}
  2016  
  2017  // GetNetworkEndpoint returns the NetworkEndpoint with the specified protocol
  2018  // number installed on the specified NIC.
  2019  func (s *Stack) GetNetworkEndpoint(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NetworkEndpoint, tcpip.Error) {
  2020  	s.mu.Lock()
  2021  	defer s.mu.Unlock()
  2022  
  2023  	nic, ok := s.nics[nicID]
  2024  	if !ok {
  2025  		return nil, &tcpip.ErrUnknownNICID{}
  2026  	}
  2027  
  2028  	return nic.getNetworkEndpoint(proto), nil
  2029  }
  2030  
  2031  // NUDConfigurations gets the per-interface NUD configurations.
  2032  func (s *Stack) NUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) {
  2033  	s.mu.RLock()
  2034  	nic, ok := s.nics[id]
  2035  	s.mu.RUnlock()
  2036  
  2037  	if !ok {
  2038  		return NUDConfigurations{}, &tcpip.ErrUnknownNICID{}
  2039  	}
  2040  
  2041  	return nic.nudConfigs(proto)
  2042  }
  2043  
  2044  // SetNUDConfigurations sets the per-interface NUD configurations.
  2045  //
  2046  // Note, if c contains invalid NUD configuration values, it will be fixed to
  2047  // use default values for the erroneous values.
  2048  func (s *Stack) SetNUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error {
  2049  	s.mu.RLock()
  2050  	nic, ok := s.nics[id]
  2051  	s.mu.RUnlock()
  2052  
  2053  	if !ok {
  2054  		return &tcpip.ErrUnknownNICID{}
  2055  	}
  2056  
  2057  	return nic.setNUDConfigs(proto, c)
  2058  }
  2059  
// Seed returns a 32 bit value that can be used as a seed value.
//
// NOTE: The seed is generated once during stack initialization only; this
// accessor simply returns the stored s.seed.
func (s *Stack) Seed() uint32 {
	return s.seed
}
  2066  
// Rand returns a reference to a pseudo random generator that can be used
// to generate random numbers as required.
//
// The returned pointer is the stack's own s.randomGenerator, not a copy.
func (s *Stack) Rand() *rand.Rand {
	return s.randomGenerator
}
  2072  
// SecureRNG returns the stack's cryptographically secure random number
// generator (s.secureRNG) as an io.Reader.
func (s *Stack) SecureRNG() io.Reader {
	return s.secureRNG
}
  2078  
  2079  // FindNICNameFromID returns the name of the NIC for the given NICID.
  2080  func (s *Stack) FindNICNameFromID(id tcpip.NICID) string {
  2081  	s.mu.RLock()
  2082  	defer s.mu.RUnlock()
  2083  
  2084  	nic, ok := s.nics[id]
  2085  	if !ok {
  2086  		return ""
  2087  	}
  2088  
  2089  	return nic.Name()
  2090  }
  2091  
  2092  // ParseResult indicates the result of a parsing attempt.
  2093  type ParseResult int
  2094  
  2095  const (
  2096  	// ParsedOK indicates that a packet was successfully parsed.
  2097  	ParsedOK ParseResult = iota
  2098  
  2099  	// UnknownTransportProtocol indicates that the transport protocol is unknown.
  2100  	UnknownTransportProtocol
  2101  
  2102  	// TransportLayerParseError indicates that the transport packet was not
  2103  	// successfully parsed.
  2104  	TransportLayerParseError
  2105  )
  2106  
  2107  // ParsePacketBufferTransport parses the provided packet buffer's transport
  2108  // header.
  2109  func (s *Stack) ParsePacketBufferTransport(protocol tcpip.TransportProtocolNumber, pkt PacketBufferPtr) ParseResult {
  2110  	pkt.TransportProtocolNumber = protocol
  2111  	// Parse the transport header if present.
  2112  	state, ok := s.transportProtocols[protocol]
  2113  	if !ok {
  2114  		return UnknownTransportProtocol
  2115  	}
  2116  
  2117  	if !state.proto.Parse(pkt) {
  2118  		return TransportLayerParseError
  2119  	}
  2120  
  2121  	return ParsedOK
  2122  }
  2123  
  2124  // networkProtocolNumbers returns the network protocol numbers the stack is
  2125  // configured with.
  2126  func (s *Stack) networkProtocolNumbers() []tcpip.NetworkProtocolNumber {
  2127  	protos := make([]tcpip.NetworkProtocolNumber, 0, len(s.networkProtocols))
  2128  	for p := range s.networkProtocols {
  2129  		protos = append(protos, p)
  2130  	}
  2131  	return protos
  2132  }
  2133  
  2134  func isSubnetBroadcastOnNIC(nic *nic, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
  2135  	addressEndpoint := nic.getAddressOrCreateTempInner(protocol, addr, false /* createTemp */, NeverPrimaryEndpoint)
  2136  	if addressEndpoint == nil {
  2137  		return false
  2138  	}
  2139  
  2140  	subnet := addressEndpoint.Subnet()
  2141  	addressEndpoint.DecRef()
  2142  	return subnet.IsBroadcast(addr)
  2143  }
  2144  
  2145  // IsSubnetBroadcast returns true if the provided address is a subnet-local
  2146  // broadcast address on the specified NIC and protocol.
  2147  //
  2148  // Returns false if the NIC is unknown or if the protocol is unknown or does
  2149  // not support addressing.
  2150  //
  2151  // If the NIC is not specified, the stack will check all NICs.
  2152  func (s *Stack) IsSubnetBroadcast(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
  2153  	s.mu.RLock()
  2154  	defer s.mu.RUnlock()
  2155  
  2156  	if nicID != 0 {
  2157  		nic, ok := s.nics[nicID]
  2158  		if !ok {
  2159  			return false
  2160  		}
  2161  
  2162  		return isSubnetBroadcastOnNIC(nic, protocol, addr)
  2163  	}
  2164  
  2165  	for _, nic := range s.nics {
  2166  		if isSubnetBroadcastOnNIC(nic, protocol, addr) {
  2167  			return true
  2168  		}
  2169  	}
  2170  
  2171  	return false
  2172  }
  2173  
// PacketEndpointWriteSupported returns true iff packet endpoints support write
// operations.
//
// The value is read from the stack's packetEndpointWriteSupported field.
func (s *Stack) PacketEndpointWriteSupported() bool {
	return s.packetEndpointWriteSupported
}