github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/stack/stack.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package stack provides the glue between networking protocols and the
    16  // consumers of the networking stack.
    17  //
    18  // For consumers, the only function of interest is New(), everything else is
    19  // provided by the tcpip/public package.
    20  package stack
    21  
    22  import (
    23  	"encoding/binary"
    24  	"fmt"
    25  	"io"
    26  	"math/rand"
    27  	"sync/atomic"
    28  	"time"
    29  
    30  	"golang.org/x/time/rate"
    31  	"github.com/SagerNet/gvisor/pkg/atomicbitops"
    32  	cryptorand "github.com/SagerNet/gvisor/pkg/rand"
    33  	"github.com/SagerNet/gvisor/pkg/sync"
    34  	"github.com/SagerNet/gvisor/pkg/tcpip"
    35  	"github.com/SagerNet/gvisor/pkg/tcpip/buffer"
    36  	"github.com/SagerNet/gvisor/pkg/tcpip/header"
    37  	"github.com/SagerNet/gvisor/pkg/tcpip/ports"
    38  	"github.com/SagerNet/gvisor/pkg/waiter"
    39  )
    40  
// Defaults applied to network endpoints.
const (
	// DefaultTOS is the default type of service value for network endpoints.
	DefaultTOS = 0
)
    45  
// transportProtocolState pairs a transport protocol registered with the stack
// (proto) with an optional per-stack default packet handler (defaultHandler).
// The handler is installed through Stack.SetTransportProtocolHandler and is
// nil until then.
type transportProtocolState struct {
	proto          TransportProtocol
	defaultHandler func(id TransportEndpointID, pkt *PacketBuffer) bool
}
    50  
// ResumableEndpoint is an endpoint that needs to be resumed after restore.
type ResumableEndpoint interface {
	// Resume resumes an endpoint after restore. This can be used to restart
	// background workers such as protocol goroutines. This must be called after
	// all indirect dependencies of the endpoint have been restored, which
	// generally implies at the end of the restore process.
	Resume(*Stack)
}
    59  
    60  // uniqueIDGenerator is a default unique ID generator.
    61  type uniqueIDGenerator atomicbitops.AlignedAtomicUint64
    62  
    63  func (u *uniqueIDGenerator) UniqueID() uint64 {
    64  	return ((*atomicbitops.AlignedAtomicUint64)(u)).Add(1)
    65  }
    66  
// Stack is a networking stack, with all supported protocols, NICs, and route
// table.
type Stack struct {
	// transportProtocols and networkProtocols hold the protocols enabled on
	// this stack, keyed by protocol number. Both maps are populated by New.
	transportProtocols map[tcpip.TransportProtocolNumber]*transportProtocolState
	networkProtocols   map[tcpip.NetworkProtocolNumber]NetworkProtocol

	// rawFactory creates raw endpoints. If nil, raw endpoints are
	// disabled. It is set during Stack creation and is immutable.
	rawFactory RawFactory

	// demux is the stack-wide transport demuxer created by New.
	demux *transportDemuxer

	// stats holds the stack's statistic counters, initialized from
	// Options.Stats.
	stats tcpip.Stats

	// LOCK ORDERING: mu > route.mu.
	route struct {
		mu struct {
			sync.RWMutex

			// table is the routing table; access it with the enclosing lock held.
			table []tcpip.Route
		}
	}

	// mu is held by the methods in this file whenever they read or modify nics
	// or defaultForwardingEnabled. nics maps NIC IDs to the NICs created on
	// this stack; defaultForwardingEnabled is the set of network protocols for
	// which forwarding is turned on for newly created NICs.
	mu                       sync.RWMutex
	nics                     map[tcpip.NICID]*nic
	defaultForwardingEnabled map[tcpip.NetworkProtocolNumber]struct{}

	// cleanupEndpointsMu protects cleanupEndpoints.
	cleanupEndpointsMu sync.Mutex
	cleanupEndpoints   map[TransportEndpoint]struct{}

	// PortManager is embedded; PortRange and SetPortRange delegate to it.
	*ports.PortManager

	// If not nil, then any new endpoints will have this probe function
	// invoked every time they receive a TCP segment.
	tcpProbeFunc atomic.Value // TCPProbeFunc

	// clock is used to generate user-visible times.
	clock tcpip.Clock

	// handleLocal allows non-loopback interfaces to loop packets.
	handleLocal bool

	// tables are the iptables packet filtering and manipulation rules.
	// TODO(github.com/SagerNet/issue/4595): S/R this field.
	tables *IPTables

	// resumableEndpoints is a list of endpoints that need to be resumed if the
	// stack is being restored.
	resumableEndpoints []ResumableEndpoint

	// icmpRateLimiter is a global rate limiter for all ICMP messages generated
	// by the stack.
	icmpRateLimiter *ICMPRateLimiter

	// seed is a one-time random value initialized at stack startup
	// and is used to seed the TCP port picking on active connections
	//
	// TODO(github.com/SagerNet/issue/940): S/R this field.
	seed uint32

	// nudConfigs is the default NUD configurations used by interfaces.
	nudConfigs NUDConfigurations

	// nudDisp is the NUD event dispatcher that is used to send the netstack
	// integrator NUD related events.
	nudDisp NUDDispatcher

	// uniqueIDGenerator is a generator of unique identifiers.
	uniqueIDGenerator UniqueID

	// randomGenerator is an injectable pseudo random generator that can be
	// used when a random number is required.
	randomGenerator *rand.Rand

	// secureRNG is a cryptographically secure random number generator.
	secureRNG io.Reader

	// sendBufferSize holds the min/default/max send buffer sizes for
	// endpoints other than TCP.
	sendBufferSize tcpip.SendBufferSizeOption

	// receiveBufferSize holds the min/default/max receive buffer sizes for
	// endpoints other than TCP.
	receiveBufferSize tcpip.ReceiveBufferSizeOption

	// tcpInvalidRateLimit is the maximal rate for sending duplicate
	// acknowledgements in response to incoming TCP packets that are for an existing
	// connection but that are invalid due to any of the following reasons:
	//
	//   a) out-of-window sequence number.
	//   b) out-of-window acknowledgement number.
	//   c) PAWS check failure (when implemented).
	//
	// This is required to prevent potential ACK loops.
	// Setting this to 0 will disable all rate limiting.
	tcpInvalidRateLimit time.Duration
}
   165  
// UniqueID is an abstract generator of unique identifiers.
type UniqueID interface {
	// UniqueID returns a unique 64-bit identifier.
	UniqueID() uint64
}
   170  
// NetworkProtocolFactory instantiates a network protocol. New invokes each
// factory listed in Options.NetworkProtocols with the stack being built.
//
// NetworkProtocolFactory must not attempt to modify the stack, it may only
// query the stack.
type NetworkProtocolFactory func(*Stack) NetworkProtocol
   176  
// TransportProtocolFactory instantiates a transport protocol. New invokes each
// factory listed in Options.TransportProtocols with the stack being built.
//
// TransportProtocolFactory must not attempt to modify the stack, it may only
// query the stack.
type TransportProtocolFactory func(*Stack) TransportProtocol
   182  
// Options contains optional Stack configuration.
type Options struct {
	// NetworkProtocols lists the network protocols to enable.
	NetworkProtocols []NetworkProtocolFactory

	// TransportProtocols lists the transport protocols to enable.
	TransportProtocols []TransportProtocolFactory

	// Clock is an optional clock used for timekeeping.
	//
	// If Clock is nil, tcpip.NewStdClock() will be used.
	Clock tcpip.Clock

	// Stats are optional statistic counters.
	Stats tcpip.Stats

	// HandleLocal indicates whether packets destined to their source
	// should be handled by the stack internally (true) or outside the
	// stack (false).
	HandleLocal bool

	// UniqueID is an optional generator of unique identifiers. If nil, New
	// falls back to an internal atomic counter.
	UniqueID UniqueID

	// NUDConfigs is the default NUD configurations used by interfaces.
	NUDConfigs NUDConfigurations

	// NUDDisp is the NUD event dispatcher that an integrator can provide to
	// receive NUD related events.
	NUDDisp NUDDispatcher

	// RawFactory produces raw endpoints. Raw endpoints are enabled only if
	// this is non-nil.
	RawFactory RawFactory

	// RandSource is an optional source to use to generate random
	// numbers. If omitted it defaults to a Source seeded by the data
	// returned by the stack secure RNG.
	//
	// RandSource must be thread-safe.
	RandSource rand.Source

	// IPTables are the initial iptables rules. If nil, DefaultIPTables will be
	// used to construct the initial iptables rules.
	IPTables *IPTables

	// DefaultIPTables is an optional iptables rules constructor that is called
	// if IPTables is nil. If both fields are nil, iptables will allow all
	// traffic.
	DefaultIPTables func(uint32) *IPTables

	// SecureRNG is a cryptographically secure random number generator. If nil,
	// New uses cryptorand.Reader.
	SecureRNG io.Reader
}
   238  
// TransportEndpointInfo holds useful information about a transport endpoint
// which can be queried by monitoring tools.
//
// +stateify savable
type TransportEndpointInfo struct {
	// The following fields are initialized at creation time and are
	// immutable.

	NetProto   tcpip.NetworkProtocolNumber
	TransProto tcpip.TransportProtocolNumber

	// The following fields are protected by endpoint mu.

	ID TransportEndpointID
	// BindNICID and BindAddr are set via calls to Bind(). They are used to
	// reject attempts to send data or connect via a different NIC or
	// address.
	BindNICID tcpip.NICID
	BindAddr  tcpip.Address
	// RegisterNICID is the default NICID registered as a side-effect of
	// connect or datagram write.
	RegisterNICID tcpip.NICID
}
   262  
   263  // AddrNetProtoLocked unwraps the specified address if it is a V4-mapped V6
   264  // address and returns the network protocol number to be used to communicate
   265  // with the specified address. It returns an error if the passed address is
   266  // incompatible with the receiver.
   267  //
   268  // Preconditon: the parent endpoint mu must be held while calling this method.
   269  func (t *TransportEndpointInfo) AddrNetProtoLocked(addr tcpip.FullAddress, v6only bool) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) {
   270  	netProto := t.NetProto
   271  	switch len(addr.Addr) {
   272  	case header.IPv4AddressSize:
   273  		netProto = header.IPv4ProtocolNumber
   274  	case header.IPv6AddressSize:
   275  		if header.IsV4MappedAddress(addr.Addr) {
   276  			netProto = header.IPv4ProtocolNumber
   277  			addr.Addr = addr.Addr[header.IPv6AddressSize-header.IPv4AddressSize:]
   278  			if addr.Addr == header.IPv4Any {
   279  				addr.Addr = ""
   280  			}
   281  		}
   282  	}
   283  
   284  	switch len(t.ID.LocalAddress) {
   285  	case header.IPv4AddressSize:
   286  		if len(addr.Addr) == header.IPv6AddressSize {
   287  			return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{}
   288  		}
   289  	case header.IPv6AddressSize:
   290  		if len(addr.Addr) == header.IPv4AddressSize {
   291  			return tcpip.FullAddress{}, 0, &tcpip.ErrNetworkUnreachable{}
   292  		}
   293  	}
   294  
   295  	switch {
   296  	case netProto == t.NetProto:
   297  	case netProto == header.IPv4ProtocolNumber && t.NetProto == header.IPv6ProtocolNumber:
   298  		if v6only {
   299  			return tcpip.FullAddress{}, 0, &tcpip.ErrNoRoute{}
   300  		}
   301  	default:
   302  		return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{}
   303  	}
   304  
   305  	return addr, netProto, nil
   306  }
   307  
// IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo
// marker interface. It has no behavior of its own.
func (*TransportEndpointInfo) IsEndpointInfo() {}
   311  
   312  // New allocates a new networking stack with only the requested networking and
   313  // transport protocols configured with default options.
   314  //
   315  // Note, NDPConfigurations will be fixed before being used by the Stack. That
   316  // is, if an invalid value was provided, it will be reset to the default value.
   317  //
   318  // Protocol options can be changed by calling the
   319  // SetNetworkProtocolOption/SetTransportProtocolOption methods provided by the
   320  // stack. Please refer to individual protocol implementations as to what options
   321  // are supported.
   322  func New(opts Options) *Stack {
   323  	clock := opts.Clock
   324  	if clock == nil {
   325  		clock = tcpip.NewStdClock()
   326  	}
   327  
   328  	if opts.UniqueID == nil {
   329  		opts.UniqueID = new(uniqueIDGenerator)
   330  	}
   331  
   332  	if opts.SecureRNG == nil {
   333  		opts.SecureRNG = cryptorand.Reader
   334  	}
   335  
   336  	randSrc := opts.RandSource
   337  	if randSrc == nil {
   338  		var v int64
   339  		if err := binary.Read(opts.SecureRNG, binary.LittleEndian, &v); err != nil {
   340  			panic(err)
   341  		}
   342  		// Source provided by rand.NewSource is not thread-safe so
   343  		// we wrap it in a simple thread-safe version.
   344  		randSrc = &lockedRandomSource{src: rand.NewSource(v)}
   345  	}
   346  	randomGenerator := rand.New(randSrc)
   347  
   348  	seed := randomGenerator.Uint32()
   349  	if opts.IPTables == nil {
   350  		if opts.DefaultIPTables == nil {
   351  			opts.DefaultIPTables = DefaultTables
   352  		}
   353  		opts.IPTables = opts.DefaultIPTables(seed)
   354  	}
   355  
   356  	opts.NUDConfigs.resetInvalidFields()
   357  
   358  	s := &Stack{
   359  		transportProtocols:       make(map[tcpip.TransportProtocolNumber]*transportProtocolState),
   360  		networkProtocols:         make(map[tcpip.NetworkProtocolNumber]NetworkProtocol),
   361  		nics:                     make(map[tcpip.NICID]*nic),
   362  		defaultForwardingEnabled: make(map[tcpip.NetworkProtocolNumber]struct{}),
   363  		cleanupEndpoints:         make(map[TransportEndpoint]struct{}),
   364  		PortManager:              ports.NewPortManager(),
   365  		clock:                    clock,
   366  		stats:                    opts.Stats.FillIn(),
   367  		handleLocal:              opts.HandleLocal,
   368  		tables:                   opts.IPTables,
   369  		icmpRateLimiter:          NewICMPRateLimiter(),
   370  		seed:                     seed,
   371  		nudConfigs:               opts.NUDConfigs,
   372  		uniqueIDGenerator:        opts.UniqueID,
   373  		nudDisp:                  opts.NUDDisp,
   374  		randomGenerator:          randomGenerator,
   375  		secureRNG:                opts.SecureRNG,
   376  		sendBufferSize: tcpip.SendBufferSizeOption{
   377  			Min:     MinBufferSize,
   378  			Default: DefaultBufferSize,
   379  			Max:     DefaultMaxBufferSize,
   380  		},
   381  		receiveBufferSize: tcpip.ReceiveBufferSizeOption{
   382  			Min:     MinBufferSize,
   383  			Default: DefaultBufferSize,
   384  			Max:     DefaultMaxBufferSize,
   385  		},
   386  		tcpInvalidRateLimit: defaultTCPInvalidRateLimit,
   387  	}
   388  
   389  	// Add specified network protocols.
   390  	for _, netProtoFactory := range opts.NetworkProtocols {
   391  		netProto := netProtoFactory(s)
   392  		s.networkProtocols[netProto.Number()] = netProto
   393  	}
   394  
   395  	// Add specified transport protocols.
   396  	for _, transProtoFactory := range opts.TransportProtocols {
   397  		transProto := transProtoFactory(s)
   398  		s.transportProtocols[transProto.Number()] = &transportProtocolState{
   399  			proto: transProto,
   400  		}
   401  	}
   402  
   403  	// Add the factory for raw endpoints, if present.
   404  	s.rawFactory = opts.RawFactory
   405  
   406  	// Create the global transport demuxer.
   407  	s.demux = newTransportDemuxer(s)
   408  
   409  	return s
   410  }
   411  
// newJob returns a tcpip.Job using the Stack clock. The locker l and the
// function f are passed through unchanged to tcpip.NewJob.
func (s *Stack) newJob(l sync.Locker, f func()) *tcpip.Job {
	return tcpip.NewJob(s.clock, l, f)
}
   416  
// UniqueID returns a unique identifier obtained from the generator the stack
// was configured with (Options.UniqueID, or the built-in default).
func (s *Stack) UniqueID() uint64 {
	return s.uniqueIDGenerator.UniqueID()
}
   421  
   422  // SetNetworkProtocolOption allows configuring individual protocol level
   423  // options. This method returns an error if the protocol is not supported or
   424  // option is not supported by the protocol implementation or the provided value
   425  // is incorrect.
   426  func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.SettableNetworkProtocolOption) tcpip.Error {
   427  	netProto, ok := s.networkProtocols[network]
   428  	if !ok {
   429  		return &tcpip.ErrUnknownProtocol{}
   430  	}
   431  	return netProto.SetOption(option)
   432  }
   433  
   434  // NetworkProtocolOption allows retrieving individual protocol level option
   435  // values. This method returns an error if the protocol is not supported or
   436  // option is not supported by the protocol implementation.
   437  // e.g.
   438  // var v ipv4.MyOption
   439  // err := s.NetworkProtocolOption(tcpip.IPv4ProtocolNumber, &v)
   440  // if err != nil {
   441  //   ...
   442  // }
   443  func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.GettableNetworkProtocolOption) tcpip.Error {
   444  	netProto, ok := s.networkProtocols[network]
   445  	if !ok {
   446  		return &tcpip.ErrUnknownProtocol{}
   447  	}
   448  	return netProto.Option(option)
   449  }
   450  
   451  // SetTransportProtocolOption allows configuring individual protocol level
   452  // options. This method returns an error if the protocol is not supported or
   453  // option is not supported by the protocol implementation or the provided value
   454  // is incorrect.
   455  func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.SettableTransportProtocolOption) tcpip.Error {
   456  	transProtoState, ok := s.transportProtocols[transport]
   457  	if !ok {
   458  		return &tcpip.ErrUnknownProtocol{}
   459  	}
   460  	return transProtoState.proto.SetOption(option)
   461  }
   462  
   463  // TransportProtocolOption allows retrieving individual protocol level option
   464  // values. This method returns an error if the protocol is not supported or
   465  // option is not supported by the protocol implementation.
   466  // var v tcp.SACKEnabled
   467  // if err := s.TransportProtocolOption(tcpip.TCPProtocolNumber, &v); err != nil {
   468  //   ...
   469  // }
   470  func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.GettableTransportProtocolOption) tcpip.Error {
   471  	transProtoState, ok := s.transportProtocols[transport]
   472  	if !ok {
   473  		return &tcpip.ErrUnknownProtocol{}
   474  	}
   475  	return transProtoState.proto.Option(option)
   476  }
   477  
   478  // SetTransportProtocolHandler sets the per-stack default handler for the given
   479  // protocol.
   480  //
   481  // It must be called only during initialization of the stack. Changing it as the
   482  // stack is operating is not supported.
   483  func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(TransportEndpointID, *PacketBuffer) bool) {
   484  	state := s.transportProtocols[p]
   485  	if state != nil {
   486  		state.defaultHandler = h
   487  	}
   488  }
   489  
// Clock returns the Stack's clock for retrieving the current time and
// scheduling work. This is the clock chosen in New: Options.Clock, or the
// standard clock when none was given.
func (s *Stack) Clock() tcpip.Clock {
	return s.clock
}
   495  
// Stats returns a mutable copy of the current stats. The tcpip.Stats struct is
// returned by value.
//
// This is not generally exported via the public interface, but is available
// internally.
func (s *Stack) Stats() tcpip.Stats {
	return s.stats
}
   503  
   504  // SetNICForwarding enables or disables packet forwarding on the specified NIC
   505  // for the passed protocol.
   506  func (s *Stack) SetNICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) tcpip.Error {
   507  	s.mu.RLock()
   508  	defer s.mu.RUnlock()
   509  
   510  	nic, ok := s.nics[id]
   511  	if !ok {
   512  		return &tcpip.ErrUnknownNICID{}
   513  	}
   514  
   515  	return nic.setForwarding(protocol, enable)
   516  }
   517  
   518  // NICForwarding returns the forwarding configuration for the specified NIC.
   519  func (s *Stack) NICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) {
   520  	s.mu.RLock()
   521  	defer s.mu.RUnlock()
   522  
   523  	nic, ok := s.nics[id]
   524  	if !ok {
   525  		return false, &tcpip.ErrUnknownNICID{}
   526  	}
   527  
   528  	return nic.forwarding(protocol)
   529  }
   530  
   531  // SetForwardingDefaultAndAllNICs sets packet forwarding for all NICs for the
   532  // passed protocol and sets the default setting for newly created NICs.
   533  func (s *Stack) SetForwardingDefaultAndAllNICs(protocol tcpip.NetworkProtocolNumber, enable bool) tcpip.Error {
   534  	s.mu.Lock()
   535  	defer s.mu.Unlock()
   536  
   537  	doneOnce := false
   538  	for id, nic := range s.nics {
   539  		if err := nic.setForwarding(protocol, enable); err != nil {
   540  			// Expect forwarding to be settable on all interfaces if it was set on
   541  			// one.
   542  			if doneOnce {
   543  				panic(fmt.Sprintf("nic(id=%d).setForwarding(%d, %t): %s", id, protocol, enable, err))
   544  			}
   545  
   546  			return err
   547  		}
   548  
   549  		doneOnce = true
   550  	}
   551  
   552  	if enable {
   553  		s.defaultForwardingEnabled[protocol] = struct{}{}
   554  	} else {
   555  		delete(s.defaultForwardingEnabled, protocol)
   556  	}
   557  
   558  	return nil
   559  }
   560  
// PortRange returns the UDP and TCP inclusive range of ephemeral ports used in
// both IPv4 and IPv6. It delegates to the embedded PortManager.
func (s *Stack) PortRange() (uint16, uint16) {
	return s.PortManager.PortRange()
}
   566  
// SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range
// (inclusive). It delegates to the embedded PortManager and returns its
// result.
func (s *Stack) SetPortRange(start uint16, end uint16) tcpip.Error {
	return s.PortManager.SetPortRange(start, end)
}
   572  
// SetRouteTable assigns the route table to be used by this stack. It
// specifies which NIC to use for given destination address ranges.
//
// This method takes ownership of the table: the slice is stored as-is, without
// copying, under the route table write lock.
func (s *Stack) SetRouteTable(table []tcpip.Route) {
	s.route.mu.Lock()
	defer s.route.mu.Unlock()
	s.route.mu.table = table
}
   582  
   583  // GetRouteTable returns the route table which is currently in use.
   584  func (s *Stack) GetRouteTable() []tcpip.Route {
   585  	s.route.mu.RLock()
   586  	defer s.route.mu.RUnlock()
   587  	return append([]tcpip.Route(nil), s.route.mu.table...)
   588  }
   589  
// AddRoute appends a route to the end of the route table while holding the
// route table write lock.
func (s *Stack) AddRoute(route tcpip.Route) {
	s.route.mu.Lock()
	defer s.route.mu.Unlock()
	s.route.mu.table = append(s.route.mu.table, route)
}
   596  
   597  // RemoveRoutes removes matching routes from the route table.
   598  func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) {
   599  	s.route.mu.Lock()
   600  	defer s.route.mu.Unlock()
   601  
   602  	var filteredRoutes []tcpip.Route
   603  	for _, route := range s.route.mu.table {
   604  		if !match(route) {
   605  			filteredRoutes = append(filteredRoutes, route)
   606  		}
   607  	}
   608  	s.route.mu.table = filteredRoutes
   609  }
   610  
   611  // NewEndpoint creates a new transport layer endpoint of the given protocol.
   612  func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) {
   613  	t, ok := s.transportProtocols[transport]
   614  	if !ok {
   615  		return nil, &tcpip.ErrUnknownProtocol{}
   616  	}
   617  
   618  	return t.proto.NewEndpoint(network, waiterQueue)
   619  }
   620  
   621  // NewRawEndpoint creates a new raw transport layer endpoint of the given
   622  // protocol. Raw endpoints receive all traffic for a given protocol regardless
   623  // of address.
   624  func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, tcpip.Error) {
   625  	if s.rawFactory == nil {
   626  		return nil, &tcpip.ErrNotPermitted{}
   627  	}
   628  
   629  	if !associated {
   630  		return s.rawFactory.NewUnassociatedEndpoint(s, network, transport, waiterQueue)
   631  	}
   632  
   633  	t, ok := s.transportProtocols[transport]
   634  	if !ok {
   635  		return nil, &tcpip.ErrUnknownProtocol{}
   636  	}
   637  
   638  	return t.proto.NewRawEndpoint(network, waiterQueue)
   639  }
   640  
   641  // NewPacketEndpoint creates a new packet endpoint listening for the given
   642  // netProto.
   643  func (s *Stack) NewPacketEndpoint(cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) {
   644  	if s.rawFactory == nil {
   645  		return nil, &tcpip.ErrNotPermitted{}
   646  	}
   647  
   648  	return s.rawFactory.NewPacketEndpoint(s, cooked, netProto, waiterQueue)
   649  }
   650  
// NICContext is an opaque pointer used to store client-supplied NIC metadata.
// It is passed in through NICOptions.Context and handed back in NICInfo.Context.
type NICContext interface{}
   653  
// NICOptions specifies the configuration of a NIC as it is being created.
// The zero value creates an enabled, unnamed NIC.
type NICOptions struct {
	// Name specifies the name of the NIC. A non-empty name must be unique among
	// the NICs of a stack; the empty name is exempt from that check.
	Name string

	// Disabled specifies whether to avoid calling Attach on the passed
	// LinkEndpoint.
	Disabled bool

	// Context specifies user-defined data that will be returned in stack.NICInfo
	// for the NIC. Clients of this library can use it to add metadata that
	// should be tracked alongside a NIC, to avoid having to keep a
	// map[tcpip.NICID]metadata mirroring stack.Stack's nic map.
	Context NICContext
}
   670  
   671  // CreateNICWithOptions creates a NIC with the provided id, LinkEndpoint, and
   672  // NICOptions. See the documentation on type NICOptions for details on how
   673  // NICs can be configured.
   674  //
   675  // LinkEndpoint.Attach will be called to bind ep with a NetworkDispatcher.
   676  func (s *Stack) CreateNICWithOptions(id tcpip.NICID, ep LinkEndpoint, opts NICOptions) tcpip.Error {
   677  	s.mu.Lock()
   678  	defer s.mu.Unlock()
   679  
   680  	// Make sure id is unique.
   681  	if _, ok := s.nics[id]; ok {
   682  		return &tcpip.ErrDuplicateNICID{}
   683  	}
   684  
   685  	// Make sure name is unique, unless unnamed.
   686  	if opts.Name != "" {
   687  		for _, n := range s.nics {
   688  			if n.Name() == opts.Name {
   689  				return &tcpip.ErrDuplicateNICID{}
   690  			}
   691  		}
   692  	}
   693  
   694  	n := newNIC(s, id, opts.Name, ep, opts.Context)
   695  	for proto := range s.defaultForwardingEnabled {
   696  		if err := n.setForwarding(proto, true); err != nil {
   697  			panic(fmt.Sprintf("newNIC(%d, ...).setForwarding(%d, true): %s", id, proto, err))
   698  		}
   699  	}
   700  	s.nics[id] = n
   701  	if !opts.Disabled {
   702  		return n.enable()
   703  	}
   704  
   705  	return nil
   706  }
   707  
// CreateNIC creates a NIC with the provided id and LinkEndpoint and calls
// LinkEndpoint.Attach to bind ep with a NetworkDispatcher. It is shorthand for
// CreateNICWithOptions with the zero NICOptions value.
func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) tcpip.Error {
	return s.CreateNICWithOptions(id, ep, NICOptions{})
}
   713  
   714  // GetLinkEndpointByName gets the link endpoint specified by name.
   715  func (s *Stack) GetLinkEndpointByName(name string) LinkEndpoint {
   716  	s.mu.RLock()
   717  	defer s.mu.RUnlock()
   718  	for _, nic := range s.nics {
   719  		if nic.Name() == name {
   720  			return nic.LinkEndpoint
   721  		}
   722  	}
   723  	return nil
   724  }
   725  
   726  // EnableNIC enables the given NIC so that the link-layer endpoint can start
   727  // delivering packets to it.
   728  func (s *Stack) EnableNIC(id tcpip.NICID) tcpip.Error {
   729  	s.mu.RLock()
   730  	defer s.mu.RUnlock()
   731  
   732  	nic, ok := s.nics[id]
   733  	if !ok {
   734  		return &tcpip.ErrUnknownNICID{}
   735  	}
   736  
   737  	return nic.enable()
   738  }
   739  
   740  // DisableNIC disables the given NIC.
   741  func (s *Stack) DisableNIC(id tcpip.NICID) tcpip.Error {
   742  	s.mu.RLock()
   743  	defer s.mu.RUnlock()
   744  
   745  	nic, ok := s.nics[id]
   746  	if !ok {
   747  		return &tcpip.ErrUnknownNICID{}
   748  	}
   749  
   750  	nic.disable()
   751  	return nil
   752  }
   753  
   754  // CheckNIC checks if a NIC is usable.
   755  func (s *Stack) CheckNIC(id tcpip.NICID) bool {
   756  	s.mu.RLock()
   757  	defer s.mu.RUnlock()
   758  
   759  	nic, ok := s.nics[id]
   760  	if !ok {
   761  		return false
   762  	}
   763  
   764  	return nic.Enabled()
   765  }
   766  
// RemoveNIC removes NIC and all related routes from the network stack. It
// takes the stack write lock and delegates to removeNICLocked.
func (s *Stack) RemoveNIC(id tcpip.NICID) tcpip.Error {
	s.mu.Lock()
	defer s.mu.Unlock()

	return s.removeNICLocked(id)
}
   774  
   775  // removeNICLocked removes NIC and all related routes from the network stack.
   776  //
   777  // s.mu must be locked.
   778  func (s *Stack) removeNICLocked(id tcpip.NICID) tcpip.Error {
   779  	nic, ok := s.nics[id]
   780  	if !ok {
   781  		return &tcpip.ErrUnknownNICID{}
   782  	}
   783  	delete(s.nics, id)
   784  
   785  	// Remove routes in-place. n tracks the number of routes written.
   786  	s.route.mu.Lock()
   787  	n := 0
   788  	for i, r := range s.route.mu.table {
   789  		s.route.mu.table[i] = tcpip.Route{}
   790  		if r.NIC != id {
   791  			// Keep this route.
   792  			s.route.mu.table[n] = r
   793  			n++
   794  		}
   795  	}
   796  	s.route.mu.table = s.route.mu.table[:n]
   797  	s.route.mu.Unlock()
   798  
   799  	return nic.remove()
   800  }
   801  
// NICInfo captures the name and addresses assigned to a NIC, along with its
// state flags, MTU, statistics and other per-NIC metadata.
type NICInfo struct {
	// Name is the NIC's name, LinkAddress is the link address reported by its
	// link endpoint, and ProtocolAddresses are the NIC's primary addresses.
	Name              string
	LinkAddress       tcpip.LinkAddress
	ProtocolAddresses []tcpip.ProtocolAddress

	// Flags indicate the state of the NIC.
	Flags NICStateFlags

	// MTU is the maximum transmission unit.
	MTU uint32

	// Stats holds the NIC's own statistics.
	Stats tcpip.NICStats

	// NetworkStats holds the stats of each NetworkEndpoint bound to the NIC.
	NetworkStats map[tcpip.NetworkProtocolNumber]NetworkEndpointStats

	// Context is user-supplied data optionally supplied in CreateNICWithOptions.
	// See type NICOptions for more details.
	Context NICContext

	// ARPHardwareType holds the ARP Hardware type of the NIC. This is the
	// value sent in haType field of an ARP Request sent by this NIC and the
	// value expected in the haType field of an ARP response.
	ARPHardwareType header.ARPHardwareType

	// Forwarding holds the forwarding status for each network endpoint that
	// supports forwarding.
	Forwarding map[tcpip.NetworkProtocolNumber]bool
}
   832  
   833  // HasNIC returns true if the NICID is defined in the stack.
   834  func (s *Stack) HasNIC(id tcpip.NICID) bool {
   835  	s.mu.RLock()
   836  	_, ok := s.nics[id]
   837  	s.mu.RUnlock()
   838  	return ok
   839  }
   840  
   841  // NICInfo returns a map of NICIDs to their associated information.
   842  func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo {
   843  	s.mu.RLock()
   844  	defer s.mu.RUnlock()
   845  
   846  	nics := make(map[tcpip.NICID]NICInfo)
   847  	for id, nic := range s.nics {
   848  		flags := NICStateFlags{
   849  			Up:          true, // Netstack interfaces are always up.
   850  			Running:     nic.Enabled(),
   851  			Promiscuous: nic.Promiscuous(),
   852  			Loopback:    nic.IsLoopback(),
   853  		}
   854  
   855  		netStats := make(map[tcpip.NetworkProtocolNumber]NetworkEndpointStats)
   856  		for proto, netEP := range nic.networkEndpoints {
   857  			netStats[proto] = netEP.Stats()
   858  		}
   859  
   860  		info := NICInfo{
   861  			Name:              nic.name,
   862  			LinkAddress:       nic.LinkEndpoint.LinkAddress(),
   863  			ProtocolAddresses: nic.primaryAddresses(),
   864  			Flags:             flags,
   865  			MTU:               nic.LinkEndpoint.MTU(),
   866  			Stats:             nic.stats.local,
   867  			NetworkStats:      netStats,
   868  			Context:           nic.context,
   869  			ARPHardwareType:   nic.LinkEndpoint.ARPHardwareType(),
   870  			Forwarding:        make(map[tcpip.NetworkProtocolNumber]bool),
   871  		}
   872  
   873  		for proto := range s.networkProtocols {
   874  			switch forwarding, err := nic.forwarding(proto); err.(type) {
   875  			case nil:
   876  				info.Forwarding[proto] = forwarding
   877  			case *tcpip.ErrUnknownProtocol:
   878  				panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nic.ID()))
   879  			case *tcpip.ErrNotSupported:
   880  				// Not all network protocols support forwarding.
   881  			default:
   882  				panic(fmt.Sprintf("nic(id=%d).forwarding(%d): %s", nic.ID(), proto, err))
   883  			}
   884  		}
   885  
   886  		nics[id] = info
   887  	}
   888  	return nics
   889  }
   890  
// NICStateFlags holds information about the state of an NIC.
type NICStateFlags struct {
	// Up indicates whether the interface is running. Stack.NICInfo always
	// reports true here because netstack interfaces are always up.
	Up bool

	// Running indicates whether resources are allocated. Stack.NICInfo
	// populates it from the NIC's enabled state.
	Running bool

	// Promiscuous indicates whether the interface is in promiscuous mode.
	Promiscuous bool

	// Loopback indicates whether the interface is a loopback.
	Loopback bool
}
   905  
   906  // AddAddress adds a new network-layer address to the specified NIC.
   907  func (s *Stack) AddAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
   908  	return s.AddAddressWithOptions(id, protocol, addr, CanBePrimaryEndpoint)
   909  }
   910  
   911  // AddAddressWithPrefix is the same as AddAddress, but allows you to specify
   912  // the address prefix.
   913  func (s *Stack) AddAddressWithPrefix(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.AddressWithPrefix) tcpip.Error {
   914  	ap := tcpip.ProtocolAddress{
   915  		Protocol:          protocol,
   916  		AddressWithPrefix: addr,
   917  	}
   918  	return s.AddProtocolAddressWithOptions(id, ap, CanBePrimaryEndpoint)
   919  }
   920  
   921  // AddProtocolAddress adds a new network-layer protocol address to the
   922  // specified NIC.
   923  func (s *Stack) AddProtocolAddress(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress) tcpip.Error {
   924  	return s.AddProtocolAddressWithOptions(id, protocolAddress, CanBePrimaryEndpoint)
   925  }
   926  
   927  // AddAddressWithOptions is the same as AddAddress, but allows you to specify
   928  // whether the new endpoint can be primary or not.
   929  func (s *Stack) AddAddressWithOptions(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, peb PrimaryEndpointBehavior) tcpip.Error {
   930  	netProto, ok := s.networkProtocols[protocol]
   931  	if !ok {
   932  		return &tcpip.ErrUnknownProtocol{}
   933  	}
   934  	return s.AddProtocolAddressWithOptions(id, tcpip.ProtocolAddress{
   935  		Protocol: protocol,
   936  		AddressWithPrefix: tcpip.AddressWithPrefix{
   937  			Address:   addr,
   938  			PrefixLen: netProto.DefaultPrefixLen(),
   939  		},
   940  	}, peb)
   941  }
   942  
   943  // AddProtocolAddressWithOptions is the same as AddProtocolAddress, but allows
   944  // you to specify whether the new endpoint can be primary or not.
   945  func (s *Stack) AddProtocolAddressWithOptions(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) tcpip.Error {
   946  	s.mu.RLock()
   947  	defer s.mu.RUnlock()
   948  
   949  	nic, ok := s.nics[id]
   950  	if !ok {
   951  		return &tcpip.ErrUnknownNICID{}
   952  	}
   953  
   954  	return nic.addAddress(protocolAddress, peb)
   955  }
   956  
   957  // RemoveAddress removes an existing network-layer address from the specified
   958  // NIC.
   959  func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) tcpip.Error {
   960  	s.mu.RLock()
   961  	defer s.mu.RUnlock()
   962  
   963  	if nic, ok := s.nics[id]; ok {
   964  		return nic.removeAddress(addr)
   965  	}
   966  
   967  	return &tcpip.ErrUnknownNICID{}
   968  }
   969  
   970  // AllAddresses returns a map of NICIDs to their protocol addresses (primary
   971  // and non-primary).
   972  func (s *Stack) AllAddresses() map[tcpip.NICID][]tcpip.ProtocolAddress {
   973  	s.mu.RLock()
   974  	defer s.mu.RUnlock()
   975  
   976  	nics := make(map[tcpip.NICID][]tcpip.ProtocolAddress)
   977  	for id, nic := range s.nics {
   978  		nics[id] = nic.allPermanentAddresses()
   979  	}
   980  	return nics
   981  }
   982  
   983  // GetMainNICAddress returns the first non-deprecated primary address and prefix
   984  // for the given NIC and protocol. If no non-deprecated primary addresses exist,
   985  // a deprecated address will be returned. If no deprecated addresses exist, the
   986  // zero value will be returned.
   987  func (s *Stack) GetMainNICAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error) {
   988  	s.mu.RLock()
   989  	defer s.mu.RUnlock()
   990  
   991  	nic, ok := s.nics[id]
   992  	if !ok {
   993  		return tcpip.AddressWithPrefix{}, &tcpip.ErrUnknownNICID{}
   994  	}
   995  
   996  	return nic.PrimaryAddress(protocol)
   997  }
   998  
   999  func (s *Stack) getAddressEP(nic *nic, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) AssignableAddressEndpoint {
  1000  	if len(localAddr) == 0 {
  1001  		return nic.primaryEndpoint(netProto, remoteAddr)
  1002  	}
  1003  	return nic.findEndpoint(netProto, localAddr, CanBePrimaryEndpoint)
  1004  }
  1005  
  1006  // findLocalRouteFromNICRLocked is like findLocalRouteRLocked but finds a route
  1007  // from the specified NIC.
  1008  //
  1009  // Precondition: s.mu must be read locked.
  1010  func (s *Stack) findLocalRouteFromNICRLocked(localAddressNIC *nic, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route {
  1011  	localAddressEndpoint := localAddressNIC.getAddressOrCreateTempInner(netProto, localAddr, false /* createTemp */, NeverPrimaryEndpoint)
  1012  	if localAddressEndpoint == nil {
  1013  		return nil
  1014  	}
  1015  
  1016  	var outgoingNIC *nic
  1017  	// Prefer a local route to the same interface as the local address.
  1018  	if localAddressNIC.hasAddress(netProto, remoteAddr) {
  1019  		outgoingNIC = localAddressNIC
  1020  	}
  1021  
  1022  	// If the remote address isn't owned by the local address's NIC, check all
  1023  	// NICs.
  1024  	if outgoingNIC == nil {
  1025  		for _, nic := range s.nics {
  1026  			if nic.hasAddress(netProto, remoteAddr) {
  1027  				outgoingNIC = nic
  1028  				break
  1029  			}
  1030  		}
  1031  	}
  1032  
  1033  	// If the remote address is not owned by the stack, we can't return a local
  1034  	// route.
  1035  	if outgoingNIC == nil {
  1036  		localAddressEndpoint.DecRef()
  1037  		return nil
  1038  	}
  1039  
  1040  	r := makeLocalRoute(
  1041  		netProto,
  1042  		localAddr,
  1043  		remoteAddr,
  1044  		outgoingNIC,
  1045  		localAddressNIC,
  1046  		localAddressEndpoint,
  1047  	)
  1048  
  1049  	if r.IsOutboundBroadcast() {
  1050  		r.Release()
  1051  		return nil
  1052  	}
  1053  
  1054  	return r
  1055  }
  1056  
  1057  // findLocalRouteRLocked returns a local route.
  1058  //
  1059  // A local route is a route to some remote address which the stack owns. That
  1060  // is, a local route is a route where packets never have to leave the stack.
  1061  //
  1062  // Precondition: s.mu must be read locked.
  1063  func (s *Stack) findLocalRouteRLocked(localAddressNICID tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route {
  1064  	if len(localAddr) == 0 {
  1065  		localAddr = remoteAddr
  1066  	}
  1067  
  1068  	if localAddressNICID == 0 {
  1069  		for _, localAddressNIC := range s.nics {
  1070  			if r := s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto); r != nil {
  1071  				return r
  1072  			}
  1073  		}
  1074  
  1075  		return nil
  1076  	}
  1077  
  1078  	if localAddressNIC, ok := s.nics[localAddressNICID]; ok {
  1079  		return s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto)
  1080  	}
  1081  
  1082  	return nil
  1083  }
  1084  
  1085  // HandleLocal returns true if non-loopback interfaces are allowed to loop packets.
  1086  func (s *Stack) HandleLocal() bool {
  1087  	return s.handleLocal
  1088  }
  1089  
  1090  func isNICForwarding(nic *nic, proto tcpip.NetworkProtocolNumber) bool {
  1091  	switch forwarding, err := nic.forwarding(proto); err.(type) {
  1092  	case nil:
  1093  		return forwarding
  1094  	case *tcpip.ErrUnknownProtocol:
  1095  		panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nic.ID()))
  1096  	case *tcpip.ErrNotSupported:
  1097  		// Not all network protocols support forwarding.
  1098  		return false
  1099  	default:
  1100  		panic(fmt.Sprintf("nic(id=%d).forwarding(%d): %s", nic.ID(), proto, err))
  1101  	}
  1102  }
  1103  
// FindRoute creates a route to the given destination address, leaving through
// the given NIC and local address (if provided).
//
// If a NIC is not specified, the returned route will leave through the same
// NIC as the NIC that has the local address assigned when forwarding is
// disabled. If forwarding is enabled and the NIC is unspecified, the route may
// leave through any interface unless the route is link-local.
//
// If no local address is provided, the stack will select a local address. If no
// remote address is provided, the stack will use a remote address equal to the
// local address.
//
// When no route can be built, the returned error is one of ErrNoRoute,
// ErrBadLocalAddress or ErrNetworkUnreachable.
func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (*Route, tcpip.Error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Classify the remote address. Broadcast, multicast, link-local and
	// loopback destinations do not need a routing table entry.
	isLinkLocal := header.IsV6LinkLocalUnicastAddress(remoteAddr) || header.IsV6LinkLocalMulticastAddress(remoteAddr)
	isLocalBroadcast := remoteAddr == header.IPv4Broadcast
	isMulticast := header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)
	isLoopback := header.IsV4LoopbackAddress(remoteAddr) || header.IsV6LoopbackAddress(remoteAddr)
	needRoute := !(isLocalBroadcast || isMulticast || isLinkLocal || isLoopback)

	// When the stack handles local traffic itself, first try a route that
	// never leaves the stack.
	if s.handleLocal && !isMulticast && !isLocalBroadcast {
		if r := s.findLocalRouteRLocked(id, localAddr, remoteAddr, netProto); r != nil {
			return r, nil
		}
	}

	// If the interface is specified and we do not need a route, return a route
	// through the interface if the interface is valid and enabled.
	if id != 0 && !needRoute {
		if nic, ok := s.nics[id]; ok && nic.Enabled() {
			if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil {
				return makeRoute(
					netProto,
					"", /* gateway */
					localAddr,
					remoteAddr,
					nic, /* outboundNIC */
					nic, /* localAddressNIC */
					addressEndpoint,
					s.handleLocal,
					multicastLoop,
				), nil
			}
		}

		if isLoopback {
			return nil, &tcpip.ErrBadLocalAddress{}
		}
		return nil, &tcpip.ErrNetworkUnreachable{}
	}

	// Forwarding through a different NIC is only considered when neither the
	// local nor the remote address is link-local.
	onlyGlobalAddresses := !header.IsV6LinkLocalUnicastAddress(localAddr) && !isLinkLocal

	// Find a route to the remote with the route table.
	var chosenRoute tcpip.Route
	if r := func() *Route {
		s.route.mu.RLock()
		defer s.route.mu.RUnlock()

		for _, route := range s.route.mu.table {
			if len(remoteAddr) != 0 && !route.Destination.Contains(remoteAddr) {
				continue
			}

			nic, ok := s.nics[route.NIC]
			if !ok || !nic.Enabled() {
				continue
			}

			if id == 0 || id == route.NIC {
				if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil {
					var gateway tcpip.Address
					if needRoute {
						gateway = route.Gateway
					}
					r := constructAndValidateRoute(netProto, addressEndpoint, nic /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop)
					if r == nil {
						panic(fmt.Sprintf("non-forwarding route validation failed with route table entry = %#v, id = %d, localAddr = %s, remoteAddr = %s", route, id, localAddr, remoteAddr))
					}
					return r
				}
			}

			// If the stack has forwarding enabled and we haven't found a valid route
			// to the remote address yet, keep track of the first valid route. We
			// keep iterating because we prefer routes that let us use a local
			// address that is assigned to the outgoing interface. There is no
			// requirement to do this from any RFC but simply a choice made to better
			// follow a strong host model which the netstack follows at the time of
			// writing.
			if onlyGlobalAddresses && chosenRoute == (tcpip.Route{}) && isNICForwarding(nic, netProto) {
				chosenRoute = route
			}
		}

		return nil
	}(); r != nil {
		return r, nil
	}

	if chosenRoute != (tcpip.Route{}) {
		// At this point we know the stack has forwarding enabled since chosenRoute is
		// only set when forwarding is enabled.
		nic, ok := s.nics[chosenRoute.NIC]
		if !ok {
			// If the route's NIC was invalid, we should not have chosen the route.
			panic(fmt.Sprintf("chosen route must have a valid NIC with ID = %d", chosenRoute.NIC))
		}

		var gateway tcpip.Address
		if needRoute {
			gateway = chosenRoute.Gateway
		}

		// Use the specified NIC to get the local address endpoint.
		if id != 0 {
			if aNIC, ok := s.nics[id]; ok {
				if addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, netProto); addressEndpoint != nil {
					if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
						return r, nil
					}
				}
			}

			return nil, &tcpip.ErrNoRoute{}
		}

		// The branch above always returns, so id is necessarily 0 here.
		if id == 0 {
			// If an interface is not specified, try to find a NIC that holds the local
			// address endpoint to construct a route.
			for _, aNIC := range s.nics {
				addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, netProto)
				if addressEndpoint == nil {
					continue
				}

				if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
					return r, nil
				}
			}
		}
	}

	// Nothing usable was found; pick the error that matches the destination.
	if needRoute {
		return nil, &tcpip.ErrNoRoute{}
	}
	if header.IsV6LoopbackAddress(remoteAddr) {
		return nil, &tcpip.ErrBadLocalAddress{}
	}
	return nil, &tcpip.ErrNetworkUnreachable{}
}
  1256  
  1257  // CheckNetworkProtocol checks if a given network protocol is enabled in the
  1258  // stack.
  1259  func (s *Stack) CheckNetworkProtocol(protocol tcpip.NetworkProtocolNumber) bool {
  1260  	_, ok := s.networkProtocols[protocol]
  1261  	return ok
  1262  }
  1263  
  1264  // CheckDuplicateAddress performs duplicate address detection for the address on
  1265  // the specified interface.
  1266  func (s *Stack) CheckDuplicateAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, h DADCompletionHandler) (DADCheckAddressDisposition, tcpip.Error) {
  1267  	nic, ok := s.nics[nicID]
  1268  	if !ok {
  1269  		return 0, &tcpip.ErrUnknownNICID{}
  1270  	}
  1271  
  1272  	return nic.checkDuplicateAddress(protocol, addr, h)
  1273  }
  1274  
  1275  // CheckLocalAddress determines if the given local address exists, and if it
  1276  // does, returns the id of the NIC it's bound to. Returns 0 if the address
  1277  // does not exist.
  1278  func (s *Stack) CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID {
  1279  	s.mu.RLock()
  1280  	defer s.mu.RUnlock()
  1281  
  1282  	// If a NIC is specified, we try to find the address there only.
  1283  	if nicID != 0 {
  1284  		nic, ok := s.nics[nicID]
  1285  		if !ok {
  1286  			return 0
  1287  		}
  1288  
  1289  		if nic.CheckLocalAddress(protocol, addr) {
  1290  			return nic.id
  1291  		}
  1292  
  1293  		return 0
  1294  	}
  1295  
  1296  	// Go through all the NICs.
  1297  	for _, nic := range s.nics {
  1298  		if nic.CheckLocalAddress(protocol, addr) {
  1299  			return nic.id
  1300  		}
  1301  	}
  1302  
  1303  	return 0
  1304  }
  1305  
  1306  // SetPromiscuousMode enables or disables promiscuous mode in the given NIC.
  1307  func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) tcpip.Error {
  1308  	s.mu.RLock()
  1309  	defer s.mu.RUnlock()
  1310  
  1311  	nic, ok := s.nics[nicID]
  1312  	if !ok {
  1313  		return &tcpip.ErrUnknownNICID{}
  1314  	}
  1315  
  1316  	nic.setPromiscuousMode(enable)
  1317  
  1318  	return nil
  1319  }
  1320  
  1321  // SetSpoofing enables or disables address spoofing in the given NIC, allowing
  1322  // endpoints to bind to any address in the NIC.
  1323  func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) tcpip.Error {
  1324  	s.mu.RLock()
  1325  	defer s.mu.RUnlock()
  1326  
  1327  	nic, ok := s.nics[nicID]
  1328  	if !ok {
  1329  		return &tcpip.ErrUnknownNICID{}
  1330  	}
  1331  
  1332  	nic.setSpoofing(enable)
  1333  
  1334  	return nil
  1335  }
  1336  
// LinkResolutionResult is the result of a link address resolution attempt.
//
// It is the argument passed to the onResolve callback of GetLinkAddress.
type LinkResolutionResult struct {
	// LinkAddress is the resolved link address.
	// NOTE(review): presumably only meaningful when Err is nil — confirm.
	LinkAddress tcpip.LinkAddress
	// Err reports why resolution failed.
	// NOTE(review): presumably nil on success — confirm against the resolver.
	Err         tcpip.Error
}
  1342  
  1343  // GetLinkAddress finds the link address corresponding to a network address.
  1344  //
  1345  // Returns ErrNotSupported if the stack is not configured with a link address
  1346  // resolver for the specified network protocol.
  1347  //
  1348  // Returns ErrWouldBlock if the link address is not readily available, along
  1349  // with a notification channel for the caller to block on. Triggers address
  1350  // resolution asynchronously.
  1351  //
  1352  // onResolve will be called either immediately, if resolution is not required,
  1353  // or when address resolution is complete, with the resolved link address and
  1354  // whether resolution succeeded.
  1355  //
  1356  // If specified, the local address must be an address local to the interface
  1357  // the neighbor cache belongs to. The local address is the source address of
  1358  // a packet prompting NUD/link address resolution.
  1359  func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error {
  1360  	s.mu.RLock()
  1361  	nic, ok := s.nics[nicID]
  1362  	s.mu.RUnlock()
  1363  	if !ok {
  1364  		return &tcpip.ErrUnknownNICID{}
  1365  	}
  1366  
  1367  	return nic.getLinkAddress(addr, localAddr, protocol, onResolve)
  1368  }
  1369  
  1370  // Neighbors returns all IP to MAC address associations.
  1371  func (s *Stack) Neighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) {
  1372  	s.mu.RLock()
  1373  	nic, ok := s.nics[nicID]
  1374  	s.mu.RUnlock()
  1375  
  1376  	if !ok {
  1377  		return nil, &tcpip.ErrUnknownNICID{}
  1378  	}
  1379  
  1380  	return nic.neighbors(protocol)
  1381  }
  1382  
  1383  // AddStaticNeighbor statically associates an IP address to a MAC address.
  1384  func (s *Stack) AddStaticNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error {
  1385  	s.mu.RLock()
  1386  	nic, ok := s.nics[nicID]
  1387  	s.mu.RUnlock()
  1388  
  1389  	if !ok {
  1390  		return &tcpip.ErrUnknownNICID{}
  1391  	}
  1392  
  1393  	return nic.addStaticNeighbor(addr, protocol, linkAddr)
  1394  }
  1395  
  1396  // RemoveNeighbor removes an IP to MAC address association previously created
  1397  // either automically or by AddStaticNeighbor. Returns ErrBadAddress if there
  1398  // is no association with the provided address.
  1399  func (s *Stack) RemoveNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
  1400  	s.mu.RLock()
  1401  	nic, ok := s.nics[nicID]
  1402  	s.mu.RUnlock()
  1403  
  1404  	if !ok {
  1405  		return &tcpip.ErrUnknownNICID{}
  1406  	}
  1407  
  1408  	return nic.removeNeighbor(protocol, addr)
  1409  }
  1410  
  1411  // ClearNeighbors removes all IP to MAC address associations.
  1412  func (s *Stack) ClearNeighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) tcpip.Error {
  1413  	s.mu.RLock()
  1414  	nic, ok := s.nics[nicID]
  1415  	s.mu.RUnlock()
  1416  
  1417  	if !ok {
  1418  		return &tcpip.ErrUnknownNICID{}
  1419  	}
  1420  
  1421  	return nic.clearNeighbors(protocol)
  1422  }
  1423  
  1424  // RegisterTransportEndpoint registers the given endpoint with the stack
  1425  // transport dispatcher. Received packets that match the provided id will be
  1426  // delivered to the given endpoint; specifying a nic is optional, but
  1427  // nic-specific IDs have precedence over global ones.
  1428  func (s *Stack) RegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error {
  1429  	return s.demux.registerEndpoint(netProtos, protocol, id, ep, flags, bindToDevice)
  1430  }
  1431  
  1432  // CheckRegisterTransportEndpoint checks if an endpoint can be registered with
  1433  // the stack transport dispatcher.
  1434  func (s *Stack) CheckRegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error {
  1435  	return s.demux.checkEndpoint(netProtos, protocol, id, flags, bindToDevice)
  1436  }
  1437  
  1438  // UnregisterTransportEndpoint removes the endpoint with the given id from the
  1439  // stack transport dispatcher.
  1440  func (s *Stack) UnregisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) {
  1441  	s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice)
  1442  }
  1443  
  1444  // StartTransportEndpointCleanup removes the endpoint with the given id from
  1445  // the stack transport dispatcher. It also transitions it to the cleanup stage.
  1446  func (s *Stack) StartTransportEndpointCleanup(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) {
  1447  	s.cleanupEndpointsMu.Lock()
  1448  	s.cleanupEndpoints[ep] = struct{}{}
  1449  	s.cleanupEndpointsMu.Unlock()
  1450  
  1451  	s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice)
  1452  }
  1453  
  1454  // CompleteTransportEndpointCleanup removes the endpoint from the cleanup
  1455  // stage.
  1456  func (s *Stack) CompleteTransportEndpointCleanup(ep TransportEndpoint) {
  1457  	s.cleanupEndpointsMu.Lock()
  1458  	delete(s.cleanupEndpoints, ep)
  1459  	s.cleanupEndpointsMu.Unlock()
  1460  }
  1461  
  1462  // FindTransportEndpoint finds an endpoint that most closely matches the provided
  1463  // id. If no endpoint is found it returns nil.
  1464  func (s *Stack) FindTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, nicID tcpip.NICID) TransportEndpoint {
  1465  	return s.demux.findTransportEndpoint(netProto, transProto, id, nicID)
  1466  }
  1467  
  1468  // RegisterRawTransportEndpoint registers the given endpoint with the stack
  1469  // transport dispatcher. Received packets that match the provided transport
  1470  // protocol will be delivered to the given endpoint.
  1471  func (s *Stack) RegisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) tcpip.Error {
  1472  	return s.demux.registerRawEndpoint(netProto, transProto, ep)
  1473  }
  1474  
  1475  // UnregisterRawTransportEndpoint removes the endpoint for the transport
  1476  // protocol from the stack transport dispatcher.
  1477  func (s *Stack) UnregisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) {
  1478  	s.demux.unregisterRawEndpoint(netProto, transProto, ep)
  1479  }
  1480  
  1481  // RegisterRestoredEndpoint records e as an endpoint that has been restored on
  1482  // this stack.
  1483  func (s *Stack) RegisterRestoredEndpoint(e ResumableEndpoint) {
  1484  	s.mu.Lock()
  1485  	s.resumableEndpoints = append(s.resumableEndpoints, e)
  1486  	s.mu.Unlock()
  1487  }
  1488  
  1489  // RegisteredEndpoints returns all endpoints which are currently registered.
  1490  func (s *Stack) RegisteredEndpoints() []TransportEndpoint {
  1491  	s.mu.Lock()
  1492  	defer s.mu.Unlock()
  1493  	var es []TransportEndpoint
  1494  	for _, e := range s.demux.protocol {
  1495  		es = append(es, e.transportEndpoints()...)
  1496  	}
  1497  	return es
  1498  }
  1499  
  1500  // CleanupEndpoints returns endpoints currently in the cleanup state.
  1501  func (s *Stack) CleanupEndpoints() []TransportEndpoint {
  1502  	s.cleanupEndpointsMu.Lock()
  1503  	es := make([]TransportEndpoint, 0, len(s.cleanupEndpoints))
  1504  	for e := range s.cleanupEndpoints {
  1505  		es = append(es, e)
  1506  	}
  1507  	s.cleanupEndpointsMu.Unlock()
  1508  	return es
  1509  }
  1510  
  1511  // RestoreCleanupEndpoints adds endpoints to cleanup tracking. This is useful
  1512  // for restoring a stack after a save.
  1513  func (s *Stack) RestoreCleanupEndpoints(es []TransportEndpoint) {
  1514  	s.cleanupEndpointsMu.Lock()
  1515  	for _, e := range es {
  1516  		s.cleanupEndpoints[e] = struct{}{}
  1517  	}
  1518  	s.cleanupEndpointsMu.Unlock()
  1519  }
  1520  
  1521  // Close closes all currently registered transport endpoints.
  1522  //
  1523  // Endpoints created or modified during this call may not get closed.
  1524  func (s *Stack) Close() {
  1525  	for _, e := range s.RegisteredEndpoints() {
  1526  		e.Abort()
  1527  	}
  1528  	for _, p := range s.transportProtocols {
  1529  		p.proto.Close()
  1530  	}
  1531  	for _, p := range s.networkProtocols {
  1532  		p.Close()
  1533  	}
  1534  }
  1535  
  1536  // Wait waits for all transport and link endpoints to halt their worker
  1537  // goroutines.
  1538  //
  1539  // Endpoints created or modified during this call may not get waited on.
  1540  //
  1541  // Note that link endpoints must be stopped via an implementation specific
  1542  // mechanism.
  1543  func (s *Stack) Wait() {
  1544  	for _, e := range s.RegisteredEndpoints() {
  1545  		e.Wait()
  1546  	}
  1547  	for _, e := range s.CleanupEndpoints() {
  1548  		e.Wait()
  1549  	}
  1550  	for _, p := range s.transportProtocols {
  1551  		p.proto.Wait()
  1552  	}
  1553  	for _, p := range s.networkProtocols {
  1554  		p.Wait()
  1555  	}
  1556  
  1557  	s.mu.RLock()
  1558  	defer s.mu.RUnlock()
  1559  	for _, n := range s.nics {
  1560  		n.LinkEndpoint.Wait()
  1561  	}
  1562  }
  1563  
  1564  // Resume restarts the stack after a restore. This must be called after the
  1565  // entire system has been restored.
  1566  func (s *Stack) Resume() {
  1567  	// ResumableEndpoint.Resume() may call other methods on s, so we can't hold
  1568  	// s.mu while resuming the endpoints.
  1569  	s.mu.Lock()
  1570  	eps := s.resumableEndpoints
  1571  	s.resumableEndpoints = nil
  1572  	s.mu.Unlock()
  1573  	for _, e := range eps {
  1574  		e.Resume(s)
  1575  	}
  1576  }
  1577  
  1578  // RegisterPacketEndpoint registers ep with the stack, causing it to receive
  1579  // all traffic of the specified netProto on the given NIC. If nicID is 0, it
  1580  // receives traffic from every NIC.
  1581  func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) tcpip.Error {
  1582  	s.mu.Lock()
  1583  	defer s.mu.Unlock()
  1584  
  1585  	// If no NIC is specified, capture on all devices.
  1586  	if nicID == 0 {
  1587  		// Register with each NIC.
  1588  		for _, nic := range s.nics {
  1589  			if err := nic.registerPacketEndpoint(netProto, ep); err != nil {
  1590  				s.unregisterPacketEndpointLocked(0, netProto, ep)
  1591  				return err
  1592  			}
  1593  		}
  1594  		return nil
  1595  	}
  1596  
  1597  	// Capture on a specific device.
  1598  	nic, ok := s.nics[nicID]
  1599  	if !ok {
  1600  		return &tcpip.ErrUnknownNICID{}
  1601  	}
  1602  	if err := nic.registerPacketEndpoint(netProto, ep); err != nil {
  1603  		return err
  1604  	}
  1605  
  1606  	return nil
  1607  }
  1608  
  1609  // UnregisterPacketEndpoint unregisters ep for packets of the specified
  1610  // netProto from the specified NIC. If nicID is 0, ep is unregistered from all
  1611  // NICs.
  1612  func (s *Stack) UnregisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
  1613  	s.mu.Lock()
  1614  	defer s.mu.Unlock()
  1615  	s.unregisterPacketEndpointLocked(nicID, netProto, ep)
  1616  }
  1617  
  1618  func (s *Stack) unregisterPacketEndpointLocked(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
  1619  	// If no NIC is specified, unregister on all devices.
  1620  	if nicID == 0 {
  1621  		// Unregister with each NIC.
  1622  		for _, nic := range s.nics {
  1623  			nic.unregisterPacketEndpoint(netProto, ep)
  1624  		}
  1625  		return
  1626  	}
  1627  
  1628  	// Unregister in a single device.
  1629  	nic, ok := s.nics[nicID]
  1630  	if !ok {
  1631  		return
  1632  	}
  1633  	nic.unregisterPacketEndpoint(netProto, ep)
  1634  }
  1635  
  1636  // WritePacketToRemote writes a payload on the specified NIC using the provided
  1637  // network protocol and remote link address.
  1638  func (s *Stack) WritePacketToRemote(nicID tcpip.NICID, remote tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.VectorisedView) tcpip.Error {
  1639  	s.mu.Lock()
  1640  	nic, ok := s.nics[nicID]
  1641  	s.mu.Unlock()
  1642  	if !ok {
  1643  		return &tcpip.ErrUnknownDevice{}
  1644  	}
  1645  	pkt := NewPacketBuffer(PacketBufferOptions{
  1646  		ReserveHeaderBytes: int(nic.MaxHeaderLength()),
  1647  		Data:               payload,
  1648  	})
  1649  	return nic.WritePacketToRemote(remote, netProto, pkt)
  1650  }
  1651  
  1652  // NetworkProtocolInstance returns the protocol instance in the stack for the
  1653  // specified network protocol. This method is public for protocol implementers
  1654  // and tests to use.
  1655  func (s *Stack) NetworkProtocolInstance(num tcpip.NetworkProtocolNumber) NetworkProtocol {
  1656  	if p, ok := s.networkProtocols[num]; ok {
  1657  		return p
  1658  	}
  1659  	return nil
  1660  }
  1661  
  1662  // TransportProtocolInstance returns the protocol instance in the stack for the
  1663  // specified transport protocol. This method is public for protocol implementers
  1664  // and tests to use.
  1665  func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) TransportProtocol {
  1666  	if pState, ok := s.transportProtocols[num]; ok {
  1667  		return pState.proto
  1668  	}
  1669  	return nil
  1670  }
  1671  
  1672  // AddTCPProbe installs a probe function that will be invoked on every segment
  1673  // received by a given TCP endpoint. The probe function is passed a copy of the
  1674  // TCP endpoint state before and after processing of the segment.
  1675  //
  1676  // NOTE: TCPProbe is added only to endpoints created after this call. Endpoints
  1677  // created prior to this call will not call the probe function.
  1678  //
  1679  // Further, installing two different probes back to back can result in some
  1680  // endpoints calling the first one and some the second one. There is no
  1681  // guarantee provided on which probe will be invoked. Ideally this should only
  1682  // be called once per stack.
  1683  func (s *Stack) AddTCPProbe(probe TCPProbeFunc) {
  1684  	s.tcpProbeFunc.Store(probe)
  1685  }
  1686  
  1687  // GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil
  1688  // otherwise.
  1689  func (s *Stack) GetTCPProbe() TCPProbeFunc {
  1690  	p := s.tcpProbeFunc.Load()
  1691  	if p == nil {
  1692  		return nil
  1693  	}
  1694  	return p.(TCPProbeFunc)
  1695  }
  1696  
  1697  // RemoveTCPProbe removes an installed TCP probe.
  1698  //
  1699  // NOTE: This only ensures that endpoints created after this call do not
  1700  // have a probe attached. Endpoints already created will continue to invoke
  1701  // TCP probe.
  1702  func (s *Stack) RemoveTCPProbe() {
  1703  	// This must be TCPProbeFunc(nil) because atomic.Value.Store(nil) panics.
  1704  	s.tcpProbeFunc.Store(TCPProbeFunc(nil))
  1705  }
  1706  
  1707  // JoinGroup joins the given multicast group on the given NIC.
  1708  func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error {
  1709  	s.mu.RLock()
  1710  	defer s.mu.RUnlock()
  1711  
  1712  	if nic, ok := s.nics[nicID]; ok {
  1713  		return nic.joinGroup(protocol, multicastAddr)
  1714  	}
  1715  	return &tcpip.ErrUnknownNICID{}
  1716  }
  1717  
  1718  // LeaveGroup leaves the given multicast group on the given NIC.
  1719  func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error {
  1720  	s.mu.RLock()
  1721  	defer s.mu.RUnlock()
  1722  
  1723  	if nic, ok := s.nics[nicID]; ok {
  1724  		return nic.leaveGroup(protocol, multicastAddr)
  1725  	}
  1726  	return &tcpip.ErrUnknownNICID{}
  1727  }
  1728  
  1729  // IsInGroup returns true if the NIC with ID nicID has joined the multicast
  1730  // group multicastAddr.
  1731  func (s *Stack) IsInGroup(nicID tcpip.NICID, multicastAddr tcpip.Address) (bool, tcpip.Error) {
  1732  	s.mu.RLock()
  1733  	defer s.mu.RUnlock()
  1734  
  1735  	if nic, ok := s.nics[nicID]; ok {
  1736  		return nic.isInGroup(multicastAddr), nil
  1737  	}
  1738  	return false, &tcpip.ErrUnknownNICID{}
  1739  }
  1740  
  1741  // IPTables returns the stack's iptables.
  1742  func (s *Stack) IPTables() *IPTables {
  1743  	return s.tables
  1744  }
  1745  
  1746  // ICMPLimit returns the maximum number of ICMP messages that can be sent
  1747  // in one second.
  1748  func (s *Stack) ICMPLimit() rate.Limit {
  1749  	return s.icmpRateLimiter.Limit()
  1750  }
  1751  
  1752  // SetICMPLimit sets the maximum number of ICMP messages that be sent
  1753  // in one second.
  1754  func (s *Stack) SetICMPLimit(newLimit rate.Limit) {
  1755  	s.icmpRateLimiter.SetLimit(newLimit)
  1756  }
  1757  
  1758  // ICMPBurst returns the maximum number of ICMP messages that can be sent
  1759  // in a single burst.
  1760  func (s *Stack) ICMPBurst() int {
  1761  	return s.icmpRateLimiter.Burst()
  1762  }
  1763  
  1764  // SetICMPBurst sets the maximum number of ICMP messages that can be sent
  1765  // in a single burst.
  1766  func (s *Stack) SetICMPBurst(burst int) {
  1767  	s.icmpRateLimiter.SetBurst(burst)
  1768  }
  1769  
  1770  // AllowICMPMessage returns true if we the rate limiter allows at least one
  1771  // ICMP message to be sent at this instant.
  1772  func (s *Stack) AllowICMPMessage() bool {
  1773  	return s.icmpRateLimiter.Allow()
  1774  }
  1775  
  1776  // GetNetworkEndpoint returns the NetworkEndpoint with the specified protocol
  1777  // number installed on the specified NIC.
  1778  func (s *Stack) GetNetworkEndpoint(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NetworkEndpoint, tcpip.Error) {
  1779  	s.mu.Lock()
  1780  	defer s.mu.Unlock()
  1781  
  1782  	nic, ok := s.nics[nicID]
  1783  	if !ok {
  1784  		return nil, &tcpip.ErrUnknownNICID{}
  1785  	}
  1786  
  1787  	return nic.getNetworkEndpoint(proto), nil
  1788  }
  1789  
  1790  // NUDConfigurations gets the per-interface NUD configurations.
  1791  func (s *Stack) NUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) {
  1792  	s.mu.RLock()
  1793  	nic, ok := s.nics[id]
  1794  	s.mu.RUnlock()
  1795  
  1796  	if !ok {
  1797  		return NUDConfigurations{}, &tcpip.ErrUnknownNICID{}
  1798  	}
  1799  
  1800  	return nic.nudConfigs(proto)
  1801  }
  1802  
  1803  // SetNUDConfigurations sets the per-interface NUD configurations.
  1804  //
  1805  // Note, if c contains invalid NUD configuration values, it will be fixed to
  1806  // use default values for the erroneous values.
  1807  func (s *Stack) SetNUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error {
  1808  	s.mu.RLock()
  1809  	nic, ok := s.nics[id]
  1810  	s.mu.RUnlock()
  1811  
  1812  	if !ok {
  1813  		return &tcpip.ErrUnknownNICID{}
  1814  	}
  1815  
  1816  	return nic.setNUDConfigs(proto, c)
  1817  }
  1818  
  1819  // Seed returns a 32 bit value that can be used as a seed value for port
  1820  // picking, ISN generation etc.
  1821  //
  1822  // NOTE: The seed is generated once during stack initialization only.
  1823  func (s *Stack) Seed() uint32 {
  1824  	return s.seed
  1825  }
  1826  
  1827  // Rand returns a reference to a pseudo random generator that can be used
  1828  // to generate random numbers as required.
  1829  func (s *Stack) Rand() *rand.Rand {
  1830  	return s.randomGenerator
  1831  }
  1832  
  1833  // SecureRNG returns the stack's cryptographically secure random number
  1834  // generator.
  1835  func (s *Stack) SecureRNG() io.Reader {
  1836  	return s.secureRNG
  1837  }
  1838  
  1839  // FindNICNameFromID returns the name of the NIC for the given NICID.
  1840  func (s *Stack) FindNICNameFromID(id tcpip.NICID) string {
  1841  	s.mu.RLock()
  1842  	defer s.mu.RUnlock()
  1843  
  1844  	nic, ok := s.nics[id]
  1845  	if !ok {
  1846  		return ""
  1847  	}
  1848  
  1849  	return nic.Name()
  1850  }
  1851  
  1852  // NewJob returns a new tcpip.Job using the stack's clock.
  1853  func (s *Stack) NewJob(l sync.Locker, f func()) *tcpip.Job {
  1854  	return tcpip.NewJob(s.clock, l, f)
  1855  }
  1856  
  1857  // ParseResult indicates the result of a parsing attempt.
  1858  type ParseResult int
  1859  
  1860  const (
  1861  	// ParsedOK indicates that a packet was successfully parsed.
  1862  	ParsedOK ParseResult = iota
  1863  
  1864  	// UnknownTransportProtocol indicates that the transport protocol is unknown.
  1865  	UnknownTransportProtocol
  1866  
  1867  	// TransportLayerParseError indicates that the transport packet was not
  1868  	// successfully parsed.
  1869  	TransportLayerParseError
  1870  )
  1871  
  1872  // ParsePacketBufferTransport parses the provided packet buffer's transport
  1873  // header.
  1874  func (s *Stack) ParsePacketBufferTransport(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) ParseResult {
  1875  	// ICMP packets don't have their TransportHeader fields set yet, parse it
  1876  	// here. See icmp/protocol.go:protocol.Parse for a full explanation.
  1877  	if protocol == header.ICMPv4ProtocolNumber || protocol == header.ICMPv6ProtocolNumber {
  1878  		return ParsedOK
  1879  	}
  1880  
  1881  	pkt.TransportProtocolNumber = protocol
  1882  	// Parse the transport header if present.
  1883  	state, ok := s.transportProtocols[protocol]
  1884  	if !ok {
  1885  		return UnknownTransportProtocol
  1886  	}
  1887  
  1888  	if !state.proto.Parse(pkt) {
  1889  		return TransportLayerParseError
  1890  	}
  1891  
  1892  	return ParsedOK
  1893  }
  1894  
  1895  // networkProtocolNumbers returns the network protocol numbers the stack is
  1896  // configured with.
  1897  func (s *Stack) networkProtocolNumbers() []tcpip.NetworkProtocolNumber {
  1898  	protos := make([]tcpip.NetworkProtocolNumber, 0, len(s.networkProtocols))
  1899  	for p := range s.networkProtocols {
  1900  		protos = append(protos, p)
  1901  	}
  1902  	return protos
  1903  }
  1904  
  1905  func isSubnetBroadcastOnNIC(nic *nic, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
  1906  	addressEndpoint := nic.getAddressOrCreateTempInner(protocol, addr, false /* createTemp */, NeverPrimaryEndpoint)
  1907  	if addressEndpoint == nil {
  1908  		return false
  1909  	}
  1910  
  1911  	subnet := addressEndpoint.Subnet()
  1912  	addressEndpoint.DecRef()
  1913  	return subnet.IsBroadcast(addr)
  1914  }
  1915  
  1916  // IsSubnetBroadcast returns true if the provided address is a subnet-local
  1917  // broadcast address on the specified NIC and protocol.
  1918  //
  1919  // Returns false if the NIC is unknown or if the protocol is unknown or does
  1920  // not support addressing.
  1921  //
  1922  // If the NIC is not specified, the stack will check all NICs.
  1923  func (s *Stack) IsSubnetBroadcast(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
  1924  	s.mu.RLock()
  1925  	defer s.mu.RUnlock()
  1926  
  1927  	if nicID != 0 {
  1928  		nic, ok := s.nics[nicID]
  1929  		if !ok {
  1930  			return false
  1931  		}
  1932  
  1933  		return isSubnetBroadcastOnNIC(nic, protocol, addr)
  1934  	}
  1935  
  1936  	for _, nic := range s.nics {
  1937  		if isSubnetBroadcastOnNIC(nic, protocol, addr) {
  1938  			return true
  1939  		}
  1940  	}
  1941  
  1942  	return false
  1943  }