github.com/flowerwrong/netstack@v0.0.0-20191009141956-e5848263af28/tcpip/stack/stack.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package stack provides the glue between networking protocols and the
    16  // consumers of the networking stack.
    17  //
    18  // For consumers, the only function of interest is New(), everything else is
    19  // provided by the tcpip/public package.
    20  package stack
    21  
    22  import (
    23  	"encoding/binary"
    24  	"sync"
    25  	"time"
    26  
    27  	"github.com/FlowerWrong/netstack/rand"
    28  	"github.com/FlowerWrong/netstack/sleep"
    29  	"github.com/FlowerWrong/netstack/tcpip"
    30  	"github.com/FlowerWrong/netstack/tcpip/buffer"
    31  	"github.com/FlowerWrong/netstack/tcpip/header"
    32  	"github.com/FlowerWrong/netstack/tcpip/iptables"
    33  	"github.com/FlowerWrong/netstack/tcpip/ports"
    34  	"github.com/FlowerWrong/netstack/tcpip/seqnum"
    35  	"github.com/FlowerWrong/netstack/waiter"
    36  	"golang.org/x/time/rate"
    37  )
    38  
// Link address cache parameters. New hands these three values, in this order,
// to newLinkAddrCache when it builds the stack's link address cache.
const (
	// ageLimit is set to the same cache stale time used in Linux.
	ageLimit = 1 * time.Minute
	// resolutionTimeout is set to the same ARP timeout used in Linux.
	resolutionTimeout = 1 * time.Second
	// resolutionAttempts is set to the same ARP retries used in Linux.
	resolutionAttempts = 3
)
    47  
// transportProtocolState pairs a transport protocol registered with the stack
// (proto) with the optional per-stack default handler installed for it through
// Stack.SetTransportProtocolHandler (defaultHandler). New only fills in proto,
// so defaultHandler is nil until SetTransportProtocolHandler is called.
type transportProtocolState struct {
	proto          TransportProtocol
	defaultHandler func(r *Route, id TransportEndpointID, netHeader buffer.View, vv buffer.VectorisedView) bool
}
    52  
// TCPProbeFunc is the expected function type for a TCP probe function to be
// passed to stack.AddTCPProbe. The probe receives a TCPEndpointState, which is
// a copy of the endpoint's internal state.
type TCPProbeFunc func(s TCPEndpointState)
    56  
// TCPCubicState is used to hold a copy of the internal cubic state when the
// TCPProbeFunc is invoked.
//
// NOTE(review): the meaning of the individual fields is not visible in this
// file. The names look like the variables of the CUBIC congestion control
// algorithm (W_max, K, C, beta, W_est, ...) — confirm against the TCP
// implementation that fills this struct in before relying on that.
type TCPCubicState struct {
	WLastMax                float64
	WMax                    float64
	T                       time.Time
	TimeSinceLastCongestion time.Duration
	C                       float64
	K                       float64
	Beta                    float64
	WC                      float64
	WEst                    float64
}
    70  
// TCPEndpointID is the unique 4-tuple (local port, local address, remote port,
// remote address) that identifies a given endpoint.
type TCPEndpointID struct {
	// LocalPort is the local port associated with the endpoint.
	LocalPort uint16

	// LocalAddress is the local [network layer] address associated with
	// the endpoint.
	LocalAddress tcpip.Address

	// RemotePort is the remote port associated with the endpoint.
	RemotePort uint16

	// RemoteAddress is the remote [network layer] address associated with
	// the endpoint.
	RemoteAddress tcpip.Address
}
    87  
// TCPFastRecoveryState holds a copy of the internal fast recovery state of a
// TCP endpoint. It is embedded in TCPSenderState as the FastRecovery field.
type TCPFastRecoveryState struct {
	// Active, if true, indicates the endpoint is in fast recovery.
	Active bool

	// First is the first unacknowledged sequence number being recovered.
	First seqnum.Value

	// Last is the 'recover' sequence number that indicates the point at
	// which we should exit recovery, barring any timeouts etc.
	Last seqnum.Value

	// MaxCwnd is the maximum value we are permitted to grow the congestion
	// window to during recovery. This is set at the time we enter recovery.
	MaxCwnd int

	// HighRxt is the highest sequence number which has been retransmitted
	// during the current loss recovery phase.
	// See: RFC 6675 Section 2 for details.
	HighRxt seqnum.Value

	// RescueRxt is the highest sequence number which has been
	// optimistically retransmitted to prevent stalling of the ACK clock
	// when there is loss at the end of the window and no new data is
	// available for transmission.
	// See: RFC 6675 Section 2 for details.
	RescueRxt seqnum.Value
}
   117  
// TCPReceiverState holds a copy of the internal state of the receiver for
// a given TCP endpoint. It is embedded in TCPEndpointState as the Receiver
// field.
type TCPReceiverState struct {
	// RcvNxt is the TCP variable RCV.NXT.
	RcvNxt seqnum.Value

	// RcvAcc is the TCP variable RCV.ACC.
	RcvAcc seqnum.Value

	// RcvWndScale is the window scaling to use for inbound segments.
	RcvWndScale uint8

	// PendingBufUsed is the number of bytes pending in the receive
	// queue.
	PendingBufUsed seqnum.Size

	// PendingBufSize is the size of the socket receive buffer.
	PendingBufSize seqnum.Size
}
   137  
// TCPSenderState holds a copy of the internal state of the sender for
// a given TCP endpoint. It is embedded in TCPEndpointState as the Sender
// field.
type TCPSenderState struct {
	// LastSendTime is the time at which we sent the last segment.
	LastSendTime time.Time

	// DupAckCount is the number of duplicate ACKs received.
	DupAckCount int

	// SndCwnd is the size of the sending congestion window in packets.
	SndCwnd int

	// Ssthresh is the slow start threshold in packets.
	Ssthresh int

	// SndCAAckCount is the number of packets consumed in congestion
	// avoidance mode.
	SndCAAckCount int

	// Outstanding is the number of packets in flight.
	Outstanding int

	// SndWnd is the send window size in bytes.
	SndWnd seqnum.Size

	// SndUna is the next unacknowledged sequence number.
	SndUna seqnum.Value

	// SndNxt is the sequence number of the next segment to be sent.
	SndNxt seqnum.Value

	// RTTMeasureSeqNum is the sequence number being used for the latest RTT
	// measurement.
	RTTMeasureSeqNum seqnum.Value

	// RTTMeasureTime is the time when the RTTMeasureSeqNum was sent.
	RTTMeasureTime time.Time

	// Closed indicates that the caller has closed the endpoint for sending.
	Closed bool

	// SRTT is the smoothed round-trip time as defined in section 2 of
	// RFC 6298.
	SRTT time.Duration

	// RTO is the retransmit timeout as defined in section 2 of RFC 6298.
	RTO time.Duration

	// RTTVar is the round-trip time variation as defined in section 2 of
	// RFC 6298.
	RTTVar time.Duration

	// SRTTInited, if true, indicates that a valid RTT measurement has been
	// completed.
	SRTTInited bool

	// MaxPayloadSize is the maximum size of the payload of a given segment.
	// It is initialized on demand.
	MaxPayloadSize int

	// SndWndScale is the number of bits to shift left when reading the send
	// window size from a segment.
	SndWndScale uint8

	// MaxSentAck is the highest acknowledgement number sent till now.
	MaxSentAck seqnum.Value

	// FastRecovery holds the fast recovery state for the endpoint.
	FastRecovery TCPFastRecoveryState

	// Cubic holds the state related to CUBIC congestion control.
	Cubic TCPCubicState
}
   211  
// TCPSACKInfo holds TCP SACK related information for a given TCP endpoint. It
// is embedded in TCPEndpointState as the SACK field.
type TCPSACKInfo struct {
	// Blocks is the list of SACK blocks that identify the out-of-order
	// segments held by a given TCP endpoint.
	Blocks []header.SACKBlock

	// ReceivedBlocks are the SACK blocks received by this endpoint
	// from the peer endpoint.
	ReceivedBlocks []header.SACKBlock

	// MaxSACKED is the highest sequence number that has been SACKED
	// by the peer.
	MaxSACKED seqnum.Value
}
   226  
// RcvBufAutoTuneParams holds state related to TCP receive buffer auto-tuning.
// It is embedded in TCPEndpointState as the RcvAutoParams field.
type RcvBufAutoTuneParams struct {
	// MeasureTime is the time at which the current measurement
	// was started.
	MeasureTime time.Time

	// CopiedBytes is the number of bytes copied to user space since
	// this measurement began.
	CopiedBytes int

	// PrevCopiedBytes is the number of bytes copied to user space in
	// the previous RTT period.
	PrevCopiedBytes int

	// RcvBufSize is the auto-tuned receive buffer size.
	RcvBufSize int

	// RTT is the smoothed RTT as measured by observing the time between
	// when a byte is first acknowledged and the receipt of data that is at
	// least one window beyond the sequence number that was acknowledged.
	RTT time.Duration

	// RTTVar is the "round-trip time variation" as defined in section 2
	// of RFC 6298.
	RTTVar time.Duration

	// RTTMeasureSeqNumber is the highest acceptable sequence number at the
	// time this RTT measurement period began.
	RTTMeasureSeqNumber seqnum.Value

	// RTTMeasureTime is the absolute time at which the current RTT
	// measurement period began.
	RTTMeasureTime time.Time

	// Disabled is true if an explicit receive buffer is set for the
	// endpoint.
	Disabled bool
}
   265  
// TCPEndpointState is a copy of the internal state of a TCP endpoint. It is
// the argument handed to a TCPProbeFunc.
type TCPEndpointState struct {
	// ID is a copy of the TransportEndpointID for the endpoint.
	ID TCPEndpointID

	// SegTime denotes the absolute time when this segment was received.
	SegTime time.Time

	// RcvBufSize is the size of the receive socket buffer for the endpoint.
	RcvBufSize int

	// RcvBufUsed is the amount of bytes actually held in the receive socket
	// buffer for the endpoint.
	RcvBufUsed int

	// RcvAutoParams is used to hold state variables to compute
	// the auto-tuned receive buffer size.
	RcvAutoParams RcvBufAutoTuneParams

	// RcvClosed, if true, indicates the endpoint has been closed for reading.
	RcvClosed bool

	// SendTSOk is used to indicate when the TS Option has been negotiated.
	// When SendTSOk is true every non-RST segment should carry a TS as per
	// RFC7323#section-1.1.
	SendTSOk bool

	// RecentTS is the timestamp that should be sent in the TSEcr field of
	// the timestamp for future segments sent by the endpoint. This field is
	// updated if required when a new segment is received by this endpoint.
	RecentTS uint32

	// TSOffset is a randomized offset added to the value of the TSVal field
	// in the timestamp option.
	TSOffset uint32

	// SACKPermitted is set to true if the peer sends the TCPSACKPermitted
	// option in the SYN/SYN-ACK.
	SACKPermitted bool

	// SACK holds TCP SACK related information for this endpoint.
	SACK TCPSACKInfo

	// SndBufSize is the size of the socket send buffer.
	SndBufSize int

	// SndBufUsed is the number of bytes held in the socket send buffer.
	SndBufUsed int

	// SndClosed indicates that the endpoint has been closed for sends.
	SndClosed bool

	// SndBufInQueue is the number of bytes in the send queue.
	SndBufInQueue seqnum.Size

	// PacketTooBigCount is used to notify the main protocol routine how
	// many times a "packet too big" control packet is received.
	PacketTooBigCount int

	// SndMTU is the smallest MTU seen in the control packets received.
	SndMTU int

	// Receiver holds variables related to the TCP receiver for the endpoint.
	Receiver TCPReceiverState

	// Sender holds state related to the TCP Sender for the endpoint.
	Sender TCPSenderState
}
   334  
// ResumableEndpoint is an endpoint that needs to be resumed after restore.
// The Stack keeps such endpoints in its resumableEndpoints list.
type ResumableEndpoint interface {
	// Resume resumes an endpoint after restore. This can be used to restart
	// background workers such as protocol goroutines. This must be called after
	// all indirect dependencies of the endpoint have been restored, which
	// generally implies at the end of the restore process.
	Resume(*Stack)
}
   343  
// Stack is a networking stack, with all supported protocols, NICs, and route
// table. Use New to create one.
type Stack struct {
	// transportProtocols, networkProtocols and linkAddrResolvers are
	// populated by New from Options. The methods visible in this file only
	// look entries up in these maps; none of them adds or removes keys after
	// construction.
	transportProtocols map[tcpip.TransportProtocolNumber]*transportProtocolState
	networkProtocols   map[tcpip.NetworkProtocolNumber]NetworkProtocol
	linkAddrResolvers  map[tcpip.NetworkProtocolNumber]LinkAddressResolver

	// unassociatedFactory creates unassociated endpoints. If nil, raw
	// endpoints are disabled. It is set during Stack creation and is
	// immutable.
	unassociatedFactory UnassociatedEndpointFactory

	// demux is the global transport demuxer, created by New through
	// newTransportDemuxer.
	demux *transportDemuxer

	// stats holds the stack's statistic counters, initialized in New from
	// Options.Stats.FillIn().
	stats tcpip.Stats

	// linkAddrCache is created by New with ageLimit, resolutionTimeout and
	// resolutionAttempts.
	linkAddrCache *linkAddrCache

	// mu is held by the methods in this file whenever they access nics,
	// forwarding or routeTable.
	// NOTE(review): whether mu also guards any of the fields further down is
	// not visible from the code shown here — confirm before relying on it.
	mu         sync.RWMutex
	nics       map[tcpip.NICID]*NIC
	forwarding bool

	// routeTable is the route table passed in by the user via
	// SetRouteTable(); it is used by FindRoute() to build a route for a
	// specific destination.
	routeTable []tcpip.Route

	// PortManager is embedded, so its methods are promoted to Stack. New
	// initializes it with ports.NewPortManager().
	*ports.PortManager

	// If not nil, then any new endpoints will have this probe function
	// invoked every time they receive a TCP segment.
	tcpProbeFunc TCPProbeFunc

	// clock is used to generate user-visible times.
	clock tcpip.Clock

	// handleLocal allows non-loopback interfaces to loop packets.
	handleLocal bool

	// tables are the iptables packet filtering and manipulation rules.
	tables iptables.IPTables

	// resumableEndpoints is a list of endpoints that need to be resumed if the
	// stack is being restored.
	resumableEndpoints []ResumableEndpoint

	// icmpRateLimiter is a global rate limiter for all ICMP messages generated
	// by the stack.
	icmpRateLimiter *ICMPRateLimiter

	// portSeed is a one-time random value initialized at stack startup
	// and is used to seed the TCP port picking on active connections.
	//
	// TODO(gvisor.dev/issues/940): S/R this field.
	portSeed uint32
}
   400  
// Options contains optional Stack configuration. It is the sole argument to
// New.
type Options struct {
	// NetworkProtocols lists the network protocols to enable.
	NetworkProtocols []NetworkProtocol

	// TransportProtocols lists the transport protocols to enable.
	TransportProtocols []TransportProtocol

	// Clock is an optional clock source used for timestamping packets.
	//
	// If no Clock is specified, New falls back to &tcpip.StdClock{}.
	Clock tcpip.Clock

	// Stats are optional statistic counters. New stores the result of
	// Stats.FillIn() in the stack.
	Stats tcpip.Stats

	// HandleLocal indicates whether packets destined to their source
	// should be handled by the stack internally (true) or outside the
	// stack (false).
	HandleLocal bool

	// UnassociatedFactory produces unassociated raw endpoints.
	// Raw endpoints are enabled only if this is non-nil.
	UnassociatedFactory UnassociatedEndpointFactory
}
   426  
   427  // New allocates a new networking stack with only the requested networking and
   428  // transport protocols configured with default options.
   429  //
   430  // Protocol options can be changed by calling the
   431  // SetNetworkProtocolOption/SetTransportProtocolOption methods provided by the
   432  // stack. Please refer to individual protocol implementations as to what options
   433  // are supported.
   434  func New(opts Options) *Stack {
   435  	clock := opts.Clock
   436  	if clock == nil {
   437  		clock = &tcpip.StdClock{}
   438  	}
   439  
   440  	s := &Stack{
   441  		transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState),
   442  		networkProtocols:   make(map[tcpip.NetworkProtocolNumber]NetworkProtocol),
   443  		linkAddrResolvers:  make(map[tcpip.NetworkProtocolNumber]LinkAddressResolver),
   444  		nics:               make(map[tcpip.NICID]*NIC),
   445  		linkAddrCache:      newLinkAddrCache(ageLimit, resolutionTimeout, resolutionAttempts),
   446  		PortManager:        ports.NewPortManager(),
   447  		clock:              clock,
   448  		stats:              opts.Stats.FillIn(),
   449  		handleLocal:        opts.HandleLocal,
   450  		icmpRateLimiter:    NewICMPRateLimiter(),
   451  		portSeed:           generateRandUint32(),
   452  	}
   453  
   454  	// Add specified network protocols.
   455  	for _, netProto := range opts.NetworkProtocols {
   456  		s.networkProtocols[netProto.Number()] = netProto
   457  		if r, ok := netProto.(LinkAddressResolver); ok {
   458  			s.linkAddrResolvers[r.LinkAddressProtocol()] = r
   459  		}
   460  	}
   461  
   462  	// Add specified transport protocols.
   463  	for _, transProto := range opts.TransportProtocols {
   464  		s.transportProtocols[transProto.Number()] = &transportProtocolState{
   465  			proto: transProto,
   466  		}
   467  	}
   468  
   469  	// Add the factory for unassociated endpoints, if present.
   470  	s.unassociatedFactory = opts.UnassociatedFactory
   471  
   472  	// Create the global transport demuxer.
   473  	s.demux = newTransportDemuxer(s)
   474  
   475  	return s
   476  }
   477  
   478  // SetNetworkProtocolOption allows configuring individual protocol level
   479  // options. This method returns an error if the protocol is not supported or
   480  // option is not supported by the protocol implementation or the provided value
   481  // is incorrect.
   482  func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option interface{}) *tcpip.Error {
   483  	netProto, ok := s.networkProtocols[network]
   484  	if !ok {
   485  		return tcpip.ErrUnknownProtocol
   486  	}
   487  	return netProto.SetOption(option)
   488  }
   489  
   490  // NetworkProtocolOption allows retrieving individual protocol level option
   491  // values. This method returns an error if the protocol is not supported or
   492  // option is not supported by the protocol implementation.
   493  // e.g.
   494  // var v ipv4.MyOption
   495  // err := s.NetworkProtocolOption(tcpip.IPv4ProtocolNumber, &v)
   496  // if err != nil {
   497  //   ...
   498  // }
   499  func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option interface{}) *tcpip.Error {
   500  	netProto, ok := s.networkProtocols[network]
   501  	if !ok {
   502  		return tcpip.ErrUnknownProtocol
   503  	}
   504  	return netProto.Option(option)
   505  }
   506  
   507  // SetTransportProtocolOption allows configuring individual protocol level
   508  // options. This method returns an error if the protocol is not supported or
   509  // option is not supported by the protocol implementation or the provided value
   510  // is incorrect.
   511  func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option interface{}) *tcpip.Error {
   512  	transProtoState, ok := s.transportProtocols[transport]
   513  	if !ok {
   514  		return tcpip.ErrUnknownProtocol
   515  	}
   516  	return transProtoState.proto.SetOption(option)
   517  }
   518  
   519  // TransportProtocolOption allows retrieving individual protocol level option
   520  // values. This method returns an error if the protocol is not supported or
   521  // option is not supported by the protocol implementation.
   522  // var v tcp.SACKEnabled
   523  // if err := s.TransportProtocolOption(tcpip.TCPProtocolNumber, &v); err != nil {
   524  //   ...
   525  // }
   526  func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option interface{}) *tcpip.Error {
   527  	transProtoState, ok := s.transportProtocols[transport]
   528  	if !ok {
   529  		return tcpip.ErrUnknownProtocol
   530  	}
   531  	return transProtoState.proto.Option(option)
   532  }
   533  
   534  // SetTransportProtocolHandler sets the per-stack default handler for the given
   535  // protocol.
   536  //
   537  // It must be called only during initialization of the stack. Changing it as the
   538  // stack is operating is not supported.
   539  func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(*Route, TransportEndpointID, buffer.View, buffer.VectorisedView) bool) {
   540  	state := s.transportProtocols[p]
   541  	if state != nil {
   542  		state.defaultHandler = h
   543  	}
   544  }
   545  
// NowNanoseconds implements tcpip.Clock.NowNanoseconds by delegating to the
// stack's clock (the one supplied in Options.Clock, or the default chosen by
// New).
func (s *Stack) NowNanoseconds() int64 {
	return s.clock.NowNanoseconds()
}
   550  
// Stats returns a mutable copy of the current stats.
//
// The tcpip.Stats struct held by the stack is returned by value.
// NOTE(review): "mutable copy" suggests the counters inside tcpip.Stats are
// references shared with the stack, so updates through the copy are visible to
// it — confirm against the tcpip.Stats definition.
//
// This is not generally exported via the public interface, but is available
// internally.
func (s *Stack) Stats() tcpip.Stats {
	return s.stats
}
   558  
   559  // SetForwarding enables or disables the packet forwarding between NICs.
   560  func (s *Stack) SetForwarding(enable bool) {
   561  	// TODO(igudger, bgeffon): Expose via /proc/sys/net/ipv4/ip_forward.
   562  	s.mu.Lock()
   563  	s.forwarding = enable
   564  	s.mu.Unlock()
   565  }
   566  
   567  // Forwarding returns if the packet forwarding between NICs is enabled.
   568  func (s *Stack) Forwarding() bool {
   569  	// TODO(igudger, bgeffon): Expose via /proc/sys/net/ipv4/ip_forward.
   570  	s.mu.RLock()
   571  	defer s.mu.RUnlock()
   572  	return s.forwarding
   573  }
   574  
   575  // SetRouteTable assigns the route table to be used by this stack. It
   576  // specifies which NIC to use for given destination address ranges.
   577  func (s *Stack) SetRouteTable(table []tcpip.Route) {
   578  	s.mu.Lock()
   579  	defer s.mu.Unlock()
   580  
   581  	s.routeTable = table
   582  }
   583  
   584  // GetRouteTable returns the route table which is currently in use.
   585  func (s *Stack) GetRouteTable() []tcpip.Route {
   586  	s.mu.Lock()
   587  	defer s.mu.Unlock()
   588  	return append([]tcpip.Route(nil), s.routeTable...)
   589  }
   590  
   591  // NewEndpoint creates a new transport layer endpoint of the given protocol.
   592  func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
   593  	t, ok := s.transportProtocols[transport]
   594  	if !ok {
   595  		return nil, tcpip.ErrUnknownProtocol
   596  	}
   597  
   598  	return t.proto.NewEndpoint(s, network, waiterQueue)
   599  }
   600  
   601  // NewRawEndpoint creates a new raw transport layer endpoint of the given
   602  // protocol. Raw endpoints receive all traffic for a given protocol regardless
   603  // of address.
   604  func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, *tcpip.Error) {
   605  	if s.unassociatedFactory == nil {
   606  		return nil, tcpip.ErrNotPermitted
   607  	}
   608  
   609  	if !associated {
   610  		return s.unassociatedFactory.NewUnassociatedRawEndpoint(s, network, transport, waiterQueue)
   611  	}
   612  
   613  	t, ok := s.transportProtocols[transport]
   614  	if !ok {
   615  		return nil, tcpip.ErrUnknownProtocol
   616  	}
   617  
   618  	return t.proto.NewRawEndpoint(s, network, waiterQueue)
   619  }
   620  
   621  // createNIC creates a NIC with the provided id and link-layer endpoint, and
   622  // optionally enable it.
   623  func (s *Stack) createNIC(id tcpip.NICID, name string, ep LinkEndpoint, enabled, loopback bool) *tcpip.Error {
   624  	s.mu.Lock()
   625  	defer s.mu.Unlock()
   626  
   627  	// Make sure id is unique.
   628  	if _, ok := s.nics[id]; ok {
   629  		return tcpip.ErrDuplicateNICID
   630  	}
   631  
   632  	n := newNIC(s, id, name, ep, loopback)
   633  
   634  	s.nics[id] = n
   635  	if enabled {
   636  		return n.enable()
   637  	}
   638  
   639  	return nil
   640  }
   641  
   642  // CreateNIC creates a NIC with the provided id and link-layer endpoint.
   643  func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) *tcpip.Error {
   644  	return s.createNIC(id, "", ep, true, false)
   645  }
   646  
   647  // CreateNamedNIC creates a NIC with the provided id and link-layer endpoint,
   648  // and a human-readable name.
   649  func (s *Stack) CreateNamedNIC(id tcpip.NICID, name string, ep LinkEndpoint) *tcpip.Error {
   650  	return s.createNIC(id, name, ep, true, false)
   651  }
   652  
   653  // CreateNamedLoopbackNIC creates a NIC with the provided id and link-layer
   654  // endpoint, and a human-readable name.
   655  func (s *Stack) CreateNamedLoopbackNIC(id tcpip.NICID, name string, ep LinkEndpoint) *tcpip.Error {
   656  	return s.createNIC(id, name, ep, true, true)
   657  }
   658  
   659  // CreateDisabledNIC creates a NIC with the provided id and link-layer endpoint,
   660  // but leave it disable. Stack.EnableNIC must be called before the link-layer
   661  // endpoint starts delivering packets to it.
   662  func (s *Stack) CreateDisabledNIC(id tcpip.NICID, ep LinkEndpoint) *tcpip.Error {
   663  	return s.createNIC(id, "", ep, false, false)
   664  }
   665  
   666  // CreateDisabledNamedNIC is a combination of CreateNamedNIC and
   667  // CreateDisabledNIC.
   668  func (s *Stack) CreateDisabledNamedNIC(id tcpip.NICID, name string, ep LinkEndpoint) *tcpip.Error {
   669  	return s.createNIC(id, name, ep, false, false)
   670  }
   671  
   672  // EnableNIC enables the given NIC so that the link-layer endpoint can start
   673  // delivering packets to it.
   674  func (s *Stack) EnableNIC(id tcpip.NICID) *tcpip.Error {
   675  	s.mu.RLock()
   676  	defer s.mu.RUnlock()
   677  
   678  	nic := s.nics[id]
   679  	if nic == nil {
   680  		return tcpip.ErrUnknownNICID
   681  	}
   682  
   683  	return nic.enable()
   684  }
   685  
   686  // CheckNIC checks if a NIC is usable.
   687  func (s *Stack) CheckNIC(id tcpip.NICID) bool {
   688  	s.mu.RLock()
   689  	nic, ok := s.nics[id]
   690  	s.mu.RUnlock()
   691  	if ok {
   692  		return nic.linkEP.IsAttached()
   693  	}
   694  	return false
   695  }
   696  
   697  // NICSubnets returns a map of NICIDs to their associated subnets.
   698  func (s *Stack) NICAddressRanges() map[tcpip.NICID][]tcpip.Subnet {
   699  	s.mu.RLock()
   700  	defer s.mu.RUnlock()
   701  
   702  	nics := map[tcpip.NICID][]tcpip.Subnet{}
   703  
   704  	for id, nic := range s.nics {
   705  		nics[id] = append(nics[id], nic.AddressRanges()...)
   706  	}
   707  	return nics
   708  }
   709  
// NICInfo captures the name and addresses assigned to a NIC, together with its
// state flags, MTU and statistics. Stack.NICInfo builds one per NIC.
type NICInfo struct {
	// Name is the NIC's name, LinkAddress is the link address reported by
	// its link endpoint, and ProtocolAddresses are the NIC's primary
	// addresses.
	Name              string
	LinkAddress       tcpip.LinkAddress
	ProtocolAddresses []tcpip.ProtocolAddress

	// Flags indicate the state of the NIC.
	Flags NICStateFlags

	// MTU is the maximum transmission unit.
	MTU uint32

	// Stats are the NIC's statistics.
	Stats NICStats
}
   724  
   725  // NICInfo returns a map of NICIDs to their associated information.
   726  func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo {
   727  	s.mu.RLock()
   728  	defer s.mu.RUnlock()
   729  
   730  	nics := make(map[tcpip.NICID]NICInfo)
   731  	for id, nic := range s.nics {
   732  		flags := NICStateFlags{
   733  			Up:          true, // Netstack interfaces are always up.
   734  			Running:     nic.linkEP.IsAttached(),
   735  			Promiscuous: nic.isPromiscuousMode(),
   736  			Loopback:    nic.linkEP.Capabilities()&CapabilityLoopback != 0,
   737  		}
   738  		nics[id] = NICInfo{
   739  			Name:              nic.name,
   740  			LinkAddress:       nic.linkEP.LinkAddress(),
   741  			ProtocolAddresses: nic.PrimaryAddresses(),
   742  			Flags:             flags,
   743  			MTU:               nic.linkEP.MTU(),
   744  			Stats:             nic.stats,
   745  		}
   746  	}
   747  	return nics
   748  }
   749  
// NICStateFlags holds information about the state of a NIC. It is reported as
// the Flags field of NICInfo.
type NICStateFlags struct {
	// Up indicates whether the interface is running. Stack.NICInfo always
	// sets it to true.
	Up bool

	// Running indicates whether resources are allocated. Stack.NICInfo sets
	// it from the link endpoint's IsAttached.
	Running bool

	// Promiscuous indicates whether the interface is in promiscuous mode.
	Promiscuous bool

	// Loopback indicates whether the interface is a loopback.
	Loopback bool
}
   764  
// AddAddress adds a new network-layer address to the specified NIC. It is
// shorthand for AddAddressWithOptions with CanBePrimaryEndpoint, and returns
// the errors that method returns.
func (s *Stack) AddAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error {
	return s.AddAddressWithOptions(id, protocol, addr, CanBePrimaryEndpoint)
}
   769  
// AddProtocolAddress adds a new network-layer protocol address to the
// specified NIC. It is shorthand for AddProtocolAddressWithOptions with
// CanBePrimaryEndpoint, and returns the errors that method returns.
func (s *Stack) AddProtocolAddress(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress) *tcpip.Error {
	return s.AddProtocolAddressWithOptions(id, protocolAddress, CanBePrimaryEndpoint)
}
   775  
   776  // AddAddressWithOptions is the same as AddAddress, but allows you to specify
   777  // whether the new endpoint can be primary or not.
   778  func (s *Stack) AddAddressWithOptions(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, peb PrimaryEndpointBehavior) *tcpip.Error {
   779  	netProto, ok := s.networkProtocols[protocol]
   780  	if !ok {
   781  		return tcpip.ErrUnknownProtocol
   782  	}
   783  	return s.AddProtocolAddressWithOptions(id, tcpip.ProtocolAddress{
   784  		Protocol: protocol,
   785  		AddressWithPrefix: tcpip.AddressWithPrefix{
   786  			Address:   addr,
   787  			PrefixLen: netProto.DefaultPrefixLen(),
   788  		},
   789  	}, peb)
   790  }
   791  
   792  // AddProtocolAddressWithOptions is the same as AddProtocolAddress, but allows
   793  // you to specify whether the new endpoint can be primary or not.
   794  func (s *Stack) AddProtocolAddressWithOptions(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) *tcpip.Error {
   795  	s.mu.RLock()
   796  	defer s.mu.RUnlock()
   797  
   798  	nic := s.nics[id]
   799  	if nic == nil {
   800  		return tcpip.ErrUnknownNICID
   801  	}
   802  
   803  	return nic.AddAddress(protocolAddress, peb)
   804  }
   805  
   806  // AddAddressRange adds a range of addresses to the specified NIC. The range is
   807  // given by a subnet address, and all addresses contained in the subnet are
   808  // used except for the subnet address itself and the subnet's broadcast
   809  // address.
   810  func (s *Stack) AddAddressRange(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) *tcpip.Error {
   811  	s.mu.RLock()
   812  	defer s.mu.RUnlock()
   813  
   814  	if nic, ok := s.nics[id]; ok {
   815  		nic.AddAddressRange(protocol, subnet)
   816  		return nil
   817  	}
   818  
   819  	return tcpip.ErrUnknownNICID
   820  }
   821  
   822  // RemoveAddressRange removes the range of addresses from the specified NIC.
   823  func (s *Stack) RemoveAddressRange(id tcpip.NICID, subnet tcpip.Subnet) *tcpip.Error {
   824  	s.mu.RLock()
   825  	defer s.mu.RUnlock()
   826  
   827  	if nic, ok := s.nics[id]; ok {
   828  		nic.RemoveAddressRange(subnet)
   829  		return nil
   830  	}
   831  
   832  	return tcpip.ErrUnknownNICID
   833  }
   834  
   835  // RemoveAddress removes an existing network-layer address from the specified
   836  // NIC.
   837  func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) *tcpip.Error {
   838  	s.mu.RLock()
   839  	defer s.mu.RUnlock()
   840  
   841  	if nic, ok := s.nics[id]; ok {
   842  		return nic.RemoveAddress(addr)
   843  	}
   844  
   845  	return tcpip.ErrUnknownNICID
   846  }
   847  
   848  // AllAddresses returns a map of NICIDs to their protocol addresses (primary
   849  // and non-primary).
   850  func (s *Stack) AllAddresses() map[tcpip.NICID][]tcpip.ProtocolAddress {
   851  	s.mu.RLock()
   852  	defer s.mu.RUnlock()
   853  
   854  	nics := make(map[tcpip.NICID][]tcpip.ProtocolAddress)
   855  	for id, nic := range s.nics {
   856  		nics[id] = nic.AllAddresses()
   857  	}
   858  	return nics
   859  }
   860  
   861  // GetMainNICAddress returns the first primary address and prefix for the given
   862  // NIC and protocol. Returns an error if the NIC doesn't exist and an empty
   863  // value if the NIC doesn't have a primary address for the given protocol.
   864  func (s *Stack) GetMainNICAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, *tcpip.Error) {
   865  	s.mu.RLock()
   866  	defer s.mu.RUnlock()
   867  
   868  	nic, ok := s.nics[id]
   869  	if !ok {
   870  		return tcpip.AddressWithPrefix{}, tcpip.ErrUnknownNICID
   871  	}
   872  
   873  	for _, a := range nic.PrimaryAddresses() {
   874  		if a.Protocol == protocol {
   875  			return a.AddressWithPrefix, nil
   876  		}
   877  	}
   878  	return tcpip.AddressWithPrefix{}, nil
   879  }
   880  
   881  func (s *Stack) getRefEP(nic *NIC, localAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) (ref *referencedNetworkEndpoint) {
   882  	if len(localAddr) == 0 {
   883  		return nic.primaryEndpoint(netProto)
   884  	}
   885  	return nic.findEndpoint(netProto, localAddr, CanBePrimaryEndpoint)
   886  }
   887  
   888  // FindRoute creates a route to the given destination address, leaving through
   889  // the given nic and local address (if provided).
   890  func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (Route, *tcpip.Error) {
   891  	s.mu.RLock()
   892  	defer s.mu.RUnlock()
   893  
   894  	isBroadcast := remoteAddr == header.IPv4Broadcast
   895  	isMulticast := header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)
   896  	needRoute := !(isBroadcast || isMulticast || header.IsV6LinkLocalAddress(remoteAddr))
   897  	if id != 0 && !needRoute {
   898  		if nic, ok := s.nics[id]; ok {
   899  			if ref := s.getRefEP(nic, localAddr, netProto); ref != nil {
   900  				return makeRoute(netProto, ref.ep.ID().LocalAddress, remoteAddr, nic.linkEP.LinkAddress(), ref, s.handleLocal && !nic.loopback, multicastLoop && !nic.loopback), nil
   901  			}
   902  		}
   903  	} else {
   904  		for _, route := range s.routeTable {
   905  			if (id != 0 && id != route.NIC) || (len(remoteAddr) != 0 && !route.Destination.Contains(remoteAddr)) {
   906  				continue
   907  			}
   908  			if nic, ok := s.nics[route.NIC]; ok {
   909  				if ref := s.getRefEP(nic, localAddr, netProto); ref != nil {
   910  					if len(remoteAddr) == 0 {
   911  						// If no remote address was provided, then the route
   912  						// provided will refer to the link local address.
   913  						remoteAddr = ref.ep.ID().LocalAddress
   914  					}
   915  
   916  					r := makeRoute(netProto, ref.ep.ID().LocalAddress, remoteAddr, nic.linkEP.LinkAddress(), ref, s.handleLocal && !nic.loopback, multicastLoop && !nic.loopback)
   917  					if needRoute {
   918  						r.NextHop = route.Gateway
   919  					}
   920  					return r, nil
   921  				}
   922  			}
   923  		}
   924  	}
   925  
   926  	if !needRoute {
   927  		return Route{}, tcpip.ErrNetworkUnreachable
   928  	}
   929  
   930  	return Route{}, tcpip.ErrNoRoute
   931  }
   932  
   933  // CheckNetworkProtocol checks if a given network protocol is enabled in the
   934  // stack.
   935  func (s *Stack) CheckNetworkProtocol(protocol tcpip.NetworkProtocolNumber) bool {
   936  	_, ok := s.networkProtocols[protocol]
   937  	return ok
   938  }
   939  
   940  // CheckLocalAddress determines if the given local address exists, and if it
   941  // does, returns the id of the NIC it's bound to. Returns 0 if the address
   942  // does not exist.
   943  func (s *Stack) CheckLocalAddress(nicid tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID {
   944  	s.mu.RLock()
   945  	defer s.mu.RUnlock()
   946  
   947  	// If a NIC is specified, we try to find the address there only.
   948  	if nicid != 0 {
   949  		nic := s.nics[nicid]
   950  		if nic == nil {
   951  			return 0
   952  		}
   953  
   954  		ref := nic.findEndpoint(protocol, addr, CanBePrimaryEndpoint)
   955  		if ref == nil {
   956  			return 0
   957  		}
   958  
   959  		ref.decRef()
   960  
   961  		return nic.id
   962  	}
   963  
   964  	// Go through all the NICs.
   965  	for _, nic := range s.nics {
   966  		ref := nic.findEndpoint(protocol, addr, CanBePrimaryEndpoint)
   967  		if ref != nil {
   968  			ref.decRef()
   969  			return nic.id
   970  		}
   971  	}
   972  
   973  	return 0
   974  }
   975  
   976  // SetPromiscuousMode enables or disables promiscuous mode in the given NIC.
   977  func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) *tcpip.Error {
   978  	s.mu.RLock()
   979  	defer s.mu.RUnlock()
   980  
   981  	nic := s.nics[nicID]
   982  	if nic == nil {
   983  		return tcpip.ErrUnknownNICID
   984  	}
   985  
   986  	nic.setPromiscuousMode(enable)
   987  
   988  	return nil
   989  }
   990  
   991  // SetSpoofing enables or disables address spoofing in the given NIC, allowing
   992  // endpoints to bind to any address in the NIC.
   993  func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) *tcpip.Error {
   994  	s.mu.RLock()
   995  	defer s.mu.RUnlock()
   996  
   997  	nic := s.nics[nicID]
   998  	if nic == nil {
   999  		return tcpip.ErrUnknownNICID
  1000  	}
  1001  
  1002  	nic.setSpoofing(enable)
  1003  
  1004  	return nil
  1005  }
  1006  
  1007  // AddLinkAddress adds a link address to the stack link cache.
  1008  func (s *Stack) AddLinkAddress(nicid tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress) {
  1009  	fullAddr := tcpip.FullAddress{NIC: nicid, Addr: addr}
  1010  	s.linkAddrCache.add(fullAddr, linkAddr)
  1011  	// TODO: provide a way for a transport endpoint to receive a signal
  1012  	// that AddLinkAddress for a particular address has been called.
  1013  }
  1014  
  1015  // GetLinkAddress implements LinkAddressCache.GetLinkAddress.
  1016  func (s *Stack) GetLinkAddress(nicid tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, waker *sleep.Waker) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error) {
  1017  	s.mu.RLock()
  1018  	nic := s.nics[nicid]
  1019  	if nic == nil {
  1020  		s.mu.RUnlock()
  1021  		return "", nil, tcpip.ErrUnknownNICID
  1022  	}
  1023  	s.mu.RUnlock()
  1024  
  1025  	fullAddr := tcpip.FullAddress{NIC: nicid, Addr: addr}
  1026  	linkRes := s.linkAddrResolvers[protocol]
  1027  	return s.linkAddrCache.get(fullAddr, linkRes, localAddr, nic.linkEP, waker)
  1028  }
  1029  
  1030  // RemoveWaker implements LinkAddressCache.RemoveWaker.
  1031  func (s *Stack) RemoveWaker(nicid tcpip.NICID, addr tcpip.Address, waker *sleep.Waker) {
  1032  	s.mu.RLock()
  1033  	defer s.mu.RUnlock()
  1034  
  1035  	if nic := s.nics[nicid]; nic == nil {
  1036  		fullAddr := tcpip.FullAddress{NIC: nicid, Addr: addr}
  1037  		s.linkAddrCache.removeWaker(fullAddr, waker)
  1038  	}
  1039  }
  1040  
  1041  // RegisterTransportEndpoint registers the given endpoint with the stack
  1042  // transport dispatcher. Received packets that match the provided id will be
  1043  // delivered to the given endpoint; specifying a nic is optional, but
  1044  // nic-specific IDs have precedence over global ones.
  1045  func (s *Stack) RegisterTransportEndpoint(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, reusePort bool, bindToDevice tcpip.NICID) *tcpip.Error {
  1046  	return s.demux.registerEndpoint(netProtos, protocol, id, ep, reusePort, bindToDevice)
  1047  }
  1048  
// UnregisterTransportEndpoint removes the endpoint with the given id from the
// stack transport dispatcher.
//
// The request is forwarded to the stack's demuxer; nicID itself is not passed
// along.
func (s *Stack) UnregisterTransportEndpoint(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, bindToDevice tcpip.NICID) {
	s.demux.unregisterEndpoint(netProtos, protocol, id, ep, bindToDevice)
}
  1054  
  1055  // RegisterRawTransportEndpoint registers the given endpoint with the stack
  1056  // transport dispatcher. Received packets that match the provided transport
  1057  // protocol will be delivered to the given endpoint.
  1058  func (s *Stack) RegisterRawTransportEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) *tcpip.Error {
  1059  	return s.demux.registerRawEndpoint(netProto, transProto, ep)
  1060  }
  1061  
// UnregisterRawTransportEndpoint removes the endpoint for the transport
// protocol from the stack transport dispatcher.
//
// The request is forwarded to the stack's demuxer; nicID itself is not passed
// along.
func (s *Stack) UnregisterRawTransportEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) {
	s.demux.unregisterRawEndpoint(netProto, transProto, ep)
}
  1067  
  1068  // RegisterRestoredEndpoint records e as an endpoint that has been restored on
  1069  // this stack.
  1070  func (s *Stack) RegisterRestoredEndpoint(e ResumableEndpoint) {
  1071  	s.mu.Lock()
  1072  	s.resumableEndpoints = append(s.resumableEndpoints, e)
  1073  	s.mu.Unlock()
  1074  }
  1075  
  1076  // Resume restarts the stack after a restore. This must be called after the
  1077  // entire system has been restored.
  1078  func (s *Stack) Resume() {
  1079  	// ResumableEndpoint.Resume() may call other methods on s, so we can't hold
  1080  	// s.mu while resuming the endpoints.
  1081  	s.mu.Lock()
  1082  	eps := s.resumableEndpoints
  1083  	s.resumableEndpoints = nil
  1084  	s.mu.Unlock()
  1085  	for _, e := range eps {
  1086  		e.Resume(s)
  1087  	}
  1088  }
  1089  
  1090  // NetworkProtocolInstance returns the protocol instance in the stack for the
  1091  // specified network protocol. This method is public for protocol implementers
  1092  // and tests to use.
  1093  func (s *Stack) NetworkProtocolInstance(num tcpip.NetworkProtocolNumber) NetworkProtocol {
  1094  	if p, ok := s.networkProtocols[num]; ok {
  1095  		return p
  1096  	}
  1097  	return nil
  1098  }
  1099  
  1100  // TransportProtocolInstance returns the protocol instance in the stack for the
  1101  // specified transport protocol. This method is public for protocol implementers
  1102  // and tests to use.
  1103  func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) TransportProtocol {
  1104  	if pState, ok := s.transportProtocols[num]; ok {
  1105  		return pState.proto
  1106  	}
  1107  	return nil
  1108  }
  1109  
  1110  // AddTCPProbe installs a probe function that will be invoked on every segment
  1111  // received by a given TCP endpoint. The probe function is passed a copy of the
  1112  // TCP endpoint state before and after processing of the segment.
  1113  //
  1114  // NOTE: TCPProbe is added only to endpoints created after this call. Endpoints
  1115  // created prior to this call will not call the probe function.
  1116  //
  1117  // Further, installing two different probes back to back can result in some
  1118  // endpoints calling the first one and some the second one. There is no
  1119  // guarantee provided on which probe will be invoked. Ideally this should only
  1120  // be called once per stack.
  1121  func (s *Stack) AddTCPProbe(probe TCPProbeFunc) {
  1122  	s.mu.Lock()
  1123  	s.tcpProbeFunc = probe
  1124  	s.mu.Unlock()
  1125  }
  1126  
  1127  // GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil
  1128  // otherwise.
  1129  func (s *Stack) GetTCPProbe() TCPProbeFunc {
  1130  	s.mu.Lock()
  1131  	p := s.tcpProbeFunc
  1132  	s.mu.Unlock()
  1133  	return p
  1134  }
  1135  
  1136  // RemoveTCPProbe removes an installed TCP probe.
  1137  //
  1138  // NOTE: This only ensures that endpoints created after this call do not
  1139  // have a probe attached. Endpoints already created will continue to invoke
  1140  // TCP probe.
  1141  func (s *Stack) RemoveTCPProbe() {
  1142  	s.mu.Lock()
  1143  	s.tcpProbeFunc = nil
  1144  	s.mu.Unlock()
  1145  }
  1146  
  1147  // JoinGroup joins the given multicast group on the given NIC.
  1148  func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) *tcpip.Error {
  1149  	// TODO: notify network of subscription via igmp protocol.
  1150  	s.mu.RLock()
  1151  	defer s.mu.RUnlock()
  1152  
  1153  	if nic, ok := s.nics[nicID]; ok {
  1154  		return nic.joinGroup(protocol, multicastAddr)
  1155  	}
  1156  	return tcpip.ErrUnknownNICID
  1157  }
  1158  
  1159  // LeaveGroup leaves the given multicast group on the given NIC.
  1160  func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) *tcpip.Error {
  1161  	s.mu.RLock()
  1162  	defer s.mu.RUnlock()
  1163  
  1164  	if nic, ok := s.nics[nicID]; ok {
  1165  		return nic.leaveGroup(multicastAddr)
  1166  	}
  1167  	return tcpip.ErrUnknownNICID
  1168  }
  1169  
  1170  // IPTables returns the stack's iptables.
  1171  func (s *Stack) IPTables() iptables.IPTables {
  1172  	return s.tables
  1173  }
  1174  
// SetIPTables sets the stack's iptables, replacing whatever value was stored
// before.
func (s *Stack) SetIPTables(ipt iptables.IPTables) {
	s.tables = ipt
}
  1179  
  1180  // ICMPLimit returns the maximum number of ICMP messages that can be sent
  1181  // in one second.
  1182  func (s *Stack) ICMPLimit() rate.Limit {
  1183  	return s.icmpRateLimiter.Limit()
  1184  }
  1185  
// SetICMPLimit sets the maximum number of ICMP messages that can be sent
// in one second.
func (s *Stack) SetICMPLimit(newLimit rate.Limit) {
	s.icmpRateLimiter.SetLimit(newLimit)
}
  1191  
  1192  // ICMPBurst returns the maximum number of ICMP messages that can be sent
  1193  // in a single burst.
  1194  func (s *Stack) ICMPBurst() int {
  1195  	return s.icmpRateLimiter.Burst()
  1196  }
  1197  
// SetICMPBurst sets the maximum number of ICMP messages that can be sent
// in a single burst. The value is passed to the stack's ICMP rate limiter.
func (s *Stack) SetICMPBurst(burst int) {
	s.icmpRateLimiter.SetBurst(burst)
}
  1203  
  1204  // AllowICMPMessage returns true if we the rate limiter allows at least one
  1205  // ICMP message to be sent at this instant.
  1206  func (s *Stack) AllowICMPMessage() bool {
  1207  	return s.icmpRateLimiter.Allow()
  1208  }
  1209  
  1210  // PortSeed returns a 32 bit value that can be used as a seed value for port
  1211  // picking.
  1212  //
  1213  // NOTE: The seed is generated once during stack initialization only.
  1214  func (s *Stack) PortSeed() uint32 {
  1215  	return s.portSeed
  1216  }
  1217  
  1218  func generateRandUint32() uint32 {
  1219  	b := make([]byte, 4)
  1220  	if _, err := rand.Read(b); err != nil {
  1221  		panic(err)
  1222  	}
  1223  	return binary.LittleEndian.Uint32(b)
  1224  }