gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/tcpip/tcpip.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package tcpip provides the interfaces and related types that users of the
    16  // tcpip stack will use in order to create endpoints used to send and receive
    17  // data over the network stack.
    18  //
    19  // The starting point is the creation and configuration of a stack. A stack can
    20  // be created by calling the New() function of the tcpip/stack/stack package;
    21  // configuring a stack involves creating NICs (via calls to Stack.CreateNIC()),
    22  // adding network addresses (via calls to Stack.AddProtocolAddress()), and
    23  // setting a route table (via a call to Stack.SetRouteTable()).
    24  //
    25  // Once a stack is configured, endpoints can be created by calling
    26  // Stack.NewEndpoint(). Such endpoints can be used to send/receive data, connect
    27  // to peers, listen for connections, accept connections, etc., depending on the
    28  // transport protocol selected.
    29  package tcpip
    30  
    31  import (
    32  	"bytes"
    33  	"errors"
    34  	"fmt"
    35  	"io"
    36  	"math"
    37  	"math/bits"
    38  	"reflect"
    39  	"strconv"
    40  	"strings"
    41  	"time"
    42  
    43  	"gvisor.dev/gvisor/pkg/atomicbitops"
    44  	"gvisor.dev/gvisor/pkg/sync"
    45  	"gvisor.dev/gvisor/pkg/waiter"
    46  )
    47  
// Address sizes (in bytes) and network protocol numbers for IPv4 and IPv6.
//
// They are duplicated here because using the header package would cause an
// import cycle.
const (
	ipv4AddressSize    = 4
	ipv4ProtocolNumber = 0x0800
	ipv6AddressSize    = 16
	ipv6ProtocolNumber = 0x86dd
)
    55  
// Errors related to Subnet. Both are returned by NewSubnet.
var (
	// errSubnetLengthMismatch is returned when the address and the mask have
	// different lengths.
	errSubnetLengthMismatch = errors.New("subnet length of address and mask differ")
	// errSubnetAddressMasked is returned when the address has a bit set in a
	// position where the mask bit is clear.
	errSubnetAddressMasked  = errors.New("subnet address has bits set outside the mask")
)
    61  
    62  // ErrSaveRejection indicates a failed save due to unsupported networking state.
    63  // This type of errors is only used for save logic.
    64  type ErrSaveRejection struct {
    65  	Err error
    66  }
    67  
    68  // Error returns a sensible description of the save rejection error.
    69  func (e *ErrSaveRejection) Error() string {
    70  	return "save rejected due to unsupported networking state: " + e.Err.Error()
    71  }
    72  
    73  // MonotonicTime is a monotonic clock reading.
    74  //
    75  // +stateify savable
    76  type MonotonicTime struct {
    77  	nanoseconds int64
    78  }
    79  
    80  // String implements Stringer.
    81  func (mt MonotonicTime) String() string {
    82  	return strconv.FormatInt(mt.nanoseconds, 10)
    83  }
    84  
    85  // MonotonicTimeInfinite returns the monotonic timestamp as far away in the
    86  // future as possible.
    87  func MonotonicTimeInfinite() MonotonicTime {
    88  	return MonotonicTime{nanoseconds: math.MaxInt64}
    89  }
    90  
    91  // Before reports whether the monotonic clock reading mt is before u.
    92  func (mt MonotonicTime) Before(u MonotonicTime) bool {
    93  	return mt.nanoseconds < u.nanoseconds
    94  }
    95  
    96  // After reports whether the monotonic clock reading mt is after u.
    97  func (mt MonotonicTime) After(u MonotonicTime) bool {
    98  	return mt.nanoseconds > u.nanoseconds
    99  }
   100  
   101  // Add returns the monotonic clock reading mt+d.
   102  func (mt MonotonicTime) Add(d time.Duration) MonotonicTime {
   103  	return MonotonicTime{
   104  		nanoseconds: time.Unix(0, mt.nanoseconds).Add(d).Sub(time.Unix(0, 0)).Nanoseconds(),
   105  	}
   106  }
   107  
   108  // Sub returns the duration mt-u. If the result exceeds the maximum (or minimum)
   109  // value that can be stored in a Duration, the maximum (or minimum) duration
   110  // will be returned. To compute t-d for a duration d, use t.Add(-d).
   111  func (mt MonotonicTime) Sub(u MonotonicTime) time.Duration {
   112  	return time.Unix(0, mt.nanoseconds).Sub(time.Unix(0, u.nanoseconds))
   113  }
   114  
   115  // Milliseconds returns the time in milliseconds.
   116  func (mt MonotonicTime) Milliseconds() int64 {
   117  	return mt.nanoseconds / 1e6
   118  }
   119  
// A Clock provides the current time and schedules work for execution.
//
// Times returned by a Clock should always be used for application-visible
// time. Only monotonic times should be used for netstack internal timekeeping.
type Clock interface {
	// Now returns the current local time.
	Now() time.Time

	// NowMonotonic returns the current monotonic clock reading.
	NowMonotonic() MonotonicTime

	// AfterFunc waits for the duration d to elapse and then calls f in its
	// own goroutine. It returns a Timer that can be used to cancel the call
	// using its Stop method.
	AfterFunc(d time.Duration, f func()) Timer
}
   136  
// Timer represents a single event. A Timer must be created with
// Clock.AfterFunc.
type Timer interface {
	// Stop prevents the Timer from firing. It returns true if the call stops the
	// timer, false if the timer has already expired or been stopped.
	//
	// If Stop returns false, then the timer has already expired and the function
	// f of Clock.AfterFunc(d, f) has been started in its own goroutine; Stop
	// does not wait for f to complete before returning. If the caller needs to
	// know whether f has completed, it must coordinate with f explicitly.
	Stop() bool

	// Reset changes the timer to expire after duration d.
	//
	// Reset should be invoked only on stopped or expired timers. If the timer is
	// known to have expired, Reset can be used directly. Otherwise, the caller
	// must coordinate with the function f of Clock.AfterFunc(d, f).
	Reset(d time.Duration)
}
   156  
// Address represents the address of a network node. The bytes are stored in a
// fixed 16-byte array and length records how many of them are significant;
// the constructors in this file produce lengths of 4 or 16, and the zero
// value has length 0.
//
// NOTE(review): the previous comment described this as "a byte slice cast as
// a string" that may also hold a path for unix endpoints; that no longer
// matches the fixed-size representation below -- confirm and drop if stale.
//
// +stateify savable
type Address struct {
	addr   [16]byte
	length int
}
   165  
   166  // AddrFrom4 converts addr to an Address.
   167  func AddrFrom4(addr [4]byte) Address {
   168  	ret := Address{
   169  		length: 4,
   170  	}
   171  	// It's guaranteed that copy will return 4.
   172  	copy(ret.addr[:], addr[:])
   173  	return ret
   174  }
   175  
   176  // AddrFrom4Slice converts addr to an Address. It panics if len(addr) != 4.
   177  func AddrFrom4Slice(addr []byte) Address {
   178  	if len(addr) != 4 {
   179  		panic(fmt.Sprintf("bad address length for address %v", addr))
   180  	}
   181  	ret := Address{
   182  		length: 4,
   183  	}
   184  	// It's guaranteed that copy will return 4.
   185  	copy(ret.addr[:], addr)
   186  	return ret
   187  }
   188  
   189  // AddrFrom16 converts addr to an Address.
   190  func AddrFrom16(addr [16]byte) Address {
   191  	ret := Address{
   192  		length: 16,
   193  	}
   194  	// It's guaranteed that copy will return 16.
   195  	copy(ret.addr[:], addr[:])
   196  	return ret
   197  }
   198  
   199  // AddrFrom16Slice converts addr to an Address. It panics if len(addr) != 16.
   200  func AddrFrom16Slice(addr []byte) Address {
   201  	if len(addr) != 16 {
   202  		panic(fmt.Sprintf("bad address length for address %v", addr))
   203  	}
   204  	ret := Address{
   205  		length: 16,
   206  	}
   207  	// It's guaranteed that copy will return 16.
   208  	copy(ret.addr[:], addr)
   209  	return ret
   210  }
   211  
   212  // AddrFromSlice converts addr to an Address. It returns the Address zero value
   213  // if len(addr) != 4 or 16.
   214  func AddrFromSlice(addr []byte) Address {
   215  	switch len(addr) {
   216  	case ipv4AddressSize:
   217  		return AddrFrom4Slice(addr)
   218  	case ipv6AddressSize:
   219  		return AddrFrom16Slice(addr)
   220  	}
   221  	return Address{}
   222  }
   223  
   224  // As4 returns a as a 4 byte array. It panics if the address length is not 4.
   225  func (a Address) As4() [4]byte {
   226  	if a.Len() != 4 {
   227  		panic(fmt.Sprintf("bad address length for address %v", a.addr))
   228  	}
   229  	return [4]byte(a.addr[:4])
   230  }
   231  
   232  // As16 returns a as a 16 byte array. It panics if the address length is not 16.
   233  func (a Address) As16() [16]byte {
   234  	if a.Len() != 16 {
   235  		panic(fmt.Sprintf("bad address length for address %v", a.addr))
   236  	}
   237  	return [16]byte(a.addr[:16])
   238  }
   239  
// AsSlice returns the significant bytes of a (the first Len() bytes) as a
// byte slice. The slice is a window into a's own storage rather than a copy,
// so callers should be careful: writes through it modify a.
//
// +checkescape
func (a *Address) AsSlice() []byte {
	return a.addr[:a.length]
}
   247  
   248  // BitLen returns the length in bits of a.
   249  func (a Address) BitLen() int {
   250  	return a.Len() * 8
   251  }
   252  
   253  // Len returns the length in bytes of a.
   254  func (a Address) Len() int {
   255  	return a.length
   256  }
   257  
   258  // WithPrefix returns the address with a prefix that represents a point subnet.
   259  func (a Address) WithPrefix() AddressWithPrefix {
   260  	return AddressWithPrefix{
   261  		Address:   a,
   262  		PrefixLen: a.BitLen(),
   263  	}
   264  }
   265  
   266  // Unspecified returns true if the address is unspecified.
   267  func (a Address) Unspecified() bool {
   268  	for _, b := range a.addr {
   269  		if b != 0 {
   270  			return false
   271  		}
   272  	}
   273  	return true
   274  }
   275  
   276  // Equal returns whether a and other are equal. It exists for use by the cmp
   277  // library.
   278  func (a Address) Equal(other Address) bool {
   279  	return a == other
   280  }
   281  
   282  // MatchingPrefix returns the matching prefix length in bits.
   283  //
   284  // Panics if b and a have different lengths.
   285  func (a Address) MatchingPrefix(b Address) uint8 {
   286  	const bitsInAByte = 8
   287  
   288  	if a.Len() != b.Len() {
   289  		panic(fmt.Sprintf("addresses %s and %s do not have the same length", a, b))
   290  	}
   291  
   292  	var prefix uint8
   293  	for i := 0; i < a.length; i++ {
   294  		aByte := a.addr[i]
   295  		bByte := b.addr[i]
   296  
   297  		if aByte == bByte {
   298  			prefix += bitsInAByte
   299  			continue
   300  		}
   301  
   302  		// Count the remaining matching bits in the byte from MSbit to LSBbit.
   303  		mask := uint8(1) << (bitsInAByte - 1)
   304  		for {
   305  			if aByte&mask == bByte&mask {
   306  				prefix++
   307  				mask >>= 1
   308  				continue
   309  			}
   310  
   311  			break
   312  		}
   313  
   314  		break
   315  	}
   316  
   317  	return prefix
   318  }
   319  
   320  // AddressMask is a bitmask for an address.
   321  //
   322  // +stateify savable
   323  type AddressMask struct {
   324  	mask   [16]byte
   325  	length int
   326  }
   327  
   328  // MaskFrom returns a Mask based on str.
   329  //
   330  // MaskFrom may allocate, and so should not be in hot paths.
   331  func MaskFrom(str string) AddressMask {
   332  	mask := AddressMask{length: len(str)}
   333  	copy(mask.mask[:], str)
   334  	return mask
   335  }
   336  
   337  // MaskFromBytes returns a Mask based on bs.
   338  func MaskFromBytes(bs []byte) AddressMask {
   339  	mask := AddressMask{length: len(bs)}
   340  	copy(mask.mask[:], bs)
   341  	return mask
   342  }
   343  
   344  // String implements Stringer.
   345  func (m AddressMask) String() string {
   346  	return fmt.Sprintf("%x", m.mask)
   347  }
   348  
   349  // AsSlice returns a as a byte slice. Callers should be careful as it can
   350  // return a window into existing memory.
   351  func (m *AddressMask) AsSlice() []byte {
   352  	return []byte(m.mask[:m.length])
   353  }
   354  
   355  // BitLen returns the length of the mask in bits.
   356  func (m AddressMask) BitLen() int {
   357  	return m.length * 8
   358  }
   359  
   360  // Len returns the length of the mask in bytes.
   361  func (m AddressMask) Len() int {
   362  	return m.length
   363  }
   364  
   365  // Prefix returns the number of bits before the first host bit.
   366  func (m AddressMask) Prefix() int {
   367  	p := 0
   368  	for _, b := range m.mask[:m.length] {
   369  		p += bits.LeadingZeros8(^b)
   370  	}
   371  	return p
   372  }
   373  
   374  // Equal returns whether m and other are equal. It exists for use by the cmp
   375  // library.
   376  func (m AddressMask) Equal(other AddressMask) bool {
   377  	return m == other
   378  }
   379  
// Subnet is a subnet defined by its address and mask.
//
// NewSubnet builds values of this type with a positional literal, so the
// field order below must be kept.
type Subnet struct {
	address Address
	mask    AddressMask
}
   385  
   386  // NewSubnet creates a new Subnet, checking that the address and mask are the same length.
   387  func NewSubnet(a Address, m AddressMask) (Subnet, error) {
   388  	if a.Len() != m.Len() {
   389  		return Subnet{}, errSubnetLengthMismatch
   390  	}
   391  	for i := 0; i < a.Len(); i++ {
   392  		if a.addr[i]&^m.mask[i] != 0 {
   393  			return Subnet{}, errSubnetAddressMasked
   394  		}
   395  	}
   396  	return Subnet{a, m}, nil
   397  }
   398  
   399  // String implements Stringer.
   400  func (s Subnet) String() string {
   401  	return fmt.Sprintf("%s/%d", s.ID(), s.Prefix())
   402  }
   403  
   404  // Contains returns true iff the address is of the same length and matches the
   405  // subnet address and mask.
   406  func (s *Subnet) Contains(a Address) bool {
   407  	if a.Len() != s.address.Len() {
   408  		return false
   409  	}
   410  	for i := 0; i < a.Len(); i++ {
   411  		if a.addr[i]&s.mask.mask[i] != s.address.addr[i] {
   412  			return false
   413  		}
   414  	}
   415  	return true
   416  }
   417  
   418  // ID returns the subnet ID.
   419  func (s *Subnet) ID() Address {
   420  	return s.address
   421  }
   422  
   423  // Bits returns the number of ones (network bits) and zeros (host bits) in the
   424  // subnet mask.
   425  func (s *Subnet) Bits() (ones int, zeros int) {
   426  	ones = s.mask.Prefix()
   427  	return ones, s.mask.BitLen() - ones
   428  }
   429  
   430  // Prefix returns the number of bits before the first host bit.
   431  func (s *Subnet) Prefix() int {
   432  	return s.mask.Prefix()
   433  }
   434  
   435  // Mask returns the subnet mask.
   436  func (s *Subnet) Mask() AddressMask {
   437  	return s.mask
   438  }
   439  
   440  // Broadcast returns the subnet's broadcast address.
   441  func (s *Subnet) Broadcast() Address {
   442  	addrCopy := s.address
   443  	for i := 0; i < addrCopy.Len(); i++ {
   444  		addrCopy.addr[i] |= ^s.mask.mask[i]
   445  	}
   446  	return addrCopy
   447  }
   448  
   449  // IsBroadcast returns true if the address is considered a broadcast address.
   450  func (s *Subnet) IsBroadcast(address Address) bool {
   451  	// Only IPv4 supports the notion of a broadcast address.
   452  	if address.Len() != ipv4AddressSize {
   453  		return false
   454  	}
   455  
   456  	// Normally, we would just compare address with the subnet's broadcast
   457  	// address but there is an exception where a simple comparison is not
   458  	// correct. This exception is for /31 and /32 IPv4 subnets where all
   459  	// addresses are considered valid host addresses.
   460  	//
   461  	// For /31 subnets, the case is easy. RFC 3021 Section 2.1 states that
   462  	// both addresses in a /31 subnet "MUST be interpreted as host addresses."
   463  	//
   464  	// For /32, the case is a bit more vague. RFC 3021 makes no mention of /32
   465  	// subnets. However, the same reasoning applies - if an exception is not
   466  	// made, then there do not exist any host addresses in a /32 subnet. RFC
   467  	// 4632 Section 3.1 also vaguely implies this interpretation by referring
   468  	// to addresses in /32 subnets as "host routes."
   469  	return s.Prefix() <= 30 && s.Broadcast() == address
   470  }
   471  
   472  // Equal returns true if this Subnet is equal to the given Subnet.
   473  func (s Subnet) Equal(o Subnet) bool {
   474  	// If this changes, update Route.Equal accordingly.
   475  	return s == o
   476  }
   477  
// NICID is a number that uniquely identifies a NIC. It is a signed 32-bit
// integer.
type NICID int32
   480  
// ShutdownFlags represents flags that can be passed to the Shutdown() method
// of the Endpoint interface.
type ShutdownFlags int

// Values of the flags that can be passed to the Shutdown() method. Each flag
// is a distinct bit (1 and 2), so they can be OR'ed together.
const (
	ShutdownRead ShutdownFlags = 1 << iota
	ShutdownWrite
)
   491  
// PacketType is used to indicate the destination of the packet.
type PacketType uint8

// Possible PacketType values, numbered consecutively from zero.
const (
	// PacketHost indicates a packet addressed to the local host.
	PacketHost PacketType = iota

	// PacketOtherHost indicates an outgoing packet addressed to
	// another host caught by a NIC in promiscuous mode.
	PacketOtherHost

	// PacketOutgoing indicates a packet originating from the local host
	// that is looped back to a packet socket.
	PacketOutgoing

	// PacketBroadcast indicates a link layer broadcast packet.
	PacketBroadcast

	// PacketMulticast indicates a link layer multicast packet.
	PacketMulticast
)
   513  
// FullAddress represents a full transport node address, as required by the
// Connect() and Bind() methods of Endpoint.
//
// +stateify savable
type FullAddress struct {
	// NIC is the ID of the NIC this address refers to.
	//
	// This may not be used by all endpoint types.
	NIC NICID

	// Addr is the network address.
	Addr Address

	// Port is the transport port.
	//
	// This may not be used by all endpoint types.
	Port uint16

	// LinkAddr is the link layer address.
	LinkAddr LinkAddress
}
   535  
// Payloader is an interface that provides data.
//
// This interface allows the endpoint to request the amount of data it needs
// based on internal buffers without exposing them.
type Payloader interface {
	io.Reader

	// Len returns the number of bytes of the unread portion of the
	// Reader.
	Len() int
}

// Compile-time checks that the standard library's buffer and reader types
// satisfy Payloader.
var _ Payloader = (*bytes.Buffer)(nil)
var _ Payloader = (*bytes.Reader)(nil)
   550  
   551  var _ io.Writer = (*SliceWriter)(nil)
   552  
   553  // SliceWriter implements io.Writer for slices.
   554  type SliceWriter []byte
   555  
   556  // Write implements io.Writer.Write.
   557  func (s *SliceWriter) Write(b []byte) (int, error) {
   558  	n := copy(*s, b)
   559  	*s = (*s)[n:]
   560  	var err error
   561  	if n != len(b) {
   562  		err = io.ErrShortWrite
   563  	}
   564  	return n, err
   565  }
   566  
   567  var _ io.Writer = (*LimitedWriter)(nil)
   568  
   569  // A LimitedWriter writes to W but limits the amount of data copied to just N
   570  // bytes. Each call to Write updates N to reflect the new amount remaining.
   571  type LimitedWriter struct {
   572  	W io.Writer
   573  	N int64
   574  }
   575  
   576  func (l *LimitedWriter) Write(p []byte) (int, error) {
   577  	pLen := int64(len(p))
   578  	if pLen > l.N {
   579  		p = p[:l.N]
   580  	}
   581  	n, err := l.W.Write(p)
   582  	n64 := int64(n)
   583  	if err == nil && n64 != pLen {
   584  		err = io.ErrShortWrite
   585  	}
   586  	l.N -= n64
   587  	return n, err
   588  }
   589  
// SendableControlMessages contains socket control messages that can be written.
//
// Each value field is paired with a Has* flag that indicates whether the
// value is set.
//
// +stateify savable
type SendableControlMessages struct {
	// HasTTL indicates whether TTL is valid/set.
	HasTTL bool

	// TTL is the IPv4 Time To Live of the associated packet.
	TTL uint8

	// HasHopLimit indicates whether HopLimit is valid/set.
	HasHopLimit bool

	// HopLimit is the IPv6 Hop Limit of the associated packet.
	HopLimit uint8

	// HasIPv6PacketInfo indicates whether IPv6PacketInfo is set.
	HasIPv6PacketInfo bool

	// IPv6PacketInfo holds interface and address data for the associated
	// packet.
	IPv6PacketInfo IPv6PacketInfo
}
   612  
// ReceivableControlMessages contains socket control messages that can be
// received.
//
// Most value fields are paired with a Has* flag that indicates whether the
// value is set.
//
// +stateify savable
type ReceivableControlMessages struct {
	// Timestamp is the time that the last packet used to create the read data
	// was received. It is only meaningful when HasTimestamp is true.
	Timestamp time.Time `state:".(int64)"`

	// HasInq indicates whether Inq is valid/set.
	HasInq bool

	// Inq is the number of bytes ready to be received.
	Inq int32

	// HasTOS indicates whether TOS is valid/set.
	HasTOS bool

	// TOS is the IPv4 type of service of the associated packet.
	TOS uint8

	// HasTTL indicates whether TTL is valid/set.
	HasTTL bool

	// TTL is the IPv4 Time To Live of the associated packet.
	TTL uint8

	// HasHopLimit indicates whether HopLimit is valid/set.
	HasHopLimit bool

	// HopLimit is the IPv6 Hop Limit of the associated packet.
	HopLimit uint8

	// HasTimestamp indicates whether Timestamp is valid/set.
	HasTimestamp bool

	// HasTClass indicates whether TClass is valid/set.
	HasTClass bool

	// TClass is the IPv6 traffic class of the associated packet.
	TClass uint32

	// HasIPPacketInfo indicates whether PacketInfo is set.
	HasIPPacketInfo bool

	// PacketInfo holds interface and address data on an incoming packet.
	PacketInfo IPPacketInfo

	// HasIPv6PacketInfo indicates whether IPv6PacketInfo is set.
	HasIPv6PacketInfo bool

	// IPv6PacketInfo holds interface and address data on an incoming packet.
	IPv6PacketInfo IPv6PacketInfo

	// HasOriginalDstAddress indicates whether OriginalDstAddress is
	// set.
	HasOriginalDstAddress bool

	// OriginalDstAddress holds the original destination address
	// and port of the incoming packet.
	OriginalDstAddress FullAddress

	// SockErr is the dequeued socket error on recvmsg(MSG_ERRQUEUE).
	SockErr *SockError
}
   678  
// PacketOwner is used to get the UID and GID of the packet.
type PacketOwner interface {
	// KUID returns the KUID of the packet.
	KUID() uint32

	// KGID returns the KGID of the packet.
	KGID() uint32
}
   687  
// ReadOptions contains options for Endpoint.Read.
type ReadOptions struct {
	// Peek indicates whether this read is a peek.
	Peek bool

	// NeedRemoteAddr indicates whether to return the remote address, if
	// supported. The address is reported in ReadResult.RemoteAddr.
	NeedRemoteAddr bool

	// NeedLinkPacketInfo indicates whether to return the link-layer information,
	// if supported. The information is reported in ReadResult.LinkPacketInfo.
	NeedLinkPacketInfo bool
}
   701  
// ReadResult represents the result of a successful Endpoint.Read.
type ReadResult struct {
	// Count is the number of bytes received and written to the buffer.
	Count int

	// Total is the number of bytes of the received packet. This can be used to
	// determine whether the read is truncated.
	Total int

	// ControlMessages is the control messages received.
	ControlMessages ReceivableControlMessages

	// RemoteAddr is the remote address if ReadOptions.NeedRemoteAddr is true.
	RemoteAddr FullAddress

	// LinkPacketInfo is the link-layer information of the received packet if
	// ReadOptions.NeedLinkPacketInfo is true.
	LinkPacketInfo LinkPacketInfo
}
   721  
// Endpoint is the interface implemented by transport protocols (e.g., tcp, udp)
// that exposes functionality like read, write, connect, etc. to users of the
// networking stack.
type Endpoint interface {
	// Close puts the endpoint in a closed state and frees all resources
	// associated with it. Close only initiates the teardown process; the
	// Endpoint may not be fully closed when Close returns.
	Close()

	// Abort initiates an expedited endpoint teardown. As compared to
	// Close, Abort prioritizes closing the Endpoint quickly over cleanly.
	// Abort is best effort; implementing Abort with Close is acceptable.
	Abort()

	// Read reads data from the endpoint and optionally writes to dst.
	//
	// This method does not block if there is no data pending; in this case,
	// ErrWouldBlock is returned.
	//
	// If a non-zero number of bytes are successfully read and written to dst,
	// err must be nil. Otherwise, if dst failed to write anything,
	// ErrBadBuffer should be returned.
	Read(io.Writer, ReadOptions) (ReadResult, Error)

	// Write writes data to the endpoint's peer. This method does not block if
	// the data cannot be written.
	//
	// Unlike io.Writer.Write, Endpoint.Write transfers ownership of any bytes
	// successfully written to the Endpoint. That is, if a call to
	// Write(SlicePayload{data}) returns (n, err), it may retain data[:n], and
	// the caller should not use data[:n] after Write returns.
	//
	// Note that unlike io.Writer.Write, it is not an error for Write to
	// perform a partial write (if n > 0, no error may be returned). Only
	// stream (TCP) Endpoints may return partial writes, and even then only
	// in the case where writing additional data would block. Other Endpoints
	// will either write the entire message or return an error.
	Write(Payloader, WriteOptions) (int64, Error)

	// Connect connects the endpoint to its peer. Specifying a NIC is
	// optional.
	//
	// There are three classes of return values:
	//	nil -- the attempt to connect succeeded.
	//	ErrConnectStarted/ErrAlreadyConnecting -- the connect attempt started
	//		but hasn't completed yet. In this case, the caller must call Connect
	//		or GetSockOpt(ErrorOption) when the endpoint becomes writable to
	//		get the actual result. The first call to Connect after the socket has
	//		connected returns nil. Calling connect again results in ErrAlreadyConnected.
	//	Anything else -- the attempt to connect failed.
	//
	// If address.Addr is empty, this means that Endpoint has to be
	// disconnected if this is supported, otherwise
	// ErrAddressFamilyNotSupported must be returned.
	Connect(address FullAddress) Error

	// Disconnect disconnects the endpoint from its peer.
	Disconnect() Error

	// Shutdown closes the read and/or write end of the endpoint connection
	// to its peer, as selected by flags.
	Shutdown(flags ShutdownFlags) Error

	// Listen puts the endpoint in "listen" mode, which allows it to accept
	// new connections.
	Listen(backlog int) Error

	// Accept returns a new endpoint if a peer has established a connection
	// to an endpoint previously set to listen mode. This method does not
	// block if no new connections are available.
	//
	// The returned Queue is the wait queue for the newly created endpoint.
	//
	// If peerAddr is not nil then it is populated with the peer address of the
	// returned endpoint.
	Accept(peerAddr *FullAddress) (Endpoint, *waiter.Queue, Error)

	// Bind binds the endpoint to a specific local address and port.
	// Specifying a NIC is optional.
	Bind(address FullAddress) Error

	// GetLocalAddress returns the address to which the endpoint is bound.
	GetLocalAddress() (FullAddress, Error)

	// GetRemoteAddress returns the address to which the endpoint is
	// connected.
	GetRemoteAddress() (FullAddress, Error)

	// Readiness returns the current readiness of the endpoint. For example,
	// if waiter.EventIn is set, the endpoint is immediately readable.
	Readiness(mask waiter.EventMask) waiter.EventMask

	// SetSockOpt sets a socket option.
	SetSockOpt(opt SettableSocketOption) Error

	// SetSockOptInt sets a socket option, for simple cases where a value
	// has the int type.
	SetSockOptInt(opt SockOptInt, v int) Error

	// GetSockOpt gets a socket option.
	GetSockOpt(opt GettableSocketOption) Error

	// GetSockOptInt gets a socket option for simple cases where a return
	// value has the int type.
	GetSockOptInt(SockOptInt) (int, Error)

	// State returns a socket's lifecycle state. The returned value is
	// protocol-specific and is primarily used for diagnostics.
	State() uint32

	// ModerateRecvBuf should be called every time data is copied to user
	// space. This allows for dynamic tuning of recv buffer space for a
	// given socket.
	//
	// NOTE: This method is a no-op for sockets other than TCP.
	ModerateRecvBuf(copied int)

	// Info returns a copy of the transport endpoint info.
	Info() EndpointInfo

	// Stats returns a reference to the endpoint stats.
	Stats() EndpointStats

	// SetOwner sets the task owner to the endpoint owner.
	SetOwner(owner PacketOwner)

	// LastError clears and returns the last error reported by the endpoint.
	LastError() Error

	// SocketOptions returns the structure which contains all the socket
	// level options.
	SocketOptions() *SocketOptions
}
   855  
// EndpointWithPreflight is the interface implemented by endpoints that need
// to expose the `Preflight` method for preparing the endpoint prior to
// calling `Write`.
type EndpointWithPreflight interface {
	// Preflight prepares the endpoint for writes using the provided
	// WriteOptions, returning an error if the options were incompatible with
	// the endpoint's current state.
	Preflight(WriteOptions) Error
}
   865  
// LinkPacketInfo holds link-layer information for a received packet.
//
// +stateify savable
type LinkPacketInfo struct {
	// Protocol is the NetworkProtocolNumber for the packet.
	Protocol NetworkProtocolNumber

	// PktType indicates the destination of the packet.
	PktType PacketType
}
   876  
// EndpointInfo is the interface implemented by each endpoint info struct.
type EndpointInfo interface {
	// IsEndpointInfo is an empty method whose only purpose is to mark a type
	// as implementing the tcpip.EndpointInfo marker interface.
	IsEndpointInfo()
}
   883  
// EndpointStats is the interface implemented by each endpoint stats struct.
type EndpointStats interface {
	// IsEndpointStats is an empty method whose only purpose is to mark a type
	// as implementing the tcpip.EndpointStats marker interface.
	IsEndpointStats()
}
   890  
// WriteOptions contains options for Endpoint.Write.
type WriteOptions struct {
	// To, if not nil, is the address to write to instead of the endpoint's
	// peer.
	To *FullAddress

	// More has the same semantics as Linux's MSG_MORE.
	More bool

	// EndOfRecord has the same semantics as Linux's MSG_EOR.
	EndOfRecord bool

	// Atomic means that all data fetched from Payloader must be written to the
	// endpoint. If Atomic is false, then data fetched from the Payloader may be
	// discarded if available endpoint buffer space is insufficient.
	Atomic bool

	// ControlMessages contains optional overrides used when writing a packet.
	ControlMessages SendableControlMessages
}
   911  
// SockOptInt represents socket options whose values have the int type.
type SockOptInt int

// The SockOptInt values below are assigned sequentially with iota, so their
// numeric values depend on declaration order.
const (
	// KeepaliveCountOption is used by SetSockOptInt/GetSockOptInt to
	// specify the number of un-ACKed TCP keepalives that will be sent
	// before the connection is closed.
	KeepaliveCountOption SockOptInt = iota

	// IPv4TOSOption is used by SetSockOptInt/GetSockOptInt to specify TOS
	// for all subsequent outgoing IPv4 packets from the endpoint.
	IPv4TOSOption

	// IPv6TrafficClassOption is used by SetSockOptInt/GetSockOptInt to
	// specify TOS for all subsequent outgoing IPv6 packets from the
	// endpoint.
	IPv6TrafficClassOption

	// MaxSegOption is used by SetSockOptInt/GetSockOptInt to set/get the
	// current Maximum Segment Size (MSS) value as specified using the
	// TCP_MAXSEG option.
	MaxSegOption

	// MTUDiscoverOption is used to set/get the path MTU discovery setting.
	//
	// NOTE: Setting this option to any value other than PMTUDiscoveryDont
	// is not supported and will fail as such, and getting this option will
	// always return PMTUDiscoveryDont.
	MTUDiscoverOption

	// MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control
	// the default TTL value for multicast messages. The default is 1.
	MulticastTTLOption

	// ReceiveQueueSizeOption is used in GetSockOptInt to specify that the
	// number of unread bytes in the input buffer should be returned.
	ReceiveQueueSizeOption

	// SendQueueSizeOption is used in GetSockOptInt to specify that the
	// number of unread bytes in the output buffer should be returned.
	//
	// NOTE(review): "unread" mirrors the wording of ReceiveQueueSizeOption;
	// presumably this means bytes not yet sent - confirm against the
	// endpoint implementations.
	SendQueueSizeOption

	// IPv4TTLOption is used by SetSockOptInt/GetSockOptInt to control the default
	// TTL value for unicast messages.
	//
	// The default is configured by DefaultTTLOption. A UseDefaultIPv4TTL value
	// configures the endpoint to use the default.
	IPv4TTLOption

	// IPv6HopLimitOption is used by SetSockOptInt/GetSockOptInt to control the
	// default hop limit value for unicast messages.
	//
	// The default is configured by DefaultTTLOption. A UseDefaultIPv6HopLimit
	// value configures the endpoint to use the default.
	IPv6HopLimitOption

	// TCPSynCountOption is used by SetSockOptInt/GetSockOptInt to specify
	// the number of SYN retransmits that TCP should send before aborting
	// the attempt to connect. It cannot exceed 255.
	//
	// NOTE: This option is currently only stubbed out and is a no-op.
	TCPSynCountOption

	// TCPWindowClampOption is used by SetSockOptInt/GetSockOptInt to bound
	// the size of the advertised window to this value.
	//
	// NOTE: This option is currently only stubbed out and is a no-op.
	TCPWindowClampOption

	// IPv6Checksum is used to request the stack to populate and validate the IPv6
	// checksum for transport level headers.
	IPv6Checksum
)
   985  
// Sentinel values for IPv4TTLOption and IPv6HopLimitOption that select the
// protocol-wide default instead of a per-endpoint value.
const (
	// UseDefaultIPv4TTL is the IPv4TTLOption value that configures an endpoint to
	// use the default TTL currently configured by the IPv4 protocol (see
	// DefaultTTLOption).
	UseDefaultIPv4TTL = 0

	// UseDefaultIPv6HopLimit is the IPv6HopLimitOption value that configures an
	// endpoint to use the default hop limit currently configured by the IPv6
	// protocol (see DefaultTTLOption).
	UseDefaultIPv6HopLimit = -1
)
   997  
// PMTUDStrategy is the kind of path MTU discovery (PMTUD) to perform.
type PMTUDStrategy int

// The PMTUDStrategy values are assigned sequentially with iota, starting at
// zero with PMTUDiscoveryWant.
const (
	// PMTUDiscoveryWant is a setting of the MTUDiscoverOption to use
	// per-route settings.
	PMTUDiscoveryWant PMTUDStrategy = iota

	// PMTUDiscoveryDont is a setting of the MTUDiscoverOption to disable
	// path MTU discovery.
	PMTUDiscoveryDont

	// PMTUDiscoveryDo is a setting of the MTUDiscoverOption to always do
	// path MTU discovery.
	PMTUDiscoveryDo

	// PMTUDiscoveryProbe is a setting of the MTUDiscoverOption to set DF
	// but ignore path MTU.
	PMTUDiscoveryProbe
)
  1018  
// GettableNetworkProtocolOption is a marker interface for network protocol
// options that may be queried.
type GettableNetworkProtocolOption interface {
	// isGettableNetworkProtocolOption is unexported, so only types declared
	// in this package can implement the interface.
	isGettableNetworkProtocolOption()
}

// SettableNetworkProtocolOption is a marker interface for network protocol
// options that may be set.
type SettableNetworkProtocolOption interface {
	// isSettableNetworkProtocolOption is unexported, so only types declared
	// in this package can implement the interface.
	isSettableNetworkProtocolOption()
}
  1030  
// DefaultTTLOption is used by stack.(*Stack).NetworkProtocolOption to specify
// a default TTL.
type DefaultTTLOption uint8

// isGettableNetworkProtocolOption marks *DefaultTTLOption as a
// GettableNetworkProtocolOption.
func (*DefaultTTLOption) isGettableNetworkProtocolOption() {}

// isSettableNetworkProtocolOption marks *DefaultTTLOption as a
// SettableNetworkProtocolOption.
func (*DefaultTTLOption) isSettableNetworkProtocolOption() {}
  1038  
// GettableTransportProtocolOption is a marker interface for transport protocol
// options that may be queried.
type GettableTransportProtocolOption interface {
	// isGettableTransportProtocolOption is unexported, so only types
	// declared in this package can implement the interface.
	isGettableTransportProtocolOption()
}

// SettableTransportProtocolOption is a marker interface for transport protocol
// options that may be set.
type SettableTransportProtocolOption interface {
	// isSettableTransportProtocolOption is unexported, so only types
	// declared in this package can implement the interface.
	isSettableTransportProtocolOption()
}
  1050  
// TCPSACKEnabled enables/disables the SACK option for TCP.
//
// See: https://tools.ietf.org/html/rfc2018.
type TCPSACKEnabled bool

// isGettableTransportProtocolOption marks *TCPSACKEnabled as a
// GettableTransportProtocolOption.
func (*TCPSACKEnabled) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPSACKEnabled as a
// SettableTransportProtocolOption.
func (*TCPSACKEnabled) isSettableTransportProtocolOption() {}
  1059  
// TCPRecovery is the loss detection algorithm used by TCP.
type TCPRecovery int32

// isGettableTransportProtocolOption marks *TCPRecovery as a
// GettableTransportProtocolOption.
func (*TCPRecovery) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPRecovery as a
// SettableTransportProtocolOption.
func (*TCPRecovery) isSettableTransportProtocolOption() {}
  1066  
// TCPAlwaysUseSynCookies indicates unconditional usage of syncookies.
type TCPAlwaysUseSynCookies bool

// isGettableTransportProtocolOption marks *TCPAlwaysUseSynCookies as a
// GettableTransportProtocolOption.
func (*TCPAlwaysUseSynCookies) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPAlwaysUseSynCookies as a
// SettableTransportProtocolOption.
func (*TCPAlwaysUseSynCookies) isSettableTransportProtocolOption() {}
  1073  
// TCPRecovery flag values. Each constant occupies a distinct bit because the
// values are generated with 1 << iota.
const (
	// TCPRACKLossDetection indicates RACK is used for loss detection and
	// recovery.
	TCPRACKLossDetection TCPRecovery = 1 << iota

	// TCPRACKStaticReoWnd indicates the reordering window should not be
	// adjusted when DSACK is received.
	TCPRACKStaticReoWnd

	// TCPRACKNoDupTh indicates RACK should not consider the classic three
	// duplicate acknowledgements rule to mark the segments as lost. This
	// is used when reordering is not detected.
	TCPRACKNoDupTh
)
  1088  
// TCPDelayEnabled enables/disables Nagle's algorithm in TCP.
type TCPDelayEnabled bool

// isGettableTransportProtocolOption marks *TCPDelayEnabled as a
// GettableTransportProtocolOption.
func (*TCPDelayEnabled) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPDelayEnabled as a
// SettableTransportProtocolOption.
func (*TCPDelayEnabled) isSettableTransportProtocolOption() {}
  1095  
// TCPSendBufferSizeRangeOption is the send buffer size range for TCP. It
// carries the minimum (Min), default (Default) and maximum (Max) sizes.
type TCPSendBufferSizeRangeOption struct {
	Min     int
	Default int
	Max     int
}

// isGettableTransportProtocolOption marks *TCPSendBufferSizeRangeOption as a
// GettableTransportProtocolOption.
func (*TCPSendBufferSizeRangeOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPSendBufferSizeRangeOption as a
// SettableTransportProtocolOption.
func (*TCPSendBufferSizeRangeOption) isSettableTransportProtocolOption() {}
  1106  
// TCPReceiveBufferSizeRangeOption is the receive buffer size range for TCP.
// It carries the minimum (Min), default (Default) and maximum (Max) sizes.
type TCPReceiveBufferSizeRangeOption struct {
	Min     int
	Default int
	Max     int
}

// isGettableTransportProtocolOption marks *TCPReceiveBufferSizeRangeOption as
// a GettableTransportProtocolOption.
func (*TCPReceiveBufferSizeRangeOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPReceiveBufferSizeRangeOption as
// a SettableTransportProtocolOption.
func (*TCPReceiveBufferSizeRangeOption) isSettableTransportProtocolOption() {}
  1117  
// TCPAvailableCongestionControlOption is the supported congestion control
// algorithms for TCP.
type TCPAvailableCongestionControlOption string

// isGettableTransportProtocolOption marks *TCPAvailableCongestionControlOption
// as a GettableTransportProtocolOption.
func (*TCPAvailableCongestionControlOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPAvailableCongestionControlOption
// as a SettableTransportProtocolOption.
func (*TCPAvailableCongestionControlOption) isSettableTransportProtocolOption() {}
  1125  
// TCPModerateReceiveBufferOption enables/disables receive buffer moderation
// for TCP.
type TCPModerateReceiveBufferOption bool

// isGettableTransportProtocolOption marks *TCPModerateReceiveBufferOption as a
// GettableTransportProtocolOption.
func (*TCPModerateReceiveBufferOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPModerateReceiveBufferOption as a
// SettableTransportProtocolOption.
func (*TCPModerateReceiveBufferOption) isSettableTransportProtocolOption() {}
  1133  
// GettableSocketOption is a marker interface for socket options that may be
// queried.
type GettableSocketOption interface {
	// isGettableSocketOption is unexported, so only types declared in this
	// package can implement the interface.
	isGettableSocketOption()
}

// SettableSocketOption is a marker interface for socket options that may be
// configured.
type SettableSocketOption interface {
	// isSettableSocketOption is unexported, so only types declared in this
	// package can implement the interface.
	isSettableSocketOption()
}
  1145  
  1146  // ICMPv6Filter specifies a filter for ICMPv6 types.
  1147  //
  1148  // +stateify savable
  1149  type ICMPv6Filter struct {
  1150  	// DenyType indicates if an ICMP type should be blocked.
  1151  	//
  1152  	// The ICMPv6 type field is 8 bits so there are up to 256 different ICMPv6
  1153  	// types.
  1154  	DenyType [8]uint32
  1155  }
  1156  
  1157  // ShouldDeny returns true iff the ICMPv6 Type should be denied.
  1158  func (f *ICMPv6Filter) ShouldDeny(icmpType uint8) bool {
  1159  	const bitsInUint32 = 32
  1160  	i := icmpType / bitsInUint32
  1161  	b := icmpType % bitsInUint32
  1162  	return f.DenyType[i]&(1<<b) != 0
  1163  }
  1164  
  1165  func (*ICMPv6Filter) isGettableSocketOption() {}
  1166  
  1167  func (*ICMPv6Filter) isSettableSocketOption() {}
  1168  
// EndpointState represents the state of an endpoint. It is stored as a
// uint8.
type EndpointState uint8
  1171  
// CongestionControlState indicates the current congestion control state for
// the TCP sender.
type CongestionControlState int

// The CongestionControlState values are assigned sequentially with iota,
// starting at zero with Open.
const (
	// Open indicates that the sender is receiving acks in order and
	// no loss or dupACKs etc. have been detected.
	Open CongestionControlState = iota
	// RTORecovery indicates that an RTO has occurred and the sender
	// has entered an RTO based recovery phase.
	RTORecovery
	// FastRecovery indicates that the sender has entered FastRecovery
	// based on receiving nDupAcks. This state is entered only when
	// SACK is not in use.
	FastRecovery
	// SACKRecovery indicates that the sender has entered SACK based
	// recovery.
	SACKRecovery
	// Disorder indicates the sender either received some SACK blocks
	// or dupACKs.
	Disorder
)
  1194  
// TCPInfoOption is used by GetSockOpt to expose TCP statistics.
type TCPInfoOption struct {
	// RTT is the smoothed round trip time.
	RTT time.Duration

	// RTTVar is the round trip time variation.
	RTTVar time.Duration

	// RTO is the retransmission timeout for the endpoint.
	RTO time.Duration

	// State is the current endpoint protocol state.
	State EndpointState

	// CcState is the congestion control state.
	CcState CongestionControlState

	// SndCwnd is the congestion window, in packets.
	SndCwnd uint32

	// SndSsthresh is the threshold between slow start and congestion
	// avoidance.
	SndSsthresh uint32

	// ReorderSeen indicates if reordering is seen in the endpoint.
	ReorderSeen bool
}

// isGettableSocketOption marks *TCPInfoOption as a GettableSocketOption. No
// settable marker is declared here.
func (*TCPInfoOption) isGettableSocketOption() {}
  1224  
// KeepaliveIdleOption is used by SetSockOpt/GetSockOpt to specify the time a
// connection must remain idle before the first TCP keepalive packet is sent.
// Once this time is reached, KeepaliveIntervalOption is used instead.
type KeepaliveIdleOption time.Duration

// isGettableSocketOption marks *KeepaliveIdleOption as a GettableSocketOption.
func (*KeepaliveIdleOption) isGettableSocketOption() {}

// isSettableSocketOption marks *KeepaliveIdleOption as a SettableSocketOption.
func (*KeepaliveIdleOption) isSettableSocketOption() {}
  1233  
// KeepaliveIntervalOption is used by SetSockOpt/GetSockOpt to specify the
// interval between sending TCP keepalive packets.
type KeepaliveIntervalOption time.Duration

// isGettableSocketOption marks *KeepaliveIntervalOption as a
// GettableSocketOption.
func (*KeepaliveIntervalOption) isGettableSocketOption() {}

// isSettableSocketOption marks *KeepaliveIntervalOption as a
// SettableSocketOption.
func (*KeepaliveIntervalOption) isSettableSocketOption() {}
  1241  
// TCPUserTimeoutOption is used by SetSockOpt/GetSockOpt to specify a
// user-specified timeout for a given TCP connection.
//
// See RFC 5482 for details.
type TCPUserTimeoutOption time.Duration

// isGettableSocketOption marks *TCPUserTimeoutOption as a
// GettableSocketOption.
func (*TCPUserTimeoutOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPUserTimeoutOption as a
// SettableSocketOption.
func (*TCPUserTimeoutOption) isSettableSocketOption() {}
  1250  
// CongestionControlOption is used by SetSockOpt/GetSockOpt to set/get
// the current congestion control algorithm.
//
// It carries both the socket option and the transport protocol option marker
// methods.
type CongestionControlOption string

// isGettableSocketOption marks *CongestionControlOption as a
// GettableSocketOption.
func (*CongestionControlOption) isGettableSocketOption() {}

// isSettableSocketOption marks *CongestionControlOption as a
// SettableSocketOption.
func (*CongestionControlOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *CongestionControlOption as a
// GettableTransportProtocolOption.
func (*CongestionControlOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *CongestionControlOption as a
// SettableTransportProtocolOption.
func (*CongestionControlOption) isSettableTransportProtocolOption() {}
  1262  
// TCPLingerTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum duration for which a socket lingers in the TCP_FIN_WAIT_2 state
// before being marked closed.
//
// It carries both the socket option and the transport protocol option marker
// methods.
type TCPLingerTimeoutOption time.Duration

// isGettableSocketOption marks *TCPLingerTimeoutOption as a
// GettableSocketOption.
func (*TCPLingerTimeoutOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPLingerTimeoutOption as a
// SettableSocketOption.
func (*TCPLingerTimeoutOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPLingerTimeoutOption as a
// GettableTransportProtocolOption.
func (*TCPLingerTimeoutOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPLingerTimeoutOption as a
// SettableTransportProtocolOption.
func (*TCPLingerTimeoutOption) isSettableTransportProtocolOption() {}
  1275  
// TCPTimeWaitTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum duration for which a socket lingers in the TIME_WAIT state
// before being marked closed.
//
// It carries both the socket option and the transport protocol option marker
// methods.
type TCPTimeWaitTimeoutOption time.Duration

// isGettableSocketOption marks *TCPTimeWaitTimeoutOption as a
// GettableSocketOption.
func (*TCPTimeWaitTimeoutOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPTimeWaitTimeoutOption as a
// SettableSocketOption.
func (*TCPTimeWaitTimeoutOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPTimeWaitTimeoutOption as a
// GettableTransportProtocolOption.
func (*TCPTimeWaitTimeoutOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPTimeWaitTimeoutOption as a
// SettableTransportProtocolOption.
func (*TCPTimeWaitTimeoutOption) isSettableTransportProtocolOption() {}
  1288  
// TCPDeferAcceptOption is used by SetSockOpt/GetSockOpt to allow an
// accept to return a completed connection only when there is data to be
// read. This usually means the listening socket will drop the final ACK
// of a handshake, for up to the specified timeout, until a segment with
// data arrives.
type TCPDeferAcceptOption time.Duration

// isGettableSocketOption marks *TCPDeferAcceptOption as a
// GettableSocketOption.
func (*TCPDeferAcceptOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPDeferAcceptOption as a
// SettableSocketOption.
func (*TCPDeferAcceptOption) isSettableSocketOption() {}
  1298  
// TCPMinRTOOption is used by SetSockOpt/GetSockOpt to allow overriding the
// default MinRTO used by the Stack.
//
// It carries both the socket option and the transport protocol option marker
// methods.
type TCPMinRTOOption time.Duration

// isGettableSocketOption marks *TCPMinRTOOption as a GettableSocketOption.
func (*TCPMinRTOOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPMinRTOOption as a SettableSocketOption.
func (*TCPMinRTOOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPMinRTOOption as a
// GettableTransportProtocolOption.
func (*TCPMinRTOOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPMinRTOOption as a
// SettableTransportProtocolOption.
func (*TCPMinRTOOption) isSettableTransportProtocolOption() {}
  1310  
// TCPMaxRTOOption is used by SetSockOpt/GetSockOpt to allow overriding the
// default MaxRTO used by the Stack.
//
// It carries both the socket option and the transport protocol option marker
// methods.
type TCPMaxRTOOption time.Duration

// isGettableSocketOption marks *TCPMaxRTOOption as a GettableSocketOption.
func (*TCPMaxRTOOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPMaxRTOOption as a SettableSocketOption.
func (*TCPMaxRTOOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPMaxRTOOption as a
// GettableTransportProtocolOption.
func (*TCPMaxRTOOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPMaxRTOOption as a
// SettableTransportProtocolOption.
func (*TCPMaxRTOOption) isSettableTransportProtocolOption() {}
  1322  
// TCPMaxRetriesOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum number of retransmits after which we time out the connection.
//
// It carries both the socket option and the transport protocol option marker
// methods.
type TCPMaxRetriesOption uint64

// isGettableSocketOption marks *TCPMaxRetriesOption as a
// GettableSocketOption.
func (*TCPMaxRetriesOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPMaxRetriesOption as a
// SettableSocketOption.
func (*TCPMaxRetriesOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPMaxRetriesOption as a
// GettableTransportProtocolOption.
func (*TCPMaxRetriesOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPMaxRetriesOption as a
// SettableTransportProtocolOption.
func (*TCPMaxRetriesOption) isSettableTransportProtocolOption() {}
  1334  
// TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify the
// stack-wide default for the number of times a SYN is retransmitted before
// aborting a connect.
//
// It carries both the socket option and the transport protocol option marker
// methods.
type TCPSynRetriesOption uint8

// isGettableSocketOption marks *TCPSynRetriesOption as a
// GettableSocketOption.
func (*TCPSynRetriesOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPSynRetriesOption as a
// SettableSocketOption.
func (*TCPSynRetriesOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPSynRetriesOption as a
// GettableTransportProtocolOption.
func (*TCPSynRetriesOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPSynRetriesOption as a
// SettableTransportProtocolOption.
func (*TCPSynRetriesOption) isSettableTransportProtocolOption() {}
  1346  
// MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a
// default interface for multicast. The interface is described by a NIC ID
// (NIC) and an address (InterfaceAddr).
type MulticastInterfaceOption struct {
	NIC           NICID
	InterfaceAddr Address
}

// isGettableSocketOption marks *MulticastInterfaceOption as a
// GettableSocketOption.
func (*MulticastInterfaceOption) isGettableSocketOption() {}

// isSettableSocketOption marks *MulticastInterfaceOption as a
// SettableSocketOption.
func (*MulticastInterfaceOption) isSettableSocketOption() {}
  1357  
// MembershipOption is used to identify a multicast membership on an interface.
// It names the interface by NIC ID (NIC) and address (InterfaceAddr) and the
// group by MulticastAddr.
type MembershipOption struct {
	NIC           NICID
	InterfaceAddr Address
	MulticastAddr Address
}

// AddMembershipOption identifies a multicast group to join on some interface.
// It has the same fields as MembershipOption.
type AddMembershipOption MembershipOption

// isSettableSocketOption marks *AddMembershipOption as a
// SettableSocketOption. No gettable marker is declared here.
func (*AddMembershipOption) isSettableSocketOption() {}

// RemoveMembershipOption identifies a multicast group to leave on some
// interface. It has the same fields as MembershipOption.
type RemoveMembershipOption MembershipOption

// isSettableSocketOption marks *RemoveMembershipOption as a
// SettableSocketOption. No gettable marker is declared here.
func (*RemoveMembershipOption) isSettableSocketOption() {}
  1375  
// SocketDetachFilterOption is used by SetSockOpt to detach a previously attached
// classic BPF filter on a given endpoint.
type SocketDetachFilterOption int

// isSettableSocketOption marks *SocketDetachFilterOption as a
// SettableSocketOption. No gettable marker is declared here.
func (*SocketDetachFilterOption) isSettableSocketOption() {}
  1381  
// OriginalDestinationOption is used to get the original destination address
// and port of a redirected packet. It has the same fields as FullAddress.
type OriginalDestinationOption FullAddress

// isGettableSocketOption marks *OriginalDestinationOption as a
// GettableSocketOption. No settable marker is declared here.
func (*OriginalDestinationOption) isGettableSocketOption() {}
  1387  
// TCPTimeWaitReuseOption is used by stack.(*Stack).TransportProtocolOption to
// specify if the stack can reuse the port bound by an endpoint in TIME-WAIT for
// new connections when it is safe from a protocol viewpoint.
type TCPTimeWaitReuseOption uint8

// isGettableSocketOption marks *TCPTimeWaitReuseOption as a
// GettableSocketOption.
func (*TCPTimeWaitReuseOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPTimeWaitReuseOption as a
// SettableSocketOption.
func (*TCPTimeWaitReuseOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPTimeWaitReuseOption as a
// GettableTransportProtocolOption.
func (*TCPTimeWaitReuseOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPTimeWaitReuseOption as a
// SettableTransportProtocolOption.
func (*TCPTimeWaitReuseOption) isSettableTransportProtocolOption() {}

// The TCPTimeWaitReuseOption values are assigned sequentially with iota,
// starting at zero with TCPTimeWaitReuseDisabled.
const (
	// TCPTimeWaitReuseDisabled indicates that a port bound by an endpoint in
	// TIME-WAIT cannot be reused for new connections.
	TCPTimeWaitReuseDisabled TCPTimeWaitReuseOption = iota

	// TCPTimeWaitReuseGlobal indicates that a port bound by an endpoint in
	// TIME-WAIT can be reused for new connections irrespective of the
	// src/dest addresses.
	TCPTimeWaitReuseGlobal

	// TCPTimeWaitReuseLoopbackOnly indicates that a port bound by an endpoint
	// in TIME-WAIT can only be reused if the connection was over loopback,
	// i.e. the src/dest addresses are loopback addresses.
	TCPTimeWaitReuseLoopbackOnly
)
  1415  
// LingerOption is used by SetSockOpt/GetSockOpt to set/get the
// duration for which a socket lingers before returning from Close.
//
// It holds an on/off flag (Enabled) and a duration (Timeout).
//
// +marshal
// +stateify savable
type LingerOption struct {
	Enabled bool
	Timeout time.Duration
}
  1425  
// IPPacketInfo is the message structure for IP_PKTINFO.
//
// +stateify savable
type IPPacketInfo struct {
	// NIC is the ID of the NIC to be used.
	NIC NICID

	// LocalAddr is the local address.
	LocalAddr Address

	// DestinationAddr is the destination address found in the IP header.
	DestinationAddr Address
}
  1439  
// IPv6PacketInfo is the message structure for IPV6_PKTINFO. It carries an
// address (Addr) and a NIC ID (NIC).
//
// +stateify savable
type IPv6PacketInfo struct {
	Addr Address
	NIC  NICID
}
  1447  
// SendBufferSizeOption is used by stack.(*Stack).Option/SetOption to
// get/set the default, min and max send buffer sizes.
type SendBufferSizeOption struct {
	// Min is the minimum size for send buffer.
	Min int

	// Default is the default size for send buffer.
	Default int

	// Max is the maximum size for send buffer.
	Max int
}
  1460  
// ReceiveBufferSizeOption is used by stack.(*Stack).Option/SetOption to
// get/set the default, min and max receive buffer sizes.
type ReceiveBufferSizeOption struct {
	// Min is the minimum size for receive buffer.
	Min int

	// Default is the default size for receive buffer.
	Default int

	// Max is the maximum size for receive buffer.
	Max int
}
  1473  
  1474  // GetSendBufferLimits is used to get the send buffer size limits.
  1475  type GetSendBufferLimits func(StackHandler) SendBufferSizeOption
  1476  
  1477  // GetStackSendBufferLimits is used to get default, min and max send buffer size.
  1478  func GetStackSendBufferLimits(so StackHandler) SendBufferSizeOption {
  1479  	var ss SendBufferSizeOption
  1480  	if err := so.Option(&ss); err != nil {
  1481  		panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err))
  1482  	}
  1483  	return ss
  1484  }
  1485  
  1486  // GetReceiveBufferLimits is used to get the send buffer size limits.
  1487  type GetReceiveBufferLimits func(StackHandler) ReceiveBufferSizeOption
  1488  
  1489  // GetStackReceiveBufferLimits is used to get default, min and max send buffer size.
  1490  func GetStackReceiveBufferLimits(so StackHandler) ReceiveBufferSizeOption {
  1491  	var ss ReceiveBufferSizeOption
  1492  	if err := so.Option(&ss); err != nil {
  1493  		panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err))
  1494  	}
  1495  	return ss
  1496  }
  1497  
  1498  // Route is a row in the routing table. It specifies through which NIC (and
  1499  // gateway) sets of packets should be routed. A row is considered viable if the
  1500  // masked target address matches the destination address in the row.
  1501  type Route struct {
  1502  	// Destination must contain the target address for this row to be viable.
  1503  	Destination Subnet
  1504  
  1505  	// Gateway is the gateway to be used if this row is viable.
  1506  	Gateway Address
  1507  
  1508  	// NIC is the id of the nic to be used if this row is viable.
  1509  	NIC NICID
  1510  
  1511  	// SourceHint indicates a preferred source address to use when NICs
  1512  	// have multiple addresses.
  1513  	SourceHint Address
  1514  }
  1515  
  1516  // String implements the fmt.Stringer interface.
  1517  func (r Route) String() string {
  1518  	var out strings.Builder
  1519  	_, _ = fmt.Fprintf(&out, "%s", r.Destination)
  1520  	if r.Gateway.length > 0 {
  1521  		_, _ = fmt.Fprintf(&out, " via %s", r.Gateway)
  1522  	}
  1523  	_, _ = fmt.Fprintf(&out, " nic %d", r.NIC)
  1524  	return out.String()
  1525  }
  1526  
  1527  // Equal returns true if the given Route is equal to this Route.
  1528  func (r Route) Equal(to Route) bool {
  1529  	// NOTE: This relies on the fact that r.Destination == to.Destination
  1530  	return r.Destination.Equal(to.Destination) && r.Gateway == to.Gateway && r.NIC == to.NIC
  1531  }
  1532  
// TransportProtocolNumber is the number of a transport protocol. It is
// stored as a uint32.
type TransportProtocolNumber uint32

// NetworkProtocolNumber is the EtherType of a network protocol in an Ethernet
// frame. It is stored as a uint32.
//
// See: https://www.iana.org/assignments/ieee-802-numbers/ieee-802-numbers.xhtml
type NetworkProtocolNumber uint32
  1541  
  1542  // A StatCounter keeps track of a statistic.
  1543  //
  1544  // +stateify savable
  1545  type StatCounter struct {
  1546  	count atomicbitops.Uint64
  1547  }
  1548  
  1549  // Increment adds one to the counter.
  1550  func (s *StatCounter) Increment() {
  1551  	s.IncrementBy(1)
  1552  }
  1553  
  1554  // Decrement minuses one to the counter.
  1555  func (s *StatCounter) Decrement() {
  1556  	s.IncrementBy(^uint64(0))
  1557  }
  1558  
  1559  // Value returns the current value of the counter.
  1560  func (s *StatCounter) Value() uint64 {
  1561  	return s.count.Load()
  1562  }
  1563  
  1564  // IncrementBy increments the counter by v.
  1565  func (s *StatCounter) IncrementBy(v uint64) {
  1566  	s.count.Add(v)
  1567  }
  1568  
  1569  func (s *StatCounter) String() string {
  1570  	return strconv.FormatUint(s.Value(), 10)
  1571  }
  1572  
  1573  // A MultiCounterStat keeps track of two counters at once.
  1574  type MultiCounterStat struct {
  1575  	a *StatCounter
  1576  	b *StatCounter
  1577  }
  1578  
  1579  // Init sets both internal counters to point to a and b.
  1580  func (m *MultiCounterStat) Init(a, b *StatCounter) {
  1581  	m.a = a
  1582  	m.b = b
  1583  }
  1584  
  1585  // Increment adds one to the counters.
  1586  func (m *MultiCounterStat) Increment() {
  1587  	m.a.Increment()
  1588  	m.b.Increment()
  1589  }
  1590  
  1591  // IncrementBy increments the counters by v.
  1592  func (m *MultiCounterStat) IncrementBy(v uint64) {
  1593  	m.a.IncrementBy(v)
  1594  	m.b.IncrementBy(v)
  1595  }
  1596  
// ICMPv4PacketStats enumerates counts for all ICMPv4 packet types. Each field
// is a pointer to the StatCounter for one packet type.
type ICMPv4PacketStats struct {
	// LINT.IfChange(ICMPv4PacketStats)

	// EchoRequest is the number of ICMPv4 echo request packets counted.
	EchoRequest *StatCounter

	// EchoReply is the number of ICMPv4 echo reply packets counted.
	EchoReply *StatCounter

	// DstUnreachable is the number of ICMPv4 destination unreachable packets
	// counted.
	DstUnreachable *StatCounter

	// SrcQuench is the number of ICMPv4 source quench packets counted.
	SrcQuench *StatCounter

	// Redirect is the number of ICMPv4 redirect packets counted.
	Redirect *StatCounter

	// TimeExceeded is the number of ICMPv4 time exceeded packets counted.
	TimeExceeded *StatCounter

	// ParamProblem is the number of ICMPv4 parameter problem packets counted.
	ParamProblem *StatCounter

	// Timestamp is the number of ICMPv4 timestamp packets counted.
	Timestamp *StatCounter

	// TimestampReply is the number of ICMPv4 timestamp reply packets counted.
	TimestampReply *StatCounter

	// InfoRequest is the number of ICMPv4 information request packets counted.
	InfoRequest *StatCounter

	// InfoReply is the number of ICMPv4 information reply packets counted.
	InfoReply *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4PacketStats)
}
  1637  
// ICMPv4SentPacketStats collects outbound ICMPv4-specific stats. It embeds
// ICMPv4PacketStats for the per-type counters and adds counters for packets
// that were dropped or rate limited.
type ICMPv4SentPacketStats struct {
	// LINT.IfChange(ICMPv4SentPacketStats)

	ICMPv4PacketStats

	// Dropped is the number of ICMPv4 packets dropped due to link layer errors.
	Dropped *StatCounter

	// RateLimited is the number of ICMPv4 packets dropped due to rate limit being
	// exceeded.
	RateLimited *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4SentPacketStats)
}
  1653  
// ICMPv4ReceivedPacketStats collects inbound ICMPv4-specific stats. It embeds
// ICMPv4PacketStats for the per-type counters and adds a counter for invalid
// packets.
type ICMPv4ReceivedPacketStats struct {
	// LINT.IfChange(ICMPv4ReceivedPacketStats)

	ICMPv4PacketStats

	// Invalid is the number of invalid ICMPv4 packets received.
	Invalid *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4ReceivedPacketStats)
}
  1665  
// ICMPv4Stats collects ICMPv4-specific stats, split into sent and received
// packets.
type ICMPv4Stats struct {
	// LINT.IfChange(ICMPv4Stats)

	// PacketsSent contains statistics about sent packets.
	PacketsSent ICMPv4SentPacketStats

	// PacketsReceived contains statistics about received packets.
	PacketsReceived ICMPv4ReceivedPacketStats

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4Stats)
}
  1678  
// ICMPv6PacketStats enumerates counts for all ICMPv6 packet types.
//
// Each field is a separate counter for one packet type. The struct is
// embedded by both ICMPv6SentPacketStats and ICMPv6ReceivedPacketStats.
type ICMPv6PacketStats struct {
	// LINT.IfChange(ICMPv6PacketStats)

	// EchoRequest is the number of ICMPv6 echo request packets counted.
	EchoRequest *StatCounter

	// EchoReply is the number of ICMPv6 echo reply packets counted.
	EchoReply *StatCounter

	// DstUnreachable is the number of ICMPv6 destination unreachable packets
	// counted.
	DstUnreachable *StatCounter

	// PacketTooBig is the number of ICMPv6 packet too big packets counted.
	PacketTooBig *StatCounter

	// TimeExceeded is the number of ICMPv6 time exceeded packets counted.
	TimeExceeded *StatCounter

	// ParamProblem is the number of ICMPv6 parameter problem packets counted.
	ParamProblem *StatCounter

	// RouterSolicit is the number of ICMPv6 router solicit packets counted.
	RouterSolicit *StatCounter

	// RouterAdvert is the number of ICMPv6 router advert packets counted.
	RouterAdvert *StatCounter

	// NeighborSolicit is the number of ICMPv6 neighbor solicit packets counted.
	NeighborSolicit *StatCounter

	// NeighborAdvert is the number of ICMPv6 neighbor advert packets counted.
	NeighborAdvert *StatCounter

	// RedirectMsg is the number of ICMPv6 redirect message packets counted.
	RedirectMsg *StatCounter

	// MulticastListenerQuery is the number of Multicast Listener Query messages
	// counted.
	MulticastListenerQuery *StatCounter

	// MulticastListenerReport is the number of Multicast Listener Report messages
	// counted.
	MulticastListenerReport *StatCounter

	// MulticastListenerReportV2 is the number of Version 2 Multicast Listener
	// Report messages counted.
	MulticastListenerReportV2 *StatCounter

	// MulticastListenerDone is the number of Multicast Listener Done messages
	// counted.
	MulticastListenerDone *StatCounter

	// LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6PacketStats)
}
  1735  
// ICMPv6SentPacketStats collects outbound ICMPv6-specific stats.
//
// It embeds ICMPv6PacketStats for the per-packet-type counters and adds
// counters for packets that were dropped instead of being sent.
type ICMPv6SentPacketStats struct {
	// LINT.IfChange(ICMPv6SentPacketStats)

	// ICMPv6PacketStats holds the per-packet-type counters.
	ICMPv6PacketStats

	// Dropped is the number of ICMPv6 packets dropped due to link layer errors.
	Dropped *StatCounter

	// RateLimited is the number of ICMPv6 packets dropped due to the rate limit
	// being exceeded.
	RateLimited *StatCounter

	// LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6SentPacketStats)
}
  1751  
// ICMPv6ReceivedPacketStats collects inbound ICMPv6-specific stats.
//
// It embeds ICMPv6PacketStats for the per-packet-type counters and adds
// counters for received packets that were unrecognized, invalid or dropped.
type ICMPv6ReceivedPacketStats struct {
	// LINT.IfChange(ICMPv6ReceivedPacketStats)

	// ICMPv6PacketStats holds the per-packet-type counters.
	ICMPv6PacketStats

	// Unrecognized is the number of ICMPv6 packets received that the transport
	// layer does not know how to parse.
	Unrecognized *StatCounter

	// Invalid is the number of invalid ICMPv6 packets received.
	Invalid *StatCounter

	// RouterOnlyPacketsDroppedByHost is the number of ICMPv6 packets dropped due
	// to being router-specific packets.
	RouterOnlyPacketsDroppedByHost *StatCounter

	// LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6ReceivedPacketStats)
}
  1771  
// ICMPv6Stats collects ICMPv6-specific stats, split into one group for sent
// packets and one group for received packets.
type ICMPv6Stats struct {
	// LINT.IfChange(ICMPv6Stats)

	// PacketsSent contains statistics about sent packets.
	PacketsSent ICMPv6SentPacketStats

	// PacketsReceived contains statistics about received packets.
	PacketsReceived ICMPv6ReceivedPacketStats

	// LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6Stats)
}
  1784  
// ICMPStats collects ICMP-specific stats (both v4 and v6).
type ICMPStats struct {
	// V4 contains the ICMPv4-specific stats.
	V4 ICMPv4Stats

	// V6 contains the ICMPv6-specific stats.
	V6 ICMPv6Stats
}
  1793  
// IGMPPacketStats enumerates counts for all IGMP packet types.
//
// Each field is a separate counter for one message type. The struct is
// embedded by both IGMPSentPacketStats and IGMPReceivedPacketStats.
type IGMPPacketStats struct {
	// LINT.IfChange(IGMPPacketStats)

	// MembershipQuery is the number of Membership Query messages counted.
	MembershipQuery *StatCounter

	// V1MembershipReport is the number of Version 1 Membership Report messages
	// counted.
	V1MembershipReport *StatCounter

	// V2MembershipReport is the number of Version 2 Membership Report messages
	// counted.
	V2MembershipReport *StatCounter

	// V3MembershipReport is the number of Version 3 Membership Report messages
	// counted.
	V3MembershipReport *StatCounter

	// LeaveGroup is the number of Leave Group messages counted.
	LeaveGroup *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPPacketStats)
}
  1818  
// IGMPSentPacketStats collects outbound IGMP-specific stats.
//
// It embeds IGMPPacketStats for the per-message-type counters and adds a
// counter for dropped packets.
type IGMPSentPacketStats struct {
	// LINT.IfChange(IGMPSentPacketStats)

	// IGMPPacketStats holds the per-message-type counters.
	IGMPPacketStats

	// Dropped is the number of IGMP packets dropped.
	Dropped *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPSentPacketStats)
}
  1830  
// IGMPReceivedPacketStats collects inbound IGMP-specific stats.
//
// It embeds IGMPPacketStats for the per-message-type counters and adds
// counters for received packets that were invalid, failed checksum
// verification or were not recognized.
type IGMPReceivedPacketStats struct {
	// LINT.IfChange(IGMPReceivedPacketStats)

	// IGMPPacketStats holds the per-message-type counters.
	IGMPPacketStats

	// Invalid is the number of invalid IGMP packets received.
	Invalid *StatCounter

	// ChecksumErrors is the number of IGMP packets dropped due to bad checksums.
	ChecksumErrors *StatCounter

	// Unrecognized is the number of unrecognized messages counted. These are
	// silently ignored for forward-compatibility.
	Unrecognized *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPReceivedPacketStats)
}
  1849  
// IGMPStats collects IGMP-specific stats, split into one group for sent
// packets and one group for received packets.
type IGMPStats struct {
	// LINT.IfChange(IGMPStats)

	// PacketsSent contains statistics about sent packets.
	PacketsSent IGMPSentPacketStats

	// PacketsReceived contains statistics about received packets.
	PacketsReceived IGMPReceivedPacketStats

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPStats)
}
  1862  
// IPForwardingStats collects stats related to IP forwarding (both v4 and v6).
//
// Every field except Errors counts packets that were not forwarded for one
// specific reason; Errors counts packets that could not be forwarded
// successfully without naming a reason.
type IPForwardingStats struct {
	// LINT.IfChange(IPForwardingStats)

	// Unrouteable is the number of IP packets received which were dropped
	// because a route to their destination could not be constructed.
	Unrouteable *StatCounter

	// ExhaustedTTL is the number of IP packets received which were dropped
	// because their TTL was exhausted.
	ExhaustedTTL *StatCounter

	// InitializingSource is the number of IP packets which were dropped
	// because they contained a source address that may only be used on the local
	// network as part of initialization work.
	InitializingSource *StatCounter

	// LinkLocalSource is the number of IP packets which were dropped
	// because they contained a link-local source address.
	LinkLocalSource *StatCounter

	// LinkLocalDestination is the number of IP packets which were dropped
	// because they contained a link-local destination address.
	LinkLocalDestination *StatCounter

	// PacketTooBig is the number of IP packets which were dropped because they
	// were too big for the outgoing MTU.
	PacketTooBig *StatCounter

	// HostUnreachable is the number of IP packets received which could not be
	// successfully forwarded due to an unresolvable next hop.
	HostUnreachable *StatCounter

	// ExtensionHeaderProblem is the number of IP packets which were dropped
	// because of a problem encountered when processing an IPv6 extension
	// header.
	ExtensionHeaderProblem *StatCounter

	// UnexpectedMulticastInputInterface is the number of multicast packets that
	// were received on an interface that did not match the corresponding route's
	// expected input interface.
	UnexpectedMulticastInputInterface *StatCounter

	// UnknownOutputEndpoint is the number of packets that could not be forwarded
	// because the output endpoint could not be found.
	UnknownOutputEndpoint *StatCounter

	// NoMulticastPendingQueueBufferSpace is the number of multicast packets that
	// were dropped due to insufficient buffer space in the pending packet queue.
	NoMulticastPendingQueueBufferSpace *StatCounter

	// OutgoingDeviceNoBufferSpace is the number of packets that were dropped due
	// to insufficient space in the outgoing device.
	OutgoingDeviceNoBufferSpace *StatCounter

	// Errors is the number of IP packets received which could not be
	// successfully forwarded.
	Errors *StatCounter

	// LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPForwardingStats)
}
  1924  
// IPStats collects IP-specific stats (both v4 and v6).
//
// Besides the receive, send and IPTables drop counters it carries counters
// for IP options and a nested group of forwarding stats.
type IPStats struct {
	// LINT.IfChange(IPStats)

	// PacketsReceived is the number of IP packets received from the link layer.
	PacketsReceived *StatCounter

	// ValidPacketsReceived is the number of valid IP packets that reached the IP
	// layer.
	ValidPacketsReceived *StatCounter

	// DisabledPacketsReceived is the number of IP packets received from the link
	// layer when the IP layer is disabled.
	DisabledPacketsReceived *StatCounter

	// InvalidDestinationAddressesReceived is the number of IP packets received
	// with an unknown or invalid destination address.
	InvalidDestinationAddressesReceived *StatCounter

	// InvalidSourceAddressesReceived is the number of IP packets received with a
	// source address that should never have been received on the wire.
	InvalidSourceAddressesReceived *StatCounter

	// PacketsDelivered is the number of incoming IP packets that are successfully
	// delivered to the transport layer.
	PacketsDelivered *StatCounter

	// PacketsSent is the number of IP packets sent via WritePacket.
	PacketsSent *StatCounter

	// OutgoingPacketErrors is the number of IP packets which failed to write to a
	// link-layer endpoint.
	OutgoingPacketErrors *StatCounter

	// MalformedPacketsReceived is the number of IP packets that were dropped due
	// to the IP packet header failing validation checks.
	MalformedPacketsReceived *StatCounter

	// MalformedFragmentsReceived is the number of IP fragments that were dropped
	// due to the fragment failing validation checks.
	MalformedFragmentsReceived *StatCounter

	// IPTablesPreroutingDropped is the number of IP packets dropped in the
	// Prerouting chain.
	IPTablesPreroutingDropped *StatCounter

	// IPTablesInputDropped is the number of IP packets dropped in the Input
	// chain.
	IPTablesInputDropped *StatCounter

	// IPTablesForwardDropped is the number of IP packets dropped in the Forward
	// chain.
	IPTablesForwardDropped *StatCounter

	// IPTablesOutputDropped is the number of IP packets dropped in the Output
	// chain.
	IPTablesOutputDropped *StatCounter

	// IPTablesPostroutingDropped is the number of IP packets dropped in the
	// Postrouting chain.
	IPTablesPostroutingDropped *StatCounter

	// TODO(https://gvisor.dev/issues/5529): Move the IPv4-only option stats out
	// of IPStats.

	// OptionTimestampReceived is the number of Timestamp options seen.
	OptionTimestampReceived *StatCounter

	// OptionRecordRouteReceived is the number of Record Route options seen.
	OptionRecordRouteReceived *StatCounter

	// OptionRouterAlertReceived is the number of Router Alert options seen.
	OptionRouterAlertReceived *StatCounter

	// OptionUnknownReceived is the number of unknown IP options seen.
	OptionUnknownReceived *StatCounter

	// Forwarding collects stats related to IP forwarding.
	Forwarding IPForwardingStats

	// LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPStats)
}
  2006  
// ARPStats collects ARP-specific stats.
//
// It covers received packets, requests and replies as well as the outcome of
// outgoing requests and replies.
type ARPStats struct {
	// LINT.IfChange(ARPStats)

	// PacketsReceived is the number of ARP packets received from the link layer.
	PacketsReceived *StatCounter

	// DisabledPacketsReceived is the number of ARP packets received from the link
	// layer when the ARP layer is disabled.
	DisabledPacketsReceived *StatCounter

	// MalformedPacketsReceived is the number of ARP packets that were dropped due
	// to being malformed.
	MalformedPacketsReceived *StatCounter

	// RequestsReceived is the number of ARP requests received.
	RequestsReceived *StatCounter

	// RequestsReceivedUnknownTargetAddress is the number of ARP requests that
	// were targeted to an interface different from the one it was received on.
	RequestsReceivedUnknownTargetAddress *StatCounter

	// OutgoingRequestInterfaceHasNoLocalAddressErrors is the number of failures
	// to send an ARP request because the interface has no network address
	// assigned to it.
	OutgoingRequestInterfaceHasNoLocalAddressErrors *StatCounter

	// OutgoingRequestBadLocalAddressErrors is the number of failures to send an
	// ARP request with a bad local address.
	OutgoingRequestBadLocalAddressErrors *StatCounter

	// OutgoingRequestsDropped is the number of ARP requests which failed to write
	// to a link-layer endpoint.
	OutgoingRequestsDropped *StatCounter

	// OutgoingRequestsSent is the number of ARP requests successfully written to
	// a link-layer endpoint.
	OutgoingRequestsSent *StatCounter

	// RepliesReceived is the number of ARP replies received.
	RepliesReceived *StatCounter

	// OutgoingRepliesDropped is the number of ARP replies which failed to write
	// to a link-layer endpoint.
	OutgoingRepliesDropped *StatCounter

	// OutgoingRepliesSent is the number of ARP replies successfully written to a
	// link-layer endpoint.
	OutgoingRepliesSent *StatCounter

	// LINT.ThenChange(network/arp/stats.go:multiCounterARPStats)
}
  2059  
// TCPStats collects TCP-specific stats.
//
// The counters cover connection lifecycle events, listen queue overflow
// handling, segment traffic, and loss recovery.
type TCPStats struct {
	// ActiveConnectionOpenings is the number of connections opened
	// successfully via Connect.
	ActiveConnectionOpenings *StatCounter

	// PassiveConnectionOpenings is the number of connections opened
	// successfully via Listen.
	PassiveConnectionOpenings *StatCounter

	// CurrentEstablished is the number of TCP connections for which the
	// current state is ESTABLISHED.
	CurrentEstablished *StatCounter

	// CurrentConnected is the number of TCP connections that
	// are in connected state.
	CurrentConnected *StatCounter

	// EstablishedResets is the number of times TCP connections have made
	// a direct transition to the CLOSED state from either the
	// ESTABLISHED state or the CLOSE-WAIT state.
	EstablishedResets *StatCounter

	// EstablishedClosed is the number of times established TCP connections
	// made a transition to CLOSED state.
	EstablishedClosed *StatCounter

	// EstablishedTimedout is the number of times an established connection
	// was reset because of keep-alive time out.
	EstablishedTimedout *StatCounter

	// ListenOverflowSynDrop is the number of times the listen queue overflowed
	// and a SYN was dropped.
	ListenOverflowSynDrop *StatCounter

	// ListenOverflowAckDrop is the number of times the final ACK
	// in the handshake was dropped due to overflow.
	ListenOverflowAckDrop *StatCounter

	// ListenOverflowSynCookieSent is the number of times a SYN cookie was sent.
	ListenOverflowSynCookieSent *StatCounter

	// ListenOverflowSynCookieRcvd is the number of times a valid SYN
	// cookie was received.
	ListenOverflowSynCookieRcvd *StatCounter

	// ListenOverflowInvalidSynCookieRcvd is the number of times an invalid SYN cookie
	// was received.
	ListenOverflowInvalidSynCookieRcvd *StatCounter

	// FailedConnectionAttempts is the number of calls to Connect or Listen
	// (active and passive openings, respectively) that end in an error.
	FailedConnectionAttempts *StatCounter

	// ValidSegmentsReceived is the number of TCP segments received that
	// the transport layer successfully parsed.
	ValidSegmentsReceived *StatCounter

	// InvalidSegmentsReceived is the number of TCP segments received that
	// the transport layer could not parse.
	InvalidSegmentsReceived *StatCounter

	// SegmentsSent is the number of TCP segments sent.
	SegmentsSent *StatCounter

	// SegmentSendErrors is the number of TCP segments that failed to be sent.
	SegmentSendErrors *StatCounter

	// ResetsSent is the number of TCP resets sent.
	ResetsSent *StatCounter

	// ResetsReceived is the number of TCP resets received.
	ResetsReceived *StatCounter

	// Retransmits is the number of TCP segments retransmitted.
	Retransmits *StatCounter

	// FastRecovery is the number of times Fast Recovery was used to
	// recover from packet loss.
	FastRecovery *StatCounter

	// SACKRecovery is the number of times SACK Recovery was used to
	// recover from packet loss.
	SACKRecovery *StatCounter

	// TLPRecovery is the number of times recovery was accomplished by the tail
	// loss probe.
	TLPRecovery *StatCounter

	// SlowStartRetransmits is the number of segments retransmitted in slow
	// start.
	SlowStartRetransmits *StatCounter

	// FastRetransmit is the number of segments retransmitted in fast
	// recovery.
	FastRetransmit *StatCounter

	// Timeouts is the number of times the RTO expired.
	Timeouts *StatCounter

	// ChecksumErrors is the number of segments dropped due to bad checksums.
	ChecksumErrors *StatCounter

	// FailedPortReservations is the number of times TCP failed to reserve
	// a port.
	FailedPortReservations *StatCounter

	// SegmentsAckedWithDSACK is the number of segments acknowledged with
	// DSACK.
	SegmentsAckedWithDSACK *StatCounter

	// SpuriousRecovery is the number of times the connection entered loss
	// recovery spuriously.
	SpuriousRecovery *StatCounter

	// SpuriousRTORecovery is the number of spurious RTOs.
	SpuriousRTORecovery *StatCounter

	// ForwardMaxInFlightDrop is the number of connection requests that are
	// dropped due to exceeding the maximum number of in-flight connection
	// requests.
	ForwardMaxInFlightDrop *StatCounter
}
  2183  
// UDPStats collects UDP-specific stats.
//
// It counts received and sent datagrams together with the reasons datagrams
// were dropped or failed to be sent.
type UDPStats struct {
	// PacketsReceived is the number of UDP datagrams received via
	// HandlePacket.
	PacketsReceived *StatCounter

	// UnknownPortErrors is the number of incoming UDP datagrams dropped
	// because they did not have a known destination port.
	UnknownPortErrors *StatCounter

	// ReceiveBufferErrors is the number of incoming UDP datagrams dropped
	// due to the receiving buffer being in an invalid state.
	ReceiveBufferErrors *StatCounter

	// MalformedPacketsReceived is the number of incoming UDP datagrams
	// dropped due to the UDP header being in a malformed state.
	MalformedPacketsReceived *StatCounter

	// PacketsSent is the number of UDP datagrams sent via sendUDP.
	PacketsSent *StatCounter

	// PacketSendErrors is the number of datagrams that failed to be sent.
	PacketSendErrors *StatCounter

	// ChecksumErrors is the number of datagrams dropped due to bad checksums.
	ChecksumErrors *StatCounter
}
  2211  
// NICNeighborStats holds metrics for the neighbor table.
//
// It counts lookups on unreachable entries and neighbor responses that were
// dropped or ignored.
type NICNeighborStats struct {
	// LINT.IfChange(NICNeighborStats)

	// UnreachableEntryLookups counts the number of lookups performed on an
	// entry in Unreachable state.
	UnreachableEntryLookups *StatCounter

	// DroppedConfirmationForNoninitiatedNeighbor counts the number of neighbor
	// responses that were dropped because they didn't match an entry in the
	// cache.
	DroppedConfirmationForNoninitiatedNeighbor *StatCounter

	// DroppedInvalidLinkAddressConfirmations counts the number of neighbor
	// responses that were ignored because they had an invalid source link-layer
	// address.
	DroppedInvalidLinkAddressConfirmations *StatCounter

	// LINT.ThenChange(stack/nic_stats.go:multiCounterNICNeighborStats)
}
  2232  
// NICPacketStats holds basic packet statistics: a packet count and a byte
// count.
type NICPacketStats struct {
	// LINT.IfChange(NICPacketStats)

	// Packets is the number of packets counted.
	Packets *StatCounter

	// Bytes is the number of bytes counted.
	Bytes *StatCounter

	// LINT.ThenChange(stack/nic_stats.go:multiCounterNICPacketStats)
}
  2245  
// IntegralStatCounterMap holds a map associating integral keys with
// StatCounters.
//
// Access to the underlying map is guarded by mu. Init installs the underlying
// map.
type IntegralStatCounterMap struct {
	// mu protects counterMap.
	mu sync.RWMutex
	// counterMap maps each uint64 key to its counter.
	// +checklocks:mu
	counterMap map[uint64]*StatCounter
}
  2253  
  2254  // Keys returns all keys present in the map.
  2255  func (m *IntegralStatCounterMap) Keys() []uint64 {
  2256  	m.mu.RLock()
  2257  	defer m.mu.RUnlock()
  2258  	var keys []uint64
  2259  	for k := range m.counterMap {
  2260  		keys = append(keys, k)
  2261  	}
  2262  	return keys
  2263  }
  2264  
  2265  // Get returns the counter mapped by the provided key.
  2266  func (m *IntegralStatCounterMap) Get(key uint64) (*StatCounter, bool) {
  2267  	m.mu.RLock()
  2268  	defer m.mu.RUnlock()
  2269  	counter, ok := m.counterMap[key]
  2270  	return counter, ok
  2271  }
  2272  
  2273  // Init initializes the map.
  2274  func (m *IntegralStatCounterMap) Init() {
  2275  	m.mu.Lock()
  2276  	defer m.mu.Unlock()
  2277  	m.counterMap = make(map[uint64]*StatCounter)
  2278  }
  2279  
  2280  // Increment increments the counter associated with the provided key.
  2281  func (m *IntegralStatCounterMap) Increment(key uint64) {
  2282  	m.mu.RLock()
  2283  	counter, ok := m.counterMap[key]
  2284  	m.mu.RUnlock()
  2285  
  2286  	if !ok {
  2287  		m.mu.Lock()
  2288  		counter, ok = m.counterMap[key]
  2289  		if !ok {
  2290  			counter = new(StatCounter)
  2291  			m.counterMap[key] = counter
  2292  		}
  2293  		m.mu.Unlock()
  2294  	}
  2295  	counter.Increment()
  2296  }
  2297  
// A MultiIntegralStatCounterMap keeps track of two integral counter maps at
// once.
//
// It only stores pointers to the two maps; they are supplied through Init.
type MultiIntegralStatCounterMap struct {
	// a is the first tracked map.
	a *IntegralStatCounterMap
	// b is the second tracked map.
	b *IntegralStatCounterMap
}
  2304  
  2305  // Init sets the internal integral counter maps to point to a and b.
  2306  func (m *MultiIntegralStatCounterMap) Init(a, b *IntegralStatCounterMap) {
  2307  	m.a = a
  2308  	m.b = b
  2309  }
  2310  
  2311  // Increment increments the counter in each map corresponding to the
  2312  // provided key.
  2313  func (m *MultiIntegralStatCounterMap) Increment(key uint64) {
  2314  	m.a.Increment(key)
  2315  	m.b.Increment(key)
  2316  }
  2317  
// NICStats holds NIC statistics.
//
// It combines per-protocol-number counter maps for unknown protocols with
// transmit, receive and neighbor statistics.
type NICStats struct {
	// LINT.IfChange(NICStats)

	// UnknownL3ProtocolRcvdPacketCounts records the number of packets received
	// for each unknown or unsupported network protocol number.
	UnknownL3ProtocolRcvdPacketCounts *IntegralStatCounterMap

	// UnknownL4ProtocolRcvdPacketCounts records the number of packets received
	// for each unknown or unsupported transport protocol number.
	UnknownL4ProtocolRcvdPacketCounts *IntegralStatCounterMap

	// MalformedL4RcvdPackets is the number of packets received by a NIC that
	// could not be delivered to a transport endpoint because the L4 header could
	// not be parsed.
	MalformedL4RcvdPackets *StatCounter

	// Tx contains statistics about transmitted packets.
	Tx NICPacketStats

	// TxPacketsDroppedNoBufferSpace is the number of packets dropped due to the
	// NIC not having enough buffer space to send the packet.
	//
	// Packets may be dropped with a no buffer space error when the device TX
	// queue is full.
	TxPacketsDroppedNoBufferSpace *StatCounter

	// Rx contains statistics about received packets.
	Rx NICPacketStats

	// DisabledRx contains statistics about received packets on disabled NICs.
	DisabledRx NICPacketStats

	// Neighbor contains statistics about neighbor entries.
	Neighbor NICNeighborStats

	// LINT.ThenChange(stack/nic_stats.go:multiCounterNICStats)
}
  2356  
  2357  // FillIn returns a copy of s with nil fields initialized to new StatCounters.
  2358  func (s NICStats) FillIn() NICStats {
  2359  	InitStatCounters(reflect.ValueOf(&s).Elem())
  2360  	return s
  2361  }
  2362  
// Stats holds statistics about the networking stack.
//
// The NICs, ICMP, IGMP, IP and ARP fields are aggregations that are meant to
// be updated through the relevant multicounters rather than directly.
type Stats struct {
	// TODO(https://gvisor.dev/issues/5986): Make the DroppedPackets stat less
	// ambiguous.

	// DroppedPackets is the number of packets dropped at the transport layer.
	DroppedPackets *StatCounter

	// NICs is an aggregation of every NIC's statistics. These should not be
	// incremented using this field, but using the relevant NIC multicounters.
	NICs NICStats

	// ICMP is an aggregation of every NetworkEndpoint's ICMP statistics (both v4
	// and v6). These should not be incremented using this field, but using the
	// relevant NetworkEndpoint ICMP multicounters.
	ICMP ICMPStats

	// IGMP is an aggregation of every NetworkEndpoint's IGMP statistics. These
	// should not be incremented using this field, but using the relevant
	// NetworkEndpoint IGMP multicounters.
	IGMP IGMPStats

	// IP is an aggregation of every NetworkEndpoint's IP statistics. These should
	// not be incremented using this field, but using the relevant NetworkEndpoint
	// IP multicounters.
	IP IPStats

	// ARP is an aggregation of every NetworkEndpoint's ARP statistics. These
	// should not be incremented using this field, but using the relevant
	// NetworkEndpoint ARP multicounters.
	ARP ARPStats

	// TCP holds TCP-specific stats.
	TCP TCPStats

	// UDP holds UDP-specific stats.
	UDP UDPStats
}
  2401  
// ReceiveErrors collects packet receive errors within a transport endpoint.
//
// The counters are stored by value rather than as pointers.
//
// +stateify savable
type ReceiveErrors struct {
	// ReceiveBufferOverflow is the number of received packets dropped
	// due to the receive buffer being full.
	ReceiveBufferOverflow StatCounter

	// MalformedPacketsReceived is the number of incoming packets
	// dropped due to the packet header being in a malformed state.
	MalformedPacketsReceived StatCounter

	// ClosedReceiver is the number of received packets dropped because
	// of receiving endpoint state being closed.
	ClosedReceiver StatCounter

	// ChecksumErrors is the number of packets dropped due to bad checksums.
	ChecksumErrors StatCounter
}
  2421  
// SendErrors collects packet send errors within the transport layer for an
// endpoint.
//
// The counters are stored by value rather than as pointers.
//
// +stateify savable
type SendErrors struct {
	// SendToNetworkFailed is the number of packets that failed to be written
	// to the network endpoint.
	SendToNetworkFailed StatCounter

	// NoRoute is the number of times we failed to resolve an IP route.
	NoRoute StatCounter
}
  2434  
// ReadErrors collects segment read errors from an endpoint read call.
//
// The counters are stored by value rather than as pointers.
//
// +stateify savable
type ReadErrors struct {
	// ReadClosed is the number of received packet drops because the endpoint
	// was shutdown for read.
	ReadClosed StatCounter

	// InvalidEndpointState is the number of times we found the endpoint state
	// to be unexpected.
	InvalidEndpointState StatCounter

	// NotConnected is the number of times we tried to read but found that the
	// endpoint was not connected.
	NotConnected StatCounter
}
  2451  
// WriteErrors collects packet write errors from an endpoint write call.
//
// The counters are stored by value rather than as pointers.
//
// +stateify savable
type WriteErrors struct {
	// WriteClosed is the number of packet drops because the endpoint
	// was shutdown for write.
	WriteClosed StatCounter

	// InvalidEndpointState is the number of times we found the endpoint state
	// to be unexpected.
	InvalidEndpointState StatCounter

	// InvalidArgs is the number of times invalid input arguments were
	// provided for endpoint Write call.
	InvalidArgs StatCounter
}
  2468  
// TransportEndpointStats collects statistics about the endpoint.
//
// It holds two success counters plus four nested error groups, one each for
// the receive, read, send and write paths.
//
// +stateify savable
type TransportEndpointStats struct {
	// PacketsReceived is the number of successful packet receives.
	PacketsReceived StatCounter

	// PacketsSent is the number of successful packet sends.
	PacketsSent StatCounter

	// ReceiveErrors collects packet receive errors within transport layer.
	ReceiveErrors ReceiveErrors

	// ReadErrors collects packet read errors from an endpoint read call.
	ReadErrors ReadErrors

	// SendErrors collects packet send errors within the transport layer.
	SendErrors SendErrors

	// WriteErrors collects packet write errors from an endpoint write call.
	WriteErrors WriteErrors
}
  2491  
// IsEndpointStats is an empty method to implement the tcpip.EndpointStats
// marker interface. It has no body and no effect when called.
func (*TransportEndpointStats) IsEndpointStats() {}
  2495  
  2496  // InitStatCounters initializes v's fields with nil StatCounter fields to new
  2497  // StatCounters.
  2498  func InitStatCounters(v reflect.Value) {
  2499  	for i := 0; i < v.NumField(); i++ {
  2500  		v := v.Field(i)
  2501  		if s, ok := v.Addr().Interface().(**StatCounter); ok {
  2502  			if *s == nil {
  2503  				*s = new(StatCounter)
  2504  			}
  2505  		} else if s, ok := v.Addr().Interface().(**IntegralStatCounterMap); ok {
  2506  			if *s == nil {
  2507  				*s = new(IntegralStatCounterMap)
  2508  				(*s).Init()
  2509  			}
  2510  		} else {
  2511  			InitStatCounters(v)
  2512  		}
  2513  	}
  2514  }
  2515  
  2516  // FillIn returns a copy of s with nil fields initialized to new StatCounters.
  2517  func (s Stats) FillIn() Stats {
  2518  	InitStatCounters(reflect.ValueOf(&s).Elem())
  2519  	return s
  2520  }
  2521  
  2522  // Clone clones a copy of the TransportEndpointStats into dst by atomically
  2523  // reading each field.
  2524  func (src *TransportEndpointStats) Clone(dst *TransportEndpointStats) {
  2525  	clone(reflect.ValueOf(dst).Elem(), reflect.ValueOf(src).Elem())
  2526  }
  2527  
  2528  func clone(dst reflect.Value, src reflect.Value) {
  2529  	for i := 0; i < dst.NumField(); i++ {
  2530  		d := dst.Field(i)
  2531  		s := src.Field(i)
  2532  		if c, ok := s.Addr().Interface().(*StatCounter); ok {
  2533  			d.Addr().Interface().(*StatCounter).IncrementBy(c.Value())
  2534  		} else {
  2535  			clone(d, s)
  2536  		}
  2537  	}
  2538  }
  2539  
  2540  // String implements the fmt.Stringer interface.
  2541  func (a Address) String() string {
  2542  	switch l := a.Len(); l {
  2543  	case 4:
  2544  		return fmt.Sprintf("%d.%d.%d.%d", int(a.addr[0]), int(a.addr[1]), int(a.addr[2]), int(a.addr[3]))
  2545  	case 16:
  2546  		// Find the longest subsequence of hexadecimal zeros.
  2547  		start, end := -1, -1
  2548  		for i := 0; i < a.Len(); i += 2 {
  2549  			j := i
  2550  			for j < a.Len() && a.addr[j] == 0 && a.addr[j+1] == 0 {
  2551  				j += 2
  2552  			}
  2553  			if j > i+2 && j-i > end-start {
  2554  				start, end = i, j
  2555  			}
  2556  		}
  2557  
  2558  		var b strings.Builder
  2559  		for i := 0; i < a.Len(); i += 2 {
  2560  			if i == start {
  2561  				b.WriteString("::")
  2562  				i = end
  2563  				if end >= a.Len() {
  2564  					break
  2565  				}
  2566  			} else if i > 0 {
  2567  				b.WriteByte(':')
  2568  			}
  2569  			v := uint16(a.addr[i+0])<<8 | uint16(a.addr[i+1])
  2570  			if v == 0 {
  2571  				b.WriteByte('0')
  2572  			} else {
  2573  				const digits = "0123456789abcdef"
  2574  				for i := uint(3); i < 4; i-- {
  2575  					if v := v >> (i * 4); v != 0 {
  2576  						b.WriteByte(digits[v&0xf])
  2577  					}
  2578  				}
  2579  			}
  2580  		}
  2581  		return b.String()
  2582  	default:
  2583  		return fmt.Sprintf("%x", a.addr[:l])
  2584  	}
  2585  }
  2586  
  2587  // To4 converts the IPv4 address to a 4-byte representation.
  2588  // If the address is not an IPv4 address, To4 returns the empty Address.
  2589  func (a Address) To4() Address {
  2590  	const (
  2591  		ipv4len = 4
  2592  		ipv6len = 16
  2593  	)
  2594  	if a.Len() == ipv4len {
  2595  		return a
  2596  	}
  2597  	if a.Len() == ipv6len &&
  2598  		isZeros(a.addr[:10]) &&
  2599  		a.addr[10] == 0xff &&
  2600  		a.addr[11] == 0xff {
  2601  		return AddrFrom4Slice(a.addr[12:16])
  2602  	}
  2603  	return Address{}
  2604  }
  2605  
  2606  // isZeros reports whether addr is all zeros.
  2607  func isZeros(addr []byte) bool {
  2608  	for _, b := range addr {
  2609  		if b != 0 {
  2610  			return false
  2611  		}
  2612  	}
  2613  	return true
  2614  }
  2615  
  2616  // LinkAddress is a byte slice cast as a string that represents a link address.
  2617  // It is typically a 6-byte MAC address.
  2618  type LinkAddress string
  2619  
  2620  // String implements the fmt.Stringer interface.
  2621  func (a LinkAddress) String() string {
  2622  	switch len(a) {
  2623  	case 6:
  2624  		return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x", a[0], a[1], a[2], a[3], a[4], a[5])
  2625  	default:
  2626  		return fmt.Sprintf("%x", []byte(a))
  2627  	}
  2628  }
  2629  
  2630  // ParseMACAddress parses an IEEE 802 address.
  2631  //
  2632  // It must be in the format aa:bb:cc:dd:ee:ff or aa-bb-cc-dd-ee-ff.
  2633  func ParseMACAddress(s string) (LinkAddress, error) {
  2634  	parts := strings.FieldsFunc(s, func(c rune) bool {
  2635  		return c == ':' || c == '-'
  2636  	})
  2637  	if len(parts) != 6 {
  2638  		return "", fmt.Errorf("inconsistent parts: %s", s)
  2639  	}
  2640  	addr := make([]byte, 0, len(parts))
  2641  	for _, part := range parts {
  2642  		u, err := strconv.ParseUint(part, 16, 8)
  2643  		if err != nil {
  2644  			return "", fmt.Errorf("invalid hex digits: %s", s)
  2645  		}
  2646  		addr = append(addr, byte(u))
  2647  	}
  2648  	return LinkAddress(addr), nil
  2649  }
  2650  
// AddressWithPrefix is an address with its subnet prefix length.
//
// +stateify savable
type AddressWithPrefix struct {
	// Address is a network address.
	Address Address

	// PrefixLen is the subnet prefix length, in bits.
	PrefixLen int
}
  2661  
  2662  // String implements the fmt.Stringer interface.
  2663  func (a AddressWithPrefix) String() string {
  2664  	return fmt.Sprintf("%s/%d", a.Address, a.PrefixLen)
  2665  }
  2666  
  2667  // Subnet converts the address and prefix into a Subnet value and returns it.
  2668  func (a AddressWithPrefix) Subnet() Subnet {
  2669  	addrLen := a.Address.length
  2670  	if a.PrefixLen <= 0 {
  2671  		return Subnet{
  2672  			address: Address{length: addrLen},
  2673  			mask:    AddressMask{length: addrLen},
  2674  		}
  2675  	}
  2676  	if a.PrefixLen >= addrLen*8 {
  2677  		sub := Subnet{
  2678  			address: a.Address,
  2679  			mask:    AddressMask{length: addrLen},
  2680  		}
  2681  		for i := 0; i < addrLen; i++ {
  2682  			sub.mask.mask[i] = 0xff
  2683  		}
  2684  		return sub
  2685  	}
  2686  
  2687  	sa := Address{length: addrLen}
  2688  	sm := AddressMask{length: addrLen}
  2689  	n := uint(a.PrefixLen)
  2690  	for i := 0; i < addrLen; i++ {
  2691  		if n >= 8 {
  2692  			sa.addr[i] = a.Address.addr[i]
  2693  			sm.mask[i] = 0xff
  2694  			n -= 8
  2695  			continue
  2696  		}
  2697  		sm.mask[i] = ^byte(0xff >> n)
  2698  		sa.addr[i] = a.Address.addr[i] & sm.mask[i]
  2699  		n = 0
  2700  	}
  2701  
  2702  	// For extra caution, call NewSubnet rather than directly creating the Subnet
  2703  	// value. If that fails it indicates a serious bug in this code, so panic is
  2704  	// in order.
  2705  	s, err := NewSubnet(sa, sm)
  2706  	if err != nil {
  2707  		panic("invalid subnet: " + err.Error())
  2708  	}
  2709  	return s
  2710  }
  2711  
// ProtocolAddress is an address and the network protocol it is associated
// with.
type ProtocolAddress struct {
	// Protocol is the network protocol number of the address.
	Protocol NetworkProtocolNumber

	// AddressWithPrefix is a network address with its subnet prefix length.
	AddressWithPrefix AddressWithPrefix
}
  2721  
var (
	// danglingEndpointsMu protects access to danglingEndpoints.
	danglingEndpointsMu sync.Mutex

	// danglingEndpoints tracks all dangling endpoints no longer owned by the
	// app. It is used as a set: only the keys carry information.
	danglingEndpoints = make(map[Endpoint]struct{})
)
  2729  
  2730  // GetDanglingEndpoints returns all dangling endpoints.
  2731  func GetDanglingEndpoints() []Endpoint {
  2732  	danglingEndpointsMu.Lock()
  2733  	es := make([]Endpoint, 0, len(danglingEndpoints))
  2734  	for e := range danglingEndpoints {
  2735  		es = append(es, e)
  2736  	}
  2737  	danglingEndpointsMu.Unlock()
  2738  	return es
  2739  }
  2740  
  2741  // ReleaseDanglingEndpoints clears out all all reference counted objects held by
  2742  // dangling endpoints.
  2743  func ReleaseDanglingEndpoints() {
  2744  	// Get the dangling endpoints first to avoid locking around Release(), which
  2745  	// can cause a lock inversion with endpoint.mu and danglingEndpointsMu.
  2746  	// Calling Release on a dangling endpoint that has been deleted is a noop.
  2747  	eps := GetDanglingEndpoints()
  2748  	for _, ep := range eps {
  2749  		ep.Abort()
  2750  	}
  2751  }
  2752  
  2753  // AddDanglingEndpoint adds a dangling endpoint.
  2754  func AddDanglingEndpoint(e Endpoint) {
  2755  	danglingEndpointsMu.Lock()
  2756  	danglingEndpoints[e] = struct{}{}
  2757  	danglingEndpointsMu.Unlock()
  2758  }
  2759  
  2760  // DeleteDanglingEndpoint removes a dangling endpoint.
  2761  func DeleteDanglingEndpoint(e Endpoint) {
  2762  	danglingEndpointsMu.Lock()
  2763  	delete(danglingEndpoints, e)
  2764  	danglingEndpointsMu.Unlock()
  2765  }
  2766  
// AsyncLoading is the global barrier for asynchronous endpoint loading
// activities. It is a package-level WaitGroup shared by all users of this
// package.
var AsyncLoading sync.WaitGroup