github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/stack/registration.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package stack
    16  
    17  import (
    18  	"fmt"
    19  	"time"
    20  
    21  	"github.com/SagerNet/gvisor/pkg/tcpip"
    22  	"github.com/SagerNet/gvisor/pkg/tcpip/buffer"
    23  	"github.com/SagerNet/gvisor/pkg/tcpip/header"
    24  	"github.com/SagerNet/gvisor/pkg/waiter"
    25  )
    26  
    27  // NetworkEndpointID identifies a network layer protocol endpoint. The local
    28  // address alone is currently sufficient because all supported protocols
    29  // (i.e., IPv4 and IPv6) use addresses of different sizes.
    30  type NetworkEndpointID struct {
    31  	LocalAddress tcpip.Address
    32  }
    33  
    34  // TransportEndpointID is the identifier of a transport layer protocol endpoint.
    35  //
    36  // +stateify savable
    37  type TransportEndpointID struct {
    38  	// LocalPort is the local port associated with the endpoint.
    39  	LocalPort uint16
    40  
    41  	// LocalAddress is the local [network layer] address associated with
    42  	// the endpoint.
    43  	LocalAddress tcpip.Address
    44  
    45  	// RemotePort is the remote port associated with the endpoint.
    46  	RemotePort uint16
    47  
    48  	// RemoteAddress is the remote [network layer] address associated with
    49  	// the endpoint.
    50  	RemoteAddress tcpip.Address
    51  }
    52  
    53  // NetworkPacketInfo carries metadata about a network layer packet.
    54  type NetworkPacketInfo struct {
    55  	// LocalAddressBroadcast is true if the packet's local address is a broadcast
    56  	// address.
    57  	LocalAddressBroadcast bool
    58  
    59  	// IsForwardedPacket is true if the packet is being forwarded.
    60  	IsForwardedPacket bool
    61  }
    62  
    63  // TransportErrorKind enumerates the kinds of error that are handled by the
    64  // transport layer.
    65  type TransportErrorKind int
    66  
    67  const (
    68  	// PacketTooBigTransportError indicates that a packet did not reach its
    69  	// destination because a link on the path to the destination had an MTU that
    70  	// was too small to carry the packet.
    71  	PacketTooBigTransportError TransportErrorKind = iota
    72  
    73  	// DestinationHostUnreachableTransportError indicates that the destination
    74  	// host was unreachable.
    75  	DestinationHostUnreachableTransportError
    76  
    77  	// DestinationPortUnreachableTransportError indicates that a packet reached
    78  	// the destination host, but the transport protocol was not active on the
    79  	// destination port.
    80  	DestinationPortUnreachableTransportError
    81  
    82  	// DestinationNetworkUnreachableTransportError indicates that the destination
    83  	// network was unreachable.
    84  	DestinationNetworkUnreachableTransportError
    85  )
    86  
    87  // TransportError is the interface implemented by errors that may be handled
    88  // by the transport layer.
    89  type TransportError interface {
    90  	tcpip.SockErrorCause
    91  
    92  	// Kind returns the kind of the transport error.
    93  	Kind() TransportErrorKind
    94  }
    95  
    96  // TransportEndpoint is the interface that needs to be implemented by transport
    97  // protocol (e.g., tcp, udp) endpoints that can handle packets.
    98  type TransportEndpoint interface {
    99  	// UniqueID returns a unique ID for this transport endpoint.
   100  	UniqueID() uint64
   101  
   102  	// HandlePacket is called by the stack when new packets arrive at this
   103  	// transport endpoint. It sets the packet buffer's transport header.
   104  	//
   105  	// HandlePacket takes ownership of the packet.
   106  	HandlePacket(TransportEndpointID, *PacketBuffer)
   107  
   108  	// HandleError is called when the transport endpoint receives an error.
   109  	//
   110  	// HandleError takes ownership of the packet buffer.
   111  	HandleError(TransportError, *PacketBuffer)
   112  
   113  	// Abort initiates an expedited endpoint teardown. It puts the endpoint
   114  	// in a closed state and frees all resources associated with it. This
   115  	// cleanup may happen asynchronously. Wait can be used to block on this
   116  	// asynchronous cleanup.
   117  	Abort()
   118  
   119  	// Wait waits for any worker goroutines owned by the endpoint to stop.
   120  	//
   121  	// An endpoint can be requested to stop its worker goroutines by calling
   122  	// its Close method.
   123  	//
   124  	// Wait will not block if the endpoint hasn't started any goroutines
   125  	// yet, even if it might later.
   126  	Wait()
   127  }
   128  
   129  // RawTransportEndpoint is the interface that needs to be implemented by raw
   130  // transport protocol endpoints. RawTransportEndpoints receive the entire
   131  // packet - including the network and transport headers - as delivered to
   132  // netstack.
   133  type RawTransportEndpoint interface {
   134  	// HandlePacket is called by the stack when new packets arrive at
   135  	// this transport endpoint. The packet contains all data from the link
   136  	// layer up.
   137  	//
   138  	// HandlePacket takes ownership of the packet.
   139  	HandlePacket(*PacketBuffer)
   140  }
   141  
   142  // PacketEndpoint is the interface that needs to be implemented by packet
   143  // transport protocol endpoints. These endpoints receive link layer headers in
   144  // addition to whatever they contain (usually network and transport layer
   145  // headers and a payload).
   146  type PacketEndpoint interface {
   147  	// HandlePacket is called by the stack when new packets arrive that
   148  	// match the endpoint.
   149  	//
   150  	// Implementers should treat packet as immutable and should copy it
   151  	// before modification.
   152  	//
   153  	// linkHeader may have a length of 0, in which case the PacketEndpoint
   154  	// should construct its own ethernet header for applications.
   155  	//
   156  	// HandlePacket takes ownership of pkt.
   157  	HandlePacket(nicID tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
   158  }
   159  
   160  // UnknownDestinationPacketDisposition enumerates the possible return values of
   161  // HandleUnknownDestinationPacket().
   162  type UnknownDestinationPacketDisposition int
   163  
   164  const (
   165  	// UnknownDestinationPacketMalformed denotes that the packet was malformed
   166  	// and no further processing should be attempted other than updating
   167  	// statistics.
   168  	UnknownDestinationPacketMalformed UnknownDestinationPacketDisposition = iota
   169  
   170  	// UnknownDestinationPacketUnhandled tells the caller that the packet was
   171  	// well formed but that the issue was not handled and the stack should take
   172  	// the default action.
   173  	UnknownDestinationPacketUnhandled
   174  
   175  	// UnknownDestinationPacketHandled tells the caller that it should do
   176  	// no further processing.
   177  	UnknownDestinationPacketHandled
   178  )
   179  
   180  // TransportProtocol is the interface that needs to be implemented by transport
   181  // protocols (e.g., tcp, udp) that want to be part of the networking stack.
   182  type TransportProtocol interface {
   183  	// Number returns the transport protocol number.
   184  	Number() tcpip.TransportProtocolNumber
   185  
   186  	// NewEndpoint creates a new endpoint of the transport protocol.
   187  	NewEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)
   188  
   189  	// NewRawEndpoint creates a new raw endpoint of the transport protocol.
   190  	NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)
   191  
   192  	// MinimumPacketSize returns the minimum valid packet size of this
   193  	// transport protocol. The stack automatically drops any packets smaller
   194  	// than this targeted at this protocol.
   195  	MinimumPacketSize() int
   196  
   197  	// ParsePorts returns the source and destination ports stored in a
   198  	// packet of this protocol.
   199  	ParsePorts(v buffer.View) (src, dst uint16, err tcpip.Error)
   200  
   201  	// HandleUnknownDestinationPacket handles packets targeted at this
   202  	// protocol that don't match any existing endpoint. For example,
   203  	// a packet targeted at a port that has no listeners.
   204  	//
   205  	// HandleUnknownDestinationPacket takes ownership of the packet if it handles
   206  	// the issue.
   207  	HandleUnknownDestinationPacket(TransportEndpointID, *PacketBuffer) UnknownDestinationPacketDisposition
   208  
   209  	// SetOption allows enabling/disabling protocol-specific features.
   210  	// SetOption returns an error if the option is not supported or the
   211  	// provided option value is invalid.
   212  	SetOption(option tcpip.SettableTransportProtocolOption) tcpip.Error
   213  
   214  	// Option allows retrieving protocol-specific option values.
   215  	// Option returns an error if the option is not supported or the
   216  	// provided option value is invalid.
   217  	Option(option tcpip.GettableTransportProtocolOption) tcpip.Error
   218  
   219  	// Close requests that any worker goroutines owned by the protocol
   220  	// stop.
   221  	Close()
   222  
   223  	// Wait waits for any worker goroutines owned by the protocol to stop.
   224  	Wait()
   225  
   226  	// Parse sets pkt.TransportHeader and trims pkt.Data appropriately. It does
   227  	// neither and returns false if pkt.Data is too small, i.e. pkt.Data.Size() <
   228  	// MinimumPacketSize().
   229  	Parse(pkt *PacketBuffer) (ok bool)
   230  }
   231  
   232  // TransportPacketDisposition is the result of attempting to deliver a packet
   233  // to the transport layer.
   234  type TransportPacketDisposition int
   235  
   236  const (
   237  	// TransportPacketHandled indicates that a transport packet was handled by the
   238  	// transport layer and callers need not take any further action.
   239  	TransportPacketHandled TransportPacketDisposition = iota
   240  
   241  	// TransportPacketProtocolUnreachable indicates that the transport
   242  	// protocol requested in the packet is not supported.
   243  	TransportPacketProtocolUnreachable
   244  
   245  	// TransportPacketDestinationPortUnreachable indicates that there weren't any
   246  	// listeners interested in the packet and the transport protocol has no means
   247  	// to notify the sender.
   248  	TransportPacketDestinationPortUnreachable
   249  )
   250  
   251  // TransportDispatcher contains the methods used by the network stack to deliver
   252  // packets to the appropriate transport endpoint after they have been handled by
   253  // the network layer.
   254  type TransportDispatcher interface {
   255  	// DeliverTransportPacket delivers packets to the appropriate
   256  	// transport protocol endpoint.
   257  	//
   258  	// pkt.NetworkHeader must be set before calling DeliverTransportPacket.
   259  	//
   260  	// DeliverTransportPacket takes ownership of the packet.
   261  	DeliverTransportPacket(tcpip.TransportProtocolNumber, *PacketBuffer) TransportPacketDisposition
   262  
   263  	// DeliverTransportError delivers an error to the appropriate transport
   264  	// endpoint.
   265  	//
   266  	// DeliverTransportError takes ownership of the packet buffer.
   267  	DeliverTransportError(local, remote tcpip.Address, _ tcpip.NetworkProtocolNumber, _ tcpip.TransportProtocolNumber, _ TransportError, _ *PacketBuffer)
   268  
   269  	// DeliverRawPacket delivers a packet to any subscribed raw sockets.
   270  	//
   271  	// DeliverRawPacket does NOT take ownership of the packet buffer.
   272  	DeliverRawPacket(tcpip.TransportProtocolNumber, *PacketBuffer)
   273  }
   274  
   275  // PacketLooping specifies where an outbound packet should be sent. Each value
   276  type PacketLooping byte
   277  
   278  const (
   279  	// PacketOut indicates that the packet should be passed to the link
   280  	// endpoint. It is the first bit of the 1 << iota sequence.
   281  	PacketOut PacketLooping = 1 << iota
   282  
   283  	// PacketLoop indicates that the packet should be handled locally.
   284  	PacketLoop
   285  )
   286  
   287  // NetworkHeaderParams are the header parameters given as input by the
   288  // transport endpoint to the network.
   289  type NetworkHeaderParams struct {
   290  	// Protocol refers to the transport protocol number.
   291  	Protocol tcpip.TransportProtocolNumber
   292  
   293  	// TTL refers to the Time To Live field of the IP header.
   294  	TTL uint8
   295  
   296  	// TOS refers to the TypeOfService or TrafficClass field of the IP header.
   297  	TOS uint8
   298  }
   299  
   300  // GroupAddressableEndpoint is an endpoint that supports group addressing.
   301  //
   302  // An endpoint is considered to support group addressing when one or more
   303  // endpoints may associate themselves with the same identifier (group address).
   304  type GroupAddressableEndpoint interface {
   305  	// JoinGroup joins the specified group.
   306  	JoinGroup(group tcpip.Address) tcpip.Error
   307  
   308  	// LeaveGroup attempts to leave the specified group.
   309  	LeaveGroup(group tcpip.Address) tcpip.Error
   310  
   311  	// IsInGroup reports whether the endpoint is a member of the specified group.
   312  	IsInGroup(group tcpip.Address) bool
   313  }
   314  
   315  // PrimaryEndpointBehavior enumerates the possible primary behaviors of an
   316  // AddressEndpoint.
   317  type PrimaryEndpointBehavior int
   318  
   319  const (
   320  	// CanBePrimaryEndpoint indicates the endpoint can be used as a primary
   321  	// endpoint for new connections with no local address. This is the
   322  	// default when calling NIC.AddAddress.
   323  	CanBePrimaryEndpoint PrimaryEndpointBehavior = iota
   324  
   325  	// FirstPrimaryEndpoint indicates the endpoint should be the first
   326  	// primary endpoint considered. If there are multiple endpoints with
   327  	// this behavior, they are ordered by recency.
   328  	FirstPrimaryEndpoint
   329  
   330  	// NeverPrimaryEndpoint indicates the endpoint should never be a
   331  	// primary endpoint.
   332  	NeverPrimaryEndpoint
   333  )
   334  
   335  // AddressConfigType is the method used to add an address.
   336  type AddressConfigType int
   337  
   338  const (
   339  	// AddressConfigStatic is a statically configured address endpoint that was
   340  	// added by some user-specified action (adding an explicit address, joining a
   341  	// multicast group).
   342  	AddressConfigStatic AddressConfigType = iota
   343  
   344  	// AddressConfigSlaac is an address endpoint added by SLAAC, as per RFC 4862
   345  	// section 5.5.3.
   346  	AddressConfigSlaac
   347  
   348  	// AddressConfigSlaacTemp is a temporary address endpoint added by SLAAC as
   349  	// per RFC 4941. Temporary SLAAC addresses are short-lived and are not meant
   350  	// to be valid (or preferred) forever; hence the term temporary.
   351  	AddressConfigSlaacTemp
   352  )
   353  
   354  // AssignableAddressEndpoint is a reference counted address endpoint that may be
   355  // assigned to a NetworkEndpoint.
   356  type AssignableAddressEndpoint interface {
   357  	// AddressWithPrefix returns the endpoint's address.
   358  	AddressWithPrefix() tcpip.AddressWithPrefix
   359  
   360  	// Subnet returns the subnet of the endpoint's address.
   361  	Subnet() tcpip.Subnet
   362  
   363  	// IsAssigned reports whether the endpoint is considered bound
   364  	// to its NetworkEndpoint.
   365  	IsAssigned(allowExpired bool) bool
   366  
   367  	// IncRef increments this endpoint's reference count.
   368  	//
   369  	// Returns true if it was successfully incremented. If it returns false, then
   370  	// the endpoint is considered expired and should no longer be used.
   371  	IncRef() bool
   372  
   373  	// DecRef decrements this endpoint's reference count.
   374  	DecRef()
   375  }
   376  
   377  // AddressEndpoint is an endpoint representing an address assigned to an
   378  // AddressableEndpoint.
   379  type AddressEndpoint interface {
   380  	AssignableAddressEndpoint
   381  
   382  	// GetKind returns the address kind for this endpoint.
   383  	GetKind() AddressKind
   384  
   385  	// SetKind sets the address kind for this endpoint.
   386  	SetKind(AddressKind)
   387  
   388  	// ConfigType returns the method used to add the address.
   389  	ConfigType() AddressConfigType
   390  
   391  	// Deprecated reports whether this endpoint is deprecated.
   392  	Deprecated() bool
   393  
   394  	// SetDeprecated sets this endpoint's deprecated status.
   395  	SetDeprecated(bool)
   396  }
   397  
   398  // AddressKind is the kind of an address.
   399  //
   400  // See the values of AddressKind for more details.
   401  type AddressKind int
   402  
   403  const (
   404  	// PermanentTentative is a permanent address endpoint that is not yet
   405  	// considered to be fully bound to an interface in the traditional
   406  	// sense. That is, the address is associated with a NIC, but packets
   407  	// destined to the address MUST NOT be accepted and MUST be silently
   408  	// dropped, and the address MUST NOT be used as a source address for
   409  	// outgoing packets. For IPv6, addresses are of this kind until NDP's
   410  	// Duplicate Address Detection (DAD) resolves. If DAD fails, the address
   411  	// is removed.
   412  	PermanentTentative AddressKind = iota
   413  
   414  	// Permanent is a permanent endpoint (vs. a temporary one) assigned to the
   415  	// NIC. Its reference count is biased by 1 to avoid removal when no route
   416  	// holds a reference to it. It is removed by explicitly removing the address
   417  	// from the NIC.
   418  	Permanent
   419  
   420  	// PermanentExpired is a permanent endpoint that had its address removed from
   421  	// the NIC, and it is waiting to be removed once no references to it are held.
   422  	//
   423  	// If the address is re-added before the endpoint is removed, its type
   424  	// changes back to Permanent.
   425  	PermanentExpired
   426  
   427  	// Temporary is an endpoint, created on a one-off basis to temporarily
   428  	// consider the NIC bound to an address that it is not explicitly bound to
   429  	// (such as a permanent address). Its reference count must not be biased by 1
   430  	// so that the address is removed immediately when references to it are no
   431  	// longer held.
   432  	//
   433  	// A temporary endpoint may be promoted to permanent if the address is added
   434  	// permanently.
   435  	Temporary
   436  )
   437  
   438  // IsPermanent reports whether k is Permanent or PermanentTentative.
   439  func (k AddressKind) IsPermanent() bool {
   440  	if k == Permanent || k == PermanentTentative {
   441  		return true
   442  	}
   443  	if k == Temporary || k == PermanentExpired {
   444  		return false
   445  	}
   446  	// Any other value is not one of the four kinds handled above.
   447  	panic(fmt.Sprintf("unrecognized address kind = %d", k))
   448  }
   449  
   450  // AddressableEndpoint is an endpoint that supports addressing.
   451  //
   452  // An endpoint is considered to support addressing when the endpoint may
   453  // associate itself with an identifier (address).
   454  type AddressableEndpoint interface {
   455  	// AddAndAcquirePermanentAddress adds the passed permanent address.
   456  	//
   457  	// Returns *tcpip.ErrDuplicateAddress if the address exists.
   458  	//
   459  	// Acquires and returns the AddressEndpoint for the added address.
   460  	AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb PrimaryEndpointBehavior, configType AddressConfigType, deprecated bool) (AddressEndpoint, tcpip.Error)
   461  
   462  	// RemovePermanentAddress removes the passed address if it is a permanent
   463  	// address.
   464  	//
   465  	// Returns *tcpip.ErrBadLocalAddress if the endpoint does not have the passed
   466  	// permanent address.
   467  	RemovePermanentAddress(addr tcpip.Address) tcpip.Error
   468  
   469  	// MainAddress returns the endpoint's primary permanent address.
   470  	MainAddress() tcpip.AddressWithPrefix
   471  
   472  	// AcquireAssignedAddress returns an address endpoint for the passed address
   473  	// that is considered bound to the endpoint, optionally creating a temporary
   474  	// endpoint if requested and no matching address exists.
   475  	//
   476  	// The returned endpoint's reference count is incremented.
   477  	//
   478  	// Returns nil if the specified address is not local to this endpoint.
   479  	AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB PrimaryEndpointBehavior) AddressEndpoint
   480  
   481  	// AcquireOutgoingPrimaryAddress returns a primary address that may be used as
   482  	// a source address when sending packets to the passed remote address.
   483  	//
   484  	// If allowExpired is true, expired addresses may be returned.
   485  	//
   486  	// The returned endpoint's reference count is incremented.
   487  	//
   488  	// Returns nil if a primary address is not available.
   489  	AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) AddressEndpoint
   490  
   491  	// PrimaryAddresses returns the primary addresses.
   492  	PrimaryAddresses() []tcpip.AddressWithPrefix
   493  
   494  	// PermanentAddresses returns all the permanent addresses.
   495  	PermanentAddresses() []tcpip.AddressWithPrefix
   496  }
   497  
   498  // NDPEndpoint is a NetworkEndpoint that additionally supports NDP.
   499  type NDPEndpoint interface {
   500  	NetworkEndpoint
   501  
   502  	// InvalidateDefaultRouter invalidates a default router discovered through
   503  	// NDP.
   504  	InvalidateDefaultRouter(tcpip.Address)
   505  }
   506  
   507  // NetworkInterface is a network interface; it embeds NetworkLinkEndpoint.
   508  type NetworkInterface interface {
   509  	NetworkLinkEndpoint
   510  
   511  	// ID returns the interface's ID.
   512  	ID() tcpip.NICID
   513  
   514  	// IsLoopback returns true if the interface is a loopback interface.
   515  	IsLoopback() bool
   516  
   517  	// Name returns the name of the interface.
   518  	//
   519  	// May return an empty string if the interface is not configured with a name.
   520  	Name() string
   521  
   522  	// Enabled returns true if the interface is enabled.
   523  	Enabled() bool
   524  
   525  	// Promiscuous returns true if the interface is in promiscuous mode.
   526  	//
   527  	// When in promiscuous mode, the interface should accept all packets.
   528  	Promiscuous() bool
   529  
   530  	// Spoofing returns true if the interface is in spoofing mode.
   531  	//
   532  	// When in spoofing mode, the interface should consider all addresses as
   533  	// assigned to it.
   534  	Spoofing() bool
   535  
   536  	// PrimaryAddress returns the primary address associated with the interface.
   537  	//
   538  	// PrimaryAddress will return the first non-deprecated address if such an
   539  	// address exists. If no non-deprecated addresses exist, the first deprecated
   540  	// address will be returned. If no deprecated addresses exist, the zero value
   541  	// will be returned.
   542  	PrimaryAddress(tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error)
   543  
   544  	// CheckLocalAddress returns true if the address exists on the interface.
   545  	CheckLocalAddress(tcpip.NetworkProtocolNumber, tcpip.Address) bool
   546  
   547  	// WritePacketToRemote writes the packet to the given remote link address.
   548  	WritePacketToRemote(tcpip.LinkAddress, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error
   549  
   550  	// WritePacket writes a packet with the given protocol through the given
   551  	// route.
   552  	//
   553  	// WritePacket takes ownership of the packet buffer. The packet buffer's
   554  	// network and transport header must be set.
   555  	WritePacket(*Route, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error
   556  
   557  	// WritePackets writes packets with the given protocol through the given
   558  	// route. Must not be called with an empty list of packet buffers.
   559  	//
   560  	// WritePackets takes ownership of the packet buffers.
   561  	//
   562  	// Right now, WritePackets is used only when the software segmentation
   563  	// offload is enabled. If it is used for something else, syscall filters
   564  	// may need to be updated.
   565  	WritePackets(*Route, PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error)
   566  
   567  	// HandleNeighborProbe processes an incoming neighbor probe (e.g. ARP
   568  	// request or NDP Neighbor Solicitation).
   569  	//
   570  	// HandleNeighborProbe assumes that the probe is valid for the network
   571  	// interface the probe was received on.
   572  	HandleNeighborProbe(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress) tcpip.Error
   573  
   574  	// HandleNeighborConfirmation processes an incoming neighbor confirmation
   575  	// (e.g. ARP reply or NDP Neighbor Advertisement).
   576  	HandleNeighborConfirmation(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress, ReachabilityConfirmationFlags) tcpip.Error
   577  }
   578  
   579  // LinkResolvableNetworkEndpoint handles link resolution events.
   580  type LinkResolvableNetworkEndpoint interface {
   581  	// HandleLinkResolutionFailure is called when link resolution prevents the
   582  	// packet passed as the argument from being sent.
   583  	HandleLinkResolutionFailure(*PacketBuffer)
   584  }
   585  
   586  // NetworkEndpoint is the interface that needs to be implemented by endpoints
   587  // of network layer protocols (e.g., ipv4, ipv6).
   588  type NetworkEndpoint interface {
   589  	// Enable enables the endpoint.
   590  	//
   591  	// Must only be called when the stack is in a state that allows the endpoint
   592  	// to send and receive packets.
   593  	//
   594  	// Returns *tcpip.ErrNotPermitted if the endpoint cannot be enabled.
   595  	Enable() tcpip.Error
   596  
   597  	// Enabled returns true if the endpoint is enabled.
   598  	Enabled() bool
   599  
   600  	// Disable disables the endpoint.
   601  	Disable()
   602  
   603  	// DefaultTTL is the default time-to-live value (or hop limit, in ipv6)
   604  	// for this endpoint.
   605  	DefaultTTL() uint8
   606  
   607  	// MTU is the maximum transmission unit for this endpoint. This is
   608  	// generally calculated as the MTU of the underlying data link endpoint
   609  	// minus the network endpoint max header length.
   610  	MTU() uint32
   611  
   612  	// MaxHeaderLength returns the maximum size the network (and lower
   613  	// level layers combined) headers can have. Higher levels use this
   614  	// information to reserve space in the front of the packets they're
   615  	// building.
   616  	MaxHeaderLength() uint16
   617  
   618  	// WritePacket writes a packet to the given destination address and
   619  	// protocol. It takes ownership of pkt. pkt.TransportHeader must have
   620  	// already been set.
   621  	WritePacket(r *Route, params NetworkHeaderParams, pkt *PacketBuffer) tcpip.Error
   622  
   623  	// WritePackets writes packets to the given destination address and
   624  	// protocol. pkts must not be zero length. It takes ownership of pkts and
   625  	// underlying packets.
   626  	WritePackets(r *Route, pkts PacketBufferList, params NetworkHeaderParams) (int, tcpip.Error)
   627  
   628  	// WriteHeaderIncludedPacket writes a packet that includes a network
   629  	// header to the given destination address. It takes ownership of pkt.
   630  	WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) tcpip.Error
   631  
   632  	// HandlePacket is called by the link layer when new packets arrive at
   633  	// this network endpoint. It sets pkt.NetworkHeader.
   634  	//
   635  	// HandlePacket takes ownership of pkt.
   636  	HandlePacket(pkt *PacketBuffer)
   637  
   638  	// Close is called when the endpoint is removed from a stack.
   639  	Close()
   640  
   641  	// NetworkProtocolNumber returns the tcpip.NetworkProtocolNumber for
   642  	// this endpoint.
   643  	NetworkProtocolNumber() tcpip.NetworkProtocolNumber
   644  
   645  	// Stats returns a reference to the network endpoint stats.
   646  	Stats() NetworkEndpointStats
   647  }
   648  
   649  // NetworkEndpointStats is the marker interface implemented by each network
   650  // endpoint stats struct.
   651  type NetworkEndpointStats interface {
   652  	// IsNetworkEndpointStats is an empty method to implement the
   653  	// NetworkEndpointStats marker interface.
   654  	IsNetworkEndpointStats()
   655  }
   656  
   657  // IPNetworkEndpointStats is a NetworkEndpointStats that tracks IP-related
   658  // statistics.
   659  type IPNetworkEndpointStats interface {
   660  	NetworkEndpointStats
   661  
   662  	// IPStats returns the IP statistics of the network endpoint.
   663  	IPStats() *tcpip.IPStats
   664  }
   665  
   666  // ForwardingNetworkEndpoint is a NetworkEndpoint that may forward packets.
   667  type ForwardingNetworkEndpoint interface {
   668  	NetworkEndpoint
   669  
   670  	// Forwarding returns the forwarding configuration.
   671  	Forwarding() bool
   672  
   673  	// SetForwarding sets the forwarding configuration.
   674  	SetForwarding(bool)
   675  }
   676  
   677  // NetworkProtocol is the interface that needs to be implemented by network
   678  // protocols (e.g., ipv4, ipv6) that want to be part of the networking stack.
   679  type NetworkProtocol interface {
   680  	// Number returns the network protocol number.
   681  	Number() tcpip.NetworkProtocolNumber
   682  
   683  	// MinimumPacketSize returns the minimum valid packet size of this
   684  	// network protocol. The stack automatically drops any packets smaller
   685  	// than this targeted at this protocol.
   686  	MinimumPacketSize() int
   687  
   688  	// DefaultPrefixLen returns the protocol's default prefix length.
   689  	DefaultPrefixLen() int
   690  
   691  	// ParseAddresses returns the source and destination addresses stored in a
   692  	// packet of this protocol.
   693  	ParseAddresses(v buffer.View) (src, dst tcpip.Address)
   694  
   695  	// NewEndpoint creates a new endpoint of this protocol.
   696  	NewEndpoint(nic NetworkInterface, dispatcher TransportDispatcher) NetworkEndpoint
   697  
   698  	// SetOption allows enabling/disabling protocol-specific features.
   699  	// SetOption returns an error if the option is not supported or the
   700  	// provided option value is invalid.
   701  	SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error
   702  
   703  	// Option allows retrieving protocol-specific option values.
   704  	// Option returns an error if the option is not supported or the
   705  	// provided option value is invalid.
   706  	Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error
   707  
   708  	// Close requests that any worker goroutines owned by the protocol
   709  	// stop.
   710  	Close()
   711  
   712  	// Wait waits for any worker goroutines owned by the protocol to stop.
   713  	Wait()
   714  
   715  	// Parse sets pkt.NetworkHeader and trims pkt.Data appropriately. It
   716  	// returns:
   717  	// - The encapsulated protocol, if present.
   718  	// - Whether there is an encapsulated transport protocol payload (e.g. ARP
   719  	//   does not encapsulate anything).
   720  	// - Whether pkt.Data was large enough to parse and set pkt.NetworkHeader.
   721  	Parse(pkt *PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool)
   722  }
   723  
// NetworkDispatcher contains the methods used by the network stack to deliver
// inbound/outbound packets to the appropriate network/packet (if any)
// endpoints.
//
// Both methods take ownership of the packet buffer they are handed.
type NetworkDispatcher interface {
	// DeliverNetworkPacket finds the appropriate network protocol endpoint
	// and hands the packet over for further processing.
	//
	// pkt.LinkHeader may or may not be set before calling
	// DeliverNetworkPacket. Some packets do not have link headers (e.g.
	// packets sent via loopback), and won't have the field set.
	//
	// DeliverNetworkPacket takes ownership of pkt.
	DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)

	// DeliverOutboundPacket is called by the link layer when a packet is
	// being sent out.
	//
	// pkt.LinkHeader may or may not be set before calling
	// DeliverOutboundPacket. Some packets do not have link headers (e.g.
	// packets sent via loopback), and won't have the field set.
	//
	// DeliverOutboundPacket takes ownership of pkt.
	DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
}
   747  
// LinkEndpointCapabilities is the type associated with the capabilities
// supported by a link-layer endpoint. It is a set of bitfields.
type LinkEndpointCapabilities uint

// The following are the supported link endpoint capabilities.
//
// NOTE: iota counts every entry of the const block, including CapabilityNone,
// so it is already 1 where "1 << iota" first appears. The flags therefore
// start at 1 << 1 (CapabilityTXChecksumOffload == 2) and bit 0 is unused.
const (
	// CapabilityNone is the empty capability set (no bits set).
	CapabilityNone LinkEndpointCapabilities = 0
	// CapabilityTXChecksumOffload indicates that the link endpoint supports
	// checksum computation for outgoing packets and the stack can skip
	// computing checksums when sending packets.
	CapabilityTXChecksumOffload LinkEndpointCapabilities = 1 << iota
	// CapabilityRXChecksumOffload indicates that the link endpoint supports
	// checksum verification on received packets and that it's safe for the
	// stack to skip checksum verification.
	CapabilityRXChecksumOffload
	// CapabilityResolutionRequired presumably indicates that link address
	// resolution is required on this endpoint.
	// NOTE(review): meaning inferred from the name only; confirm against users.
	CapabilityResolutionRequired
	// CapabilitySaveRestore presumably indicates that the endpoint supports
	// save/restore.
	// NOTE(review): meaning inferred from the name only; confirm against users.
	CapabilitySaveRestore
	// CapabilityDisconnectOk presumably indicates that the endpoint tolerates
	// being disconnected.
	// NOTE(review): meaning inferred from the name only; confirm against users.
	CapabilityDisconnectOk
	// CapabilityLoopback presumably indicates that the endpoint is a loopback
	// endpoint.
	// NOTE(review): meaning inferred from the name only; confirm against users.
	CapabilityLoopback
)
   768  
// NetworkLinkEndpoint is a data-link layer that supports sending network
// layer packets.
//
// It exposes the link properties (MTU, header size, link address) that
// higher layers consult when building packets.
type NetworkLinkEndpoint interface {
	// MTU is the maximum transmission unit for this endpoint. This is
	// usually dictated by the backing physical network; when such a
	// physical network doesn't exist, the limit is generally 64k, which
	// includes the maximum size of an IP packet.
	MTU() uint32

	// MaxHeaderLength returns the maximum size the data link (and
	// lower level layers combined) headers can have. Higher levels use this
	// information to reserve space in the front of the packets they're
	// building.
	MaxHeaderLength() uint16

	// LinkAddress returns the link address (typically a MAC) of the
	// endpoint.
	LinkAddress() tcpip.LinkAddress
}
   788  
// LinkEndpoint is the interface implemented by data link layer protocols (e.g.,
// ethernet, loopback, raw) and used by network layer protocols to send packets
// out through the implementer's data link endpoint. When a link header exists,
// it sets each PacketBuffer's LinkHeader field before passing it up the
// stack.
type LinkEndpoint interface {
	NetworkLinkEndpoint

	// Capabilities returns the set of capabilities supported by the
	// endpoint.
	Capabilities() LinkEndpointCapabilities

	// Attach attaches the data link layer endpoint to the network-layer
	// dispatcher of the stack.
	//
	// Attach is called with a nil dispatcher when the endpoint's NIC is being
	// removed.
	Attach(dispatcher NetworkDispatcher)

	// IsAttached returns whether a NetworkDispatcher is attached to the
	// endpoint.
	IsAttached() bool

	// Wait waits for any worker goroutines owned by the endpoint to stop.
	//
	// For now, requesting that an endpoint's worker goroutine(s) stop is
	// implementation specific.
	//
	// Wait will not block if the endpoint hasn't started any goroutines
	// yet, even if it might later.
	Wait()

	// ARPHardwareType returns the ARPHRD_TYPE of the link endpoint.
	//
	// See:
	// https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/include/uapi/linux/if_arp.h#L30
	ARPHardwareType() header.ARPHardwareType

	// AddHeader adds a link layer header to pkt if required.
	AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)

	// WritePacket writes a packet with the given protocol and route.
	//
	// WritePacket takes ownership of the packet buffer. The packet buffer's
	// network and transport header must be set.
	//
	// To participate in transparent bridging, a LinkEndpoint implementation
	// should call eth.Encode with header.EthernetFields.SrcAddr set to
	// r.LocalLinkAddress if it is provided.
	// NOTE(review): "r" presumably denotes the RouteInfo argument, which is
	// unnamed in this signature; confirm.
	WritePacket(RouteInfo, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error

	// WritePackets writes packets with the given protocol and route. Must not be
	// called with an empty list of packet buffers.
	//
	// WritePackets takes ownership of the packet buffers.
	//
	// Right now, WritePackets is used only when the software segmentation
	// offload is enabled. If it will be used for something else, syscall filters
	// may need to be updated.
	//
	// NOTE(review): the int result is presumably the number of packets
	// written; confirm against implementations.
	WritePackets(RouteInfo, PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error)
}
   850  
// InjectableLinkEndpoint is a LinkEndpoint where inbound packets are
// delivered via the InjectInbound method.
type InjectableLinkEndpoint interface {
	LinkEndpoint

	// InjectInbound injects an inbound packet of the given network protocol.
	InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)

	// InjectOutbound writes a fully formed outbound packet directly to the
	// link.
	//
	// dest is used by endpoints with multiple raw destinations.
	InjectOutbound(dest tcpip.Address, packet []byte) tcpip.Error
}
   865  
// DADResult is a marker interface for the result of a duplicate address
// detection process.
//
// Its only method is unexported, so only types declared in this package can
// satisfy it.
type DADResult interface {
	isDADResult()
}
   871  
// Compile-time check that *DADSucceeded implements DADResult.
var _ DADResult = (*DADSucceeded)(nil)

// DADSucceeded indicates DAD completed without finding any duplicate addresses.
type DADSucceeded struct{}

// isDADResult marks *DADSucceeded as a DADResult.
func (*DADSucceeded) isDADResult() {}
   878  
// Compile-time check that *DADError implements DADResult.
var _ DADResult = (*DADError)(nil)

// DADError indicates DAD hit an error.
type DADError struct {
	// Err is the error that DAD hit.
	Err tcpip.Error
}

// isDADResult marks *DADError as a DADResult.
func (*DADError) isDADResult() {}
   887  
// Compile-time check that *DADAborted implements DADResult.
var _ DADResult = (*DADAborted)(nil)

// DADAborted indicates DAD was aborted.
type DADAborted struct{}

// isDADResult marks *DADAborted as a DADResult.
func (*DADAborted) isDADResult() {}
   894  
// Compile-time check that *DADDupAddrDetected implements DADResult.
var _ DADResult = (*DADDupAddrDetected)(nil)

// DADDupAddrDetected indicates DAD detected a duplicate address.
type DADDupAddrDetected struct {
	// HolderLinkAddress is the link address of the node that holds the duplicate
	// address.
	HolderLinkAddress tcpip.LinkAddress
}

// isDADResult marks *DADDupAddrDetected as a DADResult.
func (*DADDupAddrDetected) isDADResult() {}
   905  
// DADCompletionHandler is a handler for DAD completion. It is passed the
// DADResult of the completed process.
type DADCompletionHandler func(DADResult)
   908  
   909  // DADCheckAddressDisposition enumerates the possible return values from
   910  // DAD.CheckDuplicateAddress.
   911  type DADCheckAddressDisposition int
   912  
   913  const (
   914  	_ DADCheckAddressDisposition = iota
   915  
   916  	// DADDisabled indicates that DAD is disabled.
   917  	DADDisabled
   918  
   919  	// DADStarting indicates that DAD is starting for an address.
   920  	DADStarting
   921  
   922  	// DADAlreadyRunning indicates that DAD was already started for an address.
   923  	DADAlreadyRunning
   924  )
   925  
   926  const (
   927  	// defaultDupAddrDetectTransmits is the default number of NDP Neighbor
   928  	// Solicitation messages to send when doing Duplicate Address Detection
   929  	// for a tentative address.
   930  	//
   931  	// Default = 1 (from RFC 4862 section 5.1)
   932  	defaultDupAddrDetectTransmits = 1
   933  )
   934  
// DADConfigurations holds configurations for duplicate address detection.
type DADConfigurations struct {
	// DupAddrDetectTransmits is the number of Neighbor Solicitation messages
	// to send when doing Duplicate Address Detection for a tentative address.
	//
	// Note, a value of zero effectively disables DAD.
	DupAddrDetectTransmits uint8

	// RetransmitTimer is the amount of time to wait between sending Neighbor
	// Solicitation messages.
	//
	// Must be greater than or equal to 1ms.
	RetransmitTimer time.Duration
}
   949  
   950  // DefaultDADConfigurations returns the default DAD configurations.
   951  func DefaultDADConfigurations() DADConfigurations {
   952  	return DADConfigurations{
   953  		DupAddrDetectTransmits: defaultDupAddrDetectTransmits,
   954  		RetransmitTimer:        defaultRetransmitTimer,
   955  	}
   956  }
   957  
   958  // Validate modifies the configuration with valid values. If invalid values are
   959  // present in the configurations, the corresponding default values are used
   960  // instead.
   961  func (c *DADConfigurations) Validate() {
   962  	if c.RetransmitTimer < minimumRetransmitTimer {
   963  		c.RetransmitTimer = defaultRetransmitTimer
   964  	}
   965  }
   966  
// DuplicateAddressDetector handles checking if an address is already assigned
// to some neighboring node on the link.
type DuplicateAddressDetector interface {
	// CheckDuplicateAddress checks if an address is assigned to a neighbor.
	//
	// If DAD is already being performed for the address, the handler will be
	// called with the result of the original DAD request.
	//
	// The returned DADCheckAddressDisposition describes how the request was
	// handled (see the DAD* disposition constants).
	CheckDuplicateAddress(tcpip.Address, DADCompletionHandler) DADCheckAddressDisposition

	// SetDADConfigurations sets the configurations for DAD.
	SetDADConfigurations(c DADConfigurations)

	// DuplicateAddressProtocol returns the network protocol the receiver can
	// perform duplicate address detection for.
	DuplicateAddressProtocol() tcpip.NetworkProtocolNumber
}
   983  
// LinkAddressResolver handles link address resolution for a network protocol.
type LinkAddressResolver interface {
	// LinkAddressRequest sends a request for the link address of the target
	// address. The request is broadcast on the local network if a remote link
	// address is not provided.
	LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) tcpip.Error

	// ResolveStaticAddress attempts to resolve address without sending
	// requests. It either resolves the address immediately or returns the
	// empty LinkAddress.
	//
	// It can be used to resolve broadcast addresses for example.
	//
	// NOTE(review): the bool result presumably reports whether the address
	// was resolved; confirm against implementations.
	ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool)

	// LinkAddressProtocol returns the network protocol of the
	// addresses this resolver can resolve.
	LinkAddressProtocol() tcpip.NetworkProtocolNumber
}
  1002  
// RawFactory produces endpoints for writing various types of raw packets.
type RawFactory interface {
	// NewUnassociatedEndpoint produces endpoints for writing packets not
	// associated with a particular transport protocol. Such endpoints can
	// be used to write arbitrary packets that include the network header.
	NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)

	// NewPacketEndpoint produces endpoints for reading and writing packets
	// that include network and (when cooked is false) link layer headers.
	NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)
}
  1014  
// GSOType is the type of GSO segments.
//
// +stateify savable
type GSOType int

// Types of GSO segments.
const (
	// GSONone is the zero value of GSOType.
	// NOTE(review): presumably means that no GSO applies; confirm against users.
	GSONone GSOType = iota

	// Hardware GSO types:
	GSOTCPv4
	GSOTCPv6

	// GSOSW is used for software GSO segments which have to be sent by
	// endpoint.WritePackets.
	GSOSW
)
  1032  
// GSO contains generic segmentation offload properties.
//
// +stateify savable
type GSO struct {
	// Type is one of GSONone, GSOTCPv4, etc.
	Type GSOType
	// NeedsCsum is set if the checksum offload is enabled.
	NeedsCsum bool
	// CsumOffset is the offset after which to place the checksum.
	CsumOffset uint16

	// MSS is the maximum segment size.
	MSS uint16
	// L3HdrLen is the L3 (IP) header length.
	L3HdrLen uint16

	// MaxSize is the maximum GSO packet size.
	MaxSize uint32
}
  1052  
// SupportedGSO is the type of segmentation offloading supported.
type SupportedGSO int

// The following are the possible SupportedGSO values.
const (
	// GSONotSupported indicates that segmentation offloading is not supported.
	GSONotSupported SupportedGSO = iota

	// HWGSOSupported indicates that segmentation offloading may be performed by
	// the hardware.
	HWGSOSupported

	// SWGSOSupported indicates that segmentation offloading may be performed in
	// software.
	SWGSOSupported
)
  1068  
// GSOEndpoint provides access to GSO properties.
type GSOEndpoint interface {
	// GSOMaxSize returns the maximum GSO packet size.
	GSOMaxSize() uint32

	// SupportedGSO returns the supported segmentation offloading, as one of
	// the SupportedGSO values.
	SupportedGSO() SupportedGSO
}
  1077  
  1078  // SoftwareGSOMaxSize is a maximum allowed size of a software GSO segment.
  1079  // This isn't a hard limit, because it is never set into packet headers.
  1080  const SoftwareGSOMaxSize = 1 << 16