inet.af/netstack@v0.0.0-20220214151720-7585b01ddccf/tcpip/stack/registration.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package stack
    16  
    17  import (
    18  	"fmt"
    19  	"time"
    20  
    21  	"inet.af/netstack/tcpip"
    22  	"inet.af/netstack/tcpip/buffer"
    23  	"inet.af/netstack/tcpip/header"
    24  	"inet.af/netstack/waiter"
    25  )
    26  
    27  // NetworkEndpointID is the identifier of a network layer protocol endpoint.
    28  // Currently the local address alone is sufficient because all supported
    29  // protocols (i.e., IPv4 and IPv6) have different sizes for their addresses.
    30  type NetworkEndpointID struct {
    31  	LocalAddress tcpip.Address // Local network layer address identifying the endpoint.
    32  }
    33  
    34  // TransportEndpointID is the identifier of a transport layer protocol endpoint.
    35  //
    36  // +stateify savable
    37  type TransportEndpointID struct {
    38  	// LocalPort is the local port associated with the endpoint.
    39  	LocalPort uint16
    40  
    41  	// LocalAddress is the local [network layer] address associated with
    42  	// the endpoint.
    43  	LocalAddress tcpip.Address
    44  
    45  	// RemotePort is the remote port associated with the endpoint.
    46  	RemotePort uint16
    47  
    48  	// RemoteAddress is the remote [network layer] address associated with
    49  	// the endpoint.
    50  	RemoteAddress tcpip.Address
    51  }
    52  
    53  // NetworkPacketInfo holds per-packet flags describing a network layer packet.
    54  type NetworkPacketInfo struct {
    55  	// LocalAddressBroadcast is true if the packet's local address is a broadcast
    56  	// address.
    57  	LocalAddressBroadcast bool
    58  
    59  	// IsForwardedPacket is true if the packet is being forwarded.
    60  	IsForwardedPacket bool
    61  }
    62  
    63  // TransportErrorKind enumerates error types that are handled by the transport
    64  // layer.
    65  type TransportErrorKind int
    66  
    67  const (
    68  	// PacketTooBigTransportError (the zero value) indicates that a packet did
    69  	// not reach its destination because a link on the path to the destination
    70  	// had an MTU that was too small to carry the packet.
    71  	PacketTooBigTransportError TransportErrorKind = iota
    72  
    73  	// DestinationHostUnreachableTransportError indicates that the destination
    74  	// host was unreachable.
    75  	DestinationHostUnreachableTransportError
    76  
    77  	// DestinationPortUnreachableTransportError indicates that a packet reached
    78  	// the destination host, but the transport protocol was not active on the
    79  	// destination port.
    80  	DestinationPortUnreachableTransportError
    81  
    82  	// DestinationNetworkUnreachableTransportError indicates that the destination
    83  	// network was unreachable.
    84  	DestinationNetworkUnreachableTransportError
    85  )
    86  
    87  // TransportError is an error that may be handled by the transport layer. It
    88  // is a tcpip.SockErrorCause that also reports its TransportErrorKind.
    89  type TransportError interface {
    90  	tcpip.SockErrorCause
    91  
    92  	// Kind returns the kind (TransportErrorKind) of the transport error.
    93  	Kind() TransportErrorKind
    94  }
    95  
    96  // TransportEndpoint is the interface that needs to be implemented by transport
    97  // protocol (e.g., tcp, udp) endpoints that can handle packets.
    98  type TransportEndpoint interface {
    99  	// UniqueID returns a unique ID for this transport endpoint.
   100  	UniqueID() uint64
   101  
   102  	// HandlePacket is called by the stack when new packets arrive at this
   103  	// transport endpoint. It sets the packet buffer's transport header.
   104  	//
   105  	// HandlePacket may modify the packet.
   106  	HandlePacket(TransportEndpointID, *PacketBuffer)
   107  
   108  	// HandleError is called when the transport endpoint receives an error.
   109  	//
   110  	// HandleError may modify the packet buffer.
   111  	HandleError(TransportError, *PacketBuffer)
   112  
   113  	// Abort initiates an expedited endpoint teardown. It puts the endpoint
   114  	// in a closed state and frees all resources associated with it. This
   115  	// cleanup may happen asynchronously. Wait can be used to block on this
   116  	// asynchronous cleanup.
   117  	Abort()
   118  
   119  	// Wait waits for any worker goroutines owned by the endpoint to stop.
   120  	//
   121  	// An endpoint can be requested to stop its worker goroutines by calling
   122  	// its Close method.
   123  	//
   124  	// Wait will not block if the endpoint hasn't started any goroutines
   125  	// yet, even if it might later.
   126  	Wait()
   127  }
   128  
   129  // RawTransportEndpoint is the interface that needs to be implemented by raw
   130  // transport protocol endpoints. RawTransportEndpoints receive the entire
   131  // packet - including the network and transport headers - as delivered to
   132  // netstack.
   133  type RawTransportEndpoint interface {
   134  	// HandlePacket is called by the stack when new packets arrive at
   135  	// this transport endpoint. The packet contains all data from the link
   136  	// layer up.
   137  	//
   138  	// HandlePacket may modify the packet.
   139  	HandlePacket(*PacketBuffer)
   140  }
   141  
   142  // PacketEndpoint is the interface that needs to be implemented by packet
   143  // transport protocol endpoints. These endpoints receive link layer headers in
   144  // addition to whatever they contain (usually network and transport layer
   145  // headers and a payload).
   146  type PacketEndpoint interface {
   147  	// HandlePacket is called by the stack when new packets arrive that
   148  	// match the endpoint.
   149  	//
   150  	// Implementers should treat packet as immutable and should copy it
   151  	// before modification.
   152  	//
   153  	// linkHeader may have a length of 0, in which case the PacketEndpoint
   154  	// should construct its own ethernet header for applications.
   155  	//
   156  	// HandlePacket may modify pkt.
   157  	HandlePacket(nicID tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
   158  }
   159  
   160  // UnknownDestinationPacketDisposition enumerates the possible return values from
   161  // HandleUnknownDestinationPacket().
   162  type UnknownDestinationPacketDisposition int
   163  
   164  const (
   165  	// UnknownDestinationPacketMalformed (the zero value) denotes that the packet
   166  	// was malformed and no further processing should be attempted other than
   167  	// updating statistics.
   168  	UnknownDestinationPacketMalformed UnknownDestinationPacketDisposition = iota
   169  
   170  	// UnknownDestinationPacketUnhandled tells the caller that the packet was
   171  	// well formed but that the issue was not handled and the stack should take
   172  	// the default action.
   173  	UnknownDestinationPacketUnhandled
   174  
   175  	// UnknownDestinationPacketHandled tells the caller that it should do
   176  	// no further processing.
   177  	UnknownDestinationPacketHandled
   178  )
   179  
   180  // TransportProtocol is the interface that needs to be implemented by transport
   181  // protocols (e.g., tcp, udp) that want to be part of the networking stack.
   182  type TransportProtocol interface {
   183  	// Number returns the transport protocol number.
   184  	Number() tcpip.TransportProtocolNumber
   185  
   186  	// NewEndpoint creates a new endpoint of the transport protocol.
   187  	NewEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)
   188  
   189  	// NewRawEndpoint creates a new raw endpoint of the transport protocol.
   190  	NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)
   191  
   192  	// MinimumPacketSize returns the minimum valid packet size of this
   193  	// transport protocol. The stack automatically drops any packets smaller
   194  	// than this targeted at this protocol.
   195  	MinimumPacketSize() int
   196  
   197  	// ParsePorts returns the source and destination ports stored in a
   198  	// packet of this protocol.
   199  	ParsePorts(v buffer.View) (src, dst uint16, err tcpip.Error)
   200  
   201  	// HandleUnknownDestinationPacket handles packets targeted at this
   202  	// protocol that don't match any existing endpoint. For example,
   203  	// a packet that is targeted at a port that has no listeners.
   204  	//
   205  	// HandleUnknownDestinationPacket may modify the packet if it handles
   206  	// the issue.
   207  	HandleUnknownDestinationPacket(TransportEndpointID, *PacketBuffer) UnknownDestinationPacketDisposition
   208  
   209  	// SetOption allows enabling/disabling protocol specific features.
   210  	// SetOption returns an error if the option is not supported or the
   211  	// provided option value is invalid.
   212  	SetOption(option tcpip.SettableTransportProtocolOption) tcpip.Error
   213  
   214  	// Option allows retrieving protocol specific option values.
   215  	// Option returns an error if the option is not supported or the
   216  	// provided option value is invalid.
   217  	Option(option tcpip.GettableTransportProtocolOption) tcpip.Error
   218  
   219  	// Close requests that any worker goroutines owned by the protocol
   220  	// stop.
   221  	Close()
   222  
   223  	// Wait waits for any worker goroutines owned by the protocol to stop.
   224  	Wait()
   225  
   226  	// Parse sets pkt.TransportHeader and trims pkt.Data appropriately. It does
   227  	// neither and returns false if pkt.Data is too small, i.e. pkt.Data.Size() <
   228  	// MinimumPacketSize().
   229  	Parse(pkt *PacketBuffer) (ok bool)
   230  }
   231  
   232  // TransportPacketDisposition is the result from attempting to deliver a packet
   233  // to the transport layer.
   234  type TransportPacketDisposition int
   235  
   236  const (
   237  	// TransportPacketHandled (the zero value) indicates that a transport packet was
   238  	// handled by the transport layer and callers need not take any further action.
   239  	TransportPacketHandled TransportPacketDisposition = iota
   240  
   241  	// TransportPacketProtocolUnreachable indicates that the transport
   242  	// protocol requested in the packet is not supported.
   243  	TransportPacketProtocolUnreachable
   244  
   245  	// TransportPacketDestinationPortUnreachable indicates that there weren't any
   246  	// listeners interested in the packet and the transport protocol has no means
   247  	// to notify the sender.
   248  	TransportPacketDestinationPortUnreachable
   249  )
   250  
   251  // TransportDispatcher contains the methods used by the network stack to deliver
   252  // packets to the appropriate transport endpoint after they have been handled by
   253  // the network layer.
   254  type TransportDispatcher interface {
   255  	// DeliverTransportPacket delivers packets to the appropriate
   256  	// transport protocol endpoint.
   257  	//
   258  	// pkt.NetworkHeader must be set before calling DeliverTransportPacket.
   259  	//
   260  	// DeliverTransportPacket may modify the packet.
   261  	DeliverTransportPacket(tcpip.TransportProtocolNumber, *PacketBuffer) TransportPacketDisposition
   262  
   263  	// DeliverTransportError delivers an error to the appropriate transport
   264  	// endpoint.
   265  	//
   266  	// DeliverTransportError may modify the packet buffer.
   267  	DeliverTransportError(local, remote tcpip.Address, _ tcpip.NetworkProtocolNumber, _ tcpip.TransportProtocolNumber, _ TransportError, _ *PacketBuffer)
   268  
   269  	// DeliverRawPacket delivers a packet to any subscribed raw sockets.
   270  	//
   271  	// DeliverRawPacket does NOT take ownership of the packet buffer.
   272  	DeliverRawPacket(tcpip.TransportProtocolNumber, *PacketBuffer)
   273  }
   274  
   275  // PacketLooping specifies where an outbound packet should be sent.
   276  type PacketLooping byte
   277  
   278  const (
   279  	// PacketOut (bit 1 << 0) indicates that the packet should be passed to the
   280  	// link endpoint.
   281  	PacketOut PacketLooping = 1 << iota
   282  
   283  	// PacketLoop (bit 1 << 1) indicates that the packet should be handled locally.
   284  	PacketLoop
   285  )
   286  
   287  // NetworkHeaderParams are the header parameters given as input by the
   288  // transport endpoint to the network.
   289  type NetworkHeaderParams struct {
   290  	// Protocol refers to the transport protocol number.
   291  	Protocol tcpip.TransportProtocolNumber
   292  
   293  	// TTL refers to the Time To Live field of the IP header.
   294  	TTL uint8
   295  
   296  	// TOS refers to the TypeOfService or TrafficClass field of the IP header.
   297  	TOS uint8
   298  }
   299  
   300  // GroupAddressableEndpoint is an endpoint that supports group addressing.
   301  //
   302  // An endpoint is considered to support group addressing when one or more
   303  // endpoints may associate themselves with the same identifier (group address).
   304  type GroupAddressableEndpoint interface {
   305  	// JoinGroup joins the group identified by the passed address.
   306  	JoinGroup(group tcpip.Address) tcpip.Error
   307  
   308  	// LeaveGroup attempts to leave the group identified by the passed address.
   309  	LeaveGroup(group tcpip.Address) tcpip.Error
   310  
   311  	// IsInGroup returns true if the endpoint is a member of the specified group.
   312  	IsInGroup(group tcpip.Address) bool
   313  }
   314  
   315  // PrimaryEndpointBehavior is an enumeration of an AddressEndpoint's primary
   316  // behavior.
   317  type PrimaryEndpointBehavior int
   318  
   319  const (
   320  	// CanBePrimaryEndpoint (the zero value) indicates the endpoint can be used
   321  	// as a primary endpoint for new connections with no local address.
   322  	CanBePrimaryEndpoint PrimaryEndpointBehavior = iota
   323  
   324  	// FirstPrimaryEndpoint indicates the endpoint should be the first
   325  	// primary endpoint considered. If there are multiple endpoints with
   326  	// this behavior, they are ordered by recency.
   327  	FirstPrimaryEndpoint
   328  
   329  	// NeverPrimaryEndpoint indicates the endpoint should never be a
   330  	// primary endpoint.
   331  	NeverPrimaryEndpoint
   332  )
   333  
   334  func (peb PrimaryEndpointBehavior) String() string {
   335  	switch peb { // Each declared behavior maps to the name of its constant.
   336  	case CanBePrimaryEndpoint:
   337  		return "CanBePrimaryEndpoint"
   338  	case FirstPrimaryEndpoint:
   339  		return "FirstPrimaryEndpoint"
   340  	case NeverPrimaryEndpoint:
   341  		return "NeverPrimaryEndpoint"
   342  	default: // Any other value is unknown: panic, reporting its numeric value.
   343  		panic(fmt.Sprintf("unknown primary endpoint behavior: %d", peb))
   344  	}
   345  }
   346  
   347  // AddressConfigType is the method used to add an address.
   348  type AddressConfigType int
   349  
   350  const (
   351  	// AddressConfigStatic is a statically configured address endpoint that was
   352  	// added by some user-specified action (adding an explicit address, joining a
   353  	// multicast group).
   354  	AddressConfigStatic AddressConfigType = iota
   355  
   356  	// AddressConfigSlaac is an address endpoint added by SLAAC, as per RFC 4862
   357  	// section 5.5.3.
   358  	AddressConfigSlaac
   359  
   360  	// AddressConfigSlaacTemp is a temporary address endpoint added by SLAAC as
   361  	// per RFC 4941. Temporary SLAAC addresses are short-lived and are not meant
   362  	// to be valid (or preferred) forever; hence the term temporary.
   363  	AddressConfigSlaacTemp
   364  )
   365  
   366  // AddressProperties contains additional properties that can be configured when
   367  // adding an address.
   368  type AddressProperties struct {
   369  	PEB        PrimaryEndpointBehavior // Primary endpoint behavior of the address.
   370  	ConfigType AddressConfigType       // Method used to add the address.
   371  	Deprecated bool                    // Whether the address is deprecated.
   372  }
   373  
   374  // AssignableAddressEndpoint is a reference counted address endpoint that may be
   375  // assigned to a NetworkEndpoint.
   376  type AssignableAddressEndpoint interface {
   377  	// AddressWithPrefix returns the endpoint's address together with its prefix.
   378  	AddressWithPrefix() tcpip.AddressWithPrefix
   379  
   380  	// Subnet returns the subnet of the endpoint's address.
   381  	Subnet() tcpip.Subnet
   382  
   383  	// IsAssigned returns whether or not the endpoint is considered bound
   384  	// to its NetworkEndpoint.
   385  	IsAssigned(allowExpired bool) bool
   386  
   387  	// IncRef increments this endpoint's reference count.
   388  	//
   389  	// Returns true if it was successfully incremented. If it returns false, then
   390  	// the endpoint is considered expired and should no longer be used.
   391  	IncRef() bool
   392  
   393  	// DecRef decrements this endpoint's reference count.
   394  	DecRef()
   395  }
   396  
   397  // AddressEndpoint is an AssignableAddressEndpoint representing an address
   398  // assigned to an AddressableEndpoint.
   399  type AddressEndpoint interface {
   400  	AssignableAddressEndpoint
   401  
   402  	// GetKind returns the address kind for this endpoint.
   403  	GetKind() AddressKind
   404  
   405  	// SetKind sets the address kind for this endpoint.
   406  	SetKind(AddressKind)
   407  
   408  	// ConfigType returns the method used to add the address.
   409  	ConfigType() AddressConfigType
   410  
   411  	// Deprecated returns whether or not this endpoint is deprecated.
   412  	Deprecated() bool
   413  
   414  	// SetDeprecated sets this endpoint's deprecated status.
   415  	SetDeprecated(bool)
   416  }
   417  
   418  // AddressKind is the kind of an address.
   419  //
   420  // See the values of AddressKind for more details.
   421  type AddressKind int
   422  
   423  const (
   424  	// PermanentTentative is a permanent address endpoint that is not yet
   425  	// considered to be fully bound to an interface in the traditional
   426  	// sense. That is, the address is associated with a NIC, but packets
   427  	// destined to the address MUST NOT be accepted and MUST be silently
   428  	// dropped, and the address MUST NOT be used as a source address for
   429  	// outgoing packets. For IPv6, addresses are of this kind until NDP's
   430  	// Duplicate Address Detection (DAD) resolves. If DAD fails, the address
   431  	// is removed.
   432  	PermanentTentative AddressKind = iota
   433  
   434  	// Permanent is a permanent endpoint (vs. a temporary one) assigned to the
   435  	// NIC. Its reference count is biased by 1 to avoid removal when no route
   436  	// holds a reference to it. It is removed by explicitly removing the address
   437  	// from the NIC.
   438  	Permanent
   439  
   440  	// PermanentExpired is a permanent endpoint that had its address removed from
   441  	// the NIC, and it is waiting to be removed once no references to it are held.
   442  	//
   443  	// If the address is re-added before the endpoint is removed, its type
   444  	// changes back to Permanent.
   445  	PermanentExpired
   446  
   447  	// Temporary is an endpoint, created on a one-off basis to temporarily
   448  	// consider the NIC bound to an address that it is not explicitly bound to
   449  	// (such as a permanent address). Its reference count must not be biased by 1
   450  	// so that the address is removed immediately when references to it are no
   451  	// longer held.
   452  	//
   453  	// A temporary endpoint may be promoted to permanent if the address is added
   454  	// permanently.
   455  	Temporary
   456  )
   457  
   458  // IsPermanent returns true if k is Permanent or PermanentTentative, and false if it is Temporary or PermanentExpired.
   459  func (k AddressKind) IsPermanent() bool {
   460  	switch k {
   461  	case Permanent, PermanentTentative:
   462  		return true
   463  	case Temporary, PermanentExpired: // An expired permanent address is reported as not permanent.
   464  		return false
   465  	default: // Unrecognized kinds are not classified; they panic with the numeric value.
   466  		panic(fmt.Sprintf("unrecognized address kind = %d", k))
   467  	}
   468  }
   469  
   470  // AddressableEndpoint is an endpoint that supports addressing.
   471  //
   472  // An endpoint is considered to support addressing when the endpoint may
   473  // associate itself with an identifier (address).
   474  type AddressableEndpoint interface {
   475  	// AddAndAcquirePermanentAddress adds the passed permanent address.
   476  	//
   477  	// Returns *tcpip.ErrDuplicateAddress if the address already exists.
   478  	//
   479  	// Acquires and returns the AddressEndpoint for the added address.
   480  	AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, properties AddressProperties) (AddressEndpoint, tcpip.Error)
   481  
   482  	// RemovePermanentAddress removes the passed address if it is a permanent
   483  	// address.
   484  	//
   485  	// Returns *tcpip.ErrBadLocalAddress if the endpoint does not have the passed
   486  	// permanent address.
   487  	RemovePermanentAddress(addr tcpip.Address) tcpip.Error
   488  
   489  	// MainAddress returns the endpoint's primary permanent address.
   490  	MainAddress() tcpip.AddressWithPrefix
   491  
   492  	// AcquireAssignedAddress returns an address endpoint for the passed address
   493  	// that is considered bound to the endpoint, optionally creating a temporary
   494  	// endpoint if requested (allowTemp) and no existing address exists.
   495  	//
   496  	// The returned endpoint's reference count is incremented.
   497  	//
   498  	// Returns nil if the specified address is not local to this endpoint.
   499  	AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB PrimaryEndpointBehavior) AddressEndpoint
   500  
   501  	// AcquireOutgoingPrimaryAddress returns a primary address that may be used as
   502  	// a source address when sending packets to the passed remote address.
   503  	//
   504  	// If allowExpired is true, expired addresses may be returned.
   505  	//
   506  	// The returned endpoint's reference count is incremented.
   507  	//
   508  	// Returns nil if a primary address is not available.
   509  	AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) AddressEndpoint
   510  
   511  	// PrimaryAddresses returns the primary addresses.
   512  	PrimaryAddresses() []tcpip.AddressWithPrefix
   513  
   514  	// PermanentAddresses returns all the permanent addresses.
   515  	PermanentAddresses() []tcpip.AddressWithPrefix
   516  }
   517  
   518  // NDPEndpoint is a NetworkEndpoint that supports NDP.
   519  type NDPEndpoint interface {
   520  	NetworkEndpoint
   521  
   522  	// InvalidateDefaultRouter invalidates a default router discovered through
   523  	// NDP.
   524  	InvalidateDefaultRouter(tcpip.Address)
   525  }
   526  
   527  // NetworkInterface is a network interface.
   528  type NetworkInterface interface {
   529  	NetworkLinkEndpoint
   530  
   531  	// ID returns the interface's ID.
   532  	ID() tcpip.NICID
   533  
   534  	// IsLoopback returns true if the interface is a loopback interface.
   535  	IsLoopback() bool
   536  
   537  	// Name returns the name of the interface.
   538  	//
   539  	// May return an empty string if the interface is not configured with a name.
   540  	Name() string
   541  
   542  	// Enabled returns true if the interface is enabled.
   543  	Enabled() bool
   544  
   545  	// Promiscuous returns true if the interface is in promiscuous mode.
   546  	//
   547  	// When in promiscuous mode, the interface should accept all packets.
   548  	Promiscuous() bool
   549  
   550  	// Spoofing returns true if the interface is in spoofing mode.
   551  	//
   552  	// When in spoofing mode, the interface should consider all addresses as
   553  	// assigned to it.
   554  	Spoofing() bool
   555  
   556  	// PrimaryAddress returns the primary address associated with the interface.
   557  	//
   558  	// PrimaryAddress will return the first non-deprecated address if such an
   559  	// address exists. If no non-deprecated addresses exist, the first deprecated
   560  	// address will be returned. If no deprecated addresses exist, the zero value
   561  	// will be returned.
   562  	PrimaryAddress(tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error)
   563  
   564  	// CheckLocalAddress returns true if the address exists on the interface.
   565  	CheckLocalAddress(tcpip.NetworkProtocolNumber, tcpip.Address) bool
   566  
   567  	// WritePacketToRemote writes the packet to the given remote link address.
   568  	WritePacketToRemote(tcpip.LinkAddress, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error
   569  
   570  	// WritePacket writes a packet with the given protocol through the given
   571  	// route.
   572  	//
   573  	// WritePacket may modify the packet buffer. The packet buffer's
   574  	// network and transport headers must be set.
   575  	WritePacket(*Route, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error
   576  
   577  	// WritePackets writes packets with the given protocol through the given
   578  	// route. Must not be called with an empty list of packet buffers.
   579  	//
   580  	// WritePackets may modify the packet buffers.
   581  	//
   582  	// Right now, WritePackets is used only when the software segmentation
   583  	// offload is enabled. If it will be used for something else, syscall filters
   584  	// may need to be updated.
   585  	WritePackets(*Route, PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error)
   586  
   587  	// HandleNeighborProbe processes an incoming neighbor probe (e.g. ARP
   588  	// request or NDP Neighbor Solicitation).
   589  	//
   590  	// HandleNeighborProbe assumes that the probe is valid for the network
   591  	// interface the probe was received on.
   592  	HandleNeighborProbe(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress) tcpip.Error
   593  
   594  	// HandleNeighborConfirmation processes an incoming neighbor confirmation
   595  	// (e.g. ARP reply or NDP Neighbor Advertisement).
   596  	HandleNeighborConfirmation(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress, ReachabilityConfirmationFlags) tcpip.Error
   597  }
   598  
   599  // LinkResolvableNetworkEndpoint handles link resolution events.
   600  type LinkResolvableNetworkEndpoint interface {
   601  	// HandleLinkResolutionFailure is called when a link resolution failure
   602  	// prevented the passed packet from being sent.
   603  	HandleLinkResolutionFailure(*PacketBuffer)
   604  }
   605  
   606  // NetworkEndpoint is the interface that needs to be implemented by endpoints
   607  // of network layer protocols (e.g., ipv4, ipv6).
   608  type NetworkEndpoint interface {
   609  	// Enable enables the endpoint.
   610  	//
   611  	// Must only be called when the stack is in a state that allows the endpoint
   612  	// to send and receive packets.
   613  	//
   614  	// Returns *tcpip.ErrNotPermitted if the endpoint cannot be enabled.
   615  	Enable() tcpip.Error
   616  
   617  	// Enabled returns true if the endpoint is enabled.
   618  	Enabled() bool
   619  
   620  	// Disable disables the endpoint.
   621  	Disable()
   622  
   623  	// DefaultTTL is the default time-to-live value (or hop limit, in IPv6)
   624  	// for this endpoint.
   625  	DefaultTTL() uint8
   626  
   627  	// MTU is the maximum transmission unit for this endpoint. This is
   628  	// generally calculated as the MTU of the underlying data link endpoint
   629  	// minus the network endpoint max header length.
   630  	MTU() uint32
   631  
   632  	// MaxHeaderLength returns the maximum size the network (and lower
   633  	// level layers combined) headers can have. Higher levels use this
   634  	// information to reserve space in the front of the packets they're
   635  	// building.
   636  	MaxHeaderLength() uint16
   637  
   638  	// WritePacket writes a packet to the given destination address and
   639  	// protocol. It may modify pkt. pkt.TransportHeader must have
   640  	// already been set.
   641  	WritePacket(r *Route, params NetworkHeaderParams, pkt *PacketBuffer) tcpip.Error
   642  
   643  	// WritePackets writes packets to the given destination address and
   644  	// protocol. pkts must not be zero length. It may modify pkts and
   645  	// underlying packets.
   646  	WritePackets(r *Route, pkts PacketBufferList, params NetworkHeaderParams) (int, tcpip.Error)
   647  
   648  	// WriteHeaderIncludedPacket writes a packet that includes a network
   649  	// header to the given destination address. It may modify pkt.
   650  	WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) tcpip.Error
   651  
   652  	// HandlePacket is called by the link layer when new packets arrive at
   653  	// this network endpoint. It sets pkt.NetworkHeader.
   654  	//
   655  	// HandlePacket may modify pkt.
   656  	HandlePacket(pkt *PacketBuffer)
   657  
   658  	// Close is called when the endpoint is removed from a stack.
   659  	Close()
   660  
   661  	// NetworkProtocolNumber returns the tcpip.NetworkProtocolNumber for
   662  	// this endpoint.
   663  	NetworkProtocolNumber() tcpip.NetworkProtocolNumber
   664  
   665  	// Stats returns a reference to the network endpoint stats.
   666  	Stats() NetworkEndpointStats
   667  }
   668  
   669  // NetworkEndpointStats is the interface implemented by each network endpoint
   670  // stats struct.
   671  type NetworkEndpointStats interface {
   672  	// IsNetworkEndpointStats is an empty method that exists only to mark
   673  	// implementations of the NetworkEndpointStats marker interface.
   674  	IsNetworkEndpointStats()
   675  }
   676  
   677  // IPNetworkEndpointStats is a NetworkEndpointStats that tracks IP-related
   678  // statistics.
   679  type IPNetworkEndpointStats interface {
   680  	NetworkEndpointStats
   681  
   682  	// IPStats returns a pointer to the IP statistics of a network endpoint.
   683  	IPStats() *tcpip.IPStats
   684  }
   685  
   686  // ForwardingNetworkEndpoint is a NetworkEndpoint that may forward packets.
   687  type ForwardingNetworkEndpoint interface {
   688  	NetworkEndpoint
   689  
   690  	// Forwarding returns the current forwarding configuration.
   691  	Forwarding() bool
   692  
   693  	// SetForwarding sets the forwarding configuration to the passed value.
   694  	SetForwarding(bool)
   695  }
   696  
   697  // NetworkProtocol is the interface that needs to be implemented by network
   698  // protocols (e.g., ipv4, ipv6) that want to be part of the networking stack.
   699  type NetworkProtocol interface {
   700  	// Number returns the network protocol number.
   701  	Number() tcpip.NetworkProtocolNumber
   702  
   703  	// MinimumPacketSize returns the minimum valid packet size of this
   704  	// network protocol. The stack automatically drops any packets smaller
   705  	// than this targeted at this protocol.
   706  	MinimumPacketSize() int
   707  
   708  	// ParseAddresses returns the source and destination addresses stored in a
   709  	// packet of this protocol.
   710  	ParseAddresses(v buffer.View) (src, dst tcpip.Address)
   711  
   712  	// NewEndpoint creates a new endpoint of this protocol.
   713  	NewEndpoint(nic NetworkInterface, dispatcher TransportDispatcher) NetworkEndpoint
   714  
   715  	// SetOption allows enabling/disabling protocol specific features.
   716  	// SetOption returns an error if the option is not supported or the
   717  	// provided option value is invalid.
   718  	SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error
   719  
   720  	// Option allows retrieving protocol specific option values.
   721  	// Option returns an error if the option is not supported or the
   722  	// provided option value is invalid.
   723  	Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error
   724  
   725  	// Close requests that any worker goroutines owned by the protocol
   726  	// stop.
   727  	Close()
   728  
   729  	// Wait waits for any worker goroutines owned by the protocol to stop.
   730  	Wait()
   731  
   732  	// Parse sets pkt.NetworkHeader and trims pkt.Data appropriately. It
   733  	// returns, in order:
   734  	// - The encapsulated protocol, if present.
   735  	// - Whether there is an encapsulated transport protocol payload (e.g. ARP
   736  	//   does not encapsulate anything).
   737  	// - Whether pkt.Data was large enough to parse and set pkt.NetworkHeader.
   738  	Parse(pkt *PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool)
   739  }
   740  
   741  // NetworkDispatcher contains the methods used by the network stack to deliver
   742  // inbound/outbound packets to the appropriate network/packet (if any) endpoints.
   743  type NetworkDispatcher interface {
   744  	// DeliverNetworkPacket finds the appropriate network protocol endpoint
   745  	// and hands the packet over for further processing.
   746  	//
   747  	// pkt.LinkHeader may or may not be set before calling
   748  	// DeliverNetworkPacket. Some packets do not have link headers (e.g.
   749  	// packets sent via loopback), and won't have the field set.
   750  	//
   751  	// DeliverNetworkPacket may modify pkt.
   752  	DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
   753  }
   754  
   755  // LinkEndpointCapabilities is the type associated with the capabilities
   756  // supported by a link-layer endpoint. Its values are bit flags.
   757  type LinkEndpointCapabilities uint
   758  
   759  // The following are the supported link endpoint capabilities.
   760  const (
   761  	CapabilityNone LinkEndpointCapabilities = 0 // CapabilityNone is the empty capability set.
   762  	// CapabilityTXChecksumOffload indicates that the link endpoint supports
   763  	// checksum computation for outgoing packets and the stack can skip
   764  	// computing checksums when sending packets.
   765  	CapabilityTXChecksumOffload LinkEndpointCapabilities = 1 << iota // iota is already 1 on this line, so the flags start at 1<<1 and bit 0 is unused.
   766  	// CapabilityRXChecksumOffload indicates that the link endpoint supports
   767  	// checksum verification on received packets and that it's safe for the
   768  	// stack to skip checksum verification.
   769  	CapabilityRXChecksumOffload
   770  	CapabilityResolutionRequired
   771  	CapabilitySaveRestore
   772  	CapabilityDisconnectOk
   773  	CapabilityLoopback
   774  )
   775  
   776  // NetworkLinkEndpoint is a data-link layer that supports sending network
   777  // layer packets.
   778  type NetworkLinkEndpoint interface {
   779  	// MTU returns the maximum transmission unit for this endpoint. This is
   780  	// usually dictated by the backing physical network; when such a
   781  	// physical network doesn't exist, the limit is generally 64k, which
   782  	// includes the maximum size of an IP packet.
   783  	MTU() uint32
   784  
   785  	// MaxHeaderLength returns the maximum size the data link (and
   786  	// lower level layers combined) headers can have. Higher levels use this
   787  	// information to reserve space in the front of the packets they're
   788  	// building.
   789  	MaxHeaderLength() uint16
   790  
   791  	// LinkAddress returns the link address (typically a MAC) of the
   792  	// endpoint.
   793  	LinkAddress() tcpip.LinkAddress
   794  }
   795  
   796  // LinkEndpoint is the interface implemented by data link layer protocols (e.g.,
   797  // ethernet, loopback, raw) and used by network layer protocols to send packets
   798  // out through the implementer's data link endpoint. When a link header exists,
   799  // it sets each PacketBuffer's LinkHeader field before passing it up the
   800  // stack.
   801  type LinkEndpoint interface {
   802  	NetworkLinkEndpoint
   803  
   804  	// Capabilities returns the set of capabilities supported by the
   805  	// endpoint.
   806  	Capabilities() LinkEndpointCapabilities
   807  
   808  	// Attach attaches the data link layer endpoint to the network-layer
   809  	// dispatcher of the stack.
   810  	//
   811  	// Attach is called with a nil dispatcher when the endpoint's NIC is being
   812  	// removed.
   813  	Attach(dispatcher NetworkDispatcher)
   814  
   815  	// IsAttached returns whether a NetworkDispatcher is attached to the
   816  	// endpoint.
   817  	IsAttached() bool
   818  
   819  	// Wait waits for any worker goroutines owned by the endpoint to stop.
   820  	//
   821  	// For now, requesting that an endpoint's worker goroutine(s) stop is
   822  	// implementation specific.
   823  	//
   824  	// Wait will not block if the endpoint hasn't started any goroutines
   825  	// yet, even if it might later.
   826  	Wait()
   827  
   828  	// ARPHardwareType returns the ARPHRD_TYPE of the link endpoint.
   829  	//
   830  	// See:
   831  	// https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/include/uapi/linux/if_arp.h#L30
   832  	ARPHardwareType() header.ARPHardwareType
   833  
   834  	// AddHeader adds a link layer header to pkt if required.
   835  	AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
   836  
   837  	// WritePacket writes a packet with the given protocol and route.
   838  	//
   839  	// WritePacket may modify the packet buffer. The packet buffer's
   840  	// network and transport header must be set.
   841  	//
   842  	// To participate in transparent bridging, a LinkEndpoint implementation
   843  	// should call eth.Encode with header.EthernetFields.SrcAddr set to
   844  	// r.LocalLinkAddress if it is provided (r presumably being the RouteInfo argument).
   845  	WritePacket(RouteInfo, tcpip.NetworkProtocolNumber, *PacketBuffer) tcpip.Error
   846  
   847  	// WritePackets writes packets with the given protocol and route. It must
   848  	// not be called with an empty list of packet buffers.
   849  	//
   850  	// WritePackets may modify the packet buffers.
   851  	//
   852  	// Right now, WritePackets is used only when the software segmentation
   853  	// offload is enabled. If it will be used for something else, syscall filters
   854  	// may need to be updated.
   855  	WritePackets(RouteInfo, PacketBufferList, tcpip.NetworkProtocolNumber) (int, tcpip.Error)
   856  
   857  	// WriteRawPacket writes a packet directly to the link.
   858  	//
   859  	// If the link-layer has its own header, the payload must already include the
   860  	// header.
   861  	//
   862  	// WriteRawPacket may modify the packet.
   863  	WriteRawPacket(*PacketBuffer) tcpip.Error
   864  }
   865  
   866  // InjectableLinkEndpoint is a LinkEndpoint where inbound packets are
   867  // delivered via the InjectInbound method.
   868  type InjectableLinkEndpoint interface {
   869  	LinkEndpoint
   870  
   871  	// InjectInbound injects an inbound packet of the given network protocol.
   872  	InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
   873  
   874  	// InjectOutbound writes a fully formed outbound packet directly to the
   875  	// link.
   876  	//
   877  	// dest is used by endpoints with multiple raw destinations.
   878  	InjectOutbound(dest tcpip.Address, packet []byte) tcpip.Error
   879  }
   880  
   881  // DADResult is a marker interface for the result of a duplicate address
   882  // detection process.
   883  type DADResult interface {
   884  	isDADResult() // unexported marker method; carries no behavior of its own.
   885  }
   886  
   887  var _ DADResult = (*DADSucceeded)(nil) // compile-time check that *DADSucceeded implements DADResult.
   888  
   889  // DADSucceeded indicates DAD completed without finding any duplicate addresses.
   890  type DADSucceeded struct{}
   891  
   892  func (*DADSucceeded) isDADResult() {} // marks *DADSucceeded as a DADResult.
   893  
   894  var _ DADResult = (*DADError)(nil) // compile-time check that *DADError implements DADResult.
   895  
   896  // DADError indicates DAD hit an error.
   897  type DADError struct {
   898  	Err tcpip.Error // Err is the error carried by this result.
   899  }
   900  
   901  func (*DADError) isDADResult() {} // marks *DADError as a DADResult.
   902  
   903  var _ DADResult = (*DADAborted)(nil) // compile-time check that *DADAborted implements DADResult.
   904  
   905  // DADAborted indicates DAD was aborted.
   906  type DADAborted struct{}
   907  
   908  func (*DADAborted) isDADResult() {} // marks *DADAborted as a DADResult.
   909  
   910  var _ DADResult = (*DADDupAddrDetected)(nil) // compile-time check that *DADDupAddrDetected implements DADResult.
   911  
   912  // DADDupAddrDetected indicates DAD detected a duplicate address.
   913  type DADDupAddrDetected struct {
   914  	// HolderLinkAddress is the link address of the node that holds the duplicate
   915  	// address.
   916  	HolderLinkAddress tcpip.LinkAddress
   917  }
   918  
   919  func (*DADDupAddrDetected) isDADResult() {} // marks *DADDupAddrDetected as a DADResult.
   920  
   921  // DADCompletionHandler is a handler for DAD completion; it receives the DADResult.
   922  type DADCompletionHandler func(DADResult)
   923  
   924  // DADCheckAddressDisposition enumerates the possible return values from
   925  // DuplicateAddressDetector.CheckDuplicateAddress.
   926  type DADCheckAddressDisposition int
   927  
   928  const (
   929  	_ DADCheckAddressDisposition = iota // consumes 0, so the zero value is not a named disposition.
   930  
   931  	// DADDisabled indicates that DAD is disabled.
   932  	DADDisabled
   933  
   934  	// DADStarting indicates that DAD is starting for an address.
   935  	DADStarting
   936  
   937  	// DADAlreadyRunning indicates that DAD was already started for an address.
   938  	DADAlreadyRunning
   939  )
   940  
   941  const (
   942  	// defaultDupAddrDetectTransmits is the default number of NDP Neighbor
   943  	// Solicitation messages to send when doing Duplicate Address Detection
   944  	// for a tentative address. It is the value used by DefaultDADConfigurations.
   945  	//
   946  	// Default = 1 (from RFC 4862 section 5.1)
   947  	defaultDupAddrDetectTransmits = 1
   948  )
   949  
   950  // DADConfigurations holds configurations for duplicate address detection.
   951  type DADConfigurations struct {
   952  	// DupAddrDetectTransmits is the number of Neighbor Solicitation messages
   953  	// to send when doing Duplicate Address Detection for a tentative address.
   954  	//
   955  	// Note, a value of zero effectively disables DAD.
   956  	DupAddrDetectTransmits uint8
   957  
   958  	// RetransmitTimer is the amount of time to wait between sending Neighbor
   959  	// Solicitation messages.
   960  	//
   961  	// Must be greater than or equal to 1ms.
   962  	RetransmitTimer time.Duration
   963  }
   964  
   965  // DefaultDADConfigurations returns the default DAD configurations.
   966  func DefaultDADConfigurations() DADConfigurations {
   967  	return DADConfigurations{
   968  		DupAddrDetectTransmits: defaultDupAddrDetectTransmits,
   969  		RetransmitTimer:        defaultRetransmitTimer,
   970  	}
   971  }
   972  
   973  // Validate replaces invalid values in the configuration with their defaults.
   974  // Only RetransmitTimer is checked: a value below the minimum is reset.
   975  func (c *DADConfigurations) Validate() {
   976  	if c.RetransmitTimer >= minimumRetransmitTimer {
   977  		return
   978  	}
   979  	c.RetransmitTimer = defaultRetransmitTimer
   980  }
   981  
   982  // DuplicateAddressDetector handles checking if an address is already assigned
   983  // to some neighboring node on the link.
   984  type DuplicateAddressDetector interface {
   985  	// CheckDuplicateAddress checks if an address is assigned to a neighbor.
   986  	//
   987  	// If DAD is already being performed for the address, the handler will be
   988  	// called with the result of the original DAD request.
   989  	CheckDuplicateAddress(tcpip.Address, DADCompletionHandler) DADCheckAddressDisposition
   990  
   991  	// SetDADConfigurations sets the configurations for DAD to c.
   992  	SetDADConfigurations(c DADConfigurations)
   993  
   994  	// DuplicateAddressProtocol returns the network protocol the receiver can
   995  	// perform duplicate address detection for.
   996  	DuplicateAddressProtocol() tcpip.NetworkProtocolNumber
   997  }
   998  
   999  // LinkAddressResolver handles link address resolution for a network protocol.
  1000  type LinkAddressResolver interface {
  1001  	// LinkAddressRequest sends a request for the link address of the target
  1002  	// address. The request is broadcast on the local network if a remote link
  1003  	// address is not provided.
  1004  	LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) tcpip.Error
  1005  
  1006  	// ResolveStaticAddress attempts to resolve addr without sending
  1007  	// requests. It either resolves the address immediately or returns the
  1008  	// empty LinkAddress (the bool presumably reports which; confirm with implementations).
  1009  	//
  1010  	// It can be used to resolve broadcast addresses for example.
  1011  	ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool)
  1012  
  1013  	// LinkAddressProtocol returns the network protocol of the
  1014  	// addresses this resolver can resolve.
  1015  	LinkAddressProtocol() tcpip.NetworkProtocolNumber
  1016  }
  1017  
  1018  // RawFactory produces endpoints for writing various types of raw packets.
  1019  type RawFactory interface {
  1020  	// NewUnassociatedEndpoint produces endpoints for writing packets not
  1021  	// associated with a particular transport protocol. Such endpoints can
  1022  	// be used to write arbitrary packets that include the network header.
  1023  	NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)
  1024  
  1025  	// NewPacketEndpoint produces endpoints for reading and writing packets
  1026  	// that include network and (when cooked is false) link-layer headers.
  1027  	NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)
  1028  }
  1029  
  1030  // GSOType is the type of GSO (generic segmentation offload) segments.
  1031  //
  1032  // +stateify savable
  1033  type GSOType int
  1034  
  1035  // Types of GSO segments.
  1036  const (
  1037  	GSONone GSOType = iota // GSONone is the zero value of GSOType.
  1038  
  1039  	// Hardware GSO types:
  1040  	GSOTCPv4
  1041  	GSOTCPv6
  1042  
  1043  	// GSOSW is used for software GSO segments which have to be sent by
  1044  	// endpoint.WritePackets.
  1045  	GSOSW
  1046  )
  1047  
  1048  // GSO contains generic segmentation offload properties.
  1049  //
  1050  // +stateify savable
  1051  type GSO struct {
  1052  	// Type is one of GSONone, GSOTCPv4, etc.
  1053  	Type GSOType
  1054  	// NeedsCsum is set if the checksum offload is enabled.
  1055  	NeedsCsum bool
  1056  	// CsumOffset is the offset after which to place the checksum.
  1057  	CsumOffset uint16
  1058  
  1059  	// MSS is the maximum segment size.
  1060  	MSS uint16
  1061  	// L3HdrLen is the L3 (IP) header length.
  1062  	L3HdrLen uint16
  1063  
  1064  	// MaxSize is the maximum GSO packet size.
  1065  	MaxSize uint32
  1066  }
  1067  
  1068  // SupportedGSO is the type of segmentation offloading supported.
  1069  type SupportedGSO int
  1070  
  1071  const (
  1072  	// GSONotSupported indicates that segmentation offloading is not supported.
  1073  	// It is the zero value of SupportedGSO.
  1074  	GSONotSupported SupportedGSO = iota
  1075  	// HWGSOSupported indicates that segmentation offloading may be performed by
  1076  	// the hardware.
  1077  	HWGSOSupported
  1078  
  1079  	// SWGSOSupported indicates that segmentation offloading may be performed in
  1080  	// software.
  1081  	SWGSOSupported
  1082  )
  1083  
  1084  // GSOEndpoint provides access to GSO properties.
  1085  type GSOEndpoint interface {
  1086  	// GSOMaxSize returns the maximum GSO packet size.
  1087  	GSOMaxSize() uint32
  1088  
  1089  	// SupportedGSO returns the kind of segmentation offloading that is supported.
  1090  	SupportedGSO() SupportedGSO
  1091  }
  1092  
  1093  // SoftwareGSOMaxSize is the maximum allowed size of a software GSO segment.
  1094  // This isn't a hard limit, because it is never set into packet headers.
  1095  const SoftwareGSOMaxSize = 1 << 16 // 65536