github.com/vpnishe/netstack@v1.10.6/tcpip/stack/registration.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package stack
    16  
    17  import (
    18  	"github.com/vpnishe/netstack/sleep"
    19  	"github.com/vpnishe/netstack/tcpip"
    20  	"github.com/vpnishe/netstack/tcpip/buffer"
    21  	"github.com/vpnishe/netstack/waiter"
    22  )
    23  
// NetworkEndpointID is the identifier of a network layer protocol endpoint.
// Currently the local address is sufficient because all supported protocols
// (i.e., IPv4 and IPv6) have different sizes for their addresses.
type NetworkEndpointID struct {
	// LocalAddress is the local network layer address that identifies the
	// endpoint.
	LocalAddress tcpip.Address
}
    30  
// TransportEndpointID is the identifier of a transport layer protocol endpoint.
//
// +stateify savable
type TransportEndpointID struct {
	// LocalPort is the local port associated with the endpoint.
	LocalPort uint16

	// LocalAddress is the local [network layer] address associated with
	// the endpoint.
	LocalAddress tcpip.Address

	// RemotePort is the remote port associated with the endpoint.
	RemotePort uint16

	// RemoteAddress is the remote [network layer] address associated with
	// the endpoint.
	RemoteAddress tcpip.Address
}
    49  
    50  // ControlType is the type of network control message.
    51  type ControlType int
    52  
    53  // The following are the allowed values for ControlType values.
    54  const (
    55  	ControlPacketTooBig ControlType = iota
    56  	ControlPortUnreachable
    57  	ControlUnknown
    58  )
    59  
// TransportEndpoint is the interface that needs to be implemented by transport
// protocol (e.g., tcp, udp) endpoints that can handle packets.
type TransportEndpoint interface {
	// UniqueID returns a unique ID for this transport endpoint.
	UniqueID() uint64

	// HandlePacket is called by the stack when new packets arrive at
	// this transport endpoint. It sets pkt.TransportHeader.
	//
	// HandlePacket takes ownership of pkt.
	HandlePacket(r *Route, id TransportEndpointID, pkt tcpip.PacketBuffer)

	// HandleControlPacket is called by the stack when new control (e.g.
	// ICMP) packets arrive at this transport endpoint.
	//
	// HandleControlPacket takes ownership of pkt.
	HandleControlPacket(id TransportEndpointID, typ ControlType, extra uint32, pkt tcpip.PacketBuffer)

	// Close puts the endpoint in a closed state and frees all resources
	// associated with it. This cleanup may happen asynchronously. Wait can
	// be used to block on this asynchronous cleanup.
	Close()

	// Wait waits for any worker goroutines owned by the endpoint to stop.
	//
	// An endpoint can be requested to stop its worker goroutines by calling
	// its Close method.
	//
	// Wait will not block if the endpoint hasn't started any goroutines
	// yet, even if it might later.
	Wait()
}
    91  
// RawTransportEndpoint is the interface that needs to be implemented by raw
// transport protocol endpoints. RawTransportEndpoints receive the entire
// packet - including the network and transport headers - as delivered to
// netstack.
type RawTransportEndpoint interface {
	// HandlePacket is called by the stack when new packets arrive at
	// this transport endpoint. The packet contains all data from the link
	// layer up.
	//
	// HandlePacket takes ownership of pkt.
	HandlePacket(r *Route, pkt tcpip.PacketBuffer)
}
   104  
// PacketEndpoint is the interface that needs to be implemented by packet
// transport protocol endpoints. These endpoints receive link layer headers in
// addition to whatever they contain (usually network and transport layer
// headers and a payload).
type PacketEndpoint interface {
	// HandlePacket is called by the stack when new packets arrive that
	// match the endpoint.
	//
	// Implementers should treat the packet as immutable and should copy it
	// before modification.
	//
	// The packet's link header may have a length of 0, in which case the
	// PacketEndpoint should construct its own ethernet header for
	// applications.
	//
	// HandlePacket takes ownership of pkt.
	HandlePacket(nicID tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer)
}
   122  
// TransportProtocol is the interface that needs to be implemented by transport
// protocols (e.g., tcp, udp) that want to be part of the networking stack.
type TransportProtocol interface {
	// Number returns the transport protocol number.
	Number() tcpip.TransportProtocolNumber

	// NewEndpoint creates a new endpoint of the transport protocol.
	NewEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)

	// NewRawEndpoint creates a new raw endpoint of the transport protocol.
	NewRawEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)

	// MinimumPacketSize returns the minimum valid packet size of this
	// transport protocol. The stack automatically drops any packets smaller
	// than this targeted at this protocol.
	MinimumPacketSize() int

	// ParsePorts returns the source and destination ports stored in a
	// packet of this protocol.
	ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error)

	// HandleUnknownDestinationPacket handles packets targeted at this
	// protocol that don't match any existing endpoint, for example
	// a packet targeted at a port that has no listeners.
	//
	// The return value indicates whether the packet was well-formed (for
	// stats purposes only).
	//
	// HandleUnknownDestinationPacket takes ownership of pkt.
	HandleUnknownDestinationPacket(r *Route, id TransportEndpointID, pkt tcpip.PacketBuffer) bool

	// SetOption allows enabling/disabling protocol specific features.
	// SetOption returns an error if the option is not supported or the
	// provided option value is invalid.
	SetOption(option interface{}) *tcpip.Error

	// Option allows retrieving protocol specific option values.
	// Option returns an error if the option is not supported or the
	// provided option value is invalid.
	Option(option interface{}) *tcpip.Error
}
   164  
// TransportDispatcher contains the methods used by the network stack to deliver
// packets to the appropriate transport endpoint after they have been handled
// by the network layer.
type TransportDispatcher interface {
	// DeliverTransportPacket delivers packets to the appropriate
	// transport protocol endpoint.
	//
	// pkt.NetworkHeader must be set before calling DeliverTransportPacket.
	//
	// DeliverTransportPacket takes ownership of pkt.
	DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt tcpip.PacketBuffer)

	// DeliverTransportControlPacket delivers control packets to the
	// appropriate transport protocol endpoint.
	//
	// pkt.NetworkHeader must be set before calling
	// DeliverTransportControlPacket.
	//
	// DeliverTransportControlPacket takes ownership of pkt.
	DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt tcpip.PacketBuffer)
}
   186  
   187  // PacketLooping specifies where an outbound packet should be sent.
   188  type PacketLooping byte
   189  
   190  const (
   191  	// PacketOut indicates that the packet should be passed to the link
   192  	// endpoint.
   193  	PacketOut PacketLooping = 1 << iota
   194  
   195  	// PacketLoop indicates that the packet should be handled locally.
   196  	PacketLoop
   197  )
   198  
// NetworkHeaderParams are the header parameters given as input by the
// transport endpoint to the network.
type NetworkHeaderParams struct {
	// Protocol is the transport protocol number.
	Protocol tcpip.TransportProtocolNumber

	// TTL is the Time To Live field of the IP header.
	TTL uint8

	// TOS is the TypeOfService or TrafficClass field of the IP header.
	TOS uint8
}
   211  
// NetworkEndpoint is the interface that needs to be implemented by endpoints
// of network layer protocols (e.g., ipv4, ipv6).
type NetworkEndpoint interface {
	// DefaultTTL is the default time-to-live value (or hop limit, in ipv6)
	// for this endpoint.
	DefaultTTL() uint8

	// MTU is the maximum transmission unit for this endpoint. This is
	// generally calculated as the MTU of the underlying data link endpoint
	// minus the network endpoint max header length.
	MTU() uint32

	// Capabilities returns the set of capabilities supported by the
	// underlying link-layer endpoint.
	Capabilities() LinkEndpointCapabilities

	// MaxHeaderLength returns the maximum size the network (and lower
	// level layers combined) headers can have. Higher levels use this
	// information to reserve space in the front of the packets they're
	// building.
	MaxHeaderLength() uint16

	// WritePacket writes a packet to the given destination address and
	// protocol. It sets pkt.NetworkHeader. pkt.TransportHeader must have
	// already been set.
	WritePacket(r *Route, gso *GSO, params NetworkHeaderParams, loop PacketLooping, pkt tcpip.PacketBuffer) *tcpip.Error

	// WritePackets writes packets to the given destination address and
	// protocol.
	WritePackets(r *Route, gso *GSO, hdrs []PacketDescriptor, payload buffer.VectorisedView, params NetworkHeaderParams, loop PacketLooping) (int, *tcpip.Error)

	// WriteHeaderIncludedPacket writes a packet that includes a network
	// header to the given destination address.
	WriteHeaderIncludedPacket(r *Route, loop PacketLooping, pkt tcpip.PacketBuffer) *tcpip.Error

	// ID returns the network protocol endpoint ID.
	ID() *NetworkEndpointID

	// PrefixLen returns the network endpoint's subnet prefix length in bits.
	PrefixLen() int

	// NICID returns the id of the NIC this endpoint belongs to.
	NICID() tcpip.NICID

	// HandlePacket is called by the link layer when new packets arrive at
	// this network endpoint. It sets pkt.NetworkHeader.
	//
	// HandlePacket takes ownership of pkt.
	HandlePacket(r *Route, pkt tcpip.PacketBuffer)

	// Close is called when the endpoint is removed from a stack.
	Close()
}
   265  
// NetworkProtocol is the interface that needs to be implemented by network
// protocols (e.g., ipv4, ipv6) that want to be part of the networking stack.
type NetworkProtocol interface {
	// Number returns the network protocol number.
	Number() tcpip.NetworkProtocolNumber

	// MinimumPacketSize returns the minimum valid packet size of this
	// network protocol. The stack automatically drops any packets smaller
	// than this targeted at this protocol.
	MinimumPacketSize() int

	// DefaultPrefixLen returns the protocol's default prefix length.
	DefaultPrefixLen() int

	// ParseAddresses returns the source and destination addresses stored in
	// a packet of this protocol.
	ParseAddresses(v buffer.View) (src, dst tcpip.Address)

	// NewEndpoint creates a new endpoint of this protocol.
	NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, sender LinkEndpoint) (NetworkEndpoint, *tcpip.Error)

	// SetOption allows enabling/disabling protocol specific features.
	// SetOption returns an error if the option is not supported or the
	// provided option value is invalid.
	SetOption(option interface{}) *tcpip.Error

	// Option allows retrieving protocol specific option values.
	// Option returns an error if the option is not supported or the
	// provided option value is invalid.
	Option(option interface{}) *tcpip.Error
}
   297  
// NetworkDispatcher contains the methods used by the network stack to deliver
// packets to the appropriate network endpoint after they have been handled by
// the data link layer.
type NetworkDispatcher interface {
	// DeliverNetworkPacket finds the appropriate network protocol endpoint
	// and hands the packet over for further processing.
	//
	// pkt.LinkHeader may or may not be set before calling
	// DeliverNetworkPacket. Some packets do not have link headers (e.g.
	// packets sent via loopback), and won't have the field set.
	//
	// DeliverNetworkPacket takes ownership of pkt.
	DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer)
}
   312  
   313  // LinkEndpointCapabilities is the type associated with the capabilities
   314  // supported by a link-layer endpoint. It is a set of bitfields.
   315  type LinkEndpointCapabilities uint
   316  
   317  // The following are the supported link endpoint capabilities.
   318  const (
   319  	CapabilityNone LinkEndpointCapabilities = 0
   320  	// CapabilityTXChecksumOffload indicates that the link endpoint supports
   321  	// checksum computation for outgoing packets and the stack can skip
   322  	// computing checksums when sending packets.
   323  	CapabilityTXChecksumOffload LinkEndpointCapabilities = 1 << iota
   324  	// CapabilityRXChecksumOffload indicates that the link endpoint supports
   325  	// checksum verification on received packets and that it's safe for the
   326  	// stack to skip checksum verification.
   327  	CapabilityRXChecksumOffload
   328  	CapabilityResolutionRequired
   329  	CapabilitySaveRestore
   330  	CapabilityDisconnectOk
   331  	CapabilityLoopback
   332  	CapabilityHardwareGSO
   333  
   334  	// CapabilitySoftwareGSO indicates the link endpoint supports of sending
   335  	// multiple packets using a single call (LinkEndpoint.WritePackets).
   336  	CapabilitySoftwareGSO
   337  )
   338  
// LinkEndpoint is the interface implemented by data link layer protocols (e.g.,
// ethernet, loopback, raw) and used by network layer protocols to send packets
// out through the implementer's data link endpoint. When a link header exists,
// it sets each tcpip.PacketBuffer's LinkHeader field before passing it up the
// stack.
type LinkEndpoint interface {
	// MTU is the maximum transmission unit for this endpoint. This is
	// usually dictated by the backing physical network; when such a
	// physical network doesn't exist, the limit is generally 64k, which
	// includes the maximum size of an IP packet.
	MTU() uint32

	// Capabilities returns the set of capabilities supported by the
	// endpoint.
	Capabilities() LinkEndpointCapabilities

	// MaxHeaderLength returns the maximum size the data link (and
	// lower level layers combined) headers can have. Higher levels use this
	// information to reserve space in the front of the packets they're
	// building.
	MaxHeaderLength() uint16

	// LinkAddress returns the link address (typically a MAC) of the
	// link endpoint.
	LinkAddress() tcpip.LinkAddress

	// WritePacket writes a packet with the given protocol through the
	// given route. It sets pkt.LinkHeader if a link layer header exists.
	// pkt.NetworkHeader and pkt.TransportHeader must have already been
	// set.
	//
	// To participate in transparent bridging, a LinkEndpoint implementation
	// should call eth.Encode with header.EthernetFields.SrcAddr set to
	// r.LocalLinkAddress if it is provided.
	WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) *tcpip.Error

	// WritePackets writes packets with the given protocol through the
	// given route.
	//
	// Right now, WritePackets is used only when software segmentation
	// offload is enabled. If it is ever used for something else, the
	// syscall filters may need to change.
	WritePackets(r *Route, gso *GSO, hdrs []PacketDescriptor, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error)

	// WriteRawPacket writes a packet directly to the link. The packet
	// should already have an ethernet header.
	WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error

	// Attach attaches the data link layer endpoint to the network-layer
	// dispatcher of the stack.
	Attach(dispatcher NetworkDispatcher)

	// IsAttached returns whether a NetworkDispatcher is attached to the
	// endpoint.
	IsAttached() bool

	// Wait waits for any worker goroutines owned by the endpoint to stop.
	//
	// For now, requesting that an endpoint's worker goroutine(s) stop is
	// implementation specific.
	//
	// Wait will not block if the endpoint hasn't started any goroutines
	// yet, even if it might later.
	Wait()
}
   404  
// InjectableLinkEndpoint is a LinkEndpoint where inbound packets are
// delivered via the InjectInbound method.
type InjectableLinkEndpoint interface {
	LinkEndpoint

	// InjectInbound injects an inbound packet.
	InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer)

	// InjectOutbound writes a fully formed outbound packet directly to the
	// link.
	//
	// dest is used by endpoints with multiple raw destinations.
	InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error
}
   419  
// A LinkAddressResolver is an extension to a NetworkProtocol that
// can resolve link addresses.
type LinkAddressResolver interface {
	// LinkAddressRequest sends a request for the LinkAddress of addr.
	// The request is sent on linkEP with localAddr as the source.
	//
	// A valid response will cause the discovery protocol's network
	// endpoint to call AddLinkAddress.
	LinkAddressRequest(addr, localAddr tcpip.Address, linkEP LinkEndpoint) *tcpip.Error

	// ResolveStaticAddress attempts to resolve addr without sending
	// requests. It either resolves the address immediately or returns the
	// empty LinkAddress.
	//
	// It can be used to resolve broadcast addresses for example.
	ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool)

	// LinkAddressProtocol returns the network protocol of the
	// addresses this resolver can resolve.
	LinkAddressProtocol() tcpip.NetworkProtocolNumber
}
   441  
// A LinkAddressCache caches link addresses.
type LinkAddressCache interface {
	// CheckLocalAddress determines if the given local address exists.
	//
	// NOTE(review): the previous comment was cut off mid-sentence, so the
	// meaning of the returned NICID is undocumented here. Presumably it is
	// the NIC that owns the address, or zero when there is none; confirm
	// against the implementation.
	CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID

	// AddLinkAddress adds a link address to the cache.
	AddLinkAddress(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress)

	// GetLinkAddress looks up the cache to translate address to link address (e.g. IP -> MAC).
	// If the LinkEndpoint requests address resolution and there is a LinkAddressResolver
	// registered with the network protocol, the cache attempts to resolve the address
	// and returns ErrWouldBlock. Waker is notified when address resolution is
	// complete (success or not).
	//
	// If address resolution is required, ErrNoLinkAddress and a notification channel are
	// returned for the top level caller to block. The channel is closed once address
	// resolution is complete (success or not).
	//
	// NOTE(review): the two paragraphs above name different errors
	// (ErrWouldBlock vs. ErrNoLinkAddress) for what reads like the same
	// situation; confirm which one implementations actually return.
	GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, w *sleep.Waker) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error)

	// RemoveWaker removes a waker that has been added in GetLinkAddress().
	RemoveWaker(nicID tcpip.NICID, addr tcpip.Address, waker *sleep.Waker)
}
   465  
// RawFactory produces endpoints for writing various types of raw packets.
type RawFactory interface {
	// NewUnassociatedEndpoint produces endpoints for writing packets not
	// associated with a particular transport protocol. Such endpoints can
	// be used to write arbitrary packets that include the network header.
	NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)

	// NewPacketEndpoint produces endpoints for reading and writing packets
	// that include network and (when cooked is false) link layer headers.
	NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
}
   477  
   478  // GSOType is the type of GSO segments.
   479  //
   480  // +stateify savable
   481  type GSOType int
   482  
   483  // Types of gso segments.
   484  const (
   485  	GSONone GSOType = iota
   486  
   487  	// Hardware GSO types:
   488  	GSOTCPv4
   489  	GSOTCPv6
   490  
   491  	// GSOSW is used for software GSO segments which have to be sent by
   492  	// endpoint.WritePackets.
   493  	GSOSW
   494  )
   495  
// GSO contains generic segmentation offload properties.
//
// +stateify savable
type GSO struct {
	// Type is one of GSONone, GSOTCPv4, etc.
	Type GSOType
	// NeedsCsum is set if the checksum offload is enabled.
	NeedsCsum bool
	// CsumOffset is the offset after which the checksum is placed.
	CsumOffset uint16

	// MSS is the maximum segment size.
	MSS uint16
	// L3HdrLen is the L3 (IP) header length.
	L3HdrLen uint16

	// MaxSize is the maximum GSO packet size.
	MaxSize uint32
}
   515  
// GSOEndpoint provides access to GSO properties.
type GSOEndpoint interface {
	// GSOMaxSize returns the maximum GSO packet size.
	GSOMaxSize() uint32
}
   521  
   522  // SoftwareGSOMaxSize is a maximum allowed size of a software GSO segment.
   523  // This isn't a hard limit, because it is never set into packet headers.
   524  const SoftwareGSOMaxSize = (1 << 16)