github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/tcpip/stack/nic.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package stack
    16  
    17  import (
    18  	"fmt"
    19  	"reflect"
    20  
    21  	"github.com/metacubex/gvisor/pkg/atomicbitops"
    22  	"github.com/metacubex/gvisor/pkg/tcpip"
    23  	"github.com/metacubex/gvisor/pkg/tcpip/header"
    24  )
    25  
// linkResolver pairs a protocol's LinkAddressResolver with the neighbor cache
// that is initialized for it in newNIC.
type linkResolver struct {
	// resolver is the link address resolver this entry was created for.
	resolver LinkAddressResolver

	// neigh is the neighbor cache associated with resolver.
	neigh neighborCache
}
    31  
// Compile-time assertions that *nic satisfies NetworkInterface and
// NetworkDispatcher.
var _ NetworkInterface = (*nic)(nil)
var _ NetworkDispatcher = (*nic)(nil)
    34  
// nic represents a "network interface card" to which the networking stack is
// attached.
type nic struct {
	// NetworkLinkEndpoint is the link endpoint this NIC was created with; the
	// NIC attaches itself to it in newNIC and detaches in remove.
	NetworkLinkEndpoint

	// stack, id, name and context are set once in newNIC from its arguments
	// and the supplied NICOptions.
	stack   *Stack
	id      tcpip.NICID
	name    string
	context NICContext

	// stats holds this NIC's counters, created by makeNICStats.
	stats sharedStats

	// enableDisableMu is used to synchronize attempts to enable/disable the NIC.
	// Without this mutex, calls to enable/disable the NIC may interleave and
	// leave the NIC in an inconsistent state.
	enableDisableMu nicRWMutex

	// The network endpoints themselves may be modified by calling the interface's
	// methods, but the map reference and entries must be constant.
	networkEndpoints          map[tcpip.NetworkProtocolNumber]NetworkEndpoint
	linkAddrResolvers         map[tcpip.NetworkProtocolNumber]*linkResolver
	duplicateAddressDetectors map[tcpip.NetworkProtocolNumber]DuplicateAddressDetector

	// enabled indicates whether the NIC is enabled.
	enabled atomicbitops.Bool

	// spoofing indicates whether the NIC is spoofing.
	spoofing atomicbitops.Bool

	// promiscuous indicates whether the NIC is promiscuous.
	promiscuous atomicbitops.Bool

	// linkResQueue holds packets that are waiting for link resolution to
	// complete.
	linkResQueue packetsPendingLinkResolution

	// packetEPsMu protects annotated fields below.
	packetEPsMu packetEPsRWMutex

	// packetEPs is protected by the mutex, but the values contained in it are
	// not.
	//
	// +checklocks:packetEPsMu
	packetEPs map[tcpip.NetworkProtocolNumber]*packetEndpointList

	// qDisc is the queueing discipline that outgoing packets are written to
	// (see writeRawPacket). newNIC installs a pass-through implementation when
	// none is configured.
	qDisc QueueingDiscipline

	// gro receives every inbound packet accepted by DeliverNetworkPacket and
	// dispatches it to the matching network endpoint.
	gro groDispatcher

	// deliverLinkPackets specifies whether this NIC delivers packets to
	// packet sockets. It is immutable.
	//
	// deliverLinkPackets is off by default because some users already
	// deliver link packets by explicitly calling nic.DeliverLinkPackets.
	deliverLinkPackets bool
}
    90  
    91  // makeNICStats initializes the NIC statistics and associates them to the global
    92  // NIC statistics.
    93  func makeNICStats(global tcpip.NICStats) sharedStats {
    94  	var stats sharedStats
    95  	tcpip.InitStatCounters(reflect.ValueOf(&stats.local).Elem())
    96  	stats.init(&stats.local, &global)
    97  	return stats
    98  }
    99  
// packetEndpointList is a mutex-guarded list of packet endpoints.
type packetEndpointList struct {
	// mu guards eps.
	mu packetEndpointListRWMutex

	// eps is protected by mu, but the contained PacketEndpoint values are not.
	//
	// +checklocks:mu
	eps []PacketEndpoint
}
   108  
   109  func (p *packetEndpointList) add(ep PacketEndpoint) {
   110  	p.mu.Lock()
   111  	defer p.mu.Unlock()
   112  	p.eps = append(p.eps, ep)
   113  }
   114  
   115  func (p *packetEndpointList) remove(ep PacketEndpoint) {
   116  	p.mu.Lock()
   117  	defer p.mu.Unlock()
   118  	for i, epOther := range p.eps {
   119  		if epOther == ep {
   120  			p.eps = append(p.eps[:i], p.eps[i+1:]...)
   121  			break
   122  		}
   123  	}
   124  }
   125  
   126  func (p *packetEndpointList) len() int {
   127  	p.mu.RLock()
   128  	defer p.mu.RUnlock()
   129  	return len(p.eps)
   130  }
   131  
   132  // forEach calls fn with each endpoints in p while holding the read lock on p.
   133  func (p *packetEndpointList) forEach(fn func(PacketEndpoint)) {
   134  	p.mu.RLock()
   135  	defer p.mu.RUnlock()
   136  	for _, ep := range p.eps {
   137  		fn(ep)
   138  	}
   139  }
   140  
// Compile-time assertion that *delegatingQueueingDiscipline satisfies
// QueueingDiscipline.
var _ QueueingDiscipline = (*delegatingQueueingDiscipline)(nil)

// delegatingQueueingDiscipline is a QueueingDiscipline that does no queueing
// of its own: packets are passed straight to the embedded LinkWriter. newNIC
// uses it when no queueing discipline is configured.
type delegatingQueueingDiscipline struct {
	LinkWriter
}
   146  
// Close is a no-op; this type holds no state of its own to release.
func (*delegatingQueueingDiscipline) Close() {}
   148  
   149  // WritePacket passes the packet through to the underlying LinkWriter's WritePackets.
   150  func (qDisc *delegatingQueueingDiscipline) WritePacket(pkt *PacketBuffer) tcpip.Error {
   151  	var pkts PacketBufferList
   152  	pkts.PushBack(pkt)
   153  	_, err := qDisc.LinkWriter.WritePackets(pkts)
   154  	return err
   155  }
   156  
   157  // newNIC returns a new NIC using the default NDP configurations from stack.
   158  func newNIC(stack *Stack, id tcpip.NICID, ep LinkEndpoint, opts NICOptions) *nic {
   159  	// TODO(b/141011931): Validate a LinkEndpoint (ep) is valid. For
   160  	// example, make sure that the link address it provides is a valid
   161  	// unicast ethernet address.
   162  
   163  	// If no queueing discipline was specified provide a stub implementation that
   164  	// just delegates to the lower link endpoint.
   165  	qDisc := opts.QDisc
   166  	if qDisc == nil {
   167  		qDisc = &delegatingQueueingDiscipline{LinkWriter: ep}
   168  	}
   169  
   170  	// TODO(b/143357959): RFC 8200 section 5 requires that IPv6 endpoints
   171  	// observe an MTU of at least 1280 bytes. Ensure that this requirement
   172  	// of IPv6 is supported on this endpoint's LinkEndpoint.
   173  	nic := &nic{
   174  		NetworkLinkEndpoint:       ep,
   175  		stack:                     stack,
   176  		id:                        id,
   177  		name:                      opts.Name,
   178  		context:                   opts.Context,
   179  		stats:                     makeNICStats(stack.Stats().NICs),
   180  		networkEndpoints:          make(map[tcpip.NetworkProtocolNumber]NetworkEndpoint),
   181  		linkAddrResolvers:         make(map[tcpip.NetworkProtocolNumber]*linkResolver),
   182  		duplicateAddressDetectors: make(map[tcpip.NetworkProtocolNumber]DuplicateAddressDetector),
   183  		qDisc:                     qDisc,
   184  		deliverLinkPackets:        opts.DeliverLinkPackets,
   185  	}
   186  	nic.linkResQueue.init(nic)
   187  
   188  	nic.packetEPsMu.Lock()
   189  	defer nic.packetEPsMu.Unlock()
   190  
   191  	nic.packetEPs = make(map[tcpip.NetworkProtocolNumber]*packetEndpointList)
   192  
   193  	resolutionRequired := ep.Capabilities()&CapabilityResolutionRequired != 0
   194  
   195  	for _, netProto := range stack.networkProtocols {
   196  		netNum := netProto.Number()
   197  		netEP := netProto.NewEndpoint(nic, nic)
   198  		nic.networkEndpoints[netNum] = netEP
   199  
   200  		if resolutionRequired {
   201  			if r, ok := netEP.(LinkAddressResolver); ok {
   202  				l := &linkResolver{resolver: r}
   203  				l.neigh.init(nic, r)
   204  				nic.linkAddrResolvers[r.LinkAddressProtocol()] = l
   205  			}
   206  		}
   207  
   208  		if d, ok := netEP.(DuplicateAddressDetector); ok {
   209  			nic.duplicateAddressDetectors[d.DuplicateAddressProtocol()] = d
   210  		}
   211  	}
   212  
   213  	nic.gro.init(opts.GROTimeout)
   214  	nic.NetworkLinkEndpoint.Attach(nic)
   215  
   216  	return nic
   217  }
   218  
// getNetworkEndpoint returns the network endpoint registered for proto, or
// nil if the NIC has none for that protocol.
func (n *nic) getNetworkEndpoint(proto tcpip.NetworkProtocolNumber) NetworkEndpoint {
	return n.networkEndpoints[proto]
}
   222  
// Enabled implements NetworkInterface.
//
// It reports the current value of the NIC's atomic enabled flag.
func (n *nic) Enabled() bool {
	return n.enabled.Load()
}
   227  
   228  // setEnabled sets the enabled status for the NIC.
   229  //
   230  // Returns true if the enabled status was updated.
   231  //
   232  // +checklocks:n.enableDisableMu
   233  func (n *nic) setEnabled(v bool) bool {
   234  	return n.enabled.Swap(v) != v
   235  }
   236  
// disable disables n.
//
// It undoes the work done by enable. The enable/disable mutex is held for the
// duration of the call; the actual work is done by disableLocked.
func (n *nic) disable() {
	n.enableDisableMu.Lock()
	defer n.enableDisableMu.Unlock()
	n.disableLocked()
}
   245  
   246  // disableLocked disables n.
   247  //
   248  // It undoes the work done by enable.
   249  //
   250  // +checklocks:n.enableDisableMu
   251  func (n *nic) disableLocked() {
   252  	if !n.Enabled() {
   253  		return
   254  	}
   255  
   256  	// TODO(gvisor.dev/issue/1491): Should Routes that are currently bound to n be
   257  	// invalidated? Currently, Routes will continue to work when a NIC is enabled
   258  	// again, and applications may not know that the underlying NIC was ever
   259  	// disabled.
   260  
   261  	for _, ep := range n.networkEndpoints {
   262  		ep.Disable()
   263  
   264  		// Clear the neighbour table (including static entries) as we cannot
   265  		// guarantee that the current neighbour table will be valid when the NIC is
   266  		// enabled again.
   267  		//
   268  		// This matches linux's behaviour at the time of writing:
   269  		// https://github.com/torvalds/linux/blob/71c061d2443814de15e177489d5cc00a4a253ef3/net/core/neighbour.c#L371
   270  		netProto := ep.NetworkProtocolNumber()
   271  		switch err := n.clearNeighbors(netProto); err.(type) {
   272  		case nil, *tcpip.ErrNotSupported:
   273  		default:
   274  			panic(fmt.Sprintf("n.clearNeighbors(%d): %s", netProto, err))
   275  		}
   276  	}
   277  
   278  	if !n.setEnabled(false) {
   279  		panic("should have only done work to disable the NIC if it was enabled")
   280  	}
   281  }
   282  
// enable enables n.
//
// If the stack has IPv6 enabled, enable will join the IPv6 All-Nodes Multicast
// address (ff02::1), start DAD for permanent addresses, and start soliciting
// routers if the stack is not operating as a router. If the stack is also
// configured to auto-generate a link-local address, one will be generated.
//
// enable is a no-op returning nil when n is already enabled. Otherwise it
// enables each network endpoint in turn and returns the first error
// encountered.
//
// NOTE(review): the enabled flag is set before the endpoints are enabled, and
// nothing in this function rolls it (or the endpoints already enabled) back
// when a later endpoint fails — confirm callers handle that state.
func (n *nic) enable() tcpip.Error {
	n.enableDisableMu.Lock()
	defer n.enableDisableMu.Unlock()

	// setEnabled reports false when the flag was already true.
	if !n.setEnabled(true) {
		return nil
	}

	for _, ep := range n.networkEndpoints {
		if err := ep.Enable(); err != nil {
			return err
		}
	}

	return nil
}
   305  
// remove detaches NIC from the link endpoint and releases network endpoint
// resources. This guarantees no packets between this NIC and the network
// stack.
//
// As written, remove always returns nil.
func (n *nic) remove() tcpip.Error {
	// Disable the NIC and close its network endpoints under the
	// enable/disable mutex.
	n.enableDisableMu.Lock()

	n.disableLocked()

	for _, ep := range n.networkEndpoints {
		ep.Close()
	}

	n.enableDisableMu.Unlock()

	// Shutdown GRO.
	n.gro.close()

	// Drain and drop any packets pending link resolution.
	// We must not hold n.enableDisableMu here.
	n.linkResQueue.cancel()

	// Prevent packets from going down to the link before shutting the link down.
	n.qDisc.Close()
	n.NetworkLinkEndpoint.Attach(nil)

	return nil
}
   333  
// setPromiscuousMode enables or disables promiscuous mode by atomically
// storing enable in the NIC's promiscuous flag.
func (n *nic) setPromiscuousMode(enable bool) {
	n.promiscuous.Store(enable)
}
   338  
// Promiscuous implements NetworkInterface.
//
// It reports the current value of the NIC's atomic promiscuous flag.
func (n *nic) Promiscuous() bool {
	return n.promiscuous.Load()
}
   343  
   344  // IsLoopback implements NetworkInterface.
   345  func (n *nic) IsLoopback() bool {
   346  	return n.NetworkLinkEndpoint.Capabilities()&CapabilityLoopback != 0
   347  }
   348  
   349  // WritePacket implements NetworkEndpoint.
   350  func (n *nic) WritePacket(r *Route, pkt *PacketBuffer) tcpip.Error {
   351  	routeInfo, _, err := r.resolvedFields(nil)
   352  	switch err.(type) {
   353  	case nil:
   354  		pkt.EgressRoute = routeInfo
   355  		return n.writePacket(pkt)
   356  	case *tcpip.ErrWouldBlock:
   357  		// As per relevant RFCs, we should queue packets while we wait for link
   358  		// resolution to complete.
   359  		//
   360  		// RFC 1122 section 2.3.2.2 (for IPv4):
   361  		//   The link layer SHOULD save (rather than discard) at least
   362  		//   one (the latest) packet of each set of packets destined to
   363  		//   the same unresolved IP address, and transmit the saved
   364  		//   packet when the address has been resolved.
   365  		//
   366  		// RFC 4861 section 7.2.2 (for IPv6):
   367  		//   While waiting for address resolution to complete, the sender MUST, for
   368  		//   each neighbor, retain a small queue of packets waiting for address
   369  		//   resolution to complete. The queue MUST hold at least one packet, and
   370  		//   MAY contain more. However, the number of queued packets per neighbor
   371  		//   SHOULD be limited to some small value. When a queue overflows, the new
   372  		//   arrival SHOULD replace the oldest entry. Once address resolution
   373  		//   completes, the node transmits any queued packets.
   374  		return n.linkResQueue.enqueue(r, pkt)
   375  	default:
   376  		return err
   377  	}
   378  }
   379  
   380  // WritePacketToRemote implements NetworkInterface.
   381  func (n *nic) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, pkt *PacketBuffer) tcpip.Error {
   382  	pkt.EgressRoute = RouteInfo{
   383  		routeInfo: routeInfo{
   384  			NetProto:         pkt.NetworkProtocolNumber,
   385  			LocalLinkAddress: n.LinkAddress(),
   386  		},
   387  		RemoteLinkAddress: remoteLinkAddr,
   388  	}
   389  	return n.writePacket(pkt)
   390  }
   391  
// writePacket asks the link endpoint to add its header to pkt and then sends
// the packet via writeRawPacket.
func (n *nic) writePacket(pkt *PacketBuffer) tcpip.Error {
	n.NetworkLinkEndpoint.AddHeader(pkt)
	return n.writeRawPacket(pkt)
}
   396  
   397  func (n *nic) writeRawPacketWithLinkHeaderInPayload(pkt *PacketBuffer) tcpip.Error {
   398  	if !n.NetworkLinkEndpoint.ParseHeader(pkt) {
   399  		return &tcpip.ErrMalformedHeader{}
   400  	}
   401  	return n.writeRawPacket(pkt)
   402  }
   403  
   404  func (n *nic) writeRawPacket(pkt *PacketBuffer) tcpip.Error {
   405  	// Always an outgoing packet.
   406  	pkt.PktType = tcpip.PacketOutgoing
   407  
   408  	if n.deliverLinkPackets {
   409  		n.DeliverLinkPacket(pkt.NetworkProtocolNumber, pkt)
   410  	}
   411  
   412  	if err := n.qDisc.WritePacket(pkt); err != nil {
   413  		if _, ok := err.(*tcpip.ErrNoBufferSpace); ok {
   414  			n.stats.txPacketsDroppedNoBufferSpace.Increment()
   415  		}
   416  		return err
   417  	}
   418  
   419  	n.stats.tx.packets.Increment()
   420  	n.stats.tx.bytes.IncrementBy(uint64(pkt.Size()))
   421  	return nil
   422  }
   423  
// setSpoofing enables or disables address spoofing by atomically storing
// enable in the NIC's spoofing flag.
func (n *nic) setSpoofing(enable bool) {
	n.spoofing.Store(enable)
}
   428  
// Spoofing implements NetworkInterface.
//
// It reports the current value of the NIC's atomic spoofing flag.
func (n *nic) Spoofing() bool {
	return n.spoofing.Load()
}
   433  
   434  // primaryAddress returns an address that can be used to communicate with
   435  // remoteAddr.
   436  func (n *nic) primaryEndpoint(protocol tcpip.NetworkProtocolNumber, remoteAddr, srcHint tcpip.Address) AssignableAddressEndpoint {
   437  	ep := n.getNetworkEndpoint(protocol)
   438  	if ep == nil {
   439  		return nil
   440  	}
   441  
   442  	addressableEndpoint, ok := ep.(AddressableEndpoint)
   443  	if !ok {
   444  		return nil
   445  	}
   446  
   447  	return addressableEndpoint.AcquireOutgoingPrimaryAddress(remoteAddr, srcHint, n.Spoofing())
   448  }
   449  
// getAddressBehaviour selects which NIC flag getAddressOrCreateTemp consults
// when deciding whether a temporary address endpoint may be created.
type getAddressBehaviour int

const (
	// spoofing indicates that the NIC's spoofing flag should be observed when
	// getting a NIC's address endpoint.
	spoofing getAddressBehaviour = iota

	// promiscuous indicates that the NIC's promiscuous flag should be observed
	// when getting a NIC's address endpoint.
	promiscuous
)
   461  
// getAddress returns the address endpoint for dst on the given protocol,
// observing the NIC's promiscuous flag when deciding whether a temporary
// endpoint may be created. The lookup uses CanBePrimaryEndpoint.
func (n *nic) getAddress(protocol tcpip.NetworkProtocolNumber, dst tcpip.Address) AssignableAddressEndpoint {
	return n.getAddressOrCreateTemp(protocol, dst, CanBePrimaryEndpoint, promiscuous)
}
   465  
   466  func (n *nic) hasAddress(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
   467  	ep := n.getAddressOrCreateTempInner(protocol, addr, false, NeverPrimaryEndpoint)
   468  	if ep != nil {
   469  		ep.DecRef()
   470  		return true
   471  	}
   472  
   473  	return false
   474  }
   475  
// findEndpoint finds the endpoint, if any, with the given address.
//
// The NIC's spoofing flag is observed when deciding whether a temporary
// endpoint may be created; peb is passed through to the lookup.
func (n *nic) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) AssignableAddressEndpoint {
	return n.getAddressOrCreateTemp(protocol, address, peb, spoofing)
}
   480  
   481  // getAddressEpOrCreateTemp returns the address endpoint for the given protocol
   482  // and address.
   483  //
   484  // If none exists a temporary one may be created if we are in promiscuous mode
   485  // or spoofing. Promiscuous mode will only be checked if promiscuous is true.
   486  // Similarly, spoofing will only be checked if spoofing is true.
   487  //
   488  // If the address is the IPv4 broadcast address for an endpoint's network, that
   489  // endpoint will be returned.
   490  func (n *nic) getAddressOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior, tempRef getAddressBehaviour) AssignableAddressEndpoint {
   491  	var spoofingOrPromiscuous bool
   492  	switch tempRef {
   493  	case spoofing:
   494  		spoofingOrPromiscuous = n.Spoofing()
   495  	case promiscuous:
   496  		spoofingOrPromiscuous = n.Promiscuous()
   497  	}
   498  	return n.getAddressOrCreateTempInner(protocol, address, spoofingOrPromiscuous, peb)
   499  }
   500  
   501  // getAddressOrCreateTempInner is like getAddressEpOrCreateTemp except a boolean
   502  // is passed to indicate whether or not we should generate temporary endpoints.
   503  func (n *nic) getAddressOrCreateTempInner(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, createTemp bool, peb PrimaryEndpointBehavior) AssignableAddressEndpoint {
   504  	ep := n.getNetworkEndpoint(protocol)
   505  	if ep == nil {
   506  		return nil
   507  	}
   508  
   509  	addressableEndpoint, ok := ep.(AddressableEndpoint)
   510  	if !ok {
   511  		return nil
   512  	}
   513  
   514  	return addressableEndpoint.AcquireAssignedAddress(address, createTemp, peb)
   515  }
   516  
   517  // addAddress adds a new address to n, so that it starts accepting packets
   518  // targeted at the given address (and network protocol).
   519  func (n *nic) addAddress(protocolAddress tcpip.ProtocolAddress, properties AddressProperties) tcpip.Error {
   520  	ep := n.getNetworkEndpoint(protocolAddress.Protocol)
   521  	if ep == nil {
   522  		return &tcpip.ErrUnknownProtocol{}
   523  	}
   524  
   525  	addressableEndpoint, ok := ep.(AddressableEndpoint)
   526  	if !ok {
   527  		return &tcpip.ErrNotSupported{}
   528  	}
   529  
   530  	addressEndpoint, err := addressableEndpoint.AddAndAcquirePermanentAddress(protocolAddress.AddressWithPrefix, properties)
   531  	if err == nil {
   532  		// We have no need for the address endpoint.
   533  		addressEndpoint.DecRef()
   534  	}
   535  	return err
   536  }
   537  
   538  // allPermanentAddresses returns all permanent addresses associated with
   539  // this NIC.
   540  func (n *nic) allPermanentAddresses() []tcpip.ProtocolAddress {
   541  	var addrs []tcpip.ProtocolAddress
   542  	for p, ep := range n.networkEndpoints {
   543  		addressableEndpoint, ok := ep.(AddressableEndpoint)
   544  		if !ok {
   545  			continue
   546  		}
   547  
   548  		for _, a := range addressableEndpoint.PermanentAddresses() {
   549  			addrs = append(addrs, tcpip.ProtocolAddress{Protocol: p, AddressWithPrefix: a})
   550  		}
   551  	}
   552  	return addrs
   553  }
   554  
   555  // primaryAddresses returns the primary addresses associated with this NIC.
   556  func (n *nic) primaryAddresses() []tcpip.ProtocolAddress {
   557  	var addrs []tcpip.ProtocolAddress
   558  	for p, ep := range n.networkEndpoints {
   559  		addressableEndpoint, ok := ep.(AddressableEndpoint)
   560  		if !ok {
   561  			continue
   562  		}
   563  
   564  		for _, a := range addressableEndpoint.PrimaryAddresses() {
   565  			addrs = append(addrs, tcpip.ProtocolAddress{Protocol: p, AddressWithPrefix: a})
   566  		}
   567  	}
   568  	return addrs
   569  }
   570  
   571  // PrimaryAddress implements NetworkInterface.
   572  func (n *nic) PrimaryAddress(proto tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error) {
   573  	ep := n.getNetworkEndpoint(proto)
   574  	if ep == nil {
   575  		return tcpip.AddressWithPrefix{}, &tcpip.ErrUnknownProtocol{}
   576  	}
   577  
   578  	addressableEndpoint, ok := ep.(AddressableEndpoint)
   579  	if !ok {
   580  		return tcpip.AddressWithPrefix{}, &tcpip.ErrNotSupported{}
   581  	}
   582  
   583  	return addressableEndpoint.MainAddress(), nil
   584  }
   585  
   586  // removeAddress removes an address from n.
   587  func (n *nic) removeAddress(addr tcpip.Address) tcpip.Error {
   588  	for _, ep := range n.networkEndpoints {
   589  		addressableEndpoint, ok := ep.(AddressableEndpoint)
   590  		if !ok {
   591  			continue
   592  		}
   593  
   594  		switch err := addressableEndpoint.RemovePermanentAddress(addr); err.(type) {
   595  		case *tcpip.ErrBadLocalAddress:
   596  			continue
   597  		default:
   598  			return err
   599  		}
   600  	}
   601  
   602  	return &tcpip.ErrBadLocalAddress{}
   603  }
   604  
   605  func (n *nic) setAddressLifetimes(addr tcpip.Address, lifetimes AddressLifetimes) tcpip.Error {
   606  	for _, ep := range n.networkEndpoints {
   607  		ep, ok := ep.(AddressableEndpoint)
   608  		if !ok {
   609  			continue
   610  		}
   611  
   612  		switch err := ep.SetLifetimes(addr, lifetimes); err.(type) {
   613  		case *tcpip.ErrBadLocalAddress:
   614  			continue
   615  		default:
   616  			return err
   617  		}
   618  	}
   619  
   620  	return &tcpip.ErrBadLocalAddress{}
   621  }
   622  
   623  func (n *nic) getLinkAddress(addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error {
   624  	linkRes, ok := n.linkAddrResolvers[protocol]
   625  	if !ok {
   626  		return &tcpip.ErrNotSupported{}
   627  	}
   628  
   629  	if linkAddr, ok := linkRes.resolver.ResolveStaticAddress(addr); ok {
   630  		onResolve(LinkResolutionResult{LinkAddress: linkAddr, Err: nil})
   631  		return nil
   632  	}
   633  
   634  	_, _, err := linkRes.neigh.entry(addr, localAddr, onResolve)
   635  	return err
   636  }
   637  
   638  func (n *nic) neighbors(protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) {
   639  	if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
   640  		return linkRes.neigh.entries(), nil
   641  	}
   642  
   643  	return nil, &tcpip.ErrNotSupported{}
   644  }
   645  
   646  func (n *nic) addStaticNeighbor(addr tcpip.Address, protocol tcpip.NetworkProtocolNumber, linkAddress tcpip.LinkAddress) tcpip.Error {
   647  	if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
   648  		linkRes.neigh.addStaticEntry(addr, linkAddress)
   649  		return nil
   650  	}
   651  
   652  	return &tcpip.ErrNotSupported{}
   653  }
   654  
   655  func (n *nic) removeNeighbor(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
   656  	if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
   657  		if !linkRes.neigh.removeEntry(addr) {
   658  			return &tcpip.ErrBadAddress{}
   659  		}
   660  		return nil
   661  	}
   662  
   663  	return &tcpip.ErrNotSupported{}
   664  }
   665  
   666  func (n *nic) clearNeighbors(protocol tcpip.NetworkProtocolNumber) tcpip.Error {
   667  	if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
   668  		linkRes.neigh.clear()
   669  		return nil
   670  	}
   671  
   672  	return &tcpip.ErrNotSupported{}
   673  }
   674  
   675  // joinGroup adds a new endpoint for the given multicast address, if none
   676  // exists yet. Otherwise it just increments its count.
   677  func (n *nic) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
   678  	// TODO(b/143102137): When implementing MLD, make sure MLD packets are
   679  	// not sent unless a valid link-local address is available for use on n
   680  	// as an MLD packet's source address must be a link-local address as
   681  	// outlined in RFC 3810 section 5.
   682  
   683  	ep := n.getNetworkEndpoint(protocol)
   684  	if ep == nil {
   685  		return &tcpip.ErrNotSupported{}
   686  	}
   687  
   688  	gep, ok := ep.(GroupAddressableEndpoint)
   689  	if !ok {
   690  		return &tcpip.ErrNotSupported{}
   691  	}
   692  
   693  	return gep.JoinGroup(addr)
   694  }
   695  
   696  // leaveGroup decrements the count for the given multicast address, and when it
   697  // reaches zero removes the endpoint for this address.
   698  func (n *nic) leaveGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
   699  	ep := n.getNetworkEndpoint(protocol)
   700  	if ep == nil {
   701  		return &tcpip.ErrNotSupported{}
   702  	}
   703  
   704  	gep, ok := ep.(GroupAddressableEndpoint)
   705  	if !ok {
   706  		return &tcpip.ErrNotSupported{}
   707  	}
   708  
   709  	return gep.LeaveGroup(addr)
   710  }
   711  
   712  // isInGroup returns true if n has joined the multicast group addr.
   713  func (n *nic) isInGroup(addr tcpip.Address) bool {
   714  	for _, ep := range n.networkEndpoints {
   715  		gep, ok := ep.(GroupAddressableEndpoint)
   716  		if !ok {
   717  			continue
   718  		}
   719  
   720  		if gep.IsInGroup(addr) {
   721  			return true
   722  		}
   723  	}
   724  
   725  	return false
   726  }
   727  
   728  // DeliverNetworkPacket finds the appropriate network protocol endpoint and
   729  // hands the packet over for further processing. This function is called when
   730  // the NIC receives a packet from the link endpoint.
   731  func (n *nic) DeliverNetworkPacket(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
   732  	enabled := n.Enabled()
   733  	// If the NIC is not yet enabled, don't receive any packets.
   734  	if !enabled {
   735  		n.stats.disabledRx.packets.Increment()
   736  		n.stats.disabledRx.bytes.IncrementBy(uint64(pkt.Data().Size()))
   737  		return
   738  	}
   739  
   740  	n.stats.rx.packets.Increment()
   741  	n.stats.rx.bytes.IncrementBy(uint64(pkt.Data().Size()))
   742  
   743  	networkEndpoint := n.getNetworkEndpoint(protocol)
   744  	if networkEndpoint == nil {
   745  		n.stats.unknownL3ProtocolRcvdPacketCounts.Increment(uint64(protocol))
   746  		return
   747  	}
   748  
   749  	pkt.RXChecksumValidated = n.NetworkLinkEndpoint.Capabilities()&CapabilityRXChecksumOffload != 0
   750  
   751  	if n.deliverLinkPackets {
   752  		n.DeliverLinkPacket(protocol, pkt)
   753  	}
   754  
   755  	n.gro.dispatch(pkt, protocol, networkEndpoint)
   756  }
   757  
// DeliverLinkPacket hands a copy of pkt to the packet endpoints registered on
// this NIC: those registered for protocol (skipped for outgoing packets) and
// those registered for header.EthernetProtocolAll.
//
// The copy is built lazily, so nothing is copied when no endpoint is
// interested; each endpoint then receives its own clone of that copy.
func (n *nic) DeliverLinkPacket(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
	// Deliver to interested packet endpoints without holding NIC lock.
	var packetEPPkt *PacketBuffer
	// Release the shared copy, if one was ever created, once all endpoints
	// have been served.
	defer func() {
		if !packetEPPkt.IsNil() {
			packetEPPkt.DecRef()
		}
	}()
	deliverPacketEPs := func(ep PacketEndpoint) {
		if packetEPPkt.IsNil() {
			// Packet endpoints hold the full packet.
			//
			// We perform a deep copy because higher-level endpoints may point to
			// the middle of a view that is held by a packet endpoint. Save/Restore
			// does not support overlapping slices and will panic in this case.
			//
			// TODO(https://gvisor.dev/issue/6517): Avoid this copy once S/R supports
			// overlapping slices (e.g. by passing a shallow copy of pkt to the packet
			// endpoint).
			packetEPPkt = NewPacketBuffer(PacketBufferOptions{
				Payload: BufferSince(pkt.LinkHeader()),
			})
			// If a link header was populated in the original packet buffer, then
			// populate it in the packet buffer we provide to packet endpoints as
			// packet endpoints inspect link headers.
			packetEPPkt.LinkHeader().Consume(len(pkt.LinkHeader().Slice()))
			packetEPPkt.PktType = pkt.PktType
			// Assume the packet is for us if the packet type is unset.
			// The packet type is set to PacketOutgoing when sending packets so
			// this may only be unset for incoming packets where link endpoints
			// have not set it.
			if packetEPPkt.PktType == 0 {
				packetEPPkt.PktType = tcpip.PacketHost
			}
		}

		// Each endpoint gets its own clone; the clone is released as soon as
		// the endpoint's handler returns.
		clone := packetEPPkt.Clone()
		defer clone.DecRef()
		ep.HandlePacket(n.id, protocol, clone)
	}

	// NOTE(review): the map is only read here, yet the exclusive lock is
	// taken; a read lock may suffice if packetEPsMu offers one — confirm.
	n.packetEPsMu.Lock()
	// Are any packet type sockets listening for this network protocol?
	protoEPs, protoEPsOK := n.packetEPs[protocol]
	// Other packet type sockets that are listening for all protocols.
	anyEPs, anyEPsOK := n.packetEPs[header.EthernetProtocolAll]
	n.packetEPsMu.Unlock()

	// On Linux, only ETH_P_ALL endpoints get outbound packets.
	if pkt.PktType != tcpip.PacketOutgoing && protoEPsOK {
		protoEPs.forEach(deliverPacketEPs)
	}
	if anyEPsOK {
		anyEPs.forEach(deliverPacketEPs)
	}
}
   814  
   815  // DeliverTransportPacket delivers the packets to the appropriate transport
   816  // protocol endpoint.
   817  func (n *nic) DeliverTransportPacket(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) TransportPacketDisposition {
   818  	state, ok := n.stack.transportProtocols[protocol]
   819  	if !ok {
   820  		n.stats.unknownL4ProtocolRcvdPacketCounts.Increment(uint64(protocol))
   821  		return TransportPacketProtocolUnreachable
   822  	}
   823  
   824  	transProto := state.proto
   825  
   826  	if len(pkt.TransportHeader().Slice()) == 0 {
   827  		n.stats.malformedL4RcvdPackets.Increment()
   828  		return TransportPacketHandled
   829  	}
   830  
   831  	srcPort, dstPort, err := transProto.ParsePorts(pkt.TransportHeader().Slice())
   832  	if err != nil {
   833  		n.stats.malformedL4RcvdPackets.Increment()
   834  		return TransportPacketHandled
   835  	}
   836  
   837  	netProto, ok := n.stack.networkProtocols[pkt.NetworkProtocolNumber]
   838  	if !ok {
   839  		panic(fmt.Sprintf("expected network protocol = %d, have = %#v", pkt.NetworkProtocolNumber, n.stack.networkProtocolNumbers()))
   840  	}
   841  
   842  	src, dst := netProto.ParseAddresses(pkt.NetworkHeader().Slice())
   843  	id := TransportEndpointID{
   844  		LocalPort:     dstPort,
   845  		LocalAddress:  dst,
   846  		RemotePort:    srcPort,
   847  		RemoteAddress: src,
   848  	}
   849  	if n.stack.demux.deliverPacket(protocol, pkt, id) {
   850  		return TransportPacketHandled
   851  	}
   852  
   853  	// Try to deliver to per-stack default handler.
   854  	if state.defaultHandler != nil {
   855  		if state.defaultHandler(id, pkt) {
   856  			return TransportPacketHandled
   857  		}
   858  	}
   859  
   860  	// We could not find an appropriate destination for this packet so
   861  	// give the protocol specific error handler a chance to handle it.
   862  	// If it doesn't handle it then we should do so.
   863  	switch res := transProto.HandleUnknownDestinationPacket(id, pkt); res {
   864  	case UnknownDestinationPacketMalformed:
   865  		n.stats.malformedL4RcvdPackets.Increment()
   866  		return TransportPacketHandled
   867  	case UnknownDestinationPacketUnhandled:
   868  		return TransportPacketDestinationPortUnreachable
   869  	case UnknownDestinationPacketHandled:
   870  		return TransportPacketHandled
   871  	default:
   872  		panic(fmt.Sprintf("unrecognized result from HandleUnknownDestinationPacket = %d", res))
   873  	}
   874  }
   875  
   876  // DeliverTransportError implements TransportDispatcher.
   877  func (n *nic) DeliverTransportError(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, transErr TransportError, pkt *PacketBuffer) {
   878  	state, ok := n.stack.transportProtocols[trans]
   879  	if !ok {
   880  		return
   881  	}
   882  
   883  	transProto := state.proto
   884  
   885  	// ICMPv4 only guarantees that 8 bytes of the transport protocol will
   886  	// be present in the payload. We know that the ports are within the
   887  	// first 8 bytes for all known transport protocols.
   888  	transHeader, ok := pkt.Data().PullUp(8)
   889  	if !ok {
   890  		return
   891  	}
   892  
   893  	srcPort, dstPort, err := transProto.ParsePorts(transHeader)
   894  	if err != nil {
   895  		return
   896  	}
   897  
   898  	id := TransportEndpointID{srcPort, local, dstPort, remote}
   899  	if n.stack.demux.deliverError(n, net, trans, transErr, pkt, id) {
   900  		return
   901  	}
   902  }
   903  
   904  // DeliverRawPacket implements TransportDispatcher.
   905  func (n *nic) DeliverRawPacket(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) {
   906  	// For ICMPv4 only we validate the header length for compatibility with
   907  	// raw(7) ICMP_FILTER. The same check is made in Linux here:
   908  	// https://github.com/torvalds/linux/blob/70585216/net/ipv4/raw.c#L189.
   909  	if protocol == header.ICMPv4ProtocolNumber && len(pkt.TransportHeader().Slice())+pkt.Data().Size() < header.ICMPv4MinimumSize {
   910  		return
   911  	}
   912  	n.stack.demux.deliverRawPacket(protocol, pkt)
   913  }
   914  
   915  // ID implements NetworkInterface.
   916  func (n *nic) ID() tcpip.NICID {
   917  	return n.id
   918  }
   919  
   920  // Name implements NetworkInterface.
   921  func (n *nic) Name() string {
   922  	return n.name
   923  }
   924  
   925  // nudConfigs gets the NUD configurations for n.
   926  func (n *nic) nudConfigs(protocol tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) {
   927  	if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
   928  		return linkRes.neigh.config(), nil
   929  	}
   930  
   931  	return NUDConfigurations{}, &tcpip.ErrNotSupported{}
   932  }
   933  
   934  // setNUDConfigs sets the NUD configurations for n.
   935  //
   936  // Note, if c contains invalid NUD configuration values, it will be fixed to
   937  // use default values for the erroneous values.
   938  func (n *nic) setNUDConfigs(protocol tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error {
   939  	if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
   940  		c.resetInvalidFields()
   941  		linkRes.neigh.setConfig(c)
   942  		return nil
   943  	}
   944  
   945  	return &tcpip.ErrNotSupported{}
   946  }
   947  
   948  func (n *nic) registerPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
   949  	n.packetEPsMu.Lock()
   950  	defer n.packetEPsMu.Unlock()
   951  
   952  	eps, ok := n.packetEPs[netProto]
   953  	if !ok {
   954  		eps = new(packetEndpointList)
   955  		n.packetEPs[netProto] = eps
   956  	}
   957  	eps.add(ep)
   958  }
   959  
   960  func (n *nic) unregisterPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
   961  	n.packetEPsMu.Lock()
   962  	defer n.packetEPsMu.Unlock()
   963  
   964  	eps, ok := n.packetEPs[netProto]
   965  	if !ok {
   966  		return
   967  	}
   968  	eps.remove(ep)
   969  	if eps.len() == 0 {
   970  		delete(n.packetEPs, netProto)
   971  	}
   972  }
   973  
   974  // isValidForOutgoing returns true if the endpoint can be used to send out a
   975  // packet. It requires the endpoint to not be marked expired (i.e., its address
   976  // has been removed) unless the NIC is in spoofing mode, or temporary.
   977  func (n *nic) isValidForOutgoing(ep AssignableAddressEndpoint) bool {
   978  	return n.Enabled() && ep.IsAssigned(n.Spoofing())
   979  }
   980  
   981  // HandleNeighborProbe implements NetworkInterface.
   982  func (n *nic) HandleNeighborProbe(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error {
   983  	if l, ok := n.linkAddrResolvers[protocol]; ok {
   984  		l.neigh.handleProbe(addr, linkAddr)
   985  		return nil
   986  	}
   987  
   988  	return &tcpip.ErrNotSupported{}
   989  }
   990  
   991  // HandleNeighborConfirmation implements NetworkInterface.
   992  func (n *nic) HandleNeighborConfirmation(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress, flags ReachabilityConfirmationFlags) tcpip.Error {
   993  	if l, ok := n.linkAddrResolvers[protocol]; ok {
   994  		l.neigh.handleConfirmation(addr, linkAddr, flags)
   995  		return nil
   996  	}
   997  
   998  	return &tcpip.ErrNotSupported{}
   999  }
  1000  
  1001  // CheckLocalAddress implements NetworkInterface.
  1002  func (n *nic) CheckLocalAddress(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
  1003  	if n.Spoofing() {
  1004  		return true
  1005  	}
  1006  
  1007  	if addressEndpoint := n.getAddressOrCreateTempInner(protocol, addr, false /* createTemp */, NeverPrimaryEndpoint); addressEndpoint != nil {
  1008  		addressEndpoint.DecRef()
  1009  		return true
  1010  	}
  1011  
  1012  	return false
  1013  }
  1014  
  1015  func (n *nic) checkDuplicateAddress(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, h DADCompletionHandler) (DADCheckAddressDisposition, tcpip.Error) {
  1016  	d, ok := n.duplicateAddressDetectors[protocol]
  1017  	if !ok {
  1018  		return 0, &tcpip.ErrNotSupported{}
  1019  	}
  1020  
  1021  	return d.CheckDuplicateAddress(addr, h), nil
  1022  }
  1023  
  1024  func (n *nic) setForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) {
  1025  	ep := n.getNetworkEndpoint(protocol)
  1026  	if ep == nil {
  1027  		return false, &tcpip.ErrUnknownProtocol{}
  1028  	}
  1029  
  1030  	forwardingEP, ok := ep.(ForwardingNetworkEndpoint)
  1031  	if !ok {
  1032  		return false, &tcpip.ErrNotSupported{}
  1033  	}
  1034  
  1035  	return forwardingEP.SetForwarding(enable), nil
  1036  }
  1037  
  1038  func (n *nic) forwarding(protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) {
  1039  	ep := n.getNetworkEndpoint(protocol)
  1040  	if ep == nil {
  1041  		return false, &tcpip.ErrUnknownProtocol{}
  1042  	}
  1043  
  1044  	forwardingEP, ok := ep.(ForwardingNetworkEndpoint)
  1045  	if !ok {
  1046  		return false, &tcpip.ErrNotSupported{}
  1047  	}
  1048  
  1049  	return forwardingEP.Forwarding(), nil
  1050  }
  1051  
  1052  func (n *nic) multicastForwardingEndpoint(protocol tcpip.NetworkProtocolNumber) (MulticastForwardingNetworkEndpoint, tcpip.Error) {
  1053  	ep := n.getNetworkEndpoint(protocol)
  1054  	if ep == nil {
  1055  		return nil, &tcpip.ErrUnknownProtocol{}
  1056  	}
  1057  
  1058  	forwardingEP, ok := ep.(MulticastForwardingNetworkEndpoint)
  1059  	if !ok {
  1060  		return nil, &tcpip.ErrNotSupported{}
  1061  	}
  1062  
  1063  	return forwardingEP, nil
  1064  }
  1065  
  1066  func (n *nic) setMulticastForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) {
  1067  	ep, err := n.multicastForwardingEndpoint(protocol)
  1068  	if err != nil {
  1069  		return false, err
  1070  	}
  1071  
  1072  	return ep.SetMulticastForwarding(enable), nil
  1073  }
  1074  
  1075  func (n *nic) multicastForwarding(protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) {
  1076  	ep, err := n.multicastForwardingEndpoint(protocol)
  1077  	if err != nil {
  1078  		return false, err
  1079  	}
  1080  
  1081  	return ep.MulticastForwarding(), nil
  1082  }