github.com/flowerwrong/netstack@v0.0.0-20191009141956-e5848263af28/tcpip/stack/nic.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package stack
    16  
    17  import (
    18  	"strings"
    19  	"sync"
    20  	"sync/atomic"
    21  
    22  	"github.com/FlowerWrong/netstack/ilist"
    23  	"github.com/FlowerWrong/netstack/tcpip"
    24  	"github.com/FlowerWrong/netstack/tcpip/buffer"
    25  	"github.com/FlowerWrong/netstack/tcpip/header"
    26  )
    27  
// NIC represents a "network interface card" to which the networking stack is
// attached.
type NIC struct {
	// Identity and wiring of the NIC: the owning stack, the NIC's ID and
	// name, the link endpoint it sends and receives through, and whether it
	// was created as a loopback interface. All of these are filled in by
	// newNIC.
	stack    *Stack
	id       tcpip.NICID
	name     string
	linkEP   LinkEndpoint
	loopback bool

	// mu is the lock the methods of NIC take around the fields that follow.
	// spoofing and promiscuous are the mode flags changed by setSpoofing and
	// setPromiscuousMode. primary maps a network protocol to the ordered
	// list of endpoints that may be picked as the primary endpoint for that
	// protocol. endpoints maps a local address to its referenced endpoint.
	// addressRanges holds the subnets added with AddAddressRange. mcastJoins
	// counts, per multicast address, how many joins are outstanding.
	mu            sync.RWMutex
	spoofing      bool
	promiscuous   bool
	primary       map[tcpip.NetworkProtocolNumber]*ilist.List
	endpoints     map[NetworkEndpointID]*referencedNetworkEndpoint
	addressRanges []tcpip.Subnet
	mcastJoins    map[NetworkEndpointID]int32

	// stats holds the NIC's transmit and receive counters.
	stats NICStats
}
    47  
// NICStats includes transmitted and received stats.
type NICStats struct {
	// Tx holds the counters for traffic sent out of the NIC.
	Tx DirectionStats
	// Rx holds the counters for traffic received by the NIC.
	Rx DirectionStats
}
    53  
// DirectionStats includes packet and byte counts.
type DirectionStats struct {
	// Packets is the number of packets seen in this direction.
	Packets *tcpip.StatCounter

	// Bytes is the number of bytes seen in this direction.
	Bytes *tcpip.StatCounter
}
    59  
// PrimaryEndpointBehavior is an enumeration of an endpoint's primacy behavior.
// It decides whether, and where, a newly added endpoint is placed in the NIC's
// per-protocol list of primary endpoint candidates.
type PrimaryEndpointBehavior int

const (
	// CanBePrimaryEndpoint indicates the endpoint can be used as a primary
	// endpoint for new connections with no local address. It is the zero
	// value of PrimaryEndpointBehavior. Endpoints added with this behavior
	// are appended to the end of the primary list.
	CanBePrimaryEndpoint PrimaryEndpointBehavior = iota

	// FirstPrimaryEndpoint indicates the endpoint should be the first
	// primary endpoint considered. If there are multiple endpoints with
	// this behavior, the most recently-added one will be first.
	FirstPrimaryEndpoint

	// NeverPrimaryEndpoint indicates the endpoint should never be a
	// primary endpoint. Such endpoints are not placed in the primary list
	// at all.
	NeverPrimaryEndpoint
)
    78  
    79  func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, loopback bool) *NIC {
    80  	return &NIC{
    81  		stack:      stack,
    82  		id:         id,
    83  		name:       name,
    84  		linkEP:     ep,
    85  		loopback:   loopback,
    86  		primary:    make(map[tcpip.NetworkProtocolNumber]*ilist.List),
    87  		endpoints:  make(map[NetworkEndpointID]*referencedNetworkEndpoint),
    88  		mcastJoins: make(map[NetworkEndpointID]int32),
    89  		stats: NICStats{
    90  			Tx: DirectionStats{
    91  				Packets: &tcpip.StatCounter{},
    92  				Bytes:   &tcpip.StatCounter{},
    93  			},
    94  			Rx: DirectionStats{
    95  				Packets: &tcpip.StatCounter{},
    96  				Bytes:   &tcpip.StatCounter{},
    97  			},
    98  		},
    99  	}
   100  }
   101  
   102  // enable enables the NIC. enable will attach the link to its LinkEndpoint and
   103  // join the IPv6 All-Nodes Multicast address (ff02::1).
   104  func (n *NIC) enable() *tcpip.Error {
   105  	n.attachLinkEndpoint()
   106  
   107  	// Create an endpoint to receive broadcast packets on this interface.
   108  	if _, ok := n.stack.networkProtocols[header.IPv4ProtocolNumber]; ok {
   109  		if err := n.AddAddress(tcpip.ProtocolAddress{
   110  			Protocol:          header.IPv4ProtocolNumber,
   111  			AddressWithPrefix: tcpip.AddressWithPrefix{header.IPv4Broadcast, 8 * header.IPv4AddressSize},
   112  		}, NeverPrimaryEndpoint); err != nil {
   113  			return err
   114  		}
   115  	}
   116  
   117  	// Join the IPv6 All-Nodes Multicast group if the stack is configured to
   118  	// use IPv6. This is required to ensure that this node properly receives
   119  	// and responds to the various NDP messages that are destined to the
   120  	// all-nodes multicast address. An example is the Neighbor Advertisement
   121  	// when we perform Duplicate Address Detection, or Router Advertisement
   122  	// when we do Router Discovery. See RFC 4862, section 5.4.2 and RFC 4861
   123  	// section 4.2 for more information.
   124  	if _, ok := n.stack.networkProtocols[header.IPv6ProtocolNumber]; ok {
   125  		return n.joinGroup(header.IPv6ProtocolNumber, header.IPv6AllNodesMulticastAddress)
   126  	}
   127  
   128  	return nil
   129  }
   130  
// attachLinkEndpoint attaches the NIC to the endpoint, which will enable it
// to start delivering packets.
//
// The NIC hands itself to the link endpoint's Attach method; presumably the
// link endpoint then calls back into DeliverNetworkPacket for inbound packets.
func (n *NIC) attachLinkEndpoint() {
	n.linkEP.Attach(n)
}
   136  
   137  // setPromiscuousMode enables or disables promiscuous mode.
   138  func (n *NIC) setPromiscuousMode(enable bool) {
   139  	n.mu.Lock()
   140  	n.promiscuous = enable
   141  	n.mu.Unlock()
   142  }
   143  
   144  func (n *NIC) isPromiscuousMode() bool {
   145  	n.mu.RLock()
   146  	rv := n.promiscuous
   147  	n.mu.RUnlock()
   148  	return rv
   149  }
   150  
   151  // setSpoofing enables or disables address spoofing.
   152  func (n *NIC) setSpoofing(enable bool) {
   153  	n.mu.Lock()
   154  	n.spoofing = enable
   155  	n.mu.Unlock()
   156  }
   157  
   158  // primaryEndpoint returns the primary endpoint of n for the given network
   159  // protocol.
   160  func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber) *referencedNetworkEndpoint {
   161  	n.mu.RLock()
   162  	defer n.mu.RUnlock()
   163  
   164  	list := n.primary[protocol]
   165  	if list == nil {
   166  		return nil
   167  	}
   168  
   169  	for e := list.Front(); e != nil; e = e.Next() {
   170  		r := e.(*referencedNetworkEndpoint)
   171  		// TODO(crawshaw): allow broadcast address when SO_BROADCAST is set.
   172  		switch r.ep.ID().LocalAddress {
   173  		case header.IPv4Broadcast, header.IPv4Any:
   174  			continue
   175  		}
   176  		if r.isValidForOutgoing() && r.tryIncRef() {
   177  			return r
   178  		}
   179  	}
   180  
   181  	return nil
   182  }
   183  
   184  func (n *NIC) getRef(protocol tcpip.NetworkProtocolNumber, dst tcpip.Address) *referencedNetworkEndpoint {
   185  	return n.getRefOrCreateTemp(protocol, dst, CanBePrimaryEndpoint, n.promiscuous)
   186  }
   187  
   188  // findEndpoint finds the endpoint, if any, with the given address.
   189  func (n *NIC) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) *referencedNetworkEndpoint {
   190  	return n.getRefOrCreateTemp(protocol, address, peb, n.spoofing)
   191  }
   192  
// getRefOrCreateTemp returns the referenced network endpoint for the given
// protocol and address. If none exists a temporary one may be created if
// we are in promiscuous mode or spoofing.
//
// spoofingOrPromiscuous is the caller's snapshot of the relevant mode flag. A
// temporary endpoint is also created when the address falls inside one of the
// NIC's address ranges (excluding each range's subnet and broadcast address).
// peb is only used when a new temporary endpoint is created. The returned
// endpoint carries a reference owned by the caller; nil is returned when no
// endpoint is available.
func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior, spoofingOrPromiscuous bool) *referencedNetworkEndpoint {
	id := NetworkEndpointID{address}

	// Fast path: look the endpoint up under the shared lock.
	n.mu.RLock()

	if ref, ok := n.endpoints[id]; ok {
		// An endpoint with this id exists, check if it can be used and return it.
		switch ref.getKind() {
		case permanentExpired:
			// An expired endpoint is only usable when spoofing or
			// promiscuous; otherwise the lookup fails outright.
			if !spoofingOrPromiscuous {
				n.mu.RUnlock()
				return nil
			}
			fallthrough
		case temporary, permanent:
			if ref.tryIncRef() {
				n.mu.RUnlock()
				return ref
			}
		}
	}

	// A usable reference was not found, create a temporary one if requested by
	// the caller or if the address is found in the NIC's subnets.
	createTempEP := spoofingOrPromiscuous
	if !createTempEP {
		for _, sn := range n.addressRanges {
			// Skip the subnet address.
			if address == sn.ID() {
				continue
			}
			// For now just skip the broadcast address, until we support it.
			// FIXME(b/137608825): Add support for sending/receiving directed
			// (subnet) broadcast.
			if address == sn.Broadcast() {
				continue
			}
			if sn.Contains(address) {
				createTempEP = true
				break
			}
		}
	}

	n.mu.RUnlock()

	if !createTempEP {
		return nil
	}

	// Try again with the lock in exclusive mode. If we still can't get the
	// endpoint, create a new "temporary" endpoint. It will only exist while
	// there's a route through it.
	n.mu.Lock()
	if ref, ok := n.endpoints[id]; ok {
		// No need to check the type as we are ok with expired endpoints at this
		// point.
		if ref.tryIncRef() {
			n.mu.Unlock()
			return ref
		}
		// tryIncRef failing means the endpoint is scheduled to be removed once the
		// lock is released. Remove it here so we can create a new (temporary) one.
		// The removal logic waiting for the lock handles this case.
		n.removeEndpointLocked(ref)
	}

	// Add a new temporary endpoint.
	netProto, ok := n.stack.networkProtocols[protocol]
	if !ok {
		n.mu.Unlock()
		return nil
	}
	// The error is deliberately dropped: on failure ref is nil, which is
	// exactly what this function returns when no endpoint is available.
	ref, _ := n.addAddressLocked(tcpip.ProtocolAddress{
		Protocol: protocol,
		AddressWithPrefix: tcpip.AddressWithPrefix{
			Address:   address,
			PrefixLen: netProto.DefaultPrefixLen(),
		},
	}, peb, temporary)

	n.mu.Unlock()
	return ref
}
   280  
   281  func (n *NIC) addPermanentAddressLocked(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) (*referencedNetworkEndpoint, *tcpip.Error) {
   282  	id := NetworkEndpointID{protocolAddress.AddressWithPrefix.Address}
   283  	if ref, ok := n.endpoints[id]; ok {
   284  		switch ref.getKind() {
   285  		case permanent:
   286  			// The NIC already have a permanent endpoint with that address.
   287  			return nil, tcpip.ErrDuplicateAddress
   288  		case permanentExpired, temporary:
   289  			// Promote the endpoint to become permanent.
   290  			if ref.tryIncRef() {
   291  				ref.setKind(permanent)
   292  				return ref, nil
   293  			}
   294  			// tryIncRef failing means the endpoint is scheduled to be removed once
   295  			// the lock is released. Remove it here so we can create a new
   296  			// (permanent) one. The removal logic waiting for the lock handles this
   297  			// case.
   298  			n.removeEndpointLocked(ref)
   299  		}
   300  	}
   301  	return n.addAddressLocked(protocolAddress, peb, permanent)
   302  }
   303  
   304  func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior, kind networkEndpointKind) (*referencedNetworkEndpoint, *tcpip.Error) {
   305  	// TODO(b/141022673): Validate IP address before adding them.
   306  
   307  	// Sanity check.
   308  	id := NetworkEndpointID{protocolAddress.AddressWithPrefix.Address}
   309  	if _, ok := n.endpoints[id]; ok {
   310  		// Endpoint already exists.
   311  		return nil, tcpip.ErrDuplicateAddress
   312  	}
   313  
   314  	netProto, ok := n.stack.networkProtocols[protocolAddress.Protocol]
   315  	if !ok {
   316  		return nil, tcpip.ErrUnknownProtocol
   317  	}
   318  
   319  	// Create the new network endpoint.
   320  	ep, err := netProto.NewEndpoint(n.id, protocolAddress.AddressWithPrefix, n.stack, n, n.linkEP)
   321  	if err != nil {
   322  		return nil, err
   323  	}
   324  	ref := &referencedNetworkEndpoint{
   325  		refs:     1,
   326  		ep:       ep,
   327  		nic:      n,
   328  		protocol: protocolAddress.Protocol,
   329  		kind:     kind,
   330  	}
   331  
   332  	// Set up cache if link address resolution exists for this protocol.
   333  	if n.linkEP.Capabilities()&CapabilityResolutionRequired != 0 {
   334  		if _, ok := n.stack.linkAddrResolvers[protocolAddress.Protocol]; ok {
   335  			ref.linkCache = n.stack
   336  		}
   337  	}
   338  
   339  	// If we are adding an IPv6 unicast address, join the solicited-node
   340  	// multicast address.
   341  	if protocolAddress.Protocol == header.IPv6ProtocolNumber && header.IsV6UnicastAddress(protocolAddress.AddressWithPrefix.Address) {
   342  		snmc := header.SolicitedNodeAddr(protocolAddress.AddressWithPrefix.Address)
   343  		if err := n.joinGroupLocked(protocolAddress.Protocol, snmc); err != nil {
   344  			return nil, err
   345  		}
   346  	}
   347  
   348  	n.endpoints[id] = ref
   349  
   350  	l, ok := n.primary[protocolAddress.Protocol]
   351  	if !ok {
   352  		l = &ilist.List{}
   353  		n.primary[protocolAddress.Protocol] = l
   354  	}
   355  
   356  	switch peb {
   357  	case CanBePrimaryEndpoint:
   358  		l.PushBack(ref)
   359  	case FirstPrimaryEndpoint:
   360  		l.PushFront(ref)
   361  	}
   362  
   363  	return ref, nil
   364  }
   365  
   366  // AddAddress adds a new address to n, so that it starts accepting packets
   367  // targeted at the given address (and network protocol).
   368  func (n *NIC) AddAddress(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) *tcpip.Error {
   369  	// Add the endpoint.
   370  	n.mu.Lock()
   371  	_, err := n.addPermanentAddressLocked(protocolAddress, peb)
   372  	n.mu.Unlock()
   373  
   374  	return err
   375  }
   376  
   377  // AllAddresses returns all addresses (primary and non-primary) associated with
   378  // this NIC.
   379  func (n *NIC) AllAddresses() []tcpip.ProtocolAddress {
   380  	n.mu.RLock()
   381  	defer n.mu.RUnlock()
   382  
   383  	addrs := make([]tcpip.ProtocolAddress, 0, len(n.endpoints))
   384  	for nid, ref := range n.endpoints {
   385  		// Don't include expired or temporary endpoints to avoid confusion and
   386  		// prevent the caller from using those.
   387  		switch ref.getKind() {
   388  		case permanentExpired, temporary:
   389  			continue
   390  		}
   391  		addrs = append(addrs, tcpip.ProtocolAddress{
   392  			Protocol: ref.protocol,
   393  			AddressWithPrefix: tcpip.AddressWithPrefix{
   394  				Address:   nid.LocalAddress,
   395  				PrefixLen: ref.ep.PrefixLen(),
   396  			},
   397  		})
   398  	}
   399  	return addrs
   400  }
   401  
   402  // PrimaryAddresses returns the primary addresses associated with this NIC.
   403  func (n *NIC) PrimaryAddresses() []tcpip.ProtocolAddress {
   404  	n.mu.RLock()
   405  	defer n.mu.RUnlock()
   406  
   407  	var addrs []tcpip.ProtocolAddress
   408  	for proto, list := range n.primary {
   409  		for e := list.Front(); e != nil; e = e.Next() {
   410  			ref := e.(*referencedNetworkEndpoint)
   411  			// Don't include expired or tempory endpoints to avoid confusion and
   412  			// prevent the caller from using those.
   413  			switch ref.getKind() {
   414  			case permanentExpired, temporary:
   415  				continue
   416  			}
   417  
   418  			addrs = append(addrs, tcpip.ProtocolAddress{
   419  				Protocol: proto,
   420  				AddressWithPrefix: tcpip.AddressWithPrefix{
   421  					Address:   ref.ep.ID().LocalAddress,
   422  					PrefixLen: ref.ep.PrefixLen(),
   423  				},
   424  			})
   425  		}
   426  	}
   427  	return addrs
   428  }
   429  
   430  // AddAddressRange adds a range of addresses to n, so that it starts accepting
   431  // packets targeted at the given addresses and network protocol. The range is
   432  // given by a subnet address, and all addresses contained in the subnet are
   433  // used except for the subnet address itself and the subnet's broadcast
   434  // address.
   435  func (n *NIC) AddAddressRange(protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) {
   436  	n.mu.Lock()
   437  	n.addressRanges = append(n.addressRanges, subnet)
   438  	n.mu.Unlock()
   439  }
   440  
   441  // RemoveAddressRange removes the given address range from n.
   442  func (n *NIC) RemoveAddressRange(subnet tcpip.Subnet) {
   443  	n.mu.Lock()
   444  
   445  	// Use the same underlying array.
   446  	tmp := n.addressRanges[:0]
   447  	for _, sub := range n.addressRanges {
   448  		if sub != subnet {
   449  			tmp = append(tmp, sub)
   450  		}
   451  	}
   452  	n.addressRanges = tmp
   453  
   454  	n.mu.Unlock()
   455  }
   456  
   457  // Subnets returns the Subnets associated with this NIC.
   458  func (n *NIC) AddressRanges() []tcpip.Subnet {
   459  	n.mu.RLock()
   460  	defer n.mu.RUnlock()
   461  	sns := make([]tcpip.Subnet, 0, len(n.addressRanges)+len(n.endpoints))
   462  	for nid := range n.endpoints {
   463  		sn, err := tcpip.NewSubnet(nid.LocalAddress, tcpip.AddressMask(strings.Repeat("\xff", len(nid.LocalAddress))))
   464  		if err != nil {
   465  			// This should never happen as the mask has been carefully crafted to
   466  			// match the address.
   467  			panic("Invalid endpoint subnet: " + err.Error())
   468  		}
   469  		sns = append(sns, sn)
   470  	}
   471  	return append(sns, n.addressRanges...)
   472  }
   473  
   474  func (n *NIC) removeEndpointLocked(r *referencedNetworkEndpoint) {
   475  	id := *r.ep.ID()
   476  
   477  	// Nothing to do if the reference has already been replaced with a different
   478  	// one. This happens in the case where 1) this endpoint's ref count hit zero
   479  	// and was waiting (on the lock) to be removed and 2) the same address was
   480  	// re-added in the meantime by removing this endpoint from the list and
   481  	// adding a new one.
   482  	if n.endpoints[id] != r {
   483  		return
   484  	}
   485  
   486  	if r.getKind() == permanent {
   487  		panic("Reference count dropped to zero before being removed")
   488  	}
   489  
   490  	delete(n.endpoints, id)
   491  	wasInList := r.Next() != nil || r.Prev() != nil || r == n.primary[r.protocol].Front()
   492  	if wasInList {
   493  		n.primary[r.protocol].Remove(r)
   494  	}
   495  
   496  	r.ep.Close()
   497  }
   498  
   499  func (n *NIC) removeEndpoint(r *referencedNetworkEndpoint) {
   500  	n.mu.Lock()
   501  	n.removeEndpointLocked(r)
   502  	n.mu.Unlock()
   503  }
   504  
   505  func (n *NIC) removePermanentAddressLocked(addr tcpip.Address) *tcpip.Error {
   506  	r, ok := n.endpoints[NetworkEndpointID{addr}]
   507  	if !ok || r.getKind() != permanent {
   508  		return tcpip.ErrBadLocalAddress
   509  	}
   510  
   511  	r.setKind(permanentExpired)
   512  	if !r.decRefLocked() {
   513  		// The endpoint still has references to it.
   514  		return nil
   515  	}
   516  
   517  	// At this point the endpoint is deleted.
   518  
   519  	// If we are removing an IPv6 unicast address, leave the solicited-node
   520  	// multicast address.
   521  	if r.protocol == header.IPv6ProtocolNumber && header.IsV6UnicastAddress(addr) {
   522  		snmc := header.SolicitedNodeAddr(addr)
   523  		if err := n.leaveGroupLocked(snmc); err != nil {
   524  			return err
   525  		}
   526  	}
   527  
   528  	return nil
   529  }
   530  
// RemoveAddress removes an address from n.
//
// It takes the NIC lock exclusively and delegates to
// removePermanentAddressLocked, so it returns tcpip.ErrBadLocalAddress when
// addr is not currently a permanent address of the NIC.
func (n *NIC) RemoveAddress(addr tcpip.Address) *tcpip.Error {
	n.mu.Lock()
	defer n.mu.Unlock()
	return n.removePermanentAddressLocked(addr)
}
   537  
// joinGroup adds a new endpoint for the given multicast address, if none
// exists yet. Otherwise it just increments its count.
//
// It takes the NIC lock exclusively and delegates to joinGroupLocked.
func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error {
	n.mu.Lock()
	defer n.mu.Unlock()

	return n.joinGroupLocked(protocol, addr)
}
   546  
   547  // joinGroupLocked adds a new endpoint for the given multicast address, if none
   548  // exists yet. Otherwise it just increments its count. n MUST be locked before
   549  // joinGroupLocked is called.
   550  func (n *NIC) joinGroupLocked(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error {
   551  	id := NetworkEndpointID{addr}
   552  	joins := n.mcastJoins[id]
   553  	if joins == 0 {
   554  		netProto, ok := n.stack.networkProtocols[protocol]
   555  		if !ok {
   556  			return tcpip.ErrUnknownProtocol
   557  		}
   558  		if _, err := n.addPermanentAddressLocked(tcpip.ProtocolAddress{
   559  			Protocol: protocol,
   560  			AddressWithPrefix: tcpip.AddressWithPrefix{
   561  				Address:   addr,
   562  				PrefixLen: netProto.DefaultPrefixLen(),
   563  			},
   564  		}, NeverPrimaryEndpoint); err != nil {
   565  			return err
   566  		}
   567  	}
   568  	n.mcastJoins[id] = joins + 1
   569  	return nil
   570  }
   571  
// leaveGroup decrements the count for the given multicast address, and when it
// reaches zero removes the endpoint for this address.
//
// It takes the NIC lock exclusively and delegates to leaveGroupLocked.
func (n *NIC) leaveGroup(addr tcpip.Address) *tcpip.Error {
	n.mu.Lock()
	defer n.mu.Unlock()

	return n.leaveGroupLocked(addr)
}
   580  
   581  // leaveGroupLocked decrements the count for the given multicast address, and
   582  // when it reaches zero removes the endpoint for this address. n MUST be locked
   583  // before leaveGroupLocked is called.
   584  func (n *NIC) leaveGroupLocked(addr tcpip.Address) *tcpip.Error {
   585  	id := NetworkEndpointID{addr}
   586  	joins := n.mcastJoins[id]
   587  	switch joins {
   588  	case 0:
   589  		// There are no joins with this address on this NIC.
   590  		return tcpip.ErrBadLocalAddress
   591  	case 1:
   592  		// This is the last one, clean up.
   593  		if err := n.removePermanentAddressLocked(addr); err != nil {
   594  			return err
   595  		}
   596  	}
   597  	n.mcastJoins[id] = joins - 1
   598  	return nil
   599  }
   600  
// handlePacket hands vv to the network endpoint behind ref. It builds a route
// with makeRoute from the given protocol, addresses, local link address and
// ref (with handleLocal and multicastLoop both off), records remotelinkAddr as
// the route's remote link address, calls the endpoint's HandlePacket, and then
// drops one reference on ref. The caller must therefore pass in a reference it
// owns; it no longer owns it once handlePacket returns.
func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, localLinkAddr, remotelinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, vv buffer.VectorisedView) {
	// dst is passed ahead of src: presumably the packet's destination is the
	// route's local address and its source the remote one — confirm against
	// makeRoute.
	r := makeRoute(protocol, dst, src, localLinkAddr, ref, false /* handleLocal */, false /* multicastLoop */)
	r.RemoteLinkAddress = remotelinkAddr
	ref.ep.HandlePacket(&r, vv)
	ref.decRef()
}
   607  
// DeliverNetworkPacket finds the appropriate network protocol endpoint and
// hands the packet over for further processing. This function is called when
// the NIC receives a packet from the physical interface.
// Note that the ownership of the slice backing vv is retained by the caller.
// This rule applies only to the slice itself, not to the items of the slice;
// the ownership of the items is not retained by the caller.
//
// The NIC's Rx counters are incremented for every packet. Packets for a
// network protocol the stack does not have, or shorter than that protocol's
// minimum packet size, are counted and dropped. Otherwise the packet goes to
// the local endpoint for its destination address when getRef yields one; if it
// does not and the stack has forwarding enabled, the packet is forwarded along
// the route found for its destination; in all remaining cases it is counted as
// having an invalid address.
func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, _ tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) {
	n.stats.Rx.Packets.Increment()
	n.stats.Rx.Bytes.IncrementBy(uint64(vv.Size()))

	netProto, ok := n.stack.networkProtocols[protocol]
	if !ok {
		n.stack.stats.UnknownProtocolRcvdPackets.Increment()
		return
	}

	if netProto.Number() == header.IPv4ProtocolNumber || netProto.Number() == header.IPv6ProtocolNumber {
		n.stack.stats.IP.PacketsReceived.Increment()
	}

	// Only the first view is checked and parsed; the network header is
	// expected to sit entirely within it.
	if len(vv.First()) < netProto.MinimumPacketSize() {
		n.stack.stats.MalformedRcvdPackets.Increment()
		return
	}

	src, dst := netProto.ParseAddresses(vv.First())

	// Presumably records that src is reachable at link address remote via
	// this NIC — confirm against Stack.AddLinkAddress.
	n.stack.AddLinkAddress(n.id, src, remote)

	// Local delivery. handlePacket releases the reference taken by getRef.
	if ref := n.getRef(protocol, dst); ref != nil {
		handlePacket(protocol, dst, src, linkEP.LinkAddress(), remote, ref, vv)
		return
	}

	// This NIC doesn't care about the packet. Find a NIC that cares about the
	// packet and forward it to the NIC.
	//
	// TODO: Should we be forwarding the packet even if promiscuous?
	if n.stack.Forwarding() {
		r, err := n.stack.FindRoute(0, "", dst, protocol, false /* multicastLoop */)
		if err != nil {
			n.stack.stats.IP.InvalidAddressesReceived.Increment()
			return
		}
		defer r.Release()

		r.LocalLinkAddress = n.linkEP.LinkAddress()
		r.RemoteLinkAddress = remote

		// Found a NIC.
		// NOTE: this n shadows the receiver for the rest of the forwarding
		// branch; from here on n is the NIC the route goes through.
		n := r.ref.nic
		n.mu.RLock()
		ref, ok := n.endpoints[NetworkEndpointID{dst}]
		ok = ok && ref.isValidForOutgoing() && ref.tryIncRef()
		n.mu.RUnlock()
		if ok {
			// The route's NIC owns dst: deliver to its endpoint and release
			// the reference taken just above.
			r.RemoteAddress = src
			// TODO(b/123449044): Update the source NIC as well.
			ref.ep.HandlePacket(&r, vv)
			ref.decRef()
		} else {
			// n doesn't have a destination endpoint.
			// Send the packet out of n.
			// The first view becomes the already-built header and is taken
			// off vv so that vv holds only the remaining payload.
			hdr := buffer.NewPrependableFromView(vv.First())
			vv.RemoveFirst()

			// TODO(b/128629022): use route.WritePacket.
			if err := n.linkEP.WritePacket(&r, nil /* gso */, hdr, vv, protocol); err != nil {
				r.Stats().IP.OutgoingPacketErrors.Increment()
			} else {
				n.stats.Tx.Packets.Increment()
				n.stats.Tx.Bytes.IncrementBy(uint64(hdr.UsedLength() + vv.Size()))
			}
		}
		return
	}

	n.stack.stats.IP.InvalidAddressesReceived.Increment()
}
   687  
   688  // DeliverTransportPacket delivers the packets to the appropriate transport
   689  // protocol endpoint.
   690  func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, netHeader buffer.View, vv buffer.VectorisedView) {
   691  	state, ok := n.stack.transportProtocols[protocol]
   692  	if !ok {
   693  		n.stack.stats.UnknownProtocolRcvdPackets.Increment()
   694  		return
   695  	}
   696  
   697  	transProto := state.proto
   698  
   699  	// Raw socket packets are delivered based solely on the transport
   700  	// protocol number. We do not inspect the payload to ensure it's
   701  	// validly formed.
   702  	n.stack.demux.deliverRawPacket(r, protocol, netHeader, vv)
   703  
   704  	if len(vv.First()) < transProto.MinimumPacketSize() {
   705  		n.stack.stats.MalformedRcvdPackets.Increment()
   706  		return
   707  	}
   708  
   709  	srcPort, dstPort, err := transProto.ParsePorts(vv.First())
   710  	if err != nil {
   711  		n.stack.stats.MalformedRcvdPackets.Increment()
   712  		return
   713  	}
   714  
   715  	id := TransportEndpointID{dstPort, r.LocalAddress, srcPort, r.RemoteAddress}
   716  	if n.stack.demux.deliverPacket(r, protocol, netHeader, vv, id) {
   717  		return
   718  	}
   719  
   720  	// Try to deliver to per-stack default handler.
   721  	if state.defaultHandler != nil {
   722  		if state.defaultHandler(r, id, netHeader, vv) {
   723  			return
   724  		}
   725  	}
   726  
   727  	// We could not find an appropriate destination for this packet, so
   728  	// deliver it to the global handler.
   729  	if !transProto.HandleUnknownDestinationPacket(r, id, netHeader, vv) {
   730  		n.stack.stats.MalformedRcvdPackets.Increment()
   731  	}
   732  }
   733  
   734  // DeliverTransportControlPacket delivers control packets to the appropriate
   735  // transport protocol endpoint.
   736  func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, vv buffer.VectorisedView) {
   737  	state, ok := n.stack.transportProtocols[trans]
   738  	if !ok {
   739  		return
   740  	}
   741  
   742  	transProto := state.proto
   743  
   744  	// ICMPv4 only guarantees that 8 bytes of the transport protocol will
   745  	// be present in the payload. We know that the ports are within the
   746  	// first 8 bytes for all known transport protocols.
   747  	if len(vv.First()) < 8 {
   748  		return
   749  	}
   750  
   751  	srcPort, dstPort, err := transProto.ParsePorts(vv.First())
   752  	if err != nil {
   753  		return
   754  	}
   755  
   756  	id := TransportEndpointID{srcPort, local, dstPort, remote}
   757  	if n.stack.demux.deliverControlPacket(n, net, trans, typ, extra, vv, id) {
   758  		return
   759  	}
   760  }
   761  
// ID returns the identifier of n, as passed to newNIC when the NIC was
// created.
func (n *NIC) ID() tcpip.NICID {
	return n.id
}
   766  
// Stack returns the instance of the Stack that owns this NIC, as passed to
// newNIC when the NIC was created.
func (n *NIC) Stack() *Stack {
	return n.stack
}
   771  
// networkEndpointKind describes the lifecycle state of a referenced network
// endpoint. It is an int32 so that getKind and setKind can load and store it
// atomically.
type networkEndpointKind int32

const (
	// A permanent endpoint is created by adding a permanent address (vs. a
	// temporary one) to the NIC. Its reference count is biased by 1 to avoid
	// removal when no route holds a reference to it. It is removed by explicitly
	// removing the permanent address from the NIC.
	permanent networkEndpointKind = iota

	// An expired permanent endpoint is a permanent endpoint that had its address
	// removed from the NIC, and it is waiting to be removed once no more routes
	// hold a reference to it. This is achieved by decreasing its reference count
	// by 1. If its address is re-added before the endpoint is removed, its type
	// changes back to permanent and its reference count increases by 1 again.
	permanentExpired

	// A temporary endpoint is created for spoofing outgoing packets, or when in
	// promiscuous mode and accepting incoming packets that don't match any
	// permanent endpoint. Its reference count is not biased by 1 and the
	// endpoint is removed immediately when no more route holds a reference to
	// it. A temporary endpoint can be promoted to permanent if its address
	// is added permanently.
	temporary
)
   796  
// referencedNetworkEndpoint is a network endpoint registered on a NIC together
// with the bookkeeping the NIC keeps for it: the owning NIC, the network
// protocol, a reference count and the endpoint's kind. It embeds ilist.Entry so
// that it can be linked into the NIC's per-protocol primary list.
type referencedNetworkEndpoint struct {
	ilist.Entry
	ep       NetworkEndpoint
	nic      *NIC
	protocol tcpip.NetworkProtocolNumber

	// linkCache is set if link address resolution is enabled for this
	// protocol. Set to nil otherwise.
	linkCache LinkAddressCache

	// refs is counting references held for this endpoint. When refs hits zero it
	// triggers the automatic removal of the endpoint from the NIC. It is
	// accessed with atomic operations (see incRef, tryIncRef and decRef).
	refs int32

	// kind must only be accessed using {get,set}Kind().
	kind networkEndpointKind
}
   814  
// getKind returns the endpoint's current kind, read with an atomic load.
func (r *referencedNetworkEndpoint) getKind() networkEndpointKind {
	return networkEndpointKind(atomic.LoadInt32((*int32)(&r.kind)))
}
   818  
// setKind changes the endpoint's kind, written with an atomic store. It does
// not touch the reference count; callers that change the kind adjust the count
// themselves.
func (r *referencedNetworkEndpoint) setKind(kind networkEndpointKind) {
	atomic.StoreInt32((*int32)(&r.kind), int32(kind))
}
   822  
// isValidForOutgoing returns true if the endpoint can be used to send out a
// packet. It requires the endpoint to not be marked expired (i.e., its address
// has been removed), or the NIC to be in spoofing mode.
//
// NOTE(review): the NIC's spoofing flag is read here without taking the NIC
// lock. The callers visible in this file invoke it while holding that lock in
// shared mode; verify that any other caller does the same.
func (r *referencedNetworkEndpoint) isValidForOutgoing() bool {
	return r.getKind() != permanentExpired || r.nic.spoofing
}
   829  
// isValidForIncoming returns true if the endpoint can accept an incoming
// packet. It requires the endpoint to not be marked expired (i.e., its address
// has been removed), or the NIC to be in promiscuous mode.
//
// NOTE(review): the NIC's promiscuous flag is read here without taking the NIC
// lock; verify that callers hold it.
func (r *referencedNetworkEndpoint) isValidForIncoming() bool {
	return r.getKind() != permanentExpired || r.nic.promiscuous
}
   836  
   837  // decRef decrements the ref count and cleans up the endpoint once it reaches
   838  // zero.
   839  func (r *referencedNetworkEndpoint) decRef() {
   840  	if atomic.AddInt32(&r.refs, -1) == 0 {
   841  		r.nic.removeEndpoint(r)
   842  	}
   843  }
   844  
   845  // decRefLocked is the same as decRef but assumes that the NIC.mu mutex is
   846  // locked. Returns true if the endpoint was removed.
   847  func (r *referencedNetworkEndpoint) decRefLocked() bool {
   848  	if atomic.AddInt32(&r.refs, -1) == 0 {
   849  		r.nic.removeEndpointLocked(r)
   850  		return true
   851  	}
   852  
   853  	return false
   854  }
   855  
// incRef increments the ref count. It must only be called when the caller is
// known to be holding a reference to the endpoint, otherwise tryIncRef should
// be used.
//
// The increment is unconditional: unlike tryIncRef it does not check whether
// the count has already dropped to zero.
func (r *referencedNetworkEndpoint) incRef() {
	atomic.AddInt32(&r.refs, 1)
}
   862  
   863  // tryIncRef attempts to increment the ref count from n to n+1, but only if n is
   864  // not zero. That is, it will increment the count if the endpoint is still
   865  // alive, and do nothing if it has already been clean up.
   866  func (r *referencedNetworkEndpoint) tryIncRef() bool {
   867  	for {
   868  		v := atomic.LoadInt32(&r.refs)
   869  		if v == 0 {
   870  			return false
   871  		}
   872  
   873  		if atomic.CompareAndSwapInt32(&r.refs, v, v+1) {
   874  			return true
   875  		}
   876  	}
   877  }
   878  
// stack returns the Stack instance that owns the underlying endpoint, reached
// through the NIC the endpoint is registered on.
func (r *referencedNetworkEndpoint) stack() *Stack {
	return r.nic.stack
}