github.com/vpnishe/netstack@v1.10.6/tcpip/stack/nic.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package stack
    16  
    17  import (
    18  	"strings"
    19  	"sync"
    20  	"sync/atomic"
    21  
    22  	"github.com/vpnishe/netstack/tcpip"
    23  	"github.com/vpnishe/netstack/tcpip/buffer"
    24  	"github.com/vpnishe/netstack/tcpip/header"
    25  )
    26  
    27  // NIC represents a "network interface card" to which the networking stack is
    28  // attached.
    29  type NIC struct {
    30  	stack    *Stack
    31  	id       tcpip.NICID
    32  	name     string
    33  	linkEP   LinkEndpoint
    34  	loopback bool
    35  
    36  	mu            sync.RWMutex
    37  	spoofing      bool
    38  	promiscuous   bool
    39  	primary       map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint
    40  	endpoints     map[NetworkEndpointID]*referencedNetworkEndpoint
    41  	addressRanges []tcpip.Subnet
    42  	mcastJoins    map[NetworkEndpointID]int32
    43  	// packetEPs is protected by mu, but the contained PacketEndpoint
    44  	// values are not.
    45  	packetEPs map[tcpip.NetworkProtocolNumber][]PacketEndpoint
    46  
    47  	stats NICStats
    48  
    49  	// ndp is the NDP related state for NIC.
    50  	//
    51  	// Note, read and write operations on ndp require that the NIC is
    52  	// appropriately locked.
    53  	ndp ndpState
    54  }
    55  
    56  // NICStats includes transmitted and received stats.
    57  type NICStats struct {
    58  	Tx DirectionStats
    59  	Rx DirectionStats
    60  }
    61  
    62  // DirectionStats includes packet and byte counts.
    63  type DirectionStats struct {
    64  	Packets *tcpip.StatCounter
    65  	Bytes   *tcpip.StatCounter
    66  }
    67  
    68  // PrimaryEndpointBehavior is an enumeration of an endpoint's primacy behavior.
    69  type PrimaryEndpointBehavior int
    70  
    71  const (
    72  	// CanBePrimaryEndpoint indicates the endpoint can be used as a primary
    73  	// endpoint for new connections with no local address. This is the
    74  	// default when calling NIC.AddAddress.
    75  	CanBePrimaryEndpoint PrimaryEndpointBehavior = iota
    76  
    77  	// FirstPrimaryEndpoint indicates the endpoint should be the first
    78  	// primary endpoint considered. If there are multiple endpoints with
    79  	// this behavior, the most recently-added one will be first.
    80  	FirstPrimaryEndpoint
    81  
    82  	// NeverPrimaryEndpoint indicates the endpoint should never be a
    83  	// primary endpoint.
    84  	NeverPrimaryEndpoint
    85  )
    86  
    87  // newNIC returns a new NIC using the default NDP configurations from stack.
    88  func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, loopback bool) *NIC {
    89  	// TODO(b/141011931): Validate a LinkEndpoint (ep) is valid. For
    90  	// example, make sure that the link address it provides is a valid
    91  	// unicast ethernet address.
    92  
    93  	// TODO(b/143357959): RFC 8200 section 5 requires that IPv6 endpoints
    94  	// observe an MTU of at least 1280 bytes. Ensure that this requirement
    95  	// of IPv6 is supported on this endpoint's LinkEndpoint.
    96  
    97  	nic := &NIC{
    98  		stack:      stack,
    99  		id:         id,
   100  		name:       name,
   101  		linkEP:     ep,
   102  		loopback:   loopback,
   103  		primary:    make(map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint),
   104  		endpoints:  make(map[NetworkEndpointID]*referencedNetworkEndpoint),
   105  		mcastJoins: make(map[NetworkEndpointID]int32),
   106  		packetEPs:  make(map[tcpip.NetworkProtocolNumber][]PacketEndpoint),
   107  		stats: NICStats{
   108  			Tx: DirectionStats{
   109  				Packets: &tcpip.StatCounter{},
   110  				Bytes:   &tcpip.StatCounter{},
   111  			},
   112  			Rx: DirectionStats{
   113  				Packets: &tcpip.StatCounter{},
   114  				Bytes:   &tcpip.StatCounter{},
   115  			},
   116  		},
   117  		ndp: ndpState{
   118  			configs:        stack.ndpConfigs,
   119  			dad:            make(map[tcpip.Address]dadState),
   120  			defaultRouters: make(map[tcpip.Address]defaultRouterState),
   121  			onLinkPrefixes: make(map[tcpip.Subnet]onLinkPrefixState),
   122  		},
   123  	}
   124  	nic.ndp.nic = nic
   125  
   126  	// Register supported packet endpoint protocols.
   127  	for _, netProto := range header.Ethertypes {
   128  		nic.packetEPs[netProto] = []PacketEndpoint{}
   129  	}
   130  	for _, netProto := range stack.networkProtocols {
   131  		nic.packetEPs[netProto.Number()] = []PacketEndpoint{}
   132  	}
   133  
   134  	return nic
   135  }
   136  
   137  // enable enables the NIC. enable will attach the link to its LinkEndpoint and
   138  // join the IPv6 All-Nodes Multicast address (ff02::1).
   139  func (n *NIC) enable() *tcpip.Error {
   140  	n.attachLinkEndpoint()
   141  
   142  	// Create an endpoint to receive broadcast packets on this interface.
   143  	if _, ok := n.stack.networkProtocols[header.IPv4ProtocolNumber]; ok {
   144  		if err := n.AddAddress(tcpip.ProtocolAddress{
   145  			Protocol:          header.IPv4ProtocolNumber,
   146  			AddressWithPrefix: tcpip.AddressWithPrefix{header.IPv4Broadcast, 8 * header.IPv4AddressSize},
   147  		}, NeverPrimaryEndpoint); err != nil {
   148  			return err
   149  		}
   150  	}
   151  
   152  	// Join the IPv6 All-Nodes Multicast group if the stack is configured to
   153  	// use IPv6. This is required to ensure that this node properly receives
   154  	// and responds to the various NDP messages that are destined to the
   155  	// all-nodes multicast address. An example is the Neighbor Advertisement
   156  	// when we perform Duplicate Address Detection, or Router Advertisement
   157  	// when we do Router Discovery. See RFC 4862, section 5.4.2 and RFC 4861
   158  	// section 4.2 for more information.
   159  	//
   160  	// Also auto-generate an IPv6 link-local address based on the NIC's
   161  	// link address if it is configured to do so. Note, each interface is
   162  	// required to have IPv6 link-local unicast address, as per RFC 4291
   163  	// section 2.1.
   164  	_, ok := n.stack.networkProtocols[header.IPv6ProtocolNumber]
   165  	if !ok {
   166  		return nil
   167  	}
   168  
   169  	n.mu.Lock()
   170  	defer n.mu.Unlock()
   171  
   172  	if err := n.joinGroupLocked(header.IPv6ProtocolNumber, header.IPv6AllNodesMulticastAddress); err != nil {
   173  		return err
   174  	}
   175  
   176  	if !n.stack.autoGenIPv6LinkLocal {
   177  		return nil
   178  	}
   179  
   180  	l2addr := n.linkEP.LinkAddress()
   181  
   182  	// Only attempt to generate the link-local address if we have a
   183  	// valid MAC address.
   184  	//
   185  	// TODO(b/141011931): Validate a LinkEndpoint's link address
   186  	// (provided by LinkEndpoint.LinkAddress) before reaching this
   187  	// point.
   188  	if !header.IsValidUnicastEthernetAddress(l2addr) {
   189  		return nil
   190  	}
   191  
   192  	addr := header.LinkLocalAddr(l2addr)
   193  
   194  	_, err := n.addPermanentAddressLocked(tcpip.ProtocolAddress{
   195  		Protocol: header.IPv6ProtocolNumber,
   196  		AddressWithPrefix: tcpip.AddressWithPrefix{
   197  			Address:   addr,
   198  			PrefixLen: header.IPv6LinkLocalPrefix.PrefixLen,
   199  		},
   200  	}, CanBePrimaryEndpoint)
   201  
   202  	return err
   203  }
   204  
   205  // attachLinkEndpoint attaches the NIC to the endpoint, which will enable it
   206  // to start delivering packets.
   207  func (n *NIC) attachLinkEndpoint() {
   208  	n.linkEP.Attach(n)
   209  }
   210  
   211  // setPromiscuousMode enables or disables promiscuous mode.
   212  func (n *NIC) setPromiscuousMode(enable bool) {
   213  	n.mu.Lock()
   214  	n.promiscuous = enable
   215  	n.mu.Unlock()
   216  }
   217  
   218  func (n *NIC) isPromiscuousMode() bool {
   219  	n.mu.RLock()
   220  	rv := n.promiscuous
   221  	n.mu.RUnlock()
   222  	return rv
   223  }
   224  
   225  // setSpoofing enables or disables address spoofing.
   226  func (n *NIC) setSpoofing(enable bool) {
   227  	n.mu.Lock()
   228  	n.spoofing = enable
   229  	n.mu.Unlock()
   230  }
   231  
   232  // primaryEndpoint returns the primary endpoint of n for the given network
   233  // protocol.
   234  func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber) *referencedNetworkEndpoint {
   235  	n.mu.RLock()
   236  	defer n.mu.RUnlock()
   237  
   238  	for _, r := range n.primary[protocol] {
   239  		if r.isValidForOutgoing() && r.tryIncRef() {
   240  			return r
   241  		}
   242  	}
   243  
   244  	return nil
   245  }
   246  
   247  func (n *NIC) getRef(protocol tcpip.NetworkProtocolNumber, dst tcpip.Address) *referencedNetworkEndpoint {
   248  	return n.getRefOrCreateTemp(protocol, dst, CanBePrimaryEndpoint, n.promiscuous)
   249  }
   250  
   251  // findEndpoint finds the endpoint, if any, with the given address.
   252  func (n *NIC) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) *referencedNetworkEndpoint {
   253  	return n.getRefOrCreateTemp(protocol, address, peb, n.spoofing)
   254  }
   255  
   256  // getRefEpOrCreateTemp returns the referenced network endpoint for the given
   257  // protocol and address. If none exists a temporary one may be created if
   258  // we are in promiscuous mode or spoofing.
   259  func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior, spoofingOrPromiscuous bool) *referencedNetworkEndpoint {
   260  	id := NetworkEndpointID{address}
   261  
   262  	n.mu.RLock()
   263  
   264  	if ref, ok := n.endpoints[id]; ok {
   265  		// An endpoint with this id exists, check if it can be used and return it.
   266  		switch ref.getKind() {
   267  		case permanentExpired:
   268  			if !spoofingOrPromiscuous {
   269  				n.mu.RUnlock()
   270  				return nil
   271  			}
   272  			fallthrough
   273  		case temporary, permanent:
   274  			if ref.tryIncRef() {
   275  				n.mu.RUnlock()
   276  				return ref
   277  			}
   278  		}
   279  	}
   280  
   281  	// A usable reference was not found, create a temporary one if requested by
   282  	// the caller or if the address is found in the NIC's subnets.
   283  	createTempEP := spoofingOrPromiscuous
   284  	if !createTempEP {
   285  		for _, sn := range n.addressRanges {
   286  			// Skip the subnet address.
   287  			if address == sn.ID() {
   288  				continue
   289  			}
   290  			// For now just skip the broadcast address, until we support it.
   291  			// FIXME(b/137608825): Add support for sending/receiving directed
   292  			// (subnet) broadcast.
   293  			if address == sn.Broadcast() {
   294  				continue
   295  			}
   296  			if sn.Contains(address) {
   297  				createTempEP = true
   298  				break
   299  			}
   300  		}
   301  	}
   302  
   303  	n.mu.RUnlock()
   304  
   305  	if !createTempEP {
   306  		return nil
   307  	}
   308  
   309  	// Try again with the lock in exclusive mode. If we still can't get the
   310  	// endpoint, create a new "temporary" endpoint. It will only exist while
   311  	// there's a route through it.
   312  	n.mu.Lock()
   313  	if ref, ok := n.endpoints[id]; ok {
   314  		// No need to check the type as we are ok with expired endpoints at this
   315  		// point.
   316  		if ref.tryIncRef() {
   317  			n.mu.Unlock()
   318  			return ref
   319  		}
   320  		// tryIncRef failing means the endpoint is scheduled to be removed once the
   321  		// lock is released. Remove it here so we can create a new (temporary) one.
   322  		// The removal logic waiting for the lock handles this case.
   323  		n.removeEndpointLocked(ref)
   324  	}
   325  
   326  	// Add a new temporary endpoint.
   327  	netProto, ok := n.stack.networkProtocols[protocol]
   328  	if !ok {
   329  		n.mu.Unlock()
   330  		return nil
   331  	}
   332  	ref, _ := n.addAddressLocked(tcpip.ProtocolAddress{
   333  		Protocol: protocol,
   334  		AddressWithPrefix: tcpip.AddressWithPrefix{
   335  			Address:   address,
   336  			PrefixLen: netProto.DefaultPrefixLen(),
   337  		},
   338  	}, peb, temporary)
   339  
   340  	n.mu.Unlock()
   341  	return ref
   342  }
   343  
   344  func (n *NIC) addPermanentAddressLocked(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) (*referencedNetworkEndpoint, *tcpip.Error) {
   345  	id := NetworkEndpointID{protocolAddress.AddressWithPrefix.Address}
   346  	if ref, ok := n.endpoints[id]; ok {
   347  		switch ref.getKind() {
   348  		case permanentTentative, permanent:
   349  			// The NIC already have a permanent endpoint with that address.
   350  			return nil, tcpip.ErrDuplicateAddress
   351  		case permanentExpired, temporary:
   352  			// Promote the endpoint to become permanent and respect
   353  			// the new peb.
   354  			if ref.tryIncRef() {
   355  				ref.setKind(permanent)
   356  
   357  				refs := n.primary[ref.protocol]
   358  				for i, r := range refs {
   359  					if r == ref {
   360  						switch peb {
   361  						case CanBePrimaryEndpoint:
   362  							return ref, nil
   363  						case FirstPrimaryEndpoint:
   364  							if i == 0 {
   365  								return ref, nil
   366  							}
   367  							n.primary[r.protocol] = append(refs[:i], refs[i+1:]...)
   368  						case NeverPrimaryEndpoint:
   369  							n.primary[r.protocol] = append(refs[:i], refs[i+1:]...)
   370  							return ref, nil
   371  						}
   372  					}
   373  				}
   374  
   375  				n.insertPrimaryEndpointLocked(ref, peb)
   376  
   377  				return ref, nil
   378  			}
   379  			// tryIncRef failing means the endpoint is scheduled to be removed once
   380  			// the lock is released. Remove it here so we can create a new
   381  			// (permanent) one. The removal logic waiting for the lock handles this
   382  			// case.
   383  			n.removeEndpointLocked(ref)
   384  		}
   385  	}
   386  
   387  	return n.addAddressLocked(protocolAddress, peb, permanent)
   388  }
   389  
   390  func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior, kind networkEndpointKind) (*referencedNetworkEndpoint, *tcpip.Error) {
   391  	// TODO(b/141022673): Validate IP address before adding them.
   392  
   393  	// Sanity check.
   394  	id := NetworkEndpointID{protocolAddress.AddressWithPrefix.Address}
   395  	if _, ok := n.endpoints[id]; ok {
   396  		// Endpoint already exists.
   397  		return nil, tcpip.ErrDuplicateAddress
   398  	}
   399  
   400  	netProto, ok := n.stack.networkProtocols[protocolAddress.Protocol]
   401  	if !ok {
   402  		return nil, tcpip.ErrUnknownProtocol
   403  	}
   404  
   405  	// Create the new network endpoint.
   406  	ep, err := netProto.NewEndpoint(n.id, protocolAddress.AddressWithPrefix, n.stack, n, n.linkEP)
   407  	if err != nil {
   408  		return nil, err
   409  	}
   410  
   411  	isIPv6Unicast := protocolAddress.Protocol == header.IPv6ProtocolNumber && header.IsV6UnicastAddress(protocolAddress.AddressWithPrefix.Address)
   412  
   413  	// If the address is an IPv6 address and it is a permanent address,
   414  	// mark it as tentative so it goes through the DAD process.
   415  	if isIPv6Unicast && kind == permanent {
   416  		kind = permanentTentative
   417  	}
   418  
   419  	ref := &referencedNetworkEndpoint{
   420  		refs:     1,
   421  		ep:       ep,
   422  		nic:      n,
   423  		protocol: protocolAddress.Protocol,
   424  		kind:     kind,
   425  	}
   426  
   427  	// Set up cache if link address resolution exists for this protocol.
   428  	if n.linkEP.Capabilities()&CapabilityResolutionRequired != 0 {
   429  		if _, ok := n.stack.linkAddrResolvers[protocolAddress.Protocol]; ok {
   430  			ref.linkCache = n.stack
   431  		}
   432  	}
   433  
   434  	// If we are adding an IPv6 unicast address, join the solicited-node
   435  	// multicast address.
   436  	if isIPv6Unicast {
   437  		snmc := header.SolicitedNodeAddr(protocolAddress.AddressWithPrefix.Address)
   438  		if err := n.joinGroupLocked(protocolAddress.Protocol, snmc); err != nil {
   439  			return nil, err
   440  		}
   441  	}
   442  
   443  	n.endpoints[id] = ref
   444  
   445  	n.insertPrimaryEndpointLocked(ref, peb)
   446  
   447  	// If we are adding a tentative IPv6 address, start DAD.
   448  	if isIPv6Unicast && kind == permanentTentative {
   449  		if err := n.ndp.startDuplicateAddressDetection(protocolAddress.AddressWithPrefix.Address, ref); err != nil {
   450  			return nil, err
   451  		}
   452  	}
   453  
   454  	return ref, nil
   455  }
   456  
   457  // AddAddress adds a new address to n, so that it starts accepting packets
   458  // targeted at the given address (and network protocol).
   459  func (n *NIC) AddAddress(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) *tcpip.Error {
   460  	// Add the endpoint.
   461  	n.mu.Lock()
   462  	_, err := n.addPermanentAddressLocked(protocolAddress, peb)
   463  	n.mu.Unlock()
   464  
   465  	return err
   466  }
   467  
   468  // AllAddresses returns all addresses (primary and non-primary) associated with
   469  // this NIC.
   470  func (n *NIC) AllAddresses() []tcpip.ProtocolAddress {
   471  	n.mu.RLock()
   472  	defer n.mu.RUnlock()
   473  
   474  	addrs := make([]tcpip.ProtocolAddress, 0, len(n.endpoints))
   475  	for nid, ref := range n.endpoints {
   476  		// Don't include tentative, expired or temporary endpoints to
   477  		// avoid confusion and prevent the caller from using those.
   478  		switch ref.getKind() {
   479  		case permanentTentative, permanentExpired, temporary:
   480  			// TODO(b/140898488): Should tentative addresses be
   481  			//                    returned?
   482  			continue
   483  		}
   484  		addrs = append(addrs, tcpip.ProtocolAddress{
   485  			Protocol: ref.protocol,
   486  			AddressWithPrefix: tcpip.AddressWithPrefix{
   487  				Address:   nid.LocalAddress,
   488  				PrefixLen: ref.ep.PrefixLen(),
   489  			},
   490  		})
   491  	}
   492  	return addrs
   493  }
   494  
   495  // PrimaryAddresses returns the primary addresses associated with this NIC.
   496  func (n *NIC) PrimaryAddresses() []tcpip.ProtocolAddress {
   497  	n.mu.RLock()
   498  	defer n.mu.RUnlock()
   499  
   500  	var addrs []tcpip.ProtocolAddress
   501  	for proto, list := range n.primary {
   502  		for _, ref := range list {
   503  			// Don't include tentative, expired or tempory endpoints
   504  			// to avoid confusion and prevent the caller from using
   505  			// those.
   506  			switch ref.getKind() {
   507  			case permanentTentative, permanentExpired, temporary:
   508  				continue
   509  			}
   510  
   511  			addrs = append(addrs, tcpip.ProtocolAddress{
   512  				Protocol: proto,
   513  				AddressWithPrefix: tcpip.AddressWithPrefix{
   514  					Address:   ref.ep.ID().LocalAddress,
   515  					PrefixLen: ref.ep.PrefixLen(),
   516  				},
   517  			})
   518  		}
   519  	}
   520  	return addrs
   521  }
   522  
   523  // AddAddressRange adds a range of addresses to n, so that it starts accepting
   524  // packets targeted at the given addresses and network protocol. The range is
   525  // given by a subnet address, and all addresses contained in the subnet are
   526  // used except for the subnet address itself and the subnet's broadcast
   527  // address.
   528  func (n *NIC) AddAddressRange(protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) {
   529  	n.mu.Lock()
   530  	n.addressRanges = append(n.addressRanges, subnet)
   531  	n.mu.Unlock()
   532  }
   533  
   534  // RemoveAddressRange removes the given address range from n.
   535  func (n *NIC) RemoveAddressRange(subnet tcpip.Subnet) {
   536  	n.mu.Lock()
   537  
   538  	// Use the same underlying array.
   539  	tmp := n.addressRanges[:0]
   540  	for _, sub := range n.addressRanges {
   541  		if sub != subnet {
   542  			tmp = append(tmp, sub)
   543  		}
   544  	}
   545  	n.addressRanges = tmp
   546  
   547  	n.mu.Unlock()
   548  }
   549  
   550  // Subnets returns the Subnets associated with this NIC.
   551  func (n *NIC) AddressRanges() []tcpip.Subnet {
   552  	n.mu.RLock()
   553  	defer n.mu.RUnlock()
   554  	sns := make([]tcpip.Subnet, 0, len(n.addressRanges)+len(n.endpoints))
   555  	for nid := range n.endpoints {
   556  		sn, err := tcpip.NewSubnet(nid.LocalAddress, tcpip.AddressMask(strings.Repeat("\xff", len(nid.LocalAddress))))
   557  		if err != nil {
   558  			// This should never happen as the mask has been carefully crafted to
   559  			// match the address.
   560  			panic("Invalid endpoint subnet: " + err.Error())
   561  		}
   562  		sns = append(sns, sn)
   563  	}
   564  	return append(sns, n.addressRanges...)
   565  }
   566  
   567  // insertPrimaryEndpointLocked adds r to n's primary endpoint list as required
   568  // by peb.
   569  //
   570  // n MUST be locked.
   571  func (n *NIC) insertPrimaryEndpointLocked(r *referencedNetworkEndpoint, peb PrimaryEndpointBehavior) {
   572  	switch peb {
   573  	case CanBePrimaryEndpoint:
   574  		n.primary[r.protocol] = append(n.primary[r.protocol], r)
   575  	case FirstPrimaryEndpoint:
   576  		n.primary[r.protocol] = append([]*referencedNetworkEndpoint{r}, n.primary[r.protocol]...)
   577  	}
   578  }
   579  
   580  func (n *NIC) removeEndpointLocked(r *referencedNetworkEndpoint) {
   581  	id := *r.ep.ID()
   582  
   583  	// Nothing to do if the reference has already been replaced with a different
   584  	// one. This happens in the case where 1) this endpoint's ref count hit zero
   585  	// and was waiting (on the lock) to be removed and 2) the same address was
   586  	// re-added in the meantime by removing this endpoint from the list and
   587  	// adding a new one.
   588  	if n.endpoints[id] != r {
   589  		return
   590  	}
   591  
   592  	if r.getKind() == permanent {
   593  		panic("Reference count dropped to zero before being removed")
   594  	}
   595  
   596  	delete(n.endpoints, id)
   597  	refs := n.primary[r.protocol]
   598  	for i, ref := range refs {
   599  		if ref == r {
   600  			n.primary[r.protocol] = append(refs[:i], refs[i+1:]...)
   601  			break
   602  		}
   603  	}
   604  
   605  	r.ep.Close()
   606  }
   607  
   608  func (n *NIC) removeEndpoint(r *referencedNetworkEndpoint) {
   609  	n.mu.Lock()
   610  	n.removeEndpointLocked(r)
   611  	n.mu.Unlock()
   612  }
   613  
   614  func (n *NIC) removePermanentAddressLocked(addr tcpip.Address) *tcpip.Error {
   615  	r, ok := n.endpoints[NetworkEndpointID{addr}]
   616  	if !ok {
   617  		return tcpip.ErrBadLocalAddress
   618  	}
   619  
   620  	kind := r.getKind()
   621  	if kind != permanent && kind != permanentTentative {
   622  		return tcpip.ErrBadLocalAddress
   623  	}
   624  
   625  	isIPv6Unicast := r.protocol == header.IPv6ProtocolNumber && header.IsV6UnicastAddress(addr)
   626  
   627  	// If we are removing a tentative IPv6 unicast address, stop DAD.
   628  	if isIPv6Unicast && kind == permanentTentative {
   629  		n.ndp.stopDuplicateAddressDetection(addr)
   630  	}
   631  
   632  	r.setKind(permanentExpired)
   633  	if !r.decRefLocked() {
   634  		// The endpoint still has references to it.
   635  		return nil
   636  	}
   637  
   638  	// At this point the endpoint is deleted.
   639  
   640  	// If we are removing an IPv6 unicast address, leave the solicited-node
   641  	// multicast address.
   642  	if isIPv6Unicast {
   643  		snmc := header.SolicitedNodeAddr(addr)
   644  		if err := n.leaveGroupLocked(snmc); err != nil {
   645  			return err
   646  		}
   647  	}
   648  
   649  	return nil
   650  }
   651  
   652  // RemoveAddress removes an address from n.
   653  func (n *NIC) RemoveAddress(addr tcpip.Address) *tcpip.Error {
   654  	n.mu.Lock()
   655  	defer n.mu.Unlock()
   656  	return n.removePermanentAddressLocked(addr)
   657  }
   658  
   659  // joinGroup adds a new endpoint for the given multicast address, if none
   660  // exists yet. Otherwise it just increments its count.
   661  func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error {
   662  	n.mu.Lock()
   663  	defer n.mu.Unlock()
   664  
   665  	return n.joinGroupLocked(protocol, addr)
   666  }
   667  
   668  // joinGroupLocked adds a new endpoint for the given multicast address, if none
   669  // exists yet. Otherwise it just increments its count. n MUST be locked before
   670  // joinGroupLocked is called.
   671  func (n *NIC) joinGroupLocked(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error {
   672  	// TODO(b/143102137): When implementing MLD, make sure MLD packets are
   673  	// not sent unless a valid link-local address is available for use on n
   674  	// as an MLD packet's source address must be a link-local address as
   675  	// outlined in RFC 3810 section 5.
   676  
   677  	id := NetworkEndpointID{addr}
   678  	joins := n.mcastJoins[id]
   679  	if joins == 0 {
   680  		netProto, ok := n.stack.networkProtocols[protocol]
   681  		if !ok {
   682  			return tcpip.ErrUnknownProtocol
   683  		}
   684  		if _, err := n.addPermanentAddressLocked(tcpip.ProtocolAddress{
   685  			Protocol: protocol,
   686  			AddressWithPrefix: tcpip.AddressWithPrefix{
   687  				Address:   addr,
   688  				PrefixLen: netProto.DefaultPrefixLen(),
   689  			},
   690  		}, NeverPrimaryEndpoint); err != nil {
   691  			return err
   692  		}
   693  	}
   694  	n.mcastJoins[id] = joins + 1
   695  	return nil
   696  }
   697  
   698  // leaveGroup decrements the count for the given multicast address, and when it
   699  // reaches zero removes the endpoint for this address.
   700  func (n *NIC) leaveGroup(addr tcpip.Address) *tcpip.Error {
   701  	n.mu.Lock()
   702  	defer n.mu.Unlock()
   703  
   704  	return n.leaveGroupLocked(addr)
   705  }
   706  
   707  // leaveGroupLocked decrements the count for the given multicast address, and
   708  // when it reaches zero removes the endpoint for this address. n MUST be locked
   709  // before leaveGroupLocked is called.
   710  func (n *NIC) leaveGroupLocked(addr tcpip.Address) *tcpip.Error {
   711  	id := NetworkEndpointID{addr}
   712  	joins := n.mcastJoins[id]
   713  	switch joins {
   714  	case 0:
   715  		// There are no joins with this address on this NIC.
   716  		return tcpip.ErrBadLocalAddress
   717  	case 1:
   718  		// This is the last one, clean up.
   719  		if err := n.removePermanentAddressLocked(addr); err != nil {
   720  			return err
   721  		}
   722  	}
   723  	n.mcastJoins[id] = joins - 1
   724  	return nil
   725  }
   726  
   727  func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, localLinkAddr, remotelinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, pkt tcpip.PacketBuffer) {
   728  	r := makeRoute(protocol, dst, src, localLinkAddr, ref, false /* handleLocal */, false /* multicastLoop */)
   729  	r.RemoteLinkAddress = remotelinkAddr
   730  	ref.ep.HandlePacket(&r, pkt)
   731  	ref.decRef()
   732  }
   733  
   734  // DeliverNetworkPacket finds the appropriate network protocol endpoint and
   735  // hands the packet over for further processing. This function is called when
   736  // the NIC receives a packet from the physical interface.
   737  // Note that the ownership of the slice backing vv is retained by the caller.
   738  // This rule applies only to the slice itself, not to the items of the slice;
   739  // the ownership of the items is not retained by the caller.
   740  func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) {
   741  	n.stats.Rx.Packets.Increment()
   742  	n.stats.Rx.Bytes.IncrementBy(uint64(pkt.Data.Size()))
   743  
   744  	netProto, ok := n.stack.networkProtocols[protocol]
   745  	if !ok {
   746  		n.stack.stats.UnknownProtocolRcvdPackets.Increment()
   747  		return
   748  	}
   749  
   750  	// If no local link layer address is provided, assume it was sent
   751  	// directly to this NIC.
   752  	if local == "" {
   753  		local = n.linkEP.LinkAddress()
   754  	}
   755  
   756  	// Are any packet sockets listening for this network protocol?
   757  	n.mu.RLock()
   758  	packetEPs := n.packetEPs[protocol]
   759  	// Check whether there are packet sockets listening for every protocol.
   760  	// If we received a packet with protocol EthernetProtocolAll, then the
   761  	// previous for loop will have handled it.
   762  	if protocol != header.EthernetProtocolAll {
   763  		packetEPs = append(packetEPs, n.packetEPs[header.EthernetProtocolAll]...)
   764  	}
   765  	n.mu.RUnlock()
   766  	for _, ep := range packetEPs {
   767  		ep.HandlePacket(n.id, local, protocol, pkt.Clone())
   768  	}
   769  
   770  	if netProto.Number() == header.IPv4ProtocolNumber || netProto.Number() == header.IPv6ProtocolNumber {
   771  		n.stack.stats.IP.PacketsReceived.Increment()
   772  	}
   773  
   774  	if len(pkt.Data.First()) < netProto.MinimumPacketSize() {
   775  		n.stack.stats.MalformedRcvdPackets.Increment()
   776  		return
   777  	}
   778  
   779  	src, dst := netProto.ParseAddresses(pkt.Data.First())
   780  
   781  	if ref := n.getRef(protocol, dst); ref != nil {
   782  		handlePacket(protocol, dst, src, linkEP.LinkAddress(), remote, ref, pkt)
   783  		return
   784  	}
   785  
   786  	// This NIC doesn't care about the packet. Find a NIC that cares about the
   787  	// packet and forward it to the NIC.
   788  	//
   789  	// TODO: Should we be forwarding the packet even if promiscuous?
   790  	if n.stack.Forwarding() {
   791  		r, err := n.stack.FindRoute(0, "", dst, protocol, false /* multicastLoop */)
   792  		if err != nil {
   793  			n.stack.stats.IP.InvalidAddressesReceived.Increment()
   794  			return
   795  		}
   796  		defer r.Release()
   797  
   798  		r.LocalLinkAddress = n.linkEP.LinkAddress()
   799  		r.RemoteLinkAddress = remote
   800  
   801  		// Found a NIC.
   802  		n := r.ref.nic
   803  		n.mu.RLock()
   804  		ref, ok := n.endpoints[NetworkEndpointID{dst}]
   805  		ok = ok && ref.isValidForOutgoing() && ref.tryIncRef()
   806  		n.mu.RUnlock()
   807  		if ok {
   808  			r.RemoteAddress = src
   809  			// TODO(b/123449044): Update the source NIC as well.
   810  			ref.ep.HandlePacket(&r, pkt)
   811  			ref.decRef()
   812  		} else {
   813  			// n doesn't have a destination endpoint.
   814  			// Send the packet out of n.
   815  			pkt.Header = buffer.NewPrependableFromView(pkt.Data.First())
   816  			pkt.Data.RemoveFirst()
   817  
   818  			// TODO(b/128629022): use route.WritePacket.
   819  			if err := n.linkEP.WritePacket(&r, nil /* gso */, protocol, pkt); err != nil {
   820  				r.Stats().IP.OutgoingPacketErrors.Increment()
   821  			} else {
   822  				n.stats.Tx.Packets.Increment()
   823  				n.stats.Tx.Bytes.IncrementBy(uint64(pkt.Header.UsedLength() + pkt.Data.Size()))
   824  			}
   825  		}
   826  		return
   827  	}
   828  
   829  	// If a packet socket handled the packet, don't treat it as invalid.
   830  	if len(packetEPs) == 0 {
   831  		n.stack.stats.IP.InvalidAddressesReceived.Increment()
   832  	}
   833  }
   834  
   835  // DeliverTransportPacket delivers the packets to the appropriate transport
   836  // protocol endpoint.
   837  func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt tcpip.PacketBuffer) {
   838  	state, ok := n.stack.transportProtocols[protocol]
   839  	if !ok {
   840  		n.stack.stats.UnknownProtocolRcvdPackets.Increment()
   841  		return
   842  	}
   843  
   844  	transProto := state.proto
   845  
   846  	// Raw socket packets are delivered based solely on the transport
   847  	// protocol number. We do not inspect the payload to ensure it's
   848  	// validly formed.
   849  	n.stack.demux.deliverRawPacket(r, protocol, pkt)
   850  
   851  	if len(pkt.Data.First()) < transProto.MinimumPacketSize() {
   852  		n.stack.stats.MalformedRcvdPackets.Increment()
   853  		return
   854  	}
   855  
   856  	srcPort, dstPort, err := transProto.ParsePorts(pkt.Data.First())
   857  	if err != nil {
   858  		n.stack.stats.MalformedRcvdPackets.Increment()
   859  		return
   860  	}
   861  
   862  	id := TransportEndpointID{dstPort, r.LocalAddress, srcPort, r.RemoteAddress}
   863  	if n.stack.demux.deliverPacket(r, protocol, pkt, id) {
   864  		return
   865  	}
   866  
   867  	// Try to deliver to per-stack default handler.
   868  	if state.defaultHandler != nil {
   869  		if state.defaultHandler(r, id, pkt) {
   870  			return
   871  		}
   872  	}
   873  
   874  	// We could not find an appropriate destination for this packet, so
   875  	// deliver it to the global handler.
   876  	if !transProto.HandleUnknownDestinationPacket(r, id, pkt) {
   877  		n.stack.stats.MalformedRcvdPackets.Increment()
   878  	}
   879  }
   880  
   881  // DeliverTransportControlPacket delivers control packets to the appropriate
   882  // transport protocol endpoint.
   883  func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt tcpip.PacketBuffer) {
   884  	state, ok := n.stack.transportProtocols[trans]
   885  	if !ok {
   886  		return
   887  	}
   888  
   889  	transProto := state.proto
   890  
   891  	// ICMPv4 only guarantees that 8 bytes of the transport protocol will
   892  	// be present in the payload. We know that the ports are within the
   893  	// first 8 bytes for all known transport protocols.
   894  	if len(pkt.Data.First()) < 8 {
   895  		return
   896  	}
   897  
   898  	srcPort, dstPort, err := transProto.ParsePorts(pkt.Data.First())
   899  	if err != nil {
   900  		return
   901  	}
   902  
   903  	id := TransportEndpointID{srcPort, local, dstPort, remote}
   904  	if n.stack.demux.deliverControlPacket(n, net, trans, typ, extra, pkt, id) {
   905  		return
   906  	}
   907  }
   908  
// ID returns the tcpip.NICID stored in n's id field, i.e. the identifier of
// n.
func (n *NIC) ID() tcpip.NICID {
	return n.id
}
   913  
// Stack returns the Stack instance that owns this NIC (n's stack field).
func (n *NIC) Stack() *Stack {
	return n.stack
}
   918  
   919  // isAddrTentative returns true if addr is tentative on n.
   920  //
   921  // Note that if addr is not associated with n, then this function will return
   922  // false. It will only return true if the address is associated with the NIC
   923  // AND it is tentative.
   924  func (n *NIC) isAddrTentative(addr tcpip.Address) bool {
   925  	ref, ok := n.endpoints[NetworkEndpointID{addr}]
   926  	if !ok {
   927  		return false
   928  	}
   929  
   930  	return ref.getKind() == permanentTentative
   931  }
   932  
   933  // dupTentativeAddrDetected attempts to inform n that a tentative addr
   934  // is a duplicate on a link.
   935  //
   936  // dupTentativeAddrDetected will delete the tentative address if it exists.
   937  func (n *NIC) dupTentativeAddrDetected(addr tcpip.Address) *tcpip.Error {
   938  	n.mu.Lock()
   939  	defer n.mu.Unlock()
   940  
   941  	ref, ok := n.endpoints[NetworkEndpointID{addr}]
   942  	if !ok {
   943  		return tcpip.ErrBadAddress
   944  	}
   945  
   946  	if ref.getKind() != permanentTentative {
   947  		return tcpip.ErrInvalidEndpointState
   948  	}
   949  
   950  	return n.removePermanentAddressLocked(addr)
   951  }
   952  
   953  // setNDPConfigs sets the NDP configurations for n.
   954  //
   955  // Note, if c contains invalid NDP configuration values, it will be fixed to
   956  // use default values for the erroneous values.
   957  func (n *NIC) setNDPConfigs(c NDPConfigurations) {
   958  	c.validate()
   959  
   960  	n.mu.Lock()
   961  	n.ndp.configs = c
   962  	n.mu.Unlock()
   963  }
   964  
// handleNDPRA handles an NDP Router Advertisement message that arrived on n.
//
// ip and ra are passed through unchanged to n.ndp.handleRA; n.mu is held for
// writing for the whole duration of that call.
func (n *NIC) handleNDPRA(ip tcpip.Address, ra header.NDPRouterAdvert) {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.ndp.handleRA(ip, ra)
}
   972  
// networkEndpointKind describes how a referencedNetworkEndpoint relates to
// the NIC it belongs to. The possible values are listed below.
type networkEndpointKind int32

const (
	// A permanentTentative endpoint is a permanent address that is not yet
	// considered to be fully bound to an interface in the traditional
	// sense. That is, the address is associated with a NIC, but packets
	// destined to the address MUST NOT be accepted and MUST be silently
	// dropped, and the address MUST NOT be used as a source address for
	// outgoing packets. For IPv6, addresses will be of this kind until
	// NDP's Duplicate Address Detection has resolved, or be deleted if
	// the process results in detecting a duplicate address.
	permanentTentative networkEndpointKind = iota

	// A permanent endpoint is created by adding a permanent address (vs. a
	// temporary one) to the NIC. Its reference count is biased by 1 to avoid
	// removal when no route holds a reference to it. It is removed by explicitly
	// removing the permanent address from the NIC.
	permanent

	// An expired permanent endpoint is a permanent endpoint that had its
	// address removed from the NIC, and it is waiting to be removed once no
	// more routes hold a reference to it. This is achieved by decreasing its
	// reference count by 1. If its address is re-added before the endpoint is
	// removed, its type changes back to permanent and its reference count
	// increases by 1 again.
	permanentExpired

	// A temporary endpoint is created for spoofing outgoing packets, or when in
	// promiscuous mode and accepting incoming packets that don't match any
	// permanent endpoint. Its reference count is not biased by 1 and the
	// endpoint is removed immediately when no more routes hold a reference to
	// it. A temporary endpoint can be promoted to permanent if its address
	// is added permanently.
	temporary
)
  1007  
  1008  func (n *NIC) registerPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) *tcpip.Error {
  1009  	n.mu.Lock()
  1010  	defer n.mu.Unlock()
  1011  
  1012  	eps, ok := n.packetEPs[netProto]
  1013  	if !ok {
  1014  		return tcpip.ErrNotSupported
  1015  	}
  1016  	n.packetEPs[netProto] = append(eps, ep)
  1017  
  1018  	return nil
  1019  }
  1020  
  1021  func (n *NIC) unregisterPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
  1022  	n.mu.Lock()
  1023  	defer n.mu.Unlock()
  1024  
  1025  	eps, ok := n.packetEPs[netProto]
  1026  	if !ok {
  1027  		return
  1028  	}
  1029  
  1030  	for i, epOther := range eps {
  1031  		if epOther == ep {
  1032  			n.packetEPs[netProto] = append(eps[:i], eps[i+1:]...)
  1033  			return
  1034  		}
  1035  	}
  1036  }
  1037  
// referencedNetworkEndpoint is a reference-counted NetworkEndpoint together
// with the NIC it belongs to, its network protocol number and its kind.
type referencedNetworkEndpoint struct {
	ep       NetworkEndpoint
	nic      *NIC
	protocol tcpip.NetworkProtocolNumber

	// linkCache is set if link address resolution is enabled for this
	// protocol. Set to nil otherwise.
	linkCache LinkAddressCache

	// refs is counting references held for this endpoint. When refs hits zero it
	// triggers the automatic removal of the endpoint from the NIC.
	refs int32

	// kind must only be accessed using {get,set}Kind(), which read and write
	// it atomically.
	kind networkEndpointKind
}
  1054  
  1055  func (r *referencedNetworkEndpoint) getKind() networkEndpointKind {
  1056  	return networkEndpointKind(atomic.LoadInt32((*int32)(&r.kind)))
  1057  }
  1058  
  1059  func (r *referencedNetworkEndpoint) setKind(kind networkEndpointKind) {
  1060  	atomic.StoreInt32((*int32)(&r.kind), int32(kind))
  1061  }
  1062  
  1063  // isValidForOutgoing returns true if the endpoint can be used to send out a
  1064  // packet. It requires the endpoint to not be marked expired (i.e., its address
  1065  // has been removed), or the NIC to be in spoofing mode.
  1066  func (r *referencedNetworkEndpoint) isValidForOutgoing() bool {
  1067  	return r.getKind() != permanentExpired || r.nic.spoofing
  1068  }
  1069  
  1070  // isValidForIncoming returns true if the endpoint can accept an incoming
  1071  // packet. It requires the endpoint to not be marked expired (i.e., its address
  1072  // has been removed), or the NIC to be in promiscuous mode.
  1073  func (r *referencedNetworkEndpoint) isValidForIncoming() bool {
  1074  	return r.getKind() != permanentExpired || r.nic.promiscuous
  1075  }
  1076  
  1077  // decRef decrements the ref count and cleans up the endpoint once it reaches
  1078  // zero.
  1079  func (r *referencedNetworkEndpoint) decRef() {
  1080  	if atomic.AddInt32(&r.refs, -1) == 0 {
  1081  		r.nic.removeEndpoint(r)
  1082  	}
  1083  }
  1084  
  1085  // decRefLocked is the same as decRef but assumes that the NIC.mu mutex is
  1086  // locked. Returns true if the endpoint was removed.
  1087  func (r *referencedNetworkEndpoint) decRefLocked() bool {
  1088  	if atomic.AddInt32(&r.refs, -1) == 0 {
  1089  		r.nic.removeEndpointLocked(r)
  1090  		return true
  1091  	}
  1092  
  1093  	return false
  1094  }
  1095  
// incRef atomically increments the ref count by one, unconditionally. It must
// only be called when the caller is known to be holding a reference to the
// endpoint, otherwise tryIncRef should be used.
func (r *referencedNetworkEndpoint) incRef() {
	atomic.AddInt32(&r.refs, 1)
}
  1102  
  1103  // tryIncRef attempts to increment the ref count from n to n+1, but only if n is
  1104  // not zero. That is, it will increment the count if the endpoint is still
  1105  // alive, and do nothing if it has already been clean up.
  1106  func (r *referencedNetworkEndpoint) tryIncRef() bool {
  1107  	for {
  1108  		v := atomic.LoadInt32(&r.refs)
  1109  		if v == 0 {
  1110  			return false
  1111  		}
  1112  
  1113  		if atomic.CompareAndSwapInt32(&r.refs, v, v+1) {
  1114  			return true
  1115  		}
  1116  	}
  1117  }
  1118  
// stack returns the Stack instance that owns the underlying endpoint, i.e.
// the stack of the NIC this endpoint is attached to.
func (r *referencedNetworkEndpoint) stack() *Stack {
	return r.nic.stack
}