inet.af/netstack@v0.0.0-20220214151720-7585b01ddccf/tcpip/network/ipv6/ipv6.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package ipv6 contains the implementation of the ipv6 network protocol.
    16  package ipv6
    17  
    18  import (
    19  	"encoding/binary"
    20  	"fmt"
    21  	"hash/fnv"
    22  	"math"
    23  	"reflect"
    24  	"sort"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	"inet.af/netstack/sync"
    29  	"inet.af/netstack/tcpip"
    30  	"inet.af/netstack/tcpip/buffer"
    31  	"inet.af/netstack/tcpip/header"
    32  	"inet.af/netstack/tcpip/header/parse"
    33  	"inet.af/netstack/tcpip/network/hash"
    34  	"inet.af/netstack/tcpip/network/internal/fragmentation"
    35  	"inet.af/netstack/tcpip/network/internal/ip"
    36  	"inet.af/netstack/tcpip/stack"
    37  )
    38  
const (
	// ReassembleTimeout controls how long the fragments of an incomplete packet
	// are held while waiting for the remaining fragments.
	// As per RFC 8200 section 4.5:
	//
	//   If insufficient fragments are received to complete reassembly of a packet
	//   within 60 seconds of the reception of the first-arriving fragment of that
	//   packet, reassembly of that packet must be abandoned.
	//
	// Linux also uses 60 seconds for reassembly timeout:
	// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ipv6.h#L456
	ReassembleTimeout = 60 * time.Second

	// ProtocolNumber is the ipv6 protocol number.
	ProtocolNumber = header.IPv6ProtocolNumber

	// maxPayloadSize is the maximum size that can be encoded in the 16-bit
	// PayloadLength field of the ipv6 header.
	maxPayloadSize = 0xffff

	// DefaultTTL is the default hop limit for IPv6 Packets egressed by
	// Netstack.
	DefaultTTL = 64

	// buckets is the number of fragment identifier counters. When a packet is
	// fragmented, the counter to increment is selected by hashing the route
	// modulo buckets.
	buckets = 2048
)
    65  
// Values held by endpoint.forwarding, which must be accessed using atomic
// operations: forwardingDisabled while forwarding is disabled on the endpoint
// and forwardingEnabled while it is enabled.
const (
	forwardingDisabled = 0
	forwardingEnabled  = 1
)
    70  
// policyTable is the default policy table defined in RFC 6724 section 2.1.
//
// A more human-readable version:
//
//  Prefix        Precedence Label
//  ::1/128               50     0
//  ::/0                  40     1
//  ::ffff:0:0/96         35     4
//  2002::/16             30     2
//  2001::/32              5     5
//  fc00::/7               3    13
//  ::/96                  1     3
//  fec0::/10              1    11
//  3ffe::/16              1    12
//
// Only the label of each entry is stored here; the precedence column above is
// not represented in the table.
//
// The table is sorted by descending prefix length so longest-prefix match can
// be easily achieved by returning the first entry that contains an address
// (see getLabel). The final ::/0 entry matches every IPv6 address.
//
// We willingly left out ::/96, fec0::/10 and 3ffe::/16 since those prefix
// assignments are deprecated.
//
// As per RFC 4291 section 2.5.5.1 (for ::/96),
//
//   The "IPv4-Compatible IPv6 address" is now deprecated because the
//   current IPv6 transition mechanisms no longer use these addresses.
//   New or updated implementations are not required to support this
//   address type.
//
// As per RFC 3879 section 4 (for fec0::/10),
//
//    This document formally deprecates the IPv6 site-local unicast prefix
//    defined in [RFC3513], i.e., 1111111011 binary or FEC0::/10.
//
// As per RFC 3701 section 1 (for 3ffe::/16),
//
//   As clearly stated in [TEST-NEW], the addresses for the 6bone are
//   temporary and will be reclaimed in the future. It further states
//   that all users of these addresses (within the 3FFE::/16 prefix) will
//   be required to renumber at some time in the future.
//
// and section 2,
//
//   Thus after the pTLA allocation cutoff date January 1, 2004, it is
//   REQUIRED that no new 6bone 3FFE pTLAs be allocated.
//
// MUST NOT BE MODIFIED.
var policyTable = [...]struct {
	// subnet is the prefix covered by this policy entry.
	subnet tcpip.Subnet

	// label is the RFC 6724 label assigned to addresses within subnet.
	label uint8
}{
	// ::1/128
	{
		subnet: header.IPv6Loopback.WithPrefix().Subnet(),
		label:  0,
	},
	// ::ffff:0:0/96
	{
		subnet: header.IPv4MappedIPv6Subnet,
		label:  4,
	},
	// 2001::/32 (Teredo prefix as per RFC 4380 section 2.6).
	{
		subnet: tcpip.AddressWithPrefix{
			Address:   "\x20\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
			PrefixLen: 32,
		}.Subnet(),
		label: 5,
	},
	// 2002::/16 (6to4 prefix as per RFC 3056 section 2).
	{
		subnet: tcpip.AddressWithPrefix{
			Address:   "\x20\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
			PrefixLen: 16,
		}.Subnet(),
		label: 2,
	},
	// fc00::/7 (Unique local addresses as per RFC 4193 section 3.1).
	{
		subnet: tcpip.AddressWithPrefix{
			Address:   "\xfc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
			PrefixLen: 7,
		}.Subnet(),
		label: 13,
	},
	// ::/0
	{
		subnet: header.IPv6EmptySubnet,
		label:  1,
	},
}
   162  
   163  func getLabel(addr tcpip.Address) uint8 {
   164  	for _, p := range policyTable {
   165  		if p.subnet.Contains(addr) {
   166  			return p.label
   167  		}
   168  	}
   169  
   170  	panic(fmt.Sprintf("should have a label for address = %s", addr))
   171  }
   172  
   173  var _ stack.DuplicateAddressDetector = (*endpoint)(nil)
   174  var _ stack.LinkAddressResolver = (*endpoint)(nil)
   175  var _ stack.LinkResolvableNetworkEndpoint = (*endpoint)(nil)
   176  var _ stack.ForwardingNetworkEndpoint = (*endpoint)(nil)
   177  var _ stack.GroupAddressableEndpoint = (*endpoint)(nil)
   178  var _ stack.AddressableEndpoint = (*endpoint)(nil)
   179  var _ stack.NetworkEndpoint = (*endpoint)(nil)
   180  var _ stack.NDPEndpoint = (*endpoint)(nil)
   181  var _ NDPEndpoint = (*endpoint)(nil)
   182  
// endpoint is the IPv6 network endpoint bound to a single
// stack.NetworkInterface (nic). It carries the endpoint's enabled and
// forwarding flags, its address, NDP and MLD state, and a standalone DAD state
// used for checking arbitrary addresses.
type endpoint struct {
	nic        stack.NetworkInterface
	dispatcher stack.TransportDispatcher
	protocol   *protocol
	stats      sharedStats

	// enabled is set to 1 when the endpoint is enabled and 0 when it is
	// disabled.
	//
	// Must be accessed using atomic operations.
	enabled uint32

	// forwarding is set to forwardingEnabled when the endpoint has forwarding
	// enabled and forwardingDisabled when it is disabled.
	//
	// Must be accessed using atomic operations.
	forwarding uint32

	// mu groups the endpoint's address, NDP and MLD state together with the
	// lock that is held while that state is accessed. Methods whose name ends
	// in Locked expect mu to already be held by the caller.
	mu struct {
		sync.RWMutex

		addressableEndpointState stack.AddressableEndpointState
		ndp                      ndpState
		mld                      mldState
	}

	// dad is used to check if an arbitrary address is already assigned to some
	// neighbor.
	//
	// Note: this is different from mu.ndp.dad which is used to perform DAD for
	// addresses that are assigned to the interface. Removing an address aborts
	// DAD; if we had used the same state, handlers for a removed address would
	// not be called with the actual DAD result.
	//
	// LOCK ORDERING: mu > dad.mu.
	dad struct {
		mu struct {
			sync.Mutex

			dad ip.DAD
		}
	}
}
   226  
// NICNameFromID is a function that returns a stable name for the specified NIC,
// even if different NIC IDs are used to refer to the same NIC in different
// program runs. It is used when generating opaque interface identifiers (IIDs).
//
// The first argument is the NIC's ID. If the NIC was created with a name, that
// name is passed to NICNameFromID as the second argument.
//
// NICNameFromID SHOULD return unique NIC names so unique opaque IIDs are
// generated for the same prefix on different NICs.
type NICNameFromID func(tcpip.NICID, string) string
   235  
// OpaqueInterfaceIdentifierOptions holds the options related to the generation
// of opaque interface identifiers (IIDs) as defined by RFC 7217.
type OpaqueInterfaceIdentifierOptions struct {
	// NICNameFromID is a function that returns a stable name for a specified NIC,
	// even if the NIC ID changes over time.
	//
	// Must be specified to generate the opaque IID.
	NICNameFromID NICNameFromID

	// SecretKey is a pseudo-random number used as the secret key when generating
	// opaque IIDs as defined by RFC 7217. The key SHOULD be at least
	// header.OpaqueIIDSecretKeyMinBytes bytes long and MUST follow the minimum
	// randomness requirements for security as outlined by RFC 4086. SecretKey
	// MUST NOT change between program runs, unless explicitly changed.
	//
	// OpaqueInterfaceIdentifierOptions takes ownership of SecretKey. SecretKey
	// MUST NOT be modified after Stack is created.
	//
	// May be nil, but a nil value is highly discouraged to maintain
	// some level of randomness between nodes.
	SecretKey []byte
}
   258  
   259  // CheckDuplicateAddress implements stack.DuplicateAddressDetector.
   260  func (e *endpoint) CheckDuplicateAddress(addr tcpip.Address, h stack.DADCompletionHandler) stack.DADCheckAddressDisposition {
   261  	e.dad.mu.Lock()
   262  	defer e.dad.mu.Unlock()
   263  	return e.dad.mu.dad.CheckDuplicateAddressLocked(addr, h)
   264  }
   265  
   266  // SetDADConfigurations implements stack.DuplicateAddressDetector.
   267  func (e *endpoint) SetDADConfigurations(c stack.DADConfigurations) {
   268  	e.mu.Lock()
   269  	defer e.mu.Unlock()
   270  	e.dad.mu.Lock()
   271  	defer e.dad.mu.Unlock()
   272  
   273  	e.mu.ndp.dad.SetConfigsLocked(c)
   274  	e.dad.mu.dad.SetConfigsLocked(c)
   275  }
   276  
   277  // DuplicateAddressProtocol implements stack.DuplicateAddressDetector.
   278  func (*endpoint) DuplicateAddressProtocol() tcpip.NetworkProtocolNumber {
   279  	return ProtocolNumber
   280  }
   281  
   282  // HandleLinkResolutionFailure implements stack.LinkResolvableNetworkEndpoint.
   283  func (e *endpoint) HandleLinkResolutionFailure(pkt *stack.PacketBuffer) {
   284  	// If we are operating as a router, we should return an ICMP error to the
   285  	// original packet's sender.
   286  	if pkt.NetworkPacketInfo.IsForwardedPacket {
   287  		// TODO(gvisor.dev/issue/6005): Propagate asynchronously generated ICMP
   288  		// errors to local endpoints.
   289  		e.protocol.returnError(&icmpReasonHostUnreachable{}, pkt)
   290  		e.stats.ip.Forwarding.Errors.Increment()
   291  		e.stats.ip.Forwarding.HostUnreachable.Increment()
   292  		return
   293  	}
   294  	// handleControl expects the entire offending packet to be in the packet
   295  	// buffer's data field.
   296  	pkt = stack.NewPacketBuffer(stack.PacketBufferOptions{
   297  		Data: buffer.NewVectorisedView(pkt.Size(), pkt.Views()),
   298  	})
   299  	defer pkt.DecRef()
   300  	pkt.NICID = e.nic.ID()
   301  	pkt.NetworkProtocolNumber = ProtocolNumber
   302  	e.handleControl(&icmpv6DestinationAddressUnreachableSockError{}, pkt)
   303  }
   304  
   305  // onAddressAssignedLocked handles an address being assigned.
   306  //
   307  // Precondition: e.mu must be exclusively locked.
   308  func (e *endpoint) onAddressAssignedLocked(addr tcpip.Address) {
   309  	// As per RFC 2710 section 3,
   310  	//
   311  	//   All MLD  messages described in this document are sent with a link-local
   312  	//   IPv6 Source Address, ...
   313  	//
   314  	// If we just completed DAD for a link-local address, then attempt to send any
   315  	// queued MLD reports. Note, we may have sent reports already for some of the
   316  	// groups before we had a valid link-local address to use as the source for
   317  	// the MLD messages, but that was only so that MLD snooping switches are aware
   318  	// of our membership to groups - routers would not have handled those reports.
   319  	//
   320  	// As per RFC 3590 section 4,
   321  	//
   322  	//   MLD Report and Done messages are sent with a link-local address as
   323  	//   the IPv6 source address, if a valid address is available on the
   324  	//   interface. If a valid link-local address is not available (e.g., one
   325  	//   has not been configured), the message is sent with the unspecified
   326  	//   address (::) as the IPv6 source address.
   327  	//
   328  	//   Once a valid link-local address is available, a node SHOULD generate
   329  	//   new MLD Report messages for all multicast addresses joined on the
   330  	//   interface.
   331  	//
   332  	//   Routers receiving an MLD Report or Done message with the unspecified
   333  	//   address as the IPv6 source address MUST silently discard the packet
   334  	//   without taking any action on the packets contents.
   335  	//
   336  	//   Snooping switches MUST manage multicast forwarding state based on MLD
   337  	//   Report and Done messages sent with the unspecified address as the
   338  	//   IPv6 source address.
   339  	if header.IsV6LinkLocalUnicastAddress(addr) {
   340  		e.mu.mld.sendQueuedReports()
   341  	}
   342  }
   343  
   344  // InvalidateDefaultRouter implements stack.NDPEndpoint.
   345  func (e *endpoint) InvalidateDefaultRouter(rtr tcpip.Address) {
   346  	e.mu.Lock()
   347  	defer e.mu.Unlock()
   348  
   349  	// We represent default routers with a default (off-link) route through the
   350  	// router.
   351  	e.mu.ndp.invalidateOffLinkRoute(offLinkRoute{dest: header.IPv6EmptySubnet, router: rtr})
   352  }
   353  
   354  // SetNDPConfigurations implements NDPEndpoint.
   355  func (e *endpoint) SetNDPConfigurations(c NDPConfigurations) {
   356  	c.validate()
   357  	e.mu.Lock()
   358  	defer e.mu.Unlock()
   359  	e.mu.ndp.configs = c
   360  }
   361  
   362  // hasTentativeAddr returns true if addr is tentative on e.
   363  func (e *endpoint) hasTentativeAddr(addr tcpip.Address) bool {
   364  	e.mu.RLock()
   365  	addressEndpoint := e.getAddressRLocked(addr)
   366  	e.mu.RUnlock()
   367  	return addressEndpoint != nil && addressEndpoint.GetKind() == stack.PermanentTentative
   368  }
   369  
   370  // dupTentativeAddrDetected attempts to inform e that a tentative addr is a
   371  // duplicate on a link.
   372  //
   373  // dupTentativeAddrDetected removes the tentative address if it exists. If the
   374  // address was generated via SLAAC, an attempt is made to generate a new
   375  // address.
   376  func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address, holderLinkAddr tcpip.LinkAddress, nonce []byte) tcpip.Error {
   377  	e.mu.Lock()
   378  	defer e.mu.Unlock()
   379  
   380  	addressEndpoint := e.getAddressRLocked(addr)
   381  	if addressEndpoint == nil {
   382  		return &tcpip.ErrBadAddress{}
   383  	}
   384  
   385  	if addressEndpoint.GetKind() != stack.PermanentTentative {
   386  		return &tcpip.ErrInvalidEndpointState{}
   387  	}
   388  
   389  	switch result := e.mu.ndp.dad.ExtendIfNonceEqualLocked(addr, nonce); result {
   390  	case ip.Extended:
   391  		// The nonce we got back was the same we sent so we know the message
   392  		// indicating a duplicate address was likely ours so do not consider
   393  		// the address duplicate here.
   394  		return nil
   395  	case ip.AlreadyExtended:
   396  		// See Extended.
   397  		//
   398  		// Our DAD message was looped back already.
   399  		return nil
   400  	case ip.NoDADStateFound:
   401  		panic(fmt.Sprintf("expected DAD state for tentative address %s", addr))
   402  	case ip.NonceDisabled:
   403  		// If nonce is disabled then we have no way to know if the packet was
   404  		// looped-back so we have to assume it indicates a duplicate address.
   405  		fallthrough
   406  	case ip.NonceNotEqual:
   407  		// If the address is a SLAAC address, do not invalidate its SLAAC prefix as an
   408  		// attempt will be made to generate a new address for it.
   409  		if err := e.removePermanentEndpointLocked(addressEndpoint, false /* allowSLAACInvalidation */, &stack.DADDupAddrDetected{HolderLinkAddress: holderLinkAddr}); err != nil {
   410  			return err
   411  		}
   412  
   413  		prefix := addressEndpoint.Subnet()
   414  
   415  		switch t := addressEndpoint.ConfigType(); t {
   416  		case stack.AddressConfigStatic:
   417  		case stack.AddressConfigSlaac:
   418  			e.mu.ndp.regenerateSLAACAddr(prefix)
   419  		case stack.AddressConfigSlaacTemp:
   420  			// Do not reset the generation attempts counter for the prefix as the
   421  			// temporary address is being regenerated in response to a DAD conflict.
   422  			e.mu.ndp.regenerateTempSLAACAddr(prefix, false /* resetGenAttempts */)
   423  		default:
   424  			panic(fmt.Sprintf("unrecognized address config type = %d", t))
   425  		}
   426  
   427  		return nil
   428  	default:
   429  		panic(fmt.Sprintf("unhandled result = %d", result))
   430  	}
   431  }
   432  
   433  // Forwarding implements stack.ForwardingNetworkEndpoint.
   434  func (e *endpoint) Forwarding() bool {
   435  	return atomic.LoadUint32(&e.forwarding) == forwardingEnabled
   436  }
   437  
   438  // setForwarding sets the forwarding status for the endpoint.
   439  //
   440  // Returns true if the forwarding status was updated.
   441  func (e *endpoint) setForwarding(v bool) bool {
   442  	forwarding := uint32(forwardingDisabled)
   443  	if v {
   444  		forwarding = forwardingEnabled
   445  	}
   446  
   447  	return atomic.SwapUint32(&e.forwarding, forwarding) != forwarding
   448  }
   449  
   450  // SetForwarding implements stack.ForwardingNetworkEndpoint.
   451  func (e *endpoint) SetForwarding(forwarding bool) {
   452  	e.mu.Lock()
   453  	defer e.mu.Unlock()
   454  
   455  	if !e.setForwarding(forwarding) {
   456  		return
   457  	}
   458  
   459  	allRoutersGroups := [...]tcpip.Address{
   460  		header.IPv6AllRoutersInterfaceLocalMulticastAddress,
   461  		header.IPv6AllRoutersLinkLocalMulticastAddress,
   462  		header.IPv6AllRoutersSiteLocalMulticastAddress,
   463  	}
   464  
   465  	if forwarding {
   466  		// As per RFC 4291 section 2.8:
   467  		//
   468  		//   A router is required to recognize all addresses that a host is
   469  		//   required to recognize, plus the following addresses as identifying
   470  		//   itself:
   471  		//
   472  		//      o The All-Routers multicast addresses defined in Section 2.7.1.
   473  		//
   474  		// As per RFC 4291 section 2.7.1,
   475  		//
   476  		//      All Routers Addresses:   FF01:0:0:0:0:0:0:2
   477  		//                               FF02:0:0:0:0:0:0:2
   478  		//                               FF05:0:0:0:0:0:0:2
   479  		//
   480  		//   The above multicast addresses identify the group of all IPv6 routers,
   481  		//   within scope 1 (interface-local), 2 (link-local), or 5 (site-local).
   482  		for _, g := range allRoutersGroups {
   483  			if err := e.joinGroupLocked(g); err != nil {
   484  				// joinGroupLocked only returns an error if the group address is not a
   485  				// valid IPv6 multicast address.
   486  				panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", g, err))
   487  			}
   488  		}
   489  	} else {
   490  		for _, g := range allRoutersGroups {
   491  			switch err := e.leaveGroupLocked(g).(type) {
   492  			case nil:
   493  			case *tcpip.ErrBadLocalAddress:
   494  				// The endpoint may have already left the multicast group.
   495  			default:
   496  				panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", g, err))
   497  			}
   498  		}
   499  	}
   500  
   501  	e.mu.ndp.forwardingChanged(forwarding)
   502  }
   503  
   504  // Enable implements stack.NetworkEndpoint.
   505  func (e *endpoint) Enable() tcpip.Error {
   506  	e.mu.Lock()
   507  	defer e.mu.Unlock()
   508  
   509  	// If the NIC is not enabled, the endpoint can't do anything meaningful so
   510  	// don't enable the endpoint.
   511  	if !e.nic.Enabled() {
   512  		return &tcpip.ErrNotPermitted{}
   513  	}
   514  
   515  	// If the endpoint is already enabled, there is nothing for it to do.
   516  	if !e.setEnabled(true) {
   517  		return nil
   518  	}
   519  
   520  	// Groups may have been joined when the endpoint was disabled, or the
   521  	// endpoint may have left groups from the perspective of MLD when the
   522  	// endpoint was disabled. Either way, we need to let routers know to
   523  	// send us multicast traffic.
   524  	e.mu.mld.initializeAll()
   525  
   526  	// Join the IPv6 All-Nodes Multicast group if the stack is configured to
   527  	// use IPv6. This is required to ensure that this node properly receives
   528  	// and responds to the various NDP messages that are destined to the
   529  	// all-nodes multicast address. An example is the Neighbor Advertisement
   530  	// when we perform Duplicate Address Detection, or Router Advertisement
   531  	// when we do Router Discovery. See RFC 4862, section 5.4.2 and RFC 4861
   532  	// section 4.2 for more information.
   533  	//
   534  	// Also auto-generate an IPv6 link-local address based on the endpoint's
   535  	// link address if it is configured to do so. Note, each interface is
   536  	// required to have IPv6 link-local unicast address, as per RFC 4291
   537  	// section 2.1.
   538  
   539  	// Join the All-Nodes multicast group before starting DAD as responses to DAD
   540  	// (NDP NS) messages may be sent to the All-Nodes multicast group if the
   541  	// source address of the NDP NS is the unspecified address, as per RFC 4861
   542  	// section 7.2.4.
   543  	if err := e.joinGroupLocked(header.IPv6AllNodesMulticastAddress); err != nil {
   544  		// joinGroupLocked only returns an error if the group address is not a valid
   545  		// IPv6 multicast address.
   546  		panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv6AllNodesMulticastAddress, err))
   547  	}
   548  
   549  	// Perform DAD on the all the unicast IPv6 endpoints that are in the permanent
   550  	// state.
   551  	//
   552  	// Addresses may have already completed DAD but in the time since the endpoint
   553  	// was last enabled, other devices may have acquired the same addresses.
   554  	var err tcpip.Error
   555  	e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
   556  		addr := addressEndpoint.AddressWithPrefix().Address
   557  		if !header.IsV6UnicastAddress(addr) {
   558  			return true
   559  		}
   560  
   561  		switch addressEndpoint.GetKind() {
   562  		case stack.Permanent:
   563  			addressEndpoint.SetKind(stack.PermanentTentative)
   564  			fallthrough
   565  		case stack.PermanentTentative:
   566  			err = e.mu.ndp.startDuplicateAddressDetection(addr, addressEndpoint)
   567  			return err == nil
   568  		default:
   569  			return true
   570  		}
   571  	})
   572  	if err != nil {
   573  		return err
   574  	}
   575  
   576  	// Do not auto-generate an IPv6 link-local address for loopback devices.
   577  	if e.protocol.options.AutoGenLinkLocal && !e.nic.IsLoopback() {
   578  		// The valid and preferred lifetime is infinite for the auto-generated
   579  		// link-local address.
   580  		e.mu.ndp.doSLAAC(header.IPv6LinkLocalPrefix.Subnet(), header.NDPInfiniteLifetime, header.NDPInfiniteLifetime)
   581  	}
   582  
   583  	e.mu.ndp.startSolicitingRouters()
   584  	return nil
   585  }
   586  
   587  // Enabled implements stack.NetworkEndpoint.
   588  func (e *endpoint) Enabled() bool {
   589  	return e.nic.Enabled() && e.isEnabled()
   590  }
   591  
   592  // isEnabled returns true if the endpoint is enabled, regardless of the
   593  // enabled status of the NIC.
   594  func (e *endpoint) isEnabled() bool {
   595  	return atomic.LoadUint32(&e.enabled) == 1
   596  }
   597  
   598  // setEnabled sets the enabled status for the endpoint.
   599  //
   600  // Returns true if the enabled status was updated.
   601  func (e *endpoint) setEnabled(v bool) bool {
   602  	if v {
   603  		return atomic.SwapUint32(&e.enabled, 1) == 0
   604  	}
   605  	return atomic.SwapUint32(&e.enabled, 0) == 1
   606  }
   607  
   608  // Disable implements stack.NetworkEndpoint.
   609  func (e *endpoint) Disable() {
   610  	e.mu.Lock()
   611  	defer e.mu.Unlock()
   612  	e.disableLocked()
   613  }
   614  
   615  func (e *endpoint) disableLocked() {
   616  	if !e.Enabled() {
   617  		return
   618  	}
   619  
   620  	e.mu.ndp.stopSolicitingRouters()
   621  	// Stop DAD for all the tentative unicast addresses.
   622  	e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
   623  		if addressEndpoint.GetKind() != stack.PermanentTentative {
   624  			return true
   625  		}
   626  
   627  		addr := addressEndpoint.AddressWithPrefix().Address
   628  		if header.IsV6UnicastAddress(addr) {
   629  			e.mu.ndp.stopDuplicateAddressDetection(addr, &stack.DADAborted{})
   630  		}
   631  
   632  		return true
   633  	})
   634  	e.mu.ndp.cleanupState()
   635  
   636  	// The endpoint may have already left the multicast group.
   637  	switch err := e.leaveGroupLocked(header.IPv6AllNodesMulticastAddress).(type) {
   638  	case nil, *tcpip.ErrBadLocalAddress:
   639  	default:
   640  		panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv6AllNodesMulticastAddress, err))
   641  	}
   642  
   643  	// Leave groups from the perspective of MLD so that routers know that
   644  	// we are no longer interested in the group.
   645  	e.mu.mld.softLeaveAll()
   646  
   647  	if !e.setEnabled(false) {
   648  		panic("should have only done work to disable the endpoint if it was enabled")
   649  	}
   650  }
   651  
   652  // DefaultTTL is the default hop limit for this endpoint.
   653  func (e *endpoint) DefaultTTL() uint8 {
   654  	return e.protocol.DefaultTTL()
   655  }
   656  
   657  // MTU implements stack.NetworkEndpoint. It returns the link-layer MTU minus the
   658  // network layer max header length.
   659  func (e *endpoint) MTU() uint32 {
   660  	networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv6MinimumSize)
   661  	if err != nil {
   662  		return 0
   663  	}
   664  	return networkMTU
   665  }
   666  
   667  // MaxHeaderLength returns the maximum length needed by ipv6 headers (and
   668  // underlying protocols).
   669  func (e *endpoint) MaxHeaderLength() uint16 {
   670  	// TODO(gvisor.dev/issues/5035): The maximum header length returned here does
   671  	// not open the possibility for the caller to know about size required for
   672  	// extension headers.
   673  	return e.nic.MaxHeaderLength() + header.IPv6MinimumSize
   674  }
   675  
   676  func addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, extensionHeaders header.IPv6ExtHdrSerializer) tcpip.Error {
   677  	extHdrsLen := extensionHeaders.Length()
   678  	length := pkt.Size() + extensionHeaders.Length()
   679  	if length > math.MaxUint16 {
   680  		return &tcpip.ErrMessageTooLong{}
   681  	}
   682  	header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + extHdrsLen)).Encode(&header.IPv6Fields{
   683  		PayloadLength:     uint16(length),
   684  		TransportProtocol: params.Protocol,
   685  		HopLimit:          params.TTL,
   686  		TrafficClass:      params.TOS,
   687  		SrcAddr:           srcAddr,
   688  		DstAddr:           dstAddr,
   689  		ExtensionHeaders:  extensionHeaders,
   690  	})
   691  	pkt.NetworkProtocolNumber = ProtocolNumber
   692  	return nil
   693  }
   694  
   695  func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32) bool {
   696  	payload := pkt.TransportHeader().View().Size() + pkt.Data().Size()
   697  	return pkt.GSOOptions.Type == stack.GSONone && uint32(payload) > networkMTU
   698  }
   699  
   700  // handleFragments fragments pkt and calls the handler function on each
   701  // fragment. It returns the number of fragments handled and the number of
   702  // fragments left to be processed. The IP header must already be present in the
   703  // original packet. The transport header protocol number is required to avoid
   704  // parsing the IPv6 extension headers.
   705  func (e *endpoint) handleFragments(r *stack.Route, networkMTU uint32, pkt *stack.PacketBuffer, transProto tcpip.TransportProtocolNumber, handler func(*stack.PacketBuffer) tcpip.Error) (int, int, tcpip.Error) {
   706  	networkHeader := header.IPv6(pkt.NetworkHeader().View())
   707  
   708  	// TODO(gvisor.dev/issue/3912): Once the Authentication or ESP Headers are
   709  	// supported for outbound packets, their length should not affect the fragment
   710  	// maximum payload length because they should only be transmitted once.
   711  	fragmentPayloadLen := (networkMTU - header.IPv6FragmentHeaderSize) &^ 7
   712  	if fragmentPayloadLen < header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit {
   713  		// We need at least 8 bytes of space left for the fragmentable part because
   714  		// the fragment payload must obviously be non-zero and must be a multiple
   715  		// of 8 as per RFC 8200 section 4.5:
   716  		//   Each complete fragment, except possibly the last ("rightmost") one, is
   717  		//   an integer multiple of 8 octets long.
   718  		return 0, 1, &tcpip.ErrMessageTooLong{}
   719  	}
   720  
   721  	if fragmentPayloadLen < uint32(pkt.TransportHeader().View().Size()) {
   722  		// As per RFC 8200 Section 4.5, the Transport Header is expected to be small
   723  		// enough to fit in the first fragment.
   724  		return 0, 1, &tcpip.ErrMessageTooLong{}
   725  	}
   726  
   727  	pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadLen, calculateFragmentReserve(pkt))
   728  	id := atomic.AddUint32(&e.protocol.ids[hashRoute(r, e.protocol.hashIV)%buckets], 1)
   729  
   730  	var n int
   731  	for {
   732  		fragPkt, more := buildNextFragment(&pf, networkHeader, transProto, id)
   733  		if err := handler(fragPkt); err != nil {
   734  			return n, pf.RemainingFragmentCount() + 1, err
   735  		}
   736  		n++
   737  		if !more {
   738  			return n, pf.RemainingFragmentCount(), nil
   739  		}
   740  	}
   741  }
   742  
   743  // WritePacket writes a packet to the given destination address and protocol.
   744  func (e *endpoint) WritePacket(r *stack.Route, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) tcpip.Error {
   745  	if err := addIPHeader(r.LocalAddress(), r.RemoteAddress(), pkt, params, nil /* extensionHeaders */); err != nil {
   746  		return err
   747  	}
   748  
   749  	// iptables filtering. All packets that reach here are locally
   750  	// generated.
   751  	outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
   752  	if ok := e.protocol.stack.IPTables().CheckOutput(pkt, r, outNicName); !ok {
   753  		// iptables is telling us to drop the packet.
   754  		e.stats.ip.IPTablesOutputDropped.Increment()
   755  		return nil
   756  	}
   757  
   758  	// If the packet is manipulated as per NAT Output rules, handle packet
   759  	// based on destination address and do not send the packet to link
   760  	// layer.
   761  	//
   762  	// We should do this for every packet, rather than only NATted packets, but
   763  	// removing this check short circuits broadcasts before they are sent out to
   764  	// other hosts.
   765  	if pkt.DNATDone {
   766  		netHeader := header.IPv6(pkt.NetworkHeader().View())
   767  		if ep := e.protocol.findEndpointWithAddress(netHeader.DestinationAddress()); ep != nil {
   768  			// Since we rewrote the packet but it is being routed back to us, we
   769  			// can safely assume the checksum is valid.
   770  			ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */)
   771  			return nil
   772  		}
   773  	}
   774  
   775  	return e.writePacket(r, pkt, params.Protocol, false /* headerIncluded */)
   776  }
   777  
   778  func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer, protocol tcpip.TransportProtocolNumber, headerIncluded bool) tcpip.Error {
   779  	if r.Loop()&stack.PacketLoop != 0 {
   780  		// If the packet was generated by the stack (not a raw/packet endpoint
   781  		// where a packet may be written with the header included), then we can
   782  		// safely assume the checksum is valid.
   783  		e.handleLocalPacket(pkt, !headerIncluded /* canSkipRXChecksum */)
   784  	}
   785  	if r.Loop()&stack.PacketOut == 0 {
   786  		return nil
   787  	}
   788  
   789  	// Postrouting NAT can only change the source address, and does not alter the
   790  	// route or outgoing interface of the packet.
   791  	outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
   792  	if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, e, outNicName); !ok {
   793  		// iptables is telling us to drop the packet.
   794  		e.stats.ip.IPTablesPostroutingDropped.Increment()
   795  		return nil
   796  	}
   797  
   798  	stats := e.stats.ip
   799  	networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size()))
   800  	if err != nil {
   801  		stats.OutgoingPacketErrors.Increment()
   802  		return err
   803  	}
   804  
   805  	if packetMustBeFragmented(pkt, networkMTU) {
   806  		if pkt.NetworkPacketInfo.IsForwardedPacket {
   807  			// As per RFC 2460, section 4.5:
   808  			//   Unlike IPv4, fragmentation in IPv6 is performed only by source nodes,
   809  			//   not by routers along a packet's delivery path.
   810  			return &tcpip.ErrMessageTooLong{}
   811  		}
   812  		sent, remain, err := e.handleFragments(r, networkMTU, pkt, protocol, func(fragPkt *stack.PacketBuffer) tcpip.Error {
   813  			// TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each
   814  			// fragment one by one using WritePacket() (current strategy) or if we
   815  			// want to create a PacketBufferList from the fragments and feed it to
   816  			// WritePackets(). It'll be faster but cost more memory.
   817  			return e.nic.WritePacket(r, ProtocolNumber, fragPkt)
   818  		})
   819  		stats.PacketsSent.IncrementBy(uint64(sent))
   820  		stats.OutgoingPacketErrors.IncrementBy(uint64(remain))
   821  		return err
   822  	}
   823  
   824  	if err := e.nic.WritePacket(r, ProtocolNumber, pkt); err != nil {
   825  		stats.OutgoingPacketErrors.Increment()
   826  		return err
   827  	}
   828  
   829  	stats.PacketsSent.Increment()
   830  	return nil
   831  }
   832  
   833  // WritePackets implements stack.NetworkEndpoint.
   834  func (e *endpoint) WritePackets(r *stack.Route, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, tcpip.Error) {
   835  	if r.Loop()&stack.PacketLoop != 0 {
   836  		panic("not implemented")
   837  	}
   838  	if r.Loop()&stack.PacketOut == 0 {
   839  		return pkts.Len(), nil
   840  	}
   841  
   842  	stats := e.stats.ip
   843  	linkMTU := e.nic.MTU()
   844  	for pb := pkts.Front(); pb != nil; pb = pb.Next() {
   845  		if err := addIPHeader(r.LocalAddress(), r.RemoteAddress(), pb, params, nil /* extensionHeaders */); err != nil {
   846  			return 0, err
   847  		}
   848  
   849  		networkMTU, err := calculateNetworkMTU(linkMTU, uint32(pb.NetworkHeader().View().Size()))
   850  		if err != nil {
   851  			stats.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len()))
   852  			return 0, err
   853  		}
   854  		if packetMustBeFragmented(pb, networkMTU) {
   855  			// Keep track of the packet that is about to be fragmented so it can be
   856  			// removed once the fragmentation is done.
   857  			originalPkt := pb
   858  			if _, _, err := e.handleFragments(r, networkMTU, pb, params.Protocol, func(fragPkt *stack.PacketBuffer) tcpip.Error {
   859  				fragPkt.IncRef()
   860  				// Modify the packet list in place with the new fragments.
   861  				pkts.InsertAfter(pb, fragPkt)
   862  				pb = fragPkt
   863  				return nil
   864  			}); err != nil {
   865  				stats.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len()))
   866  				return 0, err
   867  			}
   868  			// Remove the packet that was just fragmented and process the rest.
   869  			pkts.Remove(originalPkt)
   870  		}
   871  	}
   872  
   873  	// iptables filtering. All packets that reach here are locally
   874  	// generated.
   875  	outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
   876  	outputDropped, natPkts := e.protocol.stack.IPTables().CheckOutputPackets(pkts, r, outNicName)
   877  	stats.IPTablesOutputDropped.IncrementBy(uint64(len(outputDropped)))
   878  	for pkt := range outputDropped {
   879  		pkts.Remove(pkt)
   880  	}
   881  
   882  	// The NAT-ed packets may now be destined for us.
   883  	locallyDelivered := 0
   884  	for pkt := range natPkts {
   885  		ep := e.protocol.findEndpointWithAddress(header.IPv6(pkt.NetworkHeader().View()).DestinationAddress())
   886  		if ep == nil {
   887  			// The NAT-ed packet is still destined for some remote node.
   888  			continue
   889  		}
   890  
   891  		// Do not send the locally destined packet out the NIC.
   892  		pkts.Remove(pkt)
   893  
   894  		// Deliver the packet locally.
   895  		ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */)
   896  		locallyDelivered++
   897  	}
   898  
   899  	// We ignore the list of NAT-ed packets here because Postrouting NAT can only
   900  	// change the source address, and does not alter the route or outgoing
   901  	// interface of the packet.
   902  	postroutingDropped, _ := e.protocol.stack.IPTables().CheckPostroutingPackets(pkts, r, e, outNicName)
   903  	stats.IPTablesPostroutingDropped.IncrementBy(uint64(len(postroutingDropped)))
   904  	for pkt := range postroutingDropped {
   905  		pkts.Remove(pkt)
   906  	}
   907  
   908  	// The rest of the packets can be delivered to the NIC as a batch.
   909  	pktsLen := pkts.Len()
   910  	written, err := e.nic.WritePackets(r, pkts, ProtocolNumber)
   911  	stats.PacketsSent.IncrementBy(uint64(written))
   912  	stats.OutgoingPacketErrors.IncrementBy(uint64(pktsLen - written))
   913  
   914  	// Dropped packets aren't errors, so include them in the return value.
   915  	return locallyDelivered + written + len(outputDropped) + len(postroutingDropped), err
   916  }
   917  
   918  // WriteHeaderIncludedPacket implements stack.NetworkEndpoint.
   919  func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error {
   920  	// The packet already has an IP header, but there are a few required checks.
   921  	h, ok := pkt.Data().PullUp(header.IPv6MinimumSize)
   922  	if !ok {
   923  		return &tcpip.ErrMalformedHeader{}
   924  	}
   925  	ipH := header.IPv6(h)
   926  
   927  	// Always set the payload length.
   928  	pktSize := pkt.Data().Size()
   929  	ipH.SetPayloadLength(uint16(pktSize - header.IPv6MinimumSize))
   930  
   931  	// Set the source address when zero.
   932  	if ipH.SourceAddress() == header.IPv6Any {
   933  		ipH.SetSourceAddress(r.LocalAddress())
   934  	}
   935  
   936  	// Populate the packet buffer's network header and don't allow an invalid
   937  	// packet to be sent.
   938  	//
   939  	// Note that parsing only makes sure that the packet is well formed as per the
   940  	// wire format. We also want to check if the header's fields are valid before
   941  	// sending the packet.
   942  	proto, _, _, _, ok := parse.IPv6(pkt)
   943  	if !ok || !header.IPv6(pkt.NetworkHeader().View()).IsValid(pktSize) {
   944  		return &tcpip.ErrMalformedHeader{}
   945  	}
   946  
   947  	return e.writePacket(r, pkt, proto, true /* headerIncluded */)
   948  }
   949  
   950  // forwardPacket attempts to forward a packet to its final destination.
   951  func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) ip.ForwardingError {
   952  	h := header.IPv6(pkt.NetworkHeader().View())
   953  
   954  	dstAddr := h.DestinationAddress()
   955  	// As per RFC 4291 section 2.5.6,
   956  	//
   957  	//   Routers must not forward any packets with Link-Local source or
   958  	//   destination addresses to other links.
   959  	if header.IsV6LinkLocalUnicastAddress(h.SourceAddress()) {
   960  		return &ip.ErrLinkLocalSourceAddress{}
   961  	}
   962  	if header.IsV6LinkLocalUnicastAddress(dstAddr) || header.IsV6LinkLocalMulticastAddress(dstAddr) {
   963  		return &ip.ErrLinkLocalDestinationAddress{}
   964  	}
   965  
   966  	hopLimit := h.HopLimit()
   967  	if hopLimit <= 1 {
   968  		// As per RFC 4443 section 3.3,
   969  		//
   970  		//   If a router receives a packet with a Hop Limit of zero, or if a
   971  		//   router decrements a packet's Hop Limit to zero, it MUST discard the
   972  		//   packet and originate an ICMPv6 Time Exceeded message with Code 0 to
   973  		//   the source of the packet.  This indicates either a routing loop or
   974  		//   too small an initial Hop Limit value.
   975  		//
   976  		// We return the original error rather than the result of returning
   977  		// the ICMP packet because the original error is more relevant to
   978  		// the caller.
   979  		_ = e.protocol.returnError(&icmpReasonHopLimitExceeded{}, pkt)
   980  		return &ip.ErrTTLExceeded{}
   981  	}
   982  
   983  	stk := e.protocol.stack
   984  
   985  	// Check if the destination is owned by the stack.
   986  	if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil {
   987  		inNicName := stk.FindNICNameFromID(e.nic.ID())
   988  		outNicName := stk.FindNICNameFromID(ep.nic.ID())
   989  		if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok {
   990  			// iptables is telling us to drop the packet.
   991  			e.stats.ip.IPTablesForwardDropped.Increment()
   992  			return nil
   993  		}
   994  
   995  		// The packet originally arrived on e so provide its NIC as the input NIC.
   996  		ep.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */)
   997  		return nil
   998  	}
   999  
  1000  	// Check extension headers for any errors requiring action during forwarding.
  1001  	if err := e.processExtensionHeaders(h, pkt, true /* forwarding */); err != nil {
  1002  		return &ip.ErrParameterProblem{}
  1003  	}
  1004  
  1005  	r, err := stk.FindRoute(0, "", dstAddr, ProtocolNumber, false /* multicastLoop */)
  1006  	switch err.(type) {
  1007  	case nil:
  1008  	case *tcpip.ErrNoRoute, *tcpip.ErrNetworkUnreachable:
  1009  		// We return the original error rather than the result of returning the
  1010  		// ICMP packet because the original error is more relevant to the caller.
  1011  		_ = e.protocol.returnError(&icmpReasonNetUnreachable{}, pkt)
  1012  		return &ip.ErrNoRoute{}
  1013  	default:
  1014  		return &ip.ErrOther{Err: err}
  1015  	}
  1016  	defer r.Release()
  1017  
  1018  	inNicName := stk.FindNICNameFromID(e.nic.ID())
  1019  	outNicName := stk.FindNICNameFromID(r.NICID())
  1020  	if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok {
  1021  		// iptables is telling us to drop the packet.
  1022  		e.stats.ip.IPTablesForwardDropped.Increment()
  1023  		return nil
  1024  	}
  1025  
  1026  	// We need to do a deep copy of the IP packet because
  1027  	// WriteHeaderIncludedPacket takes ownership of the packet buffer, but we do
  1028  	// not own it.
  1029  	newPkt := pkt.DeepCopyForForwarding(int(r.MaxHeaderLength()))
  1030  	defer newPkt.DecRef()
  1031  	newHdr := header.IPv6(newPkt.NetworkHeader().View())
  1032  
  1033  	// As per RFC 8200 section 3,
  1034  	//
  1035  	//   Hop Limit           8-bit unsigned integer. Decremented by 1 by
  1036  	//                       each node that forwards the packet.
  1037  	newHdr.SetHopLimit(hopLimit - 1)
  1038  
  1039  	forwardToEp, ok := e.protocol.getEndpointForNIC(r.NICID())
  1040  	if !ok {
  1041  		// The interface was removed after we obtained the route.
  1042  		return &ip.ErrOther{Err: &tcpip.ErrUnknownDevice{}}
  1043  	}
  1044  
  1045  	switch err := forwardToEp.writePacket(r, newPkt, newPkt.TransportProtocolNumber, true /* headerIncluded */); err.(type) {
  1046  	case nil:
  1047  		return nil
  1048  	case *tcpip.ErrMessageTooLong:
  1049  		// As per RFC 4443, section 3.2:
  1050  		//   A Packet Too Big MUST be sent by a router in response to a packet that
  1051  		//   it cannot forward because the packet is larger than the MTU of the
  1052  		//   outgoing link.
  1053  		_ = e.protocol.returnError(&icmpReasonPacketTooBig{}, pkt)
  1054  		return &ip.ErrMessageTooLong{}
  1055  	default:
  1056  		return &ip.ErrOther{Err: err}
  1057  	}
  1058  }
  1059  
  1060  // HandlePacket is called by the link layer when new ipv6 packets arrive for
  1061  // this endpoint.
  1062  func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) {
  1063  	stats := e.stats.ip
  1064  
  1065  	stats.PacketsReceived.Increment()
  1066  
  1067  	if !e.isEnabled() {
  1068  		stats.DisabledPacketsReceived.Increment()
  1069  		return
  1070  	}
  1071  
  1072  	h, ok := e.protocol.parseAndValidate(pkt)
  1073  	if !ok {
  1074  		stats.MalformedPacketsReceived.Increment()
  1075  		return
  1076  	}
  1077  
  1078  	if !e.nic.IsLoopback() {
  1079  		if !e.protocol.options.AllowExternalLoopbackTraffic {
  1080  			if header.IsV6LoopbackAddress(h.SourceAddress()) {
  1081  				stats.InvalidSourceAddressesReceived.Increment()
  1082  				return
  1083  			}
  1084  
  1085  			if header.IsV6LoopbackAddress(h.DestinationAddress()) {
  1086  				stats.InvalidDestinationAddressesReceived.Increment()
  1087  				return
  1088  			}
  1089  		}
  1090  
  1091  		if e.protocol.stack.HandleLocal() {
  1092  			addressEndpoint := e.AcquireAssignedAddress(header.IPv6(pkt.NetworkHeader().View()).SourceAddress(), e.nic.Promiscuous(), stack.CanBePrimaryEndpoint)
  1093  			if addressEndpoint != nil {
  1094  				addressEndpoint.DecRef()
  1095  
  1096  				// The source address is one of our own, so we never should have gotten
  1097  				// a packet like this unless HandleLocal is false or our NIC is the
  1098  				// loopback interface.
  1099  				stats.InvalidSourceAddressesReceived.Increment()
  1100  				return
  1101  			}
  1102  		}
  1103  
  1104  		// Loopback traffic skips the prerouting chain.
  1105  		inNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
  1106  		if ok := e.protocol.stack.IPTables().CheckPrerouting(pkt, e, inNicName); !ok {
  1107  			// iptables is telling us to drop the packet.
  1108  			stats.IPTablesPreroutingDropped.Increment()
  1109  			return
  1110  		}
  1111  	}
  1112  
  1113  	e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */)
  1114  }
  1115  
  1116  // handleLocalPacket is like HandlePacket except it does not perform the
  1117  // prerouting iptables hook or check for loopback traffic that originated from
  1118  // outside of the netstack (i.e. martian loopback packets).
  1119  func (e *endpoint) handleLocalPacket(pkt *stack.PacketBuffer, canSkipRXChecksum bool) {
  1120  	stats := e.stats.ip
  1121  	stats.PacketsReceived.Increment()
  1122  
  1123  	pkt = pkt.CloneToInbound()
  1124  	defer pkt.DecRef()
  1125  	pkt.RXTransportChecksumValidated = canSkipRXChecksum
  1126  
  1127  	h, ok := e.protocol.parseAndValidate(pkt)
  1128  	if !ok {
  1129  		stats.MalformedPacketsReceived.Increment()
  1130  		return
  1131  	}
  1132  
  1133  	e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */)
  1134  }
  1135  
  1136  func (e *endpoint) handleValidatedPacket(h header.IPv6, pkt *stack.PacketBuffer, inNICName string) {
  1137  	pkt.NICID = e.nic.ID()
  1138  
  1139  	// Raw socket packets are delivered based solely on the transport protocol
  1140  	// number. We only require that the packet be valid IPv6.
  1141  	e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt)
  1142  
  1143  	stats := e.stats.ip
  1144  	stats.ValidPacketsReceived.Increment()
  1145  
  1146  	srcAddr := h.SourceAddress()
  1147  	dstAddr := h.DestinationAddress()
  1148  
  1149  	// As per RFC 4291 section 2.7:
  1150  	//   Multicast addresses must not be used as source addresses in IPv6
  1151  	//   packets or appear in any Routing header.
  1152  	if header.IsV6MulticastAddress(srcAddr) {
  1153  		stats.InvalidSourceAddressesReceived.Increment()
  1154  		return
  1155  	}
  1156  
  1157  	// The destination address should be an address we own or a group we joined
  1158  	// for us to receive the packet. Otherwise, attempt to forward the packet.
  1159  	if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint); addressEndpoint != nil {
  1160  		addressEndpoint.DecRef()
  1161  	} else if !e.IsInGroup(dstAddr) {
  1162  		if !e.Forwarding() {
  1163  			stats.InvalidDestinationAddressesReceived.Increment()
  1164  			return
  1165  		}
  1166  		switch err := e.forwardPacket(pkt); err.(type) {
  1167  		case nil:
  1168  			return
  1169  		case *ip.ErrLinkLocalSourceAddress:
  1170  			e.stats.ip.Forwarding.LinkLocalSource.Increment()
  1171  		case *ip.ErrLinkLocalDestinationAddress:
  1172  			e.stats.ip.Forwarding.LinkLocalDestination.Increment()
  1173  		case *ip.ErrTTLExceeded:
  1174  			e.stats.ip.Forwarding.ExhaustedTTL.Increment()
  1175  		case *ip.ErrNoRoute:
  1176  			e.stats.ip.Forwarding.Unrouteable.Increment()
  1177  		case *ip.ErrParameterProblem:
  1178  			e.stats.ip.Forwarding.ExtensionHeaderProblem.Increment()
  1179  		case *ip.ErrMessageTooLong:
  1180  			e.stats.ip.Forwarding.PacketTooBig.Increment()
  1181  		default:
  1182  			panic(fmt.Sprintf("unexpected error %s while trying to forward packet: %#v", err, pkt))
  1183  		}
  1184  		e.stats.ip.Forwarding.Errors.Increment()
  1185  		return
  1186  	}
  1187  
  1188  	// iptables filtering. All packets that reach here are intended for
  1189  	// this machine and need not be forwarded.
  1190  	if ok := e.protocol.stack.IPTables().CheckInput(pkt, inNICName); !ok {
  1191  		// iptables is telling us to drop the packet.
  1192  		stats.IPTablesInputDropped.Increment()
  1193  		return
  1194  	}
  1195  
  1196  	// Any returned error is only useful for terminating execution early, but
  1197  	// we have nothing left to do, so we can drop it.
  1198  	_ = e.processExtensionHeaders(h, pkt, false /* forwarding */)
  1199  }
  1200  
  1201  // processExtensionHeaders processes the extension headers in the given packet.
  1202  // Returns an error if the processing of a header failed or if the packet should
  1203  // be discarded.
  1204  func (e *endpoint) processExtensionHeaders(h header.IPv6, pkt *stack.PacketBuffer, forwarding bool) error {
  1205  	stats := e.stats.ip
  1206  	srcAddr := h.SourceAddress()
  1207  	dstAddr := h.DestinationAddress()
  1208  
  1209  	// Create a VV to parse the packet. We don't plan to modify anything here.
  1210  	// vv consists of:
  1211  	// - Any IPv6 header bytes after the first 40 (i.e. extensions).
  1212  	// - The transport header, if present.
  1213  	// - Any other payload data.
  1214  	vv := pkt.NetworkHeader().View()[header.IPv6MinimumSize:].ToVectorisedView()
  1215  	vv.AppendView(pkt.TransportHeader().View())
  1216  	vv.AppendViews(pkt.Data().Views())
  1217  	it := header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(h.NextHeader()), vv)
  1218  
  1219  	var (
  1220  		hasFragmentHeader bool
  1221  		routerAlert       *header.IPv6RouterAlertOption
  1222  	)
  1223  
  1224  	for {
  1225  		// Keep track of the start of the previous header so we can report the
  1226  		// special case of a Hop by Hop at a location other than at the start.
  1227  		previousHeaderStart := it.HeaderOffset()
  1228  		extHdr, done, err := it.Next()
  1229  		if err != nil {
  1230  			stats.MalformedPacketsReceived.Increment()
  1231  			return err
  1232  		}
  1233  		if done {
  1234  			break
  1235  		}
  1236  
  1237  		// As per RFC 8200, section 4:
  1238  		//
  1239  		//   Extension headers (except for the Hop-by-Hop Options header) are
  1240  		//   not processed, inserted, or deleted by any node along a packet's
  1241  		//   delivery path until the packet reaches the node identified in the
  1242  		//   Destination Address field of the IPv6 header.
  1243  		//
  1244  		// Furthermore, as per RFC 8200 section 4.1, the Hop By Hop extension
  1245  		// header is restricted to appear first in the list of extension headers.
  1246  		//
  1247  		// Therefore, we can immediately return once we hit any header other
  1248  		// than the Hop-by-Hop header while forwarding a packet.
  1249  		if forwarding {
  1250  			if _, ok := extHdr.(header.IPv6HopByHopOptionsExtHdr); !ok {
  1251  				return nil
  1252  			}
  1253  		}
  1254  
  1255  		switch extHdr := extHdr.(type) {
  1256  		case header.IPv6HopByHopOptionsExtHdr:
  1257  			// As per RFC 8200 section 4.1, the Hop By Hop extension header is
  1258  			// restricted to appear immediately after an IPv6 fixed header.
  1259  			if previousHeaderStart != 0 {
  1260  				_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1261  					code:       header.ICMPv6UnknownHeader,
  1262  					pointer:    previousHeaderStart,
  1263  					forwarding: forwarding,
  1264  				}, pkt)
  1265  				return fmt.Errorf("found Hop-by-Hop header = %#v with non-zero previous header offset = %d", extHdr, previousHeaderStart)
  1266  			}
  1267  
  1268  			optsIt := extHdr.Iter()
  1269  
  1270  			for {
  1271  				opt, done, err := optsIt.Next()
  1272  				if err != nil {
  1273  					stats.MalformedPacketsReceived.Increment()
  1274  					return err
  1275  				}
  1276  				if done {
  1277  					break
  1278  				}
  1279  
  1280  				switch opt := opt.(type) {
  1281  				case *header.IPv6RouterAlertOption:
  1282  					if routerAlert != nil {
  1283  						// As per RFC 2711 section 3, there should be at most one Router
  1284  						// Alert option per packet.
  1285  						//
  1286  						//    There MUST only be one option of this type, regardless of
  1287  						//    value, per Hop-by-Hop header.
  1288  						stats.MalformedPacketsReceived.Increment()
  1289  						return fmt.Errorf("found multiple Router Alert options (%#v, %#v)", opt, routerAlert)
  1290  					}
  1291  					routerAlert = opt
  1292  					stats.OptionRouterAlertReceived.Increment()
  1293  				default:
  1294  					switch opt.UnknownAction() {
  1295  					case header.IPv6OptionUnknownActionSkip:
  1296  					case header.IPv6OptionUnknownActionDiscard:
  1297  						return fmt.Errorf("found unknown Hop-by-Hop header option = %#v with discard action", opt)
  1298  					case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest:
  1299  						if header.IsV6MulticastAddress(dstAddr) {
  1300  							return fmt.Errorf("found unknown hop-by-hop header option = %#v with discard action", opt)
  1301  						}
  1302  						fallthrough
  1303  					case header.IPv6OptionUnknownActionDiscardSendICMP:
  1304  						// This case satisfies a requirement of RFC 8200 section 4.2 which
  1305  						// states that an unknown option starting with bits [10] should:
  1306  						//
  1307  						//    discard the packet and, regardless of whether or not the
  1308  						//    packet's Destination Address was a multicast address, send an
  1309  						//    ICMP Parameter Problem, Code 2, message to the packet's
  1310  						//    Source Address, pointing to the unrecognized Option Type.
  1311  						_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1312  							code:               header.ICMPv6UnknownOption,
  1313  							pointer:            it.ParseOffset() + optsIt.OptionOffset(),
  1314  							respondToMulticast: true,
  1315  							forwarding:         forwarding,
  1316  						}, pkt)
  1317  						return fmt.Errorf("found unknown hop-by-hop header option = %#v with discard action", opt)
  1318  					default:
  1319  						panic(fmt.Sprintf("unrecognized action for an unrecognized Hop By Hop extension header option = %#v", opt))
  1320  					}
  1321  				}
  1322  			}
  1323  
  1324  		case header.IPv6RoutingExtHdr:
  1325  			// As per RFC 8200 section 4.4, if a node encounters a routing header with
  1326  			// an unrecognized routing type value, with a non-zero Segments Left
  1327  			// value, the node must discard the packet and send an ICMP Parameter
  1328  			// Problem, Code 0 to the packet's Source Address, pointing to the
  1329  			// unrecognized Routing Type.
  1330  			//
  1331  			// If the Segments Left is 0, the node must ignore the Routing extension
  1332  			// header and process the next header in the packet.
  1333  			//
  1334  			// Note, the stack does not yet handle any type of routing extension
  1335  			// header, so we just make sure Segments Left is zero before processing
  1336  			// the next extension header.
  1337  			if extHdr.SegmentsLeft() != 0 {
  1338  				_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1339  					code:    header.ICMPv6ErroneousHeader,
  1340  					pointer: it.ParseOffset(),
  1341  					// For the sake of consistency, we're using the value of `forwarding`
  1342  					// here, even though it should always be false if we've reached this
  1343  					// point. If `forwarding` is true here, we're executing undefined
  1344  					// behavior no matter what.
  1345  					forwarding: forwarding,
  1346  				}, pkt)
  1347  				return fmt.Errorf("found unrecognized routing type with non-zero segments left in header = %#v", extHdr)
  1348  			}
  1349  
  1350  		case header.IPv6FragmentExtHdr:
  1351  			hasFragmentHeader = true
  1352  
  1353  			if extHdr.IsAtomic() {
  1354  				// This fragment extension header indicates that this packet is an
  1355  				// atomic fragment. An atomic fragment is a fragment that contains
  1356  				// all the data required to reassemble a full packet. As per RFC 6946,
  1357  				// atomic fragments must not interfere with "normal" fragmented traffic
  1358  				// so we skip processing the fragment instead of feeding it through the
  1359  				// reassembly process below.
  1360  				continue
  1361  			}
  1362  
  1363  			fragmentFieldOffset := it.ParseOffset()
  1364  
  1365  			// Don't consume the iterator if we have the first fragment because we
  1366  			// will use it to validate that the first fragment holds the upper layer
  1367  			// header.
  1368  			rawPayload := it.AsRawHeader(extHdr.FragmentOffset() != 0 /* consume */)
  1369  
  1370  			if extHdr.FragmentOffset() == 0 {
  1371  				// Check that the iterator ends with a raw payload as the first fragment
  1372  				// should include all headers up to and including any upper layer
  1373  				// headers, as per RFC 8200 section 4.5; only upper layer data
  1374  				// (non-headers) should follow the fragment extension header.
  1375  				var lastHdr header.IPv6PayloadHeader
  1376  
  1377  				for {
  1378  					it, done, err := it.Next()
  1379  					if err != nil {
  1380  						stats.MalformedPacketsReceived.Increment()
  1381  						stats.MalformedFragmentsReceived.Increment()
  1382  						return err
  1383  					}
  1384  					if done {
  1385  						break
  1386  					}
  1387  
  1388  					lastHdr = it
  1389  				}
  1390  
  1391  				// If the last header is a raw header, then the last portion of the IPv6
  1392  				// payload is not a known IPv6 extension header. Note, this does not
  1393  				// mean that the last portion is an upper layer header or not an
  1394  				// extension header because:
  1395  				//  1) we do not yet support all extension headers
  1396  				//  2) we do not validate the upper layer header before reassembling.
  1397  				//
  1398  				// This check makes sure that a known IPv6 extension header is not
  1399  				// present after the Fragment extension header in a non-initial
  1400  				// fragment.
  1401  				//
  1402  				// TODO(#2196): Support IPv6 Authentication and Encapsulated
  1403  				// Security Payload extension headers.
  1404  				// TODO(#2333): Validate that the upper layer header is valid.
  1405  				switch lastHdr.(type) {
  1406  				case header.IPv6RawPayloadHeader:
  1407  				default:
  1408  					stats.MalformedPacketsReceived.Increment()
  1409  					stats.MalformedFragmentsReceived.Increment()
  1410  					return fmt.Errorf("known extension header = %#v present after fragment header in a non-initial fragment", lastHdr)
  1411  				}
  1412  			}
  1413  
  1414  			fragmentPayloadLen := rawPayload.Buf.Size()
  1415  			if fragmentPayloadLen == 0 {
  1416  				// Drop the packet as it's marked as a fragment but has no payload.
  1417  				stats.MalformedPacketsReceived.Increment()
  1418  				stats.MalformedFragmentsReceived.Increment()
  1419  				return fmt.Errorf("fragment has no payload")
  1420  			}
  1421  
  1422  			// As per RFC 2460 Section 4.5:
  1423  			//
  1424  			//    If the length of a fragment, as derived from the fragment packet's
  1425  			//    Payload Length field, is not a multiple of 8 octets and the M flag
  1426  			//    of that fragment is 1, then that fragment must be discarded and an
  1427  			//    ICMP Parameter Problem, Code 0, message should be sent to the source
  1428  			//    of the fragment, pointing to the Payload Length field of the
  1429  			//    fragment packet.
  1430  			if extHdr.More() && fragmentPayloadLen%header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit != 0 {
  1431  				stats.MalformedPacketsReceived.Increment()
  1432  				stats.MalformedFragmentsReceived.Increment()
  1433  				_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1434  					code:    header.ICMPv6ErroneousHeader,
  1435  					pointer: header.IPv6PayloadLenOffset,
  1436  				}, pkt)
  1437  				return fmt.Errorf("found fragment length = %d that is not a multiple of 8 octets", fragmentPayloadLen)
  1438  			}
  1439  
  1440  			// The packet is a fragment, let's try to reassemble it.
  1441  			start := extHdr.FragmentOffset() * header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit
  1442  
  1443  			// As per RFC 2460 Section 4.5:
  1444  			//
  1445  			//    If the length and offset of a fragment are such that the Payload
  1446  			//    Length of the packet reassembled from that fragment would exceed
  1447  			//    65,535 octets, then that fragment must be discarded and an ICMP
  1448  			//    Parameter Problem, Code 0, message should be sent to the source of
  1449  			//    the fragment, pointing to the Fragment Offset field of the fragment
  1450  			//    packet.
  1451  			lengthAfterReassembly := int(start) + fragmentPayloadLen
  1452  			if lengthAfterReassembly > header.IPv6MaximumPayloadSize {
  1453  				stats.MalformedPacketsReceived.Increment()
  1454  				stats.MalformedFragmentsReceived.Increment()
  1455  				_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1456  					code:    header.ICMPv6ErroneousHeader,
  1457  					pointer: fragmentFieldOffset,
  1458  				}, pkt)
  1459  				return fmt.Errorf("determined that reassembled packet length = %d would exceed allowed length = %d", lengthAfterReassembly, header.IPv6MaximumPayloadSize)
  1460  			}
  1461  
  1462  			// Note that pkt doesn't have its transport header set after reassembly,
  1463  			// and won't until DeliverNetworkPacket sets it.
  1464  			resPkt, proto, ready, err := e.protocol.fragmentation.Process(
  1465  				// IPv6 ignores the Protocol field since the ID only needs to be unique
  1466  				// across source-destination pairs, as per RFC 8200 section 4.5.
  1467  				fragmentation.FragmentID{
  1468  					Source:      srcAddr,
  1469  					Destination: dstAddr,
  1470  					ID:          extHdr.ID(),
  1471  				},
  1472  				start,
  1473  				start+uint16(fragmentPayloadLen)-1,
  1474  				extHdr.More(),
  1475  				uint8(rawPayload.Identifier),
  1476  				pkt,
  1477  			)
  1478  			if err != nil {
  1479  				stats.MalformedPacketsReceived.Increment()
  1480  				stats.MalformedFragmentsReceived.Increment()
  1481  				return err
  1482  			}
  1483  
  1484  			if ready {
  1485  				pkt = resPkt
  1486  
  1487  				// We create a new iterator with the reassembled packet because we could
  1488  				// have more extension headers in the reassembled payload, as per RFC
  1489  				// 8200 section 4.5. We also use the NextHeader value from the first
  1490  				// fragment.
  1491  				data := pkt.Data()
  1492  				dataVV := buffer.NewVectorisedView(data.Size(), data.Views())
  1493  				it = header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(proto), dataVV)
  1494  			}
  1495  
  1496  		case header.IPv6DestinationOptionsExtHdr:
  1497  			optsIt := extHdr.Iter()
  1498  
  1499  			for {
  1500  				opt, done, err := optsIt.Next()
  1501  				if err != nil {
  1502  					stats.MalformedPacketsReceived.Increment()
  1503  					return err
  1504  				}
  1505  				if done {
  1506  					break
  1507  				}
  1508  
  1509  				// We currently do not support any IPv6 Destination extension header
  1510  				// options.
  1511  				switch opt.UnknownAction() {
  1512  				case header.IPv6OptionUnknownActionSkip:
  1513  				case header.IPv6OptionUnknownActionDiscard:
  1514  					return fmt.Errorf("found unknown destination header option = %#v with discard action", opt)
  1515  				case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest:
  1516  					if header.IsV6MulticastAddress(dstAddr) {
  1517  						return fmt.Errorf("found unknown destination header option %#v with discard action", opt)
  1518  					}
  1519  					fallthrough
  1520  				case header.IPv6OptionUnknownActionDiscardSendICMP:
  1521  					// This case satisfies a requirement of RFC 8200 section 4.2
  1522  					// which states that an unknown option starting with bits [10] should:
  1523  					//
  1524  					//    discard the packet and, regardless of whether or not the
  1525  					//    packet's Destination Address was a multicast address, send an
  1526  					//    ICMP Parameter Problem, Code 2, message to the packet's
  1527  					//    Source Address, pointing to the unrecognized Option Type.
  1528  					//
  1529  					_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1530  						code:               header.ICMPv6UnknownOption,
  1531  						pointer:            it.ParseOffset() + optsIt.OptionOffset(),
  1532  						respondToMulticast: true,
  1533  					}, pkt)
  1534  					return fmt.Errorf("found unknown destination header option %#v with discard action", opt)
  1535  				default:
  1536  					panic(fmt.Sprintf("unrecognized action for an unrecognized Destination extension header option = %#v", opt))
  1537  				}
  1538  			}
  1539  
  1540  		case header.IPv6RawPayloadHeader:
  1541  			// If the last header in the payload isn't a known IPv6 extension header,
  1542  			// handle it as if it is transport layer data.
  1543  
  1544  			// Calculate the number of octets parsed from data. We want to consume all
  1545  			// the data except the unparsed portion located at the end, whose size is
  1546  			// extHdr.Buf.Size().
  1547  			trim := pkt.Data().Size() - extHdr.Buf.Size()
  1548  
  1549  			// For unfragmented packets, extHdr still contains the transport header.
  1550  			// Consume that too.
  1551  			//
  1552  			// For reassembled fragments, pkt.TransportHeader is unset, so this is a
  1553  			// no-op and pkt.Data begins with the transport header.
  1554  			trim += pkt.TransportHeader().View().Size()
  1555  
  1556  			if _, ok := pkt.Data().Consume(trim); !ok {
  1557  				stats.MalformedPacketsReceived.Increment()
  1558  				return fmt.Errorf("could not consume %d bytes", trim)
  1559  			}
  1560  
  1561  			proto := tcpip.TransportProtocolNumber(extHdr.Identifier)
  1562  			// If the packet was reassembled from a fragment, it will not have a
  1563  			// transport header set yet.
  1564  			if pkt.TransportHeader().View().IsEmpty() {
  1565  				e.protocol.parseTransport(pkt, proto)
  1566  			}
  1567  
  1568  			stats.PacketsDelivered.Increment()
  1569  			if proto == header.ICMPv6ProtocolNumber {
  1570  				e.handleICMP(pkt, hasFragmentHeader, routerAlert)
  1571  			} else {
  1572  				stats.PacketsDelivered.Increment()
  1573  				switch res := e.dispatcher.DeliverTransportPacket(proto, pkt); res {
  1574  				case stack.TransportPacketHandled:
  1575  				case stack.TransportPacketDestinationPortUnreachable:
  1576  					// As per RFC 4443 section 3.1:
  1577  					//   A destination node SHOULD originate a Destination Unreachable
  1578  					//   message with Code 4 in response to a packet for which the
  1579  					//   transport protocol (e.g., UDP) has no listener, if that transport
  1580  					//   protocol has no alternative means to inform the sender.
  1581  					_ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt)
  1582  					return fmt.Errorf("destination port unreachable")
  1583  				case stack.TransportPacketProtocolUnreachable:
  1584  					// As per RFC 8200 section 4. (page 7):
  1585  					//   Extension headers are numbered from IANA IP Protocol Numbers
  1586  					//   [IANA-PN], the same values used for IPv4 and IPv6.  When
  1587  					//   processing a sequence of Next Header values in a packet, the
  1588  					//   first one that is not an extension header [IANA-EH] indicates
  1589  					//   that the next item in the packet is the corresponding upper-layer
  1590  					//   header.
  1591  					// With more related information on page 8:
  1592  					//   If, as a result of processing a header, the destination node is
  1593  					//   required to proceed to the next header but the Next Header value
  1594  					//   in the current header is unrecognized by the node, it should
  1595  					//   discard the packet and send an ICMP Parameter Problem message to
  1596  					//   the source of the packet, with an ICMP Code value of 1
  1597  					//   ("unrecognized Next Header type encountered") and the ICMP
  1598  					//   Pointer field containing the offset of the unrecognized value
  1599  					//   within the original packet.
  1600  					//
  1601  					// Which when taken together indicate that an unknown protocol should
  1602  					// be treated as an unrecognized next header value.
  1603  					// The location of the Next Header field is in a different place in
  1604  					// the initial IPv6 header than it is in the extension headers so
  1605  					// treat it specially.
  1606  					prevHdrIDOffset := uint32(header.IPv6NextHeaderOffset)
  1607  					if previousHeaderStart != 0 {
  1608  						prevHdrIDOffset = previousHeaderStart
  1609  					}
  1610  					_ = e.protocol.returnError(&icmpReasonParameterProblem{
  1611  						code:    header.ICMPv6UnknownHeader,
  1612  						pointer: prevHdrIDOffset,
  1613  					}, pkt)
  1614  					return fmt.Errorf("transport protocol unreachable")
  1615  				default:
  1616  					panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res))
  1617  				}
  1618  			}
  1619  
  1620  		default:
  1621  			// Since the iterator returns IPv6RawPayloadHeader for unknown Extension
  1622  			// Header IDs this should never happen unless we missed a supported type
  1623  			// here.
  1624  			panic(fmt.Sprintf("unrecognized type from it.Next() = %T", extHdr))
  1625  
  1626  		}
  1627  	}
  1628  	return nil
  1629  }
  1630  
  1631  // Close cleans up resources associated with the endpoint.
  1632  func (e *endpoint) Close() {
  1633  	e.mu.Lock()
  1634  	e.disableLocked()
  1635  	e.mu.addressableEndpointState.Cleanup()
  1636  	e.mu.Unlock()
  1637  
  1638  	e.protocol.forgetEndpoint(e.nic.ID())
  1639  }
  1640  
  1641  // NetworkProtocolNumber implements stack.NetworkEndpoint.
  1642  func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
  1643  	return e.protocol.Number()
  1644  }
  1645  
  1646  // AddAndAcquirePermanentAddress implements stack.AddressableEndpoint.
  1647  func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, properties stack.AddressProperties) (stack.AddressEndpoint, tcpip.Error) {
  1648  	// TODO(b/169350103): add checks here after making sure we no longer receive
  1649  	// an empty address.
  1650  	e.mu.Lock()
  1651  	defer e.mu.Unlock()
  1652  	return e.addAndAcquirePermanentAddressLocked(addr, properties)
  1653  }
  1654  
  1655  // addAndAcquirePermanentAddressLocked is like AddAndAcquirePermanentAddress but
  1656  // with locking requirements.
  1657  //
  1658  // addAndAcquirePermanentAddressLocked also joins the passed address's
  1659  // solicited-node multicast group and start duplicate address detection.
  1660  //
  1661  // Precondition: e.mu must be write locked.
  1662  func (e *endpoint) addAndAcquirePermanentAddressLocked(addr tcpip.AddressWithPrefix, properties stack.AddressProperties) (stack.AddressEndpoint, tcpip.Error) {
  1663  	addressEndpoint, err := e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(addr, properties)
  1664  	if err != nil {
  1665  		return nil, err
  1666  	}
  1667  
  1668  	if !header.IsV6UnicastAddress(addr.Address) {
  1669  		return addressEndpoint, nil
  1670  	}
  1671  
  1672  	addressEndpoint.SetKind(stack.PermanentTentative)
  1673  
  1674  	if e.Enabled() {
  1675  		if err := e.mu.ndp.startDuplicateAddressDetection(addr.Address, addressEndpoint); err != nil {
  1676  			return nil, err
  1677  		}
  1678  	}
  1679  
  1680  	snmc := header.SolicitedNodeAddr(addr.Address)
  1681  	if err := e.joinGroupLocked(snmc); err != nil {
  1682  		// joinGroupLocked only returns an error if the group address is not a valid
  1683  		// IPv6 multicast address.
  1684  		panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", snmc, err))
  1685  	}
  1686  
  1687  	return addressEndpoint, nil
  1688  }
  1689  
  1690  // RemovePermanentAddress implements stack.AddressableEndpoint.
  1691  func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) tcpip.Error {
  1692  	e.mu.Lock()
  1693  	defer e.mu.Unlock()
  1694  
  1695  	addressEndpoint := e.getAddressRLocked(addr)
  1696  	if addressEndpoint == nil || !addressEndpoint.GetKind().IsPermanent() {
  1697  		return &tcpip.ErrBadLocalAddress{}
  1698  	}
  1699  
  1700  	return e.removePermanentEndpointLocked(addressEndpoint, true /* allowSLAACInvalidation */, &stack.DADAborted{})
  1701  }
  1702  
  1703  // removePermanentEndpointLocked is like removePermanentAddressLocked except
  1704  // it works with a stack.AddressEndpoint.
  1705  //
  1706  // Precondition: e.mu must be write locked.
  1707  func (e *endpoint) removePermanentEndpointLocked(addressEndpoint stack.AddressEndpoint, allowSLAACInvalidation bool, dadResult stack.DADResult) tcpip.Error {
  1708  	addr := addressEndpoint.AddressWithPrefix()
  1709  	// If we are removing an address generated via SLAAC, cleanup
  1710  	// its SLAAC resources and notify the integrator.
  1711  	switch addressEndpoint.ConfigType() {
  1712  	case stack.AddressConfigSlaac:
  1713  		e.mu.ndp.cleanupSLAACAddrResourcesAndNotify(addr, allowSLAACInvalidation)
  1714  	case stack.AddressConfigSlaacTemp:
  1715  		e.mu.ndp.cleanupTempSLAACAddrResourcesAndNotify(addr)
  1716  	}
  1717  
  1718  	return e.removePermanentEndpointInnerLocked(addressEndpoint, dadResult)
  1719  }
  1720  
  1721  // removePermanentEndpointInnerLocked is like removePermanentEndpointLocked
  1722  // except it does not cleanup SLAAC address state.
  1723  //
  1724  // Precondition: e.mu must be write locked.
  1725  func (e *endpoint) removePermanentEndpointInnerLocked(addressEndpoint stack.AddressEndpoint, dadResult stack.DADResult) tcpip.Error {
  1726  	addr := addressEndpoint.AddressWithPrefix()
  1727  	e.mu.ndp.stopDuplicateAddressDetection(addr.Address, dadResult)
  1728  
  1729  	if err := e.mu.addressableEndpointState.RemovePermanentEndpoint(addressEndpoint); err != nil {
  1730  		return err
  1731  	}
  1732  
  1733  	snmc := header.SolicitedNodeAddr(addr.Address)
  1734  	err := e.leaveGroupLocked(snmc)
  1735  	// The endpoint may have already left the multicast group.
  1736  	if _, ok := err.(*tcpip.ErrBadLocalAddress); ok {
  1737  		err = nil
  1738  	}
  1739  	return err
  1740  }
  1741  
  1742  // hasPermanentAddressLocked returns true if the endpoint has a permanent
  1743  // address equal to the passed address.
  1744  //
  1745  // Precondition: e.mu must be read or write locked.
  1746  func (e *endpoint) hasPermanentAddressRLocked(addr tcpip.Address) bool {
  1747  	addressEndpoint := e.getAddressRLocked(addr)
  1748  	if addressEndpoint == nil {
  1749  		return false
  1750  	}
  1751  	return addressEndpoint.GetKind().IsPermanent()
  1752  }
  1753  
  1754  // getAddressRLocked returns the endpoint for the passed address.
  1755  //
  1756  // Precondition: e.mu must be read or write locked.
  1757  func (e *endpoint) getAddressRLocked(localAddr tcpip.Address) stack.AddressEndpoint {
  1758  	return e.mu.addressableEndpointState.GetAddress(localAddr)
  1759  }
  1760  
  1761  // MainAddress implements stack.AddressableEndpoint.
  1762  func (e *endpoint) MainAddress() tcpip.AddressWithPrefix {
  1763  	e.mu.RLock()
  1764  	defer e.mu.RUnlock()
  1765  	return e.mu.addressableEndpointState.MainAddress()
  1766  }
  1767  
  1768  // AcquireAssignedAddress implements stack.AddressableEndpoint.
  1769  func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint {
  1770  	e.mu.RLock()
  1771  	defer e.mu.RUnlock()
  1772  	return e.acquireAddressOrCreateTempLocked(localAddr, allowTemp, tempPEB)
  1773  }
  1774  
  1775  // acquireAddressOrCreateTempLocked is like AcquireAssignedAddress but with
  1776  // locking requirements.
  1777  //
  1778  // Precondition: e.mu must be write locked.
  1779  func (e *endpoint) acquireAddressOrCreateTempLocked(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint {
  1780  	return e.mu.addressableEndpointState.AcquireAssignedAddress(localAddr, allowTemp, tempPEB)
  1781  }
  1782  
  1783  // AcquireOutgoingPrimaryAddress implements stack.AddressableEndpoint.
  1784  func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint {
  1785  	e.mu.RLock()
  1786  	defer e.mu.RUnlock()
  1787  	return e.acquireOutgoingPrimaryAddressRLocked(remoteAddr, allowExpired)
  1788  }
  1789  
  1790  // getLinkLocalAddressRLocked returns a link-local address from the primary list
  1791  // of addresses, if one is available.
  1792  //
  1793  // See stack.PrimaryEndpointBehavior for more details about the primary list.
  1794  //
  1795  // Precondition: e.mu must be read locked.
  1796  func (e *endpoint) getLinkLocalAddressRLocked() tcpip.Address {
  1797  	var linkLocalAddr tcpip.Address
  1798  	e.mu.addressableEndpointState.ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
  1799  		if addressEndpoint.IsAssigned(false /* allowExpired */) {
  1800  			if addr := addressEndpoint.AddressWithPrefix().Address; header.IsV6LinkLocalUnicastAddress(addr) {
  1801  				linkLocalAddr = addr
  1802  				return false
  1803  			}
  1804  		}
  1805  		return true
  1806  	})
  1807  	return linkLocalAddr
  1808  }
  1809  
  1810  // acquireOutgoingPrimaryAddressRLocked is like AcquireOutgoingPrimaryAddress
  1811  // but with locking requirements.
  1812  //
  1813  // Precondition: e.mu must be read locked.
  1814  func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint {
  1815  	// addrCandidate is a candidate for Source Address Selection, as per
  1816  	// RFC 6724 section 5.
  1817  	type addrCandidate struct {
  1818  		addressEndpoint stack.AddressEndpoint
  1819  		addr            tcpip.Address
  1820  		scope           header.IPv6AddressScope
  1821  
  1822  		label          uint8
  1823  		matchingPrefix uint8
  1824  	}
  1825  
  1826  	if len(remoteAddr) == 0 {
  1827  		return e.mu.addressableEndpointState.AcquireOutgoingPrimaryAddress(remoteAddr, allowExpired)
  1828  	}
  1829  
  1830  	// Create a candidate set of available addresses we can potentially use as a
  1831  	// source address.
  1832  	var cs []addrCandidate
  1833  	e.mu.addressableEndpointState.ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
  1834  		// If r is not valid for outgoing connections, it is not a valid endpoint.
  1835  		if !addressEndpoint.IsAssigned(allowExpired) {
  1836  			return true
  1837  		}
  1838  
  1839  		addr := addressEndpoint.AddressWithPrefix().Address
  1840  		scope, err := header.ScopeForIPv6Address(addr)
  1841  		if err != nil {
  1842  			// Should never happen as we got r from the primary IPv6 endpoint list and
  1843  			// ScopeForIPv6Address only returns an error if addr is not an IPv6
  1844  			// address.
  1845  			panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", addr, err))
  1846  		}
  1847  
  1848  		cs = append(cs, addrCandidate{
  1849  			addressEndpoint: addressEndpoint,
  1850  			addr:            addr,
  1851  			scope:           scope,
  1852  			label:           getLabel(addr),
  1853  			matchingPrefix:  remoteAddr.MatchingPrefix(addr),
  1854  		})
  1855  
  1856  		return true
  1857  	})
  1858  
  1859  	remoteScope, err := header.ScopeForIPv6Address(remoteAddr)
  1860  	if err != nil {
  1861  		// primaryIPv6Endpoint should never be called with an invalid IPv6 address.
  1862  		panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", remoteAddr, err))
  1863  	}
  1864  
  1865  	remoteLabel := getLabel(remoteAddr)
  1866  
  1867  	// Sort the addresses as per RFC 6724 section 5 rules 1-3.
  1868  	//
  1869  	// TODO(b/146021396): Implement rules 4, 5 of RFC 6724 section 5.
  1870  	sort.Slice(cs, func(i, j int) bool {
  1871  		sa := cs[i]
  1872  		sb := cs[j]
  1873  
  1874  		// Prefer same address as per RFC 6724 section 5 rule 1.
  1875  		if sa.addr == remoteAddr {
  1876  			return true
  1877  		}
  1878  		if sb.addr == remoteAddr {
  1879  			return false
  1880  		}
  1881  
  1882  		// Prefer appropriate scope as per RFC 6724 section 5 rule 2.
  1883  		if sa.scope < sb.scope {
  1884  			return sa.scope >= remoteScope
  1885  		} else if sb.scope < sa.scope {
  1886  			return sb.scope < remoteScope
  1887  		}
  1888  
  1889  		// Avoid deprecated addresses as per RFC 6724 section 5 rule 3.
  1890  		if saDep, sbDep := sa.addressEndpoint.Deprecated(), sb.addressEndpoint.Deprecated(); saDep != sbDep {
  1891  			// If sa is not deprecated, it is preferred over sb.
  1892  			return sbDep
  1893  		}
  1894  
  1895  		// Prefer matching label as per RFC 6724 section 5 rule 6.
  1896  		if sa, sb := sa.label == remoteLabel, sb.label == remoteLabel; sa != sb {
  1897  			if sa {
  1898  				return true
  1899  			}
  1900  			if sb {
  1901  				return false
  1902  			}
  1903  		}
  1904  
  1905  		// Prefer temporary addresses as per RFC 6724 section 5 rule 7.
  1906  		if saTemp, sbTemp := sa.addressEndpoint.ConfigType() == stack.AddressConfigSlaacTemp, sb.addressEndpoint.ConfigType() == stack.AddressConfigSlaacTemp; saTemp != sbTemp {
  1907  			return saTemp
  1908  		}
  1909  
  1910  		// Use longest matching prefix as per RFC 6724 section 5 rule 8.
  1911  		if sa.matchingPrefix > sb.matchingPrefix {
  1912  			return true
  1913  		}
  1914  		if sb.matchingPrefix > sa.matchingPrefix {
  1915  			return false
  1916  		}
  1917  
  1918  		// sa and sb are equal, return the endpoint that is closest to the front of
  1919  		// the primary endpoint list.
  1920  		return i < j
  1921  	})
  1922  
  1923  	// Return the most preferred address that can have its reference count
  1924  	// incremented.
  1925  	for _, c := range cs {
  1926  		if c.addressEndpoint.IncRef() {
  1927  			return c.addressEndpoint
  1928  		}
  1929  	}
  1930  
  1931  	return nil
  1932  }
  1933  
  1934  // PrimaryAddresses implements stack.AddressableEndpoint.
  1935  func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix {
  1936  	e.mu.RLock()
  1937  	defer e.mu.RUnlock()
  1938  	return e.mu.addressableEndpointState.PrimaryAddresses()
  1939  }
  1940  
  1941  // PermanentAddresses implements stack.AddressableEndpoint.
  1942  func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix {
  1943  	e.mu.RLock()
  1944  	defer e.mu.RUnlock()
  1945  	return e.mu.addressableEndpointState.PermanentAddresses()
  1946  }
  1947  
  1948  // JoinGroup implements stack.GroupAddressableEndpoint.
  1949  func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error {
  1950  	e.mu.Lock()
  1951  	defer e.mu.Unlock()
  1952  	return e.joinGroupLocked(addr)
  1953  }
  1954  
  1955  // joinGroupLocked is like JoinGroup but with locking requirements.
  1956  //
  1957  // Precondition: e.mu must be locked.
  1958  func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error {
  1959  	if !header.IsV6MulticastAddress(addr) {
  1960  		return &tcpip.ErrBadAddress{}
  1961  	}
  1962  
  1963  	e.mu.mld.joinGroup(addr)
  1964  	return nil
  1965  }
  1966  
  1967  // LeaveGroup implements stack.GroupAddressableEndpoint.
  1968  func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error {
  1969  	e.mu.Lock()
  1970  	defer e.mu.Unlock()
  1971  	return e.leaveGroupLocked(addr)
  1972  }
  1973  
  1974  // leaveGroupLocked is like LeaveGroup but with locking requirements.
  1975  //
  1976  // Precondition: e.mu must be locked.
  1977  func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error {
  1978  	return e.mu.mld.leaveGroup(addr)
  1979  }
  1980  
  1981  // IsInGroup implements stack.GroupAddressableEndpoint.
  1982  func (e *endpoint) IsInGroup(addr tcpip.Address) bool {
  1983  	e.mu.RLock()
  1984  	defer e.mu.RUnlock()
  1985  	return e.mu.mld.isInGroup(addr)
  1986  }
  1987  
  1988  // Stats implements stack.NetworkEndpoint.
  1989  func (e *endpoint) Stats() stack.NetworkEndpointStats {
  1990  	return &e.stats.localStats
  1991  }
  1992  
  1993  var _ stack.NetworkProtocol = (*protocol)(nil)
  1994  var _ fragmentation.TimeoutHandler = (*protocol)(nil)
  1995  
// protocol holds the IPv6 state that is shared by all of the per-NIC endpoints
// created through NewEndpoint.
type protocol struct {
	// stack is the stack this protocol is attached to; it supplies the clock,
	// secure RNG, stats, transport parsing and ICMP rate-limit decisions used
	// by this file.
	stack   *stack.Stack
	// options carries the protocol's configuration (e.g. the DAD configs
	// handed to each new endpoint).
	options Options

	mu struct {
		sync.RWMutex

		// eps is keyed by NICID to allow protocol methods to retrieve an endpoint
		// when handling a packet, by looking at which NIC handled the packet.
		eps map[tcpip.NICID]*endpoint

		// ICMP types for which the stack's global rate limiting must apply.
		icmpRateLimitedTypes map[header.ICMPv6Type]struct{}
	}

	// NOTE(review): ids and hashIV are not referenced in this part of the file;
	// presumably they back fragment identification generation - confirm.
	ids    []uint32
	hashIV uint32

	// defaultTTL is the current default TTL for the protocol. Only the
	// uint8 portion of it is meaningful.
	//
	// Must be accessed using atomic operations.
	defaultTTL uint32

	// fragmentation processes incoming fragments for reassembly.
	fragmentation   *fragmentation.Fragmentation
	// NOTE(review): icmpRateLimiter is not referenced in this part of the file;
	// confirm how it relates to stack.AllowICMPMessage.
	icmpRateLimiter *stack.ICMPRateLimiter
}
  2023  
  2024  // Number returns the ipv6 protocol number.
  2025  func (p *protocol) Number() tcpip.NetworkProtocolNumber {
  2026  	return ProtocolNumber
  2027  }
  2028  
  2029  // MinimumPacketSize returns the minimum valid ipv6 packet size.
  2030  func (p *protocol) MinimumPacketSize() int {
  2031  	return header.IPv6MinimumSize
  2032  }
  2033  
  2034  // ParseAddresses implements stack.NetworkProtocol.
  2035  func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
  2036  	h := header.IPv6(v)
  2037  	return h.SourceAddress(), h.DestinationAddress()
  2038  }
  2039  
  2040  // NewEndpoint creates a new ipv6 endpoint.
  2041  func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint {
  2042  	e := &endpoint{
  2043  		nic:        nic,
  2044  		dispatcher: dispatcher,
  2045  		protocol:   p,
  2046  	}
  2047  
  2048  	// NDP options must be 8 octet aligned and the first 2 bytes are used for
  2049  	// the type and length fields leaving 6 octets as the minimum size for a
  2050  	// nonce option without padding.
  2051  	const nonceSize = 6
  2052  
  2053  	// As per RFC 7527 section 4.1,
  2054  	//
  2055  	//   If any probe is looped back within RetransTimer milliseconds after
  2056  	//   having sent DupAddrDetectTransmits NS(DAD) messages, the interface
  2057  	//   continues with another MAX_MULTICAST_SOLICIT number of NS(DAD)
  2058  	//   messages transmitted RetransTimer milliseconds apart.
  2059  	//
  2060  	// Value taken from RFC 4861 section 10.
  2061  	const maxMulticastSolicit = 3
  2062  	dadOptions := ip.DADOptions{
  2063  		Clock:              p.stack.Clock(),
  2064  		SecureRNG:          p.stack.SecureRNG(),
  2065  		NonceSize:          nonceSize,
  2066  		ExtendDADTransmits: maxMulticastSolicit,
  2067  		Protocol:           &e.mu.ndp,
  2068  		NICID:              nic.ID(),
  2069  	}
  2070  
  2071  	e.mu.Lock()
  2072  	e.mu.addressableEndpointState.Init(e)
  2073  	e.mu.ndp.init(e, dadOptions)
  2074  	e.mu.mld.init(e)
  2075  	e.dad.mu.Lock()
  2076  	e.dad.mu.dad.Init(&e.dad.mu, p.options.DADConfigs, dadOptions)
  2077  	e.dad.mu.Unlock()
  2078  	e.mu.Unlock()
  2079  
  2080  	stackStats := p.stack.Stats()
  2081  	tcpip.InitStatCounters(reflect.ValueOf(&e.stats.localStats).Elem())
  2082  	e.stats.ip.Init(&e.stats.localStats.IP, &stackStats.IP)
  2083  	e.stats.icmp.init(&e.stats.localStats.ICMP, &stackStats.ICMP.V6)
  2084  
  2085  	p.mu.Lock()
  2086  	defer p.mu.Unlock()
  2087  	p.mu.eps[nic.ID()] = e
  2088  	return e
  2089  }
  2090  
  2091  func (p *protocol) findEndpointWithAddress(addr tcpip.Address) *endpoint {
  2092  	p.mu.RLock()
  2093  	defer p.mu.RUnlock()
  2094  
  2095  	for _, e := range p.mu.eps {
  2096  		if addressEndpoint := e.AcquireAssignedAddress(addr, false /* allowTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil {
  2097  			addressEndpoint.DecRef()
  2098  			return e
  2099  		}
  2100  	}
  2101  
  2102  	return nil
  2103  }
  2104  
  2105  func (p *protocol) getEndpointForNIC(id tcpip.NICID) (*endpoint, bool) {
  2106  	p.mu.RLock()
  2107  	defer p.mu.RUnlock()
  2108  	ep, ok := p.mu.eps[id]
  2109  	return ep, ok
  2110  }
  2111  
  2112  func (p *protocol) forgetEndpoint(nicID tcpip.NICID) {
  2113  	p.mu.Lock()
  2114  	defer p.mu.Unlock()
  2115  	delete(p.mu.eps, nicID)
  2116  }
  2117  
  2118  // SetOption implements stack.NetworkProtocol.
  2119  func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error {
  2120  	switch v := option.(type) {
  2121  	case *tcpip.DefaultTTLOption:
  2122  		p.SetDefaultTTL(uint8(*v))
  2123  		return nil
  2124  	default:
  2125  		return &tcpip.ErrUnknownProtocolOption{}
  2126  	}
  2127  }
  2128  
  2129  // Option implements stack.NetworkProtocol.
  2130  func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error {
  2131  	switch v := option.(type) {
  2132  	case *tcpip.DefaultTTLOption:
  2133  		*v = tcpip.DefaultTTLOption(p.DefaultTTL())
  2134  		return nil
  2135  	default:
  2136  		return &tcpip.ErrUnknownProtocolOption{}
  2137  	}
  2138  }
  2139  
  2140  // SetDefaultTTL sets the default TTL for endpoints created with this protocol.
  2141  func (p *protocol) SetDefaultTTL(ttl uint8) {
  2142  	atomic.StoreUint32(&p.defaultTTL, uint32(ttl))
  2143  }
  2144  
  2145  // DefaultTTL returns the default TTL for endpoints created with this protocol.
  2146  func (p *protocol) DefaultTTL() uint8 {
  2147  	return uint8(atomic.LoadUint32(&p.defaultTTL))
  2148  }
  2149  
// Close implements stack.NetworkProtocol.
//
// It is a no-op.
func (*protocol) Close() {}
  2152  
// Wait implements stack.NetworkProtocol.
//
// It is a no-op.
func (*protocol) Wait() {}
  2155  
  2156  // parseAndValidate parses the packet (including its transport layer header) and
  2157  // returns the parsed IP header.
  2158  //
  2159  // Returns true if the IP header was successfully parsed.
  2160  func (p *protocol) parseAndValidate(pkt *stack.PacketBuffer) (header.IPv6, bool) {
  2161  	transProtoNum, hasTransportHdr, ok := p.Parse(pkt)
  2162  	if !ok {
  2163  		return nil, false
  2164  	}
  2165  
  2166  	h := header.IPv6(pkt.NetworkHeader().View())
  2167  	// Do not include the link header's size when calculating the size of the IP
  2168  	// packet.
  2169  	if !h.IsValid(pkt.Size() - pkt.LinkHeader().View().Size()) {
  2170  		return nil, false
  2171  	}
  2172  
  2173  	if hasTransportHdr {
  2174  		p.parseTransport(pkt, transProtoNum)
  2175  	}
  2176  
  2177  	return h, true
  2178  }
  2179  
  2180  func (p *protocol) parseTransport(pkt *stack.PacketBuffer, transProtoNum tcpip.TransportProtocolNumber) {
  2181  	if transProtoNum == header.ICMPv6ProtocolNumber {
  2182  		// The transport layer will handle transport layer parsing errors.
  2183  		_ = parse.ICMPv6(pkt)
  2184  		return
  2185  	}
  2186  
  2187  	switch err := p.stack.ParsePacketBufferTransport(transProtoNum, pkt); err {
  2188  	case stack.ParsedOK:
  2189  	case stack.UnknownTransportProtocol, stack.TransportLayerParseError:
  2190  		// The transport layer will handle unknown protocols and transport layer
  2191  		// parsing errors.
  2192  	default:
  2193  		panic(fmt.Sprintf("unexpected error parsing transport header = %d", err))
  2194  	}
  2195  }
  2196  
  2197  // Parse implements stack.NetworkProtocol.
  2198  func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) {
  2199  	proto, _, fragOffset, fragMore, ok := parse.IPv6(pkt)
  2200  	if !ok {
  2201  		return 0, false, false
  2202  	}
  2203  
  2204  	return proto, !fragMore && fragOffset == 0, true
  2205  }
  2206  
  2207  // allowICMPReply reports whether an ICMP reply with provided type may
  2208  // be sent following the rate mask options and global ICMP rate limiter.
  2209  func (p *protocol) allowICMPReply(icmpType header.ICMPv6Type) bool {
  2210  	p.mu.RLock()
  2211  	defer p.mu.RUnlock()
  2212  
  2213  	if _, ok := p.mu.icmpRateLimitedTypes[icmpType]; ok {
  2214  		return p.stack.AllowICMPMessage()
  2215  	}
  2216  	return true
  2217  }
  2218  
  2219  // calculateNetworkMTU calculates the network-layer payload MTU based on the
  2220  // link-layer payload MTU and the length of every IPv6 header.
  2221  // Note that this is different than the Payload Length field of the IPv6 header,
  2222  // which includes the length of the extension headers.
  2223  func calculateNetworkMTU(linkMTU, networkHeadersLen uint32) (uint32, tcpip.Error) {
  2224  	if linkMTU < header.IPv6MinimumMTU {
  2225  		return 0, &tcpip.ErrInvalidEndpointState{}
  2226  	}
  2227  
  2228  	// As per RFC 7112 section 5, we should discard packets if their IPv6 header
  2229  	// is bigger than 1280 bytes (ie, the minimum link MTU) since we do not
  2230  	// support PMTU discovery:
  2231  	//   Hosts that do not discover the Path MTU MUST limit the IPv6 Header Chain
  2232  	//   length to 1280 bytes.  Limiting the IPv6 Header Chain length to 1280
  2233  	//   bytes ensures that the header chain length does not exceed the IPv6
  2234  	//   minimum MTU.
  2235  	if networkHeadersLen > header.IPv6MinimumMTU {
  2236  		return 0, &tcpip.ErrMalformedHeader{}
  2237  	}
  2238  
  2239  	networkMTU := linkMTU - networkHeadersLen
  2240  	if networkMTU > maxPayloadSize {
  2241  		networkMTU = maxPayloadSize
  2242  	}
  2243  	return networkMTU, nil
  2244  }
  2245  
// Options holds options to configure a new protocol.
//
// It is consumed by NewProtocolWithOptions; NewProtocol uses the zero value.
type Options struct {
	// NDPConfigs is the default NDP configurations used by interfaces.
	NDPConfigs NDPConfigurations

	// AutoGenLinkLocal determines whether or not the stack attempts to
	// auto-generate a link-local address for newly enabled non-loopback
	// NICs.
	//
	// Note, setting this to true does not mean that a link-local address is
	// assigned right away, or at all. If Duplicate Address Detection is enabled,
	// an address is only assigned if it successfully resolves. If it fails, no
	// further attempts are made to auto-generate a link-local address.
	//
	// The generated link-local address follows RFC 4291 Appendix A guidelines.
	AutoGenLinkLocal bool

	// NDPDisp is the NDP event dispatcher that an integrator can provide to
	// receive NDP-related events.
	NDPDisp NDPDispatcher

	// OpaqueIIDOpts holds the options for generating opaque interface
	// identifiers (IIDs) as outlined by RFC 7217.
	OpaqueIIDOpts OpaqueInterfaceIdentifierOptions

	// TempIIDSeed is used to seed the initial temporary interface identifier
	// history value used to generate IIDs for temporary SLAAC addresses.
	//
	// Temporary SLAAC addresses are short-lived addresses which are unpredictable
	// and random from the perspective of other nodes on the network. It is
	// recommended that the seed be a random byte buffer of at least
	// header.IIDSize bytes to make sure that temporary SLAAC addresses are
	// sufficiently random. It should follow minimum randomness requirements for
	// security as outlined by RFC 4086.
	//
	// Note: using a nil value, the same seed across netstack program runs, or a
	// seed that is too small would reduce randomness and increase predictability,
	// defeating the purpose of temporary SLAAC addresses.
	TempIIDSeed []byte

	// MLD holds options for MLD.
	MLD MLDOptions

	// DADConfigs holds the default DAD configurations used by IPv6 endpoints.
	DADConfigs stack.DADConfigurations

	// AllowExternalLoopbackTraffic indicates that inbound loopback packets (i.e.
	// martian loopback packets) should be accepted.
	AllowExternalLoopbackTraffic bool
}
  2296  
  2297  // NewProtocolWithOptions returns an IPv6 network protocol.
  2298  func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory {
  2299  	opts.NDPConfigs.validate()
  2300  
  2301  	ids := hash.RandN32(buckets)
  2302  	hashIV := hash.RandN32(1)[0]
  2303  
  2304  	return func(s *stack.Stack) stack.NetworkProtocol {
  2305  		p := &protocol{
  2306  			stack:   s,
  2307  			options: opts,
  2308  
  2309  			ids:    ids,
  2310  			hashIV: hashIV,
  2311  		}
  2312  		p.fragmentation = fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p)
  2313  		p.mu.eps = make(map[tcpip.NICID]*endpoint)
  2314  		p.SetDefaultTTL(DefaultTTL)
  2315  		// Set default ICMP rate limiting to Linux defaults.
  2316  		//
  2317  		// Default: 0-1,3-127 (rate limit ICMPv6 errors except Packet Too Big)
  2318  		// See https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt.
  2319  		defaultIcmpTypes := make(map[header.ICMPv6Type]struct{})
  2320  		for i := header.ICMPv6Type(0); i < header.ICMPv6EchoRequest; i++ {
  2321  			switch i {
  2322  			case header.ICMPv6PacketTooBig:
  2323  				// Do not rate limit packet too big by default.
  2324  			default:
  2325  				defaultIcmpTypes[i] = struct{}{}
  2326  			}
  2327  		}
  2328  		p.mu.icmpRateLimitedTypes = defaultIcmpTypes
  2329  
  2330  		return p
  2331  	}
  2332  }
  2333  
  2334  // NewProtocol is equivalent to NewProtocolWithOptions with an empty Options.
  2335  func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
  2336  	return NewProtocolWithOptions(Options{})(s)
  2337  }
  2338  
  2339  func calculateFragmentReserve(pkt *stack.PacketBuffer) int {
  2340  	return pkt.AvailableHeaderBytes() + pkt.NetworkHeader().View().Size() + header.IPv6FragmentHeaderSize
  2341  }
  2342  
  2343  // hashRoute calculates a hash value for the given route. It uses the source &
  2344  // destination address and 32-bit number to generate the hash.
  2345  func hashRoute(r *stack.Route, hashIV uint32) uint32 {
  2346  	// The FNV-1a was chosen because it is a fast hashing algorithm, and
  2347  	// cryptographic properties are not needed here.
  2348  	h := fnv.New32a()
  2349  	if _, err := h.Write([]byte(r.LocalAddress())); err != nil {
  2350  		panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected to ever return an error", err))
  2351  	}
  2352  
  2353  	if _, err := h.Write([]byte(r.RemoteAddress())); err != nil {
  2354  		panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected to ever return an error", err))
  2355  	}
  2356  
  2357  	s := make([]byte, 4)
  2358  	binary.LittleEndian.PutUint32(s, hashIV)
  2359  	if _, err := h.Write(s); err != nil {
  2360  		panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected ever to return an error", err))
  2361  	}
  2362  
  2363  	return h.Sum32()
  2364  }
  2365  
  2366  func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeaders header.IPv6, transportProto tcpip.TransportProtocolNumber, id uint32) (*stack.PacketBuffer, bool) {
  2367  	fragPkt, offset, copied, more := pf.BuildNextFragment()
  2368  	fragPkt.NetworkProtocolNumber = ProtocolNumber
  2369  
  2370  	originalIPHeadersLength := len(originalIPHeaders)
  2371  
  2372  	s := header.IPv6ExtHdrSerializer{&header.IPv6SerializableFragmentExtHdr{
  2373  		FragmentOffset: uint16(offset / header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit),
  2374  		M:              more,
  2375  		Identification: id,
  2376  	}}
  2377  
  2378  	fragmentIPHeadersLength := originalIPHeadersLength + s.Length()
  2379  	fragmentIPHeaders := header.IPv6(fragPkt.NetworkHeader().Push(fragmentIPHeadersLength))
  2380  
  2381  	// Copy the IPv6 header and any extension headers already populated.
  2382  	if copied := copy(fragmentIPHeaders, originalIPHeaders); copied != originalIPHeadersLength {
  2383  		panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got %d, want %d", copied, originalIPHeadersLength))
  2384  	}
  2385  
  2386  	nextHeader, _ := s.Serialize(transportProto, fragmentIPHeaders[originalIPHeadersLength:])
  2387  
  2388  	fragmentIPHeaders.SetNextHeader(nextHeader)
  2389  	fragmentIPHeaders.SetPayloadLength(uint16(copied + fragmentIPHeadersLength - header.IPv6MinimumSize))
  2390  
  2391  	return fragPkt, more
  2392  }